{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8504991412623444, "eval_steps": 5963, "global_step": 31693, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.6835551738943754e-05, "grad_norm": 23.0, "learning_rate": 0.0, "loss": 12.1602, "step": 1 }, { "epoch": 5.367110347788751e-05, "grad_norm": 22.0, "learning_rate": 1.610089896685898e-07, "loss": 12.149, "step": 2 }, { "epoch": 8.050665521683126e-05, "grad_norm": 21.75, "learning_rate": 3.220179793371796e-07, "loss": 12.1623, "step": 3 }, { "epoch": 0.00010734220695577502, "grad_norm": 21.625, "learning_rate": 4.830269690057694e-07, "loss": 12.1747, "step": 4 }, { "epoch": 0.00013417775869471876, "grad_norm": 21.25, "learning_rate": 6.440359586743592e-07, "loss": 12.1786, "step": 5 }, { "epoch": 0.00016101331043366253, "grad_norm": 20.875, "learning_rate": 8.05044948342949e-07, "loss": 12.1841, "step": 6 }, { "epoch": 0.00018784886217260627, "grad_norm": 21.625, "learning_rate": 9.660539380115389e-07, "loss": 12.1835, "step": 7 }, { "epoch": 0.00021468441391155003, "grad_norm": 21.5, "learning_rate": 1.1270629276801286e-06, "loss": 12.1947, "step": 8 }, { "epoch": 0.00024151996565049377, "grad_norm": 20.875, "learning_rate": 1.2880719173487185e-06, "loss": 12.187, "step": 9 }, { "epoch": 0.0002683555173894375, "grad_norm": 21.0, "learning_rate": 1.4490809070173084e-06, "loss": 12.1882, "step": 10 }, { "epoch": 0.0002951910691283813, "grad_norm": 20.75, "learning_rate": 1.610089896685898e-06, "loss": 12.1846, "step": 11 }, { "epoch": 0.00032202662086732505, "grad_norm": 20.75, "learning_rate": 1.771098886354488e-06, "loss": 12.1898, "step": 12 }, { "epoch": 0.00034886217260626876, "grad_norm": 20.875, "learning_rate": 1.9321078760230777e-06, "loss": 12.1643, "step": 13 }, { "epoch": 0.00037569772434521253, "grad_norm": 21.125, "learning_rate": 2.0931168656916676e-06, "loss": 12.1671, "step": 14 }, { "epoch": 0.0004025332760841563, "grad_norm": 20.625, "learning_rate": 2.254125855360257e-06, "loss": 12.1522, "step": 15 }, { "epoch": 0.00042936882782310007, "grad_norm": 21.125, "learning_rate": 2.4151348450288475e-06, "loss": 12.1366, "step": 16 }, { "epoch": 0.0004562043795620438, "grad_norm": 20.125, "learning_rate": 2.576143834697437e-06, "loss": 12.1076, "step": 17 }, { "epoch": 0.00048303993130098755, "grad_norm": 20.0, "learning_rate": 2.7371528243660273e-06, "loss": 12.1244, "step": 18 }, { "epoch": 0.0005098754830399313, "grad_norm": 20.0, "learning_rate": 2.8981618140346168e-06, "loss": 12.1028, "step": 19 }, { "epoch": 0.000536711034778875, "grad_norm": 20.0, "learning_rate": 3.0591708037032063e-06, "loss": 12.1003, "step": 20 }, { "epoch": 0.0005635465865178189, "grad_norm": 19.625, "learning_rate": 3.220179793371796e-06, "loss": 12.0558, "step": 21 }, { "epoch": 0.0005903821382567626, "grad_norm": 20.25, "learning_rate": 3.381188783040386e-06, "loss": 12.0888, "step": 22 }, { "epoch": 0.0006172176899957063, "grad_norm": 19.875, "learning_rate": 3.542197772708976e-06, "loss": 12.0481, "step": 23 }, { "epoch": 0.0006440532417346501, "grad_norm": 20.125, "learning_rate": 3.7032067623775655e-06, "loss": 12.0443, "step": 24 }, { "epoch": 0.0006708887934735938, "grad_norm": 19.875, "learning_rate": 3.864215752046155e-06, "loss": 12.0114, "step": 25 }, { "epoch": 0.0006977243452125375, "grad_norm": 20.25, "learning_rate": 4.025224741714746e-06, "loss": 11.9884, "step": 26 }, { "epoch": 0.0007245598969514813, "grad_norm": 19.875, "learning_rate": 4.186233731383335e-06, "loss": 11.9318, "step": 27 }, { "epoch": 0.0007513954486904251, "grad_norm": 19.875, "learning_rate": 4.347242721051925e-06, "loss": 11.8909, "step": 28 }, { "epoch": 0.0007782310004293688, "grad_norm": 19.5, "learning_rate": 4.508251710720514e-06, "loss": 11.8532, "step": 29 }, { "epoch": 0.0008050665521683126, "grad_norm": 19.25, "learning_rate": 4.6692607003891046e-06, "loss": 11.7931, "step": 30 }, { "epoch": 0.0008319021039072563, "grad_norm": 18.875, "learning_rate": 4.830269690057695e-06, "loss": 11.7485, "step": 31 }, { "epoch": 0.0008587376556462001, "grad_norm": 18.125, "learning_rate": 4.9912786797262836e-06, "loss": 11.7241, "step": 32 }, { "epoch": 0.0008855732073851438, "grad_norm": 17.5, "learning_rate": 5.152287669394874e-06, "loss": 11.6546, "step": 33 }, { "epoch": 0.0009124087591240876, "grad_norm": 17.0, "learning_rate": 5.313296659063464e-06, "loss": 11.5976, "step": 34 }, { "epoch": 0.0009392443108630314, "grad_norm": 16.75, "learning_rate": 5.4743056487320546e-06, "loss": 11.5718, "step": 35 }, { "epoch": 0.0009660798626019751, "grad_norm": 16.25, "learning_rate": 5.635314638400643e-06, "loss": 11.5202, "step": 36 }, { "epoch": 0.0009929154143409188, "grad_norm": 16.0, "learning_rate": 5.7963236280692336e-06, "loss": 11.4675, "step": 37 }, { "epoch": 0.0010197509660798626, "grad_norm": 15.5, "learning_rate": 5.957332617737824e-06, "loss": 11.4105, "step": 38 }, { "epoch": 0.0010465865178188065, "grad_norm": 15.0625, "learning_rate": 6.1183416074064125e-06, "loss": 11.3622, "step": 39 }, { "epoch": 0.00107342206955775, "grad_norm": 14.8125, "learning_rate": 6.279350597075003e-06, "loss": 11.3118, "step": 40 }, { "epoch": 0.0011002576212966939, "grad_norm": 14.0625, "learning_rate": 6.440359586743592e-06, "loss": 11.2456, "step": 41 }, { "epoch": 0.0011270931730356377, "grad_norm": 13.8125, "learning_rate": 6.601368576412183e-06, "loss": 11.1746, "step": 42 }, { "epoch": 0.0011539287247745813, "grad_norm": 13.625, "learning_rate": 6.762377566080772e-06, "loss": 11.109, "step": 43 }, { "epoch": 0.0011807642765135251, "grad_norm": 13.125, "learning_rate": 6.923386555749362e-06, "loss": 11.0127, "step": 44 }, { "epoch": 0.001207599828252469, "grad_norm": 12.4375, "learning_rate": 7.084395545417952e-06, "loss": 10.9432, "step": 45 }, { "epoch": 0.0012344353799914126, "grad_norm": 11.8125, "learning_rate": 7.2454045350865415e-06, "loss": 10.8404, "step": 46 }, { "epoch": 0.0012612709317303564, "grad_norm": 11.1875, "learning_rate": 7.406413524755131e-06, "loss": 10.7622, "step": 47 }, { "epoch": 0.0012881064834693002, "grad_norm": 10.5, "learning_rate": 7.567422514423721e-06, "loss": 10.712, "step": 48 }, { "epoch": 0.0013149420352082438, "grad_norm": 10.375, "learning_rate": 7.72843150409231e-06, "loss": 10.5515, "step": 49 }, { "epoch": 0.0013417775869471876, "grad_norm": 9.75, "learning_rate": 7.889440493760901e-06, "loss": 10.5076, "step": 50 }, { "epoch": 0.0013686131386861315, "grad_norm": 9.0625, "learning_rate": 8.050449483429492e-06, "loss": 10.4735, "step": 51 }, { "epoch": 0.001395448690425075, "grad_norm": 8.5, "learning_rate": 8.21145847309808e-06, "loss": 10.4233, "step": 52 }, { "epoch": 0.0014222842421640189, "grad_norm": 8.1875, "learning_rate": 8.37246746276667e-06, "loss": 10.3331, "step": 53 }, { "epoch": 0.0014491197939029627, "grad_norm": 7.78125, "learning_rate": 8.533476452435259e-06, "loss": 10.2333, "step": 54 }, { "epoch": 0.0014759553456419063, "grad_norm": 7.46875, "learning_rate": 8.69448544210385e-06, "loss": 10.1522, "step": 55 }, { "epoch": 0.0015027908973808501, "grad_norm": 7.1875, "learning_rate": 8.85549443177244e-06, "loss": 10.1118, "step": 56 }, { "epoch": 0.001529626449119794, "grad_norm": 6.875, "learning_rate": 9.016503421441028e-06, "loss": 10.0256, "step": 57 }, { "epoch": 0.0015564620008587376, "grad_norm": 6.625, "learning_rate": 9.177512411109619e-06, "loss": 10.011, "step": 58 }, { "epoch": 0.0015832975525976814, "grad_norm": 6.34375, "learning_rate": 9.338521400778209e-06, "loss": 9.939, "step": 59 }, { "epoch": 0.0016101331043366252, "grad_norm": 6.40625, "learning_rate": 9.4995303904468e-06, "loss": 9.8414, "step": 60 }, { "epoch": 0.001636968656075569, "grad_norm": 6.09375, "learning_rate": 9.66053938011539e-06, "loss": 9.7155, "step": 61 }, { "epoch": 0.0016638042078145126, "grad_norm": 6.03125, "learning_rate": 9.82154836978398e-06, "loss": 9.6421, "step": 62 }, { "epoch": 0.0016906397595534564, "grad_norm": 5.59375, "learning_rate": 9.982557359452567e-06, "loss": 9.6509, "step": 63 }, { "epoch": 0.0017174753112924003, "grad_norm": 5.625, "learning_rate": 1.0143566349121157e-05, "loss": 9.5028, "step": 64 }, { "epoch": 0.0017443108630313439, "grad_norm": 5.3125, "learning_rate": 1.0304575338789748e-05, "loss": 9.4766, "step": 65 }, { "epoch": 0.0017711464147702877, "grad_norm": 5.34375, "learning_rate": 1.0465584328458338e-05, "loss": 9.3373, "step": 66 }, { "epoch": 0.0017979819665092315, "grad_norm": 5.15625, "learning_rate": 1.0626593318126928e-05, "loss": 9.2621, "step": 67 }, { "epoch": 0.0018248175182481751, "grad_norm": 5.03125, "learning_rate": 1.0787602307795519e-05, "loss": 9.1743, "step": 68 }, { "epoch": 0.001851653069987119, "grad_norm": 4.84375, "learning_rate": 1.0948611297464109e-05, "loss": 9.1566, "step": 69 }, { "epoch": 0.0018784886217260628, "grad_norm": 4.78125, "learning_rate": 1.1109620287132696e-05, "loss": 9.0834, "step": 70 }, { "epoch": 0.0019053241734650064, "grad_norm": 4.78125, "learning_rate": 1.1270629276801286e-05, "loss": 8.9972, "step": 71 }, { "epoch": 0.0019321597252039502, "grad_norm": 4.46875, "learning_rate": 1.1431638266469877e-05, "loss": 9.004, "step": 72 }, { "epoch": 0.001958995276942894, "grad_norm": 4.40625, "learning_rate": 1.1592647256138467e-05, "loss": 8.8808, "step": 73 }, { "epoch": 0.0019858308286818376, "grad_norm": 4.40625, "learning_rate": 1.1753656245807057e-05, "loss": 8.8098, "step": 74 }, { "epoch": 0.0020126663804207814, "grad_norm": 4.1875, "learning_rate": 1.1914665235475648e-05, "loss": 8.8367, "step": 75 }, { "epoch": 0.0020395019321597253, "grad_norm": 4.09375, "learning_rate": 1.2075674225144236e-05, "loss": 8.8829, "step": 76 }, { "epoch": 0.002066337483898669, "grad_norm": 4.0625, "learning_rate": 1.2236683214812825e-05, "loss": 8.6951, "step": 77 }, { "epoch": 0.002093173035637613, "grad_norm": 3.8125, "learning_rate": 1.2397692204481415e-05, "loss": 8.7055, "step": 78 }, { "epoch": 0.0021200085873765563, "grad_norm": 3.765625, "learning_rate": 1.2558701194150006e-05, "loss": 8.7196, "step": 79 }, { "epoch": 0.0021468441391155, "grad_norm": 3.765625, "learning_rate": 1.2719710183818596e-05, "loss": 8.5661, "step": 80 }, { "epoch": 0.002173679690854444, "grad_norm": 3.734375, "learning_rate": 1.2880719173487185e-05, "loss": 8.5258, "step": 81 }, { "epoch": 0.0022005152425933878, "grad_norm": 3.609375, "learning_rate": 1.3041728163155775e-05, "loss": 8.4935, "step": 82 }, { "epoch": 0.0022273507943323316, "grad_norm": 3.53125, "learning_rate": 1.3202737152824365e-05, "loss": 8.5157, "step": 83 }, { "epoch": 0.0022541863460712754, "grad_norm": 3.390625, "learning_rate": 1.3363746142492954e-05, "loss": 8.4351, "step": 84 }, { "epoch": 0.002281021897810219, "grad_norm": 3.328125, "learning_rate": 1.3524755132161544e-05, "loss": 8.4169, "step": 85 }, { "epoch": 0.0023078574495491626, "grad_norm": 3.171875, "learning_rate": 1.3685764121830135e-05, "loss": 8.3131, "step": 86 }, { "epoch": 0.0023346930012881064, "grad_norm": 3.140625, "learning_rate": 1.3846773111498723e-05, "loss": 8.3111, "step": 87 }, { "epoch": 0.0023615285530270503, "grad_norm": 3.1875, "learning_rate": 1.4007782101167314e-05, "loss": 8.1959, "step": 88 }, { "epoch": 0.002388364104765994, "grad_norm": 3.109375, "learning_rate": 1.4168791090835904e-05, "loss": 8.1868, "step": 89 }, { "epoch": 0.002415199656504938, "grad_norm": 3.0, "learning_rate": 1.4329800080504494e-05, "loss": 8.1668, "step": 90 }, { "epoch": 0.0024420352082438813, "grad_norm": 2.984375, "learning_rate": 1.4490809070173083e-05, "loss": 8.1232, "step": 91 }, { "epoch": 0.002468870759982825, "grad_norm": 3.125, "learning_rate": 1.4651818059841672e-05, "loss": 8.0333, "step": 92 }, { "epoch": 0.002495706311721769, "grad_norm": 3.078125, "learning_rate": 1.4812827049510262e-05, "loss": 8.1015, "step": 93 }, { "epoch": 0.0025225418634607128, "grad_norm": 3.171875, "learning_rate": 1.4973836039178852e-05, "loss": 8.015, "step": 94 }, { "epoch": 0.0025493774151996566, "grad_norm": 3.0625, "learning_rate": 1.5134845028847443e-05, "loss": 7.9092, "step": 95 }, { "epoch": 0.0025762129669386004, "grad_norm": 2.953125, "learning_rate": 1.5295854018516033e-05, "loss": 7.9242, "step": 96 }, { "epoch": 0.0026030485186775442, "grad_norm": 2.96875, "learning_rate": 1.545686300818462e-05, "loss": 7.7889, "step": 97 }, { "epoch": 0.0026298840704164876, "grad_norm": 3.171875, "learning_rate": 1.5617871997853214e-05, "loss": 7.8134, "step": 98 }, { "epoch": 0.0026567196221554314, "grad_norm": 2.890625, "learning_rate": 1.5778880987521802e-05, "loss": 7.8317, "step": 99 }, { "epoch": 0.0026835551738943753, "grad_norm": 2.9375, "learning_rate": 1.593988997719039e-05, "loss": 7.6739, "step": 100 }, { "epoch": 0.002710390725633319, "grad_norm": 3.046875, "learning_rate": 1.6100898966858983e-05, "loss": 7.7654, "step": 101 }, { "epoch": 0.002737226277372263, "grad_norm": 2.9375, "learning_rate": 1.626190795652757e-05, "loss": 7.692, "step": 102 }, { "epoch": 0.0027640618291112067, "grad_norm": 2.875, "learning_rate": 1.642291694619616e-05, "loss": 7.5909, "step": 103 }, { "epoch": 0.00279089738085015, "grad_norm": 3.28125, "learning_rate": 1.6583925935864752e-05, "loss": 7.8658, "step": 104 }, { "epoch": 0.002817732932589094, "grad_norm": 2.8125, "learning_rate": 1.674493492553334e-05, "loss": 7.7836, "step": 105 }, { "epoch": 0.0028445684843280378, "grad_norm": 3.359375, "learning_rate": 1.690594391520193e-05, "loss": 7.5984, "step": 106 }, { "epoch": 0.0028714040360669816, "grad_norm": 3.375, "learning_rate": 1.7066952904870518e-05, "loss": 7.7566, "step": 107 }, { "epoch": 0.0028982395878059254, "grad_norm": 2.75, "learning_rate": 1.722796189453911e-05, "loss": 7.7779, "step": 108 }, { "epoch": 0.0029250751395448692, "grad_norm": 3.125, "learning_rate": 1.73889708842077e-05, "loss": 7.6505, "step": 109 }, { "epoch": 0.0029519106912838126, "grad_norm": 3.578125, "learning_rate": 1.754997987387629e-05, "loss": 7.5853, "step": 110 }, { "epoch": 0.0029787462430227564, "grad_norm": 2.890625, "learning_rate": 1.771098886354488e-05, "loss": 7.6778, "step": 111 }, { "epoch": 0.0030055817947617003, "grad_norm": 3.734375, "learning_rate": 1.787199785321347e-05, "loss": 7.6615, "step": 112 }, { "epoch": 0.003032417346500644, "grad_norm": 3.1875, "learning_rate": 1.8033006842882057e-05, "loss": 7.5462, "step": 113 }, { "epoch": 0.003059252898239588, "grad_norm": 2.859375, "learning_rate": 1.819401583255065e-05, "loss": 7.5449, "step": 114 }, { "epoch": 0.0030860884499785317, "grad_norm": 3.234375, "learning_rate": 1.8355024822219238e-05, "loss": 7.5364, "step": 115 }, { "epoch": 0.003112924001717475, "grad_norm": 2.875, "learning_rate": 1.851603381188783e-05, "loss": 7.6085, "step": 116 }, { "epoch": 0.003139759553456419, "grad_norm": 2.828125, "learning_rate": 1.8677042801556418e-05, "loss": 7.4123, "step": 117 }, { "epoch": 0.0031665951051953628, "grad_norm": 2.921875, "learning_rate": 1.883805179122501e-05, "loss": 7.4504, "step": 118 }, { "epoch": 0.0031934306569343066, "grad_norm": 3.234375, "learning_rate": 1.89990607808936e-05, "loss": 7.4045, "step": 119 }, { "epoch": 0.0032202662086732504, "grad_norm": 2.875, "learning_rate": 1.9160069770562188e-05, "loss": 7.4829, "step": 120 }, { "epoch": 0.003247101760412194, "grad_norm": 2.796875, "learning_rate": 1.932107876023078e-05, "loss": 7.3045, "step": 121 }, { "epoch": 0.003273937312151138, "grad_norm": 3.296875, "learning_rate": 1.9482087749899368e-05, "loss": 7.2618, "step": 122 }, { "epoch": 0.0033007728638900814, "grad_norm": 2.953125, "learning_rate": 1.964309673956796e-05, "loss": 7.3219, "step": 123 }, { "epoch": 0.0033276084156290252, "grad_norm": 3.015625, "learning_rate": 1.9804105729236546e-05, "loss": 7.319, "step": 124 }, { "epoch": 0.003354443967367969, "grad_norm": 3.0625, "learning_rate": 1.9965114718905134e-05, "loss": 7.2301, "step": 125 }, { "epoch": 0.003381279519106913, "grad_norm": 2.859375, "learning_rate": 2.0126123708573726e-05, "loss": 7.2188, "step": 126 }, { "epoch": 0.0034081150708458567, "grad_norm": 3.03125, "learning_rate": 2.0287132698242315e-05, "loss": 7.0592, "step": 127 }, { "epoch": 0.0034349506225848005, "grad_norm": 3.09375, "learning_rate": 2.0448141687910907e-05, "loss": 7.0352, "step": 128 }, { "epoch": 0.003461786174323744, "grad_norm": 3.484375, "learning_rate": 2.0609150677579496e-05, "loss": 7.2081, "step": 129 }, { "epoch": 0.0034886217260626877, "grad_norm": 3.25, "learning_rate": 2.0770159667248088e-05, "loss": 7.1505, "step": 130 }, { "epoch": 0.0035154572778016316, "grad_norm": 3.296875, "learning_rate": 2.0931168656916676e-05, "loss": 7.1278, "step": 131 }, { "epoch": 0.0035422928295405754, "grad_norm": 3.078125, "learning_rate": 2.1092177646585265e-05, "loss": 7.1937, "step": 132 }, { "epoch": 0.003569128381279519, "grad_norm": 3.6875, "learning_rate": 2.1253186636253857e-05, "loss": 7.2224, "step": 133 }, { "epoch": 0.003595963933018463, "grad_norm": 3.78125, "learning_rate": 2.1414195625922446e-05, "loss": 7.1188, "step": 134 }, { "epoch": 0.0036227994847574064, "grad_norm": 2.890625, "learning_rate": 2.1575204615591038e-05, "loss": 7.0988, "step": 135 }, { "epoch": 0.0036496350364963502, "grad_norm": 4.40625, "learning_rate": 2.1736213605259626e-05, "loss": 6.8668, "step": 136 }, { "epoch": 0.003676470588235294, "grad_norm": 7.6875, "learning_rate": 2.1897222594928218e-05, "loss": 7.1607, "step": 137 }, { "epoch": 0.003703306139974238, "grad_norm": 4.71875, "learning_rate": 2.2058231584596804e-05, "loss": 7.2445, "step": 138 }, { "epoch": 0.0037301416917131817, "grad_norm": 4.09375, "learning_rate": 2.2219240574265392e-05, "loss": 7.2161, "step": 139 }, { "epoch": 0.0037569772434521255, "grad_norm": 7.1875, "learning_rate": 2.2380249563933984e-05, "loss": 7.1314, "step": 140 }, { "epoch": 0.003783812795191069, "grad_norm": 7.46875, "learning_rate": 2.2541258553602573e-05, "loss": 7.1222, "step": 141 }, { "epoch": 0.0038106483469300127, "grad_norm": 5.34375, "learning_rate": 2.270226754327116e-05, "loss": 7.0682, "step": 142 }, { "epoch": 0.0038374838986689566, "grad_norm": 4.71875, "learning_rate": 2.2863276532939754e-05, "loss": 7.0054, "step": 143 }, { "epoch": 0.0038643194504079004, "grad_norm": 4.90625, "learning_rate": 2.3024285522608342e-05, "loss": 7.0788, "step": 144 }, { "epoch": 0.003891155002146844, "grad_norm": 4.90625, "learning_rate": 2.3185294512276934e-05, "loss": 7.0394, "step": 145 }, { "epoch": 0.003917990553885788, "grad_norm": 2.78125, "learning_rate": 2.3346303501945523e-05, "loss": 7.1298, "step": 146 }, { "epoch": 0.003944826105624731, "grad_norm": 3.46875, "learning_rate": 2.3507312491614115e-05, "loss": 6.9369, "step": 147 }, { "epoch": 0.003971661657363675, "grad_norm": 3.46875, "learning_rate": 2.3668321481282704e-05, "loss": 6.947, "step": 148 }, { "epoch": 0.003998497209102619, "grad_norm": 3.34375, "learning_rate": 2.3829330470951296e-05, "loss": 6.9587, "step": 149 }, { "epoch": 0.004025332760841563, "grad_norm": 3.3125, "learning_rate": 2.3990339460619884e-05, "loss": 6.9034, "step": 150 }, { "epoch": 0.004052168312580507, "grad_norm": 3.6875, "learning_rate": 2.4151348450288473e-05, "loss": 6.9652, "step": 151 }, { "epoch": 0.0040790038643194505, "grad_norm": 3.15625, "learning_rate": 2.4312357439957065e-05, "loss": 6.9913, "step": 152 }, { "epoch": 0.004105839416058394, "grad_norm": 2.8125, "learning_rate": 2.447336642962565e-05, "loss": 6.8742, "step": 153 }, { "epoch": 0.004132674967797338, "grad_norm": 3.3125, "learning_rate": 2.463437541929424e-05, "loss": 6.9061, "step": 154 }, { "epoch": 0.004159510519536282, "grad_norm": 3.03125, "learning_rate": 2.479538440896283e-05, "loss": 6.8014, "step": 155 }, { "epoch": 0.004186346071275226, "grad_norm": 3.0625, "learning_rate": 2.495639339863142e-05, "loss": 6.8507, "step": 156 }, { "epoch": 0.004213181623014169, "grad_norm": 3.03125, "learning_rate": 2.511740238830001e-05, "loss": 6.7461, "step": 157 }, { "epoch": 0.004240017174753113, "grad_norm": 2.875, "learning_rate": 2.52784113779686e-05, "loss": 6.9362, "step": 158 }, { "epoch": 0.004266852726492056, "grad_norm": 2.765625, "learning_rate": 2.5439420367637192e-05, "loss": 6.762, "step": 159 }, { "epoch": 0.004293688278231, "grad_norm": 3.234375, "learning_rate": 2.560042935730578e-05, "loss": 6.7652, "step": 160 }, { "epoch": 0.004320523829969944, "grad_norm": 3.140625, "learning_rate": 2.576143834697437e-05, "loss": 6.7188, "step": 161 }, { "epoch": 0.004347359381708888, "grad_norm": 3.59375, "learning_rate": 2.592244733664296e-05, "loss": 6.799, "step": 162 }, { "epoch": 0.004374194933447832, "grad_norm": 2.921875, "learning_rate": 2.608345632631155e-05, "loss": 6.6714, "step": 163 }, { "epoch": 0.0044010304851867755, "grad_norm": 2.734375, "learning_rate": 2.6244465315980142e-05, "loss": 6.7328, "step": 164 }, { "epoch": 0.004427866036925719, "grad_norm": 2.71875, "learning_rate": 2.640547430564873e-05, "loss": 6.6445, "step": 165 }, { "epoch": 0.004454701588664663, "grad_norm": 2.640625, "learning_rate": 2.6566483295317323e-05, "loss": 6.6654, "step": 166 }, { "epoch": 0.004481537140403607, "grad_norm": 2.703125, "learning_rate": 2.6727492284985908e-05, "loss": 6.6633, "step": 167 }, { "epoch": 0.004508372692142551, "grad_norm": 3.125, "learning_rate": 2.6888501274654497e-05, "loss": 6.5859, "step": 168 }, { "epoch": 0.004535208243881495, "grad_norm": 2.765625, "learning_rate": 2.704951026432309e-05, "loss": 6.5328, "step": 169 }, { "epoch": 0.004562043795620438, "grad_norm": 3.65625, "learning_rate": 2.7210519253991677e-05, "loss": 6.6346, "step": 170 }, { "epoch": 0.004588879347359381, "grad_norm": 3.125, "learning_rate": 2.737152824366027e-05, "loss": 6.5254, "step": 171 }, { "epoch": 0.004615714899098325, "grad_norm": 3.015625, "learning_rate": 2.7532537233328858e-05, "loss": 6.5831, "step": 172 }, { "epoch": 0.004642550450837269, "grad_norm": 3.171875, "learning_rate": 2.7693546222997447e-05, "loss": 6.575, "step": 173 }, { "epoch": 0.004669386002576213, "grad_norm": 4.625, "learning_rate": 2.785455521266604e-05, "loss": 6.6645, "step": 174 }, { "epoch": 0.004696221554315157, "grad_norm": 5.625, "learning_rate": 2.8015564202334627e-05, "loss": 6.7154, "step": 175 }, { "epoch": 0.0047230571060541005, "grad_norm": 3.828125, "learning_rate": 2.817657319200322e-05, "loss": 6.7483, "step": 176 }, { "epoch": 0.004749892657793044, "grad_norm": 8.3125, "learning_rate": 2.8337582181671808e-05, "loss": 6.6392, "step": 177 }, { "epoch": 0.004776728209531988, "grad_norm": 9.875, "learning_rate": 2.84985911713404e-05, "loss": 6.6392, "step": 178 }, { "epoch": 0.004803563761270932, "grad_norm": 8.25, "learning_rate": 2.865960016100899e-05, "loss": 6.7134, "step": 179 }, { "epoch": 0.004830399313009876, "grad_norm": 4.9375, "learning_rate": 2.8820609150677577e-05, "loss": 6.5937, "step": 180 }, { "epoch": 0.00485723486474882, "grad_norm": 6.25, "learning_rate": 2.8981618140346166e-05, "loss": 6.6392, "step": 181 }, { "epoch": 0.004884070416487763, "grad_norm": 6.96875, "learning_rate": 2.9142627130014755e-05, "loss": 6.737, "step": 182 }, { "epoch": 0.004910905968226706, "grad_norm": 6.03125, "learning_rate": 2.9303636119683343e-05, "loss": 6.6746, "step": 183 }, { "epoch": 0.00493774151996565, "grad_norm": 4.34375, "learning_rate": 2.9464645109351935e-05, "loss": 6.6447, "step": 184 }, { "epoch": 0.004964577071704594, "grad_norm": 5.40625, "learning_rate": 2.9625654099020524e-05, "loss": 6.5394, "step": 185 }, { "epoch": 0.004991412623443538, "grad_norm": 5.125, "learning_rate": 2.9786663088689116e-05, "loss": 6.6231, "step": 186 }, { "epoch": 0.005018248175182482, "grad_norm": 4.09375, "learning_rate": 2.9947672078357705e-05, "loss": 6.6708, "step": 187 }, { "epoch": 0.0050450837269214255, "grad_norm": 4.25, "learning_rate": 3.0108681068026297e-05, "loss": 6.4843, "step": 188 }, { "epoch": 0.005071919278660369, "grad_norm": 4.46875, "learning_rate": 3.0269690057694885e-05, "loss": 6.433, "step": 189 }, { "epoch": 0.005098754830399313, "grad_norm": 3.53125, "learning_rate": 3.0430699047363474e-05, "loss": 6.4681, "step": 190 }, { "epoch": 0.005125590382138257, "grad_norm": 3.828125, "learning_rate": 3.0591708037032066e-05, "loss": 6.4424, "step": 191 }, { "epoch": 0.005152425933877201, "grad_norm": 3.6875, "learning_rate": 3.0752717026700655e-05, "loss": 6.4809, "step": 192 }, { "epoch": 0.005179261485616145, "grad_norm": 3.53125, "learning_rate": 3.091372601636924e-05, "loss": 6.3618, "step": 193 }, { "epoch": 0.0052060970373550884, "grad_norm": 4.09375, "learning_rate": 3.107473500603784e-05, "loss": 6.5046, "step": 194 }, { "epoch": 0.005232932589094031, "grad_norm": 3.609375, "learning_rate": 3.123574399570643e-05, "loss": 6.3556, "step": 195 }, { "epoch": 0.005259768140832975, "grad_norm": 2.578125, "learning_rate": 3.139675298537501e-05, "loss": 6.3739, "step": 196 }, { "epoch": 0.005286603692571919, "grad_norm": 3.71875, "learning_rate": 3.1557761975043605e-05, "loss": 6.3041, "step": 197 }, { "epoch": 0.005313439244310863, "grad_norm": 3.34375, "learning_rate": 3.171877096471219e-05, "loss": 6.2843, "step": 198 }, { "epoch": 0.005340274796049807, "grad_norm": 2.921875, "learning_rate": 3.187977995438078e-05, "loss": 6.4247, "step": 199 }, { "epoch": 0.0053671103477887505, "grad_norm": 4.0, "learning_rate": 3.204078894404937e-05, "loss": 6.4471, "step": 200 }, { "epoch": 0.005393945899527694, "grad_norm": 3.734375, "learning_rate": 3.2201797933717966e-05, "loss": 6.4662, "step": 201 }, { "epoch": 0.005420781451266638, "grad_norm": 3.09375, "learning_rate": 3.2362806923386555e-05, "loss": 6.312, "step": 202 }, { "epoch": 0.005447617003005582, "grad_norm": 3.15625, "learning_rate": 3.252381591305514e-05, "loss": 6.25, "step": 203 }, { "epoch": 0.005474452554744526, "grad_norm": 3.234375, "learning_rate": 3.268482490272373e-05, "loss": 6.3824, "step": 204 }, { "epoch": 0.00550128810648347, "grad_norm": 2.65625, "learning_rate": 3.284583389239232e-05, "loss": 6.2375, "step": 205 }, { "epoch": 0.0055281236582224134, "grad_norm": 2.765625, "learning_rate": 3.3006842882060916e-05, "loss": 6.1955, "step": 206 }, { "epoch": 0.005554959209961356, "grad_norm": 3.265625, "learning_rate": 3.3167851871729505e-05, "loss": 6.2067, "step": 207 }, { "epoch": 0.0055817947617003, "grad_norm": 2.59375, "learning_rate": 3.332886086139809e-05, "loss": 6.1671, "step": 208 }, { "epoch": 0.005608630313439244, "grad_norm": 2.671875, "learning_rate": 3.348986985106668e-05, "loss": 6.2262, "step": 209 }, { "epoch": 0.005635465865178188, "grad_norm": 3.234375, "learning_rate": 3.365087884073527e-05, "loss": 6.138, "step": 210 }, { "epoch": 0.005662301416917132, "grad_norm": 2.703125, "learning_rate": 3.381188783040386e-05, "loss": 6.1329, "step": 211 }, { "epoch": 0.0056891369686560755, "grad_norm": 2.65625, "learning_rate": 3.397289682007245e-05, "loss": 6.1389, "step": 212 }, { "epoch": 0.005715972520395019, "grad_norm": 2.78125, "learning_rate": 3.4133905809741037e-05, "loss": 6.0764, "step": 213 }, { "epoch": 0.005742808072133963, "grad_norm": 2.65625, "learning_rate": 3.429491479940963e-05, "loss": 6.263, "step": 214 }, { "epoch": 0.005769643623872907, "grad_norm": 3.15625, "learning_rate": 3.445592378907822e-05, "loss": 6.0615, "step": 215 }, { "epoch": 0.005796479175611851, "grad_norm": 3.125, "learning_rate": 3.461693277874681e-05, "loss": 6.018, "step": 216 }, { "epoch": 0.005823314727350795, "grad_norm": 3.421875, "learning_rate": 3.47779417684154e-05, "loss": 5.9364, "step": 217 }, { "epoch": 0.0058501502790897384, "grad_norm": 3.515625, "learning_rate": 3.493895075808399e-05, "loss": 6.1946, "step": 218 }, { "epoch": 0.005876985830828682, "grad_norm": 4.46875, "learning_rate": 3.509995974775258e-05, "loss": 6.1747, "step": 219 }, { "epoch": 0.005903821382567625, "grad_norm": 5.65625, "learning_rate": 3.526096873742117e-05, "loss": 6.1512, "step": 220 }, { "epoch": 0.005930656934306569, "grad_norm": 8.25, "learning_rate": 3.542197772708976e-05, "loss": 6.3766, "step": 221 }, { "epoch": 0.005957492486045513, "grad_norm": 5.75, "learning_rate": 3.558298671675835e-05, "loss": 5.937, "step": 222 }, { "epoch": 0.005984328037784457, "grad_norm": 4.40625, "learning_rate": 3.574399570642694e-05, "loss": 6.0303, "step": 223 }, { "epoch": 0.0060111635895234005, "grad_norm": 5.15625, "learning_rate": 3.590500469609553e-05, "loss": 6.1169, "step": 224 }, { "epoch": 0.006037999141262344, "grad_norm": 4.5625, "learning_rate": 3.6066013685764114e-05, "loss": 6.0825, "step": 225 }, { "epoch": 0.006064834693001288, "grad_norm": 4.71875, "learning_rate": 3.622702267543271e-05, "loss": 6.0933, "step": 226 }, { "epoch": 0.006091670244740232, "grad_norm": 3.0625, "learning_rate": 3.63880316651013e-05, "loss": 6.085, "step": 227 }, { "epoch": 0.006118505796479176, "grad_norm": 4.21875, "learning_rate": 3.6549040654769887e-05, "loss": 5.9722, "step": 228 }, { "epoch": 0.00614534134821812, "grad_norm": 3.796875, "learning_rate": 3.6710049644438475e-05, "loss": 5.8789, "step": 229 }, { "epoch": 0.0061721768999570634, "grad_norm": 3.625, "learning_rate": 3.687105863410707e-05, "loss": 5.9658, "step": 230 }, { "epoch": 0.006199012451696007, "grad_norm": 3.8125, "learning_rate": 3.703206762377566e-05, "loss": 5.9983, "step": 231 }, { "epoch": 0.00622584800343495, "grad_norm": 2.875, "learning_rate": 3.719307661344425e-05, "loss": 5.8088, "step": 232 }, { "epoch": 0.006252683555173894, "grad_norm": 3.125, "learning_rate": 3.7354085603112837e-05, "loss": 5.925, "step": 233 }, { "epoch": 0.006279519106912838, "grad_norm": 3.59375, "learning_rate": 3.7515094592781425e-05, "loss": 6.0735, "step": 234 }, { "epoch": 0.006306354658651782, "grad_norm": 2.640625, "learning_rate": 3.767610358245002e-05, "loss": 5.9013, "step": 235 }, { "epoch": 0.0063331902103907255, "grad_norm": 3.890625, "learning_rate": 3.783711257211861e-05, "loss": 5.9263, "step": 236 }, { "epoch": 0.006360025762129669, "grad_norm": 2.9375, "learning_rate": 3.79981215617872e-05, "loss": 5.8919, "step": 237 }, { "epoch": 0.006386861313868613, "grad_norm": 3.328125, "learning_rate": 3.8159130551455787e-05, "loss": 5.995, "step": 238 }, { "epoch": 0.006413696865607557, "grad_norm": 3.40625, "learning_rate": 3.8320139541124375e-05, "loss": 5.764, "step": 239 }, { "epoch": 0.006440532417346501, "grad_norm": 3.140625, "learning_rate": 3.848114853079297e-05, "loss": 5.9017, "step": 240 }, { "epoch": 0.006467367969085445, "grad_norm": 3.4375, "learning_rate": 3.864215752046156e-05, "loss": 5.7845, "step": 241 }, { "epoch": 0.006494203520824388, "grad_norm": 2.40625, "learning_rate": 3.880316651013015e-05, "loss": 5.8538, "step": 242 }, { "epoch": 0.006521039072563332, "grad_norm": 3.9375, "learning_rate": 3.8964175499798737e-05, "loss": 5.8862, "step": 243 }, { "epoch": 0.006547874624302276, "grad_norm": 2.734375, "learning_rate": 3.912518448946733e-05, "loss": 5.832, "step": 244 }, { "epoch": 0.006574710176041219, "grad_norm": 3.65625, "learning_rate": 3.928619347913592e-05, "loss": 5.7982, "step": 245 }, { "epoch": 0.006601545727780163, "grad_norm": 2.5, "learning_rate": 3.94472024688045e-05, "loss": 5.7244, "step": 246 }, { "epoch": 0.006628381279519107, "grad_norm": 3.6875, "learning_rate": 3.960821145847309e-05, "loss": 5.7863, "step": 247 }, { "epoch": 0.0066552168312580505, "grad_norm": 3.4375, "learning_rate": 3.976922044814168e-05, "loss": 5.8122, "step": 248 }, { "epoch": 0.006682052382996994, "grad_norm": 3.046875, "learning_rate": 3.993022943781027e-05, "loss": 5.6676, "step": 249 }, { "epoch": 0.006708887934735938, "grad_norm": 3.671875, "learning_rate": 4.0091238427478864e-05, "loss": 5.6775, "step": 250 }, { "epoch": 0.006735723486474882, "grad_norm": 2.625, "learning_rate": 4.025224741714745e-05, "loss": 5.8062, "step": 251 }, { "epoch": 0.006762559038213826, "grad_norm": 3.515625, "learning_rate": 4.041325640681604e-05, "loss": 5.6329, "step": 252 }, { "epoch": 0.00678939458995277, "grad_norm": 3.171875, "learning_rate": 4.057426539648463e-05, "loss": 5.6866, "step": 253 }, { "epoch": 0.006816230141691713, "grad_norm": 3.28125, "learning_rate": 4.073527438615322e-05, "loss": 5.6298, "step": 254 }, { "epoch": 0.006843065693430657, "grad_norm": 3.421875, "learning_rate": 4.0896283375821814e-05, "loss": 5.5983, "step": 255 }, { "epoch": 0.006869901245169601, "grad_norm": 2.40625, "learning_rate": 4.10572923654904e-05, "loss": 5.721, "step": 256 }, { "epoch": 0.006896736796908544, "grad_norm": 3.84375, "learning_rate": 4.121830135515899e-05, "loss": 5.7755, "step": 257 }, { "epoch": 0.006923572348647488, "grad_norm": 3.171875, "learning_rate": 4.137931034482758e-05, "loss": 5.5555, "step": 258 }, { "epoch": 0.006950407900386432, "grad_norm": 4.09375, "learning_rate": 4.1540319334496175e-05, "loss": 5.7379, "step": 259 }, { "epoch": 0.0069772434521253755, "grad_norm": 4.375, "learning_rate": 4.1701328324164764e-05, "loss": 5.7731, "step": 260 }, { "epoch": 0.007004079003864319, "grad_norm": 2.6875, "learning_rate": 4.186233731383335e-05, "loss": 5.6566, "step": 261 }, { "epoch": 0.007030914555603263, "grad_norm": 2.890625, "learning_rate": 4.202334630350194e-05, "loss": 5.7442, "step": 262 }, { "epoch": 0.007057750107342207, "grad_norm": 3.078125, "learning_rate": 4.218435529317053e-05, "loss": 5.5541, "step": 263 }, { "epoch": 0.007084585659081151, "grad_norm": 3.578125, "learning_rate": 4.2345364282839125e-05, "loss": 5.7359, "step": 264 }, { "epoch": 0.007111421210820095, "grad_norm": 2.765625, "learning_rate": 4.2506373272507714e-05, "loss": 5.4964, "step": 265 }, { "epoch": 0.007138256762559038, "grad_norm": 2.78125, "learning_rate": 4.26673822621763e-05, "loss": 5.6451, "step": 266 }, { "epoch": 0.007165092314297982, "grad_norm": 2.875, "learning_rate": 4.282839125184489e-05, "loss": 5.5282, "step": 267 }, { "epoch": 0.007191927866036926, "grad_norm": 3.03125, "learning_rate": 4.298940024151348e-05, "loss": 5.5497, "step": 268 }, { "epoch": 0.00721876341777587, "grad_norm": 3.25, "learning_rate": 4.3150409231182075e-05, "loss": 5.5675, "step": 269 }, { "epoch": 0.007245598969514813, "grad_norm": 5.1875, "learning_rate": 4.3311418220850664e-05, "loss": 5.6454, "step": 270 }, { "epoch": 0.007272434521253757, "grad_norm": 6.75, "learning_rate": 4.347242721051925e-05, "loss": 5.5869, "step": 271 }, { "epoch": 0.0072992700729927005, "grad_norm": 10.125, "learning_rate": 4.363343620018784e-05, "loss": 5.6696, "step": 272 }, { "epoch": 0.007326105624731644, "grad_norm": 6.34375, "learning_rate": 4.3794445189856437e-05, "loss": 5.7172, "step": 273 }, { "epoch": 0.007352941176470588, "grad_norm": 4.84375, "learning_rate": 4.3955454179525025e-05, "loss": 5.6716, "step": 274 }, { "epoch": 0.007379776728209532, "grad_norm": 5.375, "learning_rate": 4.411646316919361e-05, "loss": 5.4603, "step": 275 }, { "epoch": 0.007406612279948476, "grad_norm": 5.28125, "learning_rate": 4.4277472158862196e-05, "loss": 5.516, "step": 276 }, { "epoch": 0.00743344783168742, "grad_norm": 4.21875, "learning_rate": 4.4438481148530784e-05, "loss": 5.4778, "step": 277 }, { "epoch": 0.007460283383426363, "grad_norm": 3.921875, "learning_rate": 4.459949013819937e-05, "loss": 5.5421, "step": 278 }, { "epoch": 0.007487118935165307, "grad_norm": 3.25, "learning_rate": 4.476049912786797e-05, "loss": 5.5887, "step": 279 }, { "epoch": 0.007513954486904251, "grad_norm": 3.984375, "learning_rate": 4.492150811753656e-05, "loss": 5.4559, "step": 280 }, { "epoch": 0.007540790038643195, "grad_norm": 3.5625, "learning_rate": 4.5082517107205146e-05, "loss": 5.3711, "step": 281 }, { "epoch": 0.007567625590382138, "grad_norm": 2.90625, "learning_rate": 4.5243526096873734e-05, "loss": 5.4183, "step": 282 }, { "epoch": 0.007594461142121082, "grad_norm": 3.453125, "learning_rate": 4.540453508654232e-05, "loss": 5.5646, "step": 283 }, { "epoch": 0.0076212966938600255, "grad_norm": 2.453125, "learning_rate": 4.556554407621092e-05, "loss": 5.5148, "step": 284 }, { "epoch": 0.007648132245598969, "grad_norm": 3.578125, "learning_rate": 4.572655306587951e-05, "loss": 5.4651, "step": 285 }, { "epoch": 0.007674967797337913, "grad_norm": 2.90625, "learning_rate": 4.5887562055548096e-05, "loss": 5.6095, "step": 286 }, { "epoch": 0.007701803349076857, "grad_norm": 3.421875, "learning_rate": 4.6048571045216684e-05, "loss": 5.4367, "step": 287 }, { "epoch": 0.007728638900815801, "grad_norm": 3.546875, "learning_rate": 4.620958003488528e-05, "loss": 5.3764, "step": 288 }, { "epoch": 0.007755474452554745, "grad_norm": 2.78125, "learning_rate": 4.637058902455387e-05, "loss": 5.3227, "step": 289 }, { "epoch": 0.007782310004293688, "grad_norm": 3.140625, "learning_rate": 4.653159801422246e-05, "loss": 5.3613, "step": 290 }, { "epoch": 0.007809145556032632, "grad_norm": 3.046875, "learning_rate": 4.6692607003891046e-05, "loss": 5.4121, "step": 291 }, { "epoch": 0.007835981107771575, "grad_norm": 3.46875, "learning_rate": 4.6853615993559634e-05, "loss": 5.3913, "step": 292 }, { "epoch": 0.007862816659510519, "grad_norm": 3.625, "learning_rate": 4.701462498322823e-05, "loss": 5.447, "step": 293 }, { "epoch": 0.007889652211249463, "grad_norm": 3.0625, "learning_rate": 4.717563397289682e-05, "loss": 5.3991, "step": 294 }, { "epoch": 0.007916487762988407, "grad_norm": 3.328125, "learning_rate": 4.733664296256541e-05, "loss": 5.4411, "step": 295 }, { "epoch": 0.00794332331472735, "grad_norm": 2.40625, "learning_rate": 4.7497651952233996e-05, "loss": 5.3334, "step": 296 }, { "epoch": 0.007970158866466294, "grad_norm": 2.75, "learning_rate": 4.765866094190259e-05, "loss": 5.4973, "step": 297 }, { "epoch": 0.007996994418205238, "grad_norm": 3.140625, "learning_rate": 4.781966993157118e-05, "loss": 5.3276, "step": 298 }, { "epoch": 0.008023829969944182, "grad_norm": 3.296875, "learning_rate": 4.798067892123977e-05, "loss": 5.2546, "step": 299 }, { "epoch": 0.008050665521683126, "grad_norm": 3.078125, "learning_rate": 4.814168791090836e-05, "loss": 5.28, "step": 300 }, { "epoch": 0.00807750107342207, "grad_norm": 3.5625, "learning_rate": 4.8302696900576946e-05, "loss": 5.3101, "step": 301 }, { "epoch": 0.008104336625161013, "grad_norm": 3.015625, "learning_rate": 4.846370589024554e-05, "loss": 5.1925, "step": 302 }, { "epoch": 0.008131172176899957, "grad_norm": 3.921875, "learning_rate": 4.862471487991413e-05, "loss": 5.2962, "step": 303 }, { "epoch": 0.008158007728638901, "grad_norm": 3.015625, "learning_rate": 4.878572386958271e-05, "loss": 5.3234, "step": 304 }, { "epoch": 0.008184843280377845, "grad_norm": 3.34375, "learning_rate": 4.89467328592513e-05, "loss": 5.3471, "step": 305 }, { "epoch": 0.008211678832116789, "grad_norm": 3.296875, "learning_rate": 4.910774184891989e-05, "loss": 5.3631, "step": 306 }, { "epoch": 0.008238514383855733, "grad_norm": 3.65625, "learning_rate": 4.926875083858848e-05, "loss": 5.1211, "step": 307 }, { "epoch": 0.008265349935594676, "grad_norm": 3.125, "learning_rate": 4.942975982825707e-05, "loss": 5.2357, "step": 308 }, { "epoch": 0.00829218548733362, "grad_norm": 3.015625, "learning_rate": 4.959076881792566e-05, "loss": 5.083, "step": 309 }, { "epoch": 0.008319021039072564, "grad_norm": 3.0, "learning_rate": 4.975177780759425e-05, "loss": 5.2269, "step": 310 }, { "epoch": 0.008345856590811508, "grad_norm": 3.71875, "learning_rate": 4.991278679726284e-05, "loss": 5.092, "step": 311 }, { "epoch": 0.008372692142550452, "grad_norm": 4.5, "learning_rate": 5.0073795786931434e-05, "loss": 5.2013, "step": 312 }, { "epoch": 0.008399527694289395, "grad_norm": 2.5, "learning_rate": 5.023480477660002e-05, "loss": 5.1849, "step": 313 }, { "epoch": 0.008426363246028338, "grad_norm": 3.359375, "learning_rate": 5.039581376626861e-05, "loss": 5.2065, "step": 314 }, { "epoch": 0.008453198797767281, "grad_norm": 3.265625, "learning_rate": 5.05568227559372e-05, "loss": 5.3277, "step": 315 }, { "epoch": 0.008480034349506225, "grad_norm": 2.75, "learning_rate": 5.071783174560579e-05, "loss": 5.2914, "step": 316 }, { "epoch": 0.008506869901245169, "grad_norm": 3.078125, "learning_rate": 5.0878840735274384e-05, "loss": 5.2348, "step": 317 }, { "epoch": 0.008533705452984113, "grad_norm": 2.40625, "learning_rate": 5.103984972494297e-05, "loss": 5.1286, "step": 318 }, { "epoch": 0.008560541004723057, "grad_norm": 4.375, "learning_rate": 5.120085871461156e-05, "loss": 5.2853, "step": 319 }, { "epoch": 0.008587376556462, "grad_norm": 3.03125, "learning_rate": 5.136186770428015e-05, "loss": 5.0438, "step": 320 }, { "epoch": 0.008614212108200944, "grad_norm": 3.875, "learning_rate": 5.152287669394874e-05, "loss": 5.0498, "step": 321 }, { "epoch": 0.008641047659939888, "grad_norm": 3.265625, "learning_rate": 5.1683885683617334e-05, "loss": 5.1416, "step": 322 }, { "epoch": 0.008667883211678832, "grad_norm": 4.3125, "learning_rate": 5.184489467328592e-05, "loss": 5.1123, "step": 323 }, { "epoch": 0.008694718763417776, "grad_norm": 3.515625, "learning_rate": 5.200590366295451e-05, "loss": 5.0784, "step": 324 }, { "epoch": 0.00872155431515672, "grad_norm": 2.75, "learning_rate": 5.21669126526231e-05, "loss": 4.9589, "step": 325 }, { "epoch": 0.008748389866895663, "grad_norm": 4.3125, "learning_rate": 5.2327921642291696e-05, "loss": 5.0818, "step": 326 }, { "epoch": 0.008775225418634607, "grad_norm": 2.953125, "learning_rate": 5.2488930631960284e-05, "loss": 5.0381, "step": 327 }, { "epoch": 0.008802060970373551, "grad_norm": 3.90625, "learning_rate": 5.264993962162887e-05, "loss": 5.055, "step": 328 }, { "epoch": 0.008828896522112495, "grad_norm": 3.96875, "learning_rate": 5.281094861129746e-05, "loss": 4.9964, "step": 329 }, { "epoch": 0.008855732073851439, "grad_norm": 3.640625, "learning_rate": 5.297195760096605e-05, "loss": 5.1109, "step": 330 }, { "epoch": 0.008882567625590383, "grad_norm": 3.640625, "learning_rate": 5.3132966590634646e-05, "loss": 5.0884, "step": 331 }, { "epoch": 0.008909403177329326, "grad_norm": 3.828125, "learning_rate": 5.329397558030323e-05, "loss": 5.1278, "step": 332 }, { "epoch": 0.00893623872906827, "grad_norm": 5.34375, "learning_rate": 5.3454984569971816e-05, "loss": 5.0656, "step": 333 }, { "epoch": 0.008963074280807214, "grad_norm": 6.125, "learning_rate": 5.3615993559640405e-05, "loss": 5.1885, "step": 334 }, { "epoch": 0.008989909832546158, "grad_norm": 7.625, "learning_rate": 5.3777002549308993e-05, "loss": 5.1326, "step": 335 }, { "epoch": 0.009016745384285102, "grad_norm": 3.953125, "learning_rate": 5.393801153897758e-05, "loss": 5.2875, "step": 336 }, { "epoch": 0.009043580936024045, "grad_norm": 5.25, "learning_rate": 5.409902052864618e-05, "loss": 5.0404, "step": 337 }, { "epoch": 0.00907041648776299, "grad_norm": 4.90625, "learning_rate": 5.4260029518314766e-05, "loss": 5.1558, "step": 338 }, { "epoch": 0.009097252039501931, "grad_norm": 3.453125, "learning_rate": 5.4421038507983355e-05, "loss": 5.0123, "step": 339 }, { "epoch": 0.009124087591240875, "grad_norm": 3.1875, "learning_rate": 5.4582047497651943e-05, "loss": 5.0245, "step": 340 }, { "epoch": 0.009150923142979819, "grad_norm": 3.28125, "learning_rate": 5.474305648732054e-05, "loss": 5.2881, "step": 341 }, { "epoch": 0.009177758694718763, "grad_norm": 3.03125, "learning_rate": 5.490406547698913e-05, "loss": 4.8829, "step": 342 }, { "epoch": 0.009204594246457707, "grad_norm": 3.203125, "learning_rate": 5.5065074466657716e-05, "loss": 4.982, "step": 343 }, { "epoch": 0.00923142979819665, "grad_norm": 2.453125, "learning_rate": 5.5226083456326305e-05, "loss": 4.9914, "step": 344 }, { "epoch": 0.009258265349935594, "grad_norm": 3.484375, "learning_rate": 5.5387092445994893e-05, "loss": 5.0355, "step": 345 }, { "epoch": 0.009285100901674538, "grad_norm": 2.6875, "learning_rate": 5.554810143566349e-05, "loss": 4.9423, "step": 346 }, { "epoch": 0.009311936453413482, "grad_norm": 2.46875, "learning_rate": 5.570911042533208e-05, "loss": 5.0671, "step": 347 }, { "epoch": 0.009338772005152426, "grad_norm": 3.453125, "learning_rate": 5.5870119415000666e-05, "loss": 4.9237, "step": 348 }, { "epoch": 0.00936560755689137, "grad_norm": 2.375, "learning_rate": 5.6031128404669255e-05, "loss": 5.0533, "step": 349 }, { "epoch": 0.009392443108630313, "grad_norm": 2.953125, "learning_rate": 5.6192137394337843e-05, "loss": 4.8545, "step": 350 }, { "epoch": 0.009419278660369257, "grad_norm": 2.96875, "learning_rate": 5.635314638400644e-05, "loss": 4.9651, "step": 351 }, { "epoch": 0.009446114212108201, "grad_norm": 3.453125, "learning_rate": 5.651415537367503e-05, "loss": 4.8497, "step": 352 }, { "epoch": 0.009472949763847145, "grad_norm": 2.765625, "learning_rate": 5.6675164363343616e-05, "loss": 5.1003, "step": 353 }, { "epoch": 0.009499785315586089, "grad_norm": 3.90625, "learning_rate": 5.6836173353012205e-05, "loss": 5.0038, "step": 354 }, { "epoch": 0.009526620867325033, "grad_norm": 3.375, "learning_rate": 5.69971823426808e-05, "loss": 4.9513, "step": 355 }, { "epoch": 0.009553456419063976, "grad_norm": 3.34375, "learning_rate": 5.715819133234939e-05, "loss": 4.9583, "step": 356 }, { "epoch": 0.00958029197080292, "grad_norm": 2.578125, "learning_rate": 5.731920032201798e-05, "loss": 4.9073, "step": 357 }, { "epoch": 0.009607127522541864, "grad_norm": 3.140625, "learning_rate": 5.7480209311686566e-05, "loss": 4.9059, "step": 358 }, { "epoch": 0.009633963074280808, "grad_norm": 2.671875, "learning_rate": 5.7641218301355155e-05, "loss": 4.9276, "step": 359 }, { "epoch": 0.009660798626019752, "grad_norm": 3.390625, "learning_rate": 5.780222729102375e-05, "loss": 4.8358, "step": 360 }, { "epoch": 0.009687634177758695, "grad_norm": 2.65625, "learning_rate": 5.796323628069233e-05, "loss": 4.9929, "step": 361 }, { "epoch": 0.00971446972949764, "grad_norm": 3.40625, "learning_rate": 5.812424527036092e-05, "loss": 4.8685, "step": 362 }, { "epoch": 0.009741305281236583, "grad_norm": 3.515625, "learning_rate": 5.828525426002951e-05, "loss": 4.9638, "step": 363 }, { "epoch": 0.009768140832975525, "grad_norm": 3.3125, "learning_rate": 5.84462632496981e-05, "loss": 4.9091, "step": 364 }, { "epoch": 0.009794976384714469, "grad_norm": 3.203125, "learning_rate": 5.860727223936669e-05, "loss": 4.769, "step": 365 }, { "epoch": 0.009821811936453413, "grad_norm": 3.21875, "learning_rate": 5.876828122903528e-05, "loss": 4.8881, "step": 366 }, { "epoch": 0.009848647488192357, "grad_norm": 3.546875, "learning_rate": 5.892929021870387e-05, "loss": 5.0081, "step": 367 }, { "epoch": 0.0098754830399313, "grad_norm": 2.890625, "learning_rate": 5.909029920837246e-05, "loss": 4.9537, "step": 368 }, { "epoch": 0.009902318591670244, "grad_norm": 2.875, "learning_rate": 5.925130819804105e-05, "loss": 4.8565, "step": 369 }, { "epoch": 0.009929154143409188, "grad_norm": 2.796875, "learning_rate": 5.9412317187709643e-05, "loss": 4.8879, "step": 370 }, { "epoch": 0.009955989695148132, "grad_norm": 3.453125, "learning_rate": 5.957332617737823e-05, "loss": 4.7965, "step": 371 }, { "epoch": 0.009982825246887076, "grad_norm": 2.984375, "learning_rate": 5.973433516704682e-05, "loss": 4.9045, "step": 372 }, { "epoch": 0.01000966079862602, "grad_norm": 2.859375, "learning_rate": 5.989534415671541e-05, "loss": 4.7572, "step": 373 }, { "epoch": 0.010036496350364963, "grad_norm": 3.671875, "learning_rate": 6.0056353146384e-05, "loss": 4.7887, "step": 374 }, { "epoch": 0.010063331902103907, "grad_norm": 2.453125, "learning_rate": 6.0217362136052593e-05, "loss": 4.8855, "step": 375 }, { "epoch": 0.010090167453842851, "grad_norm": 3.4375, "learning_rate": 6.037837112572118e-05, "loss": 4.8639, "step": 376 }, { "epoch": 0.010117003005581795, "grad_norm": 2.1875, "learning_rate": 6.053938011538977e-05, "loss": 4.7545, "step": 377 }, { "epoch": 0.010143838557320739, "grad_norm": 2.921875, "learning_rate": 6.070038910505836e-05, "loss": 4.7603, "step": 378 }, { "epoch": 0.010170674109059682, "grad_norm": 3.046875, "learning_rate": 6.086139809472695e-05, "loss": 4.724, "step": 379 }, { "epoch": 0.010197509660798626, "grad_norm": 2.6875, "learning_rate": 6.1022407084395543e-05, "loss": 4.7536, "step": 380 }, { "epoch": 0.01022434521253757, "grad_norm": 2.375, "learning_rate": 6.118341607406413e-05, "loss": 4.712, "step": 381 }, { "epoch": 0.010251180764276514, "grad_norm": 2.375, "learning_rate": 6.134442506373272e-05, "loss": 4.7632, "step": 382 }, { "epoch": 0.010278016316015458, "grad_norm": 2.75, "learning_rate": 6.150543405340131e-05, "loss": 4.9034, "step": 383 }, { "epoch": 0.010304851867754402, "grad_norm": 2.515625, "learning_rate": 6.16664430430699e-05, "loss": 4.7229, "step": 384 }, { "epoch": 0.010331687419493345, "grad_norm": 3.140625, "learning_rate": 6.182745203273849e-05, "loss": 4.8983, "step": 385 }, { "epoch": 0.01035852297123229, "grad_norm": 2.671875, "learning_rate": 6.198846102240709e-05, "loss": 4.7807, "step": 386 }, { "epoch": 0.010385358522971233, "grad_norm": 3.203125, "learning_rate": 6.214947001207568e-05, "loss": 4.7362, "step": 387 }, { "epoch": 0.010412194074710177, "grad_norm": 2.546875, "learning_rate": 6.231047900174427e-05, "loss": 4.7374, "step": 388 }, { "epoch": 0.010439029626449119, "grad_norm": 2.453125, "learning_rate": 6.247148799141285e-05, "loss": 4.6404, "step": 389 }, { "epoch": 0.010465865178188063, "grad_norm": 3.40625, "learning_rate": 6.263249698108143e-05, "loss": 4.759, "step": 390 }, { "epoch": 0.010492700729927007, "grad_norm": 2.5625, "learning_rate": 6.279350597075002e-05, "loss": 4.6494, "step": 391 }, { "epoch": 0.01051953628166595, "grad_norm": 2.78125, "learning_rate": 6.295451496041862e-05, "loss": 4.8523, "step": 392 }, { "epoch": 0.010546371833404894, "grad_norm": 3.265625, "learning_rate": 6.311552395008721e-05, "loss": 4.5768, "step": 393 }, { "epoch": 0.010573207385143838, "grad_norm": 2.34375, "learning_rate": 6.32765329397558e-05, "loss": 4.7812, "step": 394 }, { "epoch": 0.010600042936882782, "grad_norm": 3.078125, "learning_rate": 6.343754192942439e-05, "loss": 4.5937, "step": 395 }, { "epoch": 0.010626878488621726, "grad_norm": 2.546875, "learning_rate": 6.359855091909298e-05, "loss": 4.6112, "step": 396 }, { "epoch": 0.01065371404036067, "grad_norm": 2.59375, "learning_rate": 6.375955990876156e-05, "loss": 4.68, "step": 397 }, { "epoch": 0.010680549592099613, "grad_norm": 3.40625, "learning_rate": 6.392056889843015e-05, "loss": 4.7039, "step": 398 }, { "epoch": 0.010707385143838557, "grad_norm": 2.671875, "learning_rate": 6.408157788809874e-05, "loss": 4.7355, "step": 399 }, { "epoch": 0.010734220695577501, "grad_norm": 3.015625, "learning_rate": 6.424258687776733e-05, "loss": 4.4152, "step": 400 }, { "epoch": 0.010761056247316445, "grad_norm": 2.453125, "learning_rate": 6.440359586743593e-05, "loss": 4.6565, "step": 401 }, { "epoch": 0.010787891799055389, "grad_norm": 3.609375, "learning_rate": 6.456460485710452e-05, "loss": 4.6615, "step": 402 }, { "epoch": 0.010814727350794332, "grad_norm": 2.515625, "learning_rate": 6.472561384677311e-05, "loss": 4.7029, "step": 403 }, { "epoch": 0.010841562902533276, "grad_norm": 3.296875, "learning_rate": 6.48866228364417e-05, "loss": 4.6783, "step": 404 }, { "epoch": 0.01086839845427222, "grad_norm": 3.1875, "learning_rate": 6.504763182611029e-05, "loss": 4.5931, "step": 405 }, { "epoch": 0.010895234006011164, "grad_norm": 2.921875, "learning_rate": 6.520864081577888e-05, "loss": 4.5476, "step": 406 }, { "epoch": 0.010922069557750108, "grad_norm": 6.34375, "learning_rate": 6.536964980544746e-05, "loss": 4.8457, "step": 407 }, { "epoch": 0.010948905109489052, "grad_norm": 4.34375, "learning_rate": 6.553065879511605e-05, "loss": 4.7065, "step": 408 }, { "epoch": 0.010975740661227995, "grad_norm": 4.15625, "learning_rate": 6.569166778478464e-05, "loss": 4.774, "step": 409 }, { "epoch": 0.01100257621296694, "grad_norm": 4.8125, "learning_rate": 6.585267677445323e-05, "loss": 4.8545, "step": 410 }, { "epoch": 0.011029411764705883, "grad_norm": 3.90625, "learning_rate": 6.601368576412183e-05, "loss": 4.6974, "step": 411 }, { "epoch": 0.011056247316444827, "grad_norm": 3.34375, "learning_rate": 6.617469475379042e-05, "loss": 4.8324, "step": 412 }, { "epoch": 0.01108308286818377, "grad_norm": 3.59375, "learning_rate": 6.633570374345901e-05, "loss": 4.7435, "step": 413 }, { "epoch": 0.011109918419922713, "grad_norm": 2.703125, "learning_rate": 6.64967127331276e-05, "loss": 4.8363, "step": 414 }, { "epoch": 0.011136753971661657, "grad_norm": 4.125, "learning_rate": 6.665772172279619e-05, "loss": 4.8257, "step": 415 }, { "epoch": 0.0111635895234006, "grad_norm": 2.53125, "learning_rate": 6.681873071246478e-05, "loss": 4.6601, "step": 416 }, { "epoch": 0.011190425075139544, "grad_norm": 3.234375, "learning_rate": 6.697973970213336e-05, "loss": 4.8044, "step": 417 }, { "epoch": 0.011217260626878488, "grad_norm": 2.8125, "learning_rate": 6.714074869180195e-05, "loss": 4.864, "step": 418 }, { "epoch": 0.011244096178617432, "grad_norm": 4.09375, "learning_rate": 6.730175768147054e-05, "loss": 4.5761, "step": 419 }, { "epoch": 0.011270931730356376, "grad_norm": 3.0, "learning_rate": 6.746276667113913e-05, "loss": 4.6949, "step": 420 }, { "epoch": 0.01129776728209532, "grad_norm": 2.75, "learning_rate": 6.762377566080772e-05, "loss": 4.596, "step": 421 }, { "epoch": 0.011324602833834263, "grad_norm": 2.84375, "learning_rate": 6.778478465047631e-05, "loss": 4.8235, "step": 422 }, { "epoch": 0.011351438385573207, "grad_norm": 3.078125, "learning_rate": 6.79457936401449e-05, "loss": 4.6891, "step": 423 }, { "epoch": 0.011378273937312151, "grad_norm": 2.96875, "learning_rate": 6.810680262981348e-05, "loss": 4.5819, "step": 424 }, { "epoch": 0.011405109489051095, "grad_norm": 2.96875, "learning_rate": 6.826781161948207e-05, "loss": 4.6324, "step": 425 }, { "epoch": 0.011431945040790039, "grad_norm": 2.59375, "learning_rate": 6.842882060915068e-05, "loss": 4.6094, "step": 426 }, { "epoch": 0.011458780592528982, "grad_norm": 3.421875, "learning_rate": 6.858982959881926e-05, "loss": 4.6345, "step": 427 }, { "epoch": 0.011485616144267926, "grad_norm": 2.3125, "learning_rate": 6.875083858848785e-05, "loss": 4.6947, "step": 428 }, { "epoch": 0.01151245169600687, "grad_norm": 2.515625, "learning_rate": 6.891184757815644e-05, "loss": 4.5007, "step": 429 }, { "epoch": 0.011539287247745814, "grad_norm": 2.59375, "learning_rate": 6.907285656782503e-05, "loss": 4.5577, "step": 430 }, { "epoch": 0.011566122799484758, "grad_norm": 2.703125, "learning_rate": 6.923386555749362e-05, "loss": 4.5661, "step": 431 }, { "epoch": 0.011592958351223702, "grad_norm": 2.3125, "learning_rate": 6.939487454716221e-05, "loss": 4.564, "step": 432 }, { "epoch": 0.011619793902962645, "grad_norm": 2.46875, "learning_rate": 6.95558835368308e-05, "loss": 4.564, "step": 433 }, { "epoch": 0.01164662945470159, "grad_norm": 2.34375, "learning_rate": 6.971689252649938e-05, "loss": 4.6543, "step": 434 }, { "epoch": 0.011673465006440533, "grad_norm": 3.765625, "learning_rate": 6.987790151616799e-05, "loss": 4.5175, "step": 435 }, { "epoch": 0.011700300558179477, "grad_norm": 2.78125, "learning_rate": 7.003891050583658e-05, "loss": 4.5579, "step": 436 }, { "epoch": 0.01172713610991842, "grad_norm": 3.046875, "learning_rate": 7.019991949550516e-05, "loss": 4.7076, "step": 437 }, { "epoch": 0.011753971661657365, "grad_norm": 3.28125, "learning_rate": 7.036092848517375e-05, "loss": 4.583, "step": 438 }, { "epoch": 0.011780807213396307, "grad_norm": 2.75, "learning_rate": 7.052193747484234e-05, "loss": 4.7049, "step": 439 }, { "epoch": 0.01180764276513525, "grad_norm": 2.703125, "learning_rate": 7.068294646451093e-05, "loss": 4.4888, "step": 440 }, { "epoch": 0.011834478316874194, "grad_norm": 2.4375, "learning_rate": 7.084395545417952e-05, "loss": 4.6231, "step": 441 }, { "epoch": 0.011861313868613138, "grad_norm": 2.65625, "learning_rate": 7.100496444384811e-05, "loss": 4.5448, "step": 442 }, { "epoch": 0.011888149420352082, "grad_norm": 2.5, "learning_rate": 7.11659734335167e-05, "loss": 4.4687, "step": 443 }, { "epoch": 0.011914984972091026, "grad_norm": 3.515625, "learning_rate": 7.13269824231853e-05, "loss": 4.5169, "step": 444 }, { "epoch": 0.01194182052382997, "grad_norm": 2.53125, "learning_rate": 7.148799141285389e-05, "loss": 4.4529, "step": 445 }, { "epoch": 0.011968656075568913, "grad_norm": 2.515625, "learning_rate": 7.164900040252248e-05, "loss": 4.5311, "step": 446 }, { "epoch": 0.011995491627307857, "grad_norm": 2.734375, "learning_rate": 7.181000939219106e-05, "loss": 4.5068, "step": 447 }, { "epoch": 0.012022327179046801, "grad_norm": 3.234375, "learning_rate": 7.197101838185964e-05, "loss": 4.5308, "step": 448 }, { "epoch": 0.012049162730785745, "grad_norm": 2.765625, "learning_rate": 7.213202737152823e-05, "loss": 4.6135, "step": 449 }, { "epoch": 0.012075998282524689, "grad_norm": 2.921875, "learning_rate": 7.229303636119683e-05, "loss": 4.5065, "step": 450 }, { "epoch": 0.012102833834263632, "grad_norm": 2.59375, "learning_rate": 7.245404535086542e-05, "loss": 4.6519, "step": 451 }, { "epoch": 0.012129669386002576, "grad_norm": 2.875, "learning_rate": 7.261505434053401e-05, "loss": 4.5142, "step": 452 }, { "epoch": 0.01215650493774152, "grad_norm": 2.84375, "learning_rate": 7.27760633302026e-05, "loss": 4.6106, "step": 453 }, { "epoch": 0.012183340489480464, "grad_norm": 3.375, "learning_rate": 7.293707231987118e-05, "loss": 4.4563, "step": 454 }, { "epoch": 0.012210176041219408, "grad_norm": 2.921875, "learning_rate": 7.309808130953977e-05, "loss": 4.5274, "step": 455 }, { "epoch": 0.012237011592958352, "grad_norm": 2.890625, "learning_rate": 7.325909029920836e-05, "loss": 4.5655, "step": 456 }, { "epoch": 0.012263847144697295, "grad_norm": 2.875, "learning_rate": 7.342009928887695e-05, "loss": 4.434, "step": 457 }, { "epoch": 0.01229068269643624, "grad_norm": 2.640625, "learning_rate": 7.358110827854554e-05, "loss": 4.4595, "step": 458 }, { "epoch": 0.012317518248175183, "grad_norm": 3.15625, "learning_rate": 7.374211726821414e-05, "loss": 4.5182, "step": 459 }, { "epoch": 0.012344353799914127, "grad_norm": 2.4375, "learning_rate": 7.390312625788273e-05, "loss": 4.42, "step": 460 }, { "epoch": 0.01237118935165307, "grad_norm": 2.71875, "learning_rate": 7.406413524755132e-05, "loss": 4.4532, "step": 461 }, { "epoch": 0.012398024903392015, "grad_norm": 2.796875, "learning_rate": 7.422514423721991e-05, "loss": 4.4388, "step": 462 }, { "epoch": 0.012424860455130958, "grad_norm": 2.96875, "learning_rate": 7.43861532268885e-05, "loss": 4.4623, "step": 463 }, { "epoch": 0.0124516960068699, "grad_norm": 2.578125, "learning_rate": 7.454716221655708e-05, "loss": 4.1937, "step": 464 }, { "epoch": 0.012478531558608844, "grad_norm": 2.765625, "learning_rate": 7.470817120622567e-05, "loss": 4.5488, "step": 465 }, { "epoch": 0.012505367110347788, "grad_norm": 2.390625, "learning_rate": 7.486918019589426e-05, "loss": 4.2779, "step": 466 }, { "epoch": 0.012532202662086732, "grad_norm": 3.453125, "learning_rate": 7.503018918556285e-05, "loss": 4.354, "step": 467 }, { "epoch": 0.012559038213825676, "grad_norm": 2.3125, "learning_rate": 7.519119817523144e-05, "loss": 4.4866, "step": 468 }, { "epoch": 0.01258587376556462, "grad_norm": 2.546875, "learning_rate": 7.535220716490004e-05, "loss": 4.6121, "step": 469 }, { "epoch": 0.012612709317303563, "grad_norm": 2.34375, "learning_rate": 7.551321615456862e-05, "loss": 4.5868, "step": 470 }, { "epoch": 0.012639544869042507, "grad_norm": 3.0625, "learning_rate": 7.567422514423722e-05, "loss": 4.6287, "step": 471 }, { "epoch": 0.012666380420781451, "grad_norm": 2.796875, "learning_rate": 7.58352341339058e-05, "loss": 4.4978, "step": 472 }, { "epoch": 0.012693215972520395, "grad_norm": 2.859375, "learning_rate": 7.59962431235744e-05, "loss": 4.4871, "step": 473 }, { "epoch": 0.012720051524259339, "grad_norm": 2.375, "learning_rate": 7.615725211324298e-05, "loss": 4.3796, "step": 474 }, { "epoch": 0.012746887075998282, "grad_norm": 2.78125, "learning_rate": 7.631826110291157e-05, "loss": 4.5418, "step": 475 }, { "epoch": 0.012773722627737226, "grad_norm": 2.5625, "learning_rate": 7.647927009258016e-05, "loss": 4.5576, "step": 476 }, { "epoch": 0.01280055817947617, "grad_norm": 2.4375, "learning_rate": 7.664027908224875e-05, "loss": 4.4139, "step": 477 }, { "epoch": 0.012827393731215114, "grad_norm": 2.359375, "learning_rate": 7.680128807191734e-05, "loss": 4.3898, "step": 478 }, { "epoch": 0.012854229282954058, "grad_norm": 2.234375, "learning_rate": 7.696229706158594e-05, "loss": 4.4362, "step": 479 }, { "epoch": 0.012881064834693002, "grad_norm": 2.5, "learning_rate": 7.712330605125452e-05, "loss": 4.3359, "step": 480 }, { "epoch": 0.012907900386431945, "grad_norm": 2.28125, "learning_rate": 7.728431504092312e-05, "loss": 4.4681, "step": 481 }, { "epoch": 0.01293473593817089, "grad_norm": 2.453125, "learning_rate": 7.74453240305917e-05, "loss": 4.468, "step": 482 }, { "epoch": 0.012961571489909833, "grad_norm": 2.375, "learning_rate": 7.76063330202603e-05, "loss": 4.3246, "step": 483 }, { "epoch": 0.012988407041648777, "grad_norm": 3.0625, "learning_rate": 7.776734200992888e-05, "loss": 4.5308, "step": 484 }, { "epoch": 0.01301524259338772, "grad_norm": 2.625, "learning_rate": 7.792835099959747e-05, "loss": 4.3174, "step": 485 }, { "epoch": 0.013042078145126665, "grad_norm": 2.828125, "learning_rate": 7.808935998926606e-05, "loss": 4.2576, "step": 486 }, { "epoch": 0.013068913696865608, "grad_norm": 2.59375, "learning_rate": 7.825036897893466e-05, "loss": 4.279, "step": 487 }, { "epoch": 0.013095749248604552, "grad_norm": 2.515625, "learning_rate": 7.841137796860324e-05, "loss": 4.4279, "step": 488 }, { "epoch": 0.013122584800343494, "grad_norm": 2.8125, "learning_rate": 7.857238695827184e-05, "loss": 4.3741, "step": 489 }, { "epoch": 0.013149420352082438, "grad_norm": 2.609375, "learning_rate": 7.873339594794042e-05, "loss": 4.358, "step": 490 }, { "epoch": 0.013176255903821382, "grad_norm": 2.28125, "learning_rate": 7.8894404937609e-05, "loss": 4.4323, "step": 491 }, { "epoch": 0.013203091455560326, "grad_norm": 5.0, "learning_rate": 7.90554139272776e-05, "loss": 4.5566, "step": 492 }, { "epoch": 0.01322992700729927, "grad_norm": 3.875, "learning_rate": 7.921642291694618e-05, "loss": 4.5175, "step": 493 }, { "epoch": 0.013256762559038213, "grad_norm": 5.75, "learning_rate": 7.937743190661478e-05, "loss": 4.542, "step": 494 }, { "epoch": 0.013283598110777157, "grad_norm": 4.4375, "learning_rate": 7.953844089628336e-05, "loss": 4.7196, "step": 495 }, { "epoch": 0.013310433662516101, "grad_norm": 3.328125, "learning_rate": 7.969944988595196e-05, "loss": 4.555, "step": 496 }, { "epoch": 0.013337269214255045, "grad_norm": 3.578125, "learning_rate": 7.986045887562054e-05, "loss": 4.4852, "step": 497 }, { "epoch": 0.013364104765993989, "grad_norm": 2.515625, "learning_rate": 8.002146786528914e-05, "loss": 4.415, "step": 498 }, { "epoch": 0.013390940317732932, "grad_norm": 2.421875, "learning_rate": 8.018247685495773e-05, "loss": 4.5358, "step": 499 }, { "epoch": 0.013417775869471876, "grad_norm": 2.453125, "learning_rate": 8.034348584462632e-05, "loss": 4.4615, "step": 500 }, { "epoch": 0.01344461142121082, "grad_norm": 2.875, "learning_rate": 8.05044948342949e-05, "loss": 4.4845, "step": 501 }, { "epoch": 0.013471446972949764, "grad_norm": 2.296875, "learning_rate": 8.066550382396351e-05, "loss": 4.4389, "step": 502 }, { "epoch": 0.013498282524688708, "grad_norm": 2.453125, "learning_rate": 8.082651281363208e-05, "loss": 4.416, "step": 503 }, { "epoch": 0.013525118076427652, "grad_norm": 2.09375, "learning_rate": 8.098752180330068e-05, "loss": 4.3914, "step": 504 }, { "epoch": 0.013551953628166595, "grad_norm": 2.15625, "learning_rate": 8.114853079296926e-05, "loss": 4.3806, "step": 505 }, { "epoch": 0.01357878917990554, "grad_norm": 2.234375, "learning_rate": 8.130953978263786e-05, "loss": 4.3899, "step": 506 }, { "epoch": 0.013605624731644483, "grad_norm": 2.4375, "learning_rate": 8.147054877230644e-05, "loss": 4.5386, "step": 507 }, { "epoch": 0.013632460283383427, "grad_norm": 2.015625, "learning_rate": 8.163155776197504e-05, "loss": 4.5109, "step": 508 }, { "epoch": 0.01365929583512237, "grad_norm": 2.28125, "learning_rate": 8.179256675164363e-05, "loss": 4.4506, "step": 509 }, { "epoch": 0.013686131386861315, "grad_norm": 2.609375, "learning_rate": 8.195357574131222e-05, "loss": 4.5787, "step": 510 }, { "epoch": 0.013712966938600258, "grad_norm": 2.328125, "learning_rate": 8.21145847309808e-05, "loss": 4.3328, "step": 511 }, { "epoch": 0.013739802490339202, "grad_norm": 2.109375, "learning_rate": 8.227559372064941e-05, "loss": 4.3539, "step": 512 }, { "epoch": 0.013766638042078146, "grad_norm": 2.296875, "learning_rate": 8.243660271031798e-05, "loss": 4.45, "step": 513 }, { "epoch": 0.013793473593817088, "grad_norm": 2.140625, "learning_rate": 8.259761169998658e-05, "loss": 4.3817, "step": 514 }, { "epoch": 0.013820309145556032, "grad_norm": 2.09375, "learning_rate": 8.275862068965516e-05, "loss": 4.3874, "step": 515 }, { "epoch": 0.013847144697294976, "grad_norm": 2.203125, "learning_rate": 8.291962967932376e-05, "loss": 4.4214, "step": 516 }, { "epoch": 0.01387398024903392, "grad_norm": 2.390625, "learning_rate": 8.308063866899235e-05, "loss": 4.509, "step": 517 }, { "epoch": 0.013900815800772863, "grad_norm": 2.390625, "learning_rate": 8.324164765866094e-05, "loss": 4.2456, "step": 518 }, { "epoch": 0.013927651352511807, "grad_norm": 2.25, "learning_rate": 8.340265664832953e-05, "loss": 4.4398, "step": 519 }, { "epoch": 0.013954486904250751, "grad_norm": 2.46875, "learning_rate": 8.35636656379981e-05, "loss": 4.3686, "step": 520 }, { "epoch": 0.013981322455989695, "grad_norm": 2.28125, "learning_rate": 8.37246746276667e-05, "loss": 4.3074, "step": 521 }, { "epoch": 0.014008158007728639, "grad_norm": 2.625, "learning_rate": 8.388568361733528e-05, "loss": 4.4538, "step": 522 }, { "epoch": 0.014034993559467582, "grad_norm": 2.09375, "learning_rate": 8.404669260700388e-05, "loss": 4.2823, "step": 523 }, { "epoch": 0.014061829111206526, "grad_norm": 2.1875, "learning_rate": 8.420770159667247e-05, "loss": 4.3328, "step": 524 }, { "epoch": 0.01408866466294547, "grad_norm": 2.328125, "learning_rate": 8.436871058634106e-05, "loss": 4.2065, "step": 525 }, { "epoch": 0.014115500214684414, "grad_norm": 2.546875, "learning_rate": 8.452971957600965e-05, "loss": 4.2988, "step": 526 }, { "epoch": 0.014142335766423358, "grad_norm": 2.125, "learning_rate": 8.469072856567825e-05, "loss": 4.3695, "step": 527 }, { "epoch": 0.014169171318162302, "grad_norm": 2.328125, "learning_rate": 8.485173755534683e-05, "loss": 4.2152, "step": 528 }, { "epoch": 0.014196006869901245, "grad_norm": 2.140625, "learning_rate": 8.501274654501543e-05, "loss": 4.3909, "step": 529 }, { "epoch": 0.01422284242164019, "grad_norm": 2.390625, "learning_rate": 8.5173755534684e-05, "loss": 4.3848, "step": 530 }, { "epoch": 0.014249677973379133, "grad_norm": 2.0625, "learning_rate": 8.53347645243526e-05, "loss": 4.3558, "step": 531 }, { "epoch": 0.014276513525118077, "grad_norm": 2.15625, "learning_rate": 8.54957735140212e-05, "loss": 4.2749, "step": 532 }, { "epoch": 0.01430334907685702, "grad_norm": 1.953125, "learning_rate": 8.565678250368978e-05, "loss": 4.4349, "step": 533 }, { "epoch": 0.014330184628595964, "grad_norm": 2.046875, "learning_rate": 8.581779149335837e-05, "loss": 4.2349, "step": 534 }, { "epoch": 0.014357020180334908, "grad_norm": 2.546875, "learning_rate": 8.597880048302696e-05, "loss": 4.4174, "step": 535 }, { "epoch": 0.014383855732073852, "grad_norm": 2.625, "learning_rate": 8.613980947269555e-05, "loss": 4.3755, "step": 536 }, { "epoch": 0.014410691283812796, "grad_norm": 2.640625, "learning_rate": 8.630081846236415e-05, "loss": 4.3991, "step": 537 }, { "epoch": 0.01443752683555174, "grad_norm": 2.296875, "learning_rate": 8.646182745203273e-05, "loss": 4.29, "step": 538 }, { "epoch": 0.014464362387290682, "grad_norm": 3.015625, "learning_rate": 8.662283644170133e-05, "loss": 4.3604, "step": 539 }, { "epoch": 0.014491197939029626, "grad_norm": 2.3125, "learning_rate": 8.67838454313699e-05, "loss": 4.3343, "step": 540 }, { "epoch": 0.01451803349076857, "grad_norm": 2.890625, "learning_rate": 8.69448544210385e-05, "loss": 4.2301, "step": 541 }, { "epoch": 0.014544869042507513, "grad_norm": 2.390625, "learning_rate": 8.71058634107071e-05, "loss": 4.1963, "step": 542 }, { "epoch": 0.014571704594246457, "grad_norm": 2.984375, "learning_rate": 8.726687240037568e-05, "loss": 4.3855, "step": 543 }, { "epoch": 0.014598540145985401, "grad_norm": 2.765625, "learning_rate": 8.742788139004427e-05, "loss": 4.2502, "step": 544 }, { "epoch": 0.014625375697724345, "grad_norm": 2.375, "learning_rate": 8.758889037971287e-05, "loss": 4.3246, "step": 545 }, { "epoch": 0.014652211249463289, "grad_norm": 2.390625, "learning_rate": 8.774989936938145e-05, "loss": 4.1753, "step": 546 }, { "epoch": 0.014679046801202232, "grad_norm": 2.5, "learning_rate": 8.791090835905005e-05, "loss": 4.2728, "step": 547 }, { "epoch": 0.014705882352941176, "grad_norm": 1.9765625, "learning_rate": 8.807191734871863e-05, "loss": 4.2105, "step": 548 }, { "epoch": 0.01473271790468012, "grad_norm": 2.078125, "learning_rate": 8.823292633838721e-05, "loss": 4.3461, "step": 549 }, { "epoch": 0.014759553456419064, "grad_norm": 2.234375, "learning_rate": 8.83939353280558e-05, "loss": 4.239, "step": 550 }, { "epoch": 0.014786389008158008, "grad_norm": 2.78125, "learning_rate": 8.855494431772439e-05, "loss": 4.3034, "step": 551 }, { "epoch": 0.014813224559896952, "grad_norm": 2.015625, "learning_rate": 8.8715953307393e-05, "loss": 4.2424, "step": 552 }, { "epoch": 0.014840060111635895, "grad_norm": 2.078125, "learning_rate": 8.887696229706157e-05, "loss": 4.2663, "step": 553 }, { "epoch": 0.01486689566337484, "grad_norm": 1.9375, "learning_rate": 8.903797128673017e-05, "loss": 4.0947, "step": 554 }, { "epoch": 0.014893731215113783, "grad_norm": 2.21875, "learning_rate": 8.919898027639875e-05, "loss": 4.3234, "step": 555 }, { "epoch": 0.014920566766852727, "grad_norm": 2.015625, "learning_rate": 8.935998926606735e-05, "loss": 4.2684, "step": 556 }, { "epoch": 0.01494740231859167, "grad_norm": 2.078125, "learning_rate": 8.952099825573594e-05, "loss": 4.284, "step": 557 }, { "epoch": 0.014974237870330614, "grad_norm": 2.109375, "learning_rate": 8.968200724540453e-05, "loss": 4.2283, "step": 558 }, { "epoch": 0.015001073422069558, "grad_norm": 2.140625, "learning_rate": 8.984301623507311e-05, "loss": 4.1861, "step": 559 }, { "epoch": 0.015027908973808502, "grad_norm": 2.34375, "learning_rate": 9.000402522474172e-05, "loss": 4.3193, "step": 560 }, { "epoch": 0.015054744525547446, "grad_norm": 2.0625, "learning_rate": 9.016503421441029e-05, "loss": 4.066, "step": 561 }, { "epoch": 0.01508158007728639, "grad_norm": 2.375, "learning_rate": 9.03260432040789e-05, "loss": 4.1329, "step": 562 }, { "epoch": 0.015108415629025334, "grad_norm": 1.921875, "learning_rate": 9.048705219374747e-05, "loss": 4.1746, "step": 563 }, { "epoch": 0.015135251180764276, "grad_norm": 2.359375, "learning_rate": 9.064806118341607e-05, "loss": 4.1191, "step": 564 }, { "epoch": 0.01516208673250322, "grad_norm": 1.859375, "learning_rate": 9.080907017308465e-05, "loss": 4.1968, "step": 565 }, { "epoch": 0.015188922284242163, "grad_norm": 2.1875, "learning_rate": 9.097007916275325e-05, "loss": 4.334, "step": 566 }, { "epoch": 0.015215757835981107, "grad_norm": 1.9375, "learning_rate": 9.113108815242184e-05, "loss": 4.2797, "step": 567 }, { "epoch": 0.015242593387720051, "grad_norm": 1.8203125, "learning_rate": 9.129209714209043e-05, "loss": 4.2597, "step": 568 }, { "epoch": 0.015269428939458995, "grad_norm": 2.1875, "learning_rate": 9.145310613175901e-05, "loss": 4.2859, "step": 569 }, { "epoch": 0.015296264491197939, "grad_norm": 2.34375, "learning_rate": 9.161411512142762e-05, "loss": 4.3066, "step": 570 }, { "epoch": 0.015323100042936882, "grad_norm": 2.265625, "learning_rate": 9.177512411109619e-05, "loss": 4.2941, "step": 571 }, { "epoch": 0.015349935594675826, "grad_norm": 2.21875, "learning_rate": 9.19361331007648e-05, "loss": 4.3608, "step": 572 }, { "epoch": 0.01537677114641477, "grad_norm": 2.375, "learning_rate": 9.209714209043337e-05, "loss": 4.1801, "step": 573 }, { "epoch": 0.015403606698153714, "grad_norm": 1.75, "learning_rate": 9.225815108010197e-05, "loss": 4.1517, "step": 574 }, { "epoch": 0.015430442249892658, "grad_norm": 2.15625, "learning_rate": 9.241916006977056e-05, "loss": 4.2347, "step": 575 }, { "epoch": 0.015457277801631602, "grad_norm": 2.09375, "learning_rate": 9.258016905943915e-05, "loss": 4.194, "step": 576 }, { "epoch": 0.015484113353370545, "grad_norm": 2.140625, "learning_rate": 9.274117804910774e-05, "loss": 4.328, "step": 577 }, { "epoch": 0.01551094890510949, "grad_norm": 2.09375, "learning_rate": 9.290218703877631e-05, "loss": 4.1629, "step": 578 }, { "epoch": 0.015537784456848433, "grad_norm": 1.9140625, "learning_rate": 9.306319602844491e-05, "loss": 4.1435, "step": 579 }, { "epoch": 0.015564620008587377, "grad_norm": 1.8828125, "learning_rate": 9.322420501811349e-05, "loss": 4.2625, "step": 580 }, { "epoch": 0.01559145556032632, "grad_norm": 1.9375, "learning_rate": 9.338521400778209e-05, "loss": 4.1197, "step": 581 }, { "epoch": 0.015618291112065264, "grad_norm": 2.296875, "learning_rate": 9.354622299745068e-05, "loss": 4.1133, "step": 582 }, { "epoch": 0.01564512666380421, "grad_norm": 1.9921875, "learning_rate": 9.370723198711927e-05, "loss": 4.1078, "step": 583 }, { "epoch": 0.01567196221554315, "grad_norm": 2.09375, "learning_rate": 9.386824097678786e-05, "loss": 4.1272, "step": 584 }, { "epoch": 0.015698797767282096, "grad_norm": 2.125, "learning_rate": 9.402924996645646e-05, "loss": 4.1002, "step": 585 }, { "epoch": 0.015725633319021038, "grad_norm": 1.9921875, "learning_rate": 9.419025895612503e-05, "loss": 4.2743, "step": 586 }, { "epoch": 0.015752468870759984, "grad_norm": 2.140625, "learning_rate": 9.435126794579364e-05, "loss": 4.1157, "step": 587 }, { "epoch": 0.015779304422498926, "grad_norm": 3.5, "learning_rate": 9.451227693546221e-05, "loss": 4.2077, "step": 588 }, { "epoch": 0.01580613997423787, "grad_norm": 3.359375, "learning_rate": 9.467328592513081e-05, "loss": 4.2435, "step": 589 }, { "epoch": 0.015832975525976813, "grad_norm": 3.3125, "learning_rate": 9.48342949147994e-05, "loss": 4.2211, "step": 590 }, { "epoch": 0.01585981107771576, "grad_norm": 2.75, "learning_rate": 9.499530390446799e-05, "loss": 4.2792, "step": 591 }, { "epoch": 0.0158866466294547, "grad_norm": 2.8125, "learning_rate": 9.515631289413658e-05, "loss": 4.2218, "step": 592 }, { "epoch": 0.015913482181193647, "grad_norm": 2.484375, "learning_rate": 9.531732188380518e-05, "loss": 4.3444, "step": 593 }, { "epoch": 0.01594031773293259, "grad_norm": 1.8984375, "learning_rate": 9.547833087347376e-05, "loss": 4.3018, "step": 594 }, { "epoch": 0.015967153284671534, "grad_norm": 2.125, "learning_rate": 9.563933986314236e-05, "loss": 4.332, "step": 595 }, { "epoch": 0.015993988836410476, "grad_norm": 2.15625, "learning_rate": 9.580034885281093e-05, "loss": 4.0887, "step": 596 }, { "epoch": 0.016020824388149422, "grad_norm": 1.96875, "learning_rate": 9.596135784247954e-05, "loss": 4.1912, "step": 597 }, { "epoch": 0.016047659939888364, "grad_norm": 2.40625, "learning_rate": 9.612236683214811e-05, "loss": 4.1784, "step": 598 }, { "epoch": 0.01607449549162731, "grad_norm": 2.09375, "learning_rate": 9.628337582181671e-05, "loss": 4.0256, "step": 599 }, { "epoch": 0.01610133104336625, "grad_norm": 2.265625, "learning_rate": 9.64443848114853e-05, "loss": 4.0185, "step": 600 }, { "epoch": 0.016128166595105197, "grad_norm": 2.109375, "learning_rate": 9.660539380115389e-05, "loss": 4.1502, "step": 601 }, { "epoch": 0.01615500214684414, "grad_norm": 2.015625, "learning_rate": 9.676640279082248e-05, "loss": 4.1812, "step": 602 }, { "epoch": 0.01618183769858308, "grad_norm": 2.015625, "learning_rate": 9.692741178049108e-05, "loss": 4.0783, "step": 603 }, { "epoch": 0.016208673250322027, "grad_norm": 2.34375, "learning_rate": 9.708842077015966e-05, "loss": 4.1562, "step": 604 }, { "epoch": 0.01623550880206097, "grad_norm": 1.8515625, "learning_rate": 9.724942975982826e-05, "loss": 4.1456, "step": 605 }, { "epoch": 0.016262344353799914, "grad_norm": 2.25, "learning_rate": 9.741043874949683e-05, "loss": 4.198, "step": 606 }, { "epoch": 0.016289179905538857, "grad_norm": 1.875, "learning_rate": 9.757144773916542e-05, "loss": 4.1699, "step": 607 }, { "epoch": 0.016316015457277802, "grad_norm": 2.21875, "learning_rate": 9.773245672883403e-05, "loss": 4.1647, "step": 608 }, { "epoch": 0.016342851009016744, "grad_norm": 1.859375, "learning_rate": 9.78934657185026e-05, "loss": 4.2288, "step": 609 }, { "epoch": 0.01636968656075569, "grad_norm": 1.9921875, "learning_rate": 9.80544747081712e-05, "loss": 4.2107, "step": 610 }, { "epoch": 0.016396522112494632, "grad_norm": 1.953125, "learning_rate": 9.821548369783978e-05, "loss": 4.2672, "step": 611 }, { "epoch": 0.016423357664233577, "grad_norm": 2.015625, "learning_rate": 9.837649268750838e-05, "loss": 4.1112, "step": 612 }, { "epoch": 0.01645019321597252, "grad_norm": 1.8046875, "learning_rate": 9.853750167717696e-05, "loss": 4.1257, "step": 613 }, { "epoch": 0.016477028767711465, "grad_norm": 1.8828125, "learning_rate": 9.869851066684556e-05, "loss": 4.1892, "step": 614 }, { "epoch": 0.016503864319450407, "grad_norm": 1.8359375, "learning_rate": 9.885951965651415e-05, "loss": 4.0687, "step": 615 }, { "epoch": 0.016530699871189353, "grad_norm": 1.9296875, "learning_rate": 9.902052864618273e-05, "loss": 4.1844, "step": 616 }, { "epoch": 0.016557535422928295, "grad_norm": 1.8046875, "learning_rate": 9.918153763585132e-05, "loss": 4.0546, "step": 617 }, { "epoch": 0.01658437097466724, "grad_norm": 2.0, "learning_rate": 9.934254662551993e-05, "loss": 4.0749, "step": 618 }, { "epoch": 0.016611206526406182, "grad_norm": 2.21875, "learning_rate": 9.95035556151885e-05, "loss": 4.1661, "step": 619 }, { "epoch": 0.016638042078145128, "grad_norm": 2.140625, "learning_rate": 9.96645646048571e-05, "loss": 4.1366, "step": 620 }, { "epoch": 0.01666487762988407, "grad_norm": 2.296875, "learning_rate": 9.982557359452568e-05, "loss": 4.0851, "step": 621 }, { "epoch": 0.016691713181623016, "grad_norm": 2.0625, "learning_rate": 9.998658258419428e-05, "loss": 4.0881, "step": 622 }, { "epoch": 0.016718548733361958, "grad_norm": 1.8359375, "learning_rate": 0.00010014759157386287, "loss": 4.1648, "step": 623 }, { "epoch": 0.016745384285100903, "grad_norm": 2.015625, "learning_rate": 0.00010030860056353146, "loss": 4.0939, "step": 624 }, { "epoch": 0.016772219836839845, "grad_norm": 1.984375, "learning_rate": 0.00010046960955320005, "loss": 4.163, "step": 625 }, { "epoch": 0.01679905538857879, "grad_norm": 2.140625, "learning_rate": 0.00010063061854286863, "loss": 4.1306, "step": 626 }, { "epoch": 0.016825890940317733, "grad_norm": 1.7734375, "learning_rate": 0.00010079162753253722, "loss": 4.1816, "step": 627 }, { "epoch": 0.016852726492056675, "grad_norm": 1.8046875, "learning_rate": 0.00010095263652220583, "loss": 4.0524, "step": 628 }, { "epoch": 0.01687956204379562, "grad_norm": 1.8515625, "learning_rate": 0.0001011136455118744, "loss": 4.1276, "step": 629 }, { "epoch": 0.016906397595534563, "grad_norm": 1.796875, "learning_rate": 0.000101274654501543, "loss": 4.1439, "step": 630 }, { "epoch": 0.01693323314727351, "grad_norm": 1.7734375, "learning_rate": 0.00010143566349121158, "loss": 3.959, "step": 631 }, { "epoch": 0.01696006869901245, "grad_norm": 1.9375, "learning_rate": 0.00010159667248088018, "loss": 4.0194, "step": 632 }, { "epoch": 0.016986904250751396, "grad_norm": 1.7265625, "learning_rate": 0.00010175768147054877, "loss": 3.8318, "step": 633 }, { "epoch": 0.017013739802490338, "grad_norm": 1.953125, "learning_rate": 0.00010191869046021736, "loss": 4.1163, "step": 634 }, { "epoch": 0.017040575354229284, "grad_norm": 1.7578125, "learning_rate": 0.00010207969944988595, "loss": 4.1028, "step": 635 }, { "epoch": 0.017067410905968226, "grad_norm": 1.78125, "learning_rate": 0.00010224070843955452, "loss": 3.8749, "step": 636 }, { "epoch": 0.01709424645770717, "grad_norm": 1.703125, "learning_rate": 0.00010240171742922312, "loss": 4.1206, "step": 637 }, { "epoch": 0.017121082009446113, "grad_norm": 1.890625, "learning_rate": 0.00010256272641889171, "loss": 4.0014, "step": 638 }, { "epoch": 0.01714791756118506, "grad_norm": 1.859375, "learning_rate": 0.0001027237354085603, "loss": 4.0073, "step": 639 }, { "epoch": 0.017174753112924, "grad_norm": 1.9296875, "learning_rate": 0.00010288474439822889, "loss": 4.1075, "step": 640 }, { "epoch": 0.017201588664662947, "grad_norm": 1.6875, "learning_rate": 0.00010304575338789748, "loss": 4.0358, "step": 641 }, { "epoch": 0.01722842421640189, "grad_norm": 2.078125, "learning_rate": 0.00010320676237756607, "loss": 4.1089, "step": 642 }, { "epoch": 0.017255259768140834, "grad_norm": 1.7734375, "learning_rate": 0.00010336777136723467, "loss": 4.0175, "step": 643 }, { "epoch": 0.017282095319879776, "grad_norm": 1.8515625, "learning_rate": 0.00010352878035690324, "loss": 4.0349, "step": 644 }, { "epoch": 0.017308930871618722, "grad_norm": 1.75, "learning_rate": 0.00010368978934657185, "loss": 3.9894, "step": 645 }, { "epoch": 0.017335766423357664, "grad_norm": 1.84375, "learning_rate": 0.00010385079833624042, "loss": 4.0339, "step": 646 }, { "epoch": 0.01736260197509661, "grad_norm": 1.796875, "learning_rate": 0.00010401180732590902, "loss": 3.8787, "step": 647 }, { "epoch": 0.01738943752683555, "grad_norm": 2.140625, "learning_rate": 0.00010417281631557761, "loss": 4.1469, "step": 648 }, { "epoch": 0.017416273078574497, "grad_norm": 1.5, "learning_rate": 0.0001043338253052462, "loss": 4.0341, "step": 649 }, { "epoch": 0.01744310863031344, "grad_norm": 2.171875, "learning_rate": 0.00010449483429491479, "loss": 3.9191, "step": 650 }, { "epoch": 0.017469944182052385, "grad_norm": 1.7265625, "learning_rate": 0.00010465584328458339, "loss": 3.9803, "step": 651 }, { "epoch": 0.017496779733791327, "grad_norm": 1.9296875, "learning_rate": 0.00010481685227425197, "loss": 3.951, "step": 652 }, { "epoch": 0.01752361528553027, "grad_norm": 1.796875, "learning_rate": 0.00010497786126392057, "loss": 3.9556, "step": 653 }, { "epoch": 0.017550450837269214, "grad_norm": 1.8828125, "learning_rate": 0.00010513887025358914, "loss": 4.1431, "step": 654 }, { "epoch": 0.017577286389008157, "grad_norm": 1.7890625, "learning_rate": 0.00010529987924325775, "loss": 3.8261, "step": 655 }, { "epoch": 0.017604121940747102, "grad_norm": 1.59375, "learning_rate": 0.00010546088823292632, "loss": 3.8829, "step": 656 }, { "epoch": 0.017630957492486044, "grad_norm": 1.6953125, "learning_rate": 0.00010562189722259492, "loss": 3.9609, "step": 657 }, { "epoch": 0.01765779304422499, "grad_norm": 1.8671875, "learning_rate": 0.00010578290621226351, "loss": 3.8659, "step": 658 }, { "epoch": 0.017684628595963932, "grad_norm": 1.625, "learning_rate": 0.0001059439152019321, "loss": 3.9399, "step": 659 }, { "epoch": 0.017711464147702877, "grad_norm": 1.859375, "learning_rate": 0.00010610492419160069, "loss": 3.9971, "step": 660 }, { "epoch": 0.01773829969944182, "grad_norm": 2.078125, "learning_rate": 0.00010626593318126929, "loss": 4.0625, "step": 661 }, { "epoch": 0.017765135251180765, "grad_norm": 2.203125, "learning_rate": 0.00010642694217093787, "loss": 4.0462, "step": 662 }, { "epoch": 0.017791970802919707, "grad_norm": 1.8046875, "learning_rate": 0.00010658795116060646, "loss": 3.9608, "step": 663 }, { "epoch": 0.017818806354658653, "grad_norm": 1.765625, "learning_rate": 0.00010674896015027504, "loss": 4.0542, "step": 664 }, { "epoch": 0.017845641906397595, "grad_norm": 1.8359375, "learning_rate": 0.00010690996913994363, "loss": 3.8627, "step": 665 }, { "epoch": 0.01787247745813654, "grad_norm": 1.984375, "learning_rate": 0.00010707097812961223, "loss": 3.9198, "step": 666 }, { "epoch": 0.017899313009875482, "grad_norm": 1.84375, "learning_rate": 0.00010723198711928081, "loss": 4.0862, "step": 667 }, { "epoch": 0.017926148561614428, "grad_norm": 1.90625, "learning_rate": 0.00010739299610894941, "loss": 4.0695, "step": 668 }, { "epoch": 0.01795298411335337, "grad_norm": 1.921875, "learning_rate": 0.00010755400509861799, "loss": 4.0381, "step": 669 }, { "epoch": 0.017979819665092316, "grad_norm": 1.859375, "learning_rate": 0.00010771501408828659, "loss": 4.1479, "step": 670 }, { "epoch": 0.018006655216831258, "grad_norm": 2.078125, "learning_rate": 0.00010787602307795516, "loss": 4.0558, "step": 671 }, { "epoch": 0.018033490768570203, "grad_norm": 1.6953125, "learning_rate": 0.00010803703206762377, "loss": 3.9438, "step": 672 }, { "epoch": 0.018060326320309145, "grad_norm": 2.125, "learning_rate": 0.00010819804105729236, "loss": 3.8781, "step": 673 }, { "epoch": 0.01808716187204809, "grad_norm": 1.7265625, "learning_rate": 0.00010835905004696094, "loss": 3.8264, "step": 674 }, { "epoch": 0.018113997423787033, "grad_norm": 2.0, "learning_rate": 0.00010852005903662953, "loss": 3.9665, "step": 675 }, { "epoch": 0.01814083297552598, "grad_norm": 1.7421875, "learning_rate": 0.00010868106802629813, "loss": 3.8974, "step": 676 }, { "epoch": 0.01816766852726492, "grad_norm": 1.6953125, "learning_rate": 0.00010884207701596671, "loss": 3.8475, "step": 677 }, { "epoch": 0.018194504079003863, "grad_norm": 1.7890625, "learning_rate": 0.00010900308600563531, "loss": 3.917, "step": 678 }, { "epoch": 0.01822133963074281, "grad_norm": 1.6640625, "learning_rate": 0.00010916409499530389, "loss": 3.9185, "step": 679 }, { "epoch": 0.01824817518248175, "grad_norm": 1.765625, "learning_rate": 0.00010932510398497249, "loss": 3.9236, "step": 680 }, { "epoch": 0.018275010734220696, "grad_norm": 1.7734375, "learning_rate": 0.00010948611297464108, "loss": 3.903, "step": 681 }, { "epoch": 0.018301846285959638, "grad_norm": 1.921875, "learning_rate": 0.00010964712196430967, "loss": 3.8746, "step": 682 }, { "epoch": 0.018328681837698584, "grad_norm": 1.6640625, "learning_rate": 0.00010980813095397826, "loss": 3.8932, "step": 683 }, { "epoch": 0.018355517389437526, "grad_norm": 1.78125, "learning_rate": 0.00010996913994364684, "loss": 3.9257, "step": 684 }, { "epoch": 0.01838235294117647, "grad_norm": 1.7421875, "learning_rate": 0.00011013014893331543, "loss": 3.8896, "step": 685 }, { "epoch": 0.018409188492915413, "grad_norm": 2.09375, "learning_rate": 0.00011029115792298403, "loss": 4.0075, "step": 686 }, { "epoch": 0.01843602404465436, "grad_norm": 1.90625, "learning_rate": 0.00011045216691265261, "loss": 3.9142, "step": 687 }, { "epoch": 0.0184628595963933, "grad_norm": 1.984375, "learning_rate": 0.00011061317590232121, "loss": 3.8703, "step": 688 }, { "epoch": 0.018489695148132246, "grad_norm": 1.890625, "learning_rate": 0.00011077418489198979, "loss": 3.8852, "step": 689 }, { "epoch": 0.01851653069987119, "grad_norm": 1.7578125, "learning_rate": 0.00011093519388165839, "loss": 3.7523, "step": 690 }, { "epoch": 0.018543366251610134, "grad_norm": 1.6015625, "learning_rate": 0.00011109620287132698, "loss": 3.8398, "step": 691 }, { "epoch": 0.018570201803349076, "grad_norm": 1.65625, "learning_rate": 0.00011125721186099555, "loss": 3.9213, "step": 692 }, { "epoch": 0.018597037355088022, "grad_norm": 1.7109375, "learning_rate": 0.00011141822085066416, "loss": 3.9916, "step": 693 }, { "epoch": 0.018623872906826964, "grad_norm": 1.984375, "learning_rate": 0.00011157922984033273, "loss": 3.9081, "step": 694 }, { "epoch": 0.01865070845856591, "grad_norm": 1.5859375, "learning_rate": 0.00011174023883000133, "loss": 3.8442, "step": 695 }, { "epoch": 0.01867754401030485, "grad_norm": 2.921875, "learning_rate": 0.00011190124781966992, "loss": 4.0953, "step": 696 }, { "epoch": 0.018704379562043797, "grad_norm": 2.6875, "learning_rate": 0.00011206225680933851, "loss": 3.9448, "step": 697 }, { "epoch": 0.01873121511378274, "grad_norm": 3.21875, "learning_rate": 0.0001122232657990071, "loss": 4.0563, "step": 698 }, { "epoch": 0.018758050665521685, "grad_norm": 2.828125, "learning_rate": 0.00011238427478867569, "loss": 4.1322, "step": 699 }, { "epoch": 0.018784886217260627, "grad_norm": 1.96875, "learning_rate": 0.00011254528377834428, "loss": 4.0156, "step": 700 }, { "epoch": 0.018811721768999572, "grad_norm": 2.390625, "learning_rate": 0.00011270629276801288, "loss": 4.1138, "step": 701 }, { "epoch": 0.018838557320738514, "grad_norm": 1.96875, "learning_rate": 0.00011286730175768145, "loss": 3.7771, "step": 702 }, { "epoch": 0.018865392872477457, "grad_norm": 1.984375, "learning_rate": 0.00011302831074735006, "loss": 4.063, "step": 703 }, { "epoch": 0.018892228424216402, "grad_norm": 1.921875, "learning_rate": 0.00011318931973701863, "loss": 3.9774, "step": 704 }, { "epoch": 0.018919063975955344, "grad_norm": 1.6796875, "learning_rate": 0.00011335032872668723, "loss": 3.9903, "step": 705 }, { "epoch": 0.01894589952769429, "grad_norm": 1.625, "learning_rate": 0.00011351133771635582, "loss": 4.0018, "step": 706 }, { "epoch": 0.018972735079433232, "grad_norm": 1.7109375, "learning_rate": 0.00011367234670602441, "loss": 3.9046, "step": 707 }, { "epoch": 0.018999570631172177, "grad_norm": 1.5, "learning_rate": 0.000113833355695693, "loss": 3.9362, "step": 708 }, { "epoch": 0.01902640618291112, "grad_norm": 1.703125, "learning_rate": 0.0001139943646853616, "loss": 3.9763, "step": 709 }, { "epoch": 0.019053241734650065, "grad_norm": 1.4921875, "learning_rate": 0.00011415537367503018, "loss": 3.9076, "step": 710 }, { "epoch": 0.019080077286389007, "grad_norm": 1.9296875, "learning_rate": 0.00011431638266469878, "loss": 3.9149, "step": 711 }, { "epoch": 0.019106912838127953, "grad_norm": 1.703125, "learning_rate": 0.00011447739165436735, "loss": 3.9214, "step": 712 }, { "epoch": 0.019133748389866895, "grad_norm": 1.7734375, "learning_rate": 0.00011463840064403596, "loss": 3.7553, "step": 713 }, { "epoch": 0.01916058394160584, "grad_norm": 1.671875, "learning_rate": 0.00011479940963370453, "loss": 3.8122, "step": 714 }, { "epoch": 0.019187419493344782, "grad_norm": 1.71875, "learning_rate": 0.00011496041862337313, "loss": 3.9086, "step": 715 }, { "epoch": 0.019214255045083728, "grad_norm": 1.65625, "learning_rate": 0.00011512142761304172, "loss": 4.0382, "step": 716 }, { "epoch": 0.01924109059682267, "grad_norm": 1.5859375, "learning_rate": 0.00011528243660271031, "loss": 3.7901, "step": 717 }, { "epoch": 0.019267926148561616, "grad_norm": 1.484375, "learning_rate": 0.0001154434455923789, "loss": 3.8423, "step": 718 }, { "epoch": 0.019294761700300558, "grad_norm": 1.4921875, "learning_rate": 0.0001156044545820475, "loss": 3.8556, "step": 719 }, { "epoch": 0.019321597252039503, "grad_norm": 1.6328125, "learning_rate": 0.00011576546357171608, "loss": 4.0009, "step": 720 }, { "epoch": 0.019348432803778445, "grad_norm": 1.5078125, "learning_rate": 0.00011592647256138466, "loss": 3.9025, "step": 721 }, { "epoch": 0.01937526835551739, "grad_norm": 1.5625, "learning_rate": 0.00011608748155105325, "loss": 3.9875, "step": 722 }, { "epoch": 0.019402103907256333, "grad_norm": 1.7265625, "learning_rate": 0.00011624849054072184, "loss": 3.9835, "step": 723 }, { "epoch": 0.01942893945899528, "grad_norm": 1.5546875, "learning_rate": 0.00011640949953039044, "loss": 3.9105, "step": 724 }, { "epoch": 0.01945577501073422, "grad_norm": 1.546875, "learning_rate": 0.00011657050852005902, "loss": 3.8931, "step": 725 }, { "epoch": 0.019482610562473166, "grad_norm": 1.484375, "learning_rate": 0.00011673151750972762, "loss": 3.876, "step": 726 }, { "epoch": 0.019509446114212108, "grad_norm": 1.6328125, "learning_rate": 0.0001168925264993962, "loss": 3.6738, "step": 727 }, { "epoch": 0.01953628166595105, "grad_norm": 1.734375, "learning_rate": 0.0001170535354890648, "loss": 3.9665, "step": 728 }, { "epoch": 0.019563117217689996, "grad_norm": 1.53125, "learning_rate": 0.00011721454447873337, "loss": 3.9269, "step": 729 }, { "epoch": 0.019589952769428938, "grad_norm": 1.59375, "learning_rate": 0.00011737555346840198, "loss": 3.8257, "step": 730 }, { "epoch": 0.019616788321167884, "grad_norm": 1.5390625, "learning_rate": 0.00011753656245807056, "loss": 3.8177, "step": 731 }, { "epoch": 0.019643623872906826, "grad_norm": 1.6640625, "learning_rate": 0.00011769757144773915, "loss": 3.8925, "step": 732 }, { "epoch": 0.01967045942464577, "grad_norm": 1.578125, "learning_rate": 0.00011785858043740774, "loss": 3.9386, "step": 733 }, { "epoch": 0.019697294976384713, "grad_norm": 1.8984375, "learning_rate": 0.00011801958942707634, "loss": 3.916, "step": 734 }, { "epoch": 0.01972413052812366, "grad_norm": 1.7578125, "learning_rate": 0.00011818059841674492, "loss": 3.9253, "step": 735 }, { "epoch": 0.0197509660798626, "grad_norm": 1.609375, "learning_rate": 0.00011834160740641352, "loss": 4.0344, "step": 736 }, { "epoch": 0.019777801631601546, "grad_norm": 1.515625, "learning_rate": 0.0001185026163960821, "loss": 3.8834, "step": 737 }, { "epoch": 0.01980463718334049, "grad_norm": 1.671875, "learning_rate": 0.0001186636253857507, "loss": 3.7693, "step": 738 }, { "epoch": 0.019831472735079434, "grad_norm": 1.6484375, "learning_rate": 0.00011882463437541929, "loss": 3.7995, "step": 739 }, { "epoch": 0.019858308286818376, "grad_norm": 1.6875, "learning_rate": 0.00011898564336508788, "loss": 3.7882, "step": 740 }, { "epoch": 0.019885143838557322, "grad_norm": 1.5390625, "learning_rate": 0.00011914665235475646, "loss": 3.8731, "step": 741 }, { "epoch": 0.019911979390296264, "grad_norm": 1.5078125, "learning_rate": 0.00011930766134442505, "loss": 4.0078, "step": 742 }, { "epoch": 0.01993881494203521, "grad_norm": 1.5234375, "learning_rate": 0.00011946867033409364, "loss": 3.7673, "step": 743 }, { "epoch": 0.01996565049377415, "grad_norm": 1.5546875, "learning_rate": 0.00011962967932376224, "loss": 3.7289, "step": 744 }, { "epoch": 0.019992486045513097, "grad_norm": 1.546875, "learning_rate": 0.00011979068831343082, "loss": 3.8609, "step": 745 }, { "epoch": 0.02001932159725204, "grad_norm": 1.4765625, "learning_rate": 0.00011995169730309942, "loss": 3.8262, "step": 746 }, { "epoch": 0.020046157148990985, "grad_norm": 1.65625, "learning_rate": 0.000120112706292768, "loss": 3.7783, "step": 747 }, { "epoch": 0.020072992700729927, "grad_norm": 1.4296875, "learning_rate": 0.0001202737152824366, "loss": 3.8311, "step": 748 }, { "epoch": 0.020099828252468872, "grad_norm": 1.46875, "learning_rate": 0.00012043472427210519, "loss": 3.7866, "step": 749 }, { "epoch": 0.020126663804207814, "grad_norm": 1.46875, "learning_rate": 0.00012059573326177376, "loss": 3.9292, "step": 750 }, { "epoch": 0.02015349935594676, "grad_norm": 1.703125, "learning_rate": 0.00012075674225144236, "loss": 3.8925, "step": 751 }, { "epoch": 0.020180334907685702, "grad_norm": 1.53125, "learning_rate": 0.00012091775124111094, "loss": 3.7173, "step": 752 }, { "epoch": 0.020207170459424644, "grad_norm": 1.5859375, "learning_rate": 0.00012107876023077954, "loss": 3.9768, "step": 753 }, { "epoch": 0.02023400601116359, "grad_norm": 1.6171875, "learning_rate": 0.00012123976922044813, "loss": 3.8293, "step": 754 }, { "epoch": 0.020260841562902532, "grad_norm": 1.5859375, "learning_rate": 0.00012140077821011672, "loss": 3.8684, "step": 755 }, { "epoch": 0.020287677114641477, "grad_norm": 1.625, "learning_rate": 0.00012156178719978531, "loss": 3.8662, "step": 756 }, { "epoch": 0.02031451266638042, "grad_norm": 1.4453125, "learning_rate": 0.0001217227961894539, "loss": 3.5238, "step": 757 }, { "epoch": 0.020341348218119365, "grad_norm": 1.5546875, "learning_rate": 0.00012188380517912248, "loss": 3.6905, "step": 758 }, { "epoch": 0.020368183769858307, "grad_norm": 1.46875, "learning_rate": 0.00012204481416879109, "loss": 3.813, "step": 759 }, { "epoch": 0.020395019321597253, "grad_norm": 1.4609375, "learning_rate": 0.00012220582315845968, "loss": 3.8228, "step": 760 }, { "epoch": 0.020421854873336195, "grad_norm": 1.6484375, "learning_rate": 0.00012236683214812826, "loss": 3.7878, "step": 761 }, { "epoch": 0.02044869042507514, "grad_norm": 1.546875, "learning_rate": 0.00012252784113779685, "loss": 3.9346, "step": 762 }, { "epoch": 0.020475525976814082, "grad_norm": 1.453125, "learning_rate": 0.00012268885012746544, "loss": 3.7281, "step": 763 }, { "epoch": 0.020502361528553028, "grad_norm": 1.5390625, "learning_rate": 0.00012284985911713403, "loss": 3.77, "step": 764 }, { "epoch": 0.02052919708029197, "grad_norm": 1.5078125, "learning_rate": 0.00012301086810680262, "loss": 3.7697, "step": 765 }, { "epoch": 0.020556032632030916, "grad_norm": 1.5234375, "learning_rate": 0.0001231718770964712, "loss": 3.6876, "step": 766 }, { "epoch": 0.020582868183769858, "grad_norm": 1.5703125, "learning_rate": 0.0001233328860861398, "loss": 3.8335, "step": 767 }, { "epoch": 0.020609703735508803, "grad_norm": 1.5, "learning_rate": 0.00012349389507580838, "loss": 3.8154, "step": 768 }, { "epoch": 0.020636539287247745, "grad_norm": 1.5859375, "learning_rate": 0.00012365490406547697, "loss": 3.7705, "step": 769 }, { "epoch": 0.02066337483898669, "grad_norm": 1.5625, "learning_rate": 0.00012381591305514556, "loss": 3.7252, "step": 770 }, { "epoch": 0.020690210390725633, "grad_norm": 1.5, "learning_rate": 0.00012397692204481418, "loss": 3.7944, "step": 771 }, { "epoch": 0.02071704594246458, "grad_norm": 1.578125, "learning_rate": 0.00012413793103448274, "loss": 3.6279, "step": 772 }, { "epoch": 0.02074388149420352, "grad_norm": 1.5859375, "learning_rate": 0.00012429894002415136, "loss": 3.6145, "step": 773 }, { "epoch": 0.020770717045942466, "grad_norm": 1.546875, "learning_rate": 0.00012445994901381992, "loss": 3.7395, "step": 774 }, { "epoch": 0.020797552597681408, "grad_norm": 1.5390625, "learning_rate": 0.00012462095800348853, "loss": 3.8173, "step": 775 }, { "epoch": 0.020824388149420354, "grad_norm": 1.5390625, "learning_rate": 0.0001247819669931571, "loss": 3.7668, "step": 776 }, { "epoch": 0.020851223701159296, "grad_norm": 1.5234375, "learning_rate": 0.0001249429759828257, "loss": 3.7202, "step": 777 }, { "epoch": 0.020878059252898238, "grad_norm": 1.59375, "learning_rate": 0.0001251039849724943, "loss": 3.7571, "step": 778 }, { "epoch": 0.020904894804637184, "grad_norm": 1.453125, "learning_rate": 0.00012526499396216286, "loss": 3.7355, "step": 779 }, { "epoch": 0.020931730356376126, "grad_norm": 1.4921875, "learning_rate": 0.00012542600295183148, "loss": 3.6878, "step": 780 }, { "epoch": 0.02095856590811507, "grad_norm": 1.3984375, "learning_rate": 0.00012558701194150004, "loss": 3.8659, "step": 781 }, { "epoch": 0.020985401459854013, "grad_norm": 1.578125, "learning_rate": 0.00012574802093116865, "loss": 3.7737, "step": 782 }, { "epoch": 0.02101223701159296, "grad_norm": 1.5703125, "learning_rate": 0.00012590902992083724, "loss": 3.8595, "step": 783 }, { "epoch": 0.0210390725633319, "grad_norm": 1.4921875, "learning_rate": 0.00012607003891050583, "loss": 3.6682, "step": 784 }, { "epoch": 0.021065908115070846, "grad_norm": 1.484375, "learning_rate": 0.00012623104790017442, "loss": 3.6801, "step": 785 }, { "epoch": 0.02109274366680979, "grad_norm": 1.5390625, "learning_rate": 0.000126392056889843, "loss": 3.7768, "step": 786 }, { "epoch": 0.021119579218548734, "grad_norm": 1.4375, "learning_rate": 0.0001265530658795116, "loss": 3.804, "step": 787 }, { "epoch": 0.021146414770287676, "grad_norm": 1.5, "learning_rate": 0.00012671407486918018, "loss": 3.7769, "step": 788 }, { "epoch": 0.021173250322026622, "grad_norm": 1.4609375, "learning_rate": 0.00012687508385884877, "loss": 3.7102, "step": 789 }, { "epoch": 0.021200085873765564, "grad_norm": 1.515625, "learning_rate": 0.00012703609284851736, "loss": 3.632, "step": 790 }, { "epoch": 0.02122692142550451, "grad_norm": 1.6640625, "learning_rate": 0.00012719710183818595, "loss": 3.7131, "step": 791 }, { "epoch": 0.02125375697724345, "grad_norm": 1.7734375, "learning_rate": 0.00012735811082785454, "loss": 3.6275, "step": 792 }, { "epoch": 0.021280592528982397, "grad_norm": 1.6484375, "learning_rate": 0.00012751911981752313, "loss": 3.666, "step": 793 }, { "epoch": 0.02130742808072134, "grad_norm": 1.6640625, "learning_rate": 0.00012768012880719172, "loss": 3.7023, "step": 794 }, { "epoch": 0.021334263632460285, "grad_norm": 1.4453125, "learning_rate": 0.0001278411377968603, "loss": 3.7081, "step": 795 }, { "epoch": 0.021361099184199227, "grad_norm": 1.6796875, "learning_rate": 0.00012800214678652892, "loss": 3.6594, "step": 796 }, { "epoch": 0.021387934735938172, "grad_norm": 1.453125, "learning_rate": 0.00012816315577619748, "loss": 3.8365, "step": 797 }, { "epoch": 0.021414770287677114, "grad_norm": 1.6328125, "learning_rate": 0.0001283241647658661, "loss": 3.6116, "step": 798 }, { "epoch": 0.02144160583941606, "grad_norm": 1.5625, "learning_rate": 0.00012848517375553466, "loss": 3.6225, "step": 799 }, { "epoch": 0.021468441391155002, "grad_norm": 1.6171875, "learning_rate": 0.00012864618274520328, "loss": 3.6285, "step": 800 }, { "epoch": 0.021495276942893948, "grad_norm": 1.6875, "learning_rate": 0.00012880719173487186, "loss": 3.6176, "step": 801 }, { "epoch": 0.02152211249463289, "grad_norm": 1.578125, "learning_rate": 0.00012896820072454045, "loss": 3.6587, "step": 802 }, { "epoch": 0.021548948046371832, "grad_norm": 1.53125, "learning_rate": 0.00012912920971420904, "loss": 3.509, "step": 803 }, { "epoch": 0.021575783598110777, "grad_norm": 1.3515625, "learning_rate": 0.00012929021870387763, "loss": 3.6912, "step": 804 }, { "epoch": 0.02160261914984972, "grad_norm": 1.609375, "learning_rate": 0.00012945122769354622, "loss": 3.7108, "step": 805 }, { "epoch": 0.021629454701588665, "grad_norm": 1.375, "learning_rate": 0.0001296122366832148, "loss": 3.5954, "step": 806 }, { "epoch": 0.021656290253327607, "grad_norm": 1.53125, "learning_rate": 0.0001297732456728834, "loss": 3.5902, "step": 807 }, { "epoch": 0.021683125805066553, "grad_norm": 1.3984375, "learning_rate": 0.00012993425466255198, "loss": 3.733, "step": 808 }, { "epoch": 0.021709961356805495, "grad_norm": 1.546875, "learning_rate": 0.00013009526365222057, "loss": 3.6035, "step": 809 }, { "epoch": 0.02173679690854444, "grad_norm": 1.453125, "learning_rate": 0.00013025627264188916, "loss": 3.4776, "step": 810 }, { "epoch": 0.021763632460283382, "grad_norm": 1.4609375, "learning_rate": 0.00013041728163155775, "loss": 3.6476, "step": 811 }, { "epoch": 0.021790468012022328, "grad_norm": 2.390625, "learning_rate": 0.00013057829062122634, "loss": 3.8622, "step": 812 }, { "epoch": 0.02181730356376127, "grad_norm": 2.15625, "learning_rate": 0.00013073929961089493, "loss": 3.8223, "step": 813 }, { "epoch": 0.021844139115500216, "grad_norm": 2.46875, "learning_rate": 0.00013090030860056352, "loss": 3.7552, "step": 814 }, { "epoch": 0.021870974667239158, "grad_norm": 1.8984375, "learning_rate": 0.0001310613175902321, "loss": 3.8323, "step": 815 }, { "epoch": 0.021897810218978103, "grad_norm": 1.7109375, "learning_rate": 0.0001312223265799007, "loss": 3.8051, "step": 816 }, { "epoch": 0.021924645770717045, "grad_norm": 1.671875, "learning_rate": 0.00013138333556956928, "loss": 3.7917, "step": 817 }, { "epoch": 0.02195148132245599, "grad_norm": 1.5234375, "learning_rate": 0.00013154434455923787, "loss": 3.7408, "step": 818 }, { "epoch": 0.021978316874194933, "grad_norm": 1.71875, "learning_rate": 0.00013170535354890646, "loss": 3.5985, "step": 819 }, { "epoch": 0.02200515242593388, "grad_norm": 1.453125, "learning_rate": 0.00013186636253857505, "loss": 3.5918, "step": 820 }, { "epoch": 0.02203198797767282, "grad_norm": 1.453125, "learning_rate": 0.00013202737152824366, "loss": 3.6837, "step": 821 }, { "epoch": 0.022058823529411766, "grad_norm": 1.578125, "learning_rate": 0.00013218838051791223, "loss": 3.8302, "step": 822 }, { "epoch": 0.022085659081150708, "grad_norm": 1.4453125, "learning_rate": 0.00013234938950758084, "loss": 3.685, "step": 823 }, { "epoch": 0.022112494632889654, "grad_norm": 1.5, "learning_rate": 0.0001325103984972494, "loss": 3.6808, "step": 824 }, { "epoch": 0.022139330184628596, "grad_norm": 1.4296875, "learning_rate": 0.00013267140748691802, "loss": 3.7412, "step": 825 }, { "epoch": 0.02216616573636754, "grad_norm": 1.4453125, "learning_rate": 0.0001328324164765866, "loss": 3.9098, "step": 826 }, { "epoch": 0.022193001288106484, "grad_norm": 1.4765625, "learning_rate": 0.0001329934254662552, "loss": 3.8009, "step": 827 }, { "epoch": 0.022219836839845426, "grad_norm": 1.4140625, "learning_rate": 0.00013315443445592378, "loss": 3.6907, "step": 828 }, { "epoch": 0.02224667239158437, "grad_norm": 1.453125, "learning_rate": 0.00013331544344559237, "loss": 3.798, "step": 829 }, { "epoch": 0.022273507943323313, "grad_norm": 1.328125, "learning_rate": 0.00013347645243526096, "loss": 3.6705, "step": 830 }, { "epoch": 0.02230034349506226, "grad_norm": 1.375, "learning_rate": 0.00013363746142492955, "loss": 3.7769, "step": 831 }, { "epoch": 0.0223271790468012, "grad_norm": 1.2734375, "learning_rate": 0.00013379847041459814, "loss": 3.4907, "step": 832 }, { "epoch": 0.022354014598540146, "grad_norm": 1.2734375, "learning_rate": 0.00013395947940426673, "loss": 3.7584, "step": 833 }, { "epoch": 0.02238085015027909, "grad_norm": 1.3203125, "learning_rate": 0.00013412048839393532, "loss": 3.698, "step": 834 }, { "epoch": 0.022407685702018034, "grad_norm": 1.3046875, "learning_rate": 0.0001342814973836039, "loss": 3.7226, "step": 835 }, { "epoch": 0.022434521253756976, "grad_norm": 1.4140625, "learning_rate": 0.0001344425063732725, "loss": 3.6024, "step": 836 }, { "epoch": 0.02246135680549592, "grad_norm": 1.28125, "learning_rate": 0.00013460351536294108, "loss": 3.7024, "step": 837 }, { "epoch": 0.022488192357234864, "grad_norm": 1.28125, "learning_rate": 0.00013476452435260967, "loss": 3.8023, "step": 838 }, { "epoch": 0.02251502790897381, "grad_norm": 1.3046875, "learning_rate": 0.00013492553334227826, "loss": 3.6639, "step": 839 }, { "epoch": 0.02254186346071275, "grad_norm": 1.3125, "learning_rate": 0.00013508654233194685, "loss": 3.5956, "step": 840 }, { "epoch": 0.022568699012451697, "grad_norm": 1.2421875, "learning_rate": 0.00013524755132161544, "loss": 3.5356, "step": 841 }, { "epoch": 0.02259553456419064, "grad_norm": 1.265625, "learning_rate": 0.00013540856031128403, "loss": 3.4729, "step": 842 }, { "epoch": 0.022622370115929585, "grad_norm": 1.34375, "learning_rate": 0.00013556956930095261, "loss": 3.6724, "step": 843 }, { "epoch": 0.022649205667668527, "grad_norm": 1.296875, "learning_rate": 0.00013573057829062123, "loss": 3.8117, "step": 844 }, { "epoch": 0.022676041219407472, "grad_norm": 1.4765625, "learning_rate": 0.0001358915872802898, "loss": 3.691, "step": 845 }, { "epoch": 0.022702876771146414, "grad_norm": 1.4921875, "learning_rate": 0.0001360525962699584, "loss": 3.6375, "step": 846 }, { "epoch": 0.02272971232288536, "grad_norm": 1.3203125, "learning_rate": 0.00013621360525962697, "loss": 3.626, "step": 847 }, { "epoch": 0.022756547874624302, "grad_norm": 1.578125, "learning_rate": 0.00013637461424929558, "loss": 3.6559, "step": 848 }, { "epoch": 0.022783383426363248, "grad_norm": 1.359375, "learning_rate": 0.00013653562323896415, "loss": 3.7787, "step": 849 }, { "epoch": 0.02281021897810219, "grad_norm": 1.5078125, "learning_rate": 0.00013669663222863276, "loss": 3.6212, "step": 850 }, { "epoch": 0.022837054529841135, "grad_norm": 1.3359375, "learning_rate": 0.00013685764121830135, "loss": 3.6257, "step": 851 }, { "epoch": 0.022863890081580077, "grad_norm": 1.3359375, "learning_rate": 0.00013701865020796994, "loss": 3.6205, "step": 852 }, { "epoch": 0.02289072563331902, "grad_norm": 1.46875, "learning_rate": 0.00013717965919763853, "loss": 3.6655, "step": 853 }, { "epoch": 0.022917561185057965, "grad_norm": 1.2734375, "learning_rate": 0.00013734066818730712, "loss": 3.6733, "step": 854 }, { "epoch": 0.022944396736796907, "grad_norm": 1.375, "learning_rate": 0.0001375016771769757, "loss": 3.5772, "step": 855 }, { "epoch": 0.022971232288535853, "grad_norm": 1.3359375, "learning_rate": 0.0001376626861666443, "loss": 3.6064, "step": 856 }, { "epoch": 0.022998067840274795, "grad_norm": 1.4375, "learning_rate": 0.00013782369515631288, "loss": 3.4871, "step": 857 }, { "epoch": 0.02302490339201374, "grad_norm": 1.3046875, "learning_rate": 0.00013798470414598147, "loss": 3.656, "step": 858 }, { "epoch": 0.023051738943752682, "grad_norm": 1.390625, "learning_rate": 0.00013814571313565006, "loss": 3.6185, "step": 859 }, { "epoch": 0.023078574495491628, "grad_norm": 1.3125, "learning_rate": 0.00013830672212531865, "loss": 3.4087, "step": 860 }, { "epoch": 0.02310541004723057, "grad_norm": 1.3359375, "learning_rate": 0.00013846773111498724, "loss": 3.491, "step": 861 }, { "epoch": 0.023132245598969516, "grad_norm": 1.2890625, "learning_rate": 0.00013862874010465583, "loss": 3.5423, "step": 862 }, { "epoch": 0.023159081150708458, "grad_norm": 1.3046875, "learning_rate": 0.00013878974909432441, "loss": 3.6958, "step": 863 }, { "epoch": 0.023185916702447403, "grad_norm": 1.28125, "learning_rate": 0.00013895075808399303, "loss": 3.5555, "step": 864 }, { "epoch": 0.023212752254186345, "grad_norm": 1.3515625, "learning_rate": 0.0001391117670736616, "loss": 3.6985, "step": 865 }, { "epoch": 0.02323958780592529, "grad_norm": 1.3046875, "learning_rate": 0.00013927277606333018, "loss": 3.6734, "step": 866 }, { "epoch": 0.023266423357664233, "grad_norm": 1.2421875, "learning_rate": 0.00013943378505299877, "loss": 3.5644, "step": 867 }, { "epoch": 0.02329325890940318, "grad_norm": 1.53125, "learning_rate": 0.00013959479404266736, "loss": 3.5999, "step": 868 }, { "epoch": 0.02332009446114212, "grad_norm": 1.2109375, "learning_rate": 0.00013975580303233597, "loss": 3.5114, "step": 869 }, { "epoch": 0.023346930012881066, "grad_norm": 1.359375, "learning_rate": 0.00013991681202200453, "loss": 3.5204, "step": 870 }, { "epoch": 0.023373765564620008, "grad_norm": 1.4375, "learning_rate": 0.00014007782101167315, "loss": 3.5846, "step": 871 }, { "epoch": 0.023400601116358954, "grad_norm": 1.34375, "learning_rate": 0.0001402388300013417, "loss": 3.6866, "step": 872 }, { "epoch": 0.023427436668097896, "grad_norm": 1.375, "learning_rate": 0.00014039983899101033, "loss": 3.6512, "step": 873 }, { "epoch": 0.02345427221983684, "grad_norm": 1.359375, "learning_rate": 0.00014056084798067892, "loss": 3.7138, "step": 874 }, { "epoch": 0.023481107771575783, "grad_norm": 1.2265625, "learning_rate": 0.0001407218569703475, "loss": 3.5849, "step": 875 }, { "epoch": 0.02350794332331473, "grad_norm": 1.375, "learning_rate": 0.0001408828659600161, "loss": 3.5796, "step": 876 }, { "epoch": 0.02353477887505367, "grad_norm": 1.265625, "learning_rate": 0.00014104387494968468, "loss": 3.6253, "step": 877 }, { "epoch": 0.023561614426792613, "grad_norm": 1.2421875, "learning_rate": 0.00014120488393935327, "loss": 3.5354, "step": 878 }, { "epoch": 0.02358844997853156, "grad_norm": 1.328125, "learning_rate": 0.00014136589292902186, "loss": 3.4828, "step": 879 }, { "epoch": 0.0236152855302705, "grad_norm": 1.1875, "learning_rate": 0.00014152690191869045, "loss": 3.4185, "step": 880 }, { "epoch": 0.023642121082009446, "grad_norm": 1.4375, "learning_rate": 0.00014168791090835904, "loss": 3.7521, "step": 881 }, { "epoch": 0.02366895663374839, "grad_norm": 1.359375, "learning_rate": 0.00014184891989802763, "loss": 3.6745, "step": 882 }, { "epoch": 0.023695792185487334, "grad_norm": 1.296875, "learning_rate": 0.00014200992888769621, "loss": 3.5362, "step": 883 }, { "epoch": 0.023722627737226276, "grad_norm": 1.28125, "learning_rate": 0.0001421709378773648, "loss": 3.5335, "step": 884 }, { "epoch": 0.02374946328896522, "grad_norm": 1.28125, "learning_rate": 0.0001423319468670334, "loss": 3.5547, "step": 885 }, { "epoch": 0.023776298840704164, "grad_norm": 1.2265625, "learning_rate": 0.00014249295585670198, "loss": 3.4435, "step": 886 }, { "epoch": 0.02380313439244311, "grad_norm": 1.296875, "learning_rate": 0.0001426539648463706, "loss": 3.494, "step": 887 }, { "epoch": 0.02382996994418205, "grad_norm": 1.2734375, "learning_rate": 0.00014281497383603916, "loss": 3.5112, "step": 888 }, { "epoch": 0.023856805495920997, "grad_norm": 1.34375, "learning_rate": 0.00014297598282570777, "loss": 3.5231, "step": 889 }, { "epoch": 0.02388364104765994, "grad_norm": 1.2734375, "learning_rate": 0.00014313699181537633, "loss": 3.5708, "step": 890 }, { "epoch": 0.023910476599398885, "grad_norm": 1.25, "learning_rate": 0.00014329800080504495, "loss": 3.4811, "step": 891 }, { "epoch": 0.023937312151137827, "grad_norm": 1.2734375, "learning_rate": 0.0001434590097947135, "loss": 3.419, "step": 892 }, { "epoch": 0.023964147702876772, "grad_norm": 1.3359375, "learning_rate": 0.00014362001878438213, "loss": 3.4177, "step": 893 }, { "epoch": 0.023990983254615714, "grad_norm": 1.2734375, "learning_rate": 0.00014378102777405072, "loss": 3.4994, "step": 894 }, { "epoch": 0.02401781880635466, "grad_norm": 1.3359375, "learning_rate": 0.00014394203676371928, "loss": 3.5836, "step": 895 }, { "epoch": 0.024044654358093602, "grad_norm": 1.3046875, "learning_rate": 0.0001441030457533879, "loss": 3.6999, "step": 896 }, { "epoch": 0.024071489909832548, "grad_norm": 1.3515625, "learning_rate": 0.00014426405474305646, "loss": 3.5302, "step": 897 }, { "epoch": 0.02409832546157149, "grad_norm": 1.3359375, "learning_rate": 0.00014442506373272507, "loss": 3.486, "step": 898 }, { "epoch": 0.024125161013310435, "grad_norm": 1.4296875, "learning_rate": 0.00014458607272239366, "loss": 3.5152, "step": 899 }, { "epoch": 0.024151996565049377, "grad_norm": 1.4609375, "learning_rate": 0.00014474708171206225, "loss": 3.5679, "step": 900 }, { "epoch": 0.024178832116788323, "grad_norm": 1.3515625, "learning_rate": 0.00014490809070173084, "loss": 3.4918, "step": 901 }, { "epoch": 0.024205667668527265, "grad_norm": 1.3515625, "learning_rate": 0.00014506909969139943, "loss": 3.5645, "step": 902 }, { "epoch": 0.024232503220266207, "grad_norm": 1.2109375, "learning_rate": 0.00014523010868106801, "loss": 3.5331, "step": 903 }, { "epoch": 0.024259338772005153, "grad_norm": 1.3203125, "learning_rate": 0.0001453911176707366, "loss": 3.5412, "step": 904 }, { "epoch": 0.024286174323744095, "grad_norm": 1.2109375, "learning_rate": 0.0001455521266604052, "loss": 3.3984, "step": 905 }, { "epoch": 0.02431300987548304, "grad_norm": 1.421875, "learning_rate": 0.00014571313565007378, "loss": 3.5363, "step": 906 }, { "epoch": 0.024339845427221982, "grad_norm": 1.2109375, "learning_rate": 0.00014587414463974237, "loss": 3.4919, "step": 907 }, { "epoch": 0.024366680978960928, "grad_norm": 1.3984375, "learning_rate": 0.00014603515362941096, "loss": 3.5631, "step": 908 }, { "epoch": 0.02439351653069987, "grad_norm": 1.265625, "learning_rate": 0.00014619616261907955, "loss": 3.5353, "step": 909 }, { "epoch": 0.024420352082438816, "grad_norm": 1.21875, "learning_rate": 0.00014635717160874813, "loss": 3.5478, "step": 910 }, { "epoch": 0.024447187634177758, "grad_norm": 1.3046875, "learning_rate": 0.00014651818059841672, "loss": 3.429, "step": 911 }, { "epoch": 0.024474023185916703, "grad_norm": 1.2421875, "learning_rate": 0.00014667918958808534, "loss": 3.5441, "step": 912 }, { "epoch": 0.024500858737655645, "grad_norm": 1.265625, "learning_rate": 0.0001468401985777539, "loss": 3.6053, "step": 913 }, { "epoch": 0.02452769428939459, "grad_norm": 1.25, "learning_rate": 0.00014700120756742252, "loss": 3.5184, "step": 914 }, { "epoch": 0.024554529841133533, "grad_norm": 1.2890625, "learning_rate": 0.00014716221655709108, "loss": 3.4113, "step": 915 }, { "epoch": 0.02458136539287248, "grad_norm": 1.25, "learning_rate": 0.0001473232255467597, "loss": 3.479, "step": 916 }, { "epoch": 0.02460820094461142, "grad_norm": 1.3203125, "learning_rate": 0.00014748423453642828, "loss": 3.417, "step": 917 }, { "epoch": 0.024635036496350366, "grad_norm": 1.2734375, "learning_rate": 0.00014764524352609687, "loss": 3.5538, "step": 918 }, { "epoch": 0.024661872048089308, "grad_norm": 1.28125, "learning_rate": 0.00014780625251576546, "loss": 3.5769, "step": 919 }, { "epoch": 0.024688707599828254, "grad_norm": 1.375, "learning_rate": 0.00014796726150543405, "loss": 3.3831, "step": 920 }, { "epoch": 0.024715543151567196, "grad_norm": 1.3046875, "learning_rate": 0.00014812827049510264, "loss": 3.6672, "step": 921 }, { "epoch": 0.02474237870330614, "grad_norm": 1.234375, "learning_rate": 0.0001482892794847712, "loss": 3.4913, "step": 922 }, { "epoch": 0.024769214255045083, "grad_norm": 1.234375, "learning_rate": 0.00014845028847443981, "loss": 3.5415, "step": 923 }, { "epoch": 0.02479604980678403, "grad_norm": 1.1640625, "learning_rate": 0.0001486112974641084, "loss": 3.3665, "step": 924 }, { "epoch": 0.02482288535852297, "grad_norm": 1.3046875, "learning_rate": 0.000148772306453777, "loss": 3.4448, "step": 925 }, { "epoch": 0.024849720910261917, "grad_norm": 1.296875, "learning_rate": 0.00014893331544344558, "loss": 3.5609, "step": 926 }, { "epoch": 0.02487655646200086, "grad_norm": 1.265625, "learning_rate": 0.00014909432443311417, "loss": 3.4518, "step": 927 }, { "epoch": 0.0249033920137398, "grad_norm": 1.2109375, "learning_rate": 0.00014925533342278276, "loss": 3.498, "step": 928 }, { "epoch": 0.024930227565478746, "grad_norm": 1.2734375, "learning_rate": 0.00014941634241245135, "loss": 3.4253, "step": 929 }, { "epoch": 0.02495706311721769, "grad_norm": 1.1640625, "learning_rate": 0.00014957735140211993, "loss": 3.5174, "step": 930 }, { "epoch": 0.024983898668956634, "grad_norm": 1.359375, "learning_rate": 0.00014973836039178852, "loss": 3.4892, "step": 931 }, { "epoch": 0.025010734220695576, "grad_norm": 1.2890625, "learning_rate": 0.0001498993693814571, "loss": 3.4762, "step": 932 }, { "epoch": 0.02503756977243452, "grad_norm": 1.2109375, "learning_rate": 0.0001500603783711257, "loss": 3.4484, "step": 933 }, { "epoch": 0.025064405324173464, "grad_norm": 1.21875, "learning_rate": 0.00015022138736079432, "loss": 3.5178, "step": 934 }, { "epoch": 0.02509124087591241, "grad_norm": 1.234375, "learning_rate": 0.00015038239635046288, "loss": 3.4962, "step": 935 }, { "epoch": 0.02511807642765135, "grad_norm": 2.046875, "learning_rate": 0.00015054340534013147, "loss": 3.5849, "step": 936 }, { "epoch": 0.025144911979390297, "grad_norm": 1.515625, "learning_rate": 0.00015070441432980008, "loss": 3.6509, "step": 937 }, { "epoch": 0.02517174753112924, "grad_norm": 1.734375, "learning_rate": 0.00015086542331946867, "loss": 3.6672, "step": 938 }, { "epoch": 0.025198583082868185, "grad_norm": 1.6484375, "learning_rate": 0.00015102643230913723, "loss": 3.4389, "step": 939 }, { "epoch": 0.025225418634607127, "grad_norm": 1.359375, "learning_rate": 0.00015118744129880582, "loss": 3.5581, "step": 940 }, { "epoch": 0.025252254186346072, "grad_norm": 1.359375, "learning_rate": 0.00015134845028847444, "loss": 3.6188, "step": 941 }, { "epoch": 0.025279089738085014, "grad_norm": 1.296875, "learning_rate": 0.00015150945927814303, "loss": 3.5844, "step": 942 }, { "epoch": 0.02530592528982396, "grad_norm": 1.34375, "learning_rate": 0.0001516704682678116, "loss": 3.5971, "step": 943 }, { "epoch": 0.025332760841562902, "grad_norm": 1.2109375, "learning_rate": 0.0001518314772574802, "loss": 3.578, "step": 944 }, { "epoch": 0.025359596393301848, "grad_norm": 1.1796875, "learning_rate": 0.0001519924862471488, "loss": 3.534, "step": 945 }, { "epoch": 0.02538643194504079, "grad_norm": 1.265625, "learning_rate": 0.00015215349523681738, "loss": 3.5273, "step": 946 }, { "epoch": 0.025413267496779735, "grad_norm": 1.25, "learning_rate": 0.00015231450422648597, "loss": 3.4895, "step": 947 }, { "epoch": 0.025440103048518677, "grad_norm": 1.28125, "learning_rate": 0.00015247551321615456, "loss": 3.6011, "step": 948 }, { "epoch": 0.025466938600257623, "grad_norm": 1.3046875, "learning_rate": 0.00015263652220582315, "loss": 3.6328, "step": 949 }, { "epoch": 0.025493774151996565, "grad_norm": 1.2421875, "learning_rate": 0.00015279753119549176, "loss": 3.5844, "step": 950 }, { "epoch": 0.02552060970373551, "grad_norm": 1.3046875, "learning_rate": 0.00015295854018516032, "loss": 3.6192, "step": 951 }, { "epoch": 0.025547445255474453, "grad_norm": 1.2578125, "learning_rate": 0.0001531195491748289, "loss": 3.4307, "step": 952 }, { "epoch": 0.025574280807213395, "grad_norm": 1.21875, "learning_rate": 0.0001532805581644975, "loss": 3.4951, "step": 953 }, { "epoch": 0.02560111635895234, "grad_norm": 1.2265625, "learning_rate": 0.0001534415671541661, "loss": 3.5316, "step": 954 }, { "epoch": 0.025627951910691282, "grad_norm": 1.28125, "learning_rate": 0.00015360257614383468, "loss": 3.5851, "step": 955 }, { "epoch": 0.025654787462430228, "grad_norm": 1.234375, "learning_rate": 0.00015376358513350327, "loss": 3.421, "step": 956 }, { "epoch": 0.02568162301416917, "grad_norm": 1.1953125, "learning_rate": 0.00015392459412317188, "loss": 3.4552, "step": 957 }, { "epoch": 0.025708458565908116, "grad_norm": 1.3046875, "learning_rate": 0.00015408560311284044, "loss": 3.5973, "step": 958 }, { "epoch": 0.025735294117647058, "grad_norm": 1.1953125, "learning_rate": 0.00015424661210250903, "loss": 3.4679, "step": 959 }, { "epoch": 0.025762129669386003, "grad_norm": 1.265625, "learning_rate": 0.00015440762109217765, "loss": 3.559, "step": 960 }, { "epoch": 0.025788965221124945, "grad_norm": 1.234375, "learning_rate": 0.00015456863008184624, "loss": 3.4631, "step": 961 }, { "epoch": 0.02581580077286389, "grad_norm": 1.2109375, "learning_rate": 0.0001547296390715148, "loss": 3.518, "step": 962 }, { "epoch": 0.025842636324602833, "grad_norm": 1.1953125, "learning_rate": 0.0001548906480611834, "loss": 3.5009, "step": 963 }, { "epoch": 0.02586947187634178, "grad_norm": 1.1796875, "learning_rate": 0.000155051657050852, "loss": 3.3839, "step": 964 }, { "epoch": 0.02589630742808072, "grad_norm": 1.109375, "learning_rate": 0.0001552126660405206, "loss": 3.3139, "step": 965 }, { "epoch": 0.025923142979819666, "grad_norm": 1.203125, "learning_rate": 0.00015537367503018915, "loss": 3.4186, "step": 966 }, { "epoch": 0.025949978531558608, "grad_norm": 1.1875, "learning_rate": 0.00015553468401985777, "loss": 3.5405, "step": 967 }, { "epoch": 0.025976814083297554, "grad_norm": 1.15625, "learning_rate": 0.00015569569300952636, "loss": 3.4703, "step": 968 }, { "epoch": 0.026003649635036496, "grad_norm": 1.2421875, "learning_rate": 0.00015585670199919495, "loss": 3.59, "step": 969 }, { "epoch": 0.02603048518677544, "grad_norm": 1.171875, "learning_rate": 0.0001560177109888635, "loss": 3.5413, "step": 970 }, { "epoch": 0.026057320738514383, "grad_norm": 1.140625, "learning_rate": 0.00015617871997853212, "loss": 3.4384, "step": 971 }, { "epoch": 0.02608415629025333, "grad_norm": 1.21875, "learning_rate": 0.0001563397289682007, "loss": 3.5588, "step": 972 }, { "epoch": 0.02611099184199227, "grad_norm": 1.1875, "learning_rate": 0.00015650073795786933, "loss": 3.484, "step": 973 }, { "epoch": 0.026137827393731217, "grad_norm": 1.2109375, "learning_rate": 0.0001566617469475379, "loss": 3.4535, "step": 974 }, { "epoch": 0.02616466294547016, "grad_norm": 1.1953125, "learning_rate": 0.00015682275593720648, "loss": 3.4992, "step": 975 }, { "epoch": 0.026191498497209104, "grad_norm": 1.1875, "learning_rate": 0.00015698376492687507, "loss": 3.3889, "step": 976 }, { "epoch": 0.026218334048948046, "grad_norm": 1.203125, "learning_rate": 0.00015714477391654368, "loss": 3.4351, "step": 977 }, { "epoch": 0.02624516960068699, "grad_norm": 1.1796875, "learning_rate": 0.00015730578290621224, "loss": 3.5917, "step": 978 }, { "epoch": 0.026272005152425934, "grad_norm": 1.203125, "learning_rate": 0.00015746679189588083, "loss": 3.5917, "step": 979 }, { "epoch": 0.026298840704164876, "grad_norm": 1.2109375, "learning_rate": 0.00015762780088554945, "loss": 3.4712, "step": 980 }, { "epoch": 0.02632567625590382, "grad_norm": 1.1484375, "learning_rate": 0.000157788809875218, "loss": 3.4141, "step": 981 }, { "epoch": 0.026352511807642764, "grad_norm": 1.1796875, "learning_rate": 0.0001579498188648866, "loss": 3.5105, "step": 982 }, { "epoch": 0.02637934735938171, "grad_norm": 1.171875, "learning_rate": 0.0001581108278545552, "loss": 3.4163, "step": 983 }, { "epoch": 0.02640618291112065, "grad_norm": 1.1875, "learning_rate": 0.0001582718368442238, "loss": 3.5155, "step": 984 }, { "epoch": 0.026433018462859597, "grad_norm": 1.1640625, "learning_rate": 0.00015843284583389236, "loss": 3.5171, "step": 985 }, { "epoch": 0.02645985401459854, "grad_norm": 1.1875, "learning_rate": 0.00015859385482356095, "loss": 3.3915, "step": 986 }, { "epoch": 0.026486689566337485, "grad_norm": 1.140625, "learning_rate": 0.00015875486381322957, "loss": 3.5138, "step": 987 }, { "epoch": 0.026513525118076427, "grad_norm": 1.1640625, "learning_rate": 0.00015891587280289816, "loss": 3.4781, "step": 988 }, { "epoch": 0.026540360669815372, "grad_norm": 1.1953125, "learning_rate": 0.00015907688179256672, "loss": 3.5897, "step": 989 }, { "epoch": 0.026567196221554314, "grad_norm": 1.15625, "learning_rate": 0.00015923789078223533, "loss": 3.2928, "step": 990 }, { "epoch": 0.02659403177329326, "grad_norm": 1.203125, "learning_rate": 0.00015939889977190392, "loss": 3.4606, "step": 991 }, { "epoch": 0.026620867325032202, "grad_norm": 1.2890625, "learning_rate": 0.0001595599087615725, "loss": 3.3684, "step": 992 }, { "epoch": 0.026647702876771148, "grad_norm": 1.234375, "learning_rate": 0.00015972091775124107, "loss": 3.3276, "step": 993 }, { "epoch": 0.02667453842851009, "grad_norm": 1.125, "learning_rate": 0.0001598819267409097, "loss": 3.3486, "step": 994 }, { "epoch": 0.026701373980249035, "grad_norm": 1.1953125, "learning_rate": 0.00016004293573057828, "loss": 3.2979, "step": 995 }, { "epoch": 0.026728209531987977, "grad_norm": 1.2890625, "learning_rate": 0.00016020394472024687, "loss": 3.6421, "step": 996 }, { "epoch": 0.026755045083726923, "grad_norm": 1.1796875, "learning_rate": 0.00016036495370991546, "loss": 3.4298, "step": 997 }, { "epoch": 0.026781880635465865, "grad_norm": 1.1875, "learning_rate": 0.00016052596269958404, "loss": 3.3822, "step": 998 }, { "epoch": 0.02680871618720481, "grad_norm": 1.25, "learning_rate": 0.00016068697168925263, "loss": 3.461, "step": 999 }, { "epoch": 0.026835551738943753, "grad_norm": 1.1640625, "learning_rate": 0.00016084798067892125, "loss": 3.4255, "step": 1000 }, { "epoch": 0.026862387290682698, "grad_norm": 1.21875, "learning_rate": 0.0001610089896685898, "loss": 3.4248, "step": 1001 }, { "epoch": 0.02688922284242164, "grad_norm": 1.1796875, "learning_rate": 0.0001611699986582584, "loss": 3.4282, "step": 1002 }, { "epoch": 0.026916058394160582, "grad_norm": 1.203125, "learning_rate": 0.00016133100764792701, "loss": 3.4328, "step": 1003 }, { "epoch": 0.026942893945899528, "grad_norm": 1.1875, "learning_rate": 0.0001614920166375956, "loss": 3.4072, "step": 1004 }, { "epoch": 0.02696972949763847, "grad_norm": 1.1328125, "learning_rate": 0.00016165302562726416, "loss": 3.3359, "step": 1005 }, { "epoch": 0.026996565049377415, "grad_norm": 1.171875, "learning_rate": 0.00016181403461693275, "loss": 3.3463, "step": 1006 }, { "epoch": 0.027023400601116358, "grad_norm": 1.171875, "learning_rate": 0.00016197504360660137, "loss": 3.6015, "step": 1007 }, { "epoch": 0.027050236152855303, "grad_norm": 1.1875, "learning_rate": 0.00016213605259626996, "loss": 3.4075, "step": 1008 }, { "epoch": 0.027077071704594245, "grad_norm": 1.15625, "learning_rate": 0.00016229706158593852, "loss": 3.2902, "step": 1009 }, { "epoch": 0.02710390725633319, "grad_norm": 1.1484375, "learning_rate": 0.00016245807057560713, "loss": 3.5326, "step": 1010 }, { "epoch": 0.027130742808072133, "grad_norm": 1.3203125, "learning_rate": 0.00016261907956527572, "loss": 3.5119, "step": 1011 }, { "epoch": 0.02715757835981108, "grad_norm": 1.0703125, "learning_rate": 0.00016278008855494429, "loss": 3.4058, "step": 1012 }, { "epoch": 0.02718441391155002, "grad_norm": 1.15625, "learning_rate": 0.00016294109754461287, "loss": 3.3236, "step": 1013 }, { "epoch": 0.027211249463288966, "grad_norm": 1.2109375, "learning_rate": 0.0001631021065342815, "loss": 3.3306, "step": 1014 }, { "epoch": 0.027238085015027908, "grad_norm": 1.1640625, "learning_rate": 0.00016326311552395008, "loss": 3.4045, "step": 1015 }, { "epoch": 0.027264920566766854, "grad_norm": 1.1875, "learning_rate": 0.00016342412451361864, "loss": 3.4068, "step": 1016 }, { "epoch": 0.027291756118505796, "grad_norm": 1.1171875, "learning_rate": 0.00016358513350328726, "loss": 3.3389, "step": 1017 }, { "epoch": 0.02731859167024474, "grad_norm": 1.21875, "learning_rate": 0.00016374614249295584, "loss": 3.3914, "step": 1018 }, { "epoch": 0.027345427221983683, "grad_norm": 1.09375, "learning_rate": 0.00016390715148262443, "loss": 3.3271, "step": 1019 }, { "epoch": 0.02737226277372263, "grad_norm": 1.1328125, "learning_rate": 0.00016406816047229302, "loss": 3.3304, "step": 1020 }, { "epoch": 0.02739909832546157, "grad_norm": 1.1875, "learning_rate": 0.0001642291694619616, "loss": 3.3338, "step": 1021 }, { "epoch": 0.027425933877200517, "grad_norm": 1.0625, "learning_rate": 0.0001643901784516302, "loss": 3.3065, "step": 1022 }, { "epoch": 0.02745276942893946, "grad_norm": 1.2265625, "learning_rate": 0.00016455118744129881, "loss": 3.3539, "step": 1023 }, { "epoch": 0.027479604980678404, "grad_norm": 1.1015625, "learning_rate": 0.00016471219643096738, "loss": 3.4375, "step": 1024 }, { "epoch": 0.027506440532417346, "grad_norm": 1.2421875, "learning_rate": 0.00016487320542063596, "loss": 3.3889, "step": 1025 }, { "epoch": 0.027533276084156292, "grad_norm": 1.125, "learning_rate": 0.00016503421441030455, "loss": 3.3815, "step": 1026 }, { "epoch": 0.027560111635895234, "grad_norm": 1.1015625, "learning_rate": 0.00016519522339997317, "loss": 3.2168, "step": 1027 }, { "epoch": 0.027586947187634176, "grad_norm": 1.125, "learning_rate": 0.00016535623238964173, "loss": 3.4694, "step": 1028 }, { "epoch": 0.02761378273937312, "grad_norm": 1.140625, "learning_rate": 0.00016551724137931032, "loss": 3.2762, "step": 1029 }, { "epoch": 0.027640618291112064, "grad_norm": 1.046875, "learning_rate": 0.00016567825036897893, "loss": 3.3963, "step": 1030 }, { "epoch": 0.02766745384285101, "grad_norm": 1.1171875, "learning_rate": 0.00016583925935864752, "loss": 3.2426, "step": 1031 }, { "epoch": 0.02769428939458995, "grad_norm": 1.046875, "learning_rate": 0.00016600026834831609, "loss": 3.1745, "step": 1032 }, { "epoch": 0.027721124946328897, "grad_norm": 1.1328125, "learning_rate": 0.0001661612773379847, "loss": 3.3726, "step": 1033 }, { "epoch": 0.02774796049806784, "grad_norm": 1.21875, "learning_rate": 0.0001663222863276533, "loss": 3.3836, "step": 1034 }, { "epoch": 0.027774796049806785, "grad_norm": 1.1171875, "learning_rate": 0.00016648329531732188, "loss": 3.2947, "step": 1035 }, { "epoch": 0.027801631601545727, "grad_norm": 1.265625, "learning_rate": 0.00016664430430699044, "loss": 3.3395, "step": 1036 }, { "epoch": 0.027828467153284672, "grad_norm": 1.1328125, "learning_rate": 0.00016680531329665906, "loss": 3.3422, "step": 1037 }, { "epoch": 0.027855302705023614, "grad_norm": 1.140625, "learning_rate": 0.00016696632228632764, "loss": 3.3001, "step": 1038 }, { "epoch": 0.02788213825676256, "grad_norm": 1.1875, "learning_rate": 0.0001671273312759962, "loss": 3.3934, "step": 1039 }, { "epoch": 0.027908973808501502, "grad_norm": 1.234375, "learning_rate": 0.00016728834026566482, "loss": 3.3415, "step": 1040 }, { "epoch": 0.027935809360240448, "grad_norm": 1.125, "learning_rate": 0.0001674493492553334, "loss": 3.3031, "step": 1041 }, { "epoch": 0.02796264491197939, "grad_norm": 1.140625, "learning_rate": 0.000167610358245002, "loss": 3.4066, "step": 1042 }, { "epoch": 0.027989480463718335, "grad_norm": 1.1484375, "learning_rate": 0.00016777136723467056, "loss": 3.3284, "step": 1043 }, { "epoch": 0.028016316015457277, "grad_norm": 1.078125, "learning_rate": 0.00016793237622433918, "loss": 3.2619, "step": 1044 }, { "epoch": 0.028043151567196223, "grad_norm": 1.1953125, "learning_rate": 0.00016809338521400776, "loss": 3.2832, "step": 1045 }, { "epoch": 0.028069987118935165, "grad_norm": 1.0859375, "learning_rate": 0.00016825439420367638, "loss": 3.3195, "step": 1046 }, { "epoch": 0.02809682267067411, "grad_norm": 1.125, "learning_rate": 0.00016841540319334494, "loss": 3.2514, "step": 1047 }, { "epoch": 0.028123658222413053, "grad_norm": 1.09375, "learning_rate": 0.00016857641218301353, "loss": 3.3695, "step": 1048 }, { "epoch": 0.028150493774151998, "grad_norm": 1.0703125, "learning_rate": 0.00016873742117268212, "loss": 3.2464, "step": 1049 }, { "epoch": 0.02817732932589094, "grad_norm": 1.0703125, "learning_rate": 0.00016889843016235073, "loss": 3.2029, "step": 1050 }, { "epoch": 0.028204164877629886, "grad_norm": 1.0703125, "learning_rate": 0.0001690594391520193, "loss": 3.2504, "step": 1051 }, { "epoch": 0.028231000429368828, "grad_norm": 1.09375, "learning_rate": 0.00016922044814168789, "loss": 3.2681, "step": 1052 }, { "epoch": 0.02825783598110777, "grad_norm": 1.1171875, "learning_rate": 0.0001693814571313565, "loss": 3.3696, "step": 1053 }, { "epoch": 0.028284671532846715, "grad_norm": 1.1015625, "learning_rate": 0.0001695424661210251, "loss": 3.3559, "step": 1054 }, { "epoch": 0.028311507084585658, "grad_norm": 1.1484375, "learning_rate": 0.00016970347511069365, "loss": 3.3236, "step": 1055 }, { "epoch": 0.028338342636324603, "grad_norm": 1.15625, "learning_rate": 0.00016986448410036224, "loss": 3.1549, "step": 1056 }, { "epoch": 0.028365178188063545, "grad_norm": 1.09375, "learning_rate": 0.00017002549309003086, "loss": 3.3013, "step": 1057 }, { "epoch": 0.02839201373980249, "grad_norm": 1.078125, "learning_rate": 0.00017018650207969944, "loss": 3.3316, "step": 1058 }, { "epoch": 0.028418849291541433, "grad_norm": 1.0546875, "learning_rate": 0.000170347511069368, "loss": 3.187, "step": 1059 }, { "epoch": 0.02844568484328038, "grad_norm": 1.125, "learning_rate": 0.00017050852005903662, "loss": 3.1832, "step": 1060 }, { "epoch": 0.02847252039501932, "grad_norm": 1.1015625, "learning_rate": 0.0001706695290487052, "loss": 3.2691, "step": 1061 }, { "epoch": 0.028499355946758266, "grad_norm": 1.1875, "learning_rate": 0.0001708305380383738, "loss": 3.5328, "step": 1062 }, { "epoch": 0.028526191498497208, "grad_norm": 1.09375, "learning_rate": 0.0001709915470280424, "loss": 3.3268, "step": 1063 }, { "epoch": 0.028553027050236154, "grad_norm": 1.1328125, "learning_rate": 0.00017115255601771098, "loss": 3.3539, "step": 1064 }, { "epoch": 0.028579862601975096, "grad_norm": 1.2265625, "learning_rate": 0.00017131356500737956, "loss": 3.3164, "step": 1065 }, { "epoch": 0.02860669815371404, "grad_norm": 1.0703125, "learning_rate": 0.00017147457399704813, "loss": 3.3345, "step": 1066 }, { "epoch": 0.028633533705452983, "grad_norm": 1.8046875, "learning_rate": 0.00017163558298671674, "loss": 3.5914, "step": 1067 }, { "epoch": 0.02866036925719193, "grad_norm": 1.1640625, "learning_rate": 0.00017179659197638533, "loss": 3.3197, "step": 1068 }, { "epoch": 0.02868720480893087, "grad_norm": 1.40625, "learning_rate": 0.00017195760096605392, "loss": 3.2757, "step": 1069 }, { "epoch": 0.028714040360669817, "grad_norm": 1.3203125, "learning_rate": 0.0001721186099557225, "loss": 3.2758, "step": 1070 }, { "epoch": 0.02874087591240876, "grad_norm": 1.3203125, "learning_rate": 0.0001722796189453911, "loss": 3.451, "step": 1071 }, { "epoch": 0.028767711464147704, "grad_norm": 1.21875, "learning_rate": 0.00017244062793505969, "loss": 3.4522, "step": 1072 }, { "epoch": 0.028794547015886646, "grad_norm": 1.125, "learning_rate": 0.0001726016369247283, "loss": 3.3694, "step": 1073 }, { "epoch": 0.028821382567625592, "grad_norm": 1.2109375, "learning_rate": 0.00017276264591439686, "loss": 3.4196, "step": 1074 }, { "epoch": 0.028848218119364534, "grad_norm": 1.109375, "learning_rate": 0.00017292365490406545, "loss": 3.4537, "step": 1075 }, { "epoch": 0.02887505367110348, "grad_norm": 1.0625, "learning_rate": 0.00017308466389373407, "loss": 3.3036, "step": 1076 }, { "epoch": 0.02890188922284242, "grad_norm": 1.21875, "learning_rate": 0.00017324567288340266, "loss": 3.3959, "step": 1077 }, { "epoch": 0.028928724774581364, "grad_norm": 1.1171875, "learning_rate": 0.00017340668187307122, "loss": 3.4231, "step": 1078 }, { "epoch": 0.02895556032632031, "grad_norm": 1.140625, "learning_rate": 0.0001735676908627398, "loss": 3.415, "step": 1079 }, { "epoch": 0.02898239587805925, "grad_norm": 1.1328125, "learning_rate": 0.00017372869985240842, "loss": 3.3907, "step": 1080 }, { "epoch": 0.029009231429798197, "grad_norm": 1.21875, "learning_rate": 0.000173889708842077, "loss": 3.4086, "step": 1081 }, { "epoch": 0.02903606698153714, "grad_norm": 1.0625, "learning_rate": 0.00017405071783174557, "loss": 3.2937, "step": 1082 }, { "epoch": 0.029062902533276085, "grad_norm": 1.1953125, "learning_rate": 0.0001742117268214142, "loss": 3.4096, "step": 1083 }, { "epoch": 0.029089738085015027, "grad_norm": 1.078125, "learning_rate": 0.00017437273581108278, "loss": 3.257, "step": 1084 }, { "epoch": 0.029116573636753972, "grad_norm": 1.0703125, "learning_rate": 0.00017453374480075136, "loss": 3.3087, "step": 1085 }, { "epoch": 0.029143409188492914, "grad_norm": 1.2109375, "learning_rate": 0.00017469475379041993, "loss": 3.4123, "step": 1086 }, { "epoch": 0.02917024474023186, "grad_norm": 1.1484375, "learning_rate": 0.00017485576278008854, "loss": 3.3571, "step": 1087 }, { "epoch": 0.029197080291970802, "grad_norm": 1.1171875, "learning_rate": 0.00017501677176975713, "loss": 3.1766, "step": 1088 }, { "epoch": 0.029223915843709748, "grad_norm": 1.09375, "learning_rate": 0.00017517778075942575, "loss": 3.2894, "step": 1089 }, { "epoch": 0.02925075139544869, "grad_norm": 1.0625, "learning_rate": 0.0001753387897490943, "loss": 3.3291, "step": 1090 }, { "epoch": 0.029277586947187635, "grad_norm": 1.125, "learning_rate": 0.0001754997987387629, "loss": 3.3278, "step": 1091 }, { "epoch": 0.029304422498926577, "grad_norm": 1.0546875, "learning_rate": 0.00017566080772843149, "loss": 3.2824, "step": 1092 }, { "epoch": 0.029331258050665523, "grad_norm": 1.078125, "learning_rate": 0.0001758218167181001, "loss": 3.2774, "step": 1093 }, { "epoch": 0.029358093602404465, "grad_norm": 1.1015625, "learning_rate": 0.00017598282570776866, "loss": 3.3995, "step": 1094 }, { "epoch": 0.02938492915414341, "grad_norm": 1.0078125, "learning_rate": 0.00017614383469743725, "loss": 3.1015, "step": 1095 }, { "epoch": 0.029411764705882353, "grad_norm": 1.0703125, "learning_rate": 0.00017630484368710587, "loss": 3.2803, "step": 1096 }, { "epoch": 0.029438600257621298, "grad_norm": 1.078125, "learning_rate": 0.00017646585267677443, "loss": 3.3522, "step": 1097 }, { "epoch": 0.02946543580936024, "grad_norm": 1.0703125, "learning_rate": 0.00017662686166644302, "loss": 3.2818, "step": 1098 }, { "epoch": 0.029492271361099186, "grad_norm": 1.0078125, "learning_rate": 0.0001767878706561116, "loss": 3.291, "step": 1099 }, { "epoch": 0.029519106912838128, "grad_norm": 1.078125, "learning_rate": 0.00017694887964578022, "loss": 3.3674, "step": 1100 }, { "epoch": 0.029545942464577073, "grad_norm": 1.0703125, "learning_rate": 0.00017710988863544878, "loss": 3.3126, "step": 1101 }, { "epoch": 0.029572778016316015, "grad_norm": 1.0390625, "learning_rate": 0.00017727089762511737, "loss": 3.3212, "step": 1102 }, { "epoch": 0.029599613568054958, "grad_norm": 1.140625, "learning_rate": 0.000177431906614786, "loss": 3.3208, "step": 1103 }, { "epoch": 0.029626449119793903, "grad_norm": 1.0625, "learning_rate": 0.00017759291560445458, "loss": 3.2629, "step": 1104 }, { "epoch": 0.029653284671532845, "grad_norm": 1.0078125, "learning_rate": 0.00017775392459412314, "loss": 3.209, "step": 1105 }, { "epoch": 0.02968012022327179, "grad_norm": 1.09375, "learning_rate": 0.00017791493358379175, "loss": 3.2774, "step": 1106 }, { "epoch": 0.029706955775010733, "grad_norm": 1.109375, "learning_rate": 0.00017807594257346034, "loss": 3.286, "step": 1107 }, { "epoch": 0.02973379132674968, "grad_norm": 1.0546875, "learning_rate": 0.00017823695156312893, "loss": 3.3028, "step": 1108 }, { "epoch": 0.02976062687848862, "grad_norm": 1.09375, "learning_rate": 0.0001783979605527975, "loss": 3.214, "step": 1109 }, { "epoch": 0.029787462430227566, "grad_norm": 1.046875, "learning_rate": 0.0001785589695424661, "loss": 3.1914, "step": 1110 }, { "epoch": 0.029814297981966508, "grad_norm": 1.0859375, "learning_rate": 0.0001787199785321347, "loss": 3.4439, "step": 1111 }, { "epoch": 0.029841133533705454, "grad_norm": 1.015625, "learning_rate": 0.00017888098752180329, "loss": 3.2948, "step": 1112 }, { "epoch": 0.029867969085444396, "grad_norm": 1.0859375, "learning_rate": 0.00017904199651147187, "loss": 3.3537, "step": 1113 }, { "epoch": 0.02989480463718334, "grad_norm": 1.0546875, "learning_rate": 0.00017920300550114046, "loss": 3.2866, "step": 1114 }, { "epoch": 0.029921640188922283, "grad_norm": 1.109375, "learning_rate": 0.00017936401449080905, "loss": 3.3393, "step": 1115 }, { "epoch": 0.02994847574066123, "grad_norm": 1.0234375, "learning_rate": 0.00017952502348047767, "loss": 3.3518, "step": 1116 }, { "epoch": 0.02997531129240017, "grad_norm": 1.0859375, "learning_rate": 0.00017968603247014623, "loss": 3.2411, "step": 1117 }, { "epoch": 0.030002146844139117, "grad_norm": 1.0390625, "learning_rate": 0.00017984704145981482, "loss": 3.4197, "step": 1118 }, { "epoch": 0.03002898239587806, "grad_norm": 1.0390625, "learning_rate": 0.00018000805044948343, "loss": 3.3593, "step": 1119 }, { "epoch": 0.030055817947617004, "grad_norm": 1.0390625, "learning_rate": 0.00018016905943915202, "loss": 3.2242, "step": 1120 }, { "epoch": 0.030082653499355946, "grad_norm": 1.0390625, "learning_rate": 0.00018033006842882058, "loss": 3.2843, "step": 1121 }, { "epoch": 0.030109489051094892, "grad_norm": 1.09375, "learning_rate": 0.00018049107741848917, "loss": 3.2721, "step": 1122 }, { "epoch": 0.030136324602833834, "grad_norm": 1.0546875, "learning_rate": 0.0001806520864081578, "loss": 3.2174, "step": 1123 }, { "epoch": 0.03016316015457278, "grad_norm": 1.0625, "learning_rate": 0.00018081309539782635, "loss": 3.3512, "step": 1124 }, { "epoch": 0.03018999570631172, "grad_norm": 1.078125, "learning_rate": 0.00018097410438749494, "loss": 3.3502, "step": 1125 }, { "epoch": 0.030216831258050667, "grad_norm": 1.0546875, "learning_rate": 0.00018113511337716355, "loss": 3.2803, "step": 1126 }, { "epoch": 0.03024366680978961, "grad_norm": 1.0234375, "learning_rate": 0.00018129612236683214, "loss": 3.2409, "step": 1127 }, { "epoch": 0.03027050236152855, "grad_norm": 1.03125, "learning_rate": 0.0001814571313565007, "loss": 3.1482, "step": 1128 }, { "epoch": 0.030297337913267497, "grad_norm": 1.046875, "learning_rate": 0.0001816181403461693, "loss": 3.2426, "step": 1129 }, { "epoch": 0.03032417346500644, "grad_norm": 1.0625, "learning_rate": 0.0001817791493358379, "loss": 3.193, "step": 1130 }, { "epoch": 0.030351009016745385, "grad_norm": 1.0390625, "learning_rate": 0.0001819401583255065, "loss": 3.3449, "step": 1131 }, { "epoch": 0.030377844568484327, "grad_norm": 1.046875, "learning_rate": 0.00018210116731517506, "loss": 3.2078, "step": 1132 }, { "epoch": 0.030404680120223272, "grad_norm": 1.0390625, "learning_rate": 0.00018226217630484367, "loss": 3.2691, "step": 1133 }, { "epoch": 0.030431515671962214, "grad_norm": 1.03125, "learning_rate": 0.00018242318529451226, "loss": 3.2506, "step": 1134 }, { "epoch": 0.03045835122370116, "grad_norm": 1.0625, "learning_rate": 0.00018258419428418085, "loss": 3.1518, "step": 1135 }, { "epoch": 0.030485186775440102, "grad_norm": 1.0234375, "learning_rate": 0.00018274520327384944, "loss": 3.2713, "step": 1136 }, { "epoch": 0.030512022327179047, "grad_norm": 1.0625, "learning_rate": 0.00018290621226351803, "loss": 3.3099, "step": 1137 }, { "epoch": 0.03053885787891799, "grad_norm": 1.0078125, "learning_rate": 0.00018306722125318662, "loss": 3.2464, "step": 1138 }, { "epoch": 0.030565693430656935, "grad_norm": 1.03125, "learning_rate": 0.00018322823024285523, "loss": 3.207, "step": 1139 }, { "epoch": 0.030592528982395877, "grad_norm": 1.0703125, "learning_rate": 0.0001833892392325238, "loss": 3.2552, "step": 1140 }, { "epoch": 0.030619364534134823, "grad_norm": 1.0546875, "learning_rate": 0.00018355024822219238, "loss": 3.2088, "step": 1141 }, { "epoch": 0.030646200085873765, "grad_norm": 1.0234375, "learning_rate": 0.00018371125721186097, "loss": 3.1084, "step": 1142 }, { "epoch": 0.03067303563761271, "grad_norm": 1.03125, "learning_rate": 0.0001838722662015296, "loss": 3.1088, "step": 1143 }, { "epoch": 0.030699871189351653, "grad_norm": 1.046875, "learning_rate": 0.00018403327519119815, "loss": 3.2273, "step": 1144 }, { "epoch": 0.030726706741090598, "grad_norm": 1.0234375, "learning_rate": 0.00018419428418086674, "loss": 3.2364, "step": 1145 }, { "epoch": 0.03075354229282954, "grad_norm": 1.0234375, "learning_rate": 0.00018435529317053535, "loss": 3.2825, "step": 1146 }, { "epoch": 0.030780377844568486, "grad_norm": 1.078125, "learning_rate": 0.00018451630216020394, "loss": 3.3676, "step": 1147 }, { "epoch": 0.030807213396307428, "grad_norm": 1.109375, "learning_rate": 0.0001846773111498725, "loss": 3.2099, "step": 1148 }, { "epoch": 0.030834048948046373, "grad_norm": 1.0078125, "learning_rate": 0.00018483832013954112, "loss": 3.0913, "step": 1149 }, { "epoch": 0.030860884499785315, "grad_norm": 1.109375, "learning_rate": 0.0001849993291292097, "loss": 3.2194, "step": 1150 }, { "epoch": 0.03088772005152426, "grad_norm": 1.1171875, "learning_rate": 0.0001851603381188783, "loss": 3.1509, "step": 1151 }, { "epoch": 0.030914555603263203, "grad_norm": 1.0390625, "learning_rate": 0.00018532134710854686, "loss": 3.2605, "step": 1152 }, { "epoch": 0.030941391155002145, "grad_norm": 1.03125, "learning_rate": 0.00018548235609821547, "loss": 3.147, "step": 1153 }, { "epoch": 0.03096822670674109, "grad_norm": 1.0625, "learning_rate": 0.00018564336508788406, "loss": 3.383, "step": 1154 }, { "epoch": 0.030995062258480033, "grad_norm": 1.0234375, "learning_rate": 0.00018580437407755262, "loss": 3.0912, "step": 1155 }, { "epoch": 0.03102189781021898, "grad_norm": 1.0234375, "learning_rate": 0.00018596538306722124, "loss": 3.2207, "step": 1156 }, { "epoch": 0.03104873336195792, "grad_norm": 1.015625, "learning_rate": 0.00018612639205688983, "loss": 3.1102, "step": 1157 }, { "epoch": 0.031075568913696866, "grad_norm": 1.0703125, "learning_rate": 0.00018628740104655842, "loss": 3.2322, "step": 1158 }, { "epoch": 0.031102404465435808, "grad_norm": 1.0390625, "learning_rate": 0.00018644841003622698, "loss": 3.2158, "step": 1159 }, { "epoch": 0.031129240017174754, "grad_norm": 1.0546875, "learning_rate": 0.0001866094190258956, "loss": 3.372, "step": 1160 }, { "epoch": 0.031156075568913696, "grad_norm": 1.0390625, "learning_rate": 0.00018677042801556418, "loss": 3.1121, "step": 1161 }, { "epoch": 0.03118291112065264, "grad_norm": 1.0234375, "learning_rate": 0.0001869314370052328, "loss": 3.1977, "step": 1162 }, { "epoch": 0.031209746672391583, "grad_norm": 1.046875, "learning_rate": 0.00018709244599490136, "loss": 3.1649, "step": 1163 }, { "epoch": 0.03123658222413053, "grad_norm": 1.078125, "learning_rate": 0.00018725345498456995, "loss": 3.2144, "step": 1164 }, { "epoch": 0.031263417775869475, "grad_norm": 1.0703125, "learning_rate": 0.00018741446397423854, "loss": 3.2791, "step": 1165 }, { "epoch": 0.03129025332760842, "grad_norm": 1.1171875, "learning_rate": 0.00018757547296390715, "loss": 3.3145, "step": 1166 }, { "epoch": 0.03131708887934736, "grad_norm": 1.0390625, "learning_rate": 0.00018773648195357571, "loss": 3.273, "step": 1167 }, { "epoch": 0.0313439244310863, "grad_norm": 1.078125, "learning_rate": 0.0001878974909432443, "loss": 3.2765, "step": 1168 }, { "epoch": 0.03137075998282525, "grad_norm": 1.0078125, "learning_rate": 0.00018805849993291292, "loss": 3.153, "step": 1169 }, { "epoch": 0.03139759553456419, "grad_norm": 1.0078125, "learning_rate": 0.0001882195089225815, "loss": 3.1452, "step": 1170 }, { "epoch": 0.031424431086303134, "grad_norm": 1.0625, "learning_rate": 0.00018838051791225007, "loss": 3.4695, "step": 1171 }, { "epoch": 0.031451266638042076, "grad_norm": 0.99609375, "learning_rate": 0.00018854152690191866, "loss": 3.362, "step": 1172 }, { "epoch": 0.031478102189781025, "grad_norm": 1.0390625, "learning_rate": 0.00018870253589158727, "loss": 3.2571, "step": 1173 }, { "epoch": 0.03150493774151997, "grad_norm": 1.015625, "learning_rate": 0.00018886354488125586, "loss": 3.1321, "step": 1174 }, { "epoch": 0.03153177329325891, "grad_norm": 1.0, "learning_rate": 0.00018902455387092442, "loss": 3.152, "step": 1175 }, { "epoch": 0.03155860884499785, "grad_norm": 1.0078125, "learning_rate": 0.00018918556286059304, "loss": 3.3171, "step": 1176 }, { "epoch": 0.0315854443967368, "grad_norm": 1.0078125, "learning_rate": 0.00018934657185026163, "loss": 3.1892, "step": 1177 }, { "epoch": 0.03161227994847574, "grad_norm": 0.99609375, "learning_rate": 0.00018950758083993022, "loss": 3.1515, "step": 1178 }, { "epoch": 0.031639115500214685, "grad_norm": 0.9765625, "learning_rate": 0.0001896685898295988, "loss": 3.0861, "step": 1179 }, { "epoch": 0.03166595105195363, "grad_norm": 0.99609375, "learning_rate": 0.0001898295988192674, "loss": 3.2345, "step": 1180 }, { "epoch": 0.03169278660369257, "grad_norm": 0.9453125, "learning_rate": 0.00018999060780893598, "loss": 3.1252, "step": 1181 }, { "epoch": 0.03171962215543152, "grad_norm": 1.03125, "learning_rate": 0.00019015161679860454, "loss": 3.1582, "step": 1182 }, { "epoch": 0.03174645770717046, "grad_norm": 0.98828125, "learning_rate": 0.00019031262578827316, "loss": 3.2284, "step": 1183 }, { "epoch": 0.0317732932589094, "grad_norm": 1.0, "learning_rate": 0.00019047363477794175, "loss": 3.1218, "step": 1184 }, { "epoch": 0.031800128810648344, "grad_norm": 1.046875, "learning_rate": 0.00019063464376761036, "loss": 3.2521, "step": 1185 }, { "epoch": 0.03182696436238729, "grad_norm": 0.98046875, "learning_rate": 0.00019079565275727893, "loss": 3.1455, "step": 1186 }, { "epoch": 0.031853799914126235, "grad_norm": 1.0390625, "learning_rate": 0.00019095666174694751, "loss": 3.1281, "step": 1187 }, { "epoch": 0.03188063546586518, "grad_norm": 0.99609375, "learning_rate": 0.0001911176707366161, "loss": 3.2586, "step": 1188 }, { "epoch": 0.03190747101760412, "grad_norm": 1.0625, "learning_rate": 0.00019127867972628472, "loss": 3.2001, "step": 1189 }, { "epoch": 0.03193430656934307, "grad_norm": 1.03125, "learning_rate": 0.00019143968871595328, "loss": 3.3013, "step": 1190 }, { "epoch": 0.03196114212108201, "grad_norm": 1.015625, "learning_rate": 0.00019160069770562187, "loss": 3.2149, "step": 1191 }, { "epoch": 0.03198797767282095, "grad_norm": 0.9921875, "learning_rate": 0.00019176170669529048, "loss": 3.0869, "step": 1192 }, { "epoch": 0.032014813224559895, "grad_norm": 1.0078125, "learning_rate": 0.00019192271568495907, "loss": 3.1859, "step": 1193 }, { "epoch": 0.032041648776298844, "grad_norm": 1.03125, "learning_rate": 0.00019208372467462764, "loss": 3.2404, "step": 1194 }, { "epoch": 0.032068484328037786, "grad_norm": 1.0078125, "learning_rate": 0.00019224473366429622, "loss": 3.185, "step": 1195 }, { "epoch": 0.03209531987977673, "grad_norm": 1.0078125, "learning_rate": 0.00019240574265396484, "loss": 3.1209, "step": 1196 }, { "epoch": 0.03212215543151567, "grad_norm": 1.0, "learning_rate": 0.00019256675164363343, "loss": 3.2315, "step": 1197 }, { "epoch": 0.03214899098325462, "grad_norm": 1.0234375, "learning_rate": 0.000192727760633302, "loss": 3.0752, "step": 1198 }, { "epoch": 0.03217582653499356, "grad_norm": 0.98828125, "learning_rate": 0.0001928887696229706, "loss": 3.198, "step": 1199 }, { "epoch": 0.0322026620867325, "grad_norm": 0.9609375, "learning_rate": 0.0001930497786126392, "loss": 3.0928, "step": 1200 }, { "epoch": 0.032229497638471445, "grad_norm": 1.0078125, "learning_rate": 0.00019321078760230778, "loss": 3.1812, "step": 1201 }, { "epoch": 0.032256333190210394, "grad_norm": 1.0390625, "learning_rate": 0.00019337179659197634, "loss": 3.1725, "step": 1202 }, { "epoch": 0.032283168741949336, "grad_norm": 0.96875, "learning_rate": 0.00019353280558164496, "loss": 3.1483, "step": 1203 }, { "epoch": 0.03231000429368828, "grad_norm": 1.390625, "learning_rate": 0.00019369381457131355, "loss": 3.37, "step": 1204 }, { "epoch": 0.03233683984542722, "grad_norm": 1.15625, "learning_rate": 0.00019385482356098216, "loss": 3.2528, "step": 1205 }, { "epoch": 0.03236367539716616, "grad_norm": 1.078125, "learning_rate": 0.00019401583255065073, "loss": 3.2051, "step": 1206 }, { "epoch": 0.03239051094890511, "grad_norm": 1.28125, "learning_rate": 0.00019417684154031931, "loss": 3.2647, "step": 1207 }, { "epoch": 0.032417346500644054, "grad_norm": 1.0703125, "learning_rate": 0.0001943378505299879, "loss": 3.2111, "step": 1208 }, { "epoch": 0.032444182052382996, "grad_norm": 1.0703125, "learning_rate": 0.00019449885951965652, "loss": 3.2081, "step": 1209 }, { "epoch": 0.03247101760412194, "grad_norm": 1.078125, "learning_rate": 0.00019465986850932508, "loss": 3.1465, "step": 1210 }, { "epoch": 0.03249785315586089, "grad_norm": 0.97265625, "learning_rate": 0.00019482087749899367, "loss": 3.1905, "step": 1211 }, { "epoch": 0.03252468870759983, "grad_norm": 0.9765625, "learning_rate": 0.00019498188648866228, "loss": 3.129, "step": 1212 }, { "epoch": 0.03255152425933877, "grad_norm": 0.984375, "learning_rate": 0.00019514289547833085, "loss": 3.2943, "step": 1213 }, { "epoch": 0.03257835981107771, "grad_norm": 1.0078125, "learning_rate": 0.00019530390446799944, "loss": 3.1246, "step": 1214 }, { "epoch": 0.03260519536281666, "grad_norm": 0.96875, "learning_rate": 0.00019546491345766805, "loss": 3.3517, "step": 1215 }, { "epoch": 0.032632030914555604, "grad_norm": 1.0, "learning_rate": 0.00019562592244733664, "loss": 3.3842, "step": 1216 }, { "epoch": 0.032658866466294546, "grad_norm": 1.0078125, "learning_rate": 0.0001957869314370052, "loss": 3.1444, "step": 1217 }, { "epoch": 0.03268570201803349, "grad_norm": 0.97265625, "learning_rate": 0.0001959479404266738, "loss": 3.1297, "step": 1218 }, { "epoch": 0.03271253756977244, "grad_norm": 0.984375, "learning_rate": 0.0001961089494163424, "loss": 3.2589, "step": 1219 }, { "epoch": 0.03273937312151138, "grad_norm": 0.94921875, "learning_rate": 0.000196269958406011, "loss": 3.0521, "step": 1220 }, { "epoch": 0.03276620867325032, "grad_norm": 0.953125, "learning_rate": 0.00019643096739567956, "loss": 3.2497, "step": 1221 }, { "epoch": 0.032793044224989264, "grad_norm": 1.0078125, "learning_rate": 0.00019659197638534817, "loss": 3.1162, "step": 1222 }, { "epoch": 0.03281987977672821, "grad_norm": 0.99609375, "learning_rate": 0.00019675298537501676, "loss": 3.1521, "step": 1223 }, { "epoch": 0.032846715328467155, "grad_norm": 0.96484375, "learning_rate": 0.00019691399436468535, "loss": 3.0775, "step": 1224 }, { "epoch": 0.0328735508802061, "grad_norm": 1.0078125, "learning_rate": 0.0001970750033543539, "loss": 3.104, "step": 1225 }, { "epoch": 0.03290038643194504, "grad_norm": 0.9921875, "learning_rate": 0.00019723601234402253, "loss": 3.1447, "step": 1226 }, { "epoch": 0.03292722198368399, "grad_norm": 0.9140625, "learning_rate": 0.00019739702133369111, "loss": 2.988, "step": 1227 }, { "epoch": 0.03295405753542293, "grad_norm": 0.9609375, "learning_rate": 0.00019755803032335973, "loss": 3.0605, "step": 1228 }, { "epoch": 0.03298089308716187, "grad_norm": 0.94921875, "learning_rate": 0.0001977190393130283, "loss": 3.1155, "step": 1229 }, { "epoch": 0.033007728638900814, "grad_norm": 0.953125, "learning_rate": 0.00019788004830269688, "loss": 3.1813, "step": 1230 }, { "epoch": 0.033034564190639756, "grad_norm": 0.9609375, "learning_rate": 0.00019804105729236547, "loss": 3.1318, "step": 1231 }, { "epoch": 0.033061399742378705, "grad_norm": 0.9453125, "learning_rate": 0.00019820206628203408, "loss": 3.1955, "step": 1232 }, { "epoch": 0.03308823529411765, "grad_norm": 1.03125, "learning_rate": 0.00019836307527170265, "loss": 3.1263, "step": 1233 }, { "epoch": 0.03311507084585659, "grad_norm": 1.0234375, "learning_rate": 0.00019852408426137124, "loss": 3.2044, "step": 1234 }, { "epoch": 0.03314190639759553, "grad_norm": 1.0234375, "learning_rate": 0.00019868509325103985, "loss": 3.3208, "step": 1235 }, { "epoch": 0.03316874194933448, "grad_norm": 0.953125, "learning_rate": 0.00019884610224070844, "loss": 3.0702, "step": 1236 }, { "epoch": 0.03319557750107342, "grad_norm": 1.0546875, "learning_rate": 0.000199007111230377, "loss": 3.0882, "step": 1237 }, { "epoch": 0.033222413052812365, "grad_norm": 0.9765625, "learning_rate": 0.0001991681202200456, "loss": 3.1467, "step": 1238 }, { "epoch": 0.03324924860455131, "grad_norm": 0.9765625, "learning_rate": 0.0001993291292097142, "loss": 3.1961, "step": 1239 }, { "epoch": 0.033276084156290256, "grad_norm": 0.99609375, "learning_rate": 0.00019949013819938277, "loss": 3.1711, "step": 1240 }, { "epoch": 0.0333029197080292, "grad_norm": 1.0625, "learning_rate": 0.00019965114718905136, "loss": 3.2161, "step": 1241 }, { "epoch": 0.03332975525976814, "grad_norm": 0.98828125, "learning_rate": 0.00019981215617871997, "loss": 3.0657, "step": 1242 }, { "epoch": 0.03335659081150708, "grad_norm": 1.0, "learning_rate": 0.00019997316516838856, "loss": 3.1826, "step": 1243 }, { "epoch": 0.03338342636324603, "grad_norm": 0.9765625, "learning_rate": 0.00020013417415805712, "loss": 3.1895, "step": 1244 }, { "epoch": 0.03341026191498497, "grad_norm": 0.97265625, "learning_rate": 0.00020029518314772574, "loss": 3.0644, "step": 1245 }, { "epoch": 0.033437097466723915, "grad_norm": 0.9375, "learning_rate": 0.00020045619213739433, "loss": 3.1232, "step": 1246 }, { "epoch": 0.03346393301846286, "grad_norm": 1.015625, "learning_rate": 0.00020061720112706291, "loss": 3.1753, "step": 1247 }, { "epoch": 0.03349076857020181, "grad_norm": 1.015625, "learning_rate": 0.00020077821011673148, "loss": 3.2788, "step": 1248 }, { "epoch": 0.03351760412194075, "grad_norm": 0.94140625, "learning_rate": 0.0002009392191064001, "loss": 2.9088, "step": 1249 }, { "epoch": 0.03354443967367969, "grad_norm": 0.98046875, "learning_rate": 0.00020110022809606868, "loss": 3.1212, "step": 1250 }, { "epoch": 0.03357127522541863, "grad_norm": 1.03125, "learning_rate": 0.00020126123708573727, "loss": 3.1775, "step": 1251 }, { "epoch": 0.03359811077715758, "grad_norm": 0.953125, "learning_rate": 0.00020142224607540586, "loss": 3.2424, "step": 1252 }, { "epoch": 0.033624946328896524, "grad_norm": 0.953125, "learning_rate": 0.00020158325506507445, "loss": 3.1149, "step": 1253 }, { "epoch": 0.033651781880635466, "grad_norm": 0.984375, "learning_rate": 0.00020174426405474304, "loss": 3.0716, "step": 1254 }, { "epoch": 0.03367861743237441, "grad_norm": 0.9296875, "learning_rate": 0.00020190527304441165, "loss": 3.0194, "step": 1255 }, { "epoch": 0.03370545298411335, "grad_norm": 0.9375, "learning_rate": 0.0002020662820340802, "loss": 3.1703, "step": 1256 }, { "epoch": 0.0337322885358523, "grad_norm": 0.94921875, "learning_rate": 0.0002022272910237488, "loss": 3.1648, "step": 1257 }, { "epoch": 0.03375912408759124, "grad_norm": 0.9375, "learning_rate": 0.00020238830001341742, "loss": 3.0707, "step": 1258 }, { "epoch": 0.03378595963933018, "grad_norm": 0.98046875, "learning_rate": 0.000202549309003086, "loss": 3.0283, "step": 1259 }, { "epoch": 0.033812795191069125, "grad_norm": 0.94921875, "learning_rate": 0.00020271031799275457, "loss": 3.1787, "step": 1260 }, { "epoch": 0.033839630742808074, "grad_norm": 0.984375, "learning_rate": 0.00020287132698242316, "loss": 3.1198, "step": 1261 }, { "epoch": 0.03386646629454702, "grad_norm": 0.94140625, "learning_rate": 0.00020303233597209177, "loss": 3.1024, "step": 1262 }, { "epoch": 0.03389330184628596, "grad_norm": 0.921875, "learning_rate": 0.00020319334496176036, "loss": 3.1464, "step": 1263 }, { "epoch": 0.0339201373980249, "grad_norm": 0.97265625, "learning_rate": 0.00020335435395142892, "loss": 3.0483, "step": 1264 }, { "epoch": 0.03394697294976385, "grad_norm": 0.9453125, "learning_rate": 0.00020351536294109754, "loss": 3.1813, "step": 1265 }, { "epoch": 0.03397380850150279, "grad_norm": 0.94140625, "learning_rate": 0.00020367637193076613, "loss": 3.1618, "step": 1266 }, { "epoch": 0.034000644053241734, "grad_norm": 0.94140625, "learning_rate": 0.00020383738092043471, "loss": 3.1468, "step": 1267 }, { "epoch": 0.034027479604980676, "grad_norm": 0.94140625, "learning_rate": 0.00020399838991010328, "loss": 3.0597, "step": 1268 }, { "epoch": 0.034054315156719625, "grad_norm": 1.03125, "learning_rate": 0.0002041593988997719, "loss": 3.1421, "step": 1269 }, { "epoch": 0.03408115070845857, "grad_norm": 0.9453125, "learning_rate": 0.00020432040788944048, "loss": 3.0846, "step": 1270 }, { "epoch": 0.03410798626019751, "grad_norm": 0.953125, "learning_rate": 0.00020448141687910904, "loss": 3.052, "step": 1271 }, { "epoch": 0.03413482181193645, "grad_norm": 1.0390625, "learning_rate": 0.00020464242586877766, "loss": 3.1055, "step": 1272 }, { "epoch": 0.0341616573636754, "grad_norm": 0.9375, "learning_rate": 0.00020480343485844625, "loss": 3.123, "step": 1273 }, { "epoch": 0.03418849291541434, "grad_norm": 0.9296875, "learning_rate": 0.00020496444384811484, "loss": 3.0993, "step": 1274 }, { "epoch": 0.034215328467153285, "grad_norm": 1.015625, "learning_rate": 0.00020512545283778342, "loss": 3.1061, "step": 1275 }, { "epoch": 0.03424216401889223, "grad_norm": 0.91015625, "learning_rate": 0.000205286461827452, "loss": 3.1222, "step": 1276 }, { "epoch": 0.034268999570631176, "grad_norm": 0.9765625, "learning_rate": 0.0002054474708171206, "loss": 3.1774, "step": 1277 }, { "epoch": 0.03429583512237012, "grad_norm": 1.0234375, "learning_rate": 0.00020560847980678922, "loss": 2.9872, "step": 1278 }, { "epoch": 0.03432267067410906, "grad_norm": 0.93359375, "learning_rate": 0.00020576948879645778, "loss": 3.0648, "step": 1279 }, { "epoch": 0.034349506225848, "grad_norm": 0.98046875, "learning_rate": 0.00020593049778612637, "loss": 3.1229, "step": 1280 }, { "epoch": 0.034376341777586944, "grad_norm": 0.95703125, "learning_rate": 0.00020609150677579496, "loss": 3.0456, "step": 1281 }, { "epoch": 0.03440317732932589, "grad_norm": 0.94921875, "learning_rate": 0.00020625251576546357, "loss": 3.0632, "step": 1282 }, { "epoch": 0.034430012881064835, "grad_norm": 0.98046875, "learning_rate": 0.00020641352475513213, "loss": 3.0529, "step": 1283 }, { "epoch": 0.03445684843280378, "grad_norm": 0.95703125, "learning_rate": 0.00020657453374480072, "loss": 3.137, "step": 1284 }, { "epoch": 0.03448368398454272, "grad_norm": 0.96875, "learning_rate": 0.00020673554273446934, "loss": 3.0696, "step": 1285 }, { "epoch": 0.03451051953628167, "grad_norm": 0.91015625, "learning_rate": 0.00020689655172413793, "loss": 2.9727, "step": 1286 }, { "epoch": 0.03453735508802061, "grad_norm": 0.93359375, "learning_rate": 0.0002070575607138065, "loss": 3.07, "step": 1287 }, { "epoch": 0.03456419063975955, "grad_norm": 0.9609375, "learning_rate": 0.0002072185697034751, "loss": 3.0885, "step": 1288 }, { "epoch": 0.034591026191498495, "grad_norm": 0.953125, "learning_rate": 0.0002073795786931437, "loss": 2.9558, "step": 1289 }, { "epoch": 0.034617861743237444, "grad_norm": 0.94921875, "learning_rate": 0.00020754058768281228, "loss": 3.1002, "step": 1290 }, { "epoch": 0.034644697294976386, "grad_norm": 0.98828125, "learning_rate": 0.00020770159667248084, "loss": 3.1185, "step": 1291 }, { "epoch": 0.03467153284671533, "grad_norm": 0.9453125, "learning_rate": 0.00020786260566214946, "loss": 3.0278, "step": 1292 }, { "epoch": 0.03469836839845427, "grad_norm": 0.90625, "learning_rate": 0.00020802361465181805, "loss": 3.1207, "step": 1293 }, { "epoch": 0.03472520395019322, "grad_norm": 0.97265625, "learning_rate": 0.00020818462364148664, "loss": 2.9504, "step": 1294 }, { "epoch": 0.03475203950193216, "grad_norm": 0.94921875, "learning_rate": 0.00020834563263115522, "loss": 3.0407, "step": 1295 }, { "epoch": 0.0347788750536711, "grad_norm": 0.9375, "learning_rate": 0.0002085066416208238, "loss": 3.0086, "step": 1296 }, { "epoch": 0.034805710605410045, "grad_norm": 0.88671875, "learning_rate": 0.0002086676506104924, "loss": 2.9256, "step": 1297 }, { "epoch": 0.034832546157148994, "grad_norm": 0.9453125, "learning_rate": 0.00020882865960016096, "loss": 3.1073, "step": 1298 }, { "epoch": 0.034859381708887936, "grad_norm": 0.91796875, "learning_rate": 0.00020898966858982958, "loss": 3.0028, "step": 1299 }, { "epoch": 0.03488621726062688, "grad_norm": 0.9140625, "learning_rate": 0.00020915067757949817, "loss": 3.1733, "step": 1300 }, { "epoch": 0.03491305281236582, "grad_norm": 0.91015625, "learning_rate": 0.00020931168656916678, "loss": 2.9798, "step": 1301 }, { "epoch": 0.03493988836410477, "grad_norm": 0.92578125, "learning_rate": 0.00020947269555883534, "loss": 3.0842, "step": 1302 }, { "epoch": 0.03496672391584371, "grad_norm": 0.93359375, "learning_rate": 0.00020963370454850393, "loss": 3.1152, "step": 1303 }, { "epoch": 0.034993559467582654, "grad_norm": 0.93359375, "learning_rate": 0.00020979471353817252, "loss": 2.8923, "step": 1304 }, { "epoch": 0.035020395019321596, "grad_norm": 0.9375, "learning_rate": 0.00020995572252784114, "loss": 3.0332, "step": 1305 }, { "epoch": 0.03504723057106054, "grad_norm": 0.93359375, "learning_rate": 0.0002101167315175097, "loss": 2.9837, "step": 1306 }, { "epoch": 0.03507406612279949, "grad_norm": 0.953125, "learning_rate": 0.0002102777405071783, "loss": 3.0622, "step": 1307 }, { "epoch": 0.03510090167453843, "grad_norm": 0.94140625, "learning_rate": 0.0002104387494968469, "loss": 3.0518, "step": 1308 }, { "epoch": 0.03512773722627737, "grad_norm": 0.90234375, "learning_rate": 0.0002105997584865155, "loss": 2.9856, "step": 1309 }, { "epoch": 0.03515457277801631, "grad_norm": 0.93359375, "learning_rate": 0.00021076076747618405, "loss": 2.9625, "step": 1310 }, { "epoch": 0.03518140832975526, "grad_norm": 0.93359375, "learning_rate": 0.00021092177646585264, "loss": 2.9508, "step": 1311 }, { "epoch": 0.035208243881494204, "grad_norm": 0.96875, "learning_rate": 0.00021108278545552126, "loss": 3.1491, "step": 1312 }, { "epoch": 0.035235079433233146, "grad_norm": 0.91015625, "learning_rate": 0.00021124379444518985, "loss": 2.9649, "step": 1313 }, { "epoch": 0.03526191498497209, "grad_norm": 0.953125, "learning_rate": 0.0002114048034348584, "loss": 2.9823, "step": 1314 }, { "epoch": 0.03528875053671104, "grad_norm": 0.9375, "learning_rate": 0.00021156581242452702, "loss": 2.9985, "step": 1315 }, { "epoch": 0.03531558608844998, "grad_norm": 0.91015625, "learning_rate": 0.0002117268214141956, "loss": 3.1275, "step": 1316 }, { "epoch": 0.03534242164018892, "grad_norm": 0.90234375, "learning_rate": 0.0002118878304038642, "loss": 2.9962, "step": 1317 }, { "epoch": 0.035369257191927864, "grad_norm": 0.9453125, "learning_rate": 0.0002120488393935328, "loss": 2.9955, "step": 1318 }, { "epoch": 0.03539609274366681, "grad_norm": 0.94921875, "learning_rate": 0.00021220984838320138, "loss": 3.1374, "step": 1319 }, { "epoch": 0.035422928295405755, "grad_norm": 0.96484375, "learning_rate": 0.00021237085737286997, "loss": 3.115, "step": 1320 }, { "epoch": 0.0354497638471447, "grad_norm": 0.921875, "learning_rate": 0.00021253186636253858, "loss": 3.0876, "step": 1321 }, { "epoch": 0.03547659939888364, "grad_norm": 0.87109375, "learning_rate": 0.00021269287535220714, "loss": 2.9045, "step": 1322 }, { "epoch": 0.03550343495062259, "grad_norm": 0.921875, "learning_rate": 0.00021285388434187573, "loss": 3.0532, "step": 1323 }, { "epoch": 0.03553027050236153, "grad_norm": 0.91796875, "learning_rate": 0.00021301489333154432, "loss": 3.0082, "step": 1324 }, { "epoch": 0.03555710605410047, "grad_norm": 0.9140625, "learning_rate": 0.0002131759023212129, "loss": 3.0198, "step": 1325 }, { "epoch": 0.035583941605839414, "grad_norm": 0.91015625, "learning_rate": 0.0002133369113108815, "loss": 2.9081, "step": 1326 }, { "epoch": 0.03561077715757836, "grad_norm": 0.94140625, "learning_rate": 0.0002134979203005501, "loss": 3.1296, "step": 1327 }, { "epoch": 0.035637612709317305, "grad_norm": 0.90625, "learning_rate": 0.0002136589292902187, "loss": 2.9706, "step": 1328 }, { "epoch": 0.03566444826105625, "grad_norm": 0.921875, "learning_rate": 0.00021381993827988726, "loss": 2.8771, "step": 1329 }, { "epoch": 0.03569128381279519, "grad_norm": 0.91796875, "learning_rate": 0.00021398094726955585, "loss": 3.0407, "step": 1330 }, { "epoch": 0.03571811936453413, "grad_norm": 0.953125, "learning_rate": 0.00021414195625922447, "loss": 3.06, "step": 1331 }, { "epoch": 0.03574495491627308, "grad_norm": 0.90625, "learning_rate": 0.00021430296524889306, "loss": 3.0132, "step": 1332 }, { "epoch": 0.03577179046801202, "grad_norm": 0.91015625, "learning_rate": 0.00021446397423856162, "loss": 2.8994, "step": 1333 }, { "epoch": 0.035798626019750965, "grad_norm": 0.90234375, "learning_rate": 0.0002146249832282302, "loss": 2.8713, "step": 1334 }, { "epoch": 0.03582546157148991, "grad_norm": 0.94140625, "learning_rate": 0.00021478599221789882, "loss": 3.0819, "step": 1335 }, { "epoch": 0.035852297123228856, "grad_norm": 0.91796875, "learning_rate": 0.0002149470012075674, "loss": 3.031, "step": 1336 }, { "epoch": 0.0358791326749678, "grad_norm": 0.90625, "learning_rate": 0.00021510801019723597, "loss": 2.9987, "step": 1337 }, { "epoch": 0.03590596822670674, "grad_norm": 0.921875, "learning_rate": 0.0002152690191869046, "loss": 2.9839, "step": 1338 }, { "epoch": 0.03593280377844568, "grad_norm": 0.91015625, "learning_rate": 0.00021543002817657318, "loss": 2.9428, "step": 1339 }, { "epoch": 0.03595963933018463, "grad_norm": 0.9140625, "learning_rate": 0.00021559103716624177, "loss": 3.0012, "step": 1340 }, { "epoch": 0.03598647488192357, "grad_norm": 0.93359375, "learning_rate": 0.00021575204615591033, "loss": 3.0369, "step": 1341 }, { "epoch": 0.036013310433662515, "grad_norm": 0.94140625, "learning_rate": 0.00021591305514557894, "loss": 2.9953, "step": 1342 }, { "epoch": 0.03604014598540146, "grad_norm": 0.93359375, "learning_rate": 0.00021607406413524753, "loss": 3.0919, "step": 1343 }, { "epoch": 0.036066981537140406, "grad_norm": 0.94921875, "learning_rate": 0.00021623507312491615, "loss": 2.9372, "step": 1344 }, { "epoch": 0.03609381708887935, "grad_norm": 0.91796875, "learning_rate": 0.0002163960821145847, "loss": 2.9364, "step": 1345 }, { "epoch": 0.03612065264061829, "grad_norm": 0.921875, "learning_rate": 0.0002165570911042533, "loss": 2.8996, "step": 1346 }, { "epoch": 0.03614748819235723, "grad_norm": 0.92578125, "learning_rate": 0.0002167181000939219, "loss": 3.07, "step": 1347 }, { "epoch": 0.03617432374409618, "grad_norm": 1.28125, "learning_rate": 0.0002168791090835905, "loss": 3.3109, "step": 1348 }, { "epoch": 0.036201159295835124, "grad_norm": 1.0, "learning_rate": 0.00021704011807325906, "loss": 3.0416, "step": 1349 }, { "epoch": 0.036227994847574066, "grad_norm": 1.0078125, "learning_rate": 0.00021720112706292765, "loss": 3.1127, "step": 1350 }, { "epoch": 0.03625483039931301, "grad_norm": 1.0859375, "learning_rate": 0.00021736213605259627, "loss": 3.2011, "step": 1351 }, { "epoch": 0.03628166595105196, "grad_norm": 1.015625, "learning_rate": 0.00021752314504226486, "loss": 3.235, "step": 1352 }, { "epoch": 0.0363085015027909, "grad_norm": 0.9765625, "learning_rate": 0.00021768415403193342, "loss": 3.0952, "step": 1353 }, { "epoch": 0.03633533705452984, "grad_norm": 0.96875, "learning_rate": 0.000217845163021602, "loss": 3.2017, "step": 1354 }, { "epoch": 0.03636217260626878, "grad_norm": 0.94140625, "learning_rate": 0.00021800617201127062, "loss": 2.9939, "step": 1355 }, { "epoch": 0.036389008158007725, "grad_norm": 0.9140625, "learning_rate": 0.00021816718100093919, "loss": 3.0364, "step": 1356 }, { "epoch": 0.036415843709746674, "grad_norm": 0.875, "learning_rate": 0.00021832818999060777, "loss": 3.0996, "step": 1357 }, { "epoch": 0.03644267926148562, "grad_norm": 0.91796875, "learning_rate": 0.0002184891989802764, "loss": 3.101, "step": 1358 }, { "epoch": 0.03646951481322456, "grad_norm": 0.91015625, "learning_rate": 0.00021865020796994498, "loss": 2.983, "step": 1359 }, { "epoch": 0.0364963503649635, "grad_norm": 0.8984375, "learning_rate": 0.00021881121695961354, "loss": 3.0506, "step": 1360 }, { "epoch": 0.03652318591670245, "grad_norm": 0.88671875, "learning_rate": 0.00021897222594928216, "loss": 3.0282, "step": 1361 }, { "epoch": 0.03655002146844139, "grad_norm": 0.92578125, "learning_rate": 0.00021913323493895074, "loss": 3.0155, "step": 1362 }, { "epoch": 0.036576857020180334, "grad_norm": 0.90234375, "learning_rate": 0.00021929424392861933, "loss": 3.0682, "step": 1363 }, { "epoch": 0.036603692571919276, "grad_norm": 0.90234375, "learning_rate": 0.0002194552529182879, "loss": 3.0707, "step": 1364 }, { "epoch": 0.036630528123658225, "grad_norm": 0.89453125, "learning_rate": 0.0002196162619079565, "loss": 3.0265, "step": 1365 }, { "epoch": 0.03665736367539717, "grad_norm": 0.91796875, "learning_rate": 0.0002197772708976251, "loss": 2.9809, "step": 1366 }, { "epoch": 0.03668419922713611, "grad_norm": 0.93359375, "learning_rate": 0.0002199382798872937, "loss": 3.0329, "step": 1367 }, { "epoch": 0.03671103477887505, "grad_norm": 0.87109375, "learning_rate": 0.00022009928887696228, "loss": 3.0152, "step": 1368 }, { "epoch": 0.036737870330614, "grad_norm": 0.90234375, "learning_rate": 0.00022026029786663086, "loss": 3.1627, "step": 1369 }, { "epoch": 0.03676470588235294, "grad_norm": 0.94140625, "learning_rate": 0.00022042130685629945, "loss": 3.1402, "step": 1370 }, { "epoch": 0.036791541434091884, "grad_norm": 0.8671875, "learning_rate": 0.00022058231584596807, "loss": 2.9275, "step": 1371 }, { "epoch": 0.03681837698583083, "grad_norm": 0.9296875, "learning_rate": 0.00022074332483563663, "loss": 2.975, "step": 1372 }, { "epoch": 0.036845212537569776, "grad_norm": 0.890625, "learning_rate": 0.00022090433382530522, "loss": 3.0165, "step": 1373 }, { "epoch": 0.03687204808930872, "grad_norm": 0.875, "learning_rate": 0.00022106534281497384, "loss": 2.9816, "step": 1374 }, { "epoch": 0.03689888364104766, "grad_norm": 0.89453125, "learning_rate": 0.00022122635180464242, "loss": 3.0532, "step": 1375 }, { "epoch": 0.0369257191927866, "grad_norm": 0.8984375, "learning_rate": 0.00022138736079431099, "loss": 3.0233, "step": 1376 }, { "epoch": 0.03695255474452555, "grad_norm": 0.87109375, "learning_rate": 0.00022154836978397957, "loss": 2.9194, "step": 1377 }, { "epoch": 0.03697939029626449, "grad_norm": 0.87109375, "learning_rate": 0.0002217093787736482, "loss": 2.9432, "step": 1378 }, { "epoch": 0.037006225848003435, "grad_norm": 0.89453125, "learning_rate": 0.00022187038776331678, "loss": 3.0977, "step": 1379 }, { "epoch": 0.03703306139974238, "grad_norm": 0.90234375, "learning_rate": 0.00022203139675298534, "loss": 2.983, "step": 1380 }, { "epoch": 0.03705989695148132, "grad_norm": 0.890625, "learning_rate": 0.00022219240574265396, "loss": 2.9273, "step": 1381 }, { "epoch": 0.03708673250322027, "grad_norm": 0.87109375, "learning_rate": 0.00022235341473232254, "loss": 3.0071, "step": 1382 }, { "epoch": 0.03711356805495921, "grad_norm": 0.91796875, "learning_rate": 0.0002225144237219911, "loss": 3.0717, "step": 1383 }, { "epoch": 0.03714040360669815, "grad_norm": 0.92578125, "learning_rate": 0.0002226754327116597, "loss": 3.0658, "step": 1384 }, { "epoch": 0.037167239158437095, "grad_norm": 0.89453125, "learning_rate": 0.0002228364417013283, "loss": 2.932, "step": 1385 }, { "epoch": 0.037194074710176044, "grad_norm": 0.9140625, "learning_rate": 0.0002229974506909969, "loss": 2.921, "step": 1386 }, { "epoch": 0.037220910261914986, "grad_norm": 0.90625, "learning_rate": 0.00022315845968066546, "loss": 2.975, "step": 1387 }, { "epoch": 0.03724774581365393, "grad_norm": 0.86328125, "learning_rate": 0.00022331946867033408, "loss": 2.9392, "step": 1388 }, { "epoch": 0.03727458136539287, "grad_norm": 0.89453125, "learning_rate": 0.00022348047766000266, "loss": 2.9988, "step": 1389 }, { "epoch": 0.03730141691713182, "grad_norm": 0.87890625, "learning_rate": 0.00022364148664967125, "loss": 3.0429, "step": 1390 }, { "epoch": 0.03732825246887076, "grad_norm": 0.88671875, "learning_rate": 0.00022380249563933984, "loss": 3.0623, "step": 1391 }, { "epoch": 0.0373550880206097, "grad_norm": 0.87109375, "learning_rate": 0.00022396350462900843, "loss": 2.9429, "step": 1392 }, { "epoch": 0.037381923572348645, "grad_norm": 0.8984375, "learning_rate": 0.00022412451361867702, "loss": 3.0222, "step": 1393 }, { "epoch": 0.037408759124087594, "grad_norm": 0.875, "learning_rate": 0.00022428552260834564, "loss": 2.9512, "step": 1394 }, { "epoch": 0.037435594675826536, "grad_norm": 0.921875, "learning_rate": 0.0002244465315980142, "loss": 2.985, "step": 1395 }, { "epoch": 0.03746243022756548, "grad_norm": 0.8828125, "learning_rate": 0.00022460754058768279, "loss": 2.9191, "step": 1396 }, { "epoch": 0.03748926577930442, "grad_norm": 0.84765625, "learning_rate": 0.00022476854957735137, "loss": 2.9258, "step": 1397 }, { "epoch": 0.03751610133104337, "grad_norm": 0.87890625, "learning_rate": 0.00022492955856702, "loss": 3.0535, "step": 1398 }, { "epoch": 0.03754293688278231, "grad_norm": 0.86328125, "learning_rate": 0.00022509056755668855, "loss": 2.9233, "step": 1399 }, { "epoch": 0.037569772434521254, "grad_norm": 0.87109375, "learning_rate": 0.00022525157654635714, "loss": 2.8713, "step": 1400 }, { "epoch": 0.037596607986260196, "grad_norm": 0.89453125, "learning_rate": 0.00022541258553602576, "loss": 2.9582, "step": 1401 }, { "epoch": 0.037623443537999145, "grad_norm": 0.875, "learning_rate": 0.00022557359452569434, "loss": 2.9719, "step": 1402 }, { "epoch": 0.03765027908973809, "grad_norm": 0.86328125, "learning_rate": 0.0002257346035153629, "loss": 3.0025, "step": 1403 }, { "epoch": 0.03767711464147703, "grad_norm": 0.91796875, "learning_rate": 0.00022589561250503152, "loss": 3.0213, "step": 1404 }, { "epoch": 0.03770395019321597, "grad_norm": 0.91796875, "learning_rate": 0.0002260566214947001, "loss": 3.035, "step": 1405 }, { "epoch": 0.03773078574495491, "grad_norm": 0.8984375, "learning_rate": 0.0002262176304843687, "loss": 2.9896, "step": 1406 }, { "epoch": 0.03775762129669386, "grad_norm": 0.94921875, "learning_rate": 0.00022637863947403726, "loss": 3.0089, "step": 1407 }, { "epoch": 0.037784456848432804, "grad_norm": 0.91796875, "learning_rate": 0.00022653964846370588, "loss": 3.0543, "step": 1408 }, { "epoch": 0.037811292400171746, "grad_norm": 0.88671875, "learning_rate": 0.00022670065745337446, "loss": 2.9769, "step": 1409 }, { "epoch": 0.03783812795191069, "grad_norm": 0.8515625, "learning_rate": 0.00022686166644304305, "loss": 2.9573, "step": 1410 }, { "epoch": 0.03786496350364964, "grad_norm": 0.87890625, "learning_rate": 0.00022702267543271164, "loss": 2.9826, "step": 1411 }, { "epoch": 0.03789179905538858, "grad_norm": 0.890625, "learning_rate": 0.00022718368442238023, "loss": 2.9843, "step": 1412 }, { "epoch": 0.03791863460712752, "grad_norm": 0.875, "learning_rate": 0.00022734469341204882, "loss": 2.9408, "step": 1413 }, { "epoch": 0.037945470158866464, "grad_norm": 0.88671875, "learning_rate": 0.00022750570240171738, "loss": 3.0179, "step": 1414 }, { "epoch": 0.03797230571060541, "grad_norm": 0.92578125, "learning_rate": 0.000227666711391386, "loss": 2.9906, "step": 1415 }, { "epoch": 0.037999141262344355, "grad_norm": 0.94140625, "learning_rate": 0.00022782772038105459, "loss": 3.1455, "step": 1416 }, { "epoch": 0.0380259768140833, "grad_norm": 0.90234375, "learning_rate": 0.0002279887293707232, "loss": 2.9348, "step": 1417 }, { "epoch": 0.03805281236582224, "grad_norm": 0.890625, "learning_rate": 0.00022814973836039176, "loss": 2.9492, "step": 1418 }, { "epoch": 0.03807964791756119, "grad_norm": 0.9140625, "learning_rate": 0.00022831074735006035, "loss": 3.0309, "step": 1419 }, { "epoch": 0.03810648346930013, "grad_norm": 0.8515625, "learning_rate": 0.00022847175633972894, "loss": 2.8592, "step": 1420 }, { "epoch": 0.03813331902103907, "grad_norm": 0.90234375, "learning_rate": 0.00022863276532939756, "loss": 2.9759, "step": 1421 }, { "epoch": 0.038160154572778014, "grad_norm": 0.91015625, "learning_rate": 0.00022879377431906612, "loss": 2.966, "step": 1422 }, { "epoch": 0.03818699012451696, "grad_norm": 0.88671875, "learning_rate": 0.0002289547833087347, "loss": 3.0257, "step": 1423 }, { "epoch": 0.038213825676255905, "grad_norm": 0.890625, "learning_rate": 0.00022911579229840332, "loss": 2.9713, "step": 1424 }, { "epoch": 0.03824066122799485, "grad_norm": 0.89453125, "learning_rate": 0.0002292768012880719, "loss": 2.8722, "step": 1425 }, { "epoch": 0.03826749677973379, "grad_norm": 0.87890625, "learning_rate": 0.00022943781027774047, "loss": 2.9734, "step": 1426 }, { "epoch": 0.03829433233147274, "grad_norm": 0.90234375, "learning_rate": 0.00022959881926740906, "loss": 3.0283, "step": 1427 }, { "epoch": 0.03832116788321168, "grad_norm": 0.8671875, "learning_rate": 0.00022975982825707768, "loss": 2.8279, "step": 1428 }, { "epoch": 0.03834800343495062, "grad_norm": 0.9140625, "learning_rate": 0.00022992083724674626, "loss": 2.9624, "step": 1429 }, { "epoch": 0.038374838986689565, "grad_norm": 0.89453125, "learning_rate": 0.00023008184623641483, "loss": 2.9644, "step": 1430 }, { "epoch": 0.03840167453842851, "grad_norm": 0.90625, "learning_rate": 0.00023024285522608344, "loss": 2.9982, "step": 1431 }, { "epoch": 0.038428510090167456, "grad_norm": 0.859375, "learning_rate": 0.00023040386421575203, "loss": 2.9081, "step": 1432 }, { "epoch": 0.0384553456419064, "grad_norm": 0.87890625, "learning_rate": 0.00023056487320542062, "loss": 2.988, "step": 1433 }, { "epoch": 0.03848218119364534, "grad_norm": 0.91015625, "learning_rate": 0.0002307258821950892, "loss": 2.9073, "step": 1434 }, { "epoch": 0.03850901674538428, "grad_norm": 0.8671875, "learning_rate": 0.0002308868911847578, "loss": 2.9893, "step": 1435 }, { "epoch": 0.03853585229712323, "grad_norm": 0.87890625, "learning_rate": 0.00023104790017442639, "loss": 2.9292, "step": 1436 }, { "epoch": 0.03856268784886217, "grad_norm": 0.86328125, "learning_rate": 0.000231208909164095, "loss": 2.8994, "step": 1437 }, { "epoch": 0.038589523400601115, "grad_norm": 0.8671875, "learning_rate": 0.00023136991815376356, "loss": 2.9649, "step": 1438 }, { "epoch": 0.03861635895234006, "grad_norm": 0.86328125, "learning_rate": 0.00023153092714343215, "loss": 2.978, "step": 1439 }, { "epoch": 0.038643194504079006, "grad_norm": 0.8828125, "learning_rate": 0.00023169193613310074, "loss": 2.9621, "step": 1440 }, { "epoch": 0.03867003005581795, "grad_norm": 0.890625, "learning_rate": 0.00023185294512276933, "loss": 2.8658, "step": 1441 }, { "epoch": 0.03869686560755689, "grad_norm": 0.8671875, "learning_rate": 0.00023201395411243792, "loss": 2.9033, "step": 1442 }, { "epoch": 0.03872370115929583, "grad_norm": 0.859375, "learning_rate": 0.0002321749631021065, "loss": 2.7867, "step": 1443 }, { "epoch": 0.03875053671103478, "grad_norm": 0.875, "learning_rate": 0.00023233597209177512, "loss": 2.835, "step": 1444 }, { "epoch": 0.038777372262773724, "grad_norm": 0.87109375, "learning_rate": 0.00023249698108144368, "loss": 2.8523, "step": 1445 }, { "epoch": 0.038804207814512666, "grad_norm": 0.87890625, "learning_rate": 0.00023265799007111227, "loss": 2.8578, "step": 1446 }, { "epoch": 0.03883104336625161, "grad_norm": 0.8515625, "learning_rate": 0.0002328189990607809, "loss": 2.9445, "step": 1447 }, { "epoch": 0.03885787891799056, "grad_norm": 0.8828125, "learning_rate": 0.00023298000805044948, "loss": 2.8847, "step": 1448 }, { "epoch": 0.0388847144697295, "grad_norm": 0.8671875, "learning_rate": 0.00023314101704011804, "loss": 2.9182, "step": 1449 }, { "epoch": 0.03891155002146844, "grad_norm": 0.89453125, "learning_rate": 0.00023330202602978663, "loss": 3.0153, "step": 1450 }, { "epoch": 0.03893838557320738, "grad_norm": 0.8828125, "learning_rate": 0.00023346303501945524, "loss": 2.8827, "step": 1451 }, { "epoch": 0.03896522112494633, "grad_norm": 0.87890625, "learning_rate": 0.00023362404400912383, "loss": 2.9677, "step": 1452 }, { "epoch": 0.038992056676685274, "grad_norm": 0.88671875, "learning_rate": 0.0002337850529987924, "loss": 2.9125, "step": 1453 }, { "epoch": 0.039018892228424217, "grad_norm": 0.94921875, "learning_rate": 0.000233946061988461, "loss": 2.9882, "step": 1454 }, { "epoch": 0.03904572778016316, "grad_norm": 0.90625, "learning_rate": 0.0002341070709781296, "loss": 2.8818, "step": 1455 }, { "epoch": 0.0390725633319021, "grad_norm": 0.89453125, "learning_rate": 0.00023426807996779819, "loss": 2.8842, "step": 1456 }, { "epoch": 0.03909939888364105, "grad_norm": 0.8828125, "learning_rate": 0.00023442908895746675, "loss": 2.8386, "step": 1457 }, { "epoch": 0.03912623443537999, "grad_norm": 0.890625, "learning_rate": 0.00023459009794713536, "loss": 2.874, "step": 1458 }, { "epoch": 0.039153069987118934, "grad_norm": 0.890625, "learning_rate": 0.00023475110693680395, "loss": 2.9348, "step": 1459 }, { "epoch": 0.039179905538857876, "grad_norm": 0.8828125, "learning_rate": 0.00023491211592647257, "loss": 2.9259, "step": 1460 }, { "epoch": 0.039206741090596825, "grad_norm": 0.87890625, "learning_rate": 0.00023507312491614113, "loss": 3.0793, "step": 1461 }, { "epoch": 0.03923357664233577, "grad_norm": 0.8828125, "learning_rate": 0.00023523413390580972, "loss": 2.9528, "step": 1462 }, { "epoch": 0.03926041219407471, "grad_norm": 0.86328125, "learning_rate": 0.0002353951428954783, "loss": 2.9578, "step": 1463 }, { "epoch": 0.03928724774581365, "grad_norm": 0.87890625, "learning_rate": 0.00023555615188514692, "loss": 2.9546, "step": 1464 }, { "epoch": 0.0393140832975526, "grad_norm": 0.8203125, "learning_rate": 0.00023571716087481548, "loss": 2.7855, "step": 1465 }, { "epoch": 0.03934091884929154, "grad_norm": 0.859375, "learning_rate": 0.00023587816986448407, "loss": 2.9714, "step": 1466 }, { "epoch": 0.039367754401030484, "grad_norm": 0.8828125, "learning_rate": 0.0002360391788541527, "loss": 2.9185, "step": 1467 }, { "epoch": 0.03939458995276943, "grad_norm": 0.8984375, "learning_rate": 0.00023620018784382128, "loss": 2.8718, "step": 1468 }, { "epoch": 0.039421425504508376, "grad_norm": 0.87890625, "learning_rate": 0.00023636119683348984, "loss": 2.9838, "step": 1469 }, { "epoch": 0.03944826105624732, "grad_norm": 0.8671875, "learning_rate": 0.00023652220582315843, "loss": 2.9133, "step": 1470 }, { "epoch": 0.03947509660798626, "grad_norm": 0.8984375, "learning_rate": 0.00023668321481282704, "loss": 3.0375, "step": 1471 }, { "epoch": 0.0395019321597252, "grad_norm": 0.8671875, "learning_rate": 0.0002368442238024956, "loss": 2.822, "step": 1472 }, { "epoch": 0.03952876771146415, "grad_norm": 0.875, "learning_rate": 0.0002370052327921642, "loss": 3.0057, "step": 1473 }, { "epoch": 0.03955560326320309, "grad_norm": 0.9140625, "learning_rate": 0.0002371662417818328, "loss": 2.9955, "step": 1474 }, { "epoch": 0.039582438814942035, "grad_norm": 0.87109375, "learning_rate": 0.0002373272507715014, "loss": 2.8429, "step": 1475 }, { "epoch": 0.03960927436668098, "grad_norm": 0.89453125, "learning_rate": 0.00023748825976116996, "loss": 3.0102, "step": 1476 }, { "epoch": 0.039636109918419926, "grad_norm": 0.8359375, "learning_rate": 0.00023764926875083857, "loss": 2.8396, "step": 1477 }, { "epoch": 0.03966294547015887, "grad_norm": 0.85546875, "learning_rate": 0.00023781027774050716, "loss": 2.8401, "step": 1478 }, { "epoch": 0.03968978102189781, "grad_norm": 0.8984375, "learning_rate": 0.00023797128673017575, "loss": 2.9391, "step": 1479 }, { "epoch": 0.03971661657363675, "grad_norm": 0.88671875, "learning_rate": 0.0002381322957198443, "loss": 2.8184, "step": 1480 }, { "epoch": 0.039743452125375694, "grad_norm": 0.859375, "learning_rate": 0.00023829330470951293, "loss": 2.8761, "step": 1481 }, { "epoch": 0.039770287677114644, "grad_norm": 0.84765625, "learning_rate": 0.00023845431369918152, "loss": 2.7676, "step": 1482 }, { "epoch": 0.039797123228853586, "grad_norm": 0.8515625, "learning_rate": 0.0002386153226888501, "loss": 2.9497, "step": 1483 }, { "epoch": 0.03982395878059253, "grad_norm": 0.8828125, "learning_rate": 0.0002387763316785187, "loss": 2.8766, "step": 1484 }, { "epoch": 0.03985079433233147, "grad_norm": 0.84375, "learning_rate": 0.00023893734066818728, "loss": 2.8721, "step": 1485 }, { "epoch": 0.03987762988407042, "grad_norm": 0.83203125, "learning_rate": 0.00023909834965785587, "loss": 2.9345, "step": 1486 }, { "epoch": 0.03990446543580936, "grad_norm": 0.86328125, "learning_rate": 0.0002392593586475245, "loss": 2.9289, "step": 1487 }, { "epoch": 0.0399313009875483, "grad_norm": 0.86328125, "learning_rate": 0.00023942036763719305, "loss": 2.847, "step": 1488 }, { "epoch": 0.039958136539287245, "grad_norm": 0.87109375, "learning_rate": 0.00023958137662686164, "loss": 2.804, "step": 1489 }, { "epoch": 0.039984972091026194, "grad_norm": 0.859375, "learning_rate": 0.00023974238561653025, "loss": 2.8791, "step": 1490 }, { "epoch": 0.040011807642765136, "grad_norm": 0.8671875, "learning_rate": 0.00023990339460619884, "loss": 2.9341, "step": 1491 }, { "epoch": 0.04003864319450408, "grad_norm": 0.84375, "learning_rate": 0.0002400644035958674, "loss": 2.8463, "step": 1492 }, { "epoch": 0.04006547874624302, "grad_norm": 0.89453125, "learning_rate": 0.000240225412585536, "loss": 2.9911, "step": 1493 }, { "epoch": 0.04009231429798197, "grad_norm": 0.84375, "learning_rate": 0.0002403864215752046, "loss": 2.8185, "step": 1494 }, { "epoch": 0.04011914984972091, "grad_norm": 0.88671875, "learning_rate": 0.0002405474305648732, "loss": 2.8568, "step": 1495 }, { "epoch": 0.040145985401459854, "grad_norm": 0.875, "learning_rate": 0.00024070843955454176, "loss": 3.0509, "step": 1496 }, { "epoch": 0.040172820953198796, "grad_norm": 0.875, "learning_rate": 0.00024086944854421037, "loss": 2.8949, "step": 1497 }, { "epoch": 0.040199656504937745, "grad_norm": 1.1484375, "learning_rate": 0.00024103045753387896, "loss": 2.9426, "step": 1498 }, { "epoch": 0.04022649205667669, "grad_norm": 1.046875, "learning_rate": 0.00024119146652354752, "loss": 2.96, "step": 1499 }, { "epoch": 0.04025332760841563, "grad_norm": 0.90234375, "learning_rate": 0.0002413524755132161, "loss": 2.9635, "step": 1500 }, { "epoch": 0.04028016316015457, "grad_norm": 0.97265625, "learning_rate": 0.00024151348450288473, "loss": 3.012, "step": 1501 }, { "epoch": 0.04030699871189352, "grad_norm": 0.97265625, "learning_rate": 0.00024167449349255332, "loss": 2.9471, "step": 1502 }, { "epoch": 0.04033383426363246, "grad_norm": 0.91796875, "learning_rate": 0.00024183550248222188, "loss": 2.9858, "step": 1503 }, { "epoch": 0.040360669815371404, "grad_norm": 0.90234375, "learning_rate": 0.0002419965114718905, "loss": 3.0064, "step": 1504 }, { "epoch": 0.040387505367110346, "grad_norm": 0.890625, "learning_rate": 0.00024215752046155908, "loss": 2.8701, "step": 1505 }, { "epoch": 0.04041434091884929, "grad_norm": 0.90625, "learning_rate": 0.00024231852945122767, "loss": 2.9757, "step": 1506 }, { "epoch": 0.04044117647058824, "grad_norm": 0.87890625, "learning_rate": 0.00024247953844089626, "loss": 2.9081, "step": 1507 }, { "epoch": 0.04046801202232718, "grad_norm": 0.90234375, "learning_rate": 0.00024264054743056485, "loss": 2.8991, "step": 1508 }, { "epoch": 0.04049484757406612, "grad_norm": 0.8671875, "learning_rate": 0.00024280155642023344, "loss": 2.9488, "step": 1509 }, { "epoch": 0.040521683125805064, "grad_norm": 0.89453125, "learning_rate": 0.00024296256540990205, "loss": 2.963, "step": 1510 }, { "epoch": 0.04054851867754401, "grad_norm": 0.8671875, "learning_rate": 0.00024312357439957061, "loss": 2.9224, "step": 1511 }, { "epoch": 0.040575354229282955, "grad_norm": 0.8828125, "learning_rate": 0.0002432845833892392, "loss": 3.0195, "step": 1512 }, { "epoch": 0.0406021897810219, "grad_norm": 0.890625, "learning_rate": 0.0002434455923789078, "loss": 2.8589, "step": 1513 }, { "epoch": 0.04062902533276084, "grad_norm": 0.81640625, "learning_rate": 0.0002436066013685764, "loss": 2.879, "step": 1514 }, { "epoch": 0.04065586088449979, "grad_norm": 0.87890625, "learning_rate": 0.00024376761035824497, "loss": 2.9513, "step": 1515 }, { "epoch": 0.04068269643623873, "grad_norm": 0.83984375, "learning_rate": 0.00024392861934791356, "loss": 2.9792, "step": 1516 }, { "epoch": 0.04070953198797767, "grad_norm": 0.89453125, "learning_rate": 0.00024408962833758217, "loss": 2.9457, "step": 1517 }, { "epoch": 0.040736367539716614, "grad_norm": 0.8203125, "learning_rate": 0.00024425063732725076, "loss": 2.8994, "step": 1518 }, { "epoch": 0.04076320309145556, "grad_norm": 0.85546875, "learning_rate": 0.00024441164631691935, "loss": 2.9849, "step": 1519 }, { "epoch": 0.040790038643194505, "grad_norm": 0.85546875, "learning_rate": 0.00024457265530658794, "loss": 2.7704, "step": 1520 }, { "epoch": 0.04081687419493345, "grad_norm": 0.8515625, "learning_rate": 0.00024473366429625653, "loss": 2.846, "step": 1521 }, { "epoch": 0.04084370974667239, "grad_norm": 0.88671875, "learning_rate": 0.0002448946732859251, "loss": 2.8884, "step": 1522 }, { "epoch": 0.04087054529841134, "grad_norm": 0.85546875, "learning_rate": 0.0002450556822755937, "loss": 2.8603, "step": 1523 }, { "epoch": 0.04089738085015028, "grad_norm": 0.87890625, "learning_rate": 0.0002452166912652623, "loss": 2.9536, "step": 1524 }, { "epoch": 0.04092421640188922, "grad_norm": 0.8359375, "learning_rate": 0.0002453777002549309, "loss": 2.9654, "step": 1525 }, { "epoch": 0.040951051953628165, "grad_norm": 0.83203125, "learning_rate": 0.00024553870924459947, "loss": 2.7827, "step": 1526 }, { "epoch": 0.040977887505367114, "grad_norm": 0.87890625, "learning_rate": 0.00024569971823426806, "loss": 3.0233, "step": 1527 }, { "epoch": 0.041004723057106056, "grad_norm": 0.8359375, "learning_rate": 0.00024586072722393665, "loss": 2.8386, "step": 1528 }, { "epoch": 0.041031558608845, "grad_norm": 0.83203125, "learning_rate": 0.00024602173621360524, "loss": 2.9908, "step": 1529 }, { "epoch": 0.04105839416058394, "grad_norm": 0.84375, "learning_rate": 0.0002461827452032738, "loss": 2.9373, "step": 1530 }, { "epoch": 0.04108522971232288, "grad_norm": 0.859375, "learning_rate": 0.0002463437541929424, "loss": 2.8407, "step": 1531 }, { "epoch": 0.04111206526406183, "grad_norm": 0.83203125, "learning_rate": 0.000246504763182611, "loss": 2.9132, "step": 1532 }, { "epoch": 0.04113890081580077, "grad_norm": 0.80859375, "learning_rate": 0.0002466657721722796, "loss": 2.7197, "step": 1533 }, { "epoch": 0.041165736367539715, "grad_norm": 0.87890625, "learning_rate": 0.0002468267811619482, "loss": 2.975, "step": 1534 }, { "epoch": 0.04119257191927866, "grad_norm": 0.875, "learning_rate": 0.00024698779015161677, "loss": 2.8912, "step": 1535 }, { "epoch": 0.041219407471017606, "grad_norm": 0.859375, "learning_rate": 0.00024714879914128536, "loss": 2.9144, "step": 1536 }, { "epoch": 0.04124624302275655, "grad_norm": 0.875, "learning_rate": 0.00024730980813095395, "loss": 2.8848, "step": 1537 }, { "epoch": 0.04127307857449549, "grad_norm": 0.8125, "learning_rate": 0.00024747081712062254, "loss": 2.7814, "step": 1538 }, { "epoch": 0.04129991412623443, "grad_norm": 0.859375, "learning_rate": 0.0002476318261102911, "loss": 2.913, "step": 1539 }, { "epoch": 0.04132674967797338, "grad_norm": 0.87890625, "learning_rate": 0.0002477928350999597, "loss": 2.9425, "step": 1540 }, { "epoch": 0.041353585229712324, "grad_norm": 0.8359375, "learning_rate": 0.00024795384408962836, "loss": 2.8842, "step": 1541 }, { "epoch": 0.041380420781451266, "grad_norm": 0.82421875, "learning_rate": 0.0002481148530792969, "loss": 2.8565, "step": 1542 }, { "epoch": 0.04140725633319021, "grad_norm": 0.8203125, "learning_rate": 0.0002482758620689655, "loss": 2.8324, "step": 1543 }, { "epoch": 0.04143409188492916, "grad_norm": 0.8203125, "learning_rate": 0.00024843687105863407, "loss": 2.771, "step": 1544 }, { "epoch": 0.0414609274366681, "grad_norm": 0.84765625, "learning_rate": 0.0002485978800483027, "loss": 2.9505, "step": 1545 }, { "epoch": 0.04148776298840704, "grad_norm": 0.84375, "learning_rate": 0.00024875888903797124, "loss": 2.7727, "step": 1546 }, { "epoch": 0.04151459854014598, "grad_norm": 0.81640625, "learning_rate": 0.00024891989802763983, "loss": 2.8291, "step": 1547 }, { "epoch": 0.04154143409188493, "grad_norm": 0.83984375, "learning_rate": 0.0002490809070173085, "loss": 2.8738, "step": 1548 }, { "epoch": 0.041568269643623874, "grad_norm": 0.85546875, "learning_rate": 0.00024924191600697706, "loss": 2.9224, "step": 1549 }, { "epoch": 0.041595105195362816, "grad_norm": 0.8203125, "learning_rate": 0.0002494029249966456, "loss": 2.7853, "step": 1550 }, { "epoch": 0.04162194074710176, "grad_norm": 0.84765625, "learning_rate": 0.0002495639339863142, "loss": 2.9317, "step": 1551 }, { "epoch": 0.04164877629884071, "grad_norm": 0.84375, "learning_rate": 0.00024972494297598283, "loss": 2.9539, "step": 1552 }, { "epoch": 0.04167561185057965, "grad_norm": 0.84765625, "learning_rate": 0.0002498859519656514, "loss": 2.9717, "step": 1553 }, { "epoch": 0.04170244740231859, "grad_norm": 0.82421875, "learning_rate": 0.00025004696095531995, "loss": 2.8604, "step": 1554 }, { "epoch": 0.041729282954057534, "grad_norm": 0.83203125, "learning_rate": 0.0002502079699449886, "loss": 2.8896, "step": 1555 }, { "epoch": 0.041756118505796476, "grad_norm": 0.83203125, "learning_rate": 0.0002503689789346572, "loss": 2.8676, "step": 1556 }, { "epoch": 0.041782954057535425, "grad_norm": 0.8359375, "learning_rate": 0.0002505299879243257, "loss": 2.9274, "step": 1557 }, { "epoch": 0.04180978960927437, "grad_norm": 0.84765625, "learning_rate": 0.0002506909969139943, "loss": 2.7659, "step": 1558 }, { "epoch": 0.04183662516101331, "grad_norm": 0.84375, "learning_rate": 0.00025085200590366295, "loss": 2.7905, "step": 1559 }, { "epoch": 0.04186346071275225, "grad_norm": 0.8515625, "learning_rate": 0.00025101301489333154, "loss": 2.91, "step": 1560 }, { "epoch": 0.0418902962644912, "grad_norm": 0.84375, "learning_rate": 0.0002511740238830001, "loss": 2.8241, "step": 1561 }, { "epoch": 0.04191713181623014, "grad_norm": 0.8125, "learning_rate": 0.0002513350328726687, "loss": 2.8048, "step": 1562 }, { "epoch": 0.041943967367969084, "grad_norm": 0.85546875, "learning_rate": 0.0002514960418623373, "loss": 2.905, "step": 1563 }, { "epoch": 0.041970802919708027, "grad_norm": 0.83984375, "learning_rate": 0.0002516570508520059, "loss": 2.9055, "step": 1564 }, { "epoch": 0.041997638471446976, "grad_norm": 0.8203125, "learning_rate": 0.0002518180598416745, "loss": 2.7998, "step": 1565 }, { "epoch": 0.04202447402318592, "grad_norm": 0.87109375, "learning_rate": 0.00025197906883134307, "loss": 2.9608, "step": 1566 }, { "epoch": 0.04205130957492486, "grad_norm": 0.84375, "learning_rate": 0.00025214007782101166, "loss": 2.8332, "step": 1567 }, { "epoch": 0.0420781451266638, "grad_norm": 0.8203125, "learning_rate": 0.00025230108681068025, "loss": 2.8725, "step": 1568 }, { "epoch": 0.04210498067840275, "grad_norm": 0.828125, "learning_rate": 0.00025246209580034884, "loss": 2.9009, "step": 1569 }, { "epoch": 0.04213181623014169, "grad_norm": 0.859375, "learning_rate": 0.0002526231047900174, "loss": 2.8897, "step": 1570 }, { "epoch": 0.042158651781880635, "grad_norm": 0.8359375, "learning_rate": 0.000252784113779686, "loss": 2.7992, "step": 1571 }, { "epoch": 0.04218548733361958, "grad_norm": 0.85546875, "learning_rate": 0.0002529451227693546, "loss": 2.8337, "step": 1572 }, { "epoch": 0.042212322885358526, "grad_norm": 0.8359375, "learning_rate": 0.0002531061317590232, "loss": 2.8367, "step": 1573 }, { "epoch": 0.04223915843709747, "grad_norm": 0.859375, "learning_rate": 0.0002532671407486918, "loss": 2.8805, "step": 1574 }, { "epoch": 0.04226599398883641, "grad_norm": 0.8125, "learning_rate": 0.00025342814973836037, "loss": 2.741, "step": 1575 }, { "epoch": 0.04229282954057535, "grad_norm": 0.80859375, "learning_rate": 0.00025358915872802896, "loss": 2.7214, "step": 1576 }, { "epoch": 0.0423196650923143, "grad_norm": 0.859375, "learning_rate": 0.00025375016771769755, "loss": 2.9345, "step": 1577 }, { "epoch": 0.042346500644053243, "grad_norm": 0.85546875, "learning_rate": 0.00025391117670736614, "loss": 2.9204, "step": 1578 }, { "epoch": 0.042373336195792186, "grad_norm": 0.83984375, "learning_rate": 0.0002540721856970347, "loss": 2.767, "step": 1579 }, { "epoch": 0.04240017174753113, "grad_norm": 0.859375, "learning_rate": 0.0002542331946867033, "loss": 2.8656, "step": 1580 }, { "epoch": 0.04242700729927007, "grad_norm": 0.82421875, "learning_rate": 0.0002543942036763719, "loss": 2.7953, "step": 1581 }, { "epoch": 0.04245384285100902, "grad_norm": 0.8359375, "learning_rate": 0.0002545552126660405, "loss": 2.8572, "step": 1582 }, { "epoch": 0.04248067840274796, "grad_norm": 0.828125, "learning_rate": 0.0002547162216557091, "loss": 2.931, "step": 1583 }, { "epoch": 0.0425075139544869, "grad_norm": 0.875, "learning_rate": 0.00025487723064537767, "loss": 2.9523, "step": 1584 }, { "epoch": 0.042534349506225845, "grad_norm": 0.80859375, "learning_rate": 0.00025503823963504626, "loss": 2.7862, "step": 1585 }, { "epoch": 0.042561185057964794, "grad_norm": 0.8359375, "learning_rate": 0.00025519924862471484, "loss": 2.705, "step": 1586 }, { "epoch": 0.042588020609703736, "grad_norm": 0.82421875, "learning_rate": 0.00025536025761438343, "loss": 2.7432, "step": 1587 }, { "epoch": 0.04261485616144268, "grad_norm": 0.8515625, "learning_rate": 0.000255521266604052, "loss": 2.8809, "step": 1588 }, { "epoch": 0.04264169171318162, "grad_norm": 0.8359375, "learning_rate": 0.0002556822755937206, "loss": 2.8662, "step": 1589 }, { "epoch": 0.04266852726492057, "grad_norm": 0.8359375, "learning_rate": 0.0002558432845833892, "loss": 2.889, "step": 1590 }, { "epoch": 0.04269536281665951, "grad_norm": 0.8359375, "learning_rate": 0.00025600429357305784, "loss": 2.8805, "step": 1591 }, { "epoch": 0.042722198368398454, "grad_norm": 0.84375, "learning_rate": 0.0002561653025627264, "loss": 2.7923, "step": 1592 }, { "epoch": 0.042749033920137396, "grad_norm": 0.8125, "learning_rate": 0.00025632631155239497, "loss": 2.7405, "step": 1593 }, { "epoch": 0.042775869471876345, "grad_norm": 0.81640625, "learning_rate": 0.00025648732054206355, "loss": 2.7591, "step": 1594 }, { "epoch": 0.04280270502361529, "grad_norm": 0.8515625, "learning_rate": 0.0002566483295317322, "loss": 2.8005, "step": 1595 }, { "epoch": 0.04282954057535423, "grad_norm": 0.8359375, "learning_rate": 0.00025680933852140073, "loss": 2.8139, "step": 1596 }, { "epoch": 0.04285637612709317, "grad_norm": 0.8359375, "learning_rate": 0.0002569703475110693, "loss": 2.8619, "step": 1597 }, { "epoch": 0.04288321167883212, "grad_norm": 0.82421875, "learning_rate": 0.00025713135650073796, "loss": 2.8523, "step": 1598 }, { "epoch": 0.04291004723057106, "grad_norm": 0.8359375, "learning_rate": 0.00025729236549040655, "loss": 2.8417, "step": 1599 }, { "epoch": 0.042936882782310004, "grad_norm": 0.83984375, "learning_rate": 0.0002574533744800751, "loss": 2.792, "step": 1600 }, { "epoch": 0.042963718334048946, "grad_norm": 0.828125, "learning_rate": 0.00025761438346974373, "loss": 2.9233, "step": 1601 }, { "epoch": 0.042990553885787895, "grad_norm": 0.8515625, "learning_rate": 0.0002577753924594123, "loss": 2.8726, "step": 1602 }, { "epoch": 0.04301738943752684, "grad_norm": 0.828125, "learning_rate": 0.0002579364014490809, "loss": 2.7326, "step": 1603 }, { "epoch": 0.04304422498926578, "grad_norm": 0.8203125, "learning_rate": 0.00025809741043874944, "loss": 2.8478, "step": 1604 }, { "epoch": 0.04307106054100472, "grad_norm": 0.84375, "learning_rate": 0.0002582584194284181, "loss": 2.9068, "step": 1605 }, { "epoch": 0.043097896092743664, "grad_norm": 0.83203125, "learning_rate": 0.00025841942841808667, "loss": 2.7116, "step": 1606 }, { "epoch": 0.04312473164448261, "grad_norm": 0.8125, "learning_rate": 0.00025858043740775526, "loss": 2.8128, "step": 1607 }, { "epoch": 0.043151567196221555, "grad_norm": 0.796875, "learning_rate": 0.00025874144639742385, "loss": 2.7672, "step": 1608 }, { "epoch": 0.0431784027479605, "grad_norm": 0.81640625, "learning_rate": 0.00025890245538709244, "loss": 2.757, "step": 1609 }, { "epoch": 0.04320523829969944, "grad_norm": 0.80078125, "learning_rate": 0.000259063464376761, "loss": 2.7267, "step": 1610 }, { "epoch": 0.04323207385143839, "grad_norm": 0.84375, "learning_rate": 0.0002592244733664296, "loss": 2.9007, "step": 1611 }, { "epoch": 0.04325890940317733, "grad_norm": 0.8359375, "learning_rate": 0.0002593854823560982, "loss": 2.8139, "step": 1612 }, { "epoch": 0.04328574495491627, "grad_norm": 0.8203125, "learning_rate": 0.0002595464913457668, "loss": 2.7726, "step": 1613 }, { "epoch": 0.043312580506655214, "grad_norm": 0.8359375, "learning_rate": 0.0002597075003354354, "loss": 2.8094, "step": 1614 }, { "epoch": 0.04333941605839416, "grad_norm": 0.81640625, "learning_rate": 0.00025986850932510397, "loss": 2.7366, "step": 1615 }, { "epoch": 0.043366251610133105, "grad_norm": 0.828125, "learning_rate": 0.00026002951831477256, "loss": 2.8556, "step": 1616 }, { "epoch": 0.04339308716187205, "grad_norm": 0.8125, "learning_rate": 0.00026019052730444115, "loss": 2.744, "step": 1617 }, { "epoch": 0.04341992271361099, "grad_norm": 0.80859375, "learning_rate": 0.00026035153629410974, "loss": 2.6729, "step": 1618 }, { "epoch": 0.04344675826534994, "grad_norm": 0.83984375, "learning_rate": 0.0002605125452837783, "loss": 2.7628, "step": 1619 }, { "epoch": 0.04347359381708888, "grad_norm": 0.83203125, "learning_rate": 0.0002606735542734469, "loss": 2.748, "step": 1620 }, { "epoch": 0.04350042936882782, "grad_norm": 0.828125, "learning_rate": 0.0002608345632631155, "loss": 2.7177, "step": 1621 }, { "epoch": 0.043527264920566765, "grad_norm": 0.81640625, "learning_rate": 0.0002609955722527841, "loss": 2.7603, "step": 1622 }, { "epoch": 0.043554100472305714, "grad_norm": 0.8125, "learning_rate": 0.0002611565812424527, "loss": 2.7532, "step": 1623 }, { "epoch": 0.043580936024044656, "grad_norm": 0.8125, "learning_rate": 0.00026131759023212127, "loss": 2.7968, "step": 1624 }, { "epoch": 0.0436077715757836, "grad_norm": 0.82421875, "learning_rate": 0.00026147859922178986, "loss": 2.7584, "step": 1625 }, { "epoch": 0.04363460712752254, "grad_norm": 0.80078125, "learning_rate": 0.00026163960821145844, "loss": 2.723, "step": 1626 }, { "epoch": 0.04366144267926149, "grad_norm": 0.8359375, "learning_rate": 0.00026180061720112703, "loss": 2.8978, "step": 1627 }, { "epoch": 0.04368827823100043, "grad_norm": 0.8125, "learning_rate": 0.0002619616261907956, "loss": 2.6957, "step": 1628 }, { "epoch": 0.04371511378273937, "grad_norm": 0.82421875, "learning_rate": 0.0002621226351804642, "loss": 2.7052, "step": 1629 }, { "epoch": 0.043741949334478315, "grad_norm": 0.84765625, "learning_rate": 0.0002622836441701328, "loss": 2.7502, "step": 1630 }, { "epoch": 0.04376878488621726, "grad_norm": 0.8046875, "learning_rate": 0.0002624446531598014, "loss": 2.6625, "step": 1631 }, { "epoch": 0.043795620437956206, "grad_norm": 0.80859375, "learning_rate": 0.00026260566214947, "loss": 2.7123, "step": 1632 }, { "epoch": 0.04382245598969515, "grad_norm": 0.82421875, "learning_rate": 0.00026276667113913857, "loss": 2.7743, "step": 1633 }, { "epoch": 0.04384929154143409, "grad_norm": 0.83984375, "learning_rate": 0.0002629276801288072, "loss": 2.7127, "step": 1634 }, { "epoch": 0.04387612709317303, "grad_norm": 0.83203125, "learning_rate": 0.00026308868911847574, "loss": 2.7747, "step": 1635 }, { "epoch": 0.04390296264491198, "grad_norm": 0.81640625, "learning_rate": 0.00026324969810814433, "loss": 2.6675, "step": 1636 }, { "epoch": 0.043929798196650924, "grad_norm": 0.82421875, "learning_rate": 0.0002634107070978129, "loss": 2.7424, "step": 1637 }, { "epoch": 0.043956633748389866, "grad_norm": 0.8359375, "learning_rate": 0.00026357171608748156, "loss": 2.8201, "step": 1638 }, { "epoch": 0.04398346930012881, "grad_norm": 0.8203125, "learning_rate": 0.0002637327250771501, "loss": 2.7125, "step": 1639 }, { "epoch": 0.04401030485186776, "grad_norm": 0.8125, "learning_rate": 0.0002638937340668187, "loss": 2.6944, "step": 1640 }, { "epoch": 0.0440371404036067, "grad_norm": 0.8359375, "learning_rate": 0.00026405474305648733, "loss": 2.7159, "step": 1641 }, { "epoch": 0.04406397595534564, "grad_norm": 0.8359375, "learning_rate": 0.00026421575204615586, "loss": 2.7675, "step": 1642 }, { "epoch": 0.04409081150708458, "grad_norm": 0.8046875, "learning_rate": 0.00026437676103582445, "loss": 2.7736, "step": 1643 }, { "epoch": 0.04411764705882353, "grad_norm": 0.83203125, "learning_rate": 0.0002645377700254931, "loss": 2.7837, "step": 1644 }, { "epoch": 0.044144482610562474, "grad_norm": 0.8203125, "learning_rate": 0.0002646987790151617, "loss": 2.8088, "step": 1645 }, { "epoch": 0.044171318162301416, "grad_norm": 0.81640625, "learning_rate": 0.0002648597880048302, "loss": 2.7622, "step": 1646 }, { "epoch": 0.04419815371404036, "grad_norm": 0.8671875, "learning_rate": 0.0002650207969944988, "loss": 2.8021, "step": 1647 }, { "epoch": 0.04422498926577931, "grad_norm": 0.80859375, "learning_rate": 0.00026518180598416745, "loss": 2.7316, "step": 1648 }, { "epoch": 0.04425182481751825, "grad_norm": 0.8359375, "learning_rate": 0.00026534281497383604, "loss": 2.773, "step": 1649 }, { "epoch": 0.04427866036925719, "grad_norm": 0.83203125, "learning_rate": 0.00026550382396350457, "loss": 2.7733, "step": 1650 }, { "epoch": 0.044305495920996134, "grad_norm": 0.8359375, "learning_rate": 0.0002656648329531732, "loss": 2.8114, "step": 1651 }, { "epoch": 0.04433233147273508, "grad_norm": 0.83984375, "learning_rate": 0.0002658258419428418, "loss": 2.7177, "step": 1652 }, { "epoch": 0.044359167024474025, "grad_norm": 0.83984375, "learning_rate": 0.0002659868509325104, "loss": 2.795, "step": 1653 }, { "epoch": 0.04438600257621297, "grad_norm": 1.1484375, "learning_rate": 0.0002661478599221789, "loss": 3.0009, "step": 1654 }, { "epoch": 0.04441283812795191, "grad_norm": 0.87109375, "learning_rate": 0.00026630886891184757, "loss": 2.7952, "step": 1655 }, { "epoch": 0.04443967367969085, "grad_norm": 0.8515625, "learning_rate": 0.00026646987790151616, "loss": 2.8503, "step": 1656 }, { "epoch": 0.0444665092314298, "grad_norm": 0.921875, "learning_rate": 0.00026663088689118475, "loss": 2.8316, "step": 1657 }, { "epoch": 0.04449334478316874, "grad_norm": 0.8984375, "learning_rate": 0.00026679189588085334, "loss": 2.9571, "step": 1658 }, { "epoch": 0.044520180334907684, "grad_norm": 0.85546875, "learning_rate": 0.0002669529048705219, "loss": 2.8877, "step": 1659 }, { "epoch": 0.044547015886646626, "grad_norm": 0.84375, "learning_rate": 0.0002671139138601905, "loss": 2.822, "step": 1660 }, { "epoch": 0.044573851438385576, "grad_norm": 0.8671875, "learning_rate": 0.0002672749228498591, "loss": 2.8455, "step": 1661 }, { "epoch": 0.04460068699012452, "grad_norm": 0.83984375, "learning_rate": 0.0002674359318395277, "loss": 2.7705, "step": 1662 }, { "epoch": 0.04462752254186346, "grad_norm": 0.859375, "learning_rate": 0.0002675969408291963, "loss": 2.9065, "step": 1663 }, { "epoch": 0.0446543580936024, "grad_norm": 0.81640625, "learning_rate": 0.00026775794981886487, "loss": 2.7841, "step": 1664 }, { "epoch": 0.04468119364534135, "grad_norm": 0.82421875, "learning_rate": 0.00026791895880853346, "loss": 2.8697, "step": 1665 }, { "epoch": 0.04470802919708029, "grad_norm": 0.8359375, "learning_rate": 0.00026807996779820204, "loss": 2.8659, "step": 1666 }, { "epoch": 0.044734864748819235, "grad_norm": 0.8515625, "learning_rate": 0.00026824097678787063, "loss": 2.8592, "step": 1667 }, { "epoch": 0.04476170030055818, "grad_norm": 0.80859375, "learning_rate": 0.0002684019857775392, "loss": 2.812, "step": 1668 }, { "epoch": 0.044788535852297126, "grad_norm": 0.78515625, "learning_rate": 0.0002685629947672078, "loss": 2.7447, "step": 1669 }, { "epoch": 0.04481537140403607, "grad_norm": 0.8125, "learning_rate": 0.0002687240037568764, "loss": 2.7882, "step": 1670 }, { "epoch": 0.04484220695577501, "grad_norm": 0.84375, "learning_rate": 0.000268885012746545, "loss": 2.8704, "step": 1671 }, { "epoch": 0.04486904250751395, "grad_norm": 0.8515625, "learning_rate": 0.0002690460217362136, "loss": 2.8823, "step": 1672 }, { "epoch": 0.0448958780592529, "grad_norm": 0.7890625, "learning_rate": 0.00026920703072588217, "loss": 2.7105, "step": 1673 }, { "epoch": 0.04492271361099184, "grad_norm": 0.82421875, "learning_rate": 0.00026936803971555075, "loss": 2.7793, "step": 1674 }, { "epoch": 0.044949549162730786, "grad_norm": 0.84765625, "learning_rate": 0.00026952904870521934, "loss": 2.9568, "step": 1675 }, { "epoch": 0.04497638471446973, "grad_norm": 0.859375, "learning_rate": 0.00026969005769488793, "loss": 2.8898, "step": 1676 }, { "epoch": 0.04500322026620868, "grad_norm": 0.82421875, "learning_rate": 0.0002698510666845565, "loss": 2.8166, "step": 1677 }, { "epoch": 0.04503005581794762, "grad_norm": 0.8203125, "learning_rate": 0.0002700120756742251, "loss": 2.7037, "step": 1678 }, { "epoch": 0.04505689136968656, "grad_norm": 0.8203125, "learning_rate": 0.0002701730846638937, "loss": 2.7706, "step": 1679 }, { "epoch": 0.0450837269214255, "grad_norm": 0.80078125, "learning_rate": 0.0002703340936535623, "loss": 2.7751, "step": 1680 }, { "epoch": 0.045110562473164445, "grad_norm": 0.8046875, "learning_rate": 0.0002704951026432309, "loss": 2.7818, "step": 1681 }, { "epoch": 0.045137398024903394, "grad_norm": 0.82421875, "learning_rate": 0.00027065611163289946, "loss": 2.8888, "step": 1682 }, { "epoch": 0.045164233576642336, "grad_norm": 0.80859375, "learning_rate": 0.00027081712062256805, "loss": 2.7256, "step": 1683 }, { "epoch": 0.04519106912838128, "grad_norm": 0.796875, "learning_rate": 0.0002709781296122367, "loss": 2.8889, "step": 1684 }, { "epoch": 0.04521790468012022, "grad_norm": 0.80078125, "learning_rate": 0.00027113913860190523, "loss": 2.8078, "step": 1685 }, { "epoch": 0.04524474023185917, "grad_norm": 0.77734375, "learning_rate": 0.0002713001475915738, "loss": 2.6761, "step": 1686 }, { "epoch": 0.04527157578359811, "grad_norm": 0.7890625, "learning_rate": 0.00027146115658124246, "loss": 2.8305, "step": 1687 }, { "epoch": 0.045298411335337053, "grad_norm": 0.79296875, "learning_rate": 0.00027162216557091105, "loss": 2.7802, "step": 1688 }, { "epoch": 0.045325246887075996, "grad_norm": 0.8203125, "learning_rate": 0.0002717831745605796, "loss": 2.8236, "step": 1689 }, { "epoch": 0.045352082438814945, "grad_norm": 0.8046875, "learning_rate": 0.00027194418355024817, "loss": 2.6709, "step": 1690 }, { "epoch": 0.04537891799055389, "grad_norm": 0.8203125, "learning_rate": 0.0002721051925399168, "loss": 2.9413, "step": 1691 }, { "epoch": 0.04540575354229283, "grad_norm": 0.8046875, "learning_rate": 0.0002722662015295854, "loss": 2.6753, "step": 1692 }, { "epoch": 0.04543258909403177, "grad_norm": 0.8125, "learning_rate": 0.00027242721051925394, "loss": 2.7471, "step": 1693 }, { "epoch": 0.04545942464577072, "grad_norm": 0.80078125, "learning_rate": 0.0002725882195089226, "loss": 2.7122, "step": 1694 }, { "epoch": 0.04548626019750966, "grad_norm": 0.8125, "learning_rate": 0.00027274922849859117, "loss": 2.7635, "step": 1695 }, { "epoch": 0.045513095749248604, "grad_norm": 0.796875, "learning_rate": 0.00027291023748825976, "loss": 2.6768, "step": 1696 }, { "epoch": 0.045539931300987546, "grad_norm": 0.81640625, "learning_rate": 0.0002730712464779283, "loss": 2.8393, "step": 1697 }, { "epoch": 0.045566766852726495, "grad_norm": 0.77734375, "learning_rate": 0.00027323225546759694, "loss": 2.6844, "step": 1698 }, { "epoch": 0.04559360240446544, "grad_norm": 0.796875, "learning_rate": 0.0002733932644572655, "loss": 2.7408, "step": 1699 }, { "epoch": 0.04562043795620438, "grad_norm": 0.79296875, "learning_rate": 0.00027355427344693406, "loss": 2.7425, "step": 1700 }, { "epoch": 0.04564727350794332, "grad_norm": 0.78515625, "learning_rate": 0.0002737152824366027, "loss": 2.7052, "step": 1701 }, { "epoch": 0.04567410905968227, "grad_norm": 0.7890625, "learning_rate": 0.0002738762914262713, "loss": 2.7462, "step": 1702 }, { "epoch": 0.04570094461142121, "grad_norm": 0.796875, "learning_rate": 0.0002740373004159399, "loss": 2.7462, "step": 1703 }, { "epoch": 0.045727780163160155, "grad_norm": 0.796875, "learning_rate": 0.00027419830940560847, "loss": 2.7119, "step": 1704 }, { "epoch": 0.0457546157148991, "grad_norm": 0.828125, "learning_rate": 0.00027435931839527706, "loss": 2.8244, "step": 1705 }, { "epoch": 0.04578145126663804, "grad_norm": 0.80078125, "learning_rate": 0.00027452032738494564, "loss": 2.7504, "step": 1706 }, { "epoch": 0.04580828681837699, "grad_norm": 0.80859375, "learning_rate": 0.00027468133637461423, "loss": 2.7937, "step": 1707 }, { "epoch": 0.04583512237011593, "grad_norm": 0.81640625, "learning_rate": 0.0002748423453642828, "loss": 2.9305, "step": 1708 }, { "epoch": 0.04586195792185487, "grad_norm": 0.79296875, "learning_rate": 0.0002750033543539514, "loss": 2.7309, "step": 1709 }, { "epoch": 0.045888793473593814, "grad_norm": 0.8046875, "learning_rate": 0.00027516436334362, "loss": 2.7752, "step": 1710 }, { "epoch": 0.04591562902533276, "grad_norm": 0.796875, "learning_rate": 0.0002753253723332886, "loss": 2.7777, "step": 1711 }, { "epoch": 0.045942464577071705, "grad_norm": 0.80078125, "learning_rate": 0.0002754863813229572, "loss": 2.7092, "step": 1712 }, { "epoch": 0.04596930012881065, "grad_norm": 0.7890625, "learning_rate": 0.00027564739031262577, "loss": 2.674, "step": 1713 }, { "epoch": 0.04599613568054959, "grad_norm": 0.82421875, "learning_rate": 0.00027580839930229435, "loss": 2.7669, "step": 1714 }, { "epoch": 0.04602297123228854, "grad_norm": 0.78125, "learning_rate": 0.00027596940829196294, "loss": 2.7157, "step": 1715 }, { "epoch": 0.04604980678402748, "grad_norm": 0.7890625, "learning_rate": 0.00027613041728163153, "loss": 2.7485, "step": 1716 }, { "epoch": 0.04607664233576642, "grad_norm": 0.80859375, "learning_rate": 0.0002762914262713001, "loss": 2.7884, "step": 1717 }, { "epoch": 0.046103477887505365, "grad_norm": 0.8046875, "learning_rate": 0.0002764524352609687, "loss": 2.6622, "step": 1718 }, { "epoch": 0.046130313439244314, "grad_norm": 0.7890625, "learning_rate": 0.0002766134442506373, "loss": 2.7758, "step": 1719 }, { "epoch": 0.046157148990983256, "grad_norm": 0.80859375, "learning_rate": 0.0002767744532403059, "loss": 2.6664, "step": 1720 }, { "epoch": 0.0461839845427222, "grad_norm": 0.8125, "learning_rate": 0.0002769354622299745, "loss": 2.7685, "step": 1721 }, { "epoch": 0.04621082009446114, "grad_norm": 0.8046875, "learning_rate": 0.00027709647121964306, "loss": 2.7715, "step": 1722 }, { "epoch": 0.04623765564620009, "grad_norm": 0.80859375, "learning_rate": 0.00027725748020931165, "loss": 2.9193, "step": 1723 }, { "epoch": 0.04626449119793903, "grad_norm": 0.78125, "learning_rate": 0.00027741848919898024, "loss": 2.7718, "step": 1724 }, { "epoch": 0.04629132674967797, "grad_norm": 0.83203125, "learning_rate": 0.00027757949818864883, "loss": 2.8218, "step": 1725 }, { "epoch": 0.046318162301416915, "grad_norm": 0.78515625, "learning_rate": 0.0002777405071783174, "loss": 2.5668, "step": 1726 }, { "epoch": 0.046344997853155864, "grad_norm": 0.796875, "learning_rate": 0.00027790151616798606, "loss": 2.7434, "step": 1727 }, { "epoch": 0.046371833404894806, "grad_norm": 0.83203125, "learning_rate": 0.0002780625251576546, "loss": 2.812, "step": 1728 }, { "epoch": 0.04639866895663375, "grad_norm": 0.81640625, "learning_rate": 0.0002782235341473232, "loss": 2.7126, "step": 1729 }, { "epoch": 0.04642550450837269, "grad_norm": 0.796875, "learning_rate": 0.0002783845431369918, "loss": 2.7482, "step": 1730 }, { "epoch": 0.04645234006011163, "grad_norm": 0.80078125, "learning_rate": 0.00027854555212666036, "loss": 2.8238, "step": 1731 }, { "epoch": 0.04647917561185058, "grad_norm": 0.78515625, "learning_rate": 0.00027870656111632895, "loss": 2.728, "step": 1732 }, { "epoch": 0.046506011163589524, "grad_norm": 0.80859375, "learning_rate": 0.00027886757010599754, "loss": 2.7129, "step": 1733 }, { "epoch": 0.046532846715328466, "grad_norm": 0.79296875, "learning_rate": 0.0002790285790956662, "loss": 2.8053, "step": 1734 }, { "epoch": 0.04655968226706741, "grad_norm": 0.80859375, "learning_rate": 0.0002791895880853347, "loss": 2.7256, "step": 1735 }, { "epoch": 0.04658651781880636, "grad_norm": 0.7890625, "learning_rate": 0.0002793505970750033, "loss": 2.7224, "step": 1736 }, { "epoch": 0.0466133533705453, "grad_norm": 0.8046875, "learning_rate": 0.00027951160606467195, "loss": 2.7245, "step": 1737 }, { "epoch": 0.04664018892228424, "grad_norm": 0.828125, "learning_rate": 0.00027967261505434054, "loss": 2.8445, "step": 1738 }, { "epoch": 0.04666702447402318, "grad_norm": 0.80078125, "learning_rate": 0.00027983362404400907, "loss": 2.688, "step": 1739 }, { "epoch": 0.04669386002576213, "grad_norm": 0.796875, "learning_rate": 0.00027999463303367766, "loss": 2.7506, "step": 1740 }, { "epoch": 0.046720695577501074, "grad_norm": 0.7890625, "learning_rate": 0.0002801556420233463, "loss": 2.626, "step": 1741 }, { "epoch": 0.046747531129240016, "grad_norm": 0.80078125, "learning_rate": 0.0002803166510130149, "loss": 2.6589, "step": 1742 }, { "epoch": 0.04677436668097896, "grad_norm": 0.77734375, "learning_rate": 0.0002804776600026834, "loss": 2.6998, "step": 1743 }, { "epoch": 0.04680120223271791, "grad_norm": 0.8125, "learning_rate": 0.00028063866899235207, "loss": 2.7769, "step": 1744 }, { "epoch": 0.04682803778445685, "grad_norm": 0.82421875, "learning_rate": 0.00028079967798202066, "loss": 2.8107, "step": 1745 }, { "epoch": 0.04685487333619579, "grad_norm": 0.80078125, "learning_rate": 0.00028096068697168924, "loss": 2.7845, "step": 1746 }, { "epoch": 0.046881708887934734, "grad_norm": 0.8125, "learning_rate": 0.00028112169596135783, "loss": 2.6414, "step": 1747 }, { "epoch": 0.04690854443967368, "grad_norm": 0.8046875, "learning_rate": 0.0002812827049510264, "loss": 2.6287, "step": 1748 }, { "epoch": 0.046935379991412625, "grad_norm": 0.80078125, "learning_rate": 0.000281443713940695, "loss": 2.7508, "step": 1749 }, { "epoch": 0.04696221554315157, "grad_norm": 0.83203125, "learning_rate": 0.0002816047229303636, "loss": 2.7736, "step": 1750 }, { "epoch": 0.04698905109489051, "grad_norm": 0.80859375, "learning_rate": 0.0002817657319200322, "loss": 2.732, "step": 1751 }, { "epoch": 0.04701588664662946, "grad_norm": 0.78515625, "learning_rate": 0.0002819267409097008, "loss": 2.5665, "step": 1752 }, { "epoch": 0.0470427221983684, "grad_norm": 0.7578125, "learning_rate": 0.00028208774989936937, "loss": 2.6569, "step": 1753 }, { "epoch": 0.04706955775010734, "grad_norm": 0.796875, "learning_rate": 0.00028224875888903795, "loss": 2.7756, "step": 1754 }, { "epoch": 0.047096393301846284, "grad_norm": 0.81640625, "learning_rate": 0.00028240976787870654, "loss": 2.6592, "step": 1755 }, { "epoch": 0.047123228853585226, "grad_norm": 0.78515625, "learning_rate": 0.00028257077686837513, "loss": 2.7105, "step": 1756 }, { "epoch": 0.047150064405324175, "grad_norm": 0.80859375, "learning_rate": 0.0002827317858580437, "loss": 2.7084, "step": 1757 }, { "epoch": 0.04717689995706312, "grad_norm": 0.79296875, "learning_rate": 0.0002828927948477123, "loss": 2.66, "step": 1758 }, { "epoch": 0.04720373550880206, "grad_norm": 0.83203125, "learning_rate": 0.0002830538038373809, "loss": 2.7446, "step": 1759 }, { "epoch": 0.047230571060541, "grad_norm": 0.828125, "learning_rate": 0.0002832148128270495, "loss": 2.7413, "step": 1760 }, { "epoch": 0.04725740661227995, "grad_norm": 0.82421875, "learning_rate": 0.0002833758218167181, "loss": 2.7451, "step": 1761 }, { "epoch": 0.04728424216401889, "grad_norm": 0.7734375, "learning_rate": 0.00028353683080638666, "loss": 2.6443, "step": 1762 }, { "epoch": 0.047311077715757835, "grad_norm": 0.77734375, "learning_rate": 0.00028369783979605525, "loss": 2.7488, "step": 1763 }, { "epoch": 0.04733791326749678, "grad_norm": 0.77734375, "learning_rate": 0.00028385884878572384, "loss": 2.6894, "step": 1764 }, { "epoch": 0.047364748819235726, "grad_norm": 0.80078125, "learning_rate": 0.00028401985777539243, "loss": 2.6595, "step": 1765 }, { "epoch": 0.04739158437097467, "grad_norm": 0.76953125, "learning_rate": 0.000284180866765061, "loss": 2.7148, "step": 1766 }, { "epoch": 0.04741841992271361, "grad_norm": 0.76953125, "learning_rate": 0.0002843418757547296, "loss": 2.6249, "step": 1767 }, { "epoch": 0.04744525547445255, "grad_norm": 0.83203125, "learning_rate": 0.0002845028847443982, "loss": 2.8061, "step": 1768 }, { "epoch": 0.0474720910261915, "grad_norm": 0.7890625, "learning_rate": 0.0002846638937340668, "loss": 2.7255, "step": 1769 }, { "epoch": 0.04749892657793044, "grad_norm": 0.81640625, "learning_rate": 0.00028482490272373537, "loss": 2.7558, "step": 1770 }, { "epoch": 0.047525762129669386, "grad_norm": 0.80859375, "learning_rate": 0.00028498591171340396, "loss": 2.6417, "step": 1771 }, { "epoch": 0.04755259768140833, "grad_norm": 0.77734375, "learning_rate": 0.00028514692070307255, "loss": 2.6428, "step": 1772 }, { "epoch": 0.04757943323314728, "grad_norm": 0.7890625, "learning_rate": 0.0002853079296927412, "loss": 2.6798, "step": 1773 }, { "epoch": 0.04760626878488622, "grad_norm": 0.76953125, "learning_rate": 0.0002854689386824097, "loss": 2.6662, "step": 1774 }, { "epoch": 0.04763310433662516, "grad_norm": 0.78125, "learning_rate": 0.0002856299476720783, "loss": 2.7113, "step": 1775 }, { "epoch": 0.0476599398883641, "grad_norm": 0.78515625, "learning_rate": 0.0002857909566617469, "loss": 2.6003, "step": 1776 }, { "epoch": 0.04768677544010305, "grad_norm": 0.79296875, "learning_rate": 0.00028595196565141555, "loss": 2.7548, "step": 1777 }, { "epoch": 0.047713610991841994, "grad_norm": 0.80078125, "learning_rate": 0.0002861129746410841, "loss": 2.6895, "step": 1778 }, { "epoch": 0.047740446543580936, "grad_norm": 0.82421875, "learning_rate": 0.00028627398363075267, "loss": 2.7796, "step": 1779 }, { "epoch": 0.04776728209531988, "grad_norm": 0.7890625, "learning_rate": 0.0002864349926204213, "loss": 2.6731, "step": 1780 }, { "epoch": 0.04779411764705882, "grad_norm": 0.796875, "learning_rate": 0.0002865960016100899, "loss": 2.7117, "step": 1781 }, { "epoch": 0.04782095319879777, "grad_norm": 0.765625, "learning_rate": 0.00028675701059975844, "loss": 2.6182, "step": 1782 }, { "epoch": 0.04784778875053671, "grad_norm": 0.76953125, "learning_rate": 0.000286918019589427, "loss": 2.7254, "step": 1783 }, { "epoch": 0.04787462430227565, "grad_norm": 0.78125, "learning_rate": 0.00028707902857909567, "loss": 2.6994, "step": 1784 }, { "epoch": 0.047901459854014596, "grad_norm": 0.79296875, "learning_rate": 0.00028724003756876426, "loss": 2.7859, "step": 1785 }, { "epoch": 0.047928295405753545, "grad_norm": 0.8125, "learning_rate": 0.0002874010465584328, "loss": 2.7528, "step": 1786 }, { "epoch": 0.04795513095749249, "grad_norm": 0.7890625, "learning_rate": 0.00028756205554810143, "loss": 2.665, "step": 1787 }, { "epoch": 0.04798196650923143, "grad_norm": 0.81640625, "learning_rate": 0.00028772306453777, "loss": 2.7065, "step": 1788 }, { "epoch": 0.04800880206097037, "grad_norm": 0.7890625, "learning_rate": 0.00028788407352743856, "loss": 2.8113, "step": 1789 }, { "epoch": 0.04803563761270932, "grad_norm": 0.8046875, "learning_rate": 0.0002880450825171072, "loss": 2.7008, "step": 1790 }, { "epoch": 0.04806247316444826, "grad_norm": 0.80078125, "learning_rate": 0.0002882060915067758, "loss": 2.8022, "step": 1791 }, { "epoch": 0.048089308716187204, "grad_norm": 0.78515625, "learning_rate": 0.0002883671004964444, "loss": 2.6126, "step": 1792 }, { "epoch": 0.048116144267926146, "grad_norm": 0.79296875, "learning_rate": 0.0002885281094861129, "loss": 2.6755, "step": 1793 }, { "epoch": 0.048142979819665095, "grad_norm": 0.7734375, "learning_rate": 0.00028868911847578155, "loss": 2.606, "step": 1794 }, { "epoch": 0.04816981537140404, "grad_norm": 0.7890625, "learning_rate": 0.00028885012746545014, "loss": 2.718, "step": 1795 }, { "epoch": 0.04819665092314298, "grad_norm": 0.81640625, "learning_rate": 0.00028901113645511873, "loss": 2.6636, "step": 1796 }, { "epoch": 0.04822348647488192, "grad_norm": 0.80859375, "learning_rate": 0.0002891721454447873, "loss": 2.6607, "step": 1797 }, { "epoch": 0.04825032202662087, "grad_norm": 0.80859375, "learning_rate": 0.0002893331544344559, "loss": 2.6939, "step": 1798 }, { "epoch": 0.04827715757835981, "grad_norm": 0.79296875, "learning_rate": 0.0002894941634241245, "loss": 2.5555, "step": 1799 }, { "epoch": 0.048303993130098755, "grad_norm": 0.80078125, "learning_rate": 0.0002896551724137931, "loss": 2.6875, "step": 1800 }, { "epoch": 0.0483308286818377, "grad_norm": 0.796875, "learning_rate": 0.0002898161814034617, "loss": 2.7783, "step": 1801 }, { "epoch": 0.048357664233576646, "grad_norm": 0.7890625, "learning_rate": 0.00028997719039313026, "loss": 2.7044, "step": 1802 }, { "epoch": 0.04838449978531559, "grad_norm": 0.82421875, "learning_rate": 0.00029013819938279885, "loss": 2.7031, "step": 1803 }, { "epoch": 0.04841133533705453, "grad_norm": 0.8046875, "learning_rate": 0.00029029920837246744, "loss": 2.6871, "step": 1804 }, { "epoch": 0.04843817088879347, "grad_norm": 0.7890625, "learning_rate": 0.00029046021736213603, "loss": 2.6469, "step": 1805 }, { "epoch": 0.048465006440532414, "grad_norm": 0.8046875, "learning_rate": 0.0002906212263518046, "loss": 2.6689, "step": 1806 }, { "epoch": 0.04849184199227136, "grad_norm": 0.7890625, "learning_rate": 0.0002907822353414732, "loss": 2.6359, "step": 1807 }, { "epoch": 0.048518677544010305, "grad_norm": 0.78125, "learning_rate": 0.0002909432443311418, "loss": 2.618, "step": 1808 }, { "epoch": 0.04854551309574925, "grad_norm": 0.8046875, "learning_rate": 0.0002911042533208104, "loss": 2.7628, "step": 1809 }, { "epoch": 0.04857234864748819, "grad_norm": 0.76953125, "learning_rate": 0.00029126526231047897, "loss": 2.6516, "step": 1810 }, { "epoch": 0.04859918419922714, "grad_norm": 0.78515625, "learning_rate": 0.00029142627130014756, "loss": 2.5757, "step": 1811 }, { "epoch": 0.04862601975096608, "grad_norm": 0.76171875, "learning_rate": 0.00029158728028981615, "loss": 2.633, "step": 1812 }, { "epoch": 0.04865285530270502, "grad_norm": 0.8359375, "learning_rate": 0.00029174828927948474, "loss": 2.6378, "step": 1813 }, { "epoch": 0.048679690854443965, "grad_norm": 0.9296875, "learning_rate": 0.0002919092982691533, "loss": 2.7088, "step": 1814 }, { "epoch": 0.048706526406182914, "grad_norm": 0.82421875, "learning_rate": 0.0002920703072588219, "loss": 2.7359, "step": 1815 }, { "epoch": 0.048733361957921856, "grad_norm": 0.84375, "learning_rate": 0.0002922313162484905, "loss": 2.7962, "step": 1816 }, { "epoch": 0.0487601975096608, "grad_norm": 0.828125, "learning_rate": 0.0002923923252381591, "loss": 2.7257, "step": 1817 }, { "epoch": 0.04878703306139974, "grad_norm": 0.8671875, "learning_rate": 0.0002925533342278277, "loss": 2.8456, "step": 1818 }, { "epoch": 0.04881386861313869, "grad_norm": 0.81640625, "learning_rate": 0.00029271434321749627, "loss": 2.7069, "step": 1819 }, { "epoch": 0.04884070416487763, "grad_norm": 0.82421875, "learning_rate": 0.00029287535220716486, "loss": 2.6791, "step": 1820 }, { "epoch": 0.04886753971661657, "grad_norm": 0.80859375, "learning_rate": 0.00029303636119683345, "loss": 2.7579, "step": 1821 }, { "epoch": 0.048894375268355515, "grad_norm": 0.78125, "learning_rate": 0.00029319737018650204, "loss": 2.5808, "step": 1822 }, { "epoch": 0.048921210820094464, "grad_norm": 0.7890625, "learning_rate": 0.0002933583791761707, "loss": 2.6581, "step": 1823 }, { "epoch": 0.048948046371833406, "grad_norm": 0.76953125, "learning_rate": 0.0002935193881658392, "loss": 2.6671, "step": 1824 }, { "epoch": 0.04897488192357235, "grad_norm": 0.77734375, "learning_rate": 0.0002936803971555078, "loss": 2.7139, "step": 1825 }, { "epoch": 0.04900171747531129, "grad_norm": 0.79296875, "learning_rate": 0.0002938414061451764, "loss": 2.8145, "step": 1826 }, { "epoch": 0.04902855302705024, "grad_norm": 0.80859375, "learning_rate": 0.00029400241513484503, "loss": 2.7731, "step": 1827 }, { "epoch": 0.04905538857878918, "grad_norm": 0.765625, "learning_rate": 0.00029416342412451357, "loss": 2.6558, "step": 1828 }, { "epoch": 0.049082224130528124, "grad_norm": 0.78125, "learning_rate": 0.00029432443311418216, "loss": 2.7318, "step": 1829 }, { "epoch": 0.049109059682267066, "grad_norm": 0.796875, "learning_rate": 0.0002944854421038508, "loss": 2.6576, "step": 1830 }, { "epoch": 0.04913589523400601, "grad_norm": 0.8046875, "learning_rate": 0.0002946464510935194, "loss": 2.7598, "step": 1831 }, { "epoch": 0.04916273078574496, "grad_norm": 0.7890625, "learning_rate": 0.0002948074600831879, "loss": 2.7584, "step": 1832 }, { "epoch": 0.0491895663374839, "grad_norm": 0.7890625, "learning_rate": 0.00029496846907285657, "loss": 2.7653, "step": 1833 }, { "epoch": 0.04921640188922284, "grad_norm": 0.76171875, "learning_rate": 0.00029512947806252515, "loss": 2.6159, "step": 1834 }, { "epoch": 0.04924323744096178, "grad_norm": 0.76171875, "learning_rate": 0.00029529048705219374, "loss": 2.645, "step": 1835 }, { "epoch": 0.04927007299270073, "grad_norm": 0.78125, "learning_rate": 0.0002954514960418623, "loss": 2.7059, "step": 1836 }, { "epoch": 0.049296908544439674, "grad_norm": 0.78515625, "learning_rate": 0.0002956125050315309, "loss": 2.711, "step": 1837 }, { "epoch": 0.049323744096178616, "grad_norm": 0.76171875, "learning_rate": 0.0002957735140211995, "loss": 2.5697, "step": 1838 }, { "epoch": 0.04935057964791756, "grad_norm": 0.79296875, "learning_rate": 0.0002959345230108681, "loss": 2.73, "step": 1839 }, { "epoch": 0.04937741519965651, "grad_norm": 0.7734375, "learning_rate": 0.0002960955320005367, "loss": 2.6544, "step": 1840 }, { "epoch": 0.04940425075139545, "grad_norm": 0.77734375, "learning_rate": 0.0002962565409902053, "loss": 2.7151, "step": 1841 }, { "epoch": 0.04943108630313439, "grad_norm": 0.78515625, "learning_rate": 0.00029641754997987386, "loss": 2.6956, "step": 1842 }, { "epoch": 0.049457921854873334, "grad_norm": 0.76171875, "learning_rate": 0.0002965785589695424, "loss": 2.6631, "step": 1843 }, { "epoch": 0.04948475740661228, "grad_norm": 0.76953125, "learning_rate": 0.00029673956795921104, "loss": 2.51, "step": 1844 }, { "epoch": 0.049511592958351225, "grad_norm": 0.76171875, "learning_rate": 0.00029690057694887963, "loss": 2.6566, "step": 1845 }, { "epoch": 0.04953842851009017, "grad_norm": 0.7734375, "learning_rate": 0.0002970615859385482, "loss": 2.6371, "step": 1846 }, { "epoch": 0.04956526406182911, "grad_norm": 0.7734375, "learning_rate": 0.0002972225949282168, "loss": 2.6144, "step": 1847 }, { "epoch": 0.04959209961356806, "grad_norm": 0.78515625, "learning_rate": 0.0002973836039178854, "loss": 2.7074, "step": 1848 }, { "epoch": 0.049618935165307, "grad_norm": 0.75, "learning_rate": 0.000297544612907554, "loss": 2.5936, "step": 1849 }, { "epoch": 0.04964577071704594, "grad_norm": 0.7734375, "learning_rate": 0.00029770562189722257, "loss": 2.6453, "step": 1850 }, { "epoch": 0.049672606268784884, "grad_norm": 0.76953125, "learning_rate": 0.00029786663088689116, "loss": 2.6662, "step": 1851 }, { "epoch": 0.04969944182052383, "grad_norm": 0.78515625, "learning_rate": 0.00029802763987655975, "loss": 2.6702, "step": 1852 }, { "epoch": 0.049726277372262775, "grad_norm": 0.75390625, "learning_rate": 0.00029818864886622834, "loss": 2.6058, "step": 1853 }, { "epoch": 0.04975311292400172, "grad_norm": 0.7734375, "learning_rate": 0.0002983496578558969, "loss": 2.6713, "step": 1854 }, { "epoch": 0.04977994847574066, "grad_norm": 0.7578125, "learning_rate": 0.0002985106668455655, "loss": 2.669, "step": 1855 }, { "epoch": 0.0498067840274796, "grad_norm": 0.7578125, "learning_rate": 0.0002986716758352341, "loss": 2.6637, "step": 1856 }, { "epoch": 0.04983361957921855, "grad_norm": 0.7734375, "learning_rate": 0.0002988326848249027, "loss": 2.6224, "step": 1857 }, { "epoch": 0.04986045513095749, "grad_norm": 0.75390625, "learning_rate": 0.0002989936938145713, "loss": 2.6527, "step": 1858 }, { "epoch": 0.049887290682696435, "grad_norm": 0.78515625, "learning_rate": 0.00029915470280423987, "loss": 2.7686, "step": 1859 }, { "epoch": 0.04991412623443538, "grad_norm": 0.75390625, "learning_rate": 0.00029931571179390846, "loss": 2.6808, "step": 1860 }, { "epoch": 0.049940961786174326, "grad_norm": 0.7890625, "learning_rate": 0.00029947672078357705, "loss": 2.7117, "step": 1861 }, { "epoch": 0.04996779733791327, "grad_norm": 0.765625, "learning_rate": 0.00029963772977324564, "loss": 2.5946, "step": 1862 }, { "epoch": 0.04999463288965221, "grad_norm": 0.7734375, "learning_rate": 0.0002997987387629142, "loss": 2.6952, "step": 1863 }, { "epoch": 0.05002146844139115, "grad_norm": 0.7578125, "learning_rate": 0.0002999597477525828, "loss": 2.5601, "step": 1864 }, { "epoch": 0.0500483039931301, "grad_norm": 0.7578125, "learning_rate": 0.0003001207567422514, "loss": 2.6048, "step": 1865 }, { "epoch": 0.05007513954486904, "grad_norm": 0.7734375, "learning_rate": 0.00030028176573192004, "loss": 2.7445, "step": 1866 }, { "epoch": 0.050101975096607985, "grad_norm": 0.7890625, "learning_rate": 0.00030044277472158863, "loss": 2.7306, "step": 1867 }, { "epoch": 0.05012881064834693, "grad_norm": 0.75390625, "learning_rate": 0.0003006037837112572, "loss": 2.5768, "step": 1868 }, { "epoch": 0.05015564620008588, "grad_norm": 0.77734375, "learning_rate": 0.00030076479270092576, "loss": 2.6199, "step": 1869 }, { "epoch": 0.05018248175182482, "grad_norm": 0.76171875, "learning_rate": 0.00030092580169059434, "loss": 2.662, "step": 1870 }, { "epoch": 0.05020931730356376, "grad_norm": 0.78515625, "learning_rate": 0.00030108681068026293, "loss": 2.6471, "step": 1871 }, { "epoch": 0.0502361528553027, "grad_norm": 0.76171875, "learning_rate": 0.0003012478196699315, "loss": 2.7031, "step": 1872 }, { "epoch": 0.05026298840704165, "grad_norm": 0.75390625, "learning_rate": 0.00030140882865960017, "loss": 2.5558, "step": 1873 }, { "epoch": 0.050289823958780594, "grad_norm": 0.75390625, "learning_rate": 0.00030156983764926875, "loss": 2.5877, "step": 1874 }, { "epoch": 0.050316659510519536, "grad_norm": 0.765625, "learning_rate": 0.00030173084663893734, "loss": 2.6018, "step": 1875 }, { "epoch": 0.05034349506225848, "grad_norm": 0.78125, "learning_rate": 0.0003018918556286059, "loss": 2.7474, "step": 1876 }, { "epoch": 0.05037033061399743, "grad_norm": 0.76171875, "learning_rate": 0.00030205286461827447, "loss": 2.7287, "step": 1877 }, { "epoch": 0.05039716616573637, "grad_norm": 0.7578125, "learning_rate": 0.00030221387360794305, "loss": 2.627, "step": 1878 }, { "epoch": 0.05042400171747531, "grad_norm": 0.75390625, "learning_rate": 0.00030237488259761164, "loss": 2.5963, "step": 1879 }, { "epoch": 0.05045083726921425, "grad_norm": 0.78125, "learning_rate": 0.0003025358915872803, "loss": 2.7031, "step": 1880 }, { "epoch": 0.050477672820953196, "grad_norm": 0.7890625, "learning_rate": 0.0003026969005769489, "loss": 2.6682, "step": 1881 }, { "epoch": 0.050504508372692145, "grad_norm": 0.765625, "learning_rate": 0.00030285790956661746, "loss": 2.5829, "step": 1882 }, { "epoch": 0.05053134392443109, "grad_norm": 0.78125, "learning_rate": 0.00030301891855628605, "loss": 2.7747, "step": 1883 }, { "epoch": 0.05055817947617003, "grad_norm": 0.7734375, "learning_rate": 0.0003031799275459546, "loss": 2.6266, "step": 1884 }, { "epoch": 0.05058501502790897, "grad_norm": 0.75, "learning_rate": 0.0003033409365356232, "loss": 2.5975, "step": 1885 }, { "epoch": 0.05061185057964792, "grad_norm": 0.79296875, "learning_rate": 0.00030350194552529176, "loss": 2.663, "step": 1886 }, { "epoch": 0.05063868613138686, "grad_norm": 0.77734375, "learning_rate": 0.0003036629545149604, "loss": 2.5805, "step": 1887 }, { "epoch": 0.050665521683125804, "grad_norm": 0.7890625, "learning_rate": 0.000303823963504629, "loss": 2.7054, "step": 1888 }, { "epoch": 0.050692357234864746, "grad_norm": 0.78125, "learning_rate": 0.0003039849724942976, "loss": 2.6711, "step": 1889 }, { "epoch": 0.050719192786603695, "grad_norm": 0.76171875, "learning_rate": 0.00030414598148396617, "loss": 2.5925, "step": 1890 }, { "epoch": 0.05074602833834264, "grad_norm": 0.75390625, "learning_rate": 0.00030430699047363476, "loss": 2.6376, "step": 1891 }, { "epoch": 0.05077286389008158, "grad_norm": 0.765625, "learning_rate": 0.0003044679994633033, "loss": 2.6712, "step": 1892 }, { "epoch": 0.05079969944182052, "grad_norm": 0.75, "learning_rate": 0.00030462900845297194, "loss": 2.4826, "step": 1893 }, { "epoch": 0.05082653499355947, "grad_norm": 0.796875, "learning_rate": 0.0003047900174426405, "loss": 2.6671, "step": 1894 }, { "epoch": 0.05085337054529841, "grad_norm": 0.7734375, "learning_rate": 0.0003049510264323091, "loss": 2.5909, "step": 1895 }, { "epoch": 0.050880206097037355, "grad_norm": 0.7421875, "learning_rate": 0.0003051120354219777, "loss": 2.6233, "step": 1896 }, { "epoch": 0.0509070416487763, "grad_norm": 0.7734375, "learning_rate": 0.0003052730444116463, "loss": 2.664, "step": 1897 }, { "epoch": 0.050933877200515246, "grad_norm": 0.78125, "learning_rate": 0.0003054340534013149, "loss": 2.6587, "step": 1898 }, { "epoch": 0.05096071275225419, "grad_norm": 0.76953125, "learning_rate": 0.0003055950623909835, "loss": 2.6427, "step": 1899 }, { "epoch": 0.05098754830399313, "grad_norm": 0.7734375, "learning_rate": 0.00030575607138065206, "loss": 2.5914, "step": 1900 }, { "epoch": 0.05101438385573207, "grad_norm": 0.78515625, "learning_rate": 0.00030591708037032065, "loss": 2.6501, "step": 1901 }, { "epoch": 0.05104121940747102, "grad_norm": 0.78125, "learning_rate": 0.00030607808935998924, "loss": 2.5698, "step": 1902 }, { "epoch": 0.05106805495920996, "grad_norm": 0.78515625, "learning_rate": 0.0003062390983496578, "loss": 2.6415, "step": 1903 }, { "epoch": 0.051094890510948905, "grad_norm": 0.78515625, "learning_rate": 0.0003064001073393264, "loss": 2.6684, "step": 1904 }, { "epoch": 0.05112172606268785, "grad_norm": 0.74609375, "learning_rate": 0.000306561116328995, "loss": 2.4899, "step": 1905 }, { "epoch": 0.05114856161442679, "grad_norm": 0.79296875, "learning_rate": 0.00030672212531866364, "loss": 2.6315, "step": 1906 }, { "epoch": 0.05117539716616574, "grad_norm": 0.80859375, "learning_rate": 0.0003068831343083322, "loss": 2.7958, "step": 1907 }, { "epoch": 0.05120223271790468, "grad_norm": 0.75, "learning_rate": 0.00030704414329800077, "loss": 2.629, "step": 1908 }, { "epoch": 0.05122906826964362, "grad_norm": 0.80078125, "learning_rate": 0.00030720515228766936, "loss": 2.5741, "step": 1909 }, { "epoch": 0.051255903821382565, "grad_norm": 0.7890625, "learning_rate": 0.00030736616127733794, "loss": 2.6328, "step": 1910 }, { "epoch": 0.051282739373121514, "grad_norm": 0.78515625, "learning_rate": 0.00030752717026700653, "loss": 2.6861, "step": 1911 }, { "epoch": 0.051309574924860456, "grad_norm": 0.78125, "learning_rate": 0.0003076881792566751, "loss": 2.6668, "step": 1912 }, { "epoch": 0.0513364104765994, "grad_norm": 0.7734375, "learning_rate": 0.00030784918824634377, "loss": 2.6163, "step": 1913 }, { "epoch": 0.05136324602833834, "grad_norm": 0.7734375, "learning_rate": 0.00030801019723601235, "loss": 2.7277, "step": 1914 }, { "epoch": 0.05139008158007729, "grad_norm": 0.79296875, "learning_rate": 0.0003081712062256809, "loss": 2.6293, "step": 1915 }, { "epoch": 0.05141691713181623, "grad_norm": 0.7734375, "learning_rate": 0.0003083322152153495, "loss": 2.6796, "step": 1916 }, { "epoch": 0.05144375268355517, "grad_norm": 0.7578125, "learning_rate": 0.00030849322420501807, "loss": 2.6127, "step": 1917 }, { "epoch": 0.051470588235294115, "grad_norm": 0.78125, "learning_rate": 0.00030865423319468665, "loss": 2.6861, "step": 1918 }, { "epoch": 0.051497423787033064, "grad_norm": 0.7734375, "learning_rate": 0.0003088152421843553, "loss": 2.6105, "step": 1919 }, { "epoch": 0.051524259338772006, "grad_norm": 0.75390625, "learning_rate": 0.0003089762511740239, "loss": 2.5653, "step": 1920 }, { "epoch": 0.05155109489051095, "grad_norm": 0.75390625, "learning_rate": 0.0003091372601636925, "loss": 2.6068, "step": 1921 }, { "epoch": 0.05157793044224989, "grad_norm": 0.75390625, "learning_rate": 0.00030929826915336106, "loss": 2.5793, "step": 1922 }, { "epoch": 0.05160476599398884, "grad_norm": 0.765625, "learning_rate": 0.0003094592781430296, "loss": 2.5819, "step": 1923 }, { "epoch": 0.05163160154572778, "grad_norm": 0.7734375, "learning_rate": 0.0003096202871326982, "loss": 2.6622, "step": 1924 }, { "epoch": 0.051658437097466724, "grad_norm": 0.76171875, "learning_rate": 0.0003097812961223668, "loss": 2.6131, "step": 1925 }, { "epoch": 0.051685272649205666, "grad_norm": 0.76953125, "learning_rate": 0.0003099423051120354, "loss": 2.4712, "step": 1926 }, { "epoch": 0.051712108200944615, "grad_norm": 0.765625, "learning_rate": 0.000310103314101704, "loss": 2.5728, "step": 1927 }, { "epoch": 0.05173894375268356, "grad_norm": 0.7734375, "learning_rate": 0.0003102643230913726, "loss": 2.6627, "step": 1928 }, { "epoch": 0.0517657793044225, "grad_norm": 0.78125, "learning_rate": 0.0003104253320810412, "loss": 2.6597, "step": 1929 }, { "epoch": 0.05179261485616144, "grad_norm": 0.75390625, "learning_rate": 0.0003105863410707097, "loss": 2.5685, "step": 1930 }, { "epoch": 0.05181945040790038, "grad_norm": 0.7734375, "learning_rate": 0.0003107473500603783, "loss": 2.6623, "step": 1931 }, { "epoch": 0.05184628595963933, "grad_norm": 0.78125, "learning_rate": 0.0003109083590500469, "loss": 2.6374, "step": 1932 }, { "epoch": 0.051873121511378274, "grad_norm": 0.765625, "learning_rate": 0.00031106936803971554, "loss": 2.6155, "step": 1933 }, { "epoch": 0.051899957063117216, "grad_norm": 0.7890625, "learning_rate": 0.0003112303770293841, "loss": 2.6944, "step": 1934 }, { "epoch": 0.05192679261485616, "grad_norm": 0.73828125, "learning_rate": 0.0003113913860190527, "loss": 2.5728, "step": 1935 }, { "epoch": 0.05195362816659511, "grad_norm": 0.765625, "learning_rate": 0.0003115523950087213, "loss": 2.5618, "step": 1936 }, { "epoch": 0.05198046371833405, "grad_norm": 0.74609375, "learning_rate": 0.0003117134039983899, "loss": 2.5707, "step": 1937 }, { "epoch": 0.05200729927007299, "grad_norm": 0.73046875, "learning_rate": 0.0003118744129880584, "loss": 2.4062, "step": 1938 }, { "epoch": 0.052034134821811934, "grad_norm": 0.796875, "learning_rate": 0.000312035421977727, "loss": 2.6973, "step": 1939 }, { "epoch": 0.05206097037355088, "grad_norm": 0.7734375, "learning_rate": 0.00031219643096739566, "loss": 2.5731, "step": 1940 }, { "epoch": 0.052087805925289825, "grad_norm": 0.75390625, "learning_rate": 0.00031235743995706425, "loss": 2.629, "step": 1941 }, { "epoch": 0.05211464147702877, "grad_norm": 0.734375, "learning_rate": 0.00031251844894673284, "loss": 2.6312, "step": 1942 }, { "epoch": 0.05214147702876771, "grad_norm": 0.7578125, "learning_rate": 0.0003126794579364014, "loss": 2.5582, "step": 1943 }, { "epoch": 0.05216831258050666, "grad_norm": 0.74609375, "learning_rate": 0.00031284046692607, "loss": 2.5593, "step": 1944 }, { "epoch": 0.0521951481322456, "grad_norm": 0.75, "learning_rate": 0.00031300147591573866, "loss": 2.6423, "step": 1945 }, { "epoch": 0.05222198368398454, "grad_norm": 0.76171875, "learning_rate": 0.00031316248490540714, "loss": 2.5516, "step": 1946 }, { "epoch": 0.052248819235723484, "grad_norm": 0.75390625, "learning_rate": 0.0003133234938950758, "loss": 2.5479, "step": 1947 }, { "epoch": 0.05227565478746243, "grad_norm": 0.7421875, "learning_rate": 0.00031348450288474437, "loss": 2.5045, "step": 1948 }, { "epoch": 0.052302490339201375, "grad_norm": 0.76171875, "learning_rate": 0.00031364551187441296, "loss": 2.5557, "step": 1949 }, { "epoch": 0.05232932589094032, "grad_norm": 0.7578125, "learning_rate": 0.00031380652086408154, "loss": 2.6192, "step": 1950 }, { "epoch": 0.05235616144267926, "grad_norm": 0.76953125, "learning_rate": 0.00031396752985375013, "loss": 2.6535, "step": 1951 }, { "epoch": 0.05238299699441821, "grad_norm": 0.765625, "learning_rate": 0.0003141285388434188, "loss": 2.5302, "step": 1952 }, { "epoch": 0.05240983254615715, "grad_norm": 0.7578125, "learning_rate": 0.00031428954783308736, "loss": 2.5589, "step": 1953 }, { "epoch": 0.05243666809789609, "grad_norm": 0.7578125, "learning_rate": 0.0003144505568227559, "loss": 2.5087, "step": 1954 }, { "epoch": 0.052463503649635035, "grad_norm": 0.78515625, "learning_rate": 0.0003146115658124245, "loss": 2.6568, "step": 1955 }, { "epoch": 0.05249033920137398, "grad_norm": 0.75, "learning_rate": 0.0003147725748020931, "loss": 2.5641, "step": 1956 }, { "epoch": 0.052517174753112926, "grad_norm": 0.74609375, "learning_rate": 0.00031493358379176167, "loss": 2.5384, "step": 1957 }, { "epoch": 0.05254401030485187, "grad_norm": 0.70703125, "learning_rate": 0.00031509459278143025, "loss": 2.3863, "step": 1958 }, { "epoch": 0.05257084585659081, "grad_norm": 0.75390625, "learning_rate": 0.0003152556017710989, "loss": 2.5652, "step": 1959 }, { "epoch": 0.05259768140832975, "grad_norm": 0.734375, "learning_rate": 0.0003154166107607675, "loss": 2.4931, "step": 1960 }, { "epoch": 0.0526245169600687, "grad_norm": 0.7734375, "learning_rate": 0.000315577619750436, "loss": 2.6572, "step": 1961 }, { "epoch": 0.05265135251180764, "grad_norm": 0.75390625, "learning_rate": 0.0003157386287401046, "loss": 2.6319, "step": 1962 }, { "epoch": 0.052678188063546585, "grad_norm": 0.828125, "learning_rate": 0.0003158996377297732, "loss": 2.7291, "step": 1963 }, { "epoch": 0.05270502361528553, "grad_norm": 0.75, "learning_rate": 0.0003160606467194418, "loss": 2.5863, "step": 1964 }, { "epoch": 0.05273185916702448, "grad_norm": 0.77734375, "learning_rate": 0.0003162216557091104, "loss": 2.6025, "step": 1965 }, { "epoch": 0.05275869471876342, "grad_norm": 0.734375, "learning_rate": 0.000316382664698779, "loss": 2.5337, "step": 1966 }, { "epoch": 0.05278553027050236, "grad_norm": 0.75390625, "learning_rate": 0.0003165436736884476, "loss": 2.7117, "step": 1967 }, { "epoch": 0.0528123658222413, "grad_norm": 0.74609375, "learning_rate": 0.0003167046826781162, "loss": 2.5076, "step": 1968 }, { "epoch": 0.05283920137398025, "grad_norm": 0.76171875, "learning_rate": 0.00031686569166778473, "loss": 2.5249, "step": 1969 }, { "epoch": 0.052866036925719194, "grad_norm": 0.75, "learning_rate": 0.0003170267006574533, "loss": 2.5152, "step": 1970 }, { "epoch": 0.052892872477458136, "grad_norm": 0.76171875, "learning_rate": 0.0003171877096471219, "loss": 2.5851, "step": 1971 }, { "epoch": 0.05291970802919708, "grad_norm": 0.77734375, "learning_rate": 0.0003173487186367905, "loss": 2.6113, "step": 1972 }, { "epoch": 0.05294654358093603, "grad_norm": 0.75390625, "learning_rate": 0.00031750972762645914, "loss": 2.5955, "step": 1973 }, { "epoch": 0.05297337913267497, "grad_norm": 0.74609375, "learning_rate": 0.0003176707366161277, "loss": 2.6195, "step": 1974 }, { "epoch": 0.05300021468441391, "grad_norm": 0.75, "learning_rate": 0.0003178317456057963, "loss": 2.4805, "step": 1975 }, { "epoch": 0.05302705023615285, "grad_norm": 0.76171875, "learning_rate": 0.0003179927545954649, "loss": 2.4733, "step": 1976 }, { "epoch": 0.0530538857878918, "grad_norm": 0.74609375, "learning_rate": 0.00031815376358513344, "loss": 2.4814, "step": 1977 }, { "epoch": 0.053080721339630745, "grad_norm": 1.0, "learning_rate": 0.000318314772574802, "loss": 2.7811, "step": 1978 }, { "epoch": 0.05310755689136969, "grad_norm": 0.83984375, "learning_rate": 0.00031847578156447067, "loss": 2.7792, "step": 1979 }, { "epoch": 0.05313439244310863, "grad_norm": 0.77734375, "learning_rate": 0.00031863679055413926, "loss": 2.5279, "step": 1980 }, { "epoch": 0.05316122799484757, "grad_norm": 0.83203125, "learning_rate": 0.00031879779954380785, "loss": 2.6945, "step": 1981 }, { "epoch": 0.05318806354658652, "grad_norm": 0.8046875, "learning_rate": 0.00031895880853347644, "loss": 2.6664, "step": 1982 }, { "epoch": 0.05321489909832546, "grad_norm": 0.78515625, "learning_rate": 0.000319119817523145, "loss": 2.6772, "step": 1983 }, { "epoch": 0.053241734650064404, "grad_norm": 0.765625, "learning_rate": 0.0003192808265128136, "loss": 2.6847, "step": 1984 }, { "epoch": 0.053268570201803346, "grad_norm": 0.765625, "learning_rate": 0.00031944183550248215, "loss": 2.6648, "step": 1985 }, { "epoch": 0.053295405753542295, "grad_norm": 0.76953125, "learning_rate": 0.0003196028444921508, "loss": 2.6413, "step": 1986 }, { "epoch": 0.05332224130528124, "grad_norm": 0.7578125, "learning_rate": 0.0003197638534818194, "loss": 2.5886, "step": 1987 }, { "epoch": 0.05334907685702018, "grad_norm": 0.734375, "learning_rate": 0.00031992486247148797, "loss": 2.6898, "step": 1988 }, { "epoch": 0.05337591240875912, "grad_norm": 0.7734375, "learning_rate": 0.00032008587146115656, "loss": 2.7429, "step": 1989 }, { "epoch": 0.05340274796049807, "grad_norm": 0.73046875, "learning_rate": 0.00032024688045082514, "loss": 2.536, "step": 1990 }, { "epoch": 0.05342958351223701, "grad_norm": 0.7421875, "learning_rate": 0.00032040788944049373, "loss": 2.4519, "step": 1991 }, { "epoch": 0.053456419063975955, "grad_norm": 0.73046875, "learning_rate": 0.00032056889843016227, "loss": 2.6497, "step": 1992 }, { "epoch": 0.0534832546157149, "grad_norm": 0.76171875, "learning_rate": 0.0003207299074198309, "loss": 2.6151, "step": 1993 }, { "epoch": 0.053510090167453846, "grad_norm": 0.7578125, "learning_rate": 0.0003208909164094995, "loss": 2.711, "step": 1994 }, { "epoch": 0.05353692571919279, "grad_norm": 0.734375, "learning_rate": 0.0003210519253991681, "loss": 2.6297, "step": 1995 }, { "epoch": 0.05356376127093173, "grad_norm": 0.74609375, "learning_rate": 0.0003212129343888367, "loss": 2.6877, "step": 1996 }, { "epoch": 0.05359059682267067, "grad_norm": 0.77734375, "learning_rate": 0.00032137394337850527, "loss": 2.7676, "step": 1997 }, { "epoch": 0.05361743237440962, "grad_norm": 0.7421875, "learning_rate": 0.0003215349523681739, "loss": 2.5739, "step": 1998 }, { "epoch": 0.05364426792614856, "grad_norm": 0.7578125, "learning_rate": 0.0003216959613578425, "loss": 2.5807, "step": 1999 }, { "epoch": 0.053671103477887505, "grad_norm": 0.74609375, "learning_rate": 0.00032185697034751103, "loss": 2.6408, "step": 2000 }, { "epoch": 0.05369793902962645, "grad_norm": 0.75390625, "learning_rate": 0.0003220179793371796, "loss": 2.646, "step": 2001 }, { "epoch": 0.053724774581365396, "grad_norm": 0.73828125, "learning_rate": 0.0003221789883268482, "loss": 2.6492, "step": 2002 }, { "epoch": 0.05375161013310434, "grad_norm": 0.75, "learning_rate": 0.0003223399973165168, "loss": 2.5734, "step": 2003 }, { "epoch": 0.05377844568484328, "grad_norm": 0.74609375, "learning_rate": 0.0003225010063061854, "loss": 2.6243, "step": 2004 }, { "epoch": 0.05380528123658222, "grad_norm": 0.7421875, "learning_rate": 0.00032266201529585403, "loss": 2.5639, "step": 2005 }, { "epoch": 0.053832116788321165, "grad_norm": 0.75, "learning_rate": 0.0003228230242855226, "loss": 2.6506, "step": 2006 }, { "epoch": 0.053858952340060114, "grad_norm": 0.74609375, "learning_rate": 0.0003229840332751912, "loss": 2.6137, "step": 2007 }, { "epoch": 0.053885787891799056, "grad_norm": 0.75, "learning_rate": 0.00032314504226485974, "loss": 2.5968, "step": 2008 }, { "epoch": 0.053912623443538, "grad_norm": 0.71484375, "learning_rate": 0.00032330605125452833, "loss": 2.5323, "step": 2009 }, { "epoch": 0.05393945899527694, "grad_norm": 0.74609375, "learning_rate": 0.0003234670602441969, "loss": 2.6261, "step": 2010 }, { "epoch": 0.05396629454701589, "grad_norm": 0.73828125, "learning_rate": 0.0003236280692338655, "loss": 2.573, "step": 2011 }, { "epoch": 0.05399313009875483, "grad_norm": 0.71875, "learning_rate": 0.00032378907822353415, "loss": 2.5102, "step": 2012 }, { "epoch": 0.05401996565049377, "grad_norm": 0.71875, "learning_rate": 0.00032395008721320274, "loss": 2.526, "step": 2013 }, { "epoch": 0.054046801202232715, "grad_norm": 0.73828125, "learning_rate": 0.0003241110962028713, "loss": 2.6457, "step": 2014 }, { "epoch": 0.054073636753971664, "grad_norm": 0.734375, "learning_rate": 0.0003242721051925399, "loss": 2.4931, "step": 2015 }, { "epoch": 0.054100472305710606, "grad_norm": 0.7421875, "learning_rate": 0.00032443311418220845, "loss": 2.6254, "step": 2016 }, { "epoch": 0.05412730785744955, "grad_norm": 0.74609375, "learning_rate": 0.00032459412317187704, "loss": 2.637, "step": 2017 }, { "epoch": 0.05415414340918849, "grad_norm": 0.7421875, "learning_rate": 0.0003247551321615456, "loss": 2.4951, "step": 2018 }, { "epoch": 0.05418097896092744, "grad_norm": 0.734375, "learning_rate": 0.00032491614115121427, "loss": 2.6004, "step": 2019 }, { "epoch": 0.05420781451266638, "grad_norm": 0.73828125, "learning_rate": 0.00032507715014088286, "loss": 2.589, "step": 2020 }, { "epoch": 0.054234650064405324, "grad_norm": 0.73828125, "learning_rate": 0.00032523815913055145, "loss": 2.5851, "step": 2021 }, { "epoch": 0.054261485616144266, "grad_norm": 0.7265625, "learning_rate": 0.00032539916812022004, "loss": 2.5686, "step": 2022 }, { "epoch": 0.054288321167883215, "grad_norm": 0.71875, "learning_rate": 0.00032556017710988857, "loss": 2.5696, "step": 2023 }, { "epoch": 0.05431515671962216, "grad_norm": 0.72265625, "learning_rate": 0.00032572118609955716, "loss": 2.5028, "step": 2024 }, { "epoch": 0.0543419922713611, "grad_norm": 0.7421875, "learning_rate": 0.00032588219508922575, "loss": 2.5779, "step": 2025 }, { "epoch": 0.05436882782310004, "grad_norm": 0.7421875, "learning_rate": 0.0003260432040788944, "loss": 2.6057, "step": 2026 }, { "epoch": 0.05439566337483899, "grad_norm": 0.75, "learning_rate": 0.000326204213068563, "loss": 2.4584, "step": 2027 }, { "epoch": 0.05442249892657793, "grad_norm": 0.7421875, "learning_rate": 0.00032636522205823157, "loss": 2.5883, "step": 2028 }, { "epoch": 0.054449334478316874, "grad_norm": 0.73828125, "learning_rate": 0.00032652623104790016, "loss": 2.5501, "step": 2029 }, { "epoch": 0.054476170030055816, "grad_norm": 0.7578125, "learning_rate": 0.00032668724003756874, "loss": 2.7045, "step": 2030 }, { "epoch": 0.05450300558179476, "grad_norm": 0.734375, "learning_rate": 0.0003268482490272373, "loss": 2.6894, "step": 2031 }, { "epoch": 0.05452984113353371, "grad_norm": 0.75, "learning_rate": 0.0003270092580169059, "loss": 2.6774, "step": 2032 }, { "epoch": 0.05455667668527265, "grad_norm": 0.71875, "learning_rate": 0.0003271702670065745, "loss": 2.4293, "step": 2033 }, { "epoch": 0.05458351223701159, "grad_norm": 0.734375, "learning_rate": 0.0003273312759962431, "loss": 2.5165, "step": 2034 }, { "epoch": 0.054610347788750534, "grad_norm": 0.75, "learning_rate": 0.0003274922849859117, "loss": 2.7236, "step": 2035 }, { "epoch": 0.05463718334048948, "grad_norm": 0.7109375, "learning_rate": 0.0003276532939755803, "loss": 2.4734, "step": 2036 }, { "epoch": 0.054664018892228425, "grad_norm": 0.7265625, "learning_rate": 0.00032781430296524887, "loss": 2.568, "step": 2037 }, { "epoch": 0.05469085444396737, "grad_norm": 0.73046875, "learning_rate": 0.0003279753119549175, "loss": 2.6153, "step": 2038 }, { "epoch": 0.05471768999570631, "grad_norm": 0.7578125, "learning_rate": 0.00032813632094458604, "loss": 2.7512, "step": 2039 }, { "epoch": 0.05474452554744526, "grad_norm": 0.72265625, "learning_rate": 0.00032829732993425463, "loss": 2.5102, "step": 2040 }, { "epoch": 0.0547713610991842, "grad_norm": 0.75, "learning_rate": 0.0003284583389239232, "loss": 2.6018, "step": 2041 }, { "epoch": 0.05479819665092314, "grad_norm": 0.71875, "learning_rate": 0.0003286193479135918, "loss": 2.47, "step": 2042 }, { "epoch": 0.054825032202662084, "grad_norm": 0.7265625, "learning_rate": 0.0003287803569032604, "loss": 2.4632, "step": 2043 }, { "epoch": 0.05485186775440103, "grad_norm": 0.75, "learning_rate": 0.000328941365892929, "loss": 2.63, "step": 2044 }, { "epoch": 0.054878703306139975, "grad_norm": 0.74609375, "learning_rate": 0.00032910237488259763, "loss": 2.5795, "step": 2045 }, { "epoch": 0.05490553885787892, "grad_norm": 0.75390625, "learning_rate": 0.00032926338387226616, "loss": 2.5243, "step": 2046 }, { "epoch": 0.05493237440961786, "grad_norm": 0.73046875, "learning_rate": 0.00032942439286193475, "loss": 2.5236, "step": 2047 }, { "epoch": 0.05495920996135681, "grad_norm": 0.75, "learning_rate": 0.00032958540185160334, "loss": 2.5889, "step": 2048 }, { "epoch": 0.05498604551309575, "grad_norm": 0.73828125, "learning_rate": 0.00032974641084127193, "loss": 2.5308, "step": 2049 }, { "epoch": 0.05501288106483469, "grad_norm": 0.75390625, "learning_rate": 0.0003299074198309405, "loss": 2.5895, "step": 2050 }, { "epoch": 0.055039716616573635, "grad_norm": 0.71484375, "learning_rate": 0.0003300684288206091, "loss": 2.554, "step": 2051 }, { "epoch": 0.055066552168312584, "grad_norm": 0.70703125, "learning_rate": 0.00033022943781027775, "loss": 2.3684, "step": 2052 }, { "epoch": 0.055093387720051526, "grad_norm": 0.74609375, "learning_rate": 0.00033039044679994634, "loss": 2.5638, "step": 2053 }, { "epoch": 0.05512022327179047, "grad_norm": 0.73046875, "learning_rate": 0.00033055145578961487, "loss": 2.4552, "step": 2054 }, { "epoch": 0.05514705882352941, "grad_norm": 0.73828125, "learning_rate": 0.00033071246477928346, "loss": 2.5358, "step": 2055 }, { "epoch": 0.05517389437526835, "grad_norm": 0.77734375, "learning_rate": 0.00033087347376895205, "loss": 2.6235, "step": 2056 }, { "epoch": 0.0552007299270073, "grad_norm": 0.7421875, "learning_rate": 0.00033103448275862064, "loss": 2.5556, "step": 2057 }, { "epoch": 0.05522756547874624, "grad_norm": 0.734375, "learning_rate": 0.0003311954917482893, "loss": 2.5078, "step": 2058 }, { "epoch": 0.055254401030485185, "grad_norm": 0.75390625, "learning_rate": 0.00033135650073795787, "loss": 2.603, "step": 2059 }, { "epoch": 0.05528123658222413, "grad_norm": 0.7421875, "learning_rate": 0.00033151750972762646, "loss": 2.6152, "step": 2060 }, { "epoch": 0.05530807213396308, "grad_norm": 0.71484375, "learning_rate": 0.00033167851871729505, "loss": 2.5761, "step": 2061 }, { "epoch": 0.05533490768570202, "grad_norm": 0.71875, "learning_rate": 0.0003318395277069636, "loss": 2.4869, "step": 2062 }, { "epoch": 0.05536174323744096, "grad_norm": 0.7421875, "learning_rate": 0.00033200053669663217, "loss": 2.5583, "step": 2063 }, { "epoch": 0.0553885787891799, "grad_norm": 0.73828125, "learning_rate": 0.00033216154568630076, "loss": 2.5568, "step": 2064 }, { "epoch": 0.05541541434091885, "grad_norm": 0.7109375, "learning_rate": 0.0003323225546759694, "loss": 2.5146, "step": 2065 }, { "epoch": 0.055442249892657794, "grad_norm": 0.7421875, "learning_rate": 0.000332483563665638, "loss": 2.4566, "step": 2066 }, { "epoch": 0.055469085444396736, "grad_norm": 0.75, "learning_rate": 0.0003326445726553066, "loss": 2.4982, "step": 2067 }, { "epoch": 0.05549592099613568, "grad_norm": 0.75390625, "learning_rate": 0.00033280558164497517, "loss": 2.507, "step": 2068 }, { "epoch": 0.05552275654787463, "grad_norm": 0.73828125, "learning_rate": 0.00033296659063464376, "loss": 2.5154, "step": 2069 }, { "epoch": 0.05554959209961357, "grad_norm": 0.7265625, "learning_rate": 0.0003331275996243123, "loss": 2.4822, "step": 2070 }, { "epoch": 0.05557642765135251, "grad_norm": 0.7421875, "learning_rate": 0.0003332886086139809, "loss": 2.7066, "step": 2071 }, { "epoch": 0.05560326320309145, "grad_norm": 0.734375, "learning_rate": 0.0003334496176036495, "loss": 2.5826, "step": 2072 }, { "epoch": 0.0556300987548304, "grad_norm": 0.71875, "learning_rate": 0.0003336106265933181, "loss": 2.532, "step": 2073 }, { "epoch": 0.055656934306569344, "grad_norm": 0.71875, "learning_rate": 0.0003337716355829867, "loss": 2.5401, "step": 2074 }, { "epoch": 0.05568376985830829, "grad_norm": 0.734375, "learning_rate": 0.0003339326445726553, "loss": 2.5734, "step": 2075 }, { "epoch": 0.05571060541004723, "grad_norm": 0.75, "learning_rate": 0.0003340936535623239, "loss": 2.5919, "step": 2076 }, { "epoch": 0.05573744096178618, "grad_norm": 0.734375, "learning_rate": 0.0003342546625519924, "loss": 2.551, "step": 2077 }, { "epoch": 0.05576427651352512, "grad_norm": 0.71875, "learning_rate": 0.000334415671541661, "loss": 2.4848, "step": 2078 }, { "epoch": 0.05579111206526406, "grad_norm": 0.72265625, "learning_rate": 0.00033457668053132964, "loss": 2.5472, "step": 2079 }, { "epoch": 0.055817947617003004, "grad_norm": 0.73046875, "learning_rate": 0.00033473768952099823, "loss": 2.5973, "step": 2080 }, { "epoch": 0.055844783168741946, "grad_norm": 0.7109375, "learning_rate": 0.0003348986985106668, "loss": 2.3461, "step": 2081 }, { "epoch": 0.055871618720480895, "grad_norm": 0.7578125, "learning_rate": 0.0003350597075003354, "loss": 2.6246, "step": 2082 }, { "epoch": 0.05589845427221984, "grad_norm": 0.7109375, "learning_rate": 0.000335220716490004, "loss": 2.5271, "step": 2083 }, { "epoch": 0.05592528982395878, "grad_norm": 0.73828125, "learning_rate": 0.00033538172547967264, "loss": 2.5076, "step": 2084 }, { "epoch": 0.05595212537569772, "grad_norm": 0.7421875, "learning_rate": 0.0003355427344693411, "loss": 2.5203, "step": 2085 }, { "epoch": 0.05597896092743667, "grad_norm": 0.71875, "learning_rate": 0.00033570374345900976, "loss": 2.4272, "step": 2086 }, { "epoch": 0.05600579647917561, "grad_norm": 0.7265625, "learning_rate": 0.00033586475244867835, "loss": 2.4898, "step": 2087 }, { "epoch": 0.056032632030914555, "grad_norm": 0.734375, "learning_rate": 0.00033602576143834694, "loss": 2.4893, "step": 2088 }, { "epoch": 0.0560594675826535, "grad_norm": 0.72265625, "learning_rate": 0.00033618677042801553, "loss": 2.4958, "step": 2089 }, { "epoch": 0.056086303134392446, "grad_norm": 0.73828125, "learning_rate": 0.0003363477794176841, "loss": 2.4343, "step": 2090 }, { "epoch": 0.05611313868613139, "grad_norm": 0.703125, "learning_rate": 0.00033650878840735276, "loss": 2.4501, "step": 2091 }, { "epoch": 0.05613997423787033, "grad_norm": 0.71875, "learning_rate": 0.00033666979739702135, "loss": 2.4114, "step": 2092 }, { "epoch": 0.05616680978960927, "grad_norm": 0.7265625, "learning_rate": 0.0003368308063866899, "loss": 2.6315, "step": 2093 }, { "epoch": 0.05619364534134822, "grad_norm": 0.7265625, "learning_rate": 0.00033699181537635847, "loss": 2.5218, "step": 2094 }, { "epoch": 0.05622048089308716, "grad_norm": 0.70703125, "learning_rate": 0.00033715282436602706, "loss": 2.3901, "step": 2095 }, { "epoch": 0.056247316444826105, "grad_norm": 0.72265625, "learning_rate": 0.00033731383335569565, "loss": 2.4586, "step": 2096 }, { "epoch": 0.05627415199656505, "grad_norm": 0.74609375, "learning_rate": 0.00033747484234536424, "loss": 2.4955, "step": 2097 }, { "epoch": 0.056300987548303996, "grad_norm": 0.7578125, "learning_rate": 0.0003376358513350329, "loss": 2.6629, "step": 2098 }, { "epoch": 0.05632782310004294, "grad_norm": 0.71875, "learning_rate": 0.00033779686032470147, "loss": 2.3871, "step": 2099 }, { "epoch": 0.05635465865178188, "grad_norm": 0.7421875, "learning_rate": 0.00033795786931437006, "loss": 2.5943, "step": 2100 }, { "epoch": 0.05638149420352082, "grad_norm": 0.734375, "learning_rate": 0.0003381188783040386, "loss": 2.4978, "step": 2101 }, { "epoch": 0.05640832975525977, "grad_norm": 0.72265625, "learning_rate": 0.0003382798872937072, "loss": 2.453, "step": 2102 }, { "epoch": 0.056435165306998714, "grad_norm": 0.71484375, "learning_rate": 0.00033844089628337577, "loss": 2.4678, "step": 2103 }, { "epoch": 0.056462000858737656, "grad_norm": 0.73046875, "learning_rate": 0.00033860190527304436, "loss": 2.5635, "step": 2104 }, { "epoch": 0.0564888364104766, "grad_norm": 0.71875, "learning_rate": 0.000338762914262713, "loss": 2.5197, "step": 2105 }, { "epoch": 0.05651567196221554, "grad_norm": 0.74609375, "learning_rate": 0.0003389239232523816, "loss": 2.5287, "step": 2106 }, { "epoch": 0.05654250751395449, "grad_norm": 0.72265625, "learning_rate": 0.0003390849322420502, "loss": 2.5043, "step": 2107 }, { "epoch": 0.05656934306569343, "grad_norm": 0.73046875, "learning_rate": 0.0003392459412317187, "loss": 2.4606, "step": 2108 }, { "epoch": 0.05659617861743237, "grad_norm": 0.74609375, "learning_rate": 0.0003394069502213873, "loss": 2.5437, "step": 2109 }, { "epoch": 0.056623014169171315, "grad_norm": 0.71875, "learning_rate": 0.0003395679592110559, "loss": 2.5776, "step": 2110 }, { "epoch": 0.056649849720910264, "grad_norm": 0.7421875, "learning_rate": 0.0003397289682007245, "loss": 2.5033, "step": 2111 }, { "epoch": 0.056676685272649206, "grad_norm": 0.7265625, "learning_rate": 0.0003398899771903931, "loss": 2.4792, "step": 2112 }, { "epoch": 0.05670352082438815, "grad_norm": 0.73046875, "learning_rate": 0.0003400509861800617, "loss": 2.4742, "step": 2113 }, { "epoch": 0.05673035637612709, "grad_norm": 0.71875, "learning_rate": 0.0003402119951697303, "loss": 2.4397, "step": 2114 }, { "epoch": 0.05675719192786604, "grad_norm": 0.70703125, "learning_rate": 0.0003403730041593989, "loss": 2.477, "step": 2115 }, { "epoch": 0.05678402747960498, "grad_norm": 0.71484375, "learning_rate": 0.0003405340131490674, "loss": 2.4753, "step": 2116 }, { "epoch": 0.056810863031343924, "grad_norm": 0.71875, "learning_rate": 0.000340695022138736, "loss": 2.4808, "step": 2117 }, { "epoch": 0.056837698583082866, "grad_norm": 0.734375, "learning_rate": 0.00034085603112840465, "loss": 2.5183, "step": 2118 }, { "epoch": 0.056864534134821815, "grad_norm": 0.7109375, "learning_rate": 0.00034101704011807324, "loss": 2.4084, "step": 2119 }, { "epoch": 0.05689136968656076, "grad_norm": 0.69140625, "learning_rate": 0.00034117804910774183, "loss": 2.362, "step": 2120 }, { "epoch": 0.0569182052382997, "grad_norm": 0.72265625, "learning_rate": 0.0003413390580974104, "loss": 2.4849, "step": 2121 }, { "epoch": 0.05694504079003864, "grad_norm": 0.73046875, "learning_rate": 0.000341500067087079, "loss": 2.4931, "step": 2122 }, { "epoch": 0.05697187634177759, "grad_norm": 0.71875, "learning_rate": 0.0003416610760767476, "loss": 2.4212, "step": 2123 }, { "epoch": 0.05699871189351653, "grad_norm": 0.734375, "learning_rate": 0.00034182208506641613, "loss": 2.5464, "step": 2124 }, { "epoch": 0.057025547445255474, "grad_norm": 0.73046875, "learning_rate": 0.0003419830940560848, "loss": 2.5083, "step": 2125 }, { "epoch": 0.057052382996994416, "grad_norm": 0.703125, "learning_rate": 0.00034214410304575336, "loss": 2.462, "step": 2126 }, { "epoch": 0.057079218548733365, "grad_norm": 0.73046875, "learning_rate": 0.00034230511203542195, "loss": 2.5619, "step": 2127 }, { "epoch": 0.05710605410047231, "grad_norm": 0.71875, "learning_rate": 0.00034246612102509054, "loss": 2.478, "step": 2128 }, { "epoch": 0.05713288965221125, "grad_norm": 0.7421875, "learning_rate": 0.00034262713001475913, "loss": 2.5019, "step": 2129 }, { "epoch": 0.05715972520395019, "grad_norm": 0.734375, "learning_rate": 0.0003427881390044277, "loss": 2.5433, "step": 2130 }, { "epoch": 0.057186560755689134, "grad_norm": 0.71484375, "learning_rate": 0.00034294914799409625, "loss": 2.4991, "step": 2131 }, { "epoch": 0.05721339630742808, "grad_norm": 0.6953125, "learning_rate": 0.0003431101569837649, "loss": 2.4277, "step": 2132 }, { "epoch": 0.057240231859167025, "grad_norm": 0.70703125, "learning_rate": 0.0003432711659734335, "loss": 2.3881, "step": 2133 }, { "epoch": 0.05726706741090597, "grad_norm": 0.71484375, "learning_rate": 0.00034343217496310207, "loss": 2.527, "step": 2134 }, { "epoch": 0.05729390296264491, "grad_norm": 0.7265625, "learning_rate": 0.00034359318395277066, "loss": 2.5046, "step": 2135 }, { "epoch": 0.05732073851438386, "grad_norm": 0.69921875, "learning_rate": 0.00034375419294243925, "loss": 2.3852, "step": 2136 }, { "epoch": 0.0573475740661228, "grad_norm": 0.70703125, "learning_rate": 0.00034391520193210784, "loss": 2.3827, "step": 2137 }, { "epoch": 0.05737440961786174, "grad_norm": 0.7109375, "learning_rate": 0.0003440762109217765, "loss": 2.4407, "step": 2138 }, { "epoch": 0.057401245169600684, "grad_norm": 0.734375, "learning_rate": 0.000344237219911445, "loss": 2.5755, "step": 2139 }, { "epoch": 0.05742808072133963, "grad_norm": 0.73046875, "learning_rate": 0.0003443982289011136, "loss": 2.4976, "step": 2140 }, { "epoch": 0.057454916273078575, "grad_norm": 0.71875, "learning_rate": 0.0003445592378907822, "loss": 2.4608, "step": 2141 }, { "epoch": 0.05748175182481752, "grad_norm": 0.73046875, "learning_rate": 0.0003447202468804508, "loss": 2.5455, "step": 2142 }, { "epoch": 0.05750858737655646, "grad_norm": 0.71875, "learning_rate": 0.00034488125587011937, "loss": 2.4646, "step": 2143 }, { "epoch": 0.05753542292829541, "grad_norm": 0.73828125, "learning_rate": 0.000345042264859788, "loss": 2.5752, "step": 2144 }, { "epoch": 0.05756225848003435, "grad_norm": 0.72265625, "learning_rate": 0.0003452032738494566, "loss": 2.5314, "step": 2145 }, { "epoch": 0.05758909403177329, "grad_norm": 0.7734375, "learning_rate": 0.0003453642828391252, "loss": 2.6021, "step": 2146 }, { "epoch": 0.057615929583512235, "grad_norm": 0.8203125, "learning_rate": 0.0003455252918287937, "loss": 2.5241, "step": 2147 }, { "epoch": 0.057642765135251184, "grad_norm": 0.7265625, "learning_rate": 0.0003456863008184623, "loss": 2.5622, "step": 2148 }, { "epoch": 0.057669600686990126, "grad_norm": 0.75390625, "learning_rate": 0.0003458473098081309, "loss": 2.561, "step": 2149 }, { "epoch": 0.05769643623872907, "grad_norm": 0.75390625, "learning_rate": 0.0003460083187977995, "loss": 2.5521, "step": 2150 }, { "epoch": 0.05772327179046801, "grad_norm": 0.78515625, "learning_rate": 0.00034616932778746813, "loss": 2.593, "step": 2151 }, { "epoch": 0.05775010734220696, "grad_norm": 0.7578125, "learning_rate": 0.0003463303367771367, "loss": 2.6483, "step": 2152 }, { "epoch": 0.0577769428939459, "grad_norm": 0.74609375, "learning_rate": 0.0003464913457668053, "loss": 2.5039, "step": 2153 }, { "epoch": 0.05780377844568484, "grad_norm": 0.71484375, "learning_rate": 0.0003466523547564739, "loss": 2.4684, "step": 2154 }, { "epoch": 0.057830613997423785, "grad_norm": 0.73046875, "learning_rate": 0.00034681336374614243, "loss": 2.4994, "step": 2155 }, { "epoch": 0.05785744954916273, "grad_norm": 0.72265625, "learning_rate": 0.000346974372735811, "loss": 2.6182, "step": 2156 }, { "epoch": 0.057884285100901677, "grad_norm": 0.69140625, "learning_rate": 0.0003471353817254796, "loss": 2.4819, "step": 2157 }, { "epoch": 0.05791112065264062, "grad_norm": 0.71875, "learning_rate": 0.00034729639071514825, "loss": 2.5328, "step": 2158 }, { "epoch": 0.05793795620437956, "grad_norm": 0.71875, "learning_rate": 0.00034745739970481684, "loss": 2.544, "step": 2159 }, { "epoch": 0.0579647917561185, "grad_norm": 0.6953125, "learning_rate": 0.00034761840869448543, "loss": 2.4591, "step": 2160 }, { "epoch": 0.05799162730785745, "grad_norm": 0.7109375, "learning_rate": 0.000347779417684154, "loss": 2.4534, "step": 2161 }, { "epoch": 0.058018462859596394, "grad_norm": 0.7421875, "learning_rate": 0.00034794042667382255, "loss": 2.5824, "step": 2162 }, { "epoch": 0.058045298411335336, "grad_norm": 0.703125, "learning_rate": 0.00034810143566349114, "loss": 2.4293, "step": 2163 }, { "epoch": 0.05807213396307428, "grad_norm": 0.6953125, "learning_rate": 0.00034826244465315973, "loss": 2.4892, "step": 2164 }, { "epoch": 0.05809896951481323, "grad_norm": 0.7265625, "learning_rate": 0.0003484234536428284, "loss": 2.5834, "step": 2165 }, { "epoch": 0.05812580506655217, "grad_norm": 0.7109375, "learning_rate": 0.00034858446263249696, "loss": 2.4528, "step": 2166 }, { "epoch": 0.05815264061829111, "grad_norm": 0.7265625, "learning_rate": 0.00034874547162216555, "loss": 2.5691, "step": 2167 }, { "epoch": 0.05817947617003005, "grad_norm": 0.7578125, "learning_rate": 0.00034890648061183414, "loss": 2.7157, "step": 2168 }, { "epoch": 0.058206311721769, "grad_norm": 0.71484375, "learning_rate": 0.00034906748960150273, "loss": 2.4859, "step": 2169 }, { "epoch": 0.058233147273507944, "grad_norm": 0.734375, "learning_rate": 0.00034922849859117126, "loss": 2.5481, "step": 2170 }, { "epoch": 0.05825998282524689, "grad_norm": 0.71875, "learning_rate": 0.00034938950758083985, "loss": 2.5044, "step": 2171 }, { "epoch": 0.05828681837698583, "grad_norm": 0.734375, "learning_rate": 0.0003495505165705085, "loss": 2.5332, "step": 2172 }, { "epoch": 0.05831365392872478, "grad_norm": 0.734375, "learning_rate": 0.0003497115255601771, "loss": 2.4053, "step": 2173 }, { "epoch": 0.05834048948046372, "grad_norm": 0.71875, "learning_rate": 0.00034987253454984567, "loss": 2.5103, "step": 2174 }, { "epoch": 0.05836732503220266, "grad_norm": 0.7109375, "learning_rate": 0.00035003354353951426, "loss": 2.4784, "step": 2175 }, { "epoch": 0.058394160583941604, "grad_norm": 0.703125, "learning_rate": 0.00035019455252918285, "loss": 2.489, "step": 2176 }, { "epoch": 0.05842099613568055, "grad_norm": 0.71875, "learning_rate": 0.0003503555615188515, "loss": 2.5062, "step": 2177 }, { "epoch": 0.058447831687419495, "grad_norm": 0.69921875, "learning_rate": 0.00035051657050852, "loss": 2.4373, "step": 2178 }, { "epoch": 0.05847466723915844, "grad_norm": 0.7421875, "learning_rate": 0.0003506775794981886, "loss": 2.5597, "step": 2179 }, { "epoch": 0.05850150279089738, "grad_norm": 0.7109375, "learning_rate": 0.0003508385884878572, "loss": 2.5166, "step": 2180 }, { "epoch": 0.05852833834263632, "grad_norm": 0.7109375, "learning_rate": 0.0003509995974775258, "loss": 2.5786, "step": 2181 }, { "epoch": 0.05855517389437527, "grad_norm": 0.7109375, "learning_rate": 0.0003511606064671944, "loss": 2.5713, "step": 2182 }, { "epoch": 0.05858200944611421, "grad_norm": 0.6875, "learning_rate": 0.00035132161545686297, "loss": 2.3883, "step": 2183 }, { "epoch": 0.058608844997853154, "grad_norm": 0.72265625, "learning_rate": 0.0003514826244465316, "loss": 2.5378, "step": 2184 }, { "epoch": 0.0586356805495921, "grad_norm": 0.6953125, "learning_rate": 0.0003516436334362002, "loss": 2.3454, "step": 2185 }, { "epoch": 0.058662516101331046, "grad_norm": 0.72265625, "learning_rate": 0.00035180464242586874, "loss": 2.4802, "step": 2186 }, { "epoch": 0.05868935165306999, "grad_norm": 0.69921875, "learning_rate": 0.0003519656514155373, "loss": 2.4133, "step": 2187 }, { "epoch": 0.05871618720480893, "grad_norm": 0.703125, "learning_rate": 0.0003521266604052059, "loss": 2.5052, "step": 2188 }, { "epoch": 0.05874302275654787, "grad_norm": 0.7421875, "learning_rate": 0.0003522876693948745, "loss": 2.5544, "step": 2189 }, { "epoch": 0.05876985830828682, "grad_norm": 0.69921875, "learning_rate": 0.0003524486783845431, "loss": 2.493, "step": 2190 }, { "epoch": 0.05879669386002576, "grad_norm": 0.69921875, "learning_rate": 0.00035260968737421173, "loss": 2.5163, "step": 2191 }, { "epoch": 0.058823529411764705, "grad_norm": 0.72265625, "learning_rate": 0.0003527706963638803, "loss": 2.5169, "step": 2192 }, { "epoch": 0.05885036496350365, "grad_norm": 0.6953125, "learning_rate": 0.00035293170535354886, "loss": 2.496, "step": 2193 }, { "epoch": 0.058877200515242596, "grad_norm": 0.7109375, "learning_rate": 0.00035309271434321745, "loss": 2.542, "step": 2194 }, { "epoch": 0.05890403606698154, "grad_norm": 0.734375, "learning_rate": 0.00035325372333288603, "loss": 2.5372, "step": 2195 }, { "epoch": 0.05893087161872048, "grad_norm": 0.69140625, "learning_rate": 0.0003534147323225546, "loss": 2.4644, "step": 2196 }, { "epoch": 0.05895770717045942, "grad_norm": 0.69921875, "learning_rate": 0.0003535757413122232, "loss": 2.4677, "step": 2197 }, { "epoch": 0.05898454272219837, "grad_norm": 0.6953125, "learning_rate": 0.00035373675030189185, "loss": 2.4168, "step": 2198 }, { "epoch": 0.059011378273937314, "grad_norm": 0.69921875, "learning_rate": 0.00035389775929156044, "loss": 2.4949, "step": 2199 }, { "epoch": 0.059038213825676256, "grad_norm": 0.6875, "learning_rate": 0.00035405876828122903, "loss": 2.4394, "step": 2200 }, { "epoch": 0.0590650493774152, "grad_norm": 0.69921875, "learning_rate": 0.00035421977727089757, "loss": 2.3503, "step": 2201 }, { "epoch": 0.05909188492915415, "grad_norm": 0.68359375, "learning_rate": 0.00035438078626056615, "loss": 2.52, "step": 2202 }, { "epoch": 0.05911872048089309, "grad_norm": 0.6875, "learning_rate": 0.00035454179525023474, "loss": 2.5424, "step": 2203 }, { "epoch": 0.05914555603263203, "grad_norm": 0.70703125, "learning_rate": 0.0003547028042399034, "loss": 2.4897, "step": 2204 }, { "epoch": 0.05917239158437097, "grad_norm": 0.66015625, "learning_rate": 0.000354863813229572, "loss": 2.2439, "step": 2205 }, { "epoch": 0.059199227136109915, "grad_norm": 0.69921875, "learning_rate": 0.00035502482221924056, "loss": 2.5279, "step": 2206 }, { "epoch": 0.059226062687848864, "grad_norm": 0.72265625, "learning_rate": 0.00035518583120890915, "loss": 2.5453, "step": 2207 }, { "epoch": 0.059252898239587806, "grad_norm": 0.7109375, "learning_rate": 0.00035534684019857774, "loss": 2.5284, "step": 2208 }, { "epoch": 0.05927973379132675, "grad_norm": 0.71875, "learning_rate": 0.0003555078491882463, "loss": 2.5242, "step": 2209 }, { "epoch": 0.05930656934306569, "grad_norm": 0.69921875, "learning_rate": 0.00035566885817791486, "loss": 2.5051, "step": 2210 }, { "epoch": 0.05933340489480464, "grad_norm": 0.65234375, "learning_rate": 0.0003558298671675835, "loss": 2.417, "step": 2211 }, { "epoch": 0.05936024044654358, "grad_norm": 0.6875, "learning_rate": 0.0003559908761572521, "loss": 2.5143, "step": 2212 }, { "epoch": 0.059387075998282524, "grad_norm": 0.68359375, "learning_rate": 0.0003561518851469207, "loss": 2.4411, "step": 2213 }, { "epoch": 0.059413911550021466, "grad_norm": 0.69140625, "learning_rate": 0.00035631289413658927, "loss": 2.4808, "step": 2214 }, { "epoch": 0.059440747101760415, "grad_norm": 0.69921875, "learning_rate": 0.00035647390312625786, "loss": 2.4137, "step": 2215 }, { "epoch": 0.05946758265349936, "grad_norm": 0.7265625, "learning_rate": 0.00035663491211592645, "loss": 2.4519, "step": 2216 }, { "epoch": 0.0594944182052383, "grad_norm": 0.68359375, "learning_rate": 0.000356795921105595, "loss": 2.4245, "step": 2217 }, { "epoch": 0.05952125375697724, "grad_norm": 0.70703125, "learning_rate": 0.0003569569300952636, "loss": 2.5256, "step": 2218 }, { "epoch": 0.05954808930871619, "grad_norm": 0.70703125, "learning_rate": 0.0003571179390849322, "loss": 2.5418, "step": 2219 }, { "epoch": 0.05957492486045513, "grad_norm": 0.68359375, "learning_rate": 0.0003572789480746008, "loss": 2.4433, "step": 2220 }, { "epoch": 0.059601760412194074, "grad_norm": 0.6875, "learning_rate": 0.0003574399570642694, "loss": 2.4601, "step": 2221 }, { "epoch": 0.059628595963933016, "grad_norm": 0.69921875, "learning_rate": 0.000357600966053938, "loss": 2.4134, "step": 2222 }, { "epoch": 0.059655431515671965, "grad_norm": 0.6953125, "learning_rate": 0.00035776197504360657, "loss": 2.4366, "step": 2223 }, { "epoch": 0.05968226706741091, "grad_norm": 0.71484375, "learning_rate": 0.0003579229840332751, "loss": 2.5022, "step": 2224 }, { "epoch": 0.05970910261914985, "grad_norm": 0.7109375, "learning_rate": 0.00035808399302294375, "loss": 2.5491, "step": 2225 }, { "epoch": 0.05973593817088879, "grad_norm": 0.7109375, "learning_rate": 0.00035824500201261234, "loss": 2.5617, "step": 2226 }, { "epoch": 0.05976277372262774, "grad_norm": 0.703125, "learning_rate": 0.0003584060110022809, "loss": 2.5411, "step": 2227 }, { "epoch": 0.05978960927436668, "grad_norm": 0.7265625, "learning_rate": 0.0003585670199919495, "loss": 2.5504, "step": 2228 }, { "epoch": 0.059816444826105625, "grad_norm": 0.70703125, "learning_rate": 0.0003587280289816181, "loss": 2.4289, "step": 2229 }, { "epoch": 0.05984328037784457, "grad_norm": 0.73046875, "learning_rate": 0.00035888903797128674, "loss": 2.5677, "step": 2230 }, { "epoch": 0.05987011592958351, "grad_norm": 0.703125, "learning_rate": 0.00035905004696095533, "loss": 2.4658, "step": 2231 }, { "epoch": 0.05989695148132246, "grad_norm": 0.72265625, "learning_rate": 0.00035921105595062387, "loss": 2.4837, "step": 2232 }, { "epoch": 0.0599237870330614, "grad_norm": 0.68359375, "learning_rate": 0.00035937206494029246, "loss": 2.4292, "step": 2233 }, { "epoch": 0.05995062258480034, "grad_norm": 0.70703125, "learning_rate": 0.00035953307392996105, "loss": 2.5609, "step": 2234 }, { "epoch": 0.059977458136539284, "grad_norm": 0.72265625, "learning_rate": 0.00035969408291962963, "loss": 2.4937, "step": 2235 }, { "epoch": 0.06000429368827823, "grad_norm": 0.69921875, "learning_rate": 0.0003598550919092982, "loss": 2.4324, "step": 2236 }, { "epoch": 0.060031129240017175, "grad_norm": 0.73046875, "learning_rate": 0.00036001610089896687, "loss": 2.5745, "step": 2237 }, { "epoch": 0.06005796479175612, "grad_norm": 0.69140625, "learning_rate": 0.00036017710988863545, "loss": 2.4609, "step": 2238 }, { "epoch": 0.06008480034349506, "grad_norm": 0.68359375, "learning_rate": 0.00036033811887830404, "loss": 2.41, "step": 2239 }, { "epoch": 0.06011163589523401, "grad_norm": 0.69140625, "learning_rate": 0.0003604991278679726, "loss": 2.465, "step": 2240 }, { "epoch": 0.06013847144697295, "grad_norm": 0.71484375, "learning_rate": 0.00036066013685764117, "loss": 2.51, "step": 2241 }, { "epoch": 0.06016530699871189, "grad_norm": 0.6953125, "learning_rate": 0.00036082114584730975, "loss": 2.4584, "step": 2242 }, { "epoch": 0.060192142550450835, "grad_norm": 0.69921875, "learning_rate": 0.00036098215483697834, "loss": 2.4845, "step": 2243 }, { "epoch": 0.060218978102189784, "grad_norm": 0.6796875, "learning_rate": 0.000361143163826647, "loss": 2.4566, "step": 2244 }, { "epoch": 0.060245813653928726, "grad_norm": 0.72265625, "learning_rate": 0.0003613041728163156, "loss": 2.5572, "step": 2245 }, { "epoch": 0.06027264920566767, "grad_norm": 0.703125, "learning_rate": 0.00036146518180598416, "loss": 2.4691, "step": 2246 }, { "epoch": 0.06029948475740661, "grad_norm": 0.6875, "learning_rate": 0.0003616261907956527, "loss": 2.5115, "step": 2247 }, { "epoch": 0.06032632030914556, "grad_norm": 0.69140625, "learning_rate": 0.0003617871997853213, "loss": 2.4702, "step": 2248 }, { "epoch": 0.0603531558608845, "grad_norm": 0.703125, "learning_rate": 0.0003619482087749899, "loss": 2.4947, "step": 2249 }, { "epoch": 0.06037999141262344, "grad_norm": 0.6953125, "learning_rate": 0.00036210921776465846, "loss": 2.4151, "step": 2250 }, { "epoch": 0.060406826964362385, "grad_norm": 0.70703125, "learning_rate": 0.0003622702267543271, "loss": 2.4268, "step": 2251 }, { "epoch": 0.060433662516101334, "grad_norm": 0.6875, "learning_rate": 0.0003624312357439957, "loss": 2.3991, "step": 2252 }, { "epoch": 0.060460498067840276, "grad_norm": 0.703125, "learning_rate": 0.0003625922447336643, "loss": 2.4544, "step": 2253 }, { "epoch": 0.06048733361957922, "grad_norm": 0.69921875, "learning_rate": 0.00036275325372333287, "loss": 2.4298, "step": 2254 }, { "epoch": 0.06051416917131816, "grad_norm": 0.7265625, "learning_rate": 0.0003629142627130014, "loss": 2.506, "step": 2255 }, { "epoch": 0.0605410047230571, "grad_norm": 0.7109375, "learning_rate": 0.00036307527170267, "loss": 2.5185, "step": 2256 }, { "epoch": 0.06056784027479605, "grad_norm": 0.703125, "learning_rate": 0.0003632362806923386, "loss": 2.5135, "step": 2257 }, { "epoch": 0.060594675826534994, "grad_norm": 0.69921875, "learning_rate": 0.0003633972896820072, "loss": 2.4403, "step": 2258 }, { "epoch": 0.060621511378273936, "grad_norm": 0.70703125, "learning_rate": 0.0003635582986716758, "loss": 2.4481, "step": 2259 }, { "epoch": 0.06064834693001288, "grad_norm": 0.73046875, "learning_rate": 0.0003637193076613444, "loss": 2.4576, "step": 2260 }, { "epoch": 0.06067518248175183, "grad_norm": 0.69921875, "learning_rate": 0.000363880316651013, "loss": 2.3936, "step": 2261 }, { "epoch": 0.06070201803349077, "grad_norm": 0.68359375, "learning_rate": 0.0003640413256406816, "loss": 2.4367, "step": 2262 }, { "epoch": 0.06072885358522971, "grad_norm": 0.69921875, "learning_rate": 0.0003642023346303501, "loss": 2.4112, "step": 2263 }, { "epoch": 0.06075568913696865, "grad_norm": 0.7109375, "learning_rate": 0.00036436334362001876, "loss": 2.5042, "step": 2264 }, { "epoch": 0.0607825246887076, "grad_norm": 0.68359375, "learning_rate": 0.00036452435260968735, "loss": 2.4163, "step": 2265 }, { "epoch": 0.060809360240446544, "grad_norm": 0.67578125, "learning_rate": 0.00036468536159935594, "loss": 2.3935, "step": 2266 }, { "epoch": 0.060836195792185487, "grad_norm": 0.68359375, "learning_rate": 0.0003648463705890245, "loss": 2.383, "step": 2267 }, { "epoch": 0.06086303134392443, "grad_norm": 0.671875, "learning_rate": 0.0003650073795786931, "loss": 2.436, "step": 2268 }, { "epoch": 0.06088986689566338, "grad_norm": 0.69140625, "learning_rate": 0.0003651683885683617, "loss": 2.4275, "step": 2269 }, { "epoch": 0.06091670244740232, "grad_norm": 0.69921875, "learning_rate": 0.00036532939755803034, "loss": 2.5329, "step": 2270 }, { "epoch": 0.06094353799914126, "grad_norm": 0.65234375, "learning_rate": 0.0003654904065476989, "loss": 2.3368, "step": 2271 }, { "epoch": 0.060970373550880204, "grad_norm": 0.68359375, "learning_rate": 0.00036565141553736747, "loss": 2.4454, "step": 2272 }, { "epoch": 0.06099720910261915, "grad_norm": 0.6796875, "learning_rate": 0.00036581242452703606, "loss": 2.3942, "step": 2273 }, { "epoch": 0.061024044654358095, "grad_norm": 0.6953125, "learning_rate": 0.00036597343351670465, "loss": 2.4501, "step": 2274 }, { "epoch": 0.06105088020609704, "grad_norm": 0.69140625, "learning_rate": 0.00036613444250637323, "loss": 2.4735, "step": 2275 }, { "epoch": 0.06107771575783598, "grad_norm": 0.6875, "learning_rate": 0.0003662954514960418, "loss": 2.4287, "step": 2276 }, { "epoch": 0.06110455130957493, "grad_norm": 0.7109375, "learning_rate": 0.00036645646048571047, "loss": 2.5121, "step": 2277 }, { "epoch": 0.06113138686131387, "grad_norm": 0.71484375, "learning_rate": 0.000366617469475379, "loss": 2.4972, "step": 2278 }, { "epoch": 0.06115822241305281, "grad_norm": 0.68359375, "learning_rate": 0.0003667784784650476, "loss": 2.479, "step": 2279 }, { "epoch": 0.061185057964791754, "grad_norm": 0.6953125, "learning_rate": 0.0003669394874547162, "loss": 2.5297, "step": 2280 }, { "epoch": 0.0612118935165307, "grad_norm": 0.69921875, "learning_rate": 0.00036710049644438477, "loss": 2.4768, "step": 2281 }, { "epoch": 0.061238729068269646, "grad_norm": 0.68359375, "learning_rate": 0.00036726150543405335, "loss": 2.4907, "step": 2282 }, { "epoch": 0.06126556462000859, "grad_norm": 0.6953125, "learning_rate": 0.00036742251442372194, "loss": 2.4744, "step": 2283 }, { "epoch": 0.06129240017174753, "grad_norm": 0.69140625, "learning_rate": 0.0003675835234133906, "loss": 2.4261, "step": 2284 }, { "epoch": 0.06131923572348647, "grad_norm": 0.66796875, "learning_rate": 0.0003677445324030592, "loss": 2.3911, "step": 2285 }, { "epoch": 0.06134607127522542, "grad_norm": 0.6796875, "learning_rate": 0.0003679055413927277, "loss": 2.366, "step": 2286 }, { "epoch": 0.06137290682696436, "grad_norm": 0.67578125, "learning_rate": 0.0003680665503823963, "loss": 2.4631, "step": 2287 }, { "epoch": 0.061399742378703305, "grad_norm": 0.6640625, "learning_rate": 0.0003682275593720649, "loss": 2.437, "step": 2288 }, { "epoch": 0.06142657793044225, "grad_norm": 0.703125, "learning_rate": 0.0003683885683617335, "loss": 2.4281, "step": 2289 }, { "epoch": 0.061453413482181196, "grad_norm": 0.6875, "learning_rate": 0.0003685495773514021, "loss": 2.4456, "step": 2290 }, { "epoch": 0.06148024903392014, "grad_norm": 0.6875, "learning_rate": 0.0003687105863410707, "loss": 2.43, "step": 2291 }, { "epoch": 0.06150708458565908, "grad_norm": 0.68359375, "learning_rate": 0.0003688715953307393, "loss": 2.4616, "step": 2292 }, { "epoch": 0.06153392013739802, "grad_norm": 0.6875, "learning_rate": 0.0003690326043204079, "loss": 2.3822, "step": 2293 }, { "epoch": 0.06156075568913697, "grad_norm": 0.6953125, "learning_rate": 0.0003691936133100764, "loss": 2.3978, "step": 2294 }, { "epoch": 0.061587591240875914, "grad_norm": 0.69140625, "learning_rate": 0.000369354622299745, "loss": 2.4605, "step": 2295 }, { "epoch": 0.061614426792614856, "grad_norm": 0.703125, "learning_rate": 0.0003695156312894136, "loss": 2.4304, "step": 2296 }, { "epoch": 0.0616412623443538, "grad_norm": 0.671875, "learning_rate": 0.00036967664027908224, "loss": 2.3549, "step": 2297 }, { "epoch": 0.06166809789609275, "grad_norm": 0.69140625, "learning_rate": 0.0003698376492687508, "loss": 2.4012, "step": 2298 }, { "epoch": 0.06169493344783169, "grad_norm": 0.671875, "learning_rate": 0.0003699986582584194, "loss": 2.4248, "step": 2299 }, { "epoch": 0.06172176899957063, "grad_norm": 0.69140625, "learning_rate": 0.000370159667248088, "loss": 2.4378, "step": 2300 }, { "epoch": 0.06174860455130957, "grad_norm": 0.69140625, "learning_rate": 0.0003703206762377566, "loss": 2.4153, "step": 2301 }, { "epoch": 0.06177544010304852, "grad_norm": 0.69140625, "learning_rate": 0.00037048168522742513, "loss": 2.4649, "step": 2302 }, { "epoch": 0.061802275654787464, "grad_norm": 0.6953125, "learning_rate": 0.0003706426942170937, "loss": 2.3298, "step": 2303 }, { "epoch": 0.061829111206526406, "grad_norm": 0.71484375, "learning_rate": 0.00037080370320676236, "loss": 2.4573, "step": 2304 }, { "epoch": 0.06185594675826535, "grad_norm": 0.6953125, "learning_rate": 0.00037096471219643095, "loss": 2.4559, "step": 2305 }, { "epoch": 0.06188278231000429, "grad_norm": 0.67578125, "learning_rate": 0.00037112572118609954, "loss": 2.352, "step": 2306 }, { "epoch": 0.06190961786174324, "grad_norm": 0.69140625, "learning_rate": 0.0003712867301757681, "loss": 2.418, "step": 2307 }, { "epoch": 0.06193645341348218, "grad_norm": 0.71875, "learning_rate": 0.0003714477391654367, "loss": 2.4802, "step": 2308 }, { "epoch": 0.061963288965221124, "grad_norm": 0.6875, "learning_rate": 0.00037160874815510525, "loss": 2.3826, "step": 2309 }, { "epoch": 0.061990124516960066, "grad_norm": 0.6875, "learning_rate": 0.00037176975714477384, "loss": 2.4628, "step": 2310 }, { "epoch": 0.062016960068699015, "grad_norm": 0.70703125, "learning_rate": 0.0003719307661344425, "loss": 2.4378, "step": 2311 }, { "epoch": 0.06204379562043796, "grad_norm": 0.6875, "learning_rate": 0.00037209177512411107, "loss": 2.5151, "step": 2312 }, { "epoch": 0.0620706311721769, "grad_norm": 0.69140625, "learning_rate": 0.00037225278411377966, "loss": 2.3771, "step": 2313 }, { "epoch": 0.06209746672391584, "grad_norm": 0.6796875, "learning_rate": 0.00037241379310344825, "loss": 2.4378, "step": 2314 }, { "epoch": 0.06212430227565479, "grad_norm": 0.69140625, "learning_rate": 0.00037257480209311683, "loss": 2.3944, "step": 2315 }, { "epoch": 0.06215113782739373, "grad_norm": 0.6953125, "learning_rate": 0.0003727358110827855, "loss": 2.332, "step": 2316 }, { "epoch": 0.062177973379132674, "grad_norm": 0.7578125, "learning_rate": 0.00037289682007245396, "loss": 2.4868, "step": 2317 }, { "epoch": 0.062204808930871616, "grad_norm": 0.80859375, "learning_rate": 0.0003730578290621226, "loss": 2.6492, "step": 2318 }, { "epoch": 0.062231644482610565, "grad_norm": 0.70703125, "learning_rate": 0.0003732188380517912, "loss": 2.558, "step": 2319 }, { "epoch": 0.06225848003434951, "grad_norm": 0.70703125, "learning_rate": 0.0003733798470414598, "loss": 2.5464, "step": 2320 }, { "epoch": 0.06228531558608845, "grad_norm": 0.69921875, "learning_rate": 0.00037354085603112837, "loss": 2.4097, "step": 2321 }, { "epoch": 0.06231215113782739, "grad_norm": 0.74609375, "learning_rate": 0.00037370186502079695, "loss": 2.4989, "step": 2322 }, { "epoch": 0.06233898668956634, "grad_norm": 0.703125, "learning_rate": 0.0003738628740104656, "loss": 2.3598, "step": 2323 }, { "epoch": 0.06236582224130528, "grad_norm": 0.6953125, "learning_rate": 0.0003740238830001342, "loss": 2.4826, "step": 2324 }, { "epoch": 0.062392657793044225, "grad_norm": 0.71484375, "learning_rate": 0.0003741848919898027, "loss": 2.4873, "step": 2325 }, { "epoch": 0.06241949334478317, "grad_norm": 0.7109375, "learning_rate": 0.0003743459009794713, "loss": 2.5229, "step": 2326 }, { "epoch": 0.062446328896522116, "grad_norm": 0.70703125, "learning_rate": 0.0003745069099691399, "loss": 2.5013, "step": 2327 }, { "epoch": 0.06247316444826106, "grad_norm": 0.7109375, "learning_rate": 0.0003746679189588085, "loss": 2.4852, "step": 2328 }, { "epoch": 0.0625, "grad_norm": 0.70703125, "learning_rate": 0.0003748289279484771, "loss": 2.5251, "step": 2329 }, { "epoch": 0.06252683555173895, "grad_norm": 0.7109375, "learning_rate": 0.0003749899369381457, "loss": 2.4608, "step": 2330 }, { "epoch": 0.06255367110347788, "grad_norm": 0.67578125, "learning_rate": 0.0003751509459278143, "loss": 2.4456, "step": 2331 }, { "epoch": 0.06258050665521683, "grad_norm": 0.70703125, "learning_rate": 0.00037531195491748284, "loss": 2.5245, "step": 2332 }, { "epoch": 0.06260734220695577, "grad_norm": 0.66015625, "learning_rate": 0.00037547296390715143, "loss": 2.4914, "step": 2333 }, { "epoch": 0.06263417775869472, "grad_norm": 0.6875, "learning_rate": 0.00037563397289682, "loss": 2.4453, "step": 2334 }, { "epoch": 0.06266101331043367, "grad_norm": 0.6875, "learning_rate": 0.0003757949818864886, "loss": 2.5083, "step": 2335 }, { "epoch": 0.0626878488621726, "grad_norm": 0.68359375, "learning_rate": 0.0003759559908761572, "loss": 2.4205, "step": 2336 }, { "epoch": 0.06271468441391155, "grad_norm": 0.69140625, "learning_rate": 0.00037611699986582584, "loss": 2.5237, "step": 2337 }, { "epoch": 0.0627415199656505, "grad_norm": 0.7109375, "learning_rate": 0.0003762780088554944, "loss": 2.6277, "step": 2338 }, { "epoch": 0.06276835551738943, "grad_norm": 0.67578125, "learning_rate": 0.000376439017845163, "loss": 2.438, "step": 2339 }, { "epoch": 0.06279519106912838, "grad_norm": 0.67578125, "learning_rate": 0.00037660002683483155, "loss": 2.4278, "step": 2340 }, { "epoch": 0.06282202662086732, "grad_norm": 0.69921875, "learning_rate": 0.00037676103582450014, "loss": 2.5966, "step": 2341 }, { "epoch": 0.06284886217260627, "grad_norm": 0.671875, "learning_rate": 0.00037692204481416873, "loss": 2.2723, "step": 2342 }, { "epoch": 0.06287569772434522, "grad_norm": 0.6796875, "learning_rate": 0.0003770830538038373, "loss": 2.532, "step": 2343 }, { "epoch": 0.06290253327608415, "grad_norm": 0.6640625, "learning_rate": 0.00037724406279350596, "loss": 2.4273, "step": 2344 }, { "epoch": 0.0629293688278231, "grad_norm": 0.6796875, "learning_rate": 0.00037740507178317455, "loss": 2.3784, "step": 2345 }, { "epoch": 0.06295620437956205, "grad_norm": 0.69140625, "learning_rate": 0.00037756608077284314, "loss": 2.3893, "step": 2346 }, { "epoch": 0.06298303993130099, "grad_norm": 0.69140625, "learning_rate": 0.0003777270897625117, "loss": 2.4158, "step": 2347 }, { "epoch": 0.06300987548303993, "grad_norm": 0.671875, "learning_rate": 0.00037788809875218026, "loss": 2.4401, "step": 2348 }, { "epoch": 0.06303671103477887, "grad_norm": 0.6796875, "learning_rate": 0.00037804910774184885, "loss": 2.3126, "step": 2349 }, { "epoch": 0.06306354658651782, "grad_norm": 0.6796875, "learning_rate": 0.0003782101167315175, "loss": 2.4297, "step": 2350 }, { "epoch": 0.06309038213825677, "grad_norm": 0.67578125, "learning_rate": 0.0003783711257211861, "loss": 2.3619, "step": 2351 }, { "epoch": 0.0631172176899957, "grad_norm": 0.6953125, "learning_rate": 0.00037853213471085467, "loss": 2.518, "step": 2352 }, { "epoch": 0.06314405324173465, "grad_norm": 0.69921875, "learning_rate": 0.00037869314370052326, "loss": 2.4612, "step": 2353 }, { "epoch": 0.0631708887934736, "grad_norm": 0.6796875, "learning_rate": 0.00037885415269019185, "loss": 2.518, "step": 2354 }, { "epoch": 0.06319772434521254, "grad_norm": 0.6796875, "learning_rate": 0.00037901516167986043, "loss": 2.5184, "step": 2355 }, { "epoch": 0.06322455989695148, "grad_norm": 0.69921875, "learning_rate": 0.00037917617066952897, "loss": 2.4371, "step": 2356 }, { "epoch": 0.06325139544869042, "grad_norm": 0.6796875, "learning_rate": 0.0003793371796591976, "loss": 2.4556, "step": 2357 }, { "epoch": 0.06327823100042937, "grad_norm": 0.640625, "learning_rate": 0.0003794981886488662, "loss": 2.3284, "step": 2358 }, { "epoch": 0.06330506655216832, "grad_norm": 0.68359375, "learning_rate": 0.0003796591976385348, "loss": 2.474, "step": 2359 }, { "epoch": 0.06333190210390725, "grad_norm": 0.67578125, "learning_rate": 0.0003798202066282034, "loss": 2.4348, "step": 2360 }, { "epoch": 0.0633587376556462, "grad_norm": 0.6953125, "learning_rate": 0.00037998121561787197, "loss": 2.4684, "step": 2361 }, { "epoch": 0.06338557320738514, "grad_norm": 0.6953125, "learning_rate": 0.00038014222460754055, "loss": 2.4877, "step": 2362 }, { "epoch": 0.06341240875912409, "grad_norm": 0.69140625, "learning_rate": 0.0003803032335972091, "loss": 2.4618, "step": 2363 }, { "epoch": 0.06343924431086304, "grad_norm": 0.703125, "learning_rate": 0.00038046424258687773, "loss": 2.5235, "step": 2364 }, { "epoch": 0.06346607986260197, "grad_norm": 0.71875, "learning_rate": 0.0003806252515765463, "loss": 2.5989, "step": 2365 }, { "epoch": 0.06349291541434092, "grad_norm": 0.69140625, "learning_rate": 0.0003807862605662149, "loss": 2.4537, "step": 2366 }, { "epoch": 0.06351975096607987, "grad_norm": 0.6640625, "learning_rate": 0.0003809472695558835, "loss": 2.4057, "step": 2367 }, { "epoch": 0.0635465865178188, "grad_norm": 0.6796875, "learning_rate": 0.0003811082785455521, "loss": 2.441, "step": 2368 }, { "epoch": 0.06357342206955775, "grad_norm": 0.6640625, "learning_rate": 0.00038126928753522073, "loss": 2.4976, "step": 2369 }, { "epoch": 0.06360025762129669, "grad_norm": 0.68359375, "learning_rate": 0.0003814302965248893, "loss": 2.4633, "step": 2370 }, { "epoch": 0.06362709317303564, "grad_norm": 0.66796875, "learning_rate": 0.00038159130551455785, "loss": 2.3934, "step": 2371 }, { "epoch": 0.06365392872477459, "grad_norm": 0.703125, "learning_rate": 0.00038175231450422644, "loss": 2.4632, "step": 2372 }, { "epoch": 0.06368076427651352, "grad_norm": 0.67578125, "learning_rate": 0.00038191332349389503, "loss": 2.434, "step": 2373 }, { "epoch": 0.06370759982825247, "grad_norm": 0.68359375, "learning_rate": 0.0003820743324835636, "loss": 2.3716, "step": 2374 }, { "epoch": 0.06373443537999142, "grad_norm": 0.65234375, "learning_rate": 0.0003822353414732322, "loss": 2.3342, "step": 2375 }, { "epoch": 0.06376127093173035, "grad_norm": 0.6796875, "learning_rate": 0.00038239635046290085, "loss": 2.4409, "step": 2376 }, { "epoch": 0.0637881064834693, "grad_norm": 0.6875, "learning_rate": 0.00038255735945256944, "loss": 2.3629, "step": 2377 }, { "epoch": 0.06381494203520824, "grad_norm": 0.70703125, "learning_rate": 0.000382718368442238, "loss": 2.4872, "step": 2378 }, { "epoch": 0.06384177758694719, "grad_norm": 0.6796875, "learning_rate": 0.00038287937743190656, "loss": 2.3514, "step": 2379 }, { "epoch": 0.06386861313868614, "grad_norm": 0.6875, "learning_rate": 0.00038304038642157515, "loss": 2.4524, "step": 2380 }, { "epoch": 0.06389544869042507, "grad_norm": 0.671875, "learning_rate": 0.00038320139541124374, "loss": 2.3761, "step": 2381 }, { "epoch": 0.06392228424216402, "grad_norm": 0.66796875, "learning_rate": 0.00038336240440091233, "loss": 2.365, "step": 2382 }, { "epoch": 0.06394911979390296, "grad_norm": 0.66015625, "learning_rate": 0.00038352341339058097, "loss": 2.3866, "step": 2383 }, { "epoch": 0.0639759553456419, "grad_norm": 0.66015625, "learning_rate": 0.00038368442238024956, "loss": 2.3538, "step": 2384 }, { "epoch": 0.06400279089738085, "grad_norm": 0.66015625, "learning_rate": 0.00038384543136991815, "loss": 2.4537, "step": 2385 }, { "epoch": 0.06402962644911979, "grad_norm": 0.65234375, "learning_rate": 0.00038400644035958674, "loss": 2.3533, "step": 2386 }, { "epoch": 0.06405646200085874, "grad_norm": 0.6640625, "learning_rate": 0.00038416744934925527, "loss": 2.3977, "step": 2387 }, { "epoch": 0.06408329755259769, "grad_norm": 0.65234375, "learning_rate": 0.00038432845833892386, "loss": 2.3356, "step": 2388 }, { "epoch": 0.06411013310433662, "grad_norm": 0.64453125, "learning_rate": 0.00038448946732859245, "loss": 2.4372, "step": 2389 }, { "epoch": 0.06413696865607557, "grad_norm": 0.6796875, "learning_rate": 0.0003846504763182611, "loss": 2.3341, "step": 2390 }, { "epoch": 0.0641638042078145, "grad_norm": 0.66796875, "learning_rate": 0.0003848114853079297, "loss": 2.396, "step": 2391 }, { "epoch": 0.06419063975955346, "grad_norm": 0.67578125, "learning_rate": 0.00038497249429759827, "loss": 2.4084, "step": 2392 }, { "epoch": 0.0642174753112924, "grad_norm": 0.66015625, "learning_rate": 0.00038513350328726686, "loss": 2.3454, "step": 2393 }, { "epoch": 0.06424431086303134, "grad_norm": 0.67578125, "learning_rate": 0.0003852945122769354, "loss": 2.4533, "step": 2394 }, { "epoch": 0.06427114641477029, "grad_norm": 0.6875, "learning_rate": 0.000385455521266604, "loss": 2.4317, "step": 2395 }, { "epoch": 0.06429798196650924, "grad_norm": 0.66015625, "learning_rate": 0.00038561653025627257, "loss": 2.2836, "step": 2396 }, { "epoch": 0.06432481751824817, "grad_norm": 0.67578125, "learning_rate": 0.0003857775392459412, "loss": 2.4176, "step": 2397 }, { "epoch": 0.06435165306998712, "grad_norm": 0.671875, "learning_rate": 0.0003859385482356098, "loss": 2.3997, "step": 2398 }, { "epoch": 0.06437848862172606, "grad_norm": 0.671875, "learning_rate": 0.0003860995572252784, "loss": 2.4764, "step": 2399 }, { "epoch": 0.064405324173465, "grad_norm": 0.6640625, "learning_rate": 0.000386260566214947, "loss": 2.422, "step": 2400 }, { "epoch": 0.06443215972520396, "grad_norm": 0.6796875, "learning_rate": 0.00038642157520461557, "loss": 2.3464, "step": 2401 }, { "epoch": 0.06445899527694289, "grad_norm": 0.64453125, "learning_rate": 0.0003865825841942841, "loss": 2.2981, "step": 2402 }, { "epoch": 0.06448583082868184, "grad_norm": 0.6875, "learning_rate": 0.0003867435931839527, "loss": 2.5123, "step": 2403 }, { "epoch": 0.06451266638042079, "grad_norm": 0.66796875, "learning_rate": 0.00038690460217362133, "loss": 2.354, "step": 2404 }, { "epoch": 0.06453950193215972, "grad_norm": 0.69140625, "learning_rate": 0.0003870656111632899, "loss": 2.557, "step": 2405 }, { "epoch": 0.06456633748389867, "grad_norm": 0.67578125, "learning_rate": 0.0003872266201529585, "loss": 2.4093, "step": 2406 }, { "epoch": 0.06459317303563761, "grad_norm": 0.6953125, "learning_rate": 0.0003873876291426271, "loss": 2.441, "step": 2407 }, { "epoch": 0.06462000858737656, "grad_norm": 0.6953125, "learning_rate": 0.0003875486381322957, "loss": 2.3209, "step": 2408 }, { "epoch": 0.0646468441391155, "grad_norm": 0.66796875, "learning_rate": 0.00038770964712196433, "loss": 2.338, "step": 2409 }, { "epoch": 0.06467367969085444, "grad_norm": 0.67578125, "learning_rate": 0.00038787065611163286, "loss": 2.4403, "step": 2410 }, { "epoch": 0.06470051524259339, "grad_norm": 0.67578125, "learning_rate": 0.00038803166510130145, "loss": 2.5068, "step": 2411 }, { "epoch": 0.06472735079433233, "grad_norm": 0.6875, "learning_rate": 0.00038819267409097004, "loss": 2.4814, "step": 2412 }, { "epoch": 0.06475418634607127, "grad_norm": 0.69140625, "learning_rate": 0.00038835368308063863, "loss": 2.4883, "step": 2413 }, { "epoch": 0.06478102189781022, "grad_norm": 0.70703125, "learning_rate": 0.0003885146920703072, "loss": 2.4098, "step": 2414 }, { "epoch": 0.06480785744954916, "grad_norm": 0.66796875, "learning_rate": 0.0003886757010599758, "loss": 2.3622, "step": 2415 }, { "epoch": 0.06483469300128811, "grad_norm": 0.66015625, "learning_rate": 0.00038883671004964445, "loss": 2.3029, "step": 2416 }, { "epoch": 0.06486152855302706, "grad_norm": 0.671875, "learning_rate": 0.00038899771903931304, "loss": 2.3553, "step": 2417 }, { "epoch": 0.06488836410476599, "grad_norm": 0.67578125, "learning_rate": 0.00038915872802898157, "loss": 2.4481, "step": 2418 }, { "epoch": 0.06491519965650494, "grad_norm": 0.64453125, "learning_rate": 0.00038931973701865016, "loss": 2.2706, "step": 2419 }, { "epoch": 0.06494203520824388, "grad_norm": 0.67578125, "learning_rate": 0.00038948074600831875, "loss": 2.2443, "step": 2420 }, { "epoch": 0.06496887075998282, "grad_norm": 0.66796875, "learning_rate": 0.00038964175499798734, "loss": 2.4488, "step": 2421 }, { "epoch": 0.06499570631172177, "grad_norm": 0.671875, "learning_rate": 0.00038980276398765593, "loss": 2.5155, "step": 2422 }, { "epoch": 0.06502254186346071, "grad_norm": 0.6796875, "learning_rate": 0.00038996377297732457, "loss": 2.4428, "step": 2423 }, { "epoch": 0.06504937741519966, "grad_norm": 0.6875, "learning_rate": 0.00039012478196699316, "loss": 2.4024, "step": 2424 }, { "epoch": 0.0650762129669386, "grad_norm": 0.6796875, "learning_rate": 0.0003902857909566617, "loss": 2.5004, "step": 2425 }, { "epoch": 0.06510304851867754, "grad_norm": 0.66796875, "learning_rate": 0.0003904467999463303, "loss": 2.3832, "step": 2426 }, { "epoch": 0.06512988407041649, "grad_norm": 0.640625, "learning_rate": 0.00039060780893599887, "loss": 2.3295, "step": 2427 }, { "epoch": 0.06515671962215543, "grad_norm": 0.70703125, "learning_rate": 0.00039076881792566746, "loss": 2.3555, "step": 2428 }, { "epoch": 0.06518355517389438, "grad_norm": 0.6640625, "learning_rate": 0.0003909298269153361, "loss": 2.2812, "step": 2429 }, { "epoch": 0.06521039072563332, "grad_norm": 0.6640625, "learning_rate": 0.0003910908359050047, "loss": 2.3802, "step": 2430 }, { "epoch": 0.06523722627737226, "grad_norm": 0.66796875, "learning_rate": 0.0003912518448946733, "loss": 2.5326, "step": 2431 }, { "epoch": 0.06526406182911121, "grad_norm": 0.6796875, "learning_rate": 0.00039141285388434187, "loss": 2.3109, "step": 2432 }, { "epoch": 0.06529089738085014, "grad_norm": 0.68359375, "learning_rate": 0.0003915738628740104, "loss": 2.3792, "step": 2433 }, { "epoch": 0.06531773293258909, "grad_norm": 0.6875, "learning_rate": 0.000391734871863679, "loss": 2.4652, "step": 2434 }, { "epoch": 0.06534456848432804, "grad_norm": 0.65234375, "learning_rate": 0.0003918958808533476, "loss": 2.2718, "step": 2435 }, { "epoch": 0.06537140403606698, "grad_norm": 0.6875, "learning_rate": 0.0003920568898430162, "loss": 2.4468, "step": 2436 }, { "epoch": 0.06539823958780593, "grad_norm": 0.67578125, "learning_rate": 0.0003922178988326848, "loss": 2.3268, "step": 2437 }, { "epoch": 0.06542507513954487, "grad_norm": 0.65625, "learning_rate": 0.0003923789078223534, "loss": 2.3442, "step": 2438 }, { "epoch": 0.06545191069128381, "grad_norm": 0.67578125, "learning_rate": 0.000392539916812022, "loss": 2.3781, "step": 2439 }, { "epoch": 0.06547874624302276, "grad_norm": 0.65625, "learning_rate": 0.0003927009258016906, "loss": 2.4055, "step": 2440 }, { "epoch": 0.0655055817947617, "grad_norm": 0.640625, "learning_rate": 0.0003928619347913591, "loss": 2.2772, "step": 2441 }, { "epoch": 0.06553241734650064, "grad_norm": 0.69140625, "learning_rate": 0.0003930229437810277, "loss": 2.3931, "step": 2442 }, { "epoch": 0.06555925289823959, "grad_norm": 0.66796875, "learning_rate": 0.00039318395277069634, "loss": 2.3482, "step": 2443 }, { "epoch": 0.06558608844997853, "grad_norm": 0.66796875, "learning_rate": 0.00039334496176036493, "loss": 2.412, "step": 2444 }, { "epoch": 0.06561292400171748, "grad_norm": 0.65625, "learning_rate": 0.0003935059707500335, "loss": 2.3092, "step": 2445 }, { "epoch": 0.06563975955345643, "grad_norm": 0.65234375, "learning_rate": 0.0003936669797397021, "loss": 2.3825, "step": 2446 }, { "epoch": 0.06566659510519536, "grad_norm": 0.65625, "learning_rate": 0.0003938279887293707, "loss": 2.3404, "step": 2447 }, { "epoch": 0.06569343065693431, "grad_norm": 0.6640625, "learning_rate": 0.00039398899771903923, "loss": 2.3856, "step": 2448 }, { "epoch": 0.06572026620867324, "grad_norm": 0.640625, "learning_rate": 0.0003941500067087078, "loss": 2.4364, "step": 2449 }, { "epoch": 0.0657471017604122, "grad_norm": 0.66015625, "learning_rate": 0.00039431101569837646, "loss": 2.4011, "step": 2450 }, { "epoch": 0.06577393731215114, "grad_norm": 0.6796875, "learning_rate": 0.00039447202468804505, "loss": 2.4388, "step": 2451 }, { "epoch": 0.06580077286389008, "grad_norm": 0.65625, "learning_rate": 0.00039463303367771364, "loss": 2.3717, "step": 2452 }, { "epoch": 0.06582760841562903, "grad_norm": 0.62109375, "learning_rate": 0.00039479404266738223, "loss": 2.1888, "step": 2453 }, { "epoch": 0.06585444396736798, "grad_norm": 0.66796875, "learning_rate": 0.0003949550516570508, "loss": 2.3902, "step": 2454 }, { "epoch": 0.06588127951910691, "grad_norm": 0.65625, "learning_rate": 0.00039511606064671946, "loss": 2.3784, "step": 2455 }, { "epoch": 0.06590811507084586, "grad_norm": 0.6953125, "learning_rate": 0.00039527706963638794, "loss": 2.4753, "step": 2456 }, { "epoch": 0.0659349506225848, "grad_norm": 0.6796875, "learning_rate": 0.0003954380786260566, "loss": 2.4067, "step": 2457 }, { "epoch": 0.06596178617432374, "grad_norm": 0.68359375, "learning_rate": 0.00039559908761572517, "loss": 2.3578, "step": 2458 }, { "epoch": 0.0659886217260627, "grad_norm": 0.671875, "learning_rate": 0.00039576009660539376, "loss": 2.4027, "step": 2459 }, { "epoch": 0.06601545727780163, "grad_norm": 0.66015625, "learning_rate": 0.00039592110559506235, "loss": 2.3339, "step": 2460 }, { "epoch": 0.06604229282954058, "grad_norm": 0.65625, "learning_rate": 0.00039608211458473094, "loss": 2.353, "step": 2461 }, { "epoch": 0.06606912838127951, "grad_norm": 0.65234375, "learning_rate": 0.0003962431235743996, "loss": 2.3407, "step": 2462 }, { "epoch": 0.06609596393301846, "grad_norm": 0.6640625, "learning_rate": 0.00039640413256406817, "loss": 2.3457, "step": 2463 }, { "epoch": 0.06612279948475741, "grad_norm": 0.65234375, "learning_rate": 0.0003965651415537367, "loss": 2.3577, "step": 2464 }, { "epoch": 0.06614963503649635, "grad_norm": 0.68359375, "learning_rate": 0.0003967261505434053, "loss": 2.3865, "step": 2465 }, { "epoch": 0.0661764705882353, "grad_norm": 0.65625, "learning_rate": 0.0003968871595330739, "loss": 2.4318, "step": 2466 }, { "epoch": 0.06620330613997424, "grad_norm": 0.64453125, "learning_rate": 0.00039704816852274247, "loss": 2.3723, "step": 2467 }, { "epoch": 0.06623014169171318, "grad_norm": 0.65625, "learning_rate": 0.00039720917751241106, "loss": 2.2721, "step": 2468 }, { "epoch": 0.06625697724345213, "grad_norm": 0.66015625, "learning_rate": 0.0003973701865020797, "loss": 2.3043, "step": 2469 }, { "epoch": 0.06628381279519106, "grad_norm": 0.66796875, "learning_rate": 0.0003975311954917483, "loss": 2.3708, "step": 2470 }, { "epoch": 0.06631064834693001, "grad_norm": 0.65234375, "learning_rate": 0.0003976922044814169, "loss": 2.357, "step": 2471 }, { "epoch": 0.06633748389866896, "grad_norm": 0.65625, "learning_rate": 0.0003978532134710854, "loss": 2.4351, "step": 2472 }, { "epoch": 0.0663643194504079, "grad_norm": 0.67578125, "learning_rate": 0.000398014222460754, "loss": 2.4592, "step": 2473 }, { "epoch": 0.06639115500214685, "grad_norm": 0.6484375, "learning_rate": 0.0003981752314504226, "loss": 2.2991, "step": 2474 }, { "epoch": 0.0664179905538858, "grad_norm": 0.6484375, "learning_rate": 0.0003983362404400912, "loss": 2.343, "step": 2475 }, { "epoch": 0.06644482610562473, "grad_norm": 0.66796875, "learning_rate": 0.0003984972494297598, "loss": 2.3978, "step": 2476 }, { "epoch": 0.06647166165736368, "grad_norm": 0.6875, "learning_rate": 0.0003986582584194284, "loss": 2.4086, "step": 2477 }, { "epoch": 0.06649849720910261, "grad_norm": 0.6484375, "learning_rate": 0.000398819267409097, "loss": 2.3377, "step": 2478 }, { "epoch": 0.06652533276084156, "grad_norm": 0.66015625, "learning_rate": 0.00039898027639876553, "loss": 2.3567, "step": 2479 }, { "epoch": 0.06655216831258051, "grad_norm": 0.671875, "learning_rate": 0.0003991412853884341, "loss": 2.3643, "step": 2480 }, { "epoch": 0.06657900386431945, "grad_norm": 0.62109375, "learning_rate": 0.0003993022943781027, "loss": 2.2525, "step": 2481 }, { "epoch": 0.0666058394160584, "grad_norm": 0.6484375, "learning_rate": 0.0003994633033677713, "loss": 2.3242, "step": 2482 }, { "epoch": 0.06663267496779733, "grad_norm": 0.66796875, "learning_rate": 0.00039962431235743994, "loss": 2.434, "step": 2483 }, { "epoch": 0.06665951051953628, "grad_norm": 0.65234375, "learning_rate": 0.00039978532134710853, "loss": 2.3357, "step": 2484 }, { "epoch": 0.06668634607127523, "grad_norm": 0.68359375, "learning_rate": 0.0003999463303367771, "loss": 2.3587, "step": 2485 }, { "epoch": 0.06671318162301416, "grad_norm": 0.67578125, "learning_rate": 0.0004001073393264457, "loss": 2.4202, "step": 2486 }, { "epoch": 0.06674001717475311, "grad_norm": 0.6640625, "learning_rate": 0.00040026834831611424, "loss": 2.321, "step": 2487 }, { "epoch": 0.06676685272649206, "grad_norm": 0.67578125, "learning_rate": 0.00040042935730578283, "loss": 2.3937, "step": 2488 }, { "epoch": 0.066793688278231, "grad_norm": 0.64453125, "learning_rate": 0.0004005903662954515, "loss": 2.2651, "step": 2489 }, { "epoch": 0.06682052382996995, "grad_norm": 0.6484375, "learning_rate": 0.00040075137528512006, "loss": 2.3325, "step": 2490 }, { "epoch": 0.06684735938170888, "grad_norm": 0.66015625, "learning_rate": 0.00040091238427478865, "loss": 2.3922, "step": 2491 }, { "epoch": 0.06687419493344783, "grad_norm": 0.73828125, "learning_rate": 0.00040107339326445724, "loss": 2.3842, "step": 2492 }, { "epoch": 0.06690103048518678, "grad_norm": 0.72265625, "learning_rate": 0.00040123440225412583, "loss": 2.4249, "step": 2493 }, { "epoch": 0.06692786603692572, "grad_norm": 0.671875, "learning_rate": 0.0004013954112437944, "loss": 2.366, "step": 2494 }, { "epoch": 0.06695470158866466, "grad_norm": 0.66796875, "learning_rate": 0.00040155642023346295, "loss": 2.3394, "step": 2495 }, { "epoch": 0.06698153714040361, "grad_norm": 0.6875, "learning_rate": 0.0004017174292231316, "loss": 2.4568, "step": 2496 }, { "epoch": 0.06700837269214255, "grad_norm": 0.6875, "learning_rate": 0.0004018784382128002, "loss": 2.434, "step": 2497 }, { "epoch": 0.0670352082438815, "grad_norm": 0.703125, "learning_rate": 0.00040203944720246877, "loss": 2.4767, "step": 2498 }, { "epoch": 0.06706204379562043, "grad_norm": 0.671875, "learning_rate": 0.00040220045619213736, "loss": 2.3353, "step": 2499 }, { "epoch": 0.06708887934735938, "grad_norm": 0.67578125, "learning_rate": 0.00040236146518180595, "loss": 2.3036, "step": 2500 }, { "epoch": 0.06711571489909833, "grad_norm": 0.6953125, "learning_rate": 0.00040252247417147454, "loss": 2.4092, "step": 2501 }, { "epoch": 0.06714255045083727, "grad_norm": 0.6875, "learning_rate": 0.0004026834831611432, "loss": 2.5025, "step": 2502 }, { "epoch": 0.06716938600257621, "grad_norm": 0.6640625, "learning_rate": 0.0004028444921508117, "loss": 2.3158, "step": 2503 }, { "epoch": 0.06719622155431516, "grad_norm": 0.67578125, "learning_rate": 0.0004030055011404803, "loss": 2.3745, "step": 2504 }, { "epoch": 0.0672230571060541, "grad_norm": 0.6796875, "learning_rate": 0.0004031665101301489, "loss": 2.5131, "step": 2505 }, { "epoch": 0.06724989265779305, "grad_norm": 0.65234375, "learning_rate": 0.0004033275191198175, "loss": 2.3497, "step": 2506 }, { "epoch": 0.06727672820953198, "grad_norm": 0.6484375, "learning_rate": 0.00040348852810948607, "loss": 2.3899, "step": 2507 }, { "epoch": 0.06730356376127093, "grad_norm": 0.63671875, "learning_rate": 0.00040364953709915466, "loss": 2.4202, "step": 2508 }, { "epoch": 0.06733039931300988, "grad_norm": 0.6796875, "learning_rate": 0.0004038105460888233, "loss": 2.5183, "step": 2509 }, { "epoch": 0.06735723486474882, "grad_norm": 0.63671875, "learning_rate": 0.00040397155507849184, "loss": 2.4025, "step": 2510 }, { "epoch": 0.06738407041648777, "grad_norm": 0.64453125, "learning_rate": 0.0004041325640681604, "loss": 2.4539, "step": 2511 }, { "epoch": 0.0674109059682267, "grad_norm": 0.65625, "learning_rate": 0.000404293573057829, "loss": 2.4173, "step": 2512 }, { "epoch": 0.06743774151996565, "grad_norm": 0.66796875, "learning_rate": 0.0004044545820474976, "loss": 2.4009, "step": 2513 }, { "epoch": 0.0674645770717046, "grad_norm": 0.65625, "learning_rate": 0.0004046155910371662, "loss": 2.2908, "step": 2514 }, { "epoch": 0.06749141262344353, "grad_norm": 0.66796875, "learning_rate": 0.00040477660002683483, "loss": 2.3695, "step": 2515 }, { "epoch": 0.06751824817518248, "grad_norm": 0.65625, "learning_rate": 0.0004049376090165034, "loss": 2.4428, "step": 2516 }, { "epoch": 0.06754508372692143, "grad_norm": 0.6484375, "learning_rate": 0.000405098618006172, "loss": 2.3517, "step": 2517 }, { "epoch": 0.06757191927866037, "grad_norm": 0.6796875, "learning_rate": 0.00040525962699584055, "loss": 2.4095, "step": 2518 }, { "epoch": 0.06759875483039932, "grad_norm": 0.65625, "learning_rate": 0.00040542063598550913, "loss": 2.4274, "step": 2519 }, { "epoch": 0.06762559038213825, "grad_norm": 0.65625, "learning_rate": 0.0004055816449751777, "loss": 2.3772, "step": 2520 }, { "epoch": 0.0676524259338772, "grad_norm": 0.65625, "learning_rate": 0.0004057426539648463, "loss": 2.401, "step": 2521 }, { "epoch": 0.06767926148561615, "grad_norm": 0.62109375, "learning_rate": 0.00040590366295451495, "loss": 2.258, "step": 2522 }, { "epoch": 0.06770609703735508, "grad_norm": 0.65625, "learning_rate": 0.00040606467194418354, "loss": 2.3054, "step": 2523 }, { "epoch": 0.06773293258909403, "grad_norm": 0.66015625, "learning_rate": 0.00040622568093385213, "loss": 2.4066, "step": 2524 }, { "epoch": 0.06775976814083298, "grad_norm": 0.67578125, "learning_rate": 0.0004063866899235207, "loss": 2.4317, "step": 2525 }, { "epoch": 0.06778660369257192, "grad_norm": 0.6484375, "learning_rate": 0.00040654769891318925, "loss": 2.2166, "step": 2526 }, { "epoch": 0.06781343924431087, "grad_norm": 0.65625, "learning_rate": 0.00040670870790285784, "loss": 2.4038, "step": 2527 }, { "epoch": 0.0678402747960498, "grad_norm": 0.6328125, "learning_rate": 0.00040686971689252643, "loss": 2.3909, "step": 2528 }, { "epoch": 0.06786711034778875, "grad_norm": 0.6484375, "learning_rate": 0.0004070307258821951, "loss": 2.3918, "step": 2529 }, { "epoch": 0.0678939458995277, "grad_norm": 0.640625, "learning_rate": 0.00040719173487186366, "loss": 2.3429, "step": 2530 }, { "epoch": 0.06792078145126663, "grad_norm": 0.640625, "learning_rate": 0.00040735274386153225, "loss": 2.3475, "step": 2531 }, { "epoch": 0.06794761700300558, "grad_norm": 0.63671875, "learning_rate": 0.00040751375285120084, "loss": 2.4019, "step": 2532 }, { "epoch": 0.06797445255474452, "grad_norm": 0.65234375, "learning_rate": 0.00040767476184086943, "loss": 2.3457, "step": 2533 }, { "epoch": 0.06800128810648347, "grad_norm": 0.6484375, "learning_rate": 0.00040783577083053796, "loss": 2.2958, "step": 2534 }, { "epoch": 0.06802812365822242, "grad_norm": 0.64453125, "learning_rate": 0.00040799677982020655, "loss": 2.3398, "step": 2535 }, { "epoch": 0.06805495920996135, "grad_norm": 0.640625, "learning_rate": 0.0004081577888098752, "loss": 2.3177, "step": 2536 }, { "epoch": 0.0680817947617003, "grad_norm": 0.65625, "learning_rate": 0.0004083187977995438, "loss": 2.383, "step": 2537 }, { "epoch": 0.06810863031343925, "grad_norm": 0.6640625, "learning_rate": 0.00040847980678921237, "loss": 2.4592, "step": 2538 }, { "epoch": 0.06813546586517819, "grad_norm": 0.6640625, "learning_rate": 0.00040864081577888096, "loss": 2.4282, "step": 2539 }, { "epoch": 0.06816230141691713, "grad_norm": 0.66796875, "learning_rate": 0.00040880182476854955, "loss": 2.5189, "step": 2540 }, { "epoch": 0.06818913696865607, "grad_norm": 0.640625, "learning_rate": 0.0004089628337582181, "loss": 2.3912, "step": 2541 }, { "epoch": 0.06821597252039502, "grad_norm": 0.66015625, "learning_rate": 0.00040912384274788667, "loss": 2.4729, "step": 2542 }, { "epoch": 0.06824280807213397, "grad_norm": 0.65625, "learning_rate": 0.0004092848517375553, "loss": 2.4181, "step": 2543 }, { "epoch": 0.0682696436238729, "grad_norm": 0.6640625, "learning_rate": 0.0004094458607272239, "loss": 2.3659, "step": 2544 }, { "epoch": 0.06829647917561185, "grad_norm": 0.67578125, "learning_rate": 0.0004096068697168925, "loss": 2.5079, "step": 2545 }, { "epoch": 0.0683233147273508, "grad_norm": 0.6484375, "learning_rate": 0.0004097678787065611, "loss": 2.3304, "step": 2546 }, { "epoch": 0.06835015027908974, "grad_norm": 0.6484375, "learning_rate": 0.00040992888769622967, "loss": 2.3392, "step": 2547 }, { "epoch": 0.06837698583082868, "grad_norm": 0.6796875, "learning_rate": 0.0004100898966858983, "loss": 2.4659, "step": 2548 }, { "epoch": 0.06840382138256762, "grad_norm": 0.64453125, "learning_rate": 0.00041025090567556685, "loss": 2.4636, "step": 2549 }, { "epoch": 0.06843065693430657, "grad_norm": 0.64453125, "learning_rate": 0.00041041191466523544, "loss": 2.3865, "step": 2550 }, { "epoch": 0.06845749248604552, "grad_norm": 0.64453125, "learning_rate": 0.000410572923654904, "loss": 2.4289, "step": 2551 }, { "epoch": 0.06848432803778445, "grad_norm": 0.64453125, "learning_rate": 0.0004107339326445726, "loss": 2.3536, "step": 2552 }, { "epoch": 0.0685111635895234, "grad_norm": 0.671875, "learning_rate": 0.0004108949416342412, "loss": 2.3571, "step": 2553 }, { "epoch": 0.06853799914126235, "grad_norm": 0.6640625, "learning_rate": 0.0004110559506239098, "loss": 2.4443, "step": 2554 }, { "epoch": 0.06856483469300129, "grad_norm": 0.6640625, "learning_rate": 0.00041121695961357843, "loss": 2.396, "step": 2555 }, { "epoch": 0.06859167024474024, "grad_norm": 0.64453125, "learning_rate": 0.000411377968603247, "loss": 2.4135, "step": 2556 }, { "epoch": 0.06861850579647917, "grad_norm": 0.640625, "learning_rate": 0.00041153897759291556, "loss": 2.3607, "step": 2557 }, { "epoch": 0.06864534134821812, "grad_norm": 0.625, "learning_rate": 0.00041169998658258415, "loss": 2.2913, "step": 2558 }, { "epoch": 0.06867217689995707, "grad_norm": 0.65625, "learning_rate": 0.00041186099557225273, "loss": 2.4723, "step": 2559 }, { "epoch": 0.068699012451696, "grad_norm": 0.65234375, "learning_rate": 0.0004120220045619213, "loss": 2.4425, "step": 2560 }, { "epoch": 0.06872584800343495, "grad_norm": 0.63671875, "learning_rate": 0.0004121830135515899, "loss": 2.294, "step": 2561 }, { "epoch": 0.06875268355517389, "grad_norm": 0.64453125, "learning_rate": 0.00041234402254125855, "loss": 2.4533, "step": 2562 }, { "epoch": 0.06877951910691284, "grad_norm": 0.65234375, "learning_rate": 0.00041250503153092714, "loss": 2.3118, "step": 2563 }, { "epoch": 0.06880635465865179, "grad_norm": 0.64453125, "learning_rate": 0.0004126660405205957, "loss": 2.3019, "step": 2564 }, { "epoch": 0.06883319021039072, "grad_norm": 0.6484375, "learning_rate": 0.00041282704951026427, "loss": 2.3286, "step": 2565 }, { "epoch": 0.06886002576212967, "grad_norm": 0.6640625, "learning_rate": 0.00041298805849993285, "loss": 2.4648, "step": 2566 }, { "epoch": 0.06888686131386862, "grad_norm": 0.6484375, "learning_rate": 0.00041314906748960144, "loss": 2.3397, "step": 2567 }, { "epoch": 0.06891369686560755, "grad_norm": 0.6484375, "learning_rate": 0.00041331007647927003, "loss": 2.3032, "step": 2568 }, { "epoch": 0.0689405324173465, "grad_norm": 0.65625, "learning_rate": 0.0004134710854689387, "loss": 2.4803, "step": 2569 }, { "epoch": 0.06896736796908544, "grad_norm": 0.6640625, "learning_rate": 0.00041363209445860726, "loss": 2.4045, "step": 2570 }, { "epoch": 0.06899420352082439, "grad_norm": 0.64453125, "learning_rate": 0.00041379310344827585, "loss": 2.3247, "step": 2571 }, { "epoch": 0.06902103907256334, "grad_norm": 0.63671875, "learning_rate": 0.0004139541124379444, "loss": 2.3463, "step": 2572 }, { "epoch": 0.06904787462430227, "grad_norm": 0.6015625, "learning_rate": 0.000414115121427613, "loss": 2.2438, "step": 2573 }, { "epoch": 0.06907471017604122, "grad_norm": 0.62890625, "learning_rate": 0.00041427613041728156, "loss": 2.3028, "step": 2574 }, { "epoch": 0.06910154572778017, "grad_norm": 0.65625, "learning_rate": 0.0004144371394069502, "loss": 2.3897, "step": 2575 }, { "epoch": 0.0691283812795191, "grad_norm": 0.65625, "learning_rate": 0.0004145981483966188, "loss": 2.4787, "step": 2576 }, { "epoch": 0.06915521683125805, "grad_norm": 0.671875, "learning_rate": 0.0004147591573862874, "loss": 2.4068, "step": 2577 }, { "epoch": 0.06918205238299699, "grad_norm": 0.64453125, "learning_rate": 0.00041492016637595597, "loss": 2.3659, "step": 2578 }, { "epoch": 0.06920888793473594, "grad_norm": 0.640625, "learning_rate": 0.00041508117536562456, "loss": 2.2678, "step": 2579 }, { "epoch": 0.06923572348647489, "grad_norm": 0.640625, "learning_rate": 0.0004152421843552931, "loss": 2.3541, "step": 2580 }, { "epoch": 0.06926255903821382, "grad_norm": 0.6640625, "learning_rate": 0.0004154031933449617, "loss": 2.3482, "step": 2581 }, { "epoch": 0.06928939458995277, "grad_norm": 0.640625, "learning_rate": 0.0004155642023346303, "loss": 2.3587, "step": 2582 }, { "epoch": 0.0693162301416917, "grad_norm": 0.6328125, "learning_rate": 0.0004157252113242989, "loss": 2.2699, "step": 2583 }, { "epoch": 0.06934306569343066, "grad_norm": 0.63671875, "learning_rate": 0.0004158862203139675, "loss": 2.3199, "step": 2584 }, { "epoch": 0.0693699012451696, "grad_norm": 0.64453125, "learning_rate": 0.0004160472293036361, "loss": 2.2551, "step": 2585 }, { "epoch": 0.06939673679690854, "grad_norm": 0.640625, "learning_rate": 0.0004162082382933047, "loss": 2.3065, "step": 2586 }, { "epoch": 0.06942357234864749, "grad_norm": 0.65234375, "learning_rate": 0.00041636924728297327, "loss": 2.3822, "step": 2587 }, { "epoch": 0.06945040790038644, "grad_norm": 0.640625, "learning_rate": 0.0004165302562726418, "loss": 2.3462, "step": 2588 }, { "epoch": 0.06947724345212537, "grad_norm": 0.62890625, "learning_rate": 0.00041669126526231045, "loss": 2.3859, "step": 2589 }, { "epoch": 0.06950407900386432, "grad_norm": 0.640625, "learning_rate": 0.00041685227425197904, "loss": 2.2751, "step": 2590 }, { "epoch": 0.06953091455560326, "grad_norm": 0.640625, "learning_rate": 0.0004170132832416476, "loss": 2.3959, "step": 2591 }, { "epoch": 0.0695577501073422, "grad_norm": 0.640625, "learning_rate": 0.0004171742922313162, "loss": 2.3474, "step": 2592 }, { "epoch": 0.06958458565908116, "grad_norm": 0.640625, "learning_rate": 0.0004173353012209848, "loss": 2.3433, "step": 2593 }, { "epoch": 0.06961142121082009, "grad_norm": 0.65625, "learning_rate": 0.0004174963102106534, "loss": 2.3732, "step": 2594 }, { "epoch": 0.06963825676255904, "grad_norm": 0.65234375, "learning_rate": 0.0004176573192003219, "loss": 2.4231, "step": 2595 }, { "epoch": 0.06966509231429799, "grad_norm": 0.64453125, "learning_rate": 0.00041781832818999057, "loss": 2.4496, "step": 2596 }, { "epoch": 0.06969192786603692, "grad_norm": 0.6328125, "learning_rate": 0.00041797933717965916, "loss": 2.2994, "step": 2597 }, { "epoch": 0.06971876341777587, "grad_norm": 0.64453125, "learning_rate": 0.00041814034616932775, "loss": 2.3201, "step": 2598 }, { "epoch": 0.06974559896951481, "grad_norm": 0.64453125, "learning_rate": 0.00041830135515899633, "loss": 2.3265, "step": 2599 }, { "epoch": 0.06977243452125376, "grad_norm": 0.65234375, "learning_rate": 0.0004184623641486649, "loss": 2.3413, "step": 2600 }, { "epoch": 0.0697992700729927, "grad_norm": 0.63671875, "learning_rate": 0.00041862337313833357, "loss": 2.2818, "step": 2601 }, { "epoch": 0.06982610562473164, "grad_norm": 0.6640625, "learning_rate": 0.00041878438212800215, "loss": 2.3315, "step": 2602 }, { "epoch": 0.06985294117647059, "grad_norm": 0.625, "learning_rate": 0.0004189453911176707, "loss": 2.2283, "step": 2603 }, { "epoch": 0.06987977672820954, "grad_norm": 0.62890625, "learning_rate": 0.0004191064001073393, "loss": 2.2542, "step": 2604 }, { "epoch": 0.06990661227994847, "grad_norm": 0.66015625, "learning_rate": 0.00041926740909700787, "loss": 2.459, "step": 2605 }, { "epoch": 0.06993344783168742, "grad_norm": 0.6484375, "learning_rate": 0.00041942841808667645, "loss": 2.3397, "step": 2606 }, { "epoch": 0.06996028338342636, "grad_norm": 0.6328125, "learning_rate": 0.00041958942707634504, "loss": 2.3715, "step": 2607 }, { "epoch": 0.06998711893516531, "grad_norm": 0.63671875, "learning_rate": 0.0004197504360660137, "loss": 2.3917, "step": 2608 }, { "epoch": 0.07001395448690426, "grad_norm": 0.65234375, "learning_rate": 0.0004199114450556823, "loss": 2.4024, "step": 2609 }, { "epoch": 0.07004079003864319, "grad_norm": 0.64453125, "learning_rate": 0.00042007245404535086, "loss": 2.2844, "step": 2610 }, { "epoch": 0.07006762559038214, "grad_norm": 0.625, "learning_rate": 0.0004202334630350194, "loss": 2.3471, "step": 2611 }, { "epoch": 0.07009446114212108, "grad_norm": 0.63671875, "learning_rate": 0.000420394472024688, "loss": 2.3803, "step": 2612 }, { "epoch": 0.07012129669386002, "grad_norm": 0.640625, "learning_rate": 0.0004205554810143566, "loss": 2.2931, "step": 2613 }, { "epoch": 0.07014813224559897, "grad_norm": 0.640625, "learning_rate": 0.00042071649000402516, "loss": 2.3274, "step": 2614 }, { "epoch": 0.07017496779733791, "grad_norm": 0.6484375, "learning_rate": 0.0004208774989936938, "loss": 2.4407, "step": 2615 }, { "epoch": 0.07020180334907686, "grad_norm": 0.65234375, "learning_rate": 0.0004210385079833624, "loss": 2.3807, "step": 2616 }, { "epoch": 0.0702286389008158, "grad_norm": 0.640625, "learning_rate": 0.000421199516973031, "loss": 2.2886, "step": 2617 }, { "epoch": 0.07025547445255474, "grad_norm": 0.65234375, "learning_rate": 0.00042136052596269957, "loss": 2.3896, "step": 2618 }, { "epoch": 0.07028231000429369, "grad_norm": 0.6484375, "learning_rate": 0.0004215215349523681, "loss": 2.288, "step": 2619 }, { "epoch": 0.07030914555603263, "grad_norm": 0.6328125, "learning_rate": 0.0004216825439420367, "loss": 2.2571, "step": 2620 }, { "epoch": 0.07033598110777158, "grad_norm": 0.640625, "learning_rate": 0.0004218435529317053, "loss": 2.335, "step": 2621 }, { "epoch": 0.07036281665951052, "grad_norm": 0.6484375, "learning_rate": 0.0004220045619213739, "loss": 2.3502, "step": 2622 }, { "epoch": 0.07038965221124946, "grad_norm": 0.65625, "learning_rate": 0.0004221655709110425, "loss": 2.3898, "step": 2623 }, { "epoch": 0.07041648776298841, "grad_norm": 0.6640625, "learning_rate": 0.0004223265799007111, "loss": 2.3433, "step": 2624 }, { "epoch": 0.07044332331472736, "grad_norm": 0.62890625, "learning_rate": 0.0004224875888903797, "loss": 2.2785, "step": 2625 }, { "epoch": 0.07047015886646629, "grad_norm": 0.640625, "learning_rate": 0.00042264859788004823, "loss": 2.3222, "step": 2626 }, { "epoch": 0.07049699441820524, "grad_norm": 0.6640625, "learning_rate": 0.0004228096068697168, "loss": 2.4153, "step": 2627 }, { "epoch": 0.07052382996994418, "grad_norm": 0.6328125, "learning_rate": 0.0004229706158593854, "loss": 2.2932, "step": 2628 }, { "epoch": 0.07055066552168313, "grad_norm": 0.62109375, "learning_rate": 0.00042313162484905405, "loss": 2.2142, "step": 2629 }, { "epoch": 0.07057750107342207, "grad_norm": 0.65234375, "learning_rate": 0.00042329263383872264, "loss": 2.3198, "step": 2630 }, { "epoch": 0.07060433662516101, "grad_norm": 0.640625, "learning_rate": 0.0004234536428283912, "loss": 2.4245, "step": 2631 }, { "epoch": 0.07063117217689996, "grad_norm": 0.640625, "learning_rate": 0.0004236146518180598, "loss": 2.2564, "step": 2632 }, { "epoch": 0.0706580077286389, "grad_norm": 0.65234375, "learning_rate": 0.0004237756608077284, "loss": 2.3753, "step": 2633 }, { "epoch": 0.07068484328037784, "grad_norm": 0.66015625, "learning_rate": 0.00042393666979739694, "loss": 2.3577, "step": 2634 }, { "epoch": 0.07071167883211679, "grad_norm": 0.640625, "learning_rate": 0.0004240976787870656, "loss": 2.29, "step": 2635 }, { "epoch": 0.07073851438385573, "grad_norm": 0.62890625, "learning_rate": 0.00042425868777673417, "loss": 2.3216, "step": 2636 }, { "epoch": 0.07076534993559468, "grad_norm": 0.6640625, "learning_rate": 0.00042441969676640276, "loss": 2.4214, "step": 2637 }, { "epoch": 0.07079218548733363, "grad_norm": 0.640625, "learning_rate": 0.00042458070575607135, "loss": 2.3317, "step": 2638 }, { "epoch": 0.07081902103907256, "grad_norm": 0.640625, "learning_rate": 0.00042474171474573993, "loss": 2.3422, "step": 2639 }, { "epoch": 0.07084585659081151, "grad_norm": 0.64453125, "learning_rate": 0.0004249027237354085, "loss": 2.3189, "step": 2640 }, { "epoch": 0.07087269214255044, "grad_norm": 0.625, "learning_rate": 0.00042506373272507717, "loss": 2.2222, "step": 2641 }, { "epoch": 0.0708995276942894, "grad_norm": 0.671875, "learning_rate": 0.0004252247417147457, "loss": 2.4113, "step": 2642 }, { "epoch": 0.07092636324602834, "grad_norm": 0.65234375, "learning_rate": 0.0004253857507044143, "loss": 2.3884, "step": 2643 }, { "epoch": 0.07095319879776728, "grad_norm": 0.625, "learning_rate": 0.0004255467596940829, "loss": 2.3319, "step": 2644 }, { "epoch": 0.07098003434950623, "grad_norm": 0.62890625, "learning_rate": 0.00042570776868375147, "loss": 2.3393, "step": 2645 }, { "epoch": 0.07100686990124518, "grad_norm": 0.6171875, "learning_rate": 0.00042586877767342005, "loss": 2.2347, "step": 2646 }, { "epoch": 0.07103370545298411, "grad_norm": 0.640625, "learning_rate": 0.00042602978666308864, "loss": 2.3746, "step": 2647 }, { "epoch": 0.07106054100472306, "grad_norm": 0.62890625, "learning_rate": 0.0004261907956527573, "loss": 2.2471, "step": 2648 }, { "epoch": 0.071087376556462, "grad_norm": 0.6171875, "learning_rate": 0.0004263518046424258, "loss": 2.2338, "step": 2649 }, { "epoch": 0.07111421210820094, "grad_norm": 0.62109375, "learning_rate": 0.0004265128136320944, "loss": 2.2894, "step": 2650 }, { "epoch": 0.0711410476599399, "grad_norm": 0.6640625, "learning_rate": 0.000426673822621763, "loss": 2.3492, "step": 2651 }, { "epoch": 0.07116788321167883, "grad_norm": 0.6328125, "learning_rate": 0.0004268348316114316, "loss": 2.265, "step": 2652 }, { "epoch": 0.07119471876341778, "grad_norm": 0.640625, "learning_rate": 0.0004269958406011002, "loss": 2.2933, "step": 2653 }, { "epoch": 0.07122155431515673, "grad_norm": 0.6171875, "learning_rate": 0.00042715684959076876, "loss": 2.3098, "step": 2654 }, { "epoch": 0.07124838986689566, "grad_norm": 0.62890625, "learning_rate": 0.0004273178585804374, "loss": 2.3189, "step": 2655 }, { "epoch": 0.07127522541863461, "grad_norm": 0.62890625, "learning_rate": 0.000427478867570106, "loss": 2.3133, "step": 2656 }, { "epoch": 0.07130206097037355, "grad_norm": 0.6484375, "learning_rate": 0.00042763987655977453, "loss": 2.3558, "step": 2657 }, { "epoch": 0.0713288965221125, "grad_norm": 0.62890625, "learning_rate": 0.0004278008855494431, "loss": 2.2571, "step": 2658 }, { "epoch": 0.07135573207385144, "grad_norm": 0.6484375, "learning_rate": 0.0004279618945391117, "loss": 2.3814, "step": 2659 }, { "epoch": 0.07138256762559038, "grad_norm": 0.61328125, "learning_rate": 0.0004281229035287803, "loss": 2.2924, "step": 2660 }, { "epoch": 0.07140940317732933, "grad_norm": 0.6328125, "learning_rate": 0.00042828391251844894, "loss": 2.2142, "step": 2661 }, { "epoch": 0.07143623872906826, "grad_norm": 0.65625, "learning_rate": 0.0004284449215081175, "loss": 2.3424, "step": 2662 }, { "epoch": 0.07146307428080721, "grad_norm": 0.640625, "learning_rate": 0.0004286059304977861, "loss": 2.3199, "step": 2663 }, { "epoch": 0.07148990983254616, "grad_norm": 0.62109375, "learning_rate": 0.0004287669394874547, "loss": 2.2, "step": 2664 }, { "epoch": 0.0715167453842851, "grad_norm": 0.65234375, "learning_rate": 0.00042892794847712324, "loss": 2.339, "step": 2665 }, { "epoch": 0.07154358093602405, "grad_norm": 0.62890625, "learning_rate": 0.00042908895746679183, "loss": 2.4072, "step": 2666 }, { "epoch": 0.071570416487763, "grad_norm": 0.63671875, "learning_rate": 0.0004292499664564604, "loss": 2.357, "step": 2667 }, { "epoch": 0.07159725203950193, "grad_norm": 0.64453125, "learning_rate": 0.00042941097544612906, "loss": 2.3542, "step": 2668 }, { "epoch": 0.07162408759124088, "grad_norm": 0.765625, "learning_rate": 0.00042957198443579765, "loss": 2.5061, "step": 2669 }, { "epoch": 0.07165092314297981, "grad_norm": 0.671875, "learning_rate": 0.00042973299342546624, "loss": 2.4219, "step": 2670 }, { "epoch": 0.07167775869471876, "grad_norm": 0.65625, "learning_rate": 0.0004298940024151348, "loss": 2.414, "step": 2671 }, { "epoch": 0.07170459424645771, "grad_norm": 0.65234375, "learning_rate": 0.0004300550114048034, "loss": 2.3668, "step": 2672 }, { "epoch": 0.07173142979819665, "grad_norm": 0.65234375, "learning_rate": 0.00043021602039447195, "loss": 2.3047, "step": 2673 }, { "epoch": 0.0717582653499356, "grad_norm": 0.6328125, "learning_rate": 0.00043037702938414054, "loss": 2.3478, "step": 2674 }, { "epoch": 0.07178510090167455, "grad_norm": 0.64453125, "learning_rate": 0.0004305380383738092, "loss": 2.4266, "step": 2675 }, { "epoch": 0.07181193645341348, "grad_norm": 0.640625, "learning_rate": 0.00043069904736347777, "loss": 2.4057, "step": 2676 }, { "epoch": 0.07183877200515243, "grad_norm": 0.65625, "learning_rate": 0.00043086005635314636, "loss": 2.3496, "step": 2677 }, { "epoch": 0.07186560755689136, "grad_norm": 0.640625, "learning_rate": 0.00043102106534281495, "loss": 2.4449, "step": 2678 }, { "epoch": 0.07189244310863031, "grad_norm": 0.6328125, "learning_rate": 0.00043118207433248353, "loss": 2.3574, "step": 2679 }, { "epoch": 0.07191927866036926, "grad_norm": 0.63671875, "learning_rate": 0.00043134308332215207, "loss": 2.2733, "step": 2680 }, { "epoch": 0.0719461142121082, "grad_norm": 0.66796875, "learning_rate": 0.00043150409231182066, "loss": 2.4335, "step": 2681 }, { "epoch": 0.07197294976384715, "grad_norm": 0.64453125, "learning_rate": 0.0004316651013014893, "loss": 2.3287, "step": 2682 }, { "epoch": 0.07199978531558608, "grad_norm": 0.6328125, "learning_rate": 0.0004318261102911579, "loss": 2.3174, "step": 2683 }, { "epoch": 0.07202662086732503, "grad_norm": 0.62109375, "learning_rate": 0.0004319871192808265, "loss": 2.3572, "step": 2684 }, { "epoch": 0.07205345641906398, "grad_norm": 0.6328125, "learning_rate": 0.00043214812827049507, "loss": 2.3593, "step": 2685 }, { "epoch": 0.07208029197080291, "grad_norm": 0.625, "learning_rate": 0.00043230913726016365, "loss": 2.3463, "step": 2686 }, { "epoch": 0.07210712752254186, "grad_norm": 0.6328125, "learning_rate": 0.0004324701462498323, "loss": 2.2229, "step": 2687 }, { "epoch": 0.07213396307428081, "grad_norm": 0.625, "learning_rate": 0.0004326311552395008, "loss": 2.3253, "step": 2688 }, { "epoch": 0.07216079862601975, "grad_norm": 0.63671875, "learning_rate": 0.0004327921642291694, "loss": 2.3633, "step": 2689 }, { "epoch": 0.0721876341777587, "grad_norm": 0.65234375, "learning_rate": 0.000432953173218838, "loss": 2.4341, "step": 2690 }, { "epoch": 0.07221446972949763, "grad_norm": 0.6328125, "learning_rate": 0.0004331141822085066, "loss": 2.3206, "step": 2691 }, { "epoch": 0.07224130528123658, "grad_norm": 0.6328125, "learning_rate": 0.0004332751911981752, "loss": 2.4564, "step": 2692 }, { "epoch": 0.07226814083297553, "grad_norm": 0.63671875, "learning_rate": 0.0004334362001878438, "loss": 2.3828, "step": 2693 }, { "epoch": 0.07229497638471447, "grad_norm": 0.6171875, "learning_rate": 0.0004335972091775124, "loss": 2.3595, "step": 2694 }, { "epoch": 0.07232181193645341, "grad_norm": 0.609375, "learning_rate": 0.000433758218167181, "loss": 2.3471, "step": 2695 }, { "epoch": 0.07234864748819236, "grad_norm": 0.62890625, "learning_rate": 0.00043391922715684954, "loss": 2.379, "step": 2696 }, { "epoch": 0.0723754830399313, "grad_norm": 0.625, "learning_rate": 0.00043408023614651813, "loss": 2.2721, "step": 2697 }, { "epoch": 0.07240231859167025, "grad_norm": 0.61328125, "learning_rate": 0.0004342412451361867, "loss": 2.2606, "step": 2698 }, { "epoch": 0.07242915414340918, "grad_norm": 0.625, "learning_rate": 0.0004344022541258553, "loss": 2.2863, "step": 2699 }, { "epoch": 0.07245598969514813, "grad_norm": 0.640625, "learning_rate": 0.0004345632631155239, "loss": 2.3855, "step": 2700 }, { "epoch": 0.07248282524688708, "grad_norm": 0.63671875, "learning_rate": 0.00043472427210519254, "loss": 2.3882, "step": 2701 }, { "epoch": 0.07250966079862602, "grad_norm": 0.62109375, "learning_rate": 0.0004348852810948611, "loss": 2.3028, "step": 2702 }, { "epoch": 0.07253649635036497, "grad_norm": 0.66015625, "learning_rate": 0.0004350462900845297, "loss": 2.369, "step": 2703 }, { "epoch": 0.07256333190210391, "grad_norm": 0.62109375, "learning_rate": 0.00043520729907419825, "loss": 2.3673, "step": 2704 }, { "epoch": 0.07259016745384285, "grad_norm": 0.64453125, "learning_rate": 0.00043536830806386684, "loss": 2.3854, "step": 2705 }, { "epoch": 0.0726170030055818, "grad_norm": 0.6015625, "learning_rate": 0.00043552931705353543, "loss": 2.265, "step": 2706 }, { "epoch": 0.07264383855732073, "grad_norm": 0.61328125, "learning_rate": 0.000435690326043204, "loss": 2.3089, "step": 2707 }, { "epoch": 0.07267067410905968, "grad_norm": 0.62109375, "learning_rate": 0.00043585133503287266, "loss": 2.372, "step": 2708 }, { "epoch": 0.07269750966079863, "grad_norm": 0.6328125, "learning_rate": 0.00043601234402254125, "loss": 2.4593, "step": 2709 }, { "epoch": 0.07272434521253757, "grad_norm": 0.625, "learning_rate": 0.00043617335301220984, "loss": 2.2408, "step": 2710 }, { "epoch": 0.07275118076427652, "grad_norm": 0.6484375, "learning_rate": 0.00043633436200187837, "loss": 2.3435, "step": 2711 }, { "epoch": 0.07277801631601545, "grad_norm": 0.6171875, "learning_rate": 0.00043649537099154696, "loss": 2.2786, "step": 2712 }, { "epoch": 0.0728048518677544, "grad_norm": 0.6484375, "learning_rate": 0.00043665637998121555, "loss": 2.398, "step": 2713 }, { "epoch": 0.07283168741949335, "grad_norm": 0.6328125, "learning_rate": 0.00043681738897088414, "loss": 2.3271, "step": 2714 }, { "epoch": 0.07285852297123228, "grad_norm": 0.60546875, "learning_rate": 0.0004369783979605528, "loss": 2.2565, "step": 2715 }, { "epoch": 0.07288535852297123, "grad_norm": 0.60546875, "learning_rate": 0.00043713940695022137, "loss": 2.2332, "step": 2716 }, { "epoch": 0.07291219407471018, "grad_norm": 0.6328125, "learning_rate": 0.00043730041593988996, "loss": 2.2782, "step": 2717 }, { "epoch": 0.07293902962644912, "grad_norm": 0.6484375, "learning_rate": 0.00043746142492955855, "loss": 2.387, "step": 2718 }, { "epoch": 0.07296586517818807, "grad_norm": 0.625, "learning_rate": 0.0004376224339192271, "loss": 2.4616, "step": 2719 }, { "epoch": 0.072992700729927, "grad_norm": 0.64453125, "learning_rate": 0.00043778344290889567, "loss": 2.4228, "step": 2720 }, { "epoch": 0.07301953628166595, "grad_norm": 0.62890625, "learning_rate": 0.0004379444518985643, "loss": 2.3623, "step": 2721 }, { "epoch": 0.0730463718334049, "grad_norm": 0.61328125, "learning_rate": 0.0004381054608882329, "loss": 2.3109, "step": 2722 }, { "epoch": 0.07307320738514383, "grad_norm": 0.609375, "learning_rate": 0.0004382664698779015, "loss": 2.1633, "step": 2723 }, { "epoch": 0.07310004293688278, "grad_norm": 0.62109375, "learning_rate": 0.0004384274788675701, "loss": 2.3, "step": 2724 }, { "epoch": 0.07312687848862173, "grad_norm": 0.61328125, "learning_rate": 0.00043858848785723867, "loss": 2.261, "step": 2725 }, { "epoch": 0.07315371404036067, "grad_norm": 0.63671875, "learning_rate": 0.00043874949684690725, "loss": 2.4151, "step": 2726 }, { "epoch": 0.07318054959209962, "grad_norm": 0.62109375, "learning_rate": 0.0004389105058365758, "loss": 2.3368, "step": 2727 }, { "epoch": 0.07320738514383855, "grad_norm": 0.6171875, "learning_rate": 0.00043907151482624443, "loss": 2.2552, "step": 2728 }, { "epoch": 0.0732342206955775, "grad_norm": 0.6328125, "learning_rate": 0.000439232523815913, "loss": 2.2721, "step": 2729 }, { "epoch": 0.07326105624731645, "grad_norm": 0.62890625, "learning_rate": 0.0004393935328055816, "loss": 2.2425, "step": 2730 }, { "epoch": 0.07328789179905539, "grad_norm": 0.609375, "learning_rate": 0.0004395545417952502, "loss": 2.2318, "step": 2731 }, { "epoch": 0.07331472735079433, "grad_norm": 0.6171875, "learning_rate": 0.0004397155507849188, "loss": 2.2956, "step": 2732 }, { "epoch": 0.07334156290253327, "grad_norm": 0.62109375, "learning_rate": 0.0004398765597745874, "loss": 2.3568, "step": 2733 }, { "epoch": 0.07336839845427222, "grad_norm": 0.63671875, "learning_rate": 0.000440037568764256, "loss": 2.39, "step": 2734 }, { "epoch": 0.07339523400601117, "grad_norm": 0.61328125, "learning_rate": 0.00044019857775392455, "loss": 2.2834, "step": 2735 }, { "epoch": 0.0734220695577501, "grad_norm": 0.625, "learning_rate": 0.00044035958674359314, "loss": 2.3033, "step": 2736 }, { "epoch": 0.07344890510948905, "grad_norm": 0.62890625, "learning_rate": 0.00044052059573326173, "loss": 2.2679, "step": 2737 }, { "epoch": 0.073475740661228, "grad_norm": 0.65234375, "learning_rate": 0.0004406816047229303, "loss": 2.3142, "step": 2738 }, { "epoch": 0.07350257621296694, "grad_norm": 0.62890625, "learning_rate": 0.0004408426137125989, "loss": 2.397, "step": 2739 }, { "epoch": 0.07352941176470588, "grad_norm": 0.6328125, "learning_rate": 0.00044100362270226755, "loss": 2.4275, "step": 2740 }, { "epoch": 0.07355624731644482, "grad_norm": 0.6171875, "learning_rate": 0.00044116463169193614, "loss": 2.308, "step": 2741 }, { "epoch": 0.07358308286818377, "grad_norm": 0.64453125, "learning_rate": 0.00044132564068160467, "loss": 2.4039, "step": 2742 }, { "epoch": 0.07360991841992272, "grad_norm": 0.625, "learning_rate": 0.00044148664967127326, "loss": 2.2399, "step": 2743 }, { "epoch": 0.07363675397166165, "grad_norm": 0.61328125, "learning_rate": 0.00044164765866094185, "loss": 2.3716, "step": 2744 }, { "epoch": 0.0736635895234006, "grad_norm": 0.61328125, "learning_rate": 0.00044180866765061044, "loss": 2.2774, "step": 2745 }, { "epoch": 0.07369042507513955, "grad_norm": 0.62890625, "learning_rate": 0.00044196967664027903, "loss": 2.4012, "step": 2746 }, { "epoch": 0.07371726062687849, "grad_norm": 0.6328125, "learning_rate": 0.00044213068562994767, "loss": 2.3177, "step": 2747 }, { "epoch": 0.07374409617861744, "grad_norm": 0.6328125, "learning_rate": 0.00044229169461961626, "loss": 2.3438, "step": 2748 }, { "epoch": 0.07377093173035637, "grad_norm": 0.59765625, "learning_rate": 0.00044245270360928485, "loss": 2.2014, "step": 2749 }, { "epoch": 0.07379776728209532, "grad_norm": 0.625, "learning_rate": 0.0004426137125989534, "loss": 2.3465, "step": 2750 }, { "epoch": 0.07382460283383427, "grad_norm": 0.609375, "learning_rate": 0.00044277472158862197, "loss": 2.2945, "step": 2751 }, { "epoch": 0.0738514383855732, "grad_norm": 0.62109375, "learning_rate": 0.00044293573057829056, "loss": 2.3444, "step": 2752 }, { "epoch": 0.07387827393731215, "grad_norm": 0.60546875, "learning_rate": 0.00044309673956795915, "loss": 2.2243, "step": 2753 }, { "epoch": 0.0739051094890511, "grad_norm": 0.625, "learning_rate": 0.0004432577485576278, "loss": 2.231, "step": 2754 }, { "epoch": 0.07393194504079004, "grad_norm": 0.6171875, "learning_rate": 0.0004434187575472964, "loss": 2.4075, "step": 2755 }, { "epoch": 0.07395878059252899, "grad_norm": 0.58984375, "learning_rate": 0.00044357976653696497, "loss": 2.3556, "step": 2756 }, { "epoch": 0.07398561614426792, "grad_norm": 0.63671875, "learning_rate": 0.00044374077552663356, "loss": 2.3556, "step": 2757 }, { "epoch": 0.07401245169600687, "grad_norm": 0.61328125, "learning_rate": 0.0004439017845163021, "loss": 2.3094, "step": 2758 }, { "epoch": 0.07403928724774582, "grad_norm": 0.62109375, "learning_rate": 0.0004440627935059707, "loss": 2.3232, "step": 2759 }, { "epoch": 0.07406612279948475, "grad_norm": 0.62109375, "learning_rate": 0.00044422380249563927, "loss": 2.2696, "step": 2760 }, { "epoch": 0.0740929583512237, "grad_norm": 0.64453125, "learning_rate": 0.0004443848114853079, "loss": 2.3545, "step": 2761 }, { "epoch": 0.07411979390296264, "grad_norm": 0.625, "learning_rate": 0.0004445458204749765, "loss": 2.3837, "step": 2762 }, { "epoch": 0.07414662945470159, "grad_norm": 0.60546875, "learning_rate": 0.0004447068294646451, "loss": 2.2286, "step": 2763 }, { "epoch": 0.07417346500644054, "grad_norm": 0.61328125, "learning_rate": 0.0004448678384543137, "loss": 2.2648, "step": 2764 }, { "epoch": 0.07420030055817947, "grad_norm": 0.6328125, "learning_rate": 0.0004450288474439822, "loss": 2.3235, "step": 2765 }, { "epoch": 0.07422713610991842, "grad_norm": 0.64453125, "learning_rate": 0.0004451898564336508, "loss": 2.3483, "step": 2766 }, { "epoch": 0.07425397166165737, "grad_norm": 0.62109375, "learning_rate": 0.0004453508654233194, "loss": 2.305, "step": 2767 }, { "epoch": 0.0742808072133963, "grad_norm": 0.62890625, "learning_rate": 0.00044551187441298803, "loss": 2.2845, "step": 2768 }, { "epoch": 0.07430764276513525, "grad_norm": 0.609375, "learning_rate": 0.0004456728834026566, "loss": 2.2782, "step": 2769 }, { "epoch": 0.07433447831687419, "grad_norm": 0.625, "learning_rate": 0.0004458338923923252, "loss": 2.236, "step": 2770 }, { "epoch": 0.07436131386861314, "grad_norm": 0.61328125, "learning_rate": 0.0004459949013819938, "loss": 2.2673, "step": 2771 }, { "epoch": 0.07438814942035209, "grad_norm": 0.61328125, "learning_rate": 0.0004461559103716624, "loss": 2.2501, "step": 2772 }, { "epoch": 0.07441498497209102, "grad_norm": 0.625, "learning_rate": 0.0004463169193613309, "loss": 2.3439, "step": 2773 }, { "epoch": 0.07444182052382997, "grad_norm": 0.61328125, "learning_rate": 0.0004464779283509995, "loss": 2.3813, "step": 2774 }, { "epoch": 0.07446865607556892, "grad_norm": 0.63671875, "learning_rate": 0.00044663893734066815, "loss": 2.2547, "step": 2775 }, { "epoch": 0.07449549162730786, "grad_norm": 0.609375, "learning_rate": 0.00044679994633033674, "loss": 2.252, "step": 2776 }, { "epoch": 0.0745223271790468, "grad_norm": 0.62890625, "learning_rate": 0.00044696095532000533, "loss": 2.3223, "step": 2777 }, { "epoch": 0.07454916273078574, "grad_norm": 0.61328125, "learning_rate": 0.0004471219643096739, "loss": 2.2887, "step": 2778 }, { "epoch": 0.07457599828252469, "grad_norm": 0.640625, "learning_rate": 0.0004472829732993425, "loss": 2.2977, "step": 2779 }, { "epoch": 0.07460283383426364, "grad_norm": 0.62109375, "learning_rate": 0.00044744398228901115, "loss": 2.3196, "step": 2780 }, { "epoch": 0.07462966938600257, "grad_norm": 0.61328125, "learning_rate": 0.0004476049912786797, "loss": 2.23, "step": 2781 }, { "epoch": 0.07465650493774152, "grad_norm": 0.6171875, "learning_rate": 0.00044776600026834827, "loss": 2.2946, "step": 2782 }, { "epoch": 0.07468334048948046, "grad_norm": 0.63671875, "learning_rate": 0.00044792700925801686, "loss": 2.3235, "step": 2783 }, { "epoch": 0.0747101760412194, "grad_norm": 0.625, "learning_rate": 0.00044808801824768545, "loss": 2.2381, "step": 2784 }, { "epoch": 0.07473701159295836, "grad_norm": 0.60546875, "learning_rate": 0.00044824902723735404, "loss": 2.2336, "step": 2785 }, { "epoch": 0.07476384714469729, "grad_norm": 0.609375, "learning_rate": 0.00044841003622702263, "loss": 2.2684, "step": 2786 }, { "epoch": 0.07479068269643624, "grad_norm": 0.61328125, "learning_rate": 0.00044857104521669127, "loss": 2.3668, "step": 2787 }, { "epoch": 0.07481751824817519, "grad_norm": 0.63671875, "learning_rate": 0.00044873205420635986, "loss": 2.3876, "step": 2788 }, { "epoch": 0.07484435379991412, "grad_norm": 0.62109375, "learning_rate": 0.0004488930631960284, "loss": 2.3374, "step": 2789 }, { "epoch": 0.07487118935165307, "grad_norm": 0.6015625, "learning_rate": 0.000449054072185697, "loss": 2.1587, "step": 2790 }, { "epoch": 0.07489802490339201, "grad_norm": 0.59375, "learning_rate": 0.00044921508117536557, "loss": 2.2489, "step": 2791 }, { "epoch": 0.07492486045513096, "grad_norm": 0.6171875, "learning_rate": 0.00044937609016503416, "loss": 2.2956, "step": 2792 }, { "epoch": 0.0749516960068699, "grad_norm": 0.63671875, "learning_rate": 0.00044953709915470275, "loss": 2.3337, "step": 2793 }, { "epoch": 0.07497853155860884, "grad_norm": 0.60546875, "learning_rate": 0.0004496981081443714, "loss": 2.2854, "step": 2794 }, { "epoch": 0.07500536711034779, "grad_norm": 0.6484375, "learning_rate": 0.00044985911713404, "loss": 2.3123, "step": 2795 }, { "epoch": 0.07503220266208674, "grad_norm": 0.625, "learning_rate": 0.0004500201261237085, "loss": 2.2863, "step": 2796 }, { "epoch": 0.07505903821382567, "grad_norm": 0.6484375, "learning_rate": 0.0004501811351133771, "loss": 2.3573, "step": 2797 }, { "epoch": 0.07508587376556462, "grad_norm": 0.6171875, "learning_rate": 0.0004503421441030457, "loss": 2.3573, "step": 2798 }, { "epoch": 0.07511270931730356, "grad_norm": 0.61328125, "learning_rate": 0.0004505031530927143, "loss": 2.2046, "step": 2799 }, { "epoch": 0.07513954486904251, "grad_norm": 0.61328125, "learning_rate": 0.0004506641620823829, "loss": 2.2858, "step": 2800 }, { "epoch": 0.07516638042078146, "grad_norm": 0.61328125, "learning_rate": 0.0004508251710720515, "loss": 2.3271, "step": 2801 }, { "epoch": 0.07519321597252039, "grad_norm": 0.609375, "learning_rate": 0.0004509861800617201, "loss": 2.2522, "step": 2802 }, { "epoch": 0.07522005152425934, "grad_norm": 0.625, "learning_rate": 0.0004511471890513887, "loss": 2.238, "step": 2803 }, { "epoch": 0.07524688707599829, "grad_norm": 0.625, "learning_rate": 0.0004513081980410572, "loss": 2.3023, "step": 2804 }, { "epoch": 0.07527372262773722, "grad_norm": 0.62109375, "learning_rate": 0.0004514692070307258, "loss": 2.3857, "step": 2805 }, { "epoch": 0.07530055817947617, "grad_norm": 0.6171875, "learning_rate": 0.0004516302160203944, "loss": 2.324, "step": 2806 }, { "epoch": 0.07532739373121511, "grad_norm": 0.62109375, "learning_rate": 0.00045179122501006304, "loss": 2.3661, "step": 2807 }, { "epoch": 0.07535422928295406, "grad_norm": 0.5859375, "learning_rate": 0.00045195223399973163, "loss": 2.1811, "step": 2808 }, { "epoch": 0.075381064834693, "grad_norm": 0.62109375, "learning_rate": 0.0004521132429894002, "loss": 2.3445, "step": 2809 }, { "epoch": 0.07540790038643194, "grad_norm": 0.609375, "learning_rate": 0.0004522742519790688, "loss": 2.3066, "step": 2810 }, { "epoch": 0.07543473593817089, "grad_norm": 0.60546875, "learning_rate": 0.0004524352609687374, "loss": 2.224, "step": 2811 }, { "epoch": 0.07546157148990983, "grad_norm": 0.6015625, "learning_rate": 0.00045259626995840593, "loss": 2.3114, "step": 2812 }, { "epoch": 0.07548840704164878, "grad_norm": 0.61328125, "learning_rate": 0.0004527572789480745, "loss": 2.3465, "step": 2813 }, { "epoch": 0.07551524259338772, "grad_norm": 0.61328125, "learning_rate": 0.00045291828793774316, "loss": 2.2563, "step": 2814 }, { "epoch": 0.07554207814512666, "grad_norm": 0.6171875, "learning_rate": 0.00045307929692741175, "loss": 2.3057, "step": 2815 }, { "epoch": 0.07556891369686561, "grad_norm": 0.640625, "learning_rate": 0.00045324030591708034, "loss": 2.396, "step": 2816 }, { "epoch": 0.07559574924860456, "grad_norm": 0.57421875, "learning_rate": 0.00045340131490674893, "loss": 2.0943, "step": 2817 }, { "epoch": 0.07562258480034349, "grad_norm": 0.6171875, "learning_rate": 0.0004535623238964175, "loss": 2.3366, "step": 2818 }, { "epoch": 0.07564942035208244, "grad_norm": 0.62109375, "learning_rate": 0.0004537233328860861, "loss": 2.3825, "step": 2819 }, { "epoch": 0.07567625590382138, "grad_norm": 0.59765625, "learning_rate": 0.00045388434187575464, "loss": 2.2172, "step": 2820 }, { "epoch": 0.07570309145556033, "grad_norm": 0.6015625, "learning_rate": 0.0004540453508654233, "loss": 2.2512, "step": 2821 }, { "epoch": 0.07572992700729927, "grad_norm": 0.62109375, "learning_rate": 0.00045420635985509187, "loss": 2.2496, "step": 2822 }, { "epoch": 0.07575676255903821, "grad_norm": 0.61328125, "learning_rate": 0.00045436736884476046, "loss": 2.2281, "step": 2823 }, { "epoch": 0.07578359811077716, "grad_norm": 0.6328125, "learning_rate": 0.00045452837783442905, "loss": 2.304, "step": 2824 }, { "epoch": 0.07581043366251611, "grad_norm": 0.59765625, "learning_rate": 0.00045468938682409764, "loss": 2.2591, "step": 2825 }, { "epoch": 0.07583726921425504, "grad_norm": 0.62109375, "learning_rate": 0.0004548503958137663, "loss": 2.3409, "step": 2826 }, { "epoch": 0.07586410476599399, "grad_norm": 0.60546875, "learning_rate": 0.00045501140480343476, "loss": 2.1858, "step": 2827 }, { "epoch": 0.07589094031773293, "grad_norm": 0.59765625, "learning_rate": 0.0004551724137931034, "loss": 2.2057, "step": 2828 }, { "epoch": 0.07591777586947188, "grad_norm": 0.6171875, "learning_rate": 0.000455333422782772, "loss": 2.2824, "step": 2829 }, { "epoch": 0.07594461142121083, "grad_norm": 0.62109375, "learning_rate": 0.0004554944317724406, "loss": 2.2863, "step": 2830 }, { "epoch": 0.07597144697294976, "grad_norm": 0.6015625, "learning_rate": 0.00045565544076210917, "loss": 2.2087, "step": 2831 }, { "epoch": 0.07599828252468871, "grad_norm": 0.61328125, "learning_rate": 0.00045581644975177776, "loss": 2.2545, "step": 2832 }, { "epoch": 0.07602511807642764, "grad_norm": 0.609375, "learning_rate": 0.0004559774587414464, "loss": 2.2594, "step": 2833 }, { "epoch": 0.0760519536281666, "grad_norm": 0.62109375, "learning_rate": 0.000456138467731115, "loss": 2.2661, "step": 2834 }, { "epoch": 0.07607878917990554, "grad_norm": 0.60546875, "learning_rate": 0.0004562994767207835, "loss": 2.2948, "step": 2835 }, { "epoch": 0.07610562473164448, "grad_norm": 0.6015625, "learning_rate": 0.0004564604857104521, "loss": 2.2623, "step": 2836 }, { "epoch": 0.07613246028338343, "grad_norm": 0.64453125, "learning_rate": 0.0004566214947001207, "loss": 2.3624, "step": 2837 }, { "epoch": 0.07615929583512238, "grad_norm": 0.61328125, "learning_rate": 0.0004567825036897893, "loss": 2.3165, "step": 2838 }, { "epoch": 0.07618613138686131, "grad_norm": 0.63671875, "learning_rate": 0.0004569435126794579, "loss": 2.4317, "step": 2839 }, { "epoch": 0.07621296693860026, "grad_norm": 0.625, "learning_rate": 0.0004571045216691265, "loss": 2.3365, "step": 2840 }, { "epoch": 0.0762398024903392, "grad_norm": 0.59765625, "learning_rate": 0.0004572655306587951, "loss": 2.2616, "step": 2841 }, { "epoch": 0.07626663804207814, "grad_norm": 0.609375, "learning_rate": 0.0004574265396484637, "loss": 2.2574, "step": 2842 }, { "epoch": 0.0762934735938171, "grad_norm": 0.6171875, "learning_rate": 0.00045758754863813223, "loss": 2.2692, "step": 2843 }, { "epoch": 0.07632030914555603, "grad_norm": 0.59765625, "learning_rate": 0.0004577485576278008, "loss": 2.248, "step": 2844 }, { "epoch": 0.07634714469729498, "grad_norm": 0.59765625, "learning_rate": 0.0004579095666174694, "loss": 2.2422, "step": 2845 }, { "epoch": 0.07637398024903393, "grad_norm": 0.5859375, "learning_rate": 0.000458070575607138, "loss": 2.1471, "step": 2846 }, { "epoch": 0.07640081580077286, "grad_norm": 0.65234375, "learning_rate": 0.00045823158459680664, "loss": 2.3599, "step": 2847 }, { "epoch": 0.07642765135251181, "grad_norm": 0.59375, "learning_rate": 0.00045839259358647523, "loss": 2.1488, "step": 2848 }, { "epoch": 0.07645448690425075, "grad_norm": 0.7421875, "learning_rate": 0.0004585536025761438, "loss": 2.4799, "step": 2849 }, { "epoch": 0.0764813224559897, "grad_norm": 0.63671875, "learning_rate": 0.00045871461156581235, "loss": 2.297, "step": 2850 }, { "epoch": 0.07650815800772864, "grad_norm": 0.61328125, "learning_rate": 0.00045887562055548094, "loss": 2.3232, "step": 2851 }, { "epoch": 0.07653499355946758, "grad_norm": 0.6328125, "learning_rate": 0.00045903662954514953, "loss": 2.2787, "step": 2852 }, { "epoch": 0.07656182911120653, "grad_norm": 0.65234375, "learning_rate": 0.0004591976385348181, "loss": 2.3837, "step": 2853 }, { "epoch": 0.07658866466294548, "grad_norm": 0.609375, "learning_rate": 0.00045935864752448676, "loss": 2.3223, "step": 2854 }, { "epoch": 0.07661550021468441, "grad_norm": 0.6171875, "learning_rate": 0.00045951965651415535, "loss": 2.3338, "step": 2855 }, { "epoch": 0.07664233576642336, "grad_norm": 0.6484375, "learning_rate": 0.00045968066550382394, "loss": 2.4943, "step": 2856 }, { "epoch": 0.0766691713181623, "grad_norm": 0.625, "learning_rate": 0.00045984167449349253, "loss": 2.3357, "step": 2857 }, { "epoch": 0.07669600686990125, "grad_norm": 0.62109375, "learning_rate": 0.00046000268348316106, "loss": 2.342, "step": 2858 }, { "epoch": 0.0767228424216402, "grad_norm": 0.59765625, "learning_rate": 0.00046016369247282965, "loss": 2.2806, "step": 2859 }, { "epoch": 0.07674967797337913, "grad_norm": 0.625, "learning_rate": 0.0004603247014624983, "loss": 2.3456, "step": 2860 }, { "epoch": 0.07677651352511808, "grad_norm": 0.625, "learning_rate": 0.0004604857104521669, "loss": 2.3756, "step": 2861 }, { "epoch": 0.07680334907685701, "grad_norm": 0.625, "learning_rate": 0.00046064671944183547, "loss": 2.395, "step": 2862 }, { "epoch": 0.07683018462859596, "grad_norm": 0.61328125, "learning_rate": 0.00046080772843150406, "loss": 2.363, "step": 2863 }, { "epoch": 0.07685702018033491, "grad_norm": 0.640625, "learning_rate": 0.00046096873742117265, "loss": 2.315, "step": 2864 }, { "epoch": 0.07688385573207385, "grad_norm": 0.6015625, "learning_rate": 0.00046112974641084124, "loss": 2.3128, "step": 2865 }, { "epoch": 0.0769106912838128, "grad_norm": 0.62890625, "learning_rate": 0.0004612907554005098, "loss": 2.3506, "step": 2866 }, { "epoch": 0.07693752683555174, "grad_norm": 0.6015625, "learning_rate": 0.0004614517643901784, "loss": 2.3002, "step": 2867 }, { "epoch": 0.07696436238729068, "grad_norm": 0.62890625, "learning_rate": 0.000461612773379847, "loss": 2.3087, "step": 2868 }, { "epoch": 0.07699119793902963, "grad_norm": 0.625, "learning_rate": 0.0004617737823695156, "loss": 2.374, "step": 2869 }, { "epoch": 0.07701803349076856, "grad_norm": 0.61328125, "learning_rate": 0.0004619347913591842, "loss": 2.347, "step": 2870 }, { "epoch": 0.07704486904250751, "grad_norm": 0.609375, "learning_rate": 0.00046209580034885277, "loss": 2.3011, "step": 2871 }, { "epoch": 0.07707170459424646, "grad_norm": 0.6015625, "learning_rate": 0.00046225680933852136, "loss": 2.2792, "step": 2872 }, { "epoch": 0.0770985401459854, "grad_norm": 0.609375, "learning_rate": 0.00046241781832819, "loss": 2.337, "step": 2873 }, { "epoch": 0.07712537569772435, "grad_norm": 0.59765625, "learning_rate": 0.00046257882731785854, "loss": 2.3277, "step": 2874 }, { "epoch": 0.0771522112494633, "grad_norm": 0.5859375, "learning_rate": 0.0004627398363075271, "loss": 2.2619, "step": 2875 }, { "epoch": 0.07717904680120223, "grad_norm": 0.58984375, "learning_rate": 0.0004629008452971957, "loss": 2.1973, "step": 2876 }, { "epoch": 0.07720588235294118, "grad_norm": 0.6171875, "learning_rate": 0.0004630618542868643, "loss": 2.357, "step": 2877 }, { "epoch": 0.07723271790468011, "grad_norm": 0.59765625, "learning_rate": 0.0004632228632765329, "loss": 2.2852, "step": 2878 }, { "epoch": 0.07725955345641906, "grad_norm": 0.6328125, "learning_rate": 0.0004633838722662015, "loss": 2.2923, "step": 2879 }, { "epoch": 0.07728638900815801, "grad_norm": 0.60546875, "learning_rate": 0.0004635448812558701, "loss": 2.2216, "step": 2880 }, { "epoch": 0.07731322455989695, "grad_norm": 0.60546875, "learning_rate": 0.00046370589024553866, "loss": 2.1803, "step": 2881 }, { "epoch": 0.0773400601116359, "grad_norm": 0.60546875, "learning_rate": 0.00046386689923520725, "loss": 2.2321, "step": 2882 }, { "epoch": 0.07736689566337483, "grad_norm": 0.58984375, "learning_rate": 0.00046402790822487583, "loss": 2.2088, "step": 2883 }, { "epoch": 0.07739373121511378, "grad_norm": 0.61328125, "learning_rate": 0.0004641889172145444, "loss": 2.309, "step": 2884 }, { "epoch": 0.07742056676685273, "grad_norm": 0.59765625, "learning_rate": 0.000464349926204213, "loss": 2.3069, "step": 2885 }, { "epoch": 0.07744740231859167, "grad_norm": 0.6015625, "learning_rate": 0.00046451093519388165, "loss": 2.3046, "step": 2886 }, { "epoch": 0.07747423787033061, "grad_norm": 0.60546875, "learning_rate": 0.00046467194418355024, "loss": 2.305, "step": 2887 }, { "epoch": 0.07750107342206956, "grad_norm": 0.609375, "learning_rate": 0.00046483295317321883, "loss": 2.3549, "step": 2888 }, { "epoch": 0.0775279089738085, "grad_norm": 0.609375, "learning_rate": 0.00046499396216288737, "loss": 2.3038, "step": 2889 }, { "epoch": 0.07755474452554745, "grad_norm": 0.609375, "learning_rate": 0.00046515497115255595, "loss": 2.2973, "step": 2890 }, { "epoch": 0.07758158007728638, "grad_norm": 0.578125, "learning_rate": 0.00046531598014222454, "loss": 2.2023, "step": 2891 }, { "epoch": 0.07760841562902533, "grad_norm": 0.60546875, "learning_rate": 0.00046547698913189313, "loss": 2.3006, "step": 2892 }, { "epoch": 0.07763525118076428, "grad_norm": 0.60546875, "learning_rate": 0.0004656379981215618, "loss": 2.2614, "step": 2893 }, { "epoch": 0.07766208673250322, "grad_norm": 0.6171875, "learning_rate": 0.00046579900711123036, "loss": 2.334, "step": 2894 }, { "epoch": 0.07768892228424217, "grad_norm": 0.58984375, "learning_rate": 0.00046596001610089895, "loss": 2.2638, "step": 2895 }, { "epoch": 0.07771575783598111, "grad_norm": 0.6171875, "learning_rate": 0.00046612102509056754, "loss": 2.3478, "step": 2896 }, { "epoch": 0.07774259338772005, "grad_norm": 0.60546875, "learning_rate": 0.0004662820340802361, "loss": 2.3499, "step": 2897 }, { "epoch": 0.077769428939459, "grad_norm": 0.59765625, "learning_rate": 0.00046644304306990466, "loss": 2.3344, "step": 2898 }, { "epoch": 0.07779626449119793, "grad_norm": 0.6015625, "learning_rate": 0.00046660405205957325, "loss": 2.3324, "step": 2899 }, { "epoch": 0.07782310004293688, "grad_norm": 0.60546875, "learning_rate": 0.0004667650610492419, "loss": 2.347, "step": 2900 }, { "epoch": 0.07784993559467583, "grad_norm": 0.60546875, "learning_rate": 0.0004669260700389105, "loss": 2.3935, "step": 2901 }, { "epoch": 0.07787677114641477, "grad_norm": 0.61328125, "learning_rate": 0.00046708707902857907, "loss": 2.3171, "step": 2902 }, { "epoch": 0.07790360669815372, "grad_norm": 0.58984375, "learning_rate": 0.00046724808801824766, "loss": 2.2716, "step": 2903 }, { "epoch": 0.07793044224989266, "grad_norm": 0.59375, "learning_rate": 0.00046740909700791625, "loss": 2.1943, "step": 2904 }, { "epoch": 0.0779572778016316, "grad_norm": 0.609375, "learning_rate": 0.0004675701059975848, "loss": 2.311, "step": 2905 }, { "epoch": 0.07798411335337055, "grad_norm": 0.62109375, "learning_rate": 0.0004677311149872534, "loss": 2.2486, "step": 2906 }, { "epoch": 0.07801094890510948, "grad_norm": 0.6015625, "learning_rate": 0.000467892123976922, "loss": 2.3062, "step": 2907 }, { "epoch": 0.07803778445684843, "grad_norm": 0.609375, "learning_rate": 0.0004680531329665906, "loss": 2.3576, "step": 2908 }, { "epoch": 0.07806462000858738, "grad_norm": 0.609375, "learning_rate": 0.0004682141419562592, "loss": 2.2888, "step": 2909 }, { "epoch": 0.07809145556032632, "grad_norm": 0.609375, "learning_rate": 0.0004683751509459278, "loss": 2.2986, "step": 2910 }, { "epoch": 0.07811829111206527, "grad_norm": 0.609375, "learning_rate": 0.00046853615993559637, "loss": 2.2725, "step": 2911 }, { "epoch": 0.0781451266638042, "grad_norm": 0.60546875, "learning_rate": 0.0004686971689252649, "loss": 2.3576, "step": 2912 }, { "epoch": 0.07817196221554315, "grad_norm": 0.5859375, "learning_rate": 0.0004688581779149335, "loss": 2.3285, "step": 2913 }, { "epoch": 0.0781987977672821, "grad_norm": 0.58203125, "learning_rate": 0.00046901918690460214, "loss": 2.2904, "step": 2914 }, { "epoch": 0.07822563331902103, "grad_norm": 0.609375, "learning_rate": 0.0004691801958942707, "loss": 2.355, "step": 2915 }, { "epoch": 0.07825246887075998, "grad_norm": 0.6015625, "learning_rate": 0.0004693412048839393, "loss": 2.2001, "step": 2916 }, { "epoch": 0.07827930442249893, "grad_norm": 0.62109375, "learning_rate": 0.0004695022138736079, "loss": 2.2988, "step": 2917 }, { "epoch": 0.07830613997423787, "grad_norm": 0.6015625, "learning_rate": 0.0004696632228632765, "loss": 2.2848, "step": 2918 }, { "epoch": 0.07833297552597682, "grad_norm": 0.6015625, "learning_rate": 0.00046982423185294513, "loss": 2.2587, "step": 2919 }, { "epoch": 0.07835981107771575, "grad_norm": 0.59765625, "learning_rate": 0.00046998524084261367, "loss": 2.2876, "step": 2920 }, { "epoch": 0.0783866466294547, "grad_norm": 0.6015625, "learning_rate": 0.00047014624983228226, "loss": 2.2917, "step": 2921 }, { "epoch": 0.07841348218119365, "grad_norm": 0.62109375, "learning_rate": 0.00047030725882195085, "loss": 2.3483, "step": 2922 }, { "epoch": 0.07844031773293259, "grad_norm": 0.55859375, "learning_rate": 0.00047046826781161943, "loss": 2.1415, "step": 2923 }, { "epoch": 0.07846715328467153, "grad_norm": 0.57421875, "learning_rate": 0.000470629276801288, "loss": 2.2206, "step": 2924 }, { "epoch": 0.07849398883641048, "grad_norm": 0.58984375, "learning_rate": 0.0004707902857909566, "loss": 2.1678, "step": 2925 }, { "epoch": 0.07852082438814942, "grad_norm": 0.60546875, "learning_rate": 0.00047095129478062525, "loss": 2.2899, "step": 2926 }, { "epoch": 0.07854765993988837, "grad_norm": 0.5859375, "learning_rate": 0.00047111230377029384, "loss": 2.2296, "step": 2927 }, { "epoch": 0.0785744954916273, "grad_norm": 0.59375, "learning_rate": 0.0004712733127599624, "loss": 2.2552, "step": 2928 }, { "epoch": 0.07860133104336625, "grad_norm": 0.59765625, "learning_rate": 0.00047143432174963097, "loss": 2.2503, "step": 2929 }, { "epoch": 0.0786281665951052, "grad_norm": 0.61328125, "learning_rate": 0.00047159533073929955, "loss": 2.328, "step": 2930 }, { "epoch": 0.07865500214684414, "grad_norm": 0.60546875, "learning_rate": 0.00047175633972896814, "loss": 2.258, "step": 2931 }, { "epoch": 0.07868183769858308, "grad_norm": 0.60546875, "learning_rate": 0.00047191734871863673, "loss": 2.2136, "step": 2932 }, { "epoch": 0.07870867325032202, "grad_norm": 0.6171875, "learning_rate": 0.0004720783577083054, "loss": 2.3388, "step": 2933 }, { "epoch": 0.07873550880206097, "grad_norm": 0.59765625, "learning_rate": 0.00047223936669797396, "loss": 2.2422, "step": 2934 }, { "epoch": 0.07876234435379992, "grad_norm": 0.5859375, "learning_rate": 0.00047240037568764255, "loss": 2.3023, "step": 2935 }, { "epoch": 0.07878917990553885, "grad_norm": 0.59765625, "learning_rate": 0.0004725613846773111, "loss": 2.2508, "step": 2936 }, { "epoch": 0.0788160154572778, "grad_norm": 0.6171875, "learning_rate": 0.0004727223936669797, "loss": 2.3909, "step": 2937 }, { "epoch": 0.07884285100901675, "grad_norm": 0.609375, "learning_rate": 0.00047288340265664826, "loss": 2.3311, "step": 2938 }, { "epoch": 0.07886968656075569, "grad_norm": 0.6015625, "learning_rate": 0.00047304441164631685, "loss": 2.2743, "step": 2939 }, { "epoch": 0.07889652211249464, "grad_norm": 0.60546875, "learning_rate": 0.0004732054206359855, "loss": 2.3334, "step": 2940 }, { "epoch": 0.07892335766423357, "grad_norm": 0.60546875, "learning_rate": 0.0004733664296256541, "loss": 2.3672, "step": 2941 }, { "epoch": 0.07895019321597252, "grad_norm": 0.5703125, "learning_rate": 0.00047352743861532267, "loss": 2.2602, "step": 2942 }, { "epoch": 0.07897702876771147, "grad_norm": 0.62109375, "learning_rate": 0.0004736884476049912, "loss": 2.3575, "step": 2943 }, { "epoch": 0.0790038643194504, "grad_norm": 0.58984375, "learning_rate": 0.0004738494565946598, "loss": 2.2439, "step": 2944 }, { "epoch": 0.07903069987118935, "grad_norm": 0.58984375, "learning_rate": 0.0004740104655843284, "loss": 2.2695, "step": 2945 }, { "epoch": 0.0790575354229283, "grad_norm": 0.61328125, "learning_rate": 0.00047417147457399703, "loss": 2.2354, "step": 2946 }, { "epoch": 0.07908437097466724, "grad_norm": 0.6171875, "learning_rate": 0.0004743324835636656, "loss": 2.3863, "step": 2947 }, { "epoch": 0.07911120652640619, "grad_norm": 0.61328125, "learning_rate": 0.0004744934925533342, "loss": 2.2475, "step": 2948 }, { "epoch": 0.07913804207814512, "grad_norm": 0.6015625, "learning_rate": 0.0004746545015430028, "loss": 2.2628, "step": 2949 }, { "epoch": 0.07916487762988407, "grad_norm": 0.609375, "learning_rate": 0.0004748155105326714, "loss": 2.3744, "step": 2950 }, { "epoch": 0.07919171318162302, "grad_norm": 0.59375, "learning_rate": 0.0004749765195223399, "loss": 2.2468, "step": 2951 }, { "epoch": 0.07921854873336195, "grad_norm": 0.5859375, "learning_rate": 0.0004751375285120085, "loss": 2.3097, "step": 2952 }, { "epoch": 0.0792453842851009, "grad_norm": 0.59375, "learning_rate": 0.00047529853750167715, "loss": 2.2449, "step": 2953 }, { "epoch": 0.07927221983683985, "grad_norm": 0.58203125, "learning_rate": 0.00047545954649134574, "loss": 2.2033, "step": 2954 }, { "epoch": 0.07929905538857879, "grad_norm": 0.5859375, "learning_rate": 0.0004756205554810143, "loss": 2.31, "step": 2955 }, { "epoch": 0.07932589094031774, "grad_norm": 0.58984375, "learning_rate": 0.0004757815644706829, "loss": 2.2412, "step": 2956 }, { "epoch": 0.07935272649205667, "grad_norm": 0.59375, "learning_rate": 0.0004759425734603515, "loss": 2.2614, "step": 2957 }, { "epoch": 0.07937956204379562, "grad_norm": 0.59765625, "learning_rate": 0.0004761035824500201, "loss": 2.2795, "step": 2958 }, { "epoch": 0.07940639759553457, "grad_norm": 0.58984375, "learning_rate": 0.0004762645914396886, "loss": 2.303, "step": 2959 }, { "epoch": 0.0794332331472735, "grad_norm": 0.57421875, "learning_rate": 0.00047642560042935727, "loss": 2.2479, "step": 2960 }, { "epoch": 0.07946006869901245, "grad_norm": 0.625, "learning_rate": 0.00047658660941902586, "loss": 2.364, "step": 2961 }, { "epoch": 0.07948690425075139, "grad_norm": 0.6015625, "learning_rate": 0.00047674761840869445, "loss": 2.2693, "step": 2962 }, { "epoch": 0.07951373980249034, "grad_norm": 0.58203125, "learning_rate": 0.00047690862739836303, "loss": 2.284, "step": 2963 }, { "epoch": 0.07954057535422929, "grad_norm": 0.578125, "learning_rate": 0.0004770696363880316, "loss": 2.2199, "step": 2964 }, { "epoch": 0.07956741090596822, "grad_norm": 0.59765625, "learning_rate": 0.0004772306453777002, "loss": 2.2438, "step": 2965 }, { "epoch": 0.07959424645770717, "grad_norm": 0.59375, "learning_rate": 0.00047739165436736875, "loss": 2.2444, "step": 2966 }, { "epoch": 0.07962108200944612, "grad_norm": 0.60546875, "learning_rate": 0.0004775526633570374, "loss": 2.2705, "step": 2967 }, { "epoch": 0.07964791756118506, "grad_norm": 0.60546875, "learning_rate": 0.000477713672346706, "loss": 2.2237, "step": 2968 }, { "epoch": 0.079674753112924, "grad_norm": 0.58984375, "learning_rate": 0.00047787468133637457, "loss": 2.231, "step": 2969 }, { "epoch": 0.07970158866466294, "grad_norm": 0.60546875, "learning_rate": 0.00047803569032604315, "loss": 2.2933, "step": 2970 }, { "epoch": 0.07972842421640189, "grad_norm": 0.58984375, "learning_rate": 0.00047819669931571174, "loss": 2.1995, "step": 2971 }, { "epoch": 0.07975525976814084, "grad_norm": 0.59765625, "learning_rate": 0.0004783577083053804, "loss": 2.3137, "step": 2972 }, { "epoch": 0.07978209531987977, "grad_norm": 0.62109375, "learning_rate": 0.000478518717295049, "loss": 2.256, "step": 2973 }, { "epoch": 0.07980893087161872, "grad_norm": 0.60546875, "learning_rate": 0.0004786797262847175, "loss": 2.2779, "step": 2974 }, { "epoch": 0.07983576642335767, "grad_norm": 0.578125, "learning_rate": 0.0004788407352743861, "loss": 2.2079, "step": 2975 }, { "epoch": 0.0798626019750966, "grad_norm": 0.6015625, "learning_rate": 0.0004790017442640547, "loss": 2.2979, "step": 2976 }, { "epoch": 0.07988943752683555, "grad_norm": 0.58984375, "learning_rate": 0.0004791627532537233, "loss": 2.3055, "step": 2977 }, { "epoch": 0.07991627307857449, "grad_norm": 0.58203125, "learning_rate": 0.00047932376224339186, "loss": 2.2522, "step": 2978 }, { "epoch": 0.07994310863031344, "grad_norm": 0.58203125, "learning_rate": 0.0004794847712330605, "loss": 2.2413, "step": 2979 }, { "epoch": 0.07996994418205239, "grad_norm": 0.61328125, "learning_rate": 0.0004796457802227291, "loss": 2.3707, "step": 2980 }, { "epoch": 0.07999677973379132, "grad_norm": 0.59375, "learning_rate": 0.0004798067892123977, "loss": 2.2407, "step": 2981 }, { "epoch": 0.08002361528553027, "grad_norm": 0.59765625, "learning_rate": 0.0004799677982020662, "loss": 2.2683, "step": 2982 }, { "epoch": 0.08005045083726921, "grad_norm": 0.58203125, "learning_rate": 0.0004801288071917348, "loss": 2.3168, "step": 2983 }, { "epoch": 0.08007728638900816, "grad_norm": 0.58203125, "learning_rate": 0.0004802898161814034, "loss": 2.2521, "step": 2984 }, { "epoch": 0.0801041219407471, "grad_norm": 0.59765625, "learning_rate": 0.000480450825171072, "loss": 2.1829, "step": 2985 }, { "epoch": 0.08013095749248604, "grad_norm": 0.609375, "learning_rate": 0.00048061183416074063, "loss": 2.3874, "step": 2986 }, { "epoch": 0.08015779304422499, "grad_norm": 0.5859375, "learning_rate": 0.0004807728431504092, "loss": 2.2704, "step": 2987 }, { "epoch": 0.08018462859596394, "grad_norm": 0.5859375, "learning_rate": 0.0004809338521400778, "loss": 2.2045, "step": 2988 }, { "epoch": 0.08021146414770287, "grad_norm": 0.59765625, "learning_rate": 0.0004810948611297464, "loss": 2.2586, "step": 2989 }, { "epoch": 0.08023829969944182, "grad_norm": 0.59375, "learning_rate": 0.00048125587011941493, "loss": 2.2225, "step": 2990 }, { "epoch": 0.08026513525118076, "grad_norm": 0.58203125, "learning_rate": 0.0004814168791090835, "loss": 2.193, "step": 2991 }, { "epoch": 0.08029197080291971, "grad_norm": 0.6015625, "learning_rate": 0.0004815778880987521, "loss": 2.2779, "step": 2992 }, { "epoch": 0.08031880635465866, "grad_norm": 0.578125, "learning_rate": 0.00048173889708842075, "loss": 2.2007, "step": 2993 }, { "epoch": 0.08034564190639759, "grad_norm": 0.5859375, "learning_rate": 0.00048189990607808934, "loss": 2.1625, "step": 2994 }, { "epoch": 0.08037247745813654, "grad_norm": 0.5859375, "learning_rate": 0.0004820609150677579, "loss": 2.3055, "step": 2995 }, { "epoch": 0.08039931300987549, "grad_norm": 0.578125, "learning_rate": 0.0004822219240574265, "loss": 2.2077, "step": 2996 }, { "epoch": 0.08042614856161442, "grad_norm": 0.59375, "learning_rate": 0.00048238293304709505, "loss": 2.2905, "step": 2997 }, { "epoch": 0.08045298411335337, "grad_norm": 0.57421875, "learning_rate": 0.00048254394203676364, "loss": 2.1543, "step": 2998 }, { "epoch": 0.08047981966509231, "grad_norm": 0.59375, "learning_rate": 0.0004827049510264322, "loss": 2.2044, "step": 2999 }, { "epoch": 0.08050665521683126, "grad_norm": 0.58203125, "learning_rate": 0.00048286596001610087, "loss": 2.1973, "step": 3000 }, { "epoch": 0.0805334907685702, "grad_norm": 0.6015625, "learning_rate": 0.00048302696900576946, "loss": 2.3123, "step": 3001 }, { "epoch": 0.08056032632030914, "grad_norm": 0.625, "learning_rate": 0.00048318797799543805, "loss": 2.363, "step": 3002 }, { "epoch": 0.08058716187204809, "grad_norm": 0.58203125, "learning_rate": 0.00048334898698510663, "loss": 2.1596, "step": 3003 }, { "epoch": 0.08061399742378704, "grad_norm": 0.58203125, "learning_rate": 0.0004835099959747752, "loss": 2.2788, "step": 3004 }, { "epoch": 0.08064083297552598, "grad_norm": 0.5703125, "learning_rate": 0.00048367100496444376, "loss": 2.0911, "step": 3005 }, { "epoch": 0.08066766852726492, "grad_norm": 0.59765625, "learning_rate": 0.0004838320139541124, "loss": 2.2556, "step": 3006 }, { "epoch": 0.08069450407900386, "grad_norm": 0.59765625, "learning_rate": 0.000483993022943781, "loss": 2.3064, "step": 3007 }, { "epoch": 0.08072133963074281, "grad_norm": 0.6015625, "learning_rate": 0.0004841540319334496, "loss": 2.2291, "step": 3008 }, { "epoch": 0.08074817518248176, "grad_norm": 0.609375, "learning_rate": 0.00048431504092311817, "loss": 2.2768, "step": 3009 }, { "epoch": 0.08077501073422069, "grad_norm": 0.59765625, "learning_rate": 0.00048447604991278675, "loss": 2.3144, "step": 3010 }, { "epoch": 0.08080184628595964, "grad_norm": 0.5859375, "learning_rate": 0.00048463705890245534, "loss": 2.2047, "step": 3011 }, { "epoch": 0.08082868183769858, "grad_norm": 0.57421875, "learning_rate": 0.000484798067892124, "loss": 2.3006, "step": 3012 }, { "epoch": 0.08085551738943753, "grad_norm": 0.58984375, "learning_rate": 0.0004849590768817925, "loss": 2.2284, "step": 3013 }, { "epoch": 0.08088235294117647, "grad_norm": 0.57421875, "learning_rate": 0.0004851200858714611, "loss": 2.2692, "step": 3014 }, { "epoch": 0.08090918849291541, "grad_norm": 0.58203125, "learning_rate": 0.0004852810948611297, "loss": 2.138, "step": 3015 }, { "epoch": 0.08093602404465436, "grad_norm": 0.58203125, "learning_rate": 0.0004854421038507983, "loss": 2.2191, "step": 3016 }, { "epoch": 0.08096285959639331, "grad_norm": 0.58984375, "learning_rate": 0.0004856031128404669, "loss": 2.2646, "step": 3017 }, { "epoch": 0.08098969514813224, "grad_norm": 0.59765625, "learning_rate": 0.00048576412183013546, "loss": 2.3026, "step": 3018 }, { "epoch": 0.08101653069987119, "grad_norm": 0.57421875, "learning_rate": 0.0004859251308198041, "loss": 2.2066, "step": 3019 }, { "epoch": 0.08104336625161013, "grad_norm": 0.61328125, "learning_rate": 0.0004860861398094727, "loss": 2.3243, "step": 3020 }, { "epoch": 0.08107020180334908, "grad_norm": 0.5859375, "learning_rate": 0.00048624714879914123, "loss": 2.2284, "step": 3021 }, { "epoch": 0.08109703735508803, "grad_norm": 0.58203125, "learning_rate": 0.0004864081577888098, "loss": 2.1573, "step": 3022 }, { "epoch": 0.08112387290682696, "grad_norm": 0.5859375, "learning_rate": 0.0004865691667784784, "loss": 2.3021, "step": 3023 }, { "epoch": 0.08115070845856591, "grad_norm": 0.59765625, "learning_rate": 0.000486730175768147, "loss": 2.2385, "step": 3024 }, { "epoch": 0.08117754401030486, "grad_norm": 0.6171875, "learning_rate": 0.0004868911847578156, "loss": 2.2996, "step": 3025 }, { "epoch": 0.0812043795620438, "grad_norm": 0.59375, "learning_rate": 0.00048705219374748423, "loss": 2.188, "step": 3026 }, { "epoch": 0.08123121511378274, "grad_norm": 0.5859375, "learning_rate": 0.0004872132027371528, "loss": 2.1771, "step": 3027 }, { "epoch": 0.08125805066552168, "grad_norm": 0.69140625, "learning_rate": 0.00048737421172682135, "loss": 2.3169, "step": 3028 }, { "epoch": 0.08128488621726063, "grad_norm": 0.62890625, "learning_rate": 0.00048753522071648994, "loss": 2.3451, "step": 3029 }, { "epoch": 0.08131172176899958, "grad_norm": 0.59375, "learning_rate": 0.00048769622970615853, "loss": 2.21, "step": 3030 }, { "epoch": 0.08133855732073851, "grad_norm": 0.6171875, "learning_rate": 0.0004878572386958271, "loss": 2.3346, "step": 3031 }, { "epoch": 0.08136539287247746, "grad_norm": 0.5859375, "learning_rate": 0.00048801824768549576, "loss": 2.2547, "step": 3032 }, { "epoch": 0.0813922284242164, "grad_norm": 0.59375, "learning_rate": 0.00048817925667516435, "loss": 2.3114, "step": 3033 }, { "epoch": 0.08141906397595534, "grad_norm": 0.6015625, "learning_rate": 0.0004883402656648329, "loss": 2.2419, "step": 3034 }, { "epoch": 0.0814458995276943, "grad_norm": 0.61328125, "learning_rate": 0.0004885012746545015, "loss": 2.2828, "step": 3035 }, { "epoch": 0.08147273507943323, "grad_norm": 0.58203125, "learning_rate": 0.0004886622836441701, "loss": 2.3017, "step": 3036 }, { "epoch": 0.08149957063117218, "grad_norm": 0.59765625, "learning_rate": 0.0004888232926338387, "loss": 2.2466, "step": 3037 }, { "epoch": 0.08152640618291113, "grad_norm": 0.58203125, "learning_rate": 0.0004889843016235072, "loss": 2.2961, "step": 3038 }, { "epoch": 0.08155324173465006, "grad_norm": 0.6015625, "learning_rate": 0.0004891453106131759, "loss": 2.3183, "step": 3039 }, { "epoch": 0.08158007728638901, "grad_norm": 0.625, "learning_rate": 0.0004893063196028444, "loss": 2.3989, "step": 3040 }, { "epoch": 0.08160691283812795, "grad_norm": 0.58203125, "learning_rate": 0.0004894673285925131, "loss": 2.2775, "step": 3041 }, { "epoch": 0.0816337483898669, "grad_norm": 0.58984375, "learning_rate": 0.0004896283375821817, "loss": 2.3054, "step": 3042 }, { "epoch": 0.08166058394160584, "grad_norm": 0.58203125, "learning_rate": 0.0004897893465718502, "loss": 2.2786, "step": 3043 }, { "epoch": 0.08168741949334478, "grad_norm": 0.609375, "learning_rate": 0.0004899503555615188, "loss": 2.2823, "step": 3044 }, { "epoch": 0.08171425504508373, "grad_norm": 0.5859375, "learning_rate": 0.0004901113645511874, "loss": 2.3164, "step": 3045 }, { "epoch": 0.08174109059682268, "grad_norm": 0.58984375, "learning_rate": 0.000490272373540856, "loss": 2.3031, "step": 3046 }, { "epoch": 0.08176792614856161, "grad_norm": 0.59375, "learning_rate": 0.0004904333825305246, "loss": 2.2369, "step": 3047 }, { "epoch": 0.08179476170030056, "grad_norm": 0.58984375, "learning_rate": 0.0004905943915201931, "loss": 2.2366, "step": 3048 }, { "epoch": 0.0818215972520395, "grad_norm": 0.59375, "learning_rate": 0.0004907554005098618, "loss": 2.2777, "step": 3049 }, { "epoch": 0.08184843280377845, "grad_norm": 0.57421875, "learning_rate": 0.0004909164094995304, "loss": 2.2744, "step": 3050 }, { "epoch": 0.0818752683555174, "grad_norm": 0.578125, "learning_rate": 0.0004910774184891989, "loss": 2.2728, "step": 3051 }, { "epoch": 0.08190210390725633, "grad_norm": 0.6015625, "learning_rate": 0.0004912384274788675, "loss": 2.3806, "step": 3052 }, { "epoch": 0.08192893945899528, "grad_norm": 0.58984375, "learning_rate": 0.0004913994364685361, "loss": 2.2794, "step": 3053 }, { "epoch": 0.08195577501073423, "grad_norm": 0.5859375, "learning_rate": 0.0004915604454582047, "loss": 2.2774, "step": 3054 }, { "epoch": 0.08198261056247316, "grad_norm": 0.57421875, "learning_rate": 0.0004917214544478733, "loss": 2.2805, "step": 3055 }, { "epoch": 0.08200944611421211, "grad_norm": 0.5859375, "learning_rate": 0.0004918824634375419, "loss": 2.3013, "step": 3056 }, { "epoch": 0.08203628166595105, "grad_norm": 0.578125, "learning_rate": 0.0004920434724272105, "loss": 2.1622, "step": 3057 }, { "epoch": 0.08206311721769, "grad_norm": 0.58203125, "learning_rate": 0.0004922044814168791, "loss": 2.3014, "step": 3058 }, { "epoch": 0.08208995276942894, "grad_norm": 0.5703125, "learning_rate": 0.0004923654904065477, "loss": 2.2285, "step": 3059 }, { "epoch": 0.08211678832116788, "grad_norm": 0.578125, "learning_rate": 0.0004925264993962162, "loss": 2.236, "step": 3060 }, { "epoch": 0.08214362387290683, "grad_norm": 0.5859375, "learning_rate": 0.0004926875083858848, "loss": 2.2353, "step": 3061 }, { "epoch": 0.08217045942464576, "grad_norm": 0.56640625, "learning_rate": 0.0004928485173755535, "loss": 2.2571, "step": 3062 }, { "epoch": 0.08219729497638471, "grad_norm": 0.6171875, "learning_rate": 0.000493009526365222, "loss": 2.3519, "step": 3063 }, { "epoch": 0.08222413052812366, "grad_norm": 0.57421875, "learning_rate": 0.0004931705353548907, "loss": 2.2288, "step": 3064 }, { "epoch": 0.0822509660798626, "grad_norm": 0.59375, "learning_rate": 0.0004933315443445592, "loss": 2.2991, "step": 3065 }, { "epoch": 0.08227780163160155, "grad_norm": 0.60546875, "learning_rate": 0.0004934925533342278, "loss": 2.3009, "step": 3066 }, { "epoch": 0.0823046371833405, "grad_norm": 0.578125, "learning_rate": 0.0004936535623238964, "loss": 2.243, "step": 3067 }, { "epoch": 0.08233147273507943, "grad_norm": 0.5625, "learning_rate": 0.0004938145713135649, "loss": 2.1865, "step": 3068 }, { "epoch": 0.08235830828681838, "grad_norm": 0.59765625, "learning_rate": 0.0004939755803032335, "loss": 2.3477, "step": 3069 }, { "epoch": 0.08238514383855731, "grad_norm": 0.58203125, "learning_rate": 0.0004941365892929022, "loss": 2.3038, "step": 3070 }, { "epoch": 0.08241197939029626, "grad_norm": 0.5703125, "learning_rate": 0.0004942975982825707, "loss": 2.3588, "step": 3071 }, { "epoch": 0.08243881494203521, "grad_norm": 0.55859375, "learning_rate": 0.0004944586072722394, "loss": 2.1799, "step": 3072 }, { "epoch": 0.08246565049377415, "grad_norm": 0.55859375, "learning_rate": 0.0004946196162619079, "loss": 2.1987, "step": 3073 }, { "epoch": 0.0824924860455131, "grad_norm": 0.58203125, "learning_rate": 0.0004947806252515765, "loss": 2.2438, "step": 3074 }, { "epoch": 0.08251932159725205, "grad_norm": 0.58203125, "learning_rate": 0.0004949416342412451, "loss": 2.266, "step": 3075 }, { "epoch": 0.08254615714899098, "grad_norm": 0.578125, "learning_rate": 0.0004951026432309137, "loss": 2.2689, "step": 3076 }, { "epoch": 0.08257299270072993, "grad_norm": 0.59375, "learning_rate": 0.0004952636522205822, "loss": 2.3424, "step": 3077 }, { "epoch": 0.08259982825246887, "grad_norm": 0.578125, "learning_rate": 0.0004954246612102509, "loss": 2.2954, "step": 3078 }, { "epoch": 0.08262666380420781, "grad_norm": 0.5703125, "learning_rate": 0.0004955856701999194, "loss": 2.1496, "step": 3079 }, { "epoch": 0.08265349935594676, "grad_norm": 0.57421875, "learning_rate": 0.0004957466791895881, "loss": 2.2887, "step": 3080 }, { "epoch": 0.0826803349076857, "grad_norm": 0.5859375, "learning_rate": 0.0004959076881792567, "loss": 2.2522, "step": 3081 }, { "epoch": 0.08270717045942465, "grad_norm": 0.578125, "learning_rate": 0.0004960686971689251, "loss": 2.3038, "step": 3082 }, { "epoch": 0.08273400601116358, "grad_norm": 0.5703125, "learning_rate": 0.0004962297061585938, "loss": 2.2709, "step": 3083 }, { "epoch": 0.08276084156290253, "grad_norm": 0.56640625, "learning_rate": 0.0004963907151482624, "loss": 2.2401, "step": 3084 }, { "epoch": 0.08278767711464148, "grad_norm": 0.59375, "learning_rate": 0.000496551724137931, "loss": 2.324, "step": 3085 }, { "epoch": 0.08281451266638042, "grad_norm": 0.5625, "learning_rate": 0.0004967127331275996, "loss": 2.1613, "step": 3086 }, { "epoch": 0.08284134821811936, "grad_norm": 0.578125, "learning_rate": 0.0004968737421172681, "loss": 2.2865, "step": 3087 }, { "epoch": 0.08286818376985831, "grad_norm": 0.59765625, "learning_rate": 0.0004970347511069368, "loss": 2.2375, "step": 3088 }, { "epoch": 0.08289501932159725, "grad_norm": 0.58984375, "learning_rate": 0.0004971957600966054, "loss": 2.21, "step": 3089 }, { "epoch": 0.0829218548733362, "grad_norm": 0.5859375, "learning_rate": 0.000497356769086274, "loss": 2.3077, "step": 3090 }, { "epoch": 0.08294869042507513, "grad_norm": 0.59765625, "learning_rate": 0.0004975177780759425, "loss": 2.315, "step": 3091 }, { "epoch": 0.08297552597681408, "grad_norm": 0.58984375, "learning_rate": 0.0004976787870656111, "loss": 2.2516, "step": 3092 }, { "epoch": 0.08300236152855303, "grad_norm": 0.59765625, "learning_rate": 0.0004978397960552797, "loss": 2.3046, "step": 3093 }, { "epoch": 0.08302919708029197, "grad_norm": 0.58984375, "learning_rate": 0.0004980008050449483, "loss": 2.217, "step": 3094 }, { "epoch": 0.08305603263203092, "grad_norm": 0.58203125, "learning_rate": 0.000498161814034617, "loss": 2.218, "step": 3095 }, { "epoch": 0.08308286818376986, "grad_norm": 0.58984375, "learning_rate": 0.0004983228230242855, "loss": 2.2706, "step": 3096 }, { "epoch": 0.0831097037355088, "grad_norm": 0.58984375, "learning_rate": 0.0004984838320139541, "loss": 2.3424, "step": 3097 }, { "epoch": 0.08313653928724775, "grad_norm": 0.55859375, "learning_rate": 0.0004986448410036227, "loss": 2.2807, "step": 3098 }, { "epoch": 0.08316337483898668, "grad_norm": 0.56640625, "learning_rate": 0.0004988058499932912, "loss": 2.2322, "step": 3099 }, { "epoch": 0.08319021039072563, "grad_norm": 0.5703125, "learning_rate": 0.0004989668589829598, "loss": 2.2752, "step": 3100 }, { "epoch": 0.08321704594246458, "grad_norm": 0.578125, "learning_rate": 0.0004991278679726284, "loss": 2.2841, "step": 3101 }, { "epoch": 0.08324388149420352, "grad_norm": 0.5703125, "learning_rate": 0.000499288876962297, "loss": 2.2655, "step": 3102 }, { "epoch": 0.08327071704594247, "grad_norm": 0.5625, "learning_rate": 0.0004994498859519657, "loss": 2.2521, "step": 3103 }, { "epoch": 0.08329755259768142, "grad_norm": 0.5859375, "learning_rate": 0.0004996108949416342, "loss": 2.2034, "step": 3104 }, { "epoch": 0.08332438814942035, "grad_norm": 0.5703125, "learning_rate": 0.0004997719039313028, "loss": 2.3031, "step": 3105 }, { "epoch": 0.0833512237011593, "grad_norm": 0.5625, "learning_rate": 0.0004999329129209714, "loss": 2.1945, "step": 3106 }, { "epoch": 0.08337805925289823, "grad_norm": 0.56640625, "learning_rate": 0.0005000939219106399, "loss": 2.1756, "step": 3107 }, { "epoch": 0.08340489480463718, "grad_norm": 0.55859375, "learning_rate": 0.0005002549309003086, "loss": 2.2566, "step": 3108 }, { "epoch": 0.08343173035637613, "grad_norm": 0.5703125, "learning_rate": 0.0005004159398899772, "loss": 2.3109, "step": 3109 }, { "epoch": 0.08345856590811507, "grad_norm": 0.61328125, "learning_rate": 0.0005005769488796457, "loss": 2.266, "step": 3110 }, { "epoch": 0.08348540145985402, "grad_norm": 0.5859375, "learning_rate": 0.0005007379578693144, "loss": 2.3113, "step": 3111 }, { "epoch": 0.08351223701159295, "grad_norm": 0.5859375, "learning_rate": 0.0005008989668589829, "loss": 2.2834, "step": 3112 }, { "epoch": 0.0835390725633319, "grad_norm": 0.60546875, "learning_rate": 0.0005010599758486514, "loss": 2.3099, "step": 3113 }, { "epoch": 0.08356590811507085, "grad_norm": 0.5859375, "learning_rate": 0.0005012209848383201, "loss": 2.245, "step": 3114 }, { "epoch": 0.08359274366680979, "grad_norm": 0.58203125, "learning_rate": 0.0005013819938279886, "loss": 2.2555, "step": 3115 }, { "epoch": 0.08361957921854873, "grad_norm": 0.56640625, "learning_rate": 0.0005015430028176573, "loss": 2.2009, "step": 3116 }, { "epoch": 0.08364641477028768, "grad_norm": 0.578125, "learning_rate": 0.0005017040118073259, "loss": 2.2845, "step": 3117 }, { "epoch": 0.08367325032202662, "grad_norm": 0.578125, "learning_rate": 0.0005018650207969944, "loss": 2.166, "step": 3118 }, { "epoch": 0.08370008587376557, "grad_norm": 0.57421875, "learning_rate": 0.0005020260297866631, "loss": 2.2789, "step": 3119 }, { "epoch": 0.0837269214255045, "grad_norm": 0.5859375, "learning_rate": 0.0005021870387763316, "loss": 2.3293, "step": 3120 }, { "epoch": 0.08375375697724345, "grad_norm": 0.56640625, "learning_rate": 0.0005023480477660001, "loss": 2.2153, "step": 3121 }, { "epoch": 0.0837805925289824, "grad_norm": 0.5703125, "learning_rate": 0.0005025090567556688, "loss": 2.2582, "step": 3122 }, { "epoch": 0.08380742808072134, "grad_norm": 0.56640625, "learning_rate": 0.0005026700657453374, "loss": 2.1057, "step": 3123 }, { "epoch": 0.08383426363246028, "grad_norm": 0.58203125, "learning_rate": 0.000502831074735006, "loss": 2.2653, "step": 3124 }, { "epoch": 0.08386109918419923, "grad_norm": 0.58203125, "learning_rate": 0.0005029920837246746, "loss": 2.3335, "step": 3125 }, { "epoch": 0.08388793473593817, "grad_norm": 0.578125, "learning_rate": 0.0005031530927143431, "loss": 2.3328, "step": 3126 }, { "epoch": 0.08391477028767712, "grad_norm": 0.58984375, "learning_rate": 0.0005033141017040118, "loss": 2.2476, "step": 3127 }, { "epoch": 0.08394160583941605, "grad_norm": 0.578125, "learning_rate": 0.0005034751106936804, "loss": 2.2626, "step": 3128 }, { "epoch": 0.083968441391155, "grad_norm": 0.56640625, "learning_rate": 0.000503636119683349, "loss": 2.1132, "step": 3129 }, { "epoch": 0.08399527694289395, "grad_norm": 0.57421875, "learning_rate": 0.0005037971286730175, "loss": 2.3122, "step": 3130 }, { "epoch": 0.08402211249463289, "grad_norm": 0.56640625, "learning_rate": 0.0005039581376626861, "loss": 2.2565, "step": 3131 }, { "epoch": 0.08404894804637184, "grad_norm": 0.5625, "learning_rate": 0.0005041191466523547, "loss": 2.2564, "step": 3132 }, { "epoch": 0.08407578359811077, "grad_norm": 0.5859375, "learning_rate": 0.0005042801556420233, "loss": 2.2024, "step": 3133 }, { "epoch": 0.08410261914984972, "grad_norm": 0.57421875, "learning_rate": 0.0005044411646316919, "loss": 2.188, "step": 3134 }, { "epoch": 0.08412945470158867, "grad_norm": 0.58984375, "learning_rate": 0.0005046021736213605, "loss": 2.1594, "step": 3135 }, { "epoch": 0.0841562902533276, "grad_norm": 0.56640625, "learning_rate": 0.0005047631826110291, "loss": 2.2409, "step": 3136 }, { "epoch": 0.08418312580506655, "grad_norm": 0.55859375, "learning_rate": 0.0005049241916006977, "loss": 2.1787, "step": 3137 }, { "epoch": 0.0842099613568055, "grad_norm": 0.5625, "learning_rate": 0.0005050852005903662, "loss": 2.2159, "step": 3138 }, { "epoch": 0.08423679690854444, "grad_norm": 0.578125, "learning_rate": 0.0005052462095800349, "loss": 2.1607, "step": 3139 }, { "epoch": 0.08426363246028339, "grad_norm": 0.578125, "learning_rate": 0.0005054072185697034, "loss": 2.206, "step": 3140 }, { "epoch": 0.08429046801202232, "grad_norm": 0.55859375, "learning_rate": 0.000505568227559372, "loss": 2.0672, "step": 3141 }, { "epoch": 0.08431730356376127, "grad_norm": 0.5703125, "learning_rate": 0.0005057292365490407, "loss": 2.1439, "step": 3142 }, { "epoch": 0.08434413911550022, "grad_norm": 0.578125, "learning_rate": 0.0005058902455387092, "loss": 2.2017, "step": 3143 }, { "epoch": 0.08437097466723915, "grad_norm": 0.58984375, "learning_rate": 0.0005060512545283777, "loss": 2.2861, "step": 3144 }, { "epoch": 0.0843978102189781, "grad_norm": 0.58203125, "learning_rate": 0.0005062122635180464, "loss": 2.226, "step": 3145 }, { "epoch": 0.08442464577071705, "grad_norm": 0.57421875, "learning_rate": 0.0005063732725077149, "loss": 2.1936, "step": 3146 }, { "epoch": 0.08445148132245599, "grad_norm": 0.578125, "learning_rate": 0.0005065342814973836, "loss": 2.2233, "step": 3147 }, { "epoch": 0.08447831687419494, "grad_norm": 0.578125, "learning_rate": 0.0005066952904870522, "loss": 2.2545, "step": 3148 }, { "epoch": 0.08450515242593387, "grad_norm": 0.56640625, "learning_rate": 0.0005068562994767207, "loss": 2.1398, "step": 3149 }, { "epoch": 0.08453198797767282, "grad_norm": 0.56640625, "learning_rate": 0.0005070173084663894, "loss": 2.2413, "step": 3150 }, { "epoch": 0.08455882352941177, "grad_norm": 0.5546875, "learning_rate": 0.0005071783174560579, "loss": 2.1855, "step": 3151 }, { "epoch": 0.0845856590811507, "grad_norm": 0.5859375, "learning_rate": 0.0005073393264457265, "loss": 2.2249, "step": 3152 }, { "epoch": 0.08461249463288965, "grad_norm": 0.59375, "learning_rate": 0.0005075003354353951, "loss": 2.2889, "step": 3153 }, { "epoch": 0.0846393301846286, "grad_norm": 0.58984375, "learning_rate": 0.0005076613444250636, "loss": 2.2731, "step": 3154 }, { "epoch": 0.08466616573636754, "grad_norm": 0.58203125, "learning_rate": 0.0005078223534147323, "loss": 2.2795, "step": 3155 }, { "epoch": 0.08469300128810649, "grad_norm": 0.58203125, "learning_rate": 0.0005079833624044009, "loss": 2.2642, "step": 3156 }, { "epoch": 0.08471983683984542, "grad_norm": 0.5546875, "learning_rate": 0.0005081443713940694, "loss": 2.1711, "step": 3157 }, { "epoch": 0.08474667239158437, "grad_norm": 0.5703125, "learning_rate": 0.0005083053803837381, "loss": 2.249, "step": 3158 }, { "epoch": 0.08477350794332332, "grad_norm": 0.578125, "learning_rate": 0.0005084663893734066, "loss": 2.21, "step": 3159 }, { "epoch": 0.08480034349506226, "grad_norm": 0.5703125, "learning_rate": 0.0005086273983630752, "loss": 2.2275, "step": 3160 }, { "epoch": 0.0848271790468012, "grad_norm": 0.58203125, "learning_rate": 0.0005087884073527438, "loss": 2.2401, "step": 3161 }, { "epoch": 0.08485401459854014, "grad_norm": 0.5859375, "learning_rate": 0.0005089494163424124, "loss": 2.26, "step": 3162 }, { "epoch": 0.08488085015027909, "grad_norm": 0.5625, "learning_rate": 0.000509110425332081, "loss": 2.2093, "step": 3163 }, { "epoch": 0.08490768570201804, "grad_norm": 0.55078125, "learning_rate": 0.0005092714343217496, "loss": 2.1558, "step": 3164 }, { "epoch": 0.08493452125375697, "grad_norm": 0.578125, "learning_rate": 0.0005094324433114182, "loss": 2.2768, "step": 3165 }, { "epoch": 0.08496135680549592, "grad_norm": 0.5625, "learning_rate": 0.0005095934523010868, "loss": 2.1464, "step": 3166 }, { "epoch": 0.08498819235723487, "grad_norm": 0.57421875, "learning_rate": 0.0005097544612907553, "loss": 2.1217, "step": 3167 }, { "epoch": 0.0850150279089738, "grad_norm": 0.57421875, "learning_rate": 0.0005099154702804239, "loss": 2.2729, "step": 3168 }, { "epoch": 0.08504186346071275, "grad_norm": 0.578125, "learning_rate": 0.0005100764792700925, "loss": 2.1725, "step": 3169 }, { "epoch": 0.08506869901245169, "grad_norm": 0.5546875, "learning_rate": 0.0005102374882597612, "loss": 2.1818, "step": 3170 }, { "epoch": 0.08509553456419064, "grad_norm": 0.56640625, "learning_rate": 0.0005103984972494297, "loss": 2.1824, "step": 3171 }, { "epoch": 0.08512237011592959, "grad_norm": 0.57421875, "learning_rate": 0.0005105595062390983, "loss": 2.202, "step": 3172 }, { "epoch": 0.08514920566766852, "grad_norm": 0.5703125, "learning_rate": 0.0005107205152287669, "loss": 2.1949, "step": 3173 }, { "epoch": 0.08517604121940747, "grad_norm": 0.5703125, "learning_rate": 0.0005108815242184355, "loss": 2.2403, "step": 3174 }, { "epoch": 0.08520287677114642, "grad_norm": 0.5859375, "learning_rate": 0.000511042533208104, "loss": 2.2178, "step": 3175 }, { "epoch": 0.08522971232288536, "grad_norm": 0.58203125, "learning_rate": 0.0005112035421977727, "loss": 2.2782, "step": 3176 }, { "epoch": 0.0852565478746243, "grad_norm": 0.5703125, "learning_rate": 0.0005113645511874412, "loss": 2.1715, "step": 3177 }, { "epoch": 0.08528338342636324, "grad_norm": 0.5546875, "learning_rate": 0.0005115255601771099, "loss": 2.1673, "step": 3178 }, { "epoch": 0.08531021897810219, "grad_norm": 0.5625, "learning_rate": 0.0005116865691667784, "loss": 2.22, "step": 3179 }, { "epoch": 0.08533705452984114, "grad_norm": 0.58203125, "learning_rate": 0.000511847578156447, "loss": 2.3215, "step": 3180 }, { "epoch": 0.08536389008158007, "grad_norm": 0.55859375, "learning_rate": 0.0005120085871461157, "loss": 2.306, "step": 3181 }, { "epoch": 0.08539072563331902, "grad_norm": 0.56640625, "learning_rate": 0.0005121695961357842, "loss": 2.238, "step": 3182 }, { "epoch": 0.08541756118505796, "grad_norm": 0.55859375, "learning_rate": 0.0005123306051254528, "loss": 2.1827, "step": 3183 }, { "epoch": 0.08544439673679691, "grad_norm": 0.5703125, "learning_rate": 0.0005124916141151214, "loss": 2.1761, "step": 3184 }, { "epoch": 0.08547123228853586, "grad_norm": 0.5703125, "learning_rate": 0.0005126526231047899, "loss": 2.257, "step": 3185 }, { "epoch": 0.08549806784027479, "grad_norm": 0.5703125, "learning_rate": 0.0005128136320944586, "loss": 2.1938, "step": 3186 }, { "epoch": 0.08552490339201374, "grad_norm": 0.58203125, "learning_rate": 0.0005129746410841271, "loss": 2.2068, "step": 3187 }, { "epoch": 0.08555173894375269, "grad_norm": 0.55859375, "learning_rate": 0.0005131356500737958, "loss": 2.2143, "step": 3188 }, { "epoch": 0.08557857449549162, "grad_norm": 0.57421875, "learning_rate": 0.0005132966590634644, "loss": 2.2536, "step": 3189 }, { "epoch": 0.08560541004723057, "grad_norm": 0.5625, "learning_rate": 0.0005134576680531329, "loss": 2.2607, "step": 3190 }, { "epoch": 0.08563224559896951, "grad_norm": 0.57421875, "learning_rate": 0.0005136186770428015, "loss": 2.2693, "step": 3191 }, { "epoch": 0.08565908115070846, "grad_norm": 0.5625, "learning_rate": 0.0005137796860324701, "loss": 2.1869, "step": 3192 }, { "epoch": 0.0856859167024474, "grad_norm": 0.58203125, "learning_rate": 0.0005139406950221386, "loss": 2.2519, "step": 3193 }, { "epoch": 0.08571275225418634, "grad_norm": 0.546875, "learning_rate": 0.0005141017040118073, "loss": 2.1933, "step": 3194 }, { "epoch": 0.08573958780592529, "grad_norm": 0.5859375, "learning_rate": 0.0005142627130014759, "loss": 2.2711, "step": 3195 }, { "epoch": 0.08576642335766424, "grad_norm": 0.5546875, "learning_rate": 0.0005144237219911445, "loss": 2.1986, "step": 3196 }, { "epoch": 0.08579325890940318, "grad_norm": 0.55078125, "learning_rate": 0.0005145847309808131, "loss": 2.2275, "step": 3197 }, { "epoch": 0.08582009446114212, "grad_norm": 0.55078125, "learning_rate": 0.0005147457399704816, "loss": 2.1852, "step": 3198 }, { "epoch": 0.08584693001288106, "grad_norm": 0.5703125, "learning_rate": 0.0005149067489601502, "loss": 2.1916, "step": 3199 }, { "epoch": 0.08587376556462001, "grad_norm": 0.54296875, "learning_rate": 0.0005150677579498188, "loss": 2.1861, "step": 3200 }, { "epoch": 0.08590060111635896, "grad_norm": 0.5546875, "learning_rate": 0.0005152287669394875, "loss": 2.2126, "step": 3201 }, { "epoch": 0.08592743666809789, "grad_norm": 0.5625, "learning_rate": 0.000515389775929156, "loss": 2.1429, "step": 3202 }, { "epoch": 0.08595427221983684, "grad_norm": 0.55859375, "learning_rate": 0.0005155507849188246, "loss": 2.2081, "step": 3203 }, { "epoch": 0.08598110777157579, "grad_norm": 0.58984375, "learning_rate": 0.0005157117939084932, "loss": 2.3355, "step": 3204 }, { "epoch": 0.08600794332331473, "grad_norm": 0.58203125, "learning_rate": 0.0005158728028981618, "loss": 2.184, "step": 3205 }, { "epoch": 0.08603477887505367, "grad_norm": 0.578125, "learning_rate": 0.0005160338118878303, "loss": 2.2216, "step": 3206 }, { "epoch": 0.08606161442679261, "grad_norm": 0.5703125, "learning_rate": 0.0005161948208774989, "loss": 2.1939, "step": 3207 }, { "epoch": 0.08608844997853156, "grad_norm": 0.5703125, "learning_rate": 0.0005163558298671675, "loss": 2.2208, "step": 3208 }, { "epoch": 0.08611528553027051, "grad_norm": 0.5703125, "learning_rate": 0.0005165168388568362, "loss": 2.2019, "step": 3209 }, { "epoch": 0.08614212108200944, "grad_norm": 0.6015625, "learning_rate": 0.0005166778478465047, "loss": 2.3198, "step": 3210 }, { "epoch": 0.08616895663374839, "grad_norm": 0.6484375, "learning_rate": 0.0005168388568361733, "loss": 2.294, "step": 3211 }, { "epoch": 0.08619579218548733, "grad_norm": 0.57421875, "learning_rate": 0.0005169998658258419, "loss": 2.241, "step": 3212 }, { "epoch": 0.08622262773722628, "grad_norm": 0.59375, "learning_rate": 0.0005171608748155105, "loss": 2.3609, "step": 3213 }, { "epoch": 0.08624946328896523, "grad_norm": 0.58984375, "learning_rate": 0.0005173218838051791, "loss": 2.2372, "step": 3214 }, { "epoch": 0.08627629884070416, "grad_norm": 0.5859375, "learning_rate": 0.0005174828927948477, "loss": 2.3023, "step": 3215 }, { "epoch": 0.08630313439244311, "grad_norm": 0.59765625, "learning_rate": 0.0005176439017845162, "loss": 2.3212, "step": 3216 }, { "epoch": 0.08632996994418206, "grad_norm": 0.57421875, "learning_rate": 0.0005178049107741849, "loss": 2.234, "step": 3217 }, { "epoch": 0.086356805495921, "grad_norm": 0.5625, "learning_rate": 0.0005179659197638534, "loss": 2.2225, "step": 3218 }, { "epoch": 0.08638364104765994, "grad_norm": 0.59375, "learning_rate": 0.000518126928753522, "loss": 2.3104, "step": 3219 }, { "epoch": 0.08641047659939888, "grad_norm": 0.578125, "learning_rate": 0.0005182879377431906, "loss": 2.3045, "step": 3220 }, { "epoch": 0.08643731215113783, "grad_norm": 0.5546875, "learning_rate": 0.0005184489467328592, "loss": 2.1935, "step": 3221 }, { "epoch": 0.08646414770287678, "grad_norm": 0.56640625, "learning_rate": 0.0005186099557225278, "loss": 2.3287, "step": 3222 }, { "epoch": 0.08649098325461571, "grad_norm": 0.55859375, "learning_rate": 0.0005187709647121964, "loss": 2.1843, "step": 3223 }, { "epoch": 0.08651781880635466, "grad_norm": 0.58203125, "learning_rate": 0.0005189319737018649, "loss": 2.2066, "step": 3224 }, { "epoch": 0.08654465435809361, "grad_norm": 0.55859375, "learning_rate": 0.0005190929826915336, "loss": 2.1994, "step": 3225 }, { "epoch": 0.08657148990983254, "grad_norm": 0.58203125, "learning_rate": 0.0005192539916812021, "loss": 2.2202, "step": 3226 }, { "epoch": 0.0865983254615715, "grad_norm": 0.56640625, "learning_rate": 0.0005194150006708708, "loss": 2.2478, "step": 3227 }, { "epoch": 0.08662516101331043, "grad_norm": 0.56640625, "learning_rate": 0.0005195760096605394, "loss": 2.2065, "step": 3228 }, { "epoch": 0.08665199656504938, "grad_norm": 0.56640625, "learning_rate": 0.0005197370186502079, "loss": 2.2156, "step": 3229 }, { "epoch": 0.08667883211678833, "grad_norm": 0.5703125, "learning_rate": 0.0005198980276398765, "loss": 2.2885, "step": 3230 }, { "epoch": 0.08670566766852726, "grad_norm": 0.56640625, "learning_rate": 0.0005200590366295451, "loss": 2.2538, "step": 3231 }, { "epoch": 0.08673250322026621, "grad_norm": 0.5546875, "learning_rate": 0.0005202200456192137, "loss": 2.1929, "step": 3232 }, { "epoch": 0.08675933877200515, "grad_norm": 0.58203125, "learning_rate": 0.0005203810546088823, "loss": 2.3373, "step": 3233 }, { "epoch": 0.0867861743237441, "grad_norm": 0.5546875, "learning_rate": 0.0005205420635985509, "loss": 2.1818, "step": 3234 }, { "epoch": 0.08681300987548304, "grad_norm": 0.58203125, "learning_rate": 0.0005207030725882195, "loss": 2.3277, "step": 3235 }, { "epoch": 0.08683984542722198, "grad_norm": 0.55078125, "learning_rate": 0.0005208640815778881, "loss": 2.2631, "step": 3236 }, { "epoch": 0.08686668097896093, "grad_norm": 0.57421875, "learning_rate": 0.0005210250905675566, "loss": 2.3016, "step": 3237 }, { "epoch": 0.08689351653069988, "grad_norm": 0.5546875, "learning_rate": 0.0005211860995572252, "loss": 2.1477, "step": 3238 }, { "epoch": 0.08692035208243881, "grad_norm": 0.5546875, "learning_rate": 0.0005213471085468938, "loss": 2.2057, "step": 3239 }, { "epoch": 0.08694718763417776, "grad_norm": 0.55078125, "learning_rate": 0.0005215081175365624, "loss": 2.2937, "step": 3240 }, { "epoch": 0.0869740231859167, "grad_norm": 0.5546875, "learning_rate": 0.000521669126526231, "loss": 2.2732, "step": 3241 }, { "epoch": 0.08700085873765565, "grad_norm": 0.55078125, "learning_rate": 0.0005218301355158996, "loss": 2.2389, "step": 3242 }, { "epoch": 0.0870276942893946, "grad_norm": 0.55859375, "learning_rate": 0.0005219911445055682, "loss": 2.2131, "step": 3243 }, { "epoch": 0.08705452984113353, "grad_norm": 0.56640625, "learning_rate": 0.0005221521534952368, "loss": 2.2257, "step": 3244 }, { "epoch": 0.08708136539287248, "grad_norm": 0.55078125, "learning_rate": 0.0005223131624849054, "loss": 2.2183, "step": 3245 }, { "epoch": 0.08710820094461143, "grad_norm": 0.55859375, "learning_rate": 0.0005224741714745739, "loss": 2.1626, "step": 3246 }, { "epoch": 0.08713503649635036, "grad_norm": 0.55078125, "learning_rate": 0.0005226351804642425, "loss": 2.1947, "step": 3247 }, { "epoch": 0.08716187204808931, "grad_norm": 0.56640625, "learning_rate": 0.0005227961894539112, "loss": 2.2448, "step": 3248 }, { "epoch": 0.08718870759982825, "grad_norm": 0.5859375, "learning_rate": 0.0005229571984435797, "loss": 2.3695, "step": 3249 }, { "epoch": 0.0872155431515672, "grad_norm": 0.56640625, "learning_rate": 0.0005231182074332484, "loss": 2.2662, "step": 3250 }, { "epoch": 0.08724237870330614, "grad_norm": 0.55078125, "learning_rate": 0.0005232792164229169, "loss": 2.1896, "step": 3251 }, { "epoch": 0.08726921425504508, "grad_norm": 0.55859375, "learning_rate": 0.0005234402254125855, "loss": 2.2144, "step": 3252 }, { "epoch": 0.08729604980678403, "grad_norm": 0.55859375, "learning_rate": 0.0005236012344022541, "loss": 2.201, "step": 3253 }, { "epoch": 0.08732288535852298, "grad_norm": 0.5625, "learning_rate": 0.0005237622433919226, "loss": 2.1846, "step": 3254 }, { "epoch": 0.08734972091026191, "grad_norm": 0.59765625, "learning_rate": 0.0005239232523815912, "loss": 2.36, "step": 3255 }, { "epoch": 0.08737655646200086, "grad_norm": 0.56640625, "learning_rate": 0.0005240842613712599, "loss": 2.1462, "step": 3256 }, { "epoch": 0.0874033920137398, "grad_norm": 0.5546875, "learning_rate": 0.0005242452703609284, "loss": 2.2221, "step": 3257 }, { "epoch": 0.08743022756547875, "grad_norm": 0.54296875, "learning_rate": 0.0005244062793505971, "loss": 2.094, "step": 3258 }, { "epoch": 0.0874570631172177, "grad_norm": 0.57421875, "learning_rate": 0.0005245672883402656, "loss": 2.2358, "step": 3259 }, { "epoch": 0.08748389866895663, "grad_norm": 0.58203125, "learning_rate": 0.0005247282973299341, "loss": 2.2078, "step": 3260 }, { "epoch": 0.08751073422069558, "grad_norm": 0.55078125, "learning_rate": 0.0005248893063196028, "loss": 2.1978, "step": 3261 }, { "epoch": 0.08753756977243451, "grad_norm": 0.56640625, "learning_rate": 0.0005250503153092714, "loss": 2.2854, "step": 3262 }, { "epoch": 0.08756440532417346, "grad_norm": 0.54296875, "learning_rate": 0.00052521132429894, "loss": 2.1638, "step": 3263 }, { "epoch": 0.08759124087591241, "grad_norm": 0.5625, "learning_rate": 0.0005253723332886086, "loss": 2.2588, "step": 3264 }, { "epoch": 0.08761807642765135, "grad_norm": 0.55078125, "learning_rate": 0.0005255333422782771, "loss": 2.2315, "step": 3265 }, { "epoch": 0.0876449119793903, "grad_norm": 0.5625, "learning_rate": 0.0005256943512679458, "loss": 2.1883, "step": 3266 }, { "epoch": 0.08767174753112925, "grad_norm": 0.5625, "learning_rate": 0.0005258553602576144, "loss": 2.2292, "step": 3267 }, { "epoch": 0.08769858308286818, "grad_norm": 0.5625, "learning_rate": 0.000526016369247283, "loss": 2.2713, "step": 3268 }, { "epoch": 0.08772541863460713, "grad_norm": 0.56640625, "learning_rate": 0.0005261773782369515, "loss": 2.2051, "step": 3269 }, { "epoch": 0.08775225418634607, "grad_norm": 0.5546875, "learning_rate": 0.0005263383872266201, "loss": 2.1563, "step": 3270 }, { "epoch": 0.08777908973808501, "grad_norm": 0.58984375, "learning_rate": 0.0005264993962162887, "loss": 2.2596, "step": 3271 }, { "epoch": 0.08780592528982396, "grad_norm": 0.57421875, "learning_rate": 0.0005266604052059573, "loss": 2.288, "step": 3272 }, { "epoch": 0.0878327608415629, "grad_norm": 0.5546875, "learning_rate": 0.0005268214141956258, "loss": 2.189, "step": 3273 }, { "epoch": 0.08785959639330185, "grad_norm": 0.59375, "learning_rate": 0.0005269824231852945, "loss": 2.3553, "step": 3274 }, { "epoch": 0.0878864319450408, "grad_norm": 0.54296875, "learning_rate": 0.0005271434321749631, "loss": 2.1871, "step": 3275 }, { "epoch": 0.08791326749677973, "grad_norm": 0.57421875, "learning_rate": 0.0005273044411646317, "loss": 2.2327, "step": 3276 }, { "epoch": 0.08794010304851868, "grad_norm": 0.57421875, "learning_rate": 0.0005274654501543002, "loss": 2.3364, "step": 3277 }, { "epoch": 0.08796693860025762, "grad_norm": 0.56640625, "learning_rate": 0.0005276264591439688, "loss": 2.2342, "step": 3278 }, { "epoch": 0.08799377415199656, "grad_norm": 0.55859375, "learning_rate": 0.0005277874681336374, "loss": 2.2471, "step": 3279 }, { "epoch": 0.08802060970373551, "grad_norm": 0.57421875, "learning_rate": 0.000527948477123306, "loss": 2.2232, "step": 3280 }, { "epoch": 0.08804744525547445, "grad_norm": 0.56640625, "learning_rate": 0.0005281094861129747, "loss": 2.2014, "step": 3281 }, { "epoch": 0.0880742808072134, "grad_norm": 0.5625, "learning_rate": 0.0005282704951026432, "loss": 2.2134, "step": 3282 }, { "epoch": 0.08810111635895233, "grad_norm": 0.5703125, "learning_rate": 0.0005284315040923117, "loss": 2.228, "step": 3283 }, { "epoch": 0.08812795191069128, "grad_norm": 0.5625, "learning_rate": 0.0005285925130819804, "loss": 2.274, "step": 3284 }, { "epoch": 0.08815478746243023, "grad_norm": 0.5546875, "learning_rate": 0.0005287535220716489, "loss": 2.1666, "step": 3285 }, { "epoch": 0.08818162301416917, "grad_norm": 0.57421875, "learning_rate": 0.0005289145310613175, "loss": 2.2673, "step": 3286 }, { "epoch": 0.08820845856590812, "grad_norm": 0.5703125, "learning_rate": 0.0005290755400509862, "loss": 2.2029, "step": 3287 }, { "epoch": 0.08823529411764706, "grad_norm": 0.5625, "learning_rate": 0.0005292365490406547, "loss": 2.2392, "step": 3288 }, { "epoch": 0.088262129669386, "grad_norm": 0.55859375, "learning_rate": 0.0005293975580303234, "loss": 2.1999, "step": 3289 }, { "epoch": 0.08828896522112495, "grad_norm": 0.546875, "learning_rate": 0.0005295585670199919, "loss": 2.1837, "step": 3290 }, { "epoch": 0.08831580077286388, "grad_norm": 0.5625, "learning_rate": 0.0005297195760096604, "loss": 2.2275, "step": 3291 }, { "epoch": 0.08834263632460283, "grad_norm": 0.5703125, "learning_rate": 0.0005298805849993291, "loss": 2.1792, "step": 3292 }, { "epoch": 0.08836947187634178, "grad_norm": 0.5546875, "learning_rate": 0.0005300415939889976, "loss": 2.2349, "step": 3293 }, { "epoch": 0.08839630742808072, "grad_norm": 0.5546875, "learning_rate": 0.0005302026029786663, "loss": 2.181, "step": 3294 }, { "epoch": 0.08842314297981967, "grad_norm": 0.5703125, "learning_rate": 0.0005303636119683349, "loss": 2.2811, "step": 3295 }, { "epoch": 0.08844997853155862, "grad_norm": 0.57421875, "learning_rate": 0.0005305246209580034, "loss": 2.202, "step": 3296 }, { "epoch": 0.08847681408329755, "grad_norm": 0.56640625, "learning_rate": 0.0005306856299476721, "loss": 2.2534, "step": 3297 }, { "epoch": 0.0885036496350365, "grad_norm": 0.55078125, "learning_rate": 0.0005308466389373406, "loss": 2.1865, "step": 3298 }, { "epoch": 0.08853048518677543, "grad_norm": 0.5625, "learning_rate": 0.0005310076479270091, "loss": 2.3091, "step": 3299 }, { "epoch": 0.08855732073851438, "grad_norm": 0.56640625, "learning_rate": 0.0005311686569166778, "loss": 2.2024, "step": 3300 }, { "epoch": 0.08858415629025333, "grad_norm": 0.55078125, "learning_rate": 0.0005313296659063464, "loss": 2.284, "step": 3301 }, { "epoch": 0.08861099184199227, "grad_norm": 0.56640625, "learning_rate": 0.000531490674896015, "loss": 2.2266, "step": 3302 }, { "epoch": 0.08863782739373122, "grad_norm": 0.5625, "learning_rate": 0.0005316516838856836, "loss": 2.168, "step": 3303 }, { "epoch": 0.08866466294547017, "grad_norm": 0.57421875, "learning_rate": 0.0005318126928753521, "loss": 2.3205, "step": 3304 }, { "epoch": 0.0886914984972091, "grad_norm": 0.58203125, "learning_rate": 0.0005319737018650208, "loss": 2.2474, "step": 3305 }, { "epoch": 0.08871833404894805, "grad_norm": 0.5546875, "learning_rate": 0.0005321347108546893, "loss": 2.1306, "step": 3306 }, { "epoch": 0.08874516960068699, "grad_norm": 0.5546875, "learning_rate": 0.0005322957198443579, "loss": 2.2154, "step": 3307 }, { "epoch": 0.08877200515242593, "grad_norm": 0.56640625, "learning_rate": 0.0005324567288340265, "loss": 2.2562, "step": 3308 }, { "epoch": 0.08879884070416488, "grad_norm": 0.5546875, "learning_rate": 0.0005326177378236951, "loss": 2.2022, "step": 3309 }, { "epoch": 0.08882567625590382, "grad_norm": 0.5546875, "learning_rate": 0.0005327787468133637, "loss": 2.2109, "step": 3310 }, { "epoch": 0.08885251180764277, "grad_norm": 0.5390625, "learning_rate": 0.0005329397558030323, "loss": 2.118, "step": 3311 }, { "epoch": 0.0888793473593817, "grad_norm": 0.55078125, "learning_rate": 0.0005331007647927009, "loss": 2.2165, "step": 3312 }, { "epoch": 0.08890618291112065, "grad_norm": 0.5625, "learning_rate": 0.0005332617737823695, "loss": 2.1996, "step": 3313 }, { "epoch": 0.0889330184628596, "grad_norm": 0.5390625, "learning_rate": 0.000533422782772038, "loss": 2.0684, "step": 3314 }, { "epoch": 0.08895985401459854, "grad_norm": 0.54296875, "learning_rate": 0.0005335837917617067, "loss": 2.1196, "step": 3315 }, { "epoch": 0.08898668956633748, "grad_norm": 0.55859375, "learning_rate": 0.0005337448007513752, "loss": 2.144, "step": 3316 }, { "epoch": 0.08901352511807643, "grad_norm": 0.5625, "learning_rate": 0.0005339058097410438, "loss": 2.1653, "step": 3317 }, { "epoch": 0.08904036066981537, "grad_norm": 0.546875, "learning_rate": 0.0005340668187307124, "loss": 2.2164, "step": 3318 }, { "epoch": 0.08906719622155432, "grad_norm": 0.5546875, "learning_rate": 0.000534227827720381, "loss": 2.1548, "step": 3319 }, { "epoch": 0.08909403177329325, "grad_norm": 0.5546875, "learning_rate": 0.0005343888367100497, "loss": 2.2091, "step": 3320 }, { "epoch": 0.0891208673250322, "grad_norm": 0.57421875, "learning_rate": 0.0005345498456997182, "loss": 2.234, "step": 3321 }, { "epoch": 0.08914770287677115, "grad_norm": 0.5625, "learning_rate": 0.0005347108546893867, "loss": 2.171, "step": 3322 }, { "epoch": 0.08917453842851009, "grad_norm": 0.56640625, "learning_rate": 0.0005348718636790554, "loss": 2.1862, "step": 3323 }, { "epoch": 0.08920137398024904, "grad_norm": 0.546875, "learning_rate": 0.0005350328726687239, "loss": 2.1357, "step": 3324 }, { "epoch": 0.08922820953198798, "grad_norm": 0.57421875, "learning_rate": 0.0005351938816583926, "loss": 2.2977, "step": 3325 }, { "epoch": 0.08925504508372692, "grad_norm": 0.55078125, "learning_rate": 0.0005353548906480611, "loss": 2.1449, "step": 3326 }, { "epoch": 0.08928188063546587, "grad_norm": 0.57421875, "learning_rate": 0.0005355158996377297, "loss": 2.2748, "step": 3327 }, { "epoch": 0.0893087161872048, "grad_norm": 0.57421875, "learning_rate": 0.0005356769086273984, "loss": 2.2272, "step": 3328 }, { "epoch": 0.08933555173894375, "grad_norm": 0.5703125, "learning_rate": 0.0005358379176170669, "loss": 2.2625, "step": 3329 }, { "epoch": 0.0893623872906827, "grad_norm": 0.578125, "learning_rate": 0.0005359989266067354, "loss": 2.2848, "step": 3330 }, { "epoch": 0.08938922284242164, "grad_norm": 0.55859375, "learning_rate": 0.0005361599355964041, "loss": 2.2347, "step": 3331 }, { "epoch": 0.08941605839416059, "grad_norm": 0.5625, "learning_rate": 0.0005363209445860726, "loss": 2.2498, "step": 3332 }, { "epoch": 0.08944289394589952, "grad_norm": 0.5390625, "learning_rate": 0.0005364819535757413, "loss": 2.1825, "step": 3333 }, { "epoch": 0.08946972949763847, "grad_norm": 0.56640625, "learning_rate": 0.0005366429625654099, "loss": 2.3252, "step": 3334 }, { "epoch": 0.08949656504937742, "grad_norm": 0.55078125, "learning_rate": 0.0005368039715550784, "loss": 2.2241, "step": 3335 }, { "epoch": 0.08952340060111635, "grad_norm": 0.5859375, "learning_rate": 0.0005369649805447471, "loss": 2.2081, "step": 3336 }, { "epoch": 0.0895502361528553, "grad_norm": 0.52734375, "learning_rate": 0.0005371259895344156, "loss": 2.1628, "step": 3337 }, { "epoch": 0.08957707170459425, "grad_norm": 0.55859375, "learning_rate": 0.0005372869985240842, "loss": 2.1499, "step": 3338 }, { "epoch": 0.08960390725633319, "grad_norm": 0.5625, "learning_rate": 0.0005374480075137528, "loss": 2.2033, "step": 3339 }, { "epoch": 0.08963074280807214, "grad_norm": 0.5546875, "learning_rate": 0.0005376090165034213, "loss": 2.1467, "step": 3340 }, { "epoch": 0.08965757835981107, "grad_norm": 0.56640625, "learning_rate": 0.00053777002549309, "loss": 2.1931, "step": 3341 }, { "epoch": 0.08968441391155002, "grad_norm": 0.546875, "learning_rate": 0.0005379310344827586, "loss": 2.2194, "step": 3342 }, { "epoch": 0.08971124946328897, "grad_norm": 0.58984375, "learning_rate": 0.0005380920434724272, "loss": 2.2698, "step": 3343 }, { "epoch": 0.0897380850150279, "grad_norm": 0.5859375, "learning_rate": 0.0005382530524620958, "loss": 2.2954, "step": 3344 }, { "epoch": 0.08976492056676685, "grad_norm": 0.56640625, "learning_rate": 0.0005384140614517643, "loss": 2.2575, "step": 3345 }, { "epoch": 0.0897917561185058, "grad_norm": 0.5625, "learning_rate": 0.0005385750704414329, "loss": 2.2252, "step": 3346 }, { "epoch": 0.08981859167024474, "grad_norm": 0.56640625, "learning_rate": 0.0005387360794311015, "loss": 2.3006, "step": 3347 }, { "epoch": 0.08984542722198369, "grad_norm": 0.54296875, "learning_rate": 0.0005388970884207702, "loss": 2.1806, "step": 3348 }, { "epoch": 0.08987226277372262, "grad_norm": 0.54296875, "learning_rate": 0.0005390580974104387, "loss": 2.1291, "step": 3349 }, { "epoch": 0.08989909832546157, "grad_norm": 0.5546875, "learning_rate": 0.0005392191064001073, "loss": 2.2909, "step": 3350 }, { "epoch": 0.08992593387720052, "grad_norm": 0.55859375, "learning_rate": 0.0005393801153897759, "loss": 2.2059, "step": 3351 }, { "epoch": 0.08995276942893946, "grad_norm": 0.578125, "learning_rate": 0.0005395411243794445, "loss": 2.2141, "step": 3352 }, { "epoch": 0.0899796049806784, "grad_norm": 0.57421875, "learning_rate": 0.000539702133369113, "loss": 2.2079, "step": 3353 }, { "epoch": 0.09000644053241735, "grad_norm": 0.56640625, "learning_rate": 0.0005398631423587817, "loss": 2.2722, "step": 3354 }, { "epoch": 0.09003327608415629, "grad_norm": 0.5546875, "learning_rate": 0.0005400241513484502, "loss": 2.2255, "step": 3355 }, { "epoch": 0.09006011163589524, "grad_norm": 0.57421875, "learning_rate": 0.0005401851603381189, "loss": 2.287, "step": 3356 }, { "epoch": 0.09008694718763417, "grad_norm": 0.55078125, "learning_rate": 0.0005403461693277874, "loss": 2.2514, "step": 3357 }, { "epoch": 0.09011378273937312, "grad_norm": 0.5390625, "learning_rate": 0.000540507178317456, "loss": 2.1436, "step": 3358 }, { "epoch": 0.09014061829111207, "grad_norm": 0.5625, "learning_rate": 0.0005406681873071246, "loss": 2.2622, "step": 3359 }, { "epoch": 0.090167453842851, "grad_norm": 0.546875, "learning_rate": 0.0005408291962967932, "loss": 2.1482, "step": 3360 }, { "epoch": 0.09019428939458995, "grad_norm": 0.5625, "learning_rate": 0.0005409902052864617, "loss": 2.1248, "step": 3361 }, { "epoch": 0.09022112494632889, "grad_norm": 0.55859375, "learning_rate": 0.0005411512142761304, "loss": 2.105, "step": 3362 }, { "epoch": 0.09024796049806784, "grad_norm": 0.55078125, "learning_rate": 0.0005413122232657989, "loss": 2.1357, "step": 3363 }, { "epoch": 0.09027479604980679, "grad_norm": 0.5703125, "learning_rate": 0.0005414732322554676, "loss": 2.2595, "step": 3364 }, { "epoch": 0.09030163160154572, "grad_norm": 0.54296875, "learning_rate": 0.0005416342412451361, "loss": 2.1756, "step": 3365 }, { "epoch": 0.09032846715328467, "grad_norm": 0.5546875, "learning_rate": 0.0005417952502348047, "loss": 2.1805, "step": 3366 }, { "epoch": 0.09035530270502362, "grad_norm": 0.54296875, "learning_rate": 0.0005419562592244734, "loss": 2.1698, "step": 3367 }, { "epoch": 0.09038213825676256, "grad_norm": 0.53125, "learning_rate": 0.0005421172682141419, "loss": 2.0728, "step": 3368 }, { "epoch": 0.0904089738085015, "grad_norm": 0.56640625, "learning_rate": 0.0005422782772038105, "loss": 2.2055, "step": 3369 }, { "epoch": 0.09043580936024044, "grad_norm": 0.5703125, "learning_rate": 0.0005424392861934791, "loss": 2.3233, "step": 3370 }, { "epoch": 0.09046264491197939, "grad_norm": 0.5546875, "learning_rate": 0.0005426002951831476, "loss": 2.0426, "step": 3371 }, { "epoch": 0.09048948046371834, "grad_norm": 0.55859375, "learning_rate": 0.0005427613041728163, "loss": 2.2057, "step": 3372 }, { "epoch": 0.09051631601545727, "grad_norm": 0.5625, "learning_rate": 0.0005429223131624849, "loss": 2.2447, "step": 3373 }, { "epoch": 0.09054315156719622, "grad_norm": 0.55078125, "learning_rate": 0.0005430833221521535, "loss": 2.216, "step": 3374 }, { "epoch": 0.09056998711893517, "grad_norm": 0.546875, "learning_rate": 0.0005432443311418221, "loss": 2.1338, "step": 3375 }, { "epoch": 0.09059682267067411, "grad_norm": 0.5625, "learning_rate": 0.0005434053401314906, "loss": 2.1658, "step": 3376 }, { "epoch": 0.09062365822241306, "grad_norm": 0.5390625, "learning_rate": 0.0005435663491211592, "loss": 2.118, "step": 3377 }, { "epoch": 0.09065049377415199, "grad_norm": 0.56640625, "learning_rate": 0.0005437273581108278, "loss": 2.2139, "step": 3378 }, { "epoch": 0.09067732932589094, "grad_norm": 0.55859375, "learning_rate": 0.0005438883671004963, "loss": 2.2217, "step": 3379 }, { "epoch": 0.09070416487762989, "grad_norm": 0.5625, "learning_rate": 0.000544049376090165, "loss": 2.2667, "step": 3380 }, { "epoch": 0.09073100042936882, "grad_norm": 0.57421875, "learning_rate": 0.0005442103850798336, "loss": 2.1392, "step": 3381 }, { "epoch": 0.09075783598110777, "grad_norm": 0.53515625, "learning_rate": 0.0005443713940695022, "loss": 2.0493, "step": 3382 }, { "epoch": 0.09078467153284671, "grad_norm": 0.578125, "learning_rate": 0.0005445324030591708, "loss": 2.1952, "step": 3383 }, { "epoch": 0.09081150708458566, "grad_norm": 0.57421875, "learning_rate": 0.0005446934120488393, "loss": 2.2042, "step": 3384 }, { "epoch": 0.0908383426363246, "grad_norm": 0.5546875, "learning_rate": 0.0005448544210385079, "loss": 2.1862, "step": 3385 }, { "epoch": 0.09086517818806354, "grad_norm": 0.52734375, "learning_rate": 0.0005450154300281765, "loss": 2.11, "step": 3386 }, { "epoch": 0.09089201373980249, "grad_norm": 0.55859375, "learning_rate": 0.0005451764390178452, "loss": 2.2268, "step": 3387 }, { "epoch": 0.09091884929154144, "grad_norm": 0.54296875, "learning_rate": 0.0005453374480075137, "loss": 2.2414, "step": 3388 }, { "epoch": 0.09094568484328037, "grad_norm": 0.55078125, "learning_rate": 0.0005454984569971823, "loss": 2.2466, "step": 3389 }, { "epoch": 0.09097252039501932, "grad_norm": 0.54296875, "learning_rate": 0.0005456594659868509, "loss": 2.1452, "step": 3390 }, { "epoch": 0.09099935594675826, "grad_norm": 0.54296875, "learning_rate": 0.0005458204749765195, "loss": 2.1626, "step": 3391 }, { "epoch": 0.09102619149849721, "grad_norm": 0.609375, "learning_rate": 0.000545981483966188, "loss": 2.2054, "step": 3392 }, { "epoch": 0.09105302705023616, "grad_norm": 0.58203125, "learning_rate": 0.0005461424929558566, "loss": 2.2115, "step": 3393 }, { "epoch": 0.09107986260197509, "grad_norm": 0.60546875, "learning_rate": 0.0005463035019455252, "loss": 2.2704, "step": 3394 }, { "epoch": 0.09110669815371404, "grad_norm": 0.56640625, "learning_rate": 0.0005464645109351939, "loss": 2.2499, "step": 3395 }, { "epoch": 0.09113353370545299, "grad_norm": 0.55859375, "learning_rate": 0.0005466255199248624, "loss": 2.1834, "step": 3396 }, { "epoch": 0.09116036925719193, "grad_norm": 0.55078125, "learning_rate": 0.000546786528914531, "loss": 2.1642, "step": 3397 }, { "epoch": 0.09118720480893087, "grad_norm": 0.57421875, "learning_rate": 0.0005469475379041996, "loss": 2.3151, "step": 3398 }, { "epoch": 0.09121404036066981, "grad_norm": 0.55859375, "learning_rate": 0.0005471085468938681, "loss": 2.2731, "step": 3399 }, { "epoch": 0.09124087591240876, "grad_norm": 0.55859375, "learning_rate": 0.0005472695558835368, "loss": 2.2287, "step": 3400 }, { "epoch": 0.09126771146414771, "grad_norm": 0.54296875, "learning_rate": 0.0005474305648732054, "loss": 2.1035, "step": 3401 }, { "epoch": 0.09129454701588664, "grad_norm": 0.5546875, "learning_rate": 0.0005475915738628739, "loss": 2.1537, "step": 3402 }, { "epoch": 0.09132138256762559, "grad_norm": 0.5546875, "learning_rate": 0.0005477525828525426, "loss": 2.2146, "step": 3403 }, { "epoch": 0.09134821811936454, "grad_norm": 0.5625, "learning_rate": 0.0005479135918422111, "loss": 2.3061, "step": 3404 }, { "epoch": 0.09137505367110348, "grad_norm": 0.546875, "learning_rate": 0.0005480746008318798, "loss": 2.1907, "step": 3405 }, { "epoch": 0.09140188922284243, "grad_norm": 0.56640625, "learning_rate": 0.0005482356098215484, "loss": 2.2573, "step": 3406 }, { "epoch": 0.09142872477458136, "grad_norm": 0.55859375, "learning_rate": 0.0005483966188112169, "loss": 2.2109, "step": 3407 }, { "epoch": 0.09145556032632031, "grad_norm": 0.54296875, "learning_rate": 0.0005485576278008855, "loss": 2.1998, "step": 3408 }, { "epoch": 0.09148239587805926, "grad_norm": 0.5625, "learning_rate": 0.0005487186367905541, "loss": 2.2191, "step": 3409 }, { "epoch": 0.0915092314297982, "grad_norm": 0.55859375, "learning_rate": 0.0005488796457802226, "loss": 2.2538, "step": 3410 }, { "epoch": 0.09153606698153714, "grad_norm": 0.54296875, "learning_rate": 0.0005490406547698913, "loss": 2.1915, "step": 3411 }, { "epoch": 0.09156290253327608, "grad_norm": 0.5625, "learning_rate": 0.0005492016637595598, "loss": 2.3194, "step": 3412 }, { "epoch": 0.09158973808501503, "grad_norm": 0.55078125, "learning_rate": 0.0005493626727492285, "loss": 2.2942, "step": 3413 }, { "epoch": 0.09161657363675398, "grad_norm": 0.5546875, "learning_rate": 0.0005495236817388971, "loss": 2.2564, "step": 3414 }, { "epoch": 0.09164340918849291, "grad_norm": 0.5546875, "learning_rate": 0.0005496846907285656, "loss": 2.2422, "step": 3415 }, { "epoch": 0.09167024474023186, "grad_norm": 0.546875, "learning_rate": 0.0005498456997182342, "loss": 2.2246, "step": 3416 }, { "epoch": 0.09169708029197081, "grad_norm": 0.54296875, "learning_rate": 0.0005500067087079028, "loss": 2.1564, "step": 3417 }, { "epoch": 0.09172391584370974, "grad_norm": 0.55859375, "learning_rate": 0.0005501677176975714, "loss": 2.1404, "step": 3418 }, { "epoch": 0.0917507513954487, "grad_norm": 0.578125, "learning_rate": 0.00055032872668724, "loss": 2.3184, "step": 3419 }, { "epoch": 0.09177758694718763, "grad_norm": 0.53515625, "learning_rate": 0.0005504897356769086, "loss": 2.1618, "step": 3420 }, { "epoch": 0.09180442249892658, "grad_norm": 0.5625, "learning_rate": 0.0005506507446665772, "loss": 2.2744, "step": 3421 }, { "epoch": 0.09183125805066553, "grad_norm": 0.546875, "learning_rate": 0.0005508117536562458, "loss": 2.1916, "step": 3422 }, { "epoch": 0.09185809360240446, "grad_norm": 0.52734375, "learning_rate": 0.0005509727626459144, "loss": 2.1336, "step": 3423 }, { "epoch": 0.09188492915414341, "grad_norm": 0.53515625, "learning_rate": 0.0005511337716355829, "loss": 2.1848, "step": 3424 }, { "epoch": 0.09191176470588236, "grad_norm": 0.5390625, "learning_rate": 0.0005512947806252515, "loss": 2.2088, "step": 3425 }, { "epoch": 0.0919386002576213, "grad_norm": 0.546875, "learning_rate": 0.0005514557896149201, "loss": 2.2614, "step": 3426 }, { "epoch": 0.09196543580936024, "grad_norm": 0.5703125, "learning_rate": 0.0005516167986045887, "loss": 2.2951, "step": 3427 }, { "epoch": 0.09199227136109918, "grad_norm": 0.546875, "learning_rate": 0.0005517778075942574, "loss": 2.2495, "step": 3428 }, { "epoch": 0.09201910691283813, "grad_norm": 0.546875, "learning_rate": 0.0005519388165839259, "loss": 2.2663, "step": 3429 }, { "epoch": 0.09204594246457708, "grad_norm": 0.5625, "learning_rate": 0.0005520998255735944, "loss": 2.2943, "step": 3430 }, { "epoch": 0.09207277801631601, "grad_norm": 0.53125, "learning_rate": 0.0005522608345632631, "loss": 2.1536, "step": 3431 }, { "epoch": 0.09209961356805496, "grad_norm": 0.546875, "learning_rate": 0.0005524218435529316, "loss": 2.3029, "step": 3432 }, { "epoch": 0.0921264491197939, "grad_norm": 0.5546875, "learning_rate": 0.0005525828525426002, "loss": 2.2655, "step": 3433 }, { "epoch": 0.09215328467153285, "grad_norm": 0.53515625, "learning_rate": 0.0005527438615322689, "loss": 2.1689, "step": 3434 }, { "epoch": 0.0921801202232718, "grad_norm": 0.5390625, "learning_rate": 0.0005529048705219374, "loss": 2.1324, "step": 3435 }, { "epoch": 0.09220695577501073, "grad_norm": 0.53515625, "learning_rate": 0.0005530658795116061, "loss": 2.1298, "step": 3436 }, { "epoch": 0.09223379132674968, "grad_norm": 0.546875, "learning_rate": 0.0005532268885012746, "loss": 2.2277, "step": 3437 }, { "epoch": 0.09226062687848863, "grad_norm": 0.53515625, "learning_rate": 0.0005533878974909431, "loss": 2.2026, "step": 3438 }, { "epoch": 0.09228746243022756, "grad_norm": 0.52734375, "learning_rate": 0.0005535489064806118, "loss": 2.1777, "step": 3439 }, { "epoch": 0.09231429798196651, "grad_norm": 0.51953125, "learning_rate": 0.0005537099154702804, "loss": 2.2052, "step": 3440 }, { "epoch": 0.09234113353370545, "grad_norm": 0.55078125, "learning_rate": 0.000553870924459949, "loss": 2.1881, "step": 3441 }, { "epoch": 0.0923679690854444, "grad_norm": 0.54296875, "learning_rate": 0.0005540319334496176, "loss": 2.2595, "step": 3442 }, { "epoch": 0.09239480463718334, "grad_norm": 0.54296875, "learning_rate": 0.0005541929424392861, "loss": 2.286, "step": 3443 }, { "epoch": 0.09242164018892228, "grad_norm": 0.54296875, "learning_rate": 0.0005543539514289548, "loss": 2.2479, "step": 3444 }, { "epoch": 0.09244847574066123, "grad_norm": 0.5703125, "learning_rate": 0.0005545149604186233, "loss": 2.3279, "step": 3445 }, { "epoch": 0.09247531129240018, "grad_norm": 0.53125, "learning_rate": 0.0005546759694082918, "loss": 2.2888, "step": 3446 }, { "epoch": 0.09250214684413911, "grad_norm": 0.53515625, "learning_rate": 0.0005548369783979605, "loss": 2.2001, "step": 3447 }, { "epoch": 0.09252898239587806, "grad_norm": 0.546875, "learning_rate": 0.0005549979873876291, "loss": 2.1909, "step": 3448 }, { "epoch": 0.092555817947617, "grad_norm": 0.53125, "learning_rate": 0.0005551589963772977, "loss": 2.1782, "step": 3449 }, { "epoch": 0.09258265349935595, "grad_norm": 0.5390625, "learning_rate": 0.0005553200053669663, "loss": 2.2527, "step": 3450 }, { "epoch": 0.0926094890510949, "grad_norm": 0.54296875, "learning_rate": 0.0005554810143566348, "loss": 2.1388, "step": 3451 }, { "epoch": 0.09263632460283383, "grad_norm": 0.5546875, "learning_rate": 0.0005556420233463035, "loss": 2.1876, "step": 3452 }, { "epoch": 0.09266316015457278, "grad_norm": 0.54296875, "learning_rate": 0.0005558030323359721, "loss": 2.2872, "step": 3453 }, { "epoch": 0.09268999570631173, "grad_norm": 0.52734375, "learning_rate": 0.0005559640413256407, "loss": 2.1521, "step": 3454 }, { "epoch": 0.09271683125805066, "grad_norm": 0.53515625, "learning_rate": 0.0005561250503153092, "loss": 2.1241, "step": 3455 }, { "epoch": 0.09274366680978961, "grad_norm": 0.54296875, "learning_rate": 0.0005562860593049778, "loss": 2.2525, "step": 3456 }, { "epoch": 0.09277050236152855, "grad_norm": 0.54296875, "learning_rate": 0.0005564470682946464, "loss": 2.1617, "step": 3457 }, { "epoch": 0.0927973379132675, "grad_norm": 0.55859375, "learning_rate": 0.000556608077284315, "loss": 2.2402, "step": 3458 }, { "epoch": 0.09282417346500645, "grad_norm": 0.5390625, "learning_rate": 0.0005567690862739837, "loss": 2.1624, "step": 3459 }, { "epoch": 0.09285100901674538, "grad_norm": 0.5390625, "learning_rate": 0.0005569300952636522, "loss": 2.2247, "step": 3460 }, { "epoch": 0.09287784456848433, "grad_norm": 0.546875, "learning_rate": 0.0005570911042533207, "loss": 2.1844, "step": 3461 }, { "epoch": 0.09290468012022327, "grad_norm": 0.55078125, "learning_rate": 0.0005572521132429894, "loss": 2.1793, "step": 3462 }, { "epoch": 0.09293151567196221, "grad_norm": 0.52734375, "learning_rate": 0.0005574131222326579, "loss": 2.1373, "step": 3463 }, { "epoch": 0.09295835122370116, "grad_norm": 0.5546875, "learning_rate": 0.0005575741312223265, "loss": 2.3498, "step": 3464 }, { "epoch": 0.0929851867754401, "grad_norm": 0.546875, "learning_rate": 0.0005577351402119951, "loss": 2.1553, "step": 3465 }, { "epoch": 0.09301202232717905, "grad_norm": 0.53515625, "learning_rate": 0.0005578961492016637, "loss": 2.1138, "step": 3466 }, { "epoch": 0.093038857878918, "grad_norm": 0.546875, "learning_rate": 0.0005580571581913324, "loss": 2.1824, "step": 3467 }, { "epoch": 0.09306569343065693, "grad_norm": 0.52734375, "learning_rate": 0.0005582181671810009, "loss": 2.1317, "step": 3468 }, { "epoch": 0.09309252898239588, "grad_norm": 0.52734375, "learning_rate": 0.0005583791761706694, "loss": 2.2279, "step": 3469 }, { "epoch": 0.09311936453413482, "grad_norm": 0.51953125, "learning_rate": 0.0005585401851603381, "loss": 2.1346, "step": 3470 }, { "epoch": 0.09314620008587376, "grad_norm": 0.55078125, "learning_rate": 0.0005587011941500066, "loss": 2.1773, "step": 3471 }, { "epoch": 0.09317303563761271, "grad_norm": 0.546875, "learning_rate": 0.0005588622031396753, "loss": 2.1736, "step": 3472 }, { "epoch": 0.09319987118935165, "grad_norm": 0.55859375, "learning_rate": 0.0005590232121293439, "loss": 2.3085, "step": 3473 }, { "epoch": 0.0932267067410906, "grad_norm": 0.53515625, "learning_rate": 0.0005591842211190124, "loss": 2.1628, "step": 3474 }, { "epoch": 0.09325354229282955, "grad_norm": 0.5390625, "learning_rate": 0.0005593452301086811, "loss": 2.207, "step": 3475 }, { "epoch": 0.09328037784456848, "grad_norm": 0.5390625, "learning_rate": 0.0005595062390983496, "loss": 2.1231, "step": 3476 }, { "epoch": 0.09330721339630743, "grad_norm": 0.52734375, "learning_rate": 0.0005596672480880181, "loss": 2.1159, "step": 3477 }, { "epoch": 0.09333404894804637, "grad_norm": 0.51953125, "learning_rate": 0.0005598282570776868, "loss": 2.1501, "step": 3478 }, { "epoch": 0.09336088449978532, "grad_norm": 0.5390625, "learning_rate": 0.0005599892660673553, "loss": 2.1868, "step": 3479 }, { "epoch": 0.09338772005152426, "grad_norm": 0.546875, "learning_rate": 0.000560150275057024, "loss": 2.2169, "step": 3480 }, { "epoch": 0.0934145556032632, "grad_norm": 0.53125, "learning_rate": 0.0005603112840466926, "loss": 2.2225, "step": 3481 }, { "epoch": 0.09344139115500215, "grad_norm": 0.55078125, "learning_rate": 0.0005604722930363611, "loss": 2.2828, "step": 3482 }, { "epoch": 0.09346822670674108, "grad_norm": 0.54296875, "learning_rate": 0.0005606333020260298, "loss": 2.2186, "step": 3483 }, { "epoch": 0.09349506225848003, "grad_norm": 0.53515625, "learning_rate": 0.0005607943110156983, "loss": 2.1323, "step": 3484 }, { "epoch": 0.09352189781021898, "grad_norm": 0.51953125, "learning_rate": 0.0005609553200053668, "loss": 2.1667, "step": 3485 }, { "epoch": 0.09354873336195792, "grad_norm": 0.53515625, "learning_rate": 0.0005611163289950355, "loss": 2.1747, "step": 3486 }, { "epoch": 0.09357556891369687, "grad_norm": 0.5546875, "learning_rate": 0.0005612773379847041, "loss": 2.2173, "step": 3487 }, { "epoch": 0.09360240446543582, "grad_norm": 0.53515625, "learning_rate": 0.0005614383469743727, "loss": 2.264, "step": 3488 }, { "epoch": 0.09362924001717475, "grad_norm": 0.52734375, "learning_rate": 0.0005615993559640413, "loss": 1.9981, "step": 3489 }, { "epoch": 0.0936560755689137, "grad_norm": 0.55078125, "learning_rate": 0.0005617603649537098, "loss": 2.1915, "step": 3490 }, { "epoch": 0.09368291112065263, "grad_norm": 0.52734375, "learning_rate": 0.0005619213739433785, "loss": 2.1064, "step": 3491 }, { "epoch": 0.09370974667239158, "grad_norm": 0.51953125, "learning_rate": 0.000562082382933047, "loss": 2.0825, "step": 3492 }, { "epoch": 0.09373658222413053, "grad_norm": 0.5390625, "learning_rate": 0.0005622433919227157, "loss": 2.1776, "step": 3493 }, { "epoch": 0.09376341777586947, "grad_norm": 0.53515625, "learning_rate": 0.0005624044009123842, "loss": 2.1777, "step": 3494 }, { "epoch": 0.09379025332760842, "grad_norm": 0.53515625, "learning_rate": 0.0005625654099020528, "loss": 2.1841, "step": 3495 }, { "epoch": 0.09381708887934737, "grad_norm": 0.546875, "learning_rate": 0.0005627264188917214, "loss": 2.226, "step": 3496 }, { "epoch": 0.0938439244310863, "grad_norm": 0.5546875, "learning_rate": 0.00056288742788139, "loss": 2.2146, "step": 3497 }, { "epoch": 0.09387075998282525, "grad_norm": 0.51953125, "learning_rate": 0.0005630484368710586, "loss": 2.1647, "step": 3498 }, { "epoch": 0.09389759553456418, "grad_norm": 0.546875, "learning_rate": 0.0005632094458607272, "loss": 2.249, "step": 3499 }, { "epoch": 0.09392443108630313, "grad_norm": 0.546875, "learning_rate": 0.0005633704548503957, "loss": 2.1619, "step": 3500 }, { "epoch": 0.09395126663804208, "grad_norm": 0.54296875, "learning_rate": 0.0005635314638400644, "loss": 2.1835, "step": 3501 }, { "epoch": 0.09397810218978102, "grad_norm": 0.51171875, "learning_rate": 0.0005636924728297329, "loss": 2.0916, "step": 3502 }, { "epoch": 0.09400493774151997, "grad_norm": 0.5703125, "learning_rate": 0.0005638534818194016, "loss": 2.334, "step": 3503 }, { "epoch": 0.09403177329325892, "grad_norm": 0.5078125, "learning_rate": 0.0005640144908090701, "loss": 2.0402, "step": 3504 }, { "epoch": 0.09405860884499785, "grad_norm": 0.53515625, "learning_rate": 0.0005641754997987387, "loss": 2.2102, "step": 3505 }, { "epoch": 0.0940854443967368, "grad_norm": 0.5546875, "learning_rate": 0.0005643365087884074, "loss": 2.2687, "step": 3506 }, { "epoch": 0.09411227994847574, "grad_norm": 0.5390625, "learning_rate": 0.0005644975177780759, "loss": 2.0961, "step": 3507 }, { "epoch": 0.09413911550021468, "grad_norm": 0.53125, "learning_rate": 0.0005646585267677444, "loss": 2.1258, "step": 3508 }, { "epoch": 0.09416595105195363, "grad_norm": 0.53515625, "learning_rate": 0.0005648195357574131, "loss": 2.2418, "step": 3509 }, { "epoch": 0.09419278660369257, "grad_norm": 0.53515625, "learning_rate": 0.0005649805447470816, "loss": 2.1734, "step": 3510 }, { "epoch": 0.09421962215543152, "grad_norm": 0.55078125, "learning_rate": 0.0005651415537367503, "loss": 2.2234, "step": 3511 }, { "epoch": 0.09424645770717045, "grad_norm": 0.546875, "learning_rate": 0.0005653025627264189, "loss": 2.266, "step": 3512 }, { "epoch": 0.0942732932589094, "grad_norm": 0.5390625, "learning_rate": 0.0005654635717160874, "loss": 2.2123, "step": 3513 }, { "epoch": 0.09430012881064835, "grad_norm": 0.53125, "learning_rate": 0.0005656245807057561, "loss": 2.1883, "step": 3514 }, { "epoch": 0.09432696436238729, "grad_norm": 0.54296875, "learning_rate": 0.0005657855896954246, "loss": 2.1101, "step": 3515 }, { "epoch": 0.09435379991412624, "grad_norm": 0.53125, "learning_rate": 0.0005659465986850932, "loss": 2.161, "step": 3516 }, { "epoch": 0.09438063546586518, "grad_norm": 0.53515625, "learning_rate": 0.0005661076076747618, "loss": 2.2275, "step": 3517 }, { "epoch": 0.09440747101760412, "grad_norm": 0.54296875, "learning_rate": 0.0005662686166644303, "loss": 2.2558, "step": 3518 }, { "epoch": 0.09443430656934307, "grad_norm": 0.546875, "learning_rate": 0.000566429625654099, "loss": 2.2587, "step": 3519 }, { "epoch": 0.094461142121082, "grad_norm": 0.53515625, "learning_rate": 0.0005665906346437676, "loss": 2.3001, "step": 3520 }, { "epoch": 0.09448797767282095, "grad_norm": 0.52734375, "learning_rate": 0.0005667516436334361, "loss": 2.171, "step": 3521 }, { "epoch": 0.0945148132245599, "grad_norm": 0.54296875, "learning_rate": 0.0005669126526231048, "loss": 2.1944, "step": 3522 }, { "epoch": 0.09454164877629884, "grad_norm": 0.51953125, "learning_rate": 0.0005670736616127733, "loss": 2.1582, "step": 3523 }, { "epoch": 0.09456848432803779, "grad_norm": 0.54296875, "learning_rate": 0.0005672346706024419, "loss": 2.2445, "step": 3524 }, { "epoch": 0.09459531987977673, "grad_norm": 0.5234375, "learning_rate": 0.0005673956795921105, "loss": 2.1229, "step": 3525 }, { "epoch": 0.09462215543151567, "grad_norm": 0.546875, "learning_rate": 0.0005675566885817791, "loss": 2.1499, "step": 3526 }, { "epoch": 0.09464899098325462, "grad_norm": 0.53125, "learning_rate": 0.0005677176975714477, "loss": 2.1877, "step": 3527 }, { "epoch": 0.09467582653499355, "grad_norm": 0.54296875, "learning_rate": 0.0005678787065611163, "loss": 2.2154, "step": 3528 }, { "epoch": 0.0947026620867325, "grad_norm": 0.54296875, "learning_rate": 0.0005680397155507849, "loss": 2.0893, "step": 3529 }, { "epoch": 0.09472949763847145, "grad_norm": 0.5234375, "learning_rate": 0.0005682007245404535, "loss": 2.1625, "step": 3530 }, { "epoch": 0.09475633319021039, "grad_norm": 0.5546875, "learning_rate": 0.000568361733530122, "loss": 2.2856, "step": 3531 }, { "epoch": 0.09478316874194934, "grad_norm": 0.52734375, "learning_rate": 0.0005685227425197906, "loss": 2.1126, "step": 3532 }, { "epoch": 0.09481000429368827, "grad_norm": 0.5390625, "learning_rate": 0.0005686837515094592, "loss": 2.2064, "step": 3533 }, { "epoch": 0.09483683984542722, "grad_norm": 0.53125, "learning_rate": 0.0005688447604991279, "loss": 2.1012, "step": 3534 }, { "epoch": 0.09486367539716617, "grad_norm": 0.53515625, "learning_rate": 0.0005690057694887964, "loss": 2.2033, "step": 3535 }, { "epoch": 0.0948905109489051, "grad_norm": 0.54296875, "learning_rate": 0.000569166778478465, "loss": 2.2063, "step": 3536 }, { "epoch": 0.09491734650064405, "grad_norm": 0.51171875, "learning_rate": 0.0005693277874681336, "loss": 2.1603, "step": 3537 }, { "epoch": 0.094944182052383, "grad_norm": 0.53515625, "learning_rate": 0.0005694887964578022, "loss": 2.1967, "step": 3538 }, { "epoch": 0.09497101760412194, "grad_norm": 0.515625, "learning_rate": 0.0005696498054474707, "loss": 2.0923, "step": 3539 }, { "epoch": 0.09499785315586089, "grad_norm": 0.53125, "learning_rate": 0.0005698108144371394, "loss": 2.2888, "step": 3540 }, { "epoch": 0.09502468870759982, "grad_norm": 0.53125, "learning_rate": 0.0005699718234268079, "loss": 2.1284, "step": 3541 }, { "epoch": 0.09505152425933877, "grad_norm": 0.5234375, "learning_rate": 0.0005701328324164766, "loss": 2.169, "step": 3542 }, { "epoch": 0.09507835981107772, "grad_norm": 0.5234375, "learning_rate": 0.0005702938414061451, "loss": 2.1089, "step": 3543 }, { "epoch": 0.09510519536281666, "grad_norm": 0.55078125, "learning_rate": 0.0005704548503958137, "loss": 2.2412, "step": 3544 }, { "epoch": 0.0951320309145556, "grad_norm": 0.53125, "learning_rate": 0.0005706158593854824, "loss": 2.1468, "step": 3545 }, { "epoch": 0.09515886646629455, "grad_norm": 0.53515625, "learning_rate": 0.0005707768683751508, "loss": 2.1458, "step": 3546 }, { "epoch": 0.09518570201803349, "grad_norm": 0.55078125, "learning_rate": 0.0005709378773648195, "loss": 2.1534, "step": 3547 }, { "epoch": 0.09521253756977244, "grad_norm": 0.5234375, "learning_rate": 0.0005710988863544881, "loss": 2.1237, "step": 3548 }, { "epoch": 0.09523937312151137, "grad_norm": 0.52734375, "learning_rate": 0.0005712598953441566, "loss": 2.1122, "step": 3549 }, { "epoch": 0.09526620867325032, "grad_norm": 0.53125, "learning_rate": 0.0005714209043338253, "loss": 2.1333, "step": 3550 }, { "epoch": 0.09529304422498927, "grad_norm": 0.52734375, "learning_rate": 0.0005715819133234938, "loss": 2.0956, "step": 3551 }, { "epoch": 0.0953198797767282, "grad_norm": 0.546875, "learning_rate": 0.0005717429223131625, "loss": 2.1443, "step": 3552 }, { "epoch": 0.09534671532846715, "grad_norm": 0.53515625, "learning_rate": 0.0005719039313028311, "loss": 2.1777, "step": 3553 }, { "epoch": 0.0953735508802061, "grad_norm": 0.52734375, "learning_rate": 0.0005720649402924996, "loss": 2.162, "step": 3554 }, { "epoch": 0.09540038643194504, "grad_norm": 0.53125, "learning_rate": 0.0005722259492821682, "loss": 2.1428, "step": 3555 }, { "epoch": 0.09542722198368399, "grad_norm": 0.55078125, "learning_rate": 0.0005723869582718368, "loss": 2.2351, "step": 3556 }, { "epoch": 0.09545405753542292, "grad_norm": 0.53125, "learning_rate": 0.0005725479672615053, "loss": 2.2369, "step": 3557 }, { "epoch": 0.09548089308716187, "grad_norm": 0.515625, "learning_rate": 0.000572708976251174, "loss": 2.1521, "step": 3558 }, { "epoch": 0.09550772863890082, "grad_norm": 0.5234375, "learning_rate": 0.0005728699852408426, "loss": 2.145, "step": 3559 }, { "epoch": 0.09553456419063976, "grad_norm": 0.515625, "learning_rate": 0.0005730309942305112, "loss": 2.1236, "step": 3560 }, { "epoch": 0.0955613997423787, "grad_norm": 0.515625, "learning_rate": 0.0005731920032201798, "loss": 2.1208, "step": 3561 }, { "epoch": 0.09558823529411764, "grad_norm": 0.5234375, "learning_rate": 0.0005733530122098483, "loss": 2.1538, "step": 3562 }, { "epoch": 0.09561507084585659, "grad_norm": 0.546875, "learning_rate": 0.0005735140211995169, "loss": 2.189, "step": 3563 }, { "epoch": 0.09564190639759554, "grad_norm": 0.52734375, "learning_rate": 0.0005736750301891855, "loss": 2.1602, "step": 3564 }, { "epoch": 0.09566874194933447, "grad_norm": 0.51953125, "learning_rate": 0.000573836039178854, "loss": 2.14, "step": 3565 }, { "epoch": 0.09569557750107342, "grad_norm": 0.51171875, "learning_rate": 0.0005739970481685227, "loss": 2.0614, "step": 3566 }, { "epoch": 0.09572241305281237, "grad_norm": 0.53125, "learning_rate": 0.0005741580571581913, "loss": 2.1652, "step": 3567 }, { "epoch": 0.0957492486045513, "grad_norm": 0.53125, "learning_rate": 0.0005743190661478599, "loss": 2.1292, "step": 3568 }, { "epoch": 0.09577608415629026, "grad_norm": 0.53125, "learning_rate": 0.0005744800751375285, "loss": 2.1493, "step": 3569 }, { "epoch": 0.09580291970802919, "grad_norm": 0.5390625, "learning_rate": 0.000574641084127197, "loss": 2.1369, "step": 3570 }, { "epoch": 0.09582975525976814, "grad_norm": 0.53125, "learning_rate": 0.0005748020931168656, "loss": 2.1228, "step": 3571 }, { "epoch": 0.09585659081150709, "grad_norm": 0.546875, "learning_rate": 0.0005749631021065342, "loss": 2.2442, "step": 3572 }, { "epoch": 0.09588342636324602, "grad_norm": 0.53125, "learning_rate": 0.0005751241110962029, "loss": 2.1839, "step": 3573 }, { "epoch": 0.09591026191498497, "grad_norm": 0.52734375, "learning_rate": 0.0005752851200858714, "loss": 2.1231, "step": 3574 }, { "epoch": 0.09593709746672392, "grad_norm": 0.5859375, "learning_rate": 0.00057544612907554, "loss": 2.2402, "step": 3575 }, { "epoch": 0.09596393301846286, "grad_norm": 0.5859375, "learning_rate": 0.0005756071380652086, "loss": 2.2422, "step": 3576 }, { "epoch": 0.0959907685702018, "grad_norm": 0.55078125, "learning_rate": 0.0005757681470548771, "loss": 2.2934, "step": 3577 }, { "epoch": 0.09601760412194074, "grad_norm": 0.54296875, "learning_rate": 0.0005759291560445458, "loss": 2.3103, "step": 3578 }, { "epoch": 0.09604443967367969, "grad_norm": 0.5390625, "learning_rate": 0.0005760901650342144, "loss": 2.1657, "step": 3579 }, { "epoch": 0.09607127522541864, "grad_norm": 0.55078125, "learning_rate": 0.0005762511740238829, "loss": 2.2204, "step": 3580 }, { "epoch": 0.09609811077715757, "grad_norm": 0.55078125, "learning_rate": 0.0005764121830135516, "loss": 2.2492, "step": 3581 }, { "epoch": 0.09612494632889652, "grad_norm": 0.5390625, "learning_rate": 0.0005765731920032201, "loss": 2.1856, "step": 3582 }, { "epoch": 0.09615178188063546, "grad_norm": 0.53515625, "learning_rate": 0.0005767342009928888, "loss": 2.1961, "step": 3583 }, { "epoch": 0.09617861743237441, "grad_norm": 0.53125, "learning_rate": 0.0005768952099825573, "loss": 2.172, "step": 3584 }, { "epoch": 0.09620545298411336, "grad_norm": 0.54296875, "learning_rate": 0.0005770562189722258, "loss": 2.1644, "step": 3585 }, { "epoch": 0.09623228853585229, "grad_norm": 0.5625, "learning_rate": 0.0005772172279618945, "loss": 2.2844, "step": 3586 }, { "epoch": 0.09625912408759124, "grad_norm": 0.52734375, "learning_rate": 0.0005773782369515631, "loss": 2.1508, "step": 3587 }, { "epoch": 0.09628595963933019, "grad_norm": 0.515625, "learning_rate": 0.0005775392459412316, "loss": 2.1701, "step": 3588 }, { "epoch": 0.09631279519106913, "grad_norm": 0.5390625, "learning_rate": 0.0005777002549309003, "loss": 2.2559, "step": 3589 }, { "epoch": 0.09633963074280807, "grad_norm": 0.55859375, "learning_rate": 0.0005778612639205688, "loss": 2.3054, "step": 3590 }, { "epoch": 0.09636646629454701, "grad_norm": 0.52734375, "learning_rate": 0.0005780222729102375, "loss": 2.1693, "step": 3591 }, { "epoch": 0.09639330184628596, "grad_norm": 0.51953125, "learning_rate": 0.0005781832818999061, "loss": 2.1559, "step": 3592 }, { "epoch": 0.09642013739802491, "grad_norm": 0.53515625, "learning_rate": 0.0005783442908895746, "loss": 2.1892, "step": 3593 }, { "epoch": 0.09644697294976384, "grad_norm": 0.5234375, "learning_rate": 0.0005785052998792432, "loss": 2.1577, "step": 3594 }, { "epoch": 0.09647380850150279, "grad_norm": 0.53515625, "learning_rate": 0.0005786663088689118, "loss": 2.1884, "step": 3595 }, { "epoch": 0.09650064405324174, "grad_norm": 0.52734375, "learning_rate": 0.0005788273178585804, "loss": 2.2058, "step": 3596 }, { "epoch": 0.09652747960498068, "grad_norm": 0.52734375, "learning_rate": 0.000578988326848249, "loss": 2.1796, "step": 3597 }, { "epoch": 0.09655431515671963, "grad_norm": 0.5390625, "learning_rate": 0.0005791493358379176, "loss": 2.2166, "step": 3598 }, { "epoch": 0.09658115070845856, "grad_norm": 0.5390625, "learning_rate": 0.0005793103448275862, "loss": 2.2438, "step": 3599 }, { "epoch": 0.09660798626019751, "grad_norm": 0.53515625, "learning_rate": 0.0005794713538172547, "loss": 2.2162, "step": 3600 }, { "epoch": 0.09663482181193646, "grad_norm": 0.54296875, "learning_rate": 0.0005796323628069233, "loss": 2.2224, "step": 3601 }, { "epoch": 0.0966616573636754, "grad_norm": 0.53125, "learning_rate": 0.0005797933717965919, "loss": 2.2274, "step": 3602 }, { "epoch": 0.09668849291541434, "grad_norm": 0.546875, "learning_rate": 0.0005799543807862605, "loss": 2.2274, "step": 3603 }, { "epoch": 0.09671532846715329, "grad_norm": 0.53515625, "learning_rate": 0.0005801153897759291, "loss": 2.1092, "step": 3604 }, { "epoch": 0.09674216401889223, "grad_norm": 0.53515625, "learning_rate": 0.0005802763987655977, "loss": 2.3201, "step": 3605 }, { "epoch": 0.09676899957063118, "grad_norm": 0.53515625, "learning_rate": 0.0005804374077552663, "loss": 2.1658, "step": 3606 }, { "epoch": 0.09679583512237011, "grad_norm": 0.5234375, "learning_rate": 0.0005805984167449349, "loss": 2.1811, "step": 3607 }, { "epoch": 0.09682267067410906, "grad_norm": 0.53125, "learning_rate": 0.0005807594257346034, "loss": 2.1504, "step": 3608 }, { "epoch": 0.09684950622584801, "grad_norm": 0.5546875, "learning_rate": 0.0005809204347242721, "loss": 2.3398, "step": 3609 }, { "epoch": 0.09687634177758694, "grad_norm": 0.52734375, "learning_rate": 0.0005810814437139406, "loss": 2.2218, "step": 3610 }, { "epoch": 0.09690317732932589, "grad_norm": 0.51953125, "learning_rate": 0.0005812424527036092, "loss": 2.1089, "step": 3611 }, { "epoch": 0.09693001288106483, "grad_norm": 0.52734375, "learning_rate": 0.0005814034616932779, "loss": 2.2151, "step": 3612 }, { "epoch": 0.09695684843280378, "grad_norm": 0.5078125, "learning_rate": 0.0005815644706829464, "loss": 2.0928, "step": 3613 }, { "epoch": 0.09698368398454273, "grad_norm": 0.52734375, "learning_rate": 0.0005817254796726151, "loss": 2.1915, "step": 3614 }, { "epoch": 0.09701051953628166, "grad_norm": 0.53125, "learning_rate": 0.0005818864886622836, "loss": 2.2296, "step": 3615 }, { "epoch": 0.09703735508802061, "grad_norm": 0.53125, "learning_rate": 0.0005820474976519521, "loss": 2.2383, "step": 3616 }, { "epoch": 0.09706419063975956, "grad_norm": 0.52734375, "learning_rate": 0.0005822085066416208, "loss": 2.2028, "step": 3617 }, { "epoch": 0.0970910261914985, "grad_norm": 0.546875, "learning_rate": 0.0005823695156312893, "loss": 2.2224, "step": 3618 }, { "epoch": 0.09711786174323744, "grad_norm": 0.515625, "learning_rate": 0.0005825305246209579, "loss": 2.1577, "step": 3619 }, { "epoch": 0.09714469729497638, "grad_norm": 0.53515625, "learning_rate": 0.0005826915336106266, "loss": 2.1689, "step": 3620 }, { "epoch": 0.09717153284671533, "grad_norm": 0.515625, "learning_rate": 0.0005828525426002951, "loss": 2.1448, "step": 3621 }, { "epoch": 0.09719836839845428, "grad_norm": 0.53125, "learning_rate": 0.0005830135515899638, "loss": 2.2278, "step": 3622 }, { "epoch": 0.09722520395019321, "grad_norm": 0.52734375, "learning_rate": 0.0005831745605796323, "loss": 2.1179, "step": 3623 }, { "epoch": 0.09725203950193216, "grad_norm": 0.52734375, "learning_rate": 0.0005833355695693008, "loss": 2.2271, "step": 3624 }, { "epoch": 0.09727887505367111, "grad_norm": 0.53125, "learning_rate": 0.0005834965785589695, "loss": 2.1289, "step": 3625 }, { "epoch": 0.09730571060541005, "grad_norm": 0.52734375, "learning_rate": 0.0005836575875486381, "loss": 2.2029, "step": 3626 }, { "epoch": 0.097332546157149, "grad_norm": 0.51171875, "learning_rate": 0.0005838185965383067, "loss": 2.1505, "step": 3627 }, { "epoch": 0.09735938170888793, "grad_norm": 0.54296875, "learning_rate": 0.0005839796055279753, "loss": 2.2017, "step": 3628 }, { "epoch": 0.09738621726062688, "grad_norm": 0.5390625, "learning_rate": 0.0005841406145176438, "loss": 2.239, "step": 3629 }, { "epoch": 0.09741305281236583, "grad_norm": 0.53125, "learning_rate": 0.0005843016235073125, "loss": 2.2616, "step": 3630 }, { "epoch": 0.09743988836410476, "grad_norm": 0.54296875, "learning_rate": 0.000584462632496981, "loss": 2.2477, "step": 3631 }, { "epoch": 0.09746672391584371, "grad_norm": 0.51953125, "learning_rate": 0.0005846236414866497, "loss": 2.1418, "step": 3632 }, { "epoch": 0.09749355946758265, "grad_norm": 0.51953125, "learning_rate": 0.0005847846504763182, "loss": 2.0803, "step": 3633 }, { "epoch": 0.0975203950193216, "grad_norm": 0.53125, "learning_rate": 0.0005849456594659868, "loss": 2.2192, "step": 3634 }, { "epoch": 0.09754723057106054, "grad_norm": 0.5234375, "learning_rate": 0.0005851066684556554, "loss": 2.2607, "step": 3635 }, { "epoch": 0.09757406612279948, "grad_norm": 0.51953125, "learning_rate": 0.000585267677445324, "loss": 2.1457, "step": 3636 }, { "epoch": 0.09760090167453843, "grad_norm": 0.5234375, "learning_rate": 0.0005854286864349925, "loss": 2.1204, "step": 3637 }, { "epoch": 0.09762773722627738, "grad_norm": 0.55078125, "learning_rate": 0.0005855896954246612, "loss": 2.2358, "step": 3638 }, { "epoch": 0.09765457277801631, "grad_norm": 0.53515625, "learning_rate": 0.0005857507044143297, "loss": 2.2132, "step": 3639 }, { "epoch": 0.09768140832975526, "grad_norm": 0.53125, "learning_rate": 0.0005859117134039984, "loss": 2.3105, "step": 3640 }, { "epoch": 0.0977082438814942, "grad_norm": 0.52734375, "learning_rate": 0.0005860727223936669, "loss": 2.1888, "step": 3641 }, { "epoch": 0.09773507943323315, "grad_norm": 0.5234375, "learning_rate": 0.0005862337313833355, "loss": 2.2034, "step": 3642 }, { "epoch": 0.0977619149849721, "grad_norm": 0.52734375, "learning_rate": 0.0005863947403730041, "loss": 2.2809, "step": 3643 }, { "epoch": 0.09778875053671103, "grad_norm": 0.5078125, "learning_rate": 0.0005865557493626727, "loss": 2.1905, "step": 3644 }, { "epoch": 0.09781558608844998, "grad_norm": 0.52734375, "learning_rate": 0.0005867167583523414, "loss": 2.2069, "step": 3645 }, { "epoch": 0.09784242164018893, "grad_norm": 0.54296875, "learning_rate": 0.0005868777673420099, "loss": 2.3039, "step": 3646 }, { "epoch": 0.09786925719192786, "grad_norm": 0.53515625, "learning_rate": 0.0005870387763316784, "loss": 2.2607, "step": 3647 }, { "epoch": 0.09789609274366681, "grad_norm": 0.515625, "learning_rate": 0.0005871997853213471, "loss": 2.2295, "step": 3648 }, { "epoch": 0.09792292829540575, "grad_norm": 0.53125, "learning_rate": 0.0005873607943110156, "loss": 2.2491, "step": 3649 }, { "epoch": 0.0979497638471447, "grad_norm": 0.51171875, "learning_rate": 0.0005875218033006842, "loss": 2.168, "step": 3650 }, { "epoch": 0.09797659939888365, "grad_norm": 0.515625, "learning_rate": 0.0005876828122903528, "loss": 2.1485, "step": 3651 }, { "epoch": 0.09800343495062258, "grad_norm": 0.5390625, "learning_rate": 0.0005878438212800214, "loss": 2.1749, "step": 3652 }, { "epoch": 0.09803027050236153, "grad_norm": 0.51171875, "learning_rate": 0.0005880048302696901, "loss": 2.1643, "step": 3653 }, { "epoch": 0.09805710605410048, "grad_norm": 0.54296875, "learning_rate": 0.0005881658392593586, "loss": 2.2576, "step": 3654 }, { "epoch": 0.09808394160583941, "grad_norm": 0.55078125, "learning_rate": 0.0005883268482490271, "loss": 2.19, "step": 3655 }, { "epoch": 0.09811077715757836, "grad_norm": 0.515625, "learning_rate": 0.0005884878572386958, "loss": 2.0953, "step": 3656 }, { "epoch": 0.0981376127093173, "grad_norm": 0.546875, "learning_rate": 0.0005886488662283643, "loss": 2.2274, "step": 3657 }, { "epoch": 0.09816444826105625, "grad_norm": 0.53125, "learning_rate": 0.000588809875218033, "loss": 2.2435, "step": 3658 }, { "epoch": 0.0981912838127952, "grad_norm": 0.51953125, "learning_rate": 0.0005889708842077016, "loss": 2.1927, "step": 3659 }, { "epoch": 0.09821811936453413, "grad_norm": 0.5234375, "learning_rate": 0.0005891318931973701, "loss": 2.1703, "step": 3660 }, { "epoch": 0.09824495491627308, "grad_norm": 0.50390625, "learning_rate": 0.0005892929021870388, "loss": 2.096, "step": 3661 }, { "epoch": 0.09827179046801202, "grad_norm": 0.5078125, "learning_rate": 0.0005894539111767073, "loss": 2.1186, "step": 3662 }, { "epoch": 0.09829862601975096, "grad_norm": 0.5078125, "learning_rate": 0.0005896149201663758, "loss": 2.1278, "step": 3663 }, { "epoch": 0.09832546157148991, "grad_norm": 0.51953125, "learning_rate": 0.0005897759291560445, "loss": 2.1484, "step": 3664 }, { "epoch": 0.09835229712322885, "grad_norm": 0.5234375, "learning_rate": 0.0005899369381457131, "loss": 2.1343, "step": 3665 }, { "epoch": 0.0983791326749678, "grad_norm": 0.53125, "learning_rate": 0.0005900979471353817, "loss": 2.1394, "step": 3666 }, { "epoch": 0.09840596822670675, "grad_norm": 0.52734375, "learning_rate": 0.0005902589561250503, "loss": 2.229, "step": 3667 }, { "epoch": 0.09843280377844568, "grad_norm": 0.51171875, "learning_rate": 0.0005904199651147188, "loss": 2.1778, "step": 3668 }, { "epoch": 0.09845963933018463, "grad_norm": 0.5078125, "learning_rate": 0.0005905809741043875, "loss": 2.1351, "step": 3669 }, { "epoch": 0.09848647488192357, "grad_norm": 0.53515625, "learning_rate": 0.000590741983094056, "loss": 2.1593, "step": 3670 }, { "epoch": 0.09851331043366252, "grad_norm": 0.51171875, "learning_rate": 0.0005909029920837246, "loss": 2.1674, "step": 3671 }, { "epoch": 0.09854014598540146, "grad_norm": 0.52734375, "learning_rate": 0.0005910640010733932, "loss": 2.2052, "step": 3672 }, { "epoch": 0.0985669815371404, "grad_norm": 0.515625, "learning_rate": 0.0005912250100630618, "loss": 2.0679, "step": 3673 }, { "epoch": 0.09859381708887935, "grad_norm": 0.546875, "learning_rate": 0.0005913860190527304, "loss": 2.3083, "step": 3674 }, { "epoch": 0.0986206526406183, "grad_norm": 0.546875, "learning_rate": 0.000591547028042399, "loss": 2.1979, "step": 3675 }, { "epoch": 0.09864748819235723, "grad_norm": 0.51171875, "learning_rate": 0.0005917080370320676, "loss": 2.1054, "step": 3676 }, { "epoch": 0.09867432374409618, "grad_norm": 0.51171875, "learning_rate": 0.0005918690460217362, "loss": 2.2676, "step": 3677 }, { "epoch": 0.09870115929583512, "grad_norm": 0.53125, "learning_rate": 0.0005920300550114047, "loss": 2.187, "step": 3678 }, { "epoch": 0.09872799484757407, "grad_norm": 0.53125, "learning_rate": 0.0005921910640010734, "loss": 2.1946, "step": 3679 }, { "epoch": 0.09875483039931301, "grad_norm": 0.53125, "learning_rate": 0.0005923520729907419, "loss": 2.1503, "step": 3680 }, { "epoch": 0.09878166595105195, "grad_norm": 0.515625, "learning_rate": 0.0005925130819804105, "loss": 2.1587, "step": 3681 }, { "epoch": 0.0988085015027909, "grad_norm": 0.5234375, "learning_rate": 0.0005926740909700791, "loss": 2.1656, "step": 3682 }, { "epoch": 0.09883533705452983, "grad_norm": 0.51953125, "learning_rate": 0.0005928350999597477, "loss": 2.1378, "step": 3683 }, { "epoch": 0.09886217260626878, "grad_norm": 0.51953125, "learning_rate": 0.0005929961089494164, "loss": 2.0977, "step": 3684 }, { "epoch": 0.09888900815800773, "grad_norm": 0.51171875, "learning_rate": 0.0005931571179390848, "loss": 2.1396, "step": 3685 }, { "epoch": 0.09891584370974667, "grad_norm": 0.5234375, "learning_rate": 0.0005933181269287534, "loss": 2.1991, "step": 3686 }, { "epoch": 0.09894267926148562, "grad_norm": 0.5078125, "learning_rate": 0.0005934791359184221, "loss": 2.1558, "step": 3687 }, { "epoch": 0.09896951481322457, "grad_norm": 0.5390625, "learning_rate": 0.0005936401449080906, "loss": 2.2264, "step": 3688 }, { "epoch": 0.0989963503649635, "grad_norm": 0.51953125, "learning_rate": 0.0005938011538977593, "loss": 2.252, "step": 3689 }, { "epoch": 0.09902318591670245, "grad_norm": 0.5234375, "learning_rate": 0.0005939621628874278, "loss": 2.0916, "step": 3690 }, { "epoch": 0.09905002146844138, "grad_norm": 0.5234375, "learning_rate": 0.0005941231718770964, "loss": 2.2344, "step": 3691 }, { "epoch": 0.09907685702018033, "grad_norm": 0.53125, "learning_rate": 0.0005942841808667651, "loss": 2.219, "step": 3692 }, { "epoch": 0.09910369257191928, "grad_norm": 0.53515625, "learning_rate": 0.0005944451898564336, "loss": 2.2031, "step": 3693 }, { "epoch": 0.09913052812365822, "grad_norm": 0.51953125, "learning_rate": 0.0005946061988461021, "loss": 2.1956, "step": 3694 }, { "epoch": 0.09915736367539717, "grad_norm": 0.51171875, "learning_rate": 0.0005947672078357708, "loss": 2.1887, "step": 3695 }, { "epoch": 0.09918419922713612, "grad_norm": 0.52734375, "learning_rate": 0.0005949282168254393, "loss": 2.2356, "step": 3696 }, { "epoch": 0.09921103477887505, "grad_norm": 0.51171875, "learning_rate": 0.000595089225815108, "loss": 2.1286, "step": 3697 }, { "epoch": 0.099237870330614, "grad_norm": 0.51953125, "learning_rate": 0.0005952502348047766, "loss": 2.269, "step": 3698 }, { "epoch": 0.09926470588235294, "grad_norm": 0.52734375, "learning_rate": 0.0005954112437944451, "loss": 2.2014, "step": 3699 }, { "epoch": 0.09929154143409188, "grad_norm": 0.51953125, "learning_rate": 0.0005955722527841138, "loss": 2.1539, "step": 3700 }, { "epoch": 0.09931837698583083, "grad_norm": 0.50390625, "learning_rate": 0.0005957332617737823, "loss": 2.1487, "step": 3701 }, { "epoch": 0.09934521253756977, "grad_norm": 0.53515625, "learning_rate": 0.0005958942707634509, "loss": 2.1609, "step": 3702 }, { "epoch": 0.09937204808930872, "grad_norm": 0.53125, "learning_rate": 0.0005960552797531195, "loss": 2.2064, "step": 3703 }, { "epoch": 0.09939888364104767, "grad_norm": 0.5234375, "learning_rate": 0.000596216288742788, "loss": 2.1381, "step": 3704 }, { "epoch": 0.0994257191927866, "grad_norm": 0.515625, "learning_rate": 0.0005963772977324567, "loss": 2.1838, "step": 3705 }, { "epoch": 0.09945255474452555, "grad_norm": 0.52734375, "learning_rate": 0.0005965383067221253, "loss": 2.158, "step": 3706 }, { "epoch": 0.09947939029626449, "grad_norm": 0.53125, "learning_rate": 0.0005966993157117939, "loss": 2.2192, "step": 3707 }, { "epoch": 0.09950622584800344, "grad_norm": 0.5234375, "learning_rate": 0.0005968603247014625, "loss": 2.2115, "step": 3708 }, { "epoch": 0.09953306139974238, "grad_norm": 0.5390625, "learning_rate": 0.000597021333691131, "loss": 2.1136, "step": 3709 }, { "epoch": 0.09955989695148132, "grad_norm": 0.5234375, "learning_rate": 0.0005971823426807996, "loss": 2.1859, "step": 3710 }, { "epoch": 0.09958673250322027, "grad_norm": 0.51953125, "learning_rate": 0.0005973433516704682, "loss": 2.1969, "step": 3711 }, { "epoch": 0.0996135680549592, "grad_norm": 0.53515625, "learning_rate": 0.0005975043606601369, "loss": 2.3047, "step": 3712 }, { "epoch": 0.09964040360669815, "grad_norm": 0.53515625, "learning_rate": 0.0005976653696498054, "loss": 2.2136, "step": 3713 }, { "epoch": 0.0996672391584371, "grad_norm": 0.53125, "learning_rate": 0.000597826378639474, "loss": 2.2022, "step": 3714 }, { "epoch": 0.09969407471017604, "grad_norm": 0.515625, "learning_rate": 0.0005979873876291426, "loss": 2.0954, "step": 3715 }, { "epoch": 0.09972091026191499, "grad_norm": 0.5234375, "learning_rate": 0.0005981483966188111, "loss": 2.1341, "step": 3716 }, { "epoch": 0.09974774581365393, "grad_norm": 0.5234375, "learning_rate": 0.0005983094056084797, "loss": 2.1876, "step": 3717 }, { "epoch": 0.09977458136539287, "grad_norm": 0.51953125, "learning_rate": 0.0005984704145981484, "loss": 2.0878, "step": 3718 }, { "epoch": 0.09980141691713182, "grad_norm": 0.515625, "learning_rate": 0.0005986314235878169, "loss": 2.0896, "step": 3719 }, { "epoch": 0.09982825246887075, "grad_norm": 0.52734375, "learning_rate": 0.0005987924325774856, "loss": 2.1923, "step": 3720 }, { "epoch": 0.0998550880206097, "grad_norm": 0.53125, "learning_rate": 0.0005989534415671541, "loss": 2.192, "step": 3721 }, { "epoch": 0.09988192357234865, "grad_norm": 0.5234375, "learning_rate": 0.0005991144505568227, "loss": 2.1892, "step": 3722 }, { "epoch": 0.09990875912408759, "grad_norm": 0.515625, "learning_rate": 0.0005992754595464913, "loss": 2.1825, "step": 3723 }, { "epoch": 0.09993559467582654, "grad_norm": 0.53125, "learning_rate": 0.0005994364685361598, "loss": 2.2069, "step": 3724 }, { "epoch": 0.09996243022756549, "grad_norm": 0.51953125, "learning_rate": 0.0005995974775258284, "loss": 2.2214, "step": 3725 }, { "epoch": 0.09998926577930442, "grad_norm": 0.5234375, "learning_rate": 0.0005997584865154971, "loss": 2.2525, "step": 3726 }, { "epoch": 0.10001610133104337, "grad_norm": 0.49609375, "learning_rate": 0.0005999194955051656, "loss": 2.0237, "step": 3727 }, { "epoch": 0.1000429368827823, "grad_norm": 0.50390625, "learning_rate": 0.0006000805044948343, "loss": 2.071, "step": 3728 }, { "epoch": 0.10006977243452125, "grad_norm": 0.515625, "learning_rate": 0.0006002415134845028, "loss": 2.2185, "step": 3729 }, { "epoch": 0.1000966079862602, "grad_norm": 0.50390625, "learning_rate": 0.0006004025224741714, "loss": 2.1238, "step": 3730 }, { "epoch": 0.10012344353799914, "grad_norm": 0.50390625, "learning_rate": 0.0006005635314638401, "loss": 2.0317, "step": 3731 }, { "epoch": 0.10015027908973809, "grad_norm": 0.52734375, "learning_rate": 0.0006007245404535086, "loss": 2.1118, "step": 3732 }, { "epoch": 0.10017711464147702, "grad_norm": 0.5234375, "learning_rate": 0.0006008855494431773, "loss": 2.1759, "step": 3733 }, { "epoch": 0.10020395019321597, "grad_norm": 0.5390625, "learning_rate": 0.0006010465584328458, "loss": 2.2237, "step": 3734 }, { "epoch": 0.10023078574495492, "grad_norm": 0.5234375, "learning_rate": 0.0006012075674225144, "loss": 2.192, "step": 3735 }, { "epoch": 0.10025762129669386, "grad_norm": 0.51171875, "learning_rate": 0.0006013685764121829, "loss": 2.1343, "step": 3736 }, { "epoch": 0.1002844568484328, "grad_norm": 0.5078125, "learning_rate": 0.0006015295854018515, "loss": 2.1666, "step": 3737 }, { "epoch": 0.10031129240017175, "grad_norm": 0.53125, "learning_rate": 0.00060169059439152, "loss": 2.2405, "step": 3738 }, { "epoch": 0.10033812795191069, "grad_norm": 0.53125, "learning_rate": 0.0006018516033811887, "loss": 2.0864, "step": 3739 }, { "epoch": 0.10036496350364964, "grad_norm": 0.53125, "learning_rate": 0.0006020126123708573, "loss": 2.1853, "step": 3740 }, { "epoch": 0.10039179905538857, "grad_norm": 0.5234375, "learning_rate": 0.0006021736213605259, "loss": 2.1765, "step": 3741 }, { "epoch": 0.10041863460712752, "grad_norm": 0.5234375, "learning_rate": 0.0006023346303501945, "loss": 2.1161, "step": 3742 }, { "epoch": 0.10044547015886647, "grad_norm": 0.54296875, "learning_rate": 0.000602495639339863, "loss": 2.1876, "step": 3743 }, { "epoch": 0.1004723057106054, "grad_norm": 0.51171875, "learning_rate": 0.0006026566483295317, "loss": 2.0802, "step": 3744 }, { "epoch": 0.10049914126234435, "grad_norm": 0.515625, "learning_rate": 0.0006028176573192003, "loss": 2.1458, "step": 3745 }, { "epoch": 0.1005259768140833, "grad_norm": 0.55078125, "learning_rate": 0.0006029786663088689, "loss": 2.2513, "step": 3746 }, { "epoch": 0.10055281236582224, "grad_norm": 0.51953125, "learning_rate": 0.0006031396752985375, "loss": 2.1648, "step": 3747 }, { "epoch": 0.10057964791756119, "grad_norm": 0.53515625, "learning_rate": 0.000603300684288206, "loss": 2.1852, "step": 3748 }, { "epoch": 0.10060648346930012, "grad_norm": 0.53125, "learning_rate": 0.0006034616932778747, "loss": 2.1723, "step": 3749 }, { "epoch": 0.10063331902103907, "grad_norm": 0.53515625, "learning_rate": 0.0006036227022675433, "loss": 2.2696, "step": 3750 }, { "epoch": 0.10066015457277802, "grad_norm": 0.5234375, "learning_rate": 0.0006037837112572118, "loss": 2.1674, "step": 3751 }, { "epoch": 0.10068699012451696, "grad_norm": 0.5078125, "learning_rate": 0.0006039447202468804, "loss": 2.1996, "step": 3752 }, { "epoch": 0.1007138256762559, "grad_norm": 0.546875, "learning_rate": 0.0006041057292365489, "loss": 2.2914, "step": 3753 }, { "epoch": 0.10074066122799485, "grad_norm": 0.515625, "learning_rate": 0.0006042667382262176, "loss": 2.1903, "step": 3754 }, { "epoch": 0.10076749677973379, "grad_norm": 0.51171875, "learning_rate": 0.0006044277472158861, "loss": 2.1276, "step": 3755 }, { "epoch": 0.10079433233147274, "grad_norm": 0.53125, "learning_rate": 0.0006045887562055548, "loss": 2.1094, "step": 3756 }, { "epoch": 0.10082116788321167, "grad_norm": 0.51953125, "learning_rate": 0.0006047497651952233, "loss": 2.1148, "step": 3757 }, { "epoch": 0.10084800343495062, "grad_norm": 0.515625, "learning_rate": 0.0006049107741848919, "loss": 2.1333, "step": 3758 }, { "epoch": 0.10087483898668957, "grad_norm": 0.61328125, "learning_rate": 0.0006050717831745606, "loss": 2.3041, "step": 3759 }, { "epoch": 0.1009016745384285, "grad_norm": 0.5546875, "learning_rate": 0.0006052327921642291, "loss": 2.3181, "step": 3760 }, { "epoch": 0.10092851009016746, "grad_norm": 0.515625, "learning_rate": 0.0006053938011538977, "loss": 2.1112, "step": 3761 }, { "epoch": 0.10095534564190639, "grad_norm": 0.5546875, "learning_rate": 0.0006055548101435663, "loss": 2.1694, "step": 3762 }, { "epoch": 0.10098218119364534, "grad_norm": 0.52734375, "learning_rate": 0.0006057158191332349, "loss": 2.1769, "step": 3763 }, { "epoch": 0.10100901674538429, "grad_norm": 0.53515625, "learning_rate": 0.0006058768281229036, "loss": 2.2276, "step": 3764 }, { "epoch": 0.10103585229712322, "grad_norm": 0.52734375, "learning_rate": 0.0006060378371125721, "loss": 2.2488, "step": 3765 }, { "epoch": 0.10106268784886217, "grad_norm": 0.51171875, "learning_rate": 0.0006061988461022407, "loss": 2.1446, "step": 3766 }, { "epoch": 0.10108952340060112, "grad_norm": 0.53515625, "learning_rate": 0.0006063598550919092, "loss": 2.1722, "step": 3767 }, { "epoch": 0.10111635895234006, "grad_norm": 0.52734375, "learning_rate": 0.0006065208640815778, "loss": 2.1522, "step": 3768 }, { "epoch": 0.101143194504079, "grad_norm": 0.51953125, "learning_rate": 0.0006066818730712463, "loss": 2.1977, "step": 3769 }, { "epoch": 0.10117003005581794, "grad_norm": 0.51953125, "learning_rate": 0.000606842882060915, "loss": 2.176, "step": 3770 }, { "epoch": 0.10119686560755689, "grad_norm": 0.52734375, "learning_rate": 0.0006070038910505835, "loss": 2.2628, "step": 3771 }, { "epoch": 0.10122370115929584, "grad_norm": 0.5, "learning_rate": 0.0006071649000402522, "loss": 2.1664, "step": 3772 }, { "epoch": 0.10125053671103477, "grad_norm": 0.5234375, "learning_rate": 0.0006073259090299208, "loss": 2.1703, "step": 3773 }, { "epoch": 0.10127737226277372, "grad_norm": 0.52734375, "learning_rate": 0.0006074869180195893, "loss": 2.2424, "step": 3774 }, { "epoch": 0.10130420781451267, "grad_norm": 0.5078125, "learning_rate": 0.000607647927009258, "loss": 2.1464, "step": 3775 }, { "epoch": 0.10133104336625161, "grad_norm": 0.51953125, "learning_rate": 0.0006078089359989265, "loss": 2.1864, "step": 3776 }, { "epoch": 0.10135787891799056, "grad_norm": 0.515625, "learning_rate": 0.0006079699449885952, "loss": 2.2609, "step": 3777 }, { "epoch": 0.10138471446972949, "grad_norm": 0.53125, "learning_rate": 0.0006081309539782638, "loss": 2.2373, "step": 3778 }, { "epoch": 0.10141155002146844, "grad_norm": 0.5078125, "learning_rate": 0.0006082919629679323, "loss": 2.1726, "step": 3779 }, { "epoch": 0.10143838557320739, "grad_norm": 0.5078125, "learning_rate": 0.000608452971957601, "loss": 2.1913, "step": 3780 }, { "epoch": 0.10146522112494633, "grad_norm": 0.515625, "learning_rate": 0.0006086139809472695, "loss": 2.1442, "step": 3781 }, { "epoch": 0.10149205667668527, "grad_norm": 0.5234375, "learning_rate": 0.0006087749899369381, "loss": 2.2032, "step": 3782 }, { "epoch": 0.10151889222842421, "grad_norm": 0.515625, "learning_rate": 0.0006089359989266066, "loss": 2.1565, "step": 3783 }, { "epoch": 0.10154572778016316, "grad_norm": 0.53515625, "learning_rate": 0.0006090970079162752, "loss": 2.2008, "step": 3784 }, { "epoch": 0.10157256333190211, "grad_norm": 0.51953125, "learning_rate": 0.0006092580169059439, "loss": 2.1212, "step": 3785 }, { "epoch": 0.10159939888364104, "grad_norm": 0.5078125, "learning_rate": 0.0006094190258956124, "loss": 2.1019, "step": 3786 }, { "epoch": 0.10162623443537999, "grad_norm": 0.52734375, "learning_rate": 0.000609580034885281, "loss": 2.2441, "step": 3787 }, { "epoch": 0.10165306998711894, "grad_norm": 0.515625, "learning_rate": 0.0006097410438749496, "loss": 2.1406, "step": 3788 }, { "epoch": 0.10167990553885788, "grad_norm": 0.5078125, "learning_rate": 0.0006099020528646182, "loss": 2.1932, "step": 3789 }, { "epoch": 0.10170674109059682, "grad_norm": 0.50390625, "learning_rate": 0.0006100630618542868, "loss": 2.1941, "step": 3790 }, { "epoch": 0.10173357664233576, "grad_norm": 0.5234375, "learning_rate": 0.0006102240708439554, "loss": 2.2561, "step": 3791 }, { "epoch": 0.10176041219407471, "grad_norm": 0.50390625, "learning_rate": 0.000610385079833624, "loss": 2.1742, "step": 3792 }, { "epoch": 0.10178724774581366, "grad_norm": 0.51953125, "learning_rate": 0.0006105460888232926, "loss": 2.1522, "step": 3793 }, { "epoch": 0.1018140832975526, "grad_norm": 0.51953125, "learning_rate": 0.0006107070978129612, "loss": 2.1441, "step": 3794 }, { "epoch": 0.10184091884929154, "grad_norm": 0.5234375, "learning_rate": 0.0006108681068026298, "loss": 2.2334, "step": 3795 }, { "epoch": 0.10186775440103049, "grad_norm": 0.53125, "learning_rate": 0.0006110291157922984, "loss": 2.1894, "step": 3796 }, { "epoch": 0.10189458995276943, "grad_norm": 0.498046875, "learning_rate": 0.000611190124781967, "loss": 2.1443, "step": 3797 }, { "epoch": 0.10192142550450838, "grad_norm": 0.515625, "learning_rate": 0.0006113511337716355, "loss": 2.1536, "step": 3798 }, { "epoch": 0.10194826105624731, "grad_norm": 0.5, "learning_rate": 0.0006115121427613041, "loss": 2.1502, "step": 3799 }, { "epoch": 0.10197509660798626, "grad_norm": 0.515625, "learning_rate": 0.0006116731517509727, "loss": 2.2479, "step": 3800 }, { "epoch": 0.10200193215972521, "grad_norm": 0.515625, "learning_rate": 0.0006118341607406413, "loss": 2.1851, "step": 3801 }, { "epoch": 0.10202876771146414, "grad_norm": 0.5078125, "learning_rate": 0.0006119951697303098, "loss": 2.1781, "step": 3802 }, { "epoch": 0.10205560326320309, "grad_norm": 0.515625, "learning_rate": 0.0006121561787199785, "loss": 2.1622, "step": 3803 }, { "epoch": 0.10208243881494204, "grad_norm": 0.515625, "learning_rate": 0.0006123171877096471, "loss": 2.1162, "step": 3804 }, { "epoch": 0.10210927436668098, "grad_norm": 0.51171875, "learning_rate": 0.0006124781966993156, "loss": 2.1664, "step": 3805 }, { "epoch": 0.10213610991841993, "grad_norm": 0.51171875, "learning_rate": 0.0006126392056889843, "loss": 2.1161, "step": 3806 }, { "epoch": 0.10216294547015886, "grad_norm": 0.498046875, "learning_rate": 0.0006128002146786528, "loss": 2.1527, "step": 3807 }, { "epoch": 0.10218978102189781, "grad_norm": 0.51171875, "learning_rate": 0.0006129612236683215, "loss": 2.1868, "step": 3808 }, { "epoch": 0.10221661657363676, "grad_norm": 0.5078125, "learning_rate": 0.00061312223265799, "loss": 2.1406, "step": 3809 }, { "epoch": 0.1022434521253757, "grad_norm": 0.5078125, "learning_rate": 0.0006132832416476586, "loss": 2.1538, "step": 3810 }, { "epoch": 0.10227028767711464, "grad_norm": 0.5, "learning_rate": 0.0006134442506373273, "loss": 2.155, "step": 3811 }, { "epoch": 0.10229712322885358, "grad_norm": 0.51171875, "learning_rate": 0.0006136052596269958, "loss": 2.2224, "step": 3812 }, { "epoch": 0.10232395878059253, "grad_norm": 0.515625, "learning_rate": 0.0006137662686166644, "loss": 2.2244, "step": 3813 }, { "epoch": 0.10235079433233148, "grad_norm": 0.5078125, "learning_rate": 0.0006139272776063329, "loss": 2.2216, "step": 3814 }, { "epoch": 0.10237762988407041, "grad_norm": 0.51171875, "learning_rate": 0.0006140882865960015, "loss": 2.1617, "step": 3815 }, { "epoch": 0.10240446543580936, "grad_norm": 0.5, "learning_rate": 0.0006142492955856701, "loss": 2.146, "step": 3816 }, { "epoch": 0.10243130098754831, "grad_norm": 0.51171875, "learning_rate": 0.0006144103045753387, "loss": 2.2195, "step": 3817 }, { "epoch": 0.10245813653928725, "grad_norm": 0.51171875, "learning_rate": 0.0006145713135650074, "loss": 2.1259, "step": 3818 }, { "epoch": 0.1024849720910262, "grad_norm": 0.51953125, "learning_rate": 0.0006147323225546759, "loss": 2.163, "step": 3819 }, { "epoch": 0.10251180764276513, "grad_norm": 0.5078125, "learning_rate": 0.0006148933315443445, "loss": 2.0789, "step": 3820 }, { "epoch": 0.10253864319450408, "grad_norm": 0.51171875, "learning_rate": 0.0006150543405340131, "loss": 2.0984, "step": 3821 }, { "epoch": 0.10256547874624303, "grad_norm": 0.515625, "learning_rate": 0.0006152153495236817, "loss": 2.128, "step": 3822 }, { "epoch": 0.10259231429798196, "grad_norm": 0.50390625, "learning_rate": 0.0006153763585133502, "loss": 2.0961, "step": 3823 }, { "epoch": 0.10261914984972091, "grad_norm": 0.5, "learning_rate": 0.0006155373675030189, "loss": 2.1627, "step": 3824 }, { "epoch": 0.10264598540145986, "grad_norm": 0.546875, "learning_rate": 0.0006156983764926875, "loss": 2.1986, "step": 3825 }, { "epoch": 0.1026728209531988, "grad_norm": 0.5078125, "learning_rate": 0.0006158593854823561, "loss": 2.1327, "step": 3826 }, { "epoch": 0.10269965650493774, "grad_norm": 0.515625, "learning_rate": 0.0006160203944720247, "loss": 2.1038, "step": 3827 }, { "epoch": 0.10272649205667668, "grad_norm": 0.50390625, "learning_rate": 0.0006161814034616932, "loss": 2.0885, "step": 3828 }, { "epoch": 0.10275332760841563, "grad_norm": 0.50390625, "learning_rate": 0.0006163424124513618, "loss": 2.1199, "step": 3829 }, { "epoch": 0.10278016316015458, "grad_norm": 0.5078125, "learning_rate": 0.0006165034214410303, "loss": 2.2162, "step": 3830 }, { "epoch": 0.10280699871189351, "grad_norm": 0.50390625, "learning_rate": 0.000616664430430699, "loss": 2.0809, "step": 3831 }, { "epoch": 0.10283383426363246, "grad_norm": 0.498046875, "learning_rate": 0.0006168254394203676, "loss": 2.1614, "step": 3832 }, { "epoch": 0.1028606698153714, "grad_norm": 0.50390625, "learning_rate": 0.0006169864484100361, "loss": 2.1758, "step": 3833 }, { "epoch": 0.10288750536711035, "grad_norm": 0.50390625, "learning_rate": 0.0006171474573997048, "loss": 2.1453, "step": 3834 }, { "epoch": 0.1029143409188493, "grad_norm": 0.498046875, "learning_rate": 0.0006173084663893733, "loss": 2.0553, "step": 3835 }, { "epoch": 0.10294117647058823, "grad_norm": 0.515625, "learning_rate": 0.000617469475379042, "loss": 2.2495, "step": 3836 }, { "epoch": 0.10296801202232718, "grad_norm": 0.50390625, "learning_rate": 0.0006176304843687106, "loss": 2.115, "step": 3837 }, { "epoch": 0.10299484757406613, "grad_norm": 0.5078125, "learning_rate": 0.0006177914933583791, "loss": 2.173, "step": 3838 }, { "epoch": 0.10302168312580506, "grad_norm": 0.51953125, "learning_rate": 0.0006179525023480478, "loss": 2.1641, "step": 3839 }, { "epoch": 0.10304851867754401, "grad_norm": 0.5, "learning_rate": 0.0006181135113377163, "loss": 2.1462, "step": 3840 }, { "epoch": 0.10307535422928295, "grad_norm": 0.5234375, "learning_rate": 0.000618274520327385, "loss": 2.1795, "step": 3841 }, { "epoch": 0.1031021897810219, "grad_norm": 0.5234375, "learning_rate": 0.0006184355293170535, "loss": 2.2122, "step": 3842 }, { "epoch": 0.10312902533276085, "grad_norm": 0.49609375, "learning_rate": 0.0006185965383067221, "loss": 2.0924, "step": 3843 }, { "epoch": 0.10315586088449978, "grad_norm": 0.52734375, "learning_rate": 0.0006187575472963906, "loss": 2.0731, "step": 3844 }, { "epoch": 0.10318269643623873, "grad_norm": 0.50390625, "learning_rate": 0.0006189185562860592, "loss": 2.1859, "step": 3845 }, { "epoch": 0.10320953198797768, "grad_norm": 0.51171875, "learning_rate": 0.0006190795652757278, "loss": 2.2094, "step": 3846 }, { "epoch": 0.10323636753971661, "grad_norm": 0.52734375, "learning_rate": 0.0006192405742653964, "loss": 2.1951, "step": 3847 }, { "epoch": 0.10326320309145556, "grad_norm": 0.515625, "learning_rate": 0.000619401583255065, "loss": 2.1143, "step": 3848 }, { "epoch": 0.1032900386431945, "grad_norm": 0.5, "learning_rate": 0.0006195625922447335, "loss": 2.1165, "step": 3849 }, { "epoch": 0.10331687419493345, "grad_norm": 0.5234375, "learning_rate": 0.0006197236012344022, "loss": 2.1898, "step": 3850 }, { "epoch": 0.1033437097466724, "grad_norm": 0.50390625, "learning_rate": 0.0006198846102240708, "loss": 2.0691, "step": 3851 }, { "epoch": 0.10337054529841133, "grad_norm": 0.53515625, "learning_rate": 0.0006200456192137394, "loss": 2.1844, "step": 3852 }, { "epoch": 0.10339738085015028, "grad_norm": 0.5234375, "learning_rate": 0.000620206628203408, "loss": 2.1062, "step": 3853 }, { "epoch": 0.10342421640188923, "grad_norm": 0.494140625, "learning_rate": 0.0006203676371930765, "loss": 2.0685, "step": 3854 }, { "epoch": 0.10345105195362816, "grad_norm": 0.494140625, "learning_rate": 0.0006205286461827452, "loss": 2.1895, "step": 3855 }, { "epoch": 0.10347788750536711, "grad_norm": 0.5, "learning_rate": 0.0006206896551724138, "loss": 2.0409, "step": 3856 }, { "epoch": 0.10350472305710605, "grad_norm": 0.482421875, "learning_rate": 0.0006208506641620824, "loss": 2.0756, "step": 3857 }, { "epoch": 0.103531558608845, "grad_norm": 0.5234375, "learning_rate": 0.000621011673151751, "loss": 2.1824, "step": 3858 }, { "epoch": 0.10355839416058395, "grad_norm": 0.490234375, "learning_rate": 0.0006211726821414194, "loss": 2.0169, "step": 3859 }, { "epoch": 0.10358522971232288, "grad_norm": 0.4921875, "learning_rate": 0.0006213336911310881, "loss": 2.1375, "step": 3860 }, { "epoch": 0.10361206526406183, "grad_norm": 0.515625, "learning_rate": 0.0006214947001207566, "loss": 2.1235, "step": 3861 }, { "epoch": 0.10363890081580077, "grad_norm": 0.4921875, "learning_rate": 0.0006216557091104253, "loss": 2.0291, "step": 3862 }, { "epoch": 0.10366573636753972, "grad_norm": 0.51953125, "learning_rate": 0.0006218167181000938, "loss": 2.2314, "step": 3863 }, { "epoch": 0.10369257191927866, "grad_norm": 0.52734375, "learning_rate": 0.0006219777270897624, "loss": 2.2835, "step": 3864 }, { "epoch": 0.1037194074710176, "grad_norm": 0.49609375, "learning_rate": 0.0006221387360794311, "loss": 2.0797, "step": 3865 }, { "epoch": 0.10374624302275655, "grad_norm": 0.52734375, "learning_rate": 0.0006222997450690996, "loss": 2.1292, "step": 3866 }, { "epoch": 0.1037730785744955, "grad_norm": 0.51171875, "learning_rate": 0.0006224607540587683, "loss": 2.1343, "step": 3867 }, { "epoch": 0.10379991412623443, "grad_norm": 0.51171875, "learning_rate": 0.0006226217630484368, "loss": 2.2454, "step": 3868 }, { "epoch": 0.10382674967797338, "grad_norm": 0.5, "learning_rate": 0.0006227827720381054, "loss": 2.2721, "step": 3869 }, { "epoch": 0.10385358522971232, "grad_norm": 0.5, "learning_rate": 0.0006229437810277741, "loss": 2.123, "step": 3870 }, { "epoch": 0.10388042078145127, "grad_norm": 0.51171875, "learning_rate": 0.0006231047900174426, "loss": 2.2073, "step": 3871 }, { "epoch": 0.10390725633319021, "grad_norm": 0.5078125, "learning_rate": 0.0006232657990071113, "loss": 2.2148, "step": 3872 }, { "epoch": 0.10393409188492915, "grad_norm": 0.51953125, "learning_rate": 0.0006234268079967798, "loss": 2.1613, "step": 3873 }, { "epoch": 0.1039609274366681, "grad_norm": 0.50390625, "learning_rate": 0.0006235878169864484, "loss": 2.1945, "step": 3874 }, { "epoch": 0.10398776298840705, "grad_norm": 0.50390625, "learning_rate": 0.0006237488259761169, "loss": 2.203, "step": 3875 }, { "epoch": 0.10401459854014598, "grad_norm": 0.52734375, "learning_rate": 0.0006239098349657855, "loss": 2.1108, "step": 3876 }, { "epoch": 0.10404143409188493, "grad_norm": 0.50390625, "learning_rate": 0.000624070843955454, "loss": 2.135, "step": 3877 }, { "epoch": 0.10406826964362387, "grad_norm": 0.5078125, "learning_rate": 0.0006242318529451227, "loss": 2.2013, "step": 3878 }, { "epoch": 0.10409510519536282, "grad_norm": 0.515625, "learning_rate": 0.0006243928619347913, "loss": 2.2363, "step": 3879 }, { "epoch": 0.10412194074710177, "grad_norm": 0.4921875, "learning_rate": 0.0006245538709244599, "loss": 2.1252, "step": 3880 }, { "epoch": 0.1041487762988407, "grad_norm": 0.48828125, "learning_rate": 0.0006247148799141285, "loss": 2.1436, "step": 3881 }, { "epoch": 0.10417561185057965, "grad_norm": 0.486328125, "learning_rate": 0.000624875888903797, "loss": 2.1363, "step": 3882 }, { "epoch": 0.10420244740231858, "grad_norm": 0.5078125, "learning_rate": 0.0006250368978934657, "loss": 2.2436, "step": 3883 }, { "epoch": 0.10422928295405753, "grad_norm": 0.5078125, "learning_rate": 0.0006251979068831343, "loss": 2.1999, "step": 3884 }, { "epoch": 0.10425611850579648, "grad_norm": 0.51171875, "learning_rate": 0.0006253589158728028, "loss": 2.1738, "step": 3885 }, { "epoch": 0.10428295405753542, "grad_norm": 0.5078125, "learning_rate": 0.0006255199248624715, "loss": 2.217, "step": 3886 }, { "epoch": 0.10430978960927437, "grad_norm": 0.50390625, "learning_rate": 0.00062568093385214, "loss": 2.1525, "step": 3887 }, { "epoch": 0.10433662516101332, "grad_norm": 0.5078125, "learning_rate": 0.0006258419428418087, "loss": 2.1416, "step": 3888 }, { "epoch": 0.10436346071275225, "grad_norm": 0.51953125, "learning_rate": 0.0006260029518314773, "loss": 2.1571, "step": 3889 }, { "epoch": 0.1043902962644912, "grad_norm": 0.50390625, "learning_rate": 0.0006261639608211457, "loss": 2.1753, "step": 3890 }, { "epoch": 0.10441713181623014, "grad_norm": 0.51171875, "learning_rate": 0.0006263249698108143, "loss": 2.1623, "step": 3891 }, { "epoch": 0.10444396736796908, "grad_norm": 0.5, "learning_rate": 0.0006264859788004829, "loss": 2.0829, "step": 3892 }, { "epoch": 0.10447080291970803, "grad_norm": 0.515625, "learning_rate": 0.0006266469877901516, "loss": 2.1133, "step": 3893 }, { "epoch": 0.10449763847144697, "grad_norm": 0.5, "learning_rate": 0.0006268079967798201, "loss": 2.1132, "step": 3894 }, { "epoch": 0.10452447402318592, "grad_norm": 0.50390625, "learning_rate": 0.0006269690057694887, "loss": 1.9974, "step": 3895 }, { "epoch": 0.10455130957492487, "grad_norm": 0.5078125, "learning_rate": 0.0006271300147591573, "loss": 2.1851, "step": 3896 }, { "epoch": 0.1045781451266638, "grad_norm": 0.52734375, "learning_rate": 0.0006272910237488259, "loss": 2.1357, "step": 3897 }, { "epoch": 0.10460498067840275, "grad_norm": 0.5234375, "learning_rate": 0.0006274520327384946, "loss": 2.2229, "step": 3898 }, { "epoch": 0.10463181623014169, "grad_norm": 0.5078125, "learning_rate": 0.0006276130417281631, "loss": 2.1338, "step": 3899 }, { "epoch": 0.10465865178188064, "grad_norm": 0.5078125, "learning_rate": 0.0006277740507178317, "loss": 2.1286, "step": 3900 }, { "epoch": 0.10468548733361958, "grad_norm": 0.498046875, "learning_rate": 0.0006279350597075003, "loss": 2.1579, "step": 3901 }, { "epoch": 0.10471232288535852, "grad_norm": 0.52734375, "learning_rate": 0.0006280960686971689, "loss": 2.148, "step": 3902 }, { "epoch": 0.10473915843709747, "grad_norm": 0.50390625, "learning_rate": 0.0006282570776868376, "loss": 2.191, "step": 3903 }, { "epoch": 0.10476599398883642, "grad_norm": 0.5, "learning_rate": 0.0006284180866765061, "loss": 2.0681, "step": 3904 }, { "epoch": 0.10479282954057535, "grad_norm": 0.5078125, "learning_rate": 0.0006285790956661747, "loss": 2.1504, "step": 3905 }, { "epoch": 0.1048196650923143, "grad_norm": 0.51171875, "learning_rate": 0.0006287401046558432, "loss": 2.2105, "step": 3906 }, { "epoch": 0.10484650064405324, "grad_norm": 0.51953125, "learning_rate": 0.0006289011136455118, "loss": 2.1513, "step": 3907 }, { "epoch": 0.10487333619579219, "grad_norm": 0.51953125, "learning_rate": 0.0006290621226351803, "loss": 2.1693, "step": 3908 }, { "epoch": 0.10490017174753113, "grad_norm": 0.5078125, "learning_rate": 0.000629223131624849, "loss": 2.1896, "step": 3909 }, { "epoch": 0.10492700729927007, "grad_norm": 0.5078125, "learning_rate": 0.0006293841406145175, "loss": 2.2507, "step": 3910 }, { "epoch": 0.10495384285100902, "grad_norm": 0.515625, "learning_rate": 0.0006295451496041862, "loss": 2.1874, "step": 3911 }, { "epoch": 0.10498067840274795, "grad_norm": 0.50390625, "learning_rate": 0.0006297061585938548, "loss": 2.1105, "step": 3912 }, { "epoch": 0.1050075139544869, "grad_norm": 0.50390625, "learning_rate": 0.0006298671675835233, "loss": 2.2118, "step": 3913 }, { "epoch": 0.10503434950622585, "grad_norm": 0.5, "learning_rate": 0.000630028176573192, "loss": 2.1792, "step": 3914 }, { "epoch": 0.10506118505796479, "grad_norm": 0.51171875, "learning_rate": 0.0006301891855628605, "loss": 2.1814, "step": 3915 }, { "epoch": 0.10508802060970374, "grad_norm": 0.515625, "learning_rate": 0.0006303501945525292, "loss": 2.2076, "step": 3916 }, { "epoch": 0.10511485616144269, "grad_norm": 0.494140625, "learning_rate": 0.0006305112035421978, "loss": 2.0443, "step": 3917 }, { "epoch": 0.10514169171318162, "grad_norm": 0.50390625, "learning_rate": 0.0006306722125318663, "loss": 2.2162, "step": 3918 }, { "epoch": 0.10516852726492057, "grad_norm": 0.51953125, "learning_rate": 0.000630833221521535, "loss": 2.2177, "step": 3919 }, { "epoch": 0.1051953628166595, "grad_norm": 0.5, "learning_rate": 0.0006309942305112035, "loss": 2.1389, "step": 3920 }, { "epoch": 0.10522219836839845, "grad_norm": 0.50390625, "learning_rate": 0.000631155239500872, "loss": 2.077, "step": 3921 }, { "epoch": 0.1052490339201374, "grad_norm": 0.5, "learning_rate": 0.0006313162484905406, "loss": 2.1938, "step": 3922 }, { "epoch": 0.10527586947187634, "grad_norm": 0.490234375, "learning_rate": 0.0006314772574802092, "loss": 2.0616, "step": 3923 }, { "epoch": 0.10530270502361529, "grad_norm": 0.5, "learning_rate": 0.0006316382664698779, "loss": 2.0515, "step": 3924 }, { "epoch": 0.10532954057535424, "grad_norm": 0.474609375, "learning_rate": 0.0006317992754595464, "loss": 2.0262, "step": 3925 }, { "epoch": 0.10535637612709317, "grad_norm": 0.484375, "learning_rate": 0.000631960284449215, "loss": 2.0117, "step": 3926 }, { "epoch": 0.10538321167883212, "grad_norm": 0.515625, "learning_rate": 0.0006321212934388836, "loss": 2.1829, "step": 3927 }, { "epoch": 0.10541004723057106, "grad_norm": 0.515625, "learning_rate": 0.0006322823024285522, "loss": 2.1176, "step": 3928 }, { "epoch": 0.10543688278231, "grad_norm": 0.498046875, "learning_rate": 0.0006324433114182207, "loss": 2.0377, "step": 3929 }, { "epoch": 0.10546371833404895, "grad_norm": 0.5234375, "learning_rate": 0.0006326043204078894, "loss": 2.1921, "step": 3930 }, { "epoch": 0.10549055388578789, "grad_norm": 0.5078125, "learning_rate": 0.000632765329397558, "loss": 2.0564, "step": 3931 }, { "epoch": 0.10551738943752684, "grad_norm": 0.50390625, "learning_rate": 0.0006329263383872266, "loss": 2.0459, "step": 3932 }, { "epoch": 0.10554422498926577, "grad_norm": 0.51953125, "learning_rate": 0.0006330873473768952, "loss": 2.1955, "step": 3933 }, { "epoch": 0.10557106054100472, "grad_norm": 0.51171875, "learning_rate": 0.0006332483563665637, "loss": 2.0946, "step": 3934 }, { "epoch": 0.10559789609274367, "grad_norm": 0.515625, "learning_rate": 0.0006334093653562324, "loss": 2.1299, "step": 3935 }, { "epoch": 0.1056247316444826, "grad_norm": 0.498046875, "learning_rate": 0.000633570374345901, "loss": 2.1067, "step": 3936 }, { "epoch": 0.10565156719622155, "grad_norm": 0.50390625, "learning_rate": 0.0006337313833355695, "loss": 2.1194, "step": 3937 }, { "epoch": 0.1056784027479605, "grad_norm": 0.515625, "learning_rate": 0.0006338923923252381, "loss": 2.1241, "step": 3938 }, { "epoch": 0.10570523829969944, "grad_norm": 0.490234375, "learning_rate": 0.0006340534013149066, "loss": 2.0805, "step": 3939 }, { "epoch": 0.10573207385143839, "grad_norm": 0.51171875, "learning_rate": 0.0006342144103045753, "loss": 2.1521, "step": 3940 }, { "epoch": 0.10575890940317732, "grad_norm": 0.5078125, "learning_rate": 0.0006343754192942438, "loss": 2.1405, "step": 3941 }, { "epoch": 0.10578574495491627, "grad_norm": 0.49609375, "learning_rate": 0.0006345364282839125, "loss": 2.0571, "step": 3942 }, { "epoch": 0.10581258050665522, "grad_norm": 0.5234375, "learning_rate": 0.000634697437273581, "loss": 2.1501, "step": 3943 }, { "epoch": 0.10583941605839416, "grad_norm": 0.55859375, "learning_rate": 0.0006348584462632496, "loss": 2.2966, "step": 3944 }, { "epoch": 0.1058662516101331, "grad_norm": 0.53515625, "learning_rate": 0.0006350194552529183, "loss": 2.1892, "step": 3945 }, { "epoch": 0.10589308716187205, "grad_norm": 0.52734375, "learning_rate": 0.0006351804642425868, "loss": 2.2066, "step": 3946 }, { "epoch": 0.10591992271361099, "grad_norm": 0.546875, "learning_rate": 0.0006353414732322555, "loss": 2.1402, "step": 3947 }, { "epoch": 0.10594675826534994, "grad_norm": 0.51953125, "learning_rate": 0.000635502482221924, "loss": 2.1815, "step": 3948 }, { "epoch": 0.10597359381708887, "grad_norm": 0.515625, "learning_rate": 0.0006356634912115926, "loss": 2.1658, "step": 3949 }, { "epoch": 0.10600042936882782, "grad_norm": 0.53125, "learning_rate": 0.0006358245002012613, "loss": 2.2652, "step": 3950 }, { "epoch": 0.10602726492056677, "grad_norm": 0.50390625, "learning_rate": 0.0006359855091909298, "loss": 2.1645, "step": 3951 }, { "epoch": 0.1060541004723057, "grad_norm": 0.5, "learning_rate": 0.0006361465181805983, "loss": 2.0161, "step": 3952 }, { "epoch": 0.10608093602404466, "grad_norm": 0.51171875, "learning_rate": 0.0006363075271702669, "loss": 2.201, "step": 3953 }, { "epoch": 0.1061077715757836, "grad_norm": 0.51171875, "learning_rate": 0.0006364685361599355, "loss": 2.2479, "step": 3954 }, { "epoch": 0.10613460712752254, "grad_norm": 0.50390625, "learning_rate": 0.000636629545149604, "loss": 2.147, "step": 3955 }, { "epoch": 0.10616144267926149, "grad_norm": 0.484375, "learning_rate": 0.0006367905541392727, "loss": 2.0448, "step": 3956 }, { "epoch": 0.10618827823100042, "grad_norm": 0.498046875, "learning_rate": 0.0006369515631289413, "loss": 2.0601, "step": 3957 }, { "epoch": 0.10621511378273937, "grad_norm": 0.48828125, "learning_rate": 0.0006371125721186099, "loss": 2.0675, "step": 3958 }, { "epoch": 0.10624194933447832, "grad_norm": 0.494140625, "learning_rate": 0.0006372735811082785, "loss": 2.0751, "step": 3959 }, { "epoch": 0.10626878488621726, "grad_norm": 0.494140625, "learning_rate": 0.000637434590097947, "loss": 2.1295, "step": 3960 }, { "epoch": 0.1062956204379562, "grad_norm": 0.484375, "learning_rate": 0.0006375955990876157, "loss": 2.1195, "step": 3961 }, { "epoch": 0.10632245598969514, "grad_norm": 0.50390625, "learning_rate": 0.0006377566080772842, "loss": 2.2019, "step": 3962 }, { "epoch": 0.10634929154143409, "grad_norm": 0.53125, "learning_rate": 0.0006379176170669529, "loss": 2.1849, "step": 3963 }, { "epoch": 0.10637612709317304, "grad_norm": 0.482421875, "learning_rate": 0.0006380786260566215, "loss": 2.0563, "step": 3964 }, { "epoch": 0.10640296264491197, "grad_norm": 0.4765625, "learning_rate": 0.00063823963504629, "loss": 2.083, "step": 3965 }, { "epoch": 0.10642979819665092, "grad_norm": 0.494140625, "learning_rate": 0.0006384006440359587, "loss": 2.0604, "step": 3966 }, { "epoch": 0.10645663374838987, "grad_norm": 0.48046875, "learning_rate": 0.0006385616530256272, "loss": 2.123, "step": 3967 }, { "epoch": 0.10648346930012881, "grad_norm": 0.4921875, "learning_rate": 0.0006387226620152958, "loss": 2.214, "step": 3968 }, { "epoch": 0.10651030485186776, "grad_norm": 0.50390625, "learning_rate": 0.0006388836710049643, "loss": 2.0759, "step": 3969 }, { "epoch": 0.10653714040360669, "grad_norm": 0.498046875, "learning_rate": 0.0006390446799946329, "loss": 2.121, "step": 3970 }, { "epoch": 0.10656397595534564, "grad_norm": 0.494140625, "learning_rate": 0.0006392056889843016, "loss": 2.0386, "step": 3971 }, { "epoch": 0.10659081150708459, "grad_norm": 0.4609375, "learning_rate": 0.0006393666979739701, "loss": 1.9437, "step": 3972 }, { "epoch": 0.10661764705882353, "grad_norm": 0.48828125, "learning_rate": 0.0006395277069636388, "loss": 2.0349, "step": 3973 }, { "epoch": 0.10664448261056247, "grad_norm": 0.51171875, "learning_rate": 0.0006396887159533073, "loss": 2.1311, "step": 3974 }, { "epoch": 0.10667131816230142, "grad_norm": 0.51171875, "learning_rate": 0.0006398497249429759, "loss": 2.2269, "step": 3975 }, { "epoch": 0.10669815371404036, "grad_norm": 0.48828125, "learning_rate": 0.0006400107339326446, "loss": 2.0441, "step": 3976 }, { "epoch": 0.10672498926577931, "grad_norm": 0.484375, "learning_rate": 0.0006401717429223131, "loss": 2.0647, "step": 3977 }, { "epoch": 0.10675182481751824, "grad_norm": 0.484375, "learning_rate": 0.0006403327519119818, "loss": 2.055, "step": 3978 }, { "epoch": 0.10677866036925719, "grad_norm": 0.494140625, "learning_rate": 0.0006404937609016503, "loss": 2.1367, "step": 3979 }, { "epoch": 0.10680549592099614, "grad_norm": 0.49609375, "learning_rate": 0.0006406547698913189, "loss": 2.1517, "step": 3980 }, { "epoch": 0.10683233147273508, "grad_norm": 0.484375, "learning_rate": 0.0006408157788809875, "loss": 2.0381, "step": 3981 }, { "epoch": 0.10685916702447402, "grad_norm": 0.482421875, "learning_rate": 0.0006409767878706561, "loss": 2.169, "step": 3982 }, { "epoch": 0.10688600257621296, "grad_norm": 0.49609375, "learning_rate": 0.0006411377968603245, "loss": 2.2195, "step": 3983 }, { "epoch": 0.10691283812795191, "grad_norm": 0.50390625, "learning_rate": 0.0006412988058499932, "loss": 2.1703, "step": 3984 }, { "epoch": 0.10693967367969086, "grad_norm": 0.47265625, "learning_rate": 0.0006414598148396618, "loss": 2.0656, "step": 3985 }, { "epoch": 0.1069665092314298, "grad_norm": 0.48046875, "learning_rate": 0.0006416208238293304, "loss": 2.1236, "step": 3986 }, { "epoch": 0.10699334478316874, "grad_norm": 0.490234375, "learning_rate": 0.000641781832818999, "loss": 2.1299, "step": 3987 }, { "epoch": 0.10702018033490769, "grad_norm": 0.490234375, "learning_rate": 0.0006419428418086675, "loss": 2.1369, "step": 3988 }, { "epoch": 0.10704701588664663, "grad_norm": 0.5, "learning_rate": 0.0006421038507983362, "loss": 2.1205, "step": 3989 }, { "epoch": 0.10707385143838558, "grad_norm": 0.478515625, "learning_rate": 0.0006422648597880048, "loss": 2.1179, "step": 3990 }, { "epoch": 0.10710068699012451, "grad_norm": 0.48046875, "learning_rate": 0.0006424258687776734, "loss": 2.1317, "step": 3991 }, { "epoch": 0.10712752254186346, "grad_norm": 0.4921875, "learning_rate": 0.000642586877767342, "loss": 2.1297, "step": 3992 }, { "epoch": 0.10715435809360241, "grad_norm": 0.478515625, "learning_rate": 0.0006427478867570105, "loss": 2.0731, "step": 3993 }, { "epoch": 0.10718119364534134, "grad_norm": 0.48046875, "learning_rate": 0.0006429088957466792, "loss": 2.0597, "step": 3994 }, { "epoch": 0.10720802919708029, "grad_norm": 0.48046875, "learning_rate": 0.0006430699047363478, "loss": 2.024, "step": 3995 }, { "epoch": 0.10723486474881924, "grad_norm": 0.51171875, "learning_rate": 0.0006432309137260164, "loss": 2.2463, "step": 3996 }, { "epoch": 0.10726170030055818, "grad_norm": 0.5, "learning_rate": 0.000643391922715685, "loss": 2.123, "step": 3997 }, { "epoch": 0.10728853585229713, "grad_norm": 0.486328125, "learning_rate": 0.0006435529317053535, "loss": 2.1086, "step": 3998 }, { "epoch": 0.10731537140403606, "grad_norm": 0.470703125, "learning_rate": 0.0006437139406950221, "loss": 1.9717, "step": 3999 }, { "epoch": 0.10734220695577501, "grad_norm": 0.49609375, "learning_rate": 0.0006438749496846906, "loss": 2.1173, "step": 4000 }, { "epoch": 0.10736904250751396, "grad_norm": 0.494140625, "learning_rate": 0.0006440359586743592, "loss": 2.0951, "step": 4001 }, { "epoch": 0.1073958780592529, "grad_norm": 0.48046875, "learning_rate": 0.0006441969676640278, "loss": 2.0951, "step": 4002 }, { "epoch": 0.10742271361099184, "grad_norm": 0.50390625, "learning_rate": 0.0006443579766536964, "loss": 1.9975, "step": 4003 }, { "epoch": 0.10744954916273079, "grad_norm": 0.48828125, "learning_rate": 0.0006445189856433651, "loss": 2.0351, "step": 4004 }, { "epoch": 0.10747638471446973, "grad_norm": 0.50390625, "learning_rate": 0.0006446799946330336, "loss": 2.2358, "step": 4005 }, { "epoch": 0.10750322026620868, "grad_norm": 0.486328125, "learning_rate": 0.0006448410036227022, "loss": 2.0497, "step": 4006 }, { "epoch": 0.10753005581794761, "grad_norm": 0.490234375, "learning_rate": 0.0006450020126123708, "loss": 2.1346, "step": 4007 }, { "epoch": 0.10755689136968656, "grad_norm": 0.490234375, "learning_rate": 0.0006451630216020394, "loss": 2.0742, "step": 4008 }, { "epoch": 0.10758372692142551, "grad_norm": 0.50390625, "learning_rate": 0.0006453240305917081, "loss": 2.1263, "step": 4009 }, { "epoch": 0.10761056247316445, "grad_norm": 0.490234375, "learning_rate": 0.0006454850395813766, "loss": 2.0705, "step": 4010 }, { "epoch": 0.1076373980249034, "grad_norm": 0.494140625, "learning_rate": 0.0006456460485710452, "loss": 2.063, "step": 4011 }, { "epoch": 0.10766423357664233, "grad_norm": 0.48046875, "learning_rate": 0.0006458070575607138, "loss": 2.0723, "step": 4012 }, { "epoch": 0.10769106912838128, "grad_norm": 0.4765625, "learning_rate": 0.0006459680665503824, "loss": 2.082, "step": 4013 }, { "epoch": 0.10771790468012023, "grad_norm": 0.48828125, "learning_rate": 0.0006461290755400508, "loss": 2.0548, "step": 4014 }, { "epoch": 0.10774474023185916, "grad_norm": 0.494140625, "learning_rate": 0.0006462900845297195, "loss": 2.1225, "step": 4015 }, { "epoch": 0.10777157578359811, "grad_norm": 0.5234375, "learning_rate": 0.000646451093519388, "loss": 2.1591, "step": 4016 }, { "epoch": 0.10779841133533706, "grad_norm": 0.478515625, "learning_rate": 0.0006466121025090567, "loss": 1.978, "step": 4017 }, { "epoch": 0.107825246887076, "grad_norm": 0.494140625, "learning_rate": 0.0006467731114987253, "loss": 2.156, "step": 4018 }, { "epoch": 0.10785208243881494, "grad_norm": 0.48046875, "learning_rate": 0.0006469341204883938, "loss": 2.0436, "step": 4019 }, { "epoch": 0.10787891799055388, "grad_norm": 0.5, "learning_rate": 0.0006470951294780625, "loss": 2.1357, "step": 4020 }, { "epoch": 0.10790575354229283, "grad_norm": 0.484375, "learning_rate": 0.000647256138467731, "loss": 2.135, "step": 4021 }, { "epoch": 0.10793258909403178, "grad_norm": 0.50390625, "learning_rate": 0.0006474171474573997, "loss": 2.1099, "step": 4022 }, { "epoch": 0.10795942464577071, "grad_norm": 0.5078125, "learning_rate": 0.0006475781564470683, "loss": 2.1331, "step": 4023 }, { "epoch": 0.10798626019750966, "grad_norm": 0.47265625, "learning_rate": 0.0006477391654367368, "loss": 1.9928, "step": 4024 }, { "epoch": 0.10801309574924861, "grad_norm": 0.4921875, "learning_rate": 0.0006479001744264055, "loss": 2.1891, "step": 4025 }, { "epoch": 0.10803993130098755, "grad_norm": 0.5, "learning_rate": 0.000648061183416074, "loss": 2.1745, "step": 4026 }, { "epoch": 0.1080667668527265, "grad_norm": 0.494140625, "learning_rate": 0.0006482221924057427, "loss": 2.1586, "step": 4027 }, { "epoch": 0.10809360240446543, "grad_norm": 0.498046875, "learning_rate": 0.0006483832013954113, "loss": 2.1612, "step": 4028 }, { "epoch": 0.10812043795620438, "grad_norm": 0.4609375, "learning_rate": 0.0006485442103850798, "loss": 1.9905, "step": 4029 }, { "epoch": 0.10814727350794333, "grad_norm": 0.49609375, "learning_rate": 0.0006487052193747483, "loss": 2.0818, "step": 4030 }, { "epoch": 0.10817410905968226, "grad_norm": 0.4921875, "learning_rate": 0.0006488662283644169, "loss": 2.1734, "step": 4031 }, { "epoch": 0.10820094461142121, "grad_norm": 0.490234375, "learning_rate": 0.0006490272373540855, "loss": 2.0869, "step": 4032 }, { "epoch": 0.10822778016316015, "grad_norm": 0.498046875, "learning_rate": 0.0006491882463437541, "loss": 2.0766, "step": 4033 }, { "epoch": 0.1082546157148991, "grad_norm": 0.50390625, "learning_rate": 0.0006493492553334227, "loss": 2.1213, "step": 4034 }, { "epoch": 0.10828145126663805, "grad_norm": 0.494140625, "learning_rate": 0.0006495102643230913, "loss": 2.0723, "step": 4035 }, { "epoch": 0.10830828681837698, "grad_norm": 0.46875, "learning_rate": 0.0006496712733127599, "loss": 1.9919, "step": 4036 }, { "epoch": 0.10833512237011593, "grad_norm": 0.48828125, "learning_rate": 0.0006498322823024285, "loss": 2.0487, "step": 4037 }, { "epoch": 0.10836195792185488, "grad_norm": 0.482421875, "learning_rate": 0.0006499932912920971, "loss": 2.0607, "step": 4038 }, { "epoch": 0.10838879347359381, "grad_norm": 0.48046875, "learning_rate": 0.0006501543002817657, "loss": 2.0138, "step": 4039 }, { "epoch": 0.10841562902533276, "grad_norm": 0.48046875, "learning_rate": 0.0006503153092714343, "loss": 2.1105, "step": 4040 }, { "epoch": 0.1084424645770717, "grad_norm": 0.50390625, "learning_rate": 0.0006504763182611029, "loss": 2.123, "step": 4041 }, { "epoch": 0.10846930012881065, "grad_norm": 0.5, "learning_rate": 0.0006506373272507715, "loss": 2.0497, "step": 4042 }, { "epoch": 0.1084961356805496, "grad_norm": 0.5, "learning_rate": 0.0006507983362404401, "loss": 2.1331, "step": 4043 }, { "epoch": 0.10852297123228853, "grad_norm": 0.50390625, "learning_rate": 0.0006509593452301087, "loss": 2.1662, "step": 4044 }, { "epoch": 0.10854980678402748, "grad_norm": 0.46484375, "learning_rate": 0.0006511203542197771, "loss": 1.9688, "step": 4045 }, { "epoch": 0.10857664233576643, "grad_norm": 0.494140625, "learning_rate": 0.0006512813632094458, "loss": 2.0555, "step": 4046 }, { "epoch": 0.10860347788750536, "grad_norm": 0.46484375, "learning_rate": 0.0006514423721991143, "loss": 1.9652, "step": 4047 }, { "epoch": 0.10863031343924431, "grad_norm": 0.46875, "learning_rate": 0.000651603381188783, "loss": 2.0886, "step": 4048 }, { "epoch": 0.10865714899098325, "grad_norm": 0.498046875, "learning_rate": 0.0006517643901784515, "loss": 2.1001, "step": 4049 }, { "epoch": 0.1086839845427222, "grad_norm": 0.486328125, "learning_rate": 0.0006519253991681201, "loss": 2.1149, "step": 4050 }, { "epoch": 0.10871082009446115, "grad_norm": 0.482421875, "learning_rate": 0.0006520864081577888, "loss": 2.1436, "step": 4051 }, { "epoch": 0.10873765564620008, "grad_norm": 0.48046875, "learning_rate": 0.0006522474171474573, "loss": 2.0487, "step": 4052 }, { "epoch": 0.10876449119793903, "grad_norm": 0.486328125, "learning_rate": 0.000652408426137126, "loss": 2.0944, "step": 4053 }, { "epoch": 0.10879132674967798, "grad_norm": 0.5, "learning_rate": 0.0006525694351267945, "loss": 2.0868, "step": 4054 }, { "epoch": 0.10881816230141692, "grad_norm": 0.474609375, "learning_rate": 0.0006527304441164631, "loss": 2.0319, "step": 4055 }, { "epoch": 0.10884499785315586, "grad_norm": 0.47265625, "learning_rate": 0.0006528914531061318, "loss": 2.0657, "step": 4056 }, { "epoch": 0.1088718334048948, "grad_norm": 0.484375, "learning_rate": 0.0006530524620958003, "loss": 2.1409, "step": 4057 }, { "epoch": 0.10889866895663375, "grad_norm": 0.47265625, "learning_rate": 0.000653213471085469, "loss": 2.0571, "step": 4058 }, { "epoch": 0.1089255045083727, "grad_norm": 0.48828125, "learning_rate": 0.0006533744800751375, "loss": 2.0856, "step": 4059 }, { "epoch": 0.10895234006011163, "grad_norm": 0.46484375, "learning_rate": 0.000653535489064806, "loss": 1.9879, "step": 4060 }, { "epoch": 0.10897917561185058, "grad_norm": 0.49609375, "learning_rate": 0.0006536964980544746, "loss": 2.1787, "step": 4061 }, { "epoch": 0.10900601116358952, "grad_norm": 0.474609375, "learning_rate": 0.0006538575070441432, "loss": 2.0801, "step": 4062 }, { "epoch": 0.10903284671532847, "grad_norm": 0.478515625, "learning_rate": 0.0006540185160338118, "loss": 2.0042, "step": 4063 }, { "epoch": 0.10905968226706741, "grad_norm": 0.50390625, "learning_rate": 0.0006541795250234804, "loss": 2.1021, "step": 4064 }, { "epoch": 0.10908651781880635, "grad_norm": 0.474609375, "learning_rate": 0.000654340534013149, "loss": 2.0787, "step": 4065 }, { "epoch": 0.1091133533705453, "grad_norm": 0.5078125, "learning_rate": 0.0006545015430028176, "loss": 2.0727, "step": 4066 }, { "epoch": 0.10914018892228425, "grad_norm": 0.482421875, "learning_rate": 0.0006546625519924862, "loss": 2.0378, "step": 4067 }, { "epoch": 0.10916702447402318, "grad_norm": 0.4765625, "learning_rate": 0.0006548235609821547, "loss": 2.0455, "step": 4068 }, { "epoch": 0.10919386002576213, "grad_norm": 0.482421875, "learning_rate": 0.0006549845699718234, "loss": 2.0663, "step": 4069 }, { "epoch": 0.10922069557750107, "grad_norm": 0.49609375, "learning_rate": 0.000655145578961492, "loss": 2.1566, "step": 4070 }, { "epoch": 0.10924753112924002, "grad_norm": 0.48046875, "learning_rate": 0.0006553065879511606, "loss": 2.0673, "step": 4071 }, { "epoch": 0.10927436668097897, "grad_norm": 0.470703125, "learning_rate": 0.0006554675969408292, "loss": 2.045, "step": 4072 }, { "epoch": 0.1093012022327179, "grad_norm": 0.486328125, "learning_rate": 0.0006556286059304977, "loss": 2.0235, "step": 4073 }, { "epoch": 0.10932803778445685, "grad_norm": 0.490234375, "learning_rate": 0.0006557896149201664, "loss": 2.0428, "step": 4074 }, { "epoch": 0.1093548733361958, "grad_norm": 0.5, "learning_rate": 0.000655950623909835, "loss": 2.1564, "step": 4075 }, { "epoch": 0.10938170888793473, "grad_norm": 0.48828125, "learning_rate": 0.0006561116328995034, "loss": 2.0872, "step": 4076 }, { "epoch": 0.10940854443967368, "grad_norm": 0.47265625, "learning_rate": 0.0006562726418891721, "loss": 1.9427, "step": 4077 }, { "epoch": 0.10943537999141262, "grad_norm": 0.49609375, "learning_rate": 0.0006564336508788406, "loss": 2.0599, "step": 4078 }, { "epoch": 0.10946221554315157, "grad_norm": 0.5078125, "learning_rate": 0.0006565946598685093, "loss": 2.083, "step": 4079 }, { "epoch": 0.10948905109489052, "grad_norm": 0.474609375, "learning_rate": 0.0006567556688581778, "loss": 2.0277, "step": 4080 }, { "epoch": 0.10951588664662945, "grad_norm": 0.5, "learning_rate": 0.0006569166778478464, "loss": 2.0967, "step": 4081 }, { "epoch": 0.1095427221983684, "grad_norm": 0.47265625, "learning_rate": 0.000657077686837515, "loss": 2.0094, "step": 4082 }, { "epoch": 0.10956955775010734, "grad_norm": 0.484375, "learning_rate": 0.0006572386958271836, "loss": 2.0549, "step": 4083 }, { "epoch": 0.10959639330184628, "grad_norm": 0.498046875, "learning_rate": 0.0006573997048168523, "loss": 2.1017, "step": 4084 }, { "epoch": 0.10962322885358523, "grad_norm": 0.4921875, "learning_rate": 0.0006575607138065208, "loss": 2.1383, "step": 4085 }, { "epoch": 0.10965006440532417, "grad_norm": 0.498046875, "learning_rate": 0.0006577217227961894, "loss": 1.9848, "step": 4086 }, { "epoch": 0.10967689995706312, "grad_norm": 0.47265625, "learning_rate": 0.000657882731785858, "loss": 2.0406, "step": 4087 }, { "epoch": 0.10970373550880207, "grad_norm": 0.48046875, "learning_rate": 0.0006580437407755266, "loss": 2.0153, "step": 4088 }, { "epoch": 0.109730571060541, "grad_norm": 0.486328125, "learning_rate": 0.0006582047497651953, "loss": 2.0906, "step": 4089 }, { "epoch": 0.10975740661227995, "grad_norm": 0.49609375, "learning_rate": 0.0006583657587548638, "loss": 2.1695, "step": 4090 }, { "epoch": 0.10978424216401889, "grad_norm": 0.470703125, "learning_rate": 0.0006585267677445323, "loss": 1.9644, "step": 4091 }, { "epoch": 0.10981107771575783, "grad_norm": 0.4765625, "learning_rate": 0.0006586877767342009, "loss": 2.0471, "step": 4092 }, { "epoch": 0.10983791326749678, "grad_norm": 0.490234375, "learning_rate": 0.0006588487857238695, "loss": 2.0581, "step": 4093 }, { "epoch": 0.10986474881923572, "grad_norm": 0.47265625, "learning_rate": 0.000659009794713538, "loss": 2.0296, "step": 4094 }, { "epoch": 0.10989158437097467, "grad_norm": 0.50390625, "learning_rate": 0.0006591708037032067, "loss": 2.1345, "step": 4095 }, { "epoch": 0.10991841992271362, "grad_norm": 0.5078125, "learning_rate": 0.0006593318126928753, "loss": 2.1781, "step": 4096 }, { "epoch": 0.10994525547445255, "grad_norm": 0.494140625, "learning_rate": 0.0006594928216825439, "loss": 2.1503, "step": 4097 }, { "epoch": 0.1099720910261915, "grad_norm": 0.478515625, "learning_rate": 0.0006596538306722125, "loss": 2.0473, "step": 4098 }, { "epoch": 0.10999892657793044, "grad_norm": 0.48046875, "learning_rate": 0.000659814839661881, "loss": 2.1036, "step": 4099 }, { "epoch": 0.11002576212966939, "grad_norm": 0.4921875, "learning_rate": 0.0006599758486515497, "loss": 2.0702, "step": 4100 }, { "epoch": 0.11005259768140833, "grad_norm": 0.490234375, "learning_rate": 0.0006601368576412182, "loss": 2.1622, "step": 4101 }, { "epoch": 0.11007943323314727, "grad_norm": 0.5, "learning_rate": 0.0006602978666308869, "loss": 2.0848, "step": 4102 }, { "epoch": 0.11010626878488622, "grad_norm": 0.48828125, "learning_rate": 0.0006604588756205555, "loss": 2.0721, "step": 4103 }, { "epoch": 0.11013310433662517, "grad_norm": 0.48046875, "learning_rate": 0.000660619884610224, "loss": 2.0771, "step": 4104 }, { "epoch": 0.1101599398883641, "grad_norm": 0.494140625, "learning_rate": 0.0006607808935998927, "loss": 2.1338, "step": 4105 }, { "epoch": 0.11018677544010305, "grad_norm": 0.4765625, "learning_rate": 0.0006609419025895612, "loss": 2.0555, "step": 4106 }, { "epoch": 0.11021361099184199, "grad_norm": 0.474609375, "learning_rate": 0.0006611029115792297, "loss": 2.0492, "step": 4107 }, { "epoch": 0.11024044654358094, "grad_norm": 0.498046875, "learning_rate": 0.0006612639205688983, "loss": 2.0412, "step": 4108 }, { "epoch": 0.11026728209531989, "grad_norm": 0.4921875, "learning_rate": 0.0006614249295585669, "loss": 2.0971, "step": 4109 }, { "epoch": 0.11029411764705882, "grad_norm": 0.486328125, "learning_rate": 0.0006615859385482356, "loss": 2.1216, "step": 4110 }, { "epoch": 0.11032095319879777, "grad_norm": 0.4765625, "learning_rate": 0.0006617469475379041, "loss": 2.087, "step": 4111 }, { "epoch": 0.1103477887505367, "grad_norm": 0.48046875, "learning_rate": 0.0006619079565275727, "loss": 2.0036, "step": 4112 }, { "epoch": 0.11037462430227565, "grad_norm": 0.48828125, "learning_rate": 0.0006620689655172413, "loss": 2.1702, "step": 4113 }, { "epoch": 0.1104014598540146, "grad_norm": 0.486328125, "learning_rate": 0.0006622299745069099, "loss": 2.0855, "step": 4114 }, { "epoch": 0.11042829540575354, "grad_norm": 0.494140625, "learning_rate": 0.0006623909834965786, "loss": 2.074, "step": 4115 }, { "epoch": 0.11045513095749249, "grad_norm": 0.474609375, "learning_rate": 0.0006625519924862471, "loss": 2.0014, "step": 4116 }, { "epoch": 0.11048196650923144, "grad_norm": 0.46875, "learning_rate": 0.0006627130014759157, "loss": 2.0636, "step": 4117 }, { "epoch": 0.11050880206097037, "grad_norm": 0.486328125, "learning_rate": 0.0006628740104655843, "loss": 2.0997, "step": 4118 }, { "epoch": 0.11053563761270932, "grad_norm": 0.49609375, "learning_rate": 0.0006630350194552529, "loss": 2.0061, "step": 4119 }, { "epoch": 0.11056247316444826, "grad_norm": 0.47265625, "learning_rate": 0.0006631960284449215, "loss": 1.9798, "step": 4120 }, { "epoch": 0.1105893087161872, "grad_norm": 0.494140625, "learning_rate": 0.0006633570374345901, "loss": 2.1171, "step": 4121 }, { "epoch": 0.11061614426792615, "grad_norm": 0.466796875, "learning_rate": 0.0006635180464242585, "loss": 1.997, "step": 4122 }, { "epoch": 0.11064297981966509, "grad_norm": 0.48828125, "learning_rate": 0.0006636790554139272, "loss": 2.0035, "step": 4123 }, { "epoch": 0.11066981537140404, "grad_norm": 0.482421875, "learning_rate": 0.0006638400644035958, "loss": 2.02, "step": 4124 }, { "epoch": 0.11069665092314299, "grad_norm": 0.498046875, "learning_rate": 0.0006640010733932643, "loss": 2.1031, "step": 4125 }, { "epoch": 0.11072348647488192, "grad_norm": 0.484375, "learning_rate": 0.000664162082382933, "loss": 2.0472, "step": 4126 }, { "epoch": 0.11075032202662087, "grad_norm": 0.5, "learning_rate": 0.0006643230913726015, "loss": 2.1514, "step": 4127 }, { "epoch": 0.1107771575783598, "grad_norm": 0.462890625, "learning_rate": 0.0006644841003622702, "loss": 1.9833, "step": 4128 }, { "epoch": 0.11080399313009875, "grad_norm": 0.47265625, "learning_rate": 0.0006646451093519388, "loss": 2.0325, "step": 4129 }, { "epoch": 0.1108308286818377, "grad_norm": 0.48046875, "learning_rate": 0.0006648061183416073, "loss": 2.0817, "step": 4130 }, { "epoch": 0.11085766423357664, "grad_norm": 0.53515625, "learning_rate": 0.000664967127331276, "loss": 2.1667, "step": 4131 }, { "epoch": 0.11088449978531559, "grad_norm": 0.51171875, "learning_rate": 0.0006651281363209445, "loss": 2.1417, "step": 4132 }, { "epoch": 0.11091133533705452, "grad_norm": 0.50390625, "learning_rate": 0.0006652891453106132, "loss": 2.1143, "step": 4133 }, { "epoch": 0.11093817088879347, "grad_norm": 0.48828125, "learning_rate": 0.0006654501543002817, "loss": 2.0506, "step": 4134 }, { "epoch": 0.11096500644053242, "grad_norm": 0.50390625, "learning_rate": 0.0006656111632899503, "loss": 2.1271, "step": 4135 }, { "epoch": 0.11099184199227136, "grad_norm": 0.51171875, "learning_rate": 0.000665772172279619, "loss": 2.065, "step": 4136 }, { "epoch": 0.1110186775440103, "grad_norm": 0.5, "learning_rate": 0.0006659331812692875, "loss": 2.0297, "step": 4137 }, { "epoch": 0.11104551309574925, "grad_norm": 0.51171875, "learning_rate": 0.000666094190258956, "loss": 2.164, "step": 4138 }, { "epoch": 0.11107234864748819, "grad_norm": 0.49609375, "learning_rate": 0.0006662551992486246, "loss": 2.0205, "step": 4139 }, { "epoch": 0.11109918419922714, "grad_norm": 0.50390625, "learning_rate": 0.0006664162082382932, "loss": 2.0979, "step": 4140 }, { "epoch": 0.11112601975096607, "grad_norm": 0.474609375, "learning_rate": 0.0006665772172279618, "loss": 2.0451, "step": 4141 }, { "epoch": 0.11115285530270502, "grad_norm": 0.490234375, "learning_rate": 0.0006667382262176304, "loss": 1.9859, "step": 4142 }, { "epoch": 0.11117969085444397, "grad_norm": 0.4765625, "learning_rate": 0.000666899235207299, "loss": 2.0203, "step": 4143 }, { "epoch": 0.1112065264061829, "grad_norm": 0.48828125, "learning_rate": 0.0006670602441969676, "loss": 2.1714, "step": 4144 }, { "epoch": 0.11123336195792186, "grad_norm": 0.466796875, "learning_rate": 0.0006672212531866362, "loss": 1.983, "step": 4145 }, { "epoch": 0.1112601975096608, "grad_norm": 0.50390625, "learning_rate": 0.0006673822621763048, "loss": 2.0726, "step": 4146 }, { "epoch": 0.11128703306139974, "grad_norm": 0.46875, "learning_rate": 0.0006675432711659734, "loss": 2.0194, "step": 4147 }, { "epoch": 0.11131386861313869, "grad_norm": 0.48828125, "learning_rate": 0.000667704280155642, "loss": 2.0665, "step": 4148 }, { "epoch": 0.11134070416487762, "grad_norm": 0.498046875, "learning_rate": 0.0006678652891453106, "loss": 2.094, "step": 4149 }, { "epoch": 0.11136753971661657, "grad_norm": 0.498046875, "learning_rate": 0.0006680262981349792, "loss": 2.1279, "step": 4150 }, { "epoch": 0.11139437526835552, "grad_norm": 0.47265625, "learning_rate": 0.0006681873071246478, "loss": 2.0197, "step": 4151 }, { "epoch": 0.11142121082009446, "grad_norm": 0.48046875, "learning_rate": 0.0006683483161143164, "loss": 2.0733, "step": 4152 }, { "epoch": 0.1114480463718334, "grad_norm": 0.482421875, "learning_rate": 0.0006685093251039848, "loss": 2.0286, "step": 4153 }, { "epoch": 0.11147488192357236, "grad_norm": 0.482421875, "learning_rate": 0.0006686703340936535, "loss": 2.0277, "step": 4154 }, { "epoch": 0.11150171747531129, "grad_norm": 0.470703125, "learning_rate": 0.000668831343083322, "loss": 2.0468, "step": 4155 }, { "epoch": 0.11152855302705024, "grad_norm": 0.474609375, "learning_rate": 0.0006689923520729906, "loss": 2.0619, "step": 4156 }, { "epoch": 0.11155538857878917, "grad_norm": 0.484375, "learning_rate": 0.0006691533610626593, "loss": 2.1536, "step": 4157 }, { "epoch": 0.11158222413052812, "grad_norm": 0.48828125, "learning_rate": 0.0006693143700523278, "loss": 2.1261, "step": 4158 }, { "epoch": 0.11160905968226707, "grad_norm": 0.48046875, "learning_rate": 0.0006694753790419965, "loss": 2.067, "step": 4159 }, { "epoch": 0.11163589523400601, "grad_norm": 0.474609375, "learning_rate": 0.000669636388031665, "loss": 1.9874, "step": 4160 }, { "epoch": 0.11166273078574496, "grad_norm": 0.4921875, "learning_rate": 0.0006697973970213336, "loss": 2.1412, "step": 4161 }, { "epoch": 0.11168956633748389, "grad_norm": 0.484375, "learning_rate": 0.0006699584060110023, "loss": 2.0927, "step": 4162 }, { "epoch": 0.11171640188922284, "grad_norm": 0.490234375, "learning_rate": 0.0006701194150006708, "loss": 2.019, "step": 4163 }, { "epoch": 0.11174323744096179, "grad_norm": 0.4765625, "learning_rate": 0.0006702804239903395, "loss": 2.1753, "step": 4164 }, { "epoch": 0.11177007299270073, "grad_norm": 0.48046875, "learning_rate": 0.000670441432980008, "loss": 2.1306, "step": 4165 }, { "epoch": 0.11179690854443967, "grad_norm": 0.474609375, "learning_rate": 0.0006706024419696766, "loss": 2.0902, "step": 4166 }, { "epoch": 0.11182374409617862, "grad_norm": 0.47265625, "learning_rate": 0.0006707634509593453, "loss": 2.0359, "step": 4167 }, { "epoch": 0.11185057964791756, "grad_norm": 0.48046875, "learning_rate": 0.0006709244599490138, "loss": 2.0752, "step": 4168 }, { "epoch": 0.11187741519965651, "grad_norm": 0.47265625, "learning_rate": 0.0006710854689386822, "loss": 2.0031, "step": 4169 }, { "epoch": 0.11190425075139544, "grad_norm": 0.482421875, "learning_rate": 0.0006712464779283509, "loss": 2.0261, "step": 4170 }, { "epoch": 0.11193108630313439, "grad_norm": 0.47265625, "learning_rate": 0.0006714074869180195, "loss": 2.0197, "step": 4171 }, { "epoch": 0.11195792185487334, "grad_norm": 0.486328125, "learning_rate": 0.0006715684959076881, "loss": 2.0985, "step": 4172 }, { "epoch": 0.11198475740661228, "grad_norm": 0.470703125, "learning_rate": 0.0006717295048973567, "loss": 2.048, "step": 4173 }, { "epoch": 0.11201159295835122, "grad_norm": 0.482421875, "learning_rate": 0.0006718905138870252, "loss": 2.079, "step": 4174 }, { "epoch": 0.11203842851009017, "grad_norm": 0.45703125, "learning_rate": 0.0006720515228766939, "loss": 2.0405, "step": 4175 }, { "epoch": 0.11206526406182911, "grad_norm": 0.49609375, "learning_rate": 0.0006722125318663625, "loss": 2.0872, "step": 4176 }, { "epoch": 0.11209209961356806, "grad_norm": 0.478515625, "learning_rate": 0.0006723735408560311, "loss": 2.1409, "step": 4177 }, { "epoch": 0.112118935165307, "grad_norm": 0.482421875, "learning_rate": 0.0006725345498456997, "loss": 2.0413, "step": 4178 }, { "epoch": 0.11214577071704594, "grad_norm": 0.46484375, "learning_rate": 0.0006726955588353682, "loss": 2.0099, "step": 4179 }, { "epoch": 0.11217260626878489, "grad_norm": 0.47265625, "learning_rate": 0.0006728565678250369, "loss": 2.0038, "step": 4180 }, { "epoch": 0.11219944182052383, "grad_norm": 0.4765625, "learning_rate": 0.0006730175768147055, "loss": 2.1292, "step": 4181 }, { "epoch": 0.11222627737226278, "grad_norm": 0.466796875, "learning_rate": 0.0006731785858043741, "loss": 2.0204, "step": 4182 }, { "epoch": 0.11225311292400171, "grad_norm": 0.462890625, "learning_rate": 0.0006733395947940427, "loss": 2.0326, "step": 4183 }, { "epoch": 0.11227994847574066, "grad_norm": 0.498046875, "learning_rate": 0.0006735006037837111, "loss": 2.1315, "step": 4184 }, { "epoch": 0.11230678402747961, "grad_norm": 0.486328125, "learning_rate": 0.0006736616127733798, "loss": 2.0396, "step": 4185 }, { "epoch": 0.11233361957921854, "grad_norm": 0.4609375, "learning_rate": 0.0006738226217630483, "loss": 1.9851, "step": 4186 }, { "epoch": 0.11236045513095749, "grad_norm": 0.474609375, "learning_rate": 0.0006739836307527169, "loss": 2.0493, "step": 4187 }, { "epoch": 0.11238729068269644, "grad_norm": 0.48046875, "learning_rate": 0.0006741446397423855, "loss": 2.0438, "step": 4188 }, { "epoch": 0.11241412623443538, "grad_norm": 0.478515625, "learning_rate": 0.0006743056487320541, "loss": 2.0872, "step": 4189 }, { "epoch": 0.11244096178617433, "grad_norm": 0.466796875, "learning_rate": 0.0006744666577217228, "loss": 1.9514, "step": 4190 }, { "epoch": 0.11246779733791326, "grad_norm": 0.4609375, "learning_rate": 0.0006746276667113913, "loss": 1.9871, "step": 4191 }, { "epoch": 0.11249463288965221, "grad_norm": 0.466796875, "learning_rate": 0.0006747886757010599, "loss": 2.0478, "step": 4192 }, { "epoch": 0.11252146844139116, "grad_norm": 0.4921875, "learning_rate": 0.0006749496846907285, "loss": 2.0205, "step": 4193 }, { "epoch": 0.1125483039931301, "grad_norm": 0.474609375, "learning_rate": 0.0006751106936803971, "loss": 2.0996, "step": 4194 }, { "epoch": 0.11257513954486904, "grad_norm": 0.462890625, "learning_rate": 0.0006752717026700658, "loss": 1.9923, "step": 4195 }, { "epoch": 0.11260197509660799, "grad_norm": 0.48046875, "learning_rate": 0.0006754327116597343, "loss": 2.0568, "step": 4196 }, { "epoch": 0.11262881064834693, "grad_norm": 0.46875, "learning_rate": 0.0006755937206494029, "loss": 1.9667, "step": 4197 }, { "epoch": 0.11265564620008588, "grad_norm": 0.490234375, "learning_rate": 0.0006757547296390715, "loss": 2.0955, "step": 4198 }, { "epoch": 0.11268248175182481, "grad_norm": 0.46875, "learning_rate": 0.0006759157386287401, "loss": 2.0202, "step": 4199 }, { "epoch": 0.11270931730356376, "grad_norm": 0.48828125, "learning_rate": 0.0006760767476184085, "loss": 2.0559, "step": 4200 }, { "epoch": 0.11273615285530271, "grad_norm": 0.470703125, "learning_rate": 0.0006762377566080772, "loss": 2.0708, "step": 4201 }, { "epoch": 0.11276298840704164, "grad_norm": 0.484375, "learning_rate": 0.0006763987655977457, "loss": 1.9986, "step": 4202 }, { "epoch": 0.1127898239587806, "grad_norm": 0.47265625, "learning_rate": 0.0006765597745874144, "loss": 2.0206, "step": 4203 }, { "epoch": 0.11281665951051954, "grad_norm": 0.44921875, "learning_rate": 0.000676720783577083, "loss": 1.9669, "step": 4204 }, { "epoch": 0.11284349506225848, "grad_norm": 0.458984375, "learning_rate": 0.0006768817925667515, "loss": 2.016, "step": 4205 }, { "epoch": 0.11287033061399743, "grad_norm": 0.46484375, "learning_rate": 0.0006770428015564202, "loss": 2.0436, "step": 4206 }, { "epoch": 0.11289716616573636, "grad_norm": 0.4765625, "learning_rate": 0.0006772038105460887, "loss": 2.0292, "step": 4207 }, { "epoch": 0.11292400171747531, "grad_norm": 0.451171875, "learning_rate": 0.0006773648195357574, "loss": 1.9043, "step": 4208 }, { "epoch": 0.11295083726921426, "grad_norm": 0.470703125, "learning_rate": 0.000677525828525426, "loss": 2.0954, "step": 4209 }, { "epoch": 0.1129776728209532, "grad_norm": 0.462890625, "learning_rate": 0.0006776868375150945, "loss": 2.0533, "step": 4210 }, { "epoch": 0.11300450837269214, "grad_norm": 0.4609375, "learning_rate": 0.0006778478465047632, "loss": 2.0429, "step": 4211 }, { "epoch": 0.11303134392443108, "grad_norm": 0.470703125, "learning_rate": 0.0006780088554944317, "loss": 2.0096, "step": 4212 }, { "epoch": 0.11305817947617003, "grad_norm": 0.482421875, "learning_rate": 0.0006781698644841004, "loss": 2.0678, "step": 4213 }, { "epoch": 0.11308501502790898, "grad_norm": 0.478515625, "learning_rate": 0.000678330873473769, "loss": 1.9467, "step": 4214 }, { "epoch": 0.11311185057964791, "grad_norm": 0.484375, "learning_rate": 0.0006784918824634374, "loss": 1.9806, "step": 4215 }, { "epoch": 0.11313868613138686, "grad_norm": 0.484375, "learning_rate": 0.0006786528914531061, "loss": 2.075, "step": 4216 }, { "epoch": 0.11316552168312581, "grad_norm": 0.462890625, "learning_rate": 0.0006788139004427746, "loss": 1.9296, "step": 4217 }, { "epoch": 0.11319235723486475, "grad_norm": 0.48046875, "learning_rate": 0.0006789749094324432, "loss": 2.0704, "step": 4218 }, { "epoch": 0.1132191927866037, "grad_norm": 0.47265625, "learning_rate": 0.0006791359184221118, "loss": 2.0051, "step": 4219 }, { "epoch": 0.11324602833834263, "grad_norm": 0.482421875, "learning_rate": 0.0006792969274117804, "loss": 2.1156, "step": 4220 }, { "epoch": 0.11327286389008158, "grad_norm": 0.482421875, "learning_rate": 0.000679457936401449, "loss": 2.178, "step": 4221 }, { "epoch": 0.11329969944182053, "grad_norm": 0.48828125, "learning_rate": 0.0006796189453911176, "loss": 2.1296, "step": 4222 }, { "epoch": 0.11332653499355946, "grad_norm": 0.474609375, "learning_rate": 0.0006797799543807862, "loss": 2.0859, "step": 4223 }, { "epoch": 0.11335337054529841, "grad_norm": 0.474609375, "learning_rate": 0.0006799409633704548, "loss": 2.0926, "step": 4224 }, { "epoch": 0.11338020609703736, "grad_norm": 0.48046875, "learning_rate": 0.0006801019723601234, "loss": 2.0932, "step": 4225 }, { "epoch": 0.1134070416487763, "grad_norm": 0.466796875, "learning_rate": 0.000680262981349792, "loss": 2.0198, "step": 4226 }, { "epoch": 0.11343387720051525, "grad_norm": 0.4609375, "learning_rate": 0.0006804239903394606, "loss": 1.9946, "step": 4227 }, { "epoch": 0.11346071275225418, "grad_norm": 0.4765625, "learning_rate": 0.0006805849993291292, "loss": 2.0856, "step": 4228 }, { "epoch": 0.11348754830399313, "grad_norm": 0.4609375, "learning_rate": 0.0006807460083187978, "loss": 1.9818, "step": 4229 }, { "epoch": 0.11351438385573208, "grad_norm": 0.484375, "learning_rate": 0.0006809070173084664, "loss": 2.0771, "step": 4230 }, { "epoch": 0.11354121940747101, "grad_norm": 0.482421875, "learning_rate": 0.0006810680262981348, "loss": 2.1105, "step": 4231 }, { "epoch": 0.11356805495920996, "grad_norm": 0.47265625, "learning_rate": 0.0006812290352878035, "loss": 2.0214, "step": 4232 }, { "epoch": 0.1135948905109489, "grad_norm": 0.474609375, "learning_rate": 0.000681390044277472, "loss": 2.087, "step": 4233 }, { "epoch": 0.11362172606268785, "grad_norm": 0.478515625, "learning_rate": 0.0006815510532671407, "loss": 2.0882, "step": 4234 }, { "epoch": 0.1136485616144268, "grad_norm": 0.48046875, "learning_rate": 0.0006817120622568093, "loss": 2.1134, "step": 4235 }, { "epoch": 0.11367539716616573, "grad_norm": 0.458984375, "learning_rate": 0.0006818730712464778, "loss": 1.9521, "step": 4236 }, { "epoch": 0.11370223271790468, "grad_norm": 0.470703125, "learning_rate": 0.0006820340802361465, "loss": 2.0693, "step": 4237 }, { "epoch": 0.11372906826964363, "grad_norm": 0.466796875, "learning_rate": 0.000682195089225815, "loss": 1.9859, "step": 4238 }, { "epoch": 0.11375590382138256, "grad_norm": 0.45703125, "learning_rate": 0.0006823560982154837, "loss": 2.0667, "step": 4239 }, { "epoch": 0.11378273937312151, "grad_norm": 0.455078125, "learning_rate": 0.0006825171072051522, "loss": 1.9796, "step": 4240 }, { "epoch": 0.11380957492486045, "grad_norm": 0.5, "learning_rate": 0.0006826781161948208, "loss": 2.1164, "step": 4241 }, { "epoch": 0.1138364104765994, "grad_norm": 0.46484375, "learning_rate": 0.0006828391251844895, "loss": 2.0415, "step": 4242 }, { "epoch": 0.11386324602833835, "grad_norm": 0.46875, "learning_rate": 0.000683000134174158, "loss": 2.0361, "step": 4243 }, { "epoch": 0.11389008158007728, "grad_norm": 0.486328125, "learning_rate": 0.0006831611431638267, "loss": 2.1465, "step": 4244 }, { "epoch": 0.11391691713181623, "grad_norm": 0.46484375, "learning_rate": 0.0006833221521534952, "loss": 2.0294, "step": 4245 }, { "epoch": 0.11394375268355518, "grad_norm": 0.470703125, "learning_rate": 0.0006834831611431637, "loss": 2.0383, "step": 4246 }, { "epoch": 0.11397058823529412, "grad_norm": 0.494140625, "learning_rate": 0.0006836441701328323, "loss": 2.0613, "step": 4247 }, { "epoch": 0.11399742378703306, "grad_norm": 0.46875, "learning_rate": 0.0006838051791225009, "loss": 2.0308, "step": 4248 }, { "epoch": 0.114024259338772, "grad_norm": 0.455078125, "learning_rate": 0.0006839661881121695, "loss": 1.9872, "step": 4249 }, { "epoch": 0.11405109489051095, "grad_norm": 0.474609375, "learning_rate": 0.0006841271971018381, "loss": 2.0627, "step": 4250 }, { "epoch": 0.1140779304422499, "grad_norm": 0.44921875, "learning_rate": 0.0006842882060915067, "loss": 1.9931, "step": 4251 }, { "epoch": 0.11410476599398883, "grad_norm": 0.4609375, "learning_rate": 0.0006844492150811753, "loss": 1.9785, "step": 4252 }, { "epoch": 0.11413160154572778, "grad_norm": 0.470703125, "learning_rate": 0.0006846102240708439, "loss": 2.0176, "step": 4253 }, { "epoch": 0.11415843709746673, "grad_norm": 0.478515625, "learning_rate": 0.0006847712330605124, "loss": 1.9594, "step": 4254 }, { "epoch": 0.11418527264920567, "grad_norm": 0.458984375, "learning_rate": 0.0006849322420501811, "loss": 2.0083, "step": 4255 }, { "epoch": 0.11421210820094461, "grad_norm": 0.462890625, "learning_rate": 0.0006850932510398497, "loss": 2.1161, "step": 4256 }, { "epoch": 0.11423894375268355, "grad_norm": 0.47265625, "learning_rate": 0.0006852542600295183, "loss": 2.0129, "step": 4257 }, { "epoch": 0.1142657793044225, "grad_norm": 0.466796875, "learning_rate": 0.0006854152690191869, "loss": 2.0477, "step": 4258 }, { "epoch": 0.11429261485616145, "grad_norm": 0.47265625, "learning_rate": 0.0006855762780088554, "loss": 2.0832, "step": 4259 }, { "epoch": 0.11431945040790038, "grad_norm": 0.462890625, "learning_rate": 0.0006857372869985241, "loss": 2.0107, "step": 4260 }, { "epoch": 0.11434628595963933, "grad_norm": 0.4609375, "learning_rate": 0.0006858982959881925, "loss": 2.0141, "step": 4261 }, { "epoch": 0.11437312151137827, "grad_norm": 0.458984375, "learning_rate": 0.0006860593049778611, "loss": 2.0148, "step": 4262 }, { "epoch": 0.11439995706311722, "grad_norm": 0.4765625, "learning_rate": 0.0006862203139675298, "loss": 2.0424, "step": 4263 }, { "epoch": 0.11442679261485617, "grad_norm": 0.48046875, "learning_rate": 0.0006863813229571983, "loss": 2.0894, "step": 4264 }, { "epoch": 0.1144536281665951, "grad_norm": 0.4765625, "learning_rate": 0.000686542331946867, "loss": 2.0798, "step": 4265 }, { "epoch": 0.11448046371833405, "grad_norm": 0.451171875, "learning_rate": 0.0006867033409365355, "loss": 2.0208, "step": 4266 }, { "epoch": 0.114507299270073, "grad_norm": 0.458984375, "learning_rate": 0.0006868643499262041, "loss": 1.9776, "step": 4267 }, { "epoch": 0.11453413482181193, "grad_norm": 0.4765625, "learning_rate": 0.0006870253589158728, "loss": 2.0456, "step": 4268 }, { "epoch": 0.11456097037355088, "grad_norm": 0.4609375, "learning_rate": 0.0006871863679055413, "loss": 2.0513, "step": 4269 }, { "epoch": 0.11458780592528982, "grad_norm": 0.482421875, "learning_rate": 0.00068734737689521, "loss": 2.0516, "step": 4270 }, { "epoch": 0.11461464147702877, "grad_norm": 0.470703125, "learning_rate": 0.0006875083858848785, "loss": 1.9834, "step": 4271 }, { "epoch": 0.11464147702876772, "grad_norm": 0.470703125, "learning_rate": 0.0006876693948745471, "loss": 2.081, "step": 4272 }, { "epoch": 0.11466831258050665, "grad_norm": 0.490234375, "learning_rate": 0.0006878304038642157, "loss": 2.0323, "step": 4273 }, { "epoch": 0.1146951481322456, "grad_norm": 0.484375, "learning_rate": 0.0006879914128538843, "loss": 2.1386, "step": 4274 }, { "epoch": 0.11472198368398455, "grad_norm": 0.46484375, "learning_rate": 0.000688152421843553, "loss": 1.9628, "step": 4275 }, { "epoch": 0.11474881923572348, "grad_norm": 0.484375, "learning_rate": 0.0006883134308332215, "loss": 2.126, "step": 4276 }, { "epoch": 0.11477565478746243, "grad_norm": 0.4765625, "learning_rate": 0.00068847443982289, "loss": 2.0371, "step": 4277 }, { "epoch": 0.11480249033920137, "grad_norm": 0.482421875, "learning_rate": 0.0006886354488125586, "loss": 1.9186, "step": 4278 }, { "epoch": 0.11482932589094032, "grad_norm": 0.4609375, "learning_rate": 0.0006887964578022272, "loss": 2.0195, "step": 4279 }, { "epoch": 0.11485616144267927, "grad_norm": 0.453125, "learning_rate": 0.0006889574667918957, "loss": 2.0264, "step": 4280 }, { "epoch": 0.1148829969944182, "grad_norm": 0.470703125, "learning_rate": 0.0006891184757815644, "loss": 1.9802, "step": 4281 }, { "epoch": 0.11490983254615715, "grad_norm": 0.44921875, "learning_rate": 0.000689279484771233, "loss": 2.0623, "step": 4282 }, { "epoch": 0.11493666809789609, "grad_norm": 0.45703125, "learning_rate": 0.0006894404937609016, "loss": 1.9037, "step": 4283 }, { "epoch": 0.11496350364963503, "grad_norm": 0.470703125, "learning_rate": 0.0006896015027505702, "loss": 1.9904, "step": 4284 }, { "epoch": 0.11499033920137398, "grad_norm": 0.482421875, "learning_rate": 0.0006897625117402387, "loss": 2.1029, "step": 4285 }, { "epoch": 0.11501717475311292, "grad_norm": 0.458984375, "learning_rate": 0.0006899235207299074, "loss": 1.9234, "step": 4286 }, { "epoch": 0.11504401030485187, "grad_norm": 0.46875, "learning_rate": 0.000690084529719576, "loss": 2.017, "step": 4287 }, { "epoch": 0.11507084585659082, "grad_norm": 0.48046875, "learning_rate": 0.0006902455387092446, "loss": 2.0692, "step": 4288 }, { "epoch": 0.11509768140832975, "grad_norm": 0.46875, "learning_rate": 0.0006904065476989132, "loss": 1.9911, "step": 4289 }, { "epoch": 0.1151245169600687, "grad_norm": 0.484375, "learning_rate": 0.0006905675566885817, "loss": 2.0974, "step": 4290 }, { "epoch": 0.11515135251180764, "grad_norm": 0.474609375, "learning_rate": 0.0006907285656782504, "loss": 1.9998, "step": 4291 }, { "epoch": 0.11517818806354659, "grad_norm": 0.458984375, "learning_rate": 0.0006908895746679188, "loss": 2.021, "step": 4292 }, { "epoch": 0.11520502361528553, "grad_norm": 0.46484375, "learning_rate": 0.0006910505836575874, "loss": 1.9584, "step": 4293 }, { "epoch": 0.11523185916702447, "grad_norm": 0.462890625, "learning_rate": 0.000691211592647256, "loss": 2.063, "step": 4294 }, { "epoch": 0.11525869471876342, "grad_norm": 0.46484375, "learning_rate": 0.0006913726016369246, "loss": 2.0101, "step": 4295 }, { "epoch": 0.11528553027050237, "grad_norm": 0.47265625, "learning_rate": 0.0006915336106265933, "loss": 1.9884, "step": 4296 }, { "epoch": 0.1153123658222413, "grad_norm": 0.48046875, "learning_rate": 0.0006916946196162618, "loss": 2.0565, "step": 4297 }, { "epoch": 0.11533920137398025, "grad_norm": 0.4609375, "learning_rate": 0.0006918556286059304, "loss": 1.9723, "step": 4298 }, { "epoch": 0.11536603692571919, "grad_norm": 0.453125, "learning_rate": 0.000692016637595599, "loss": 1.9936, "step": 4299 }, { "epoch": 0.11539287247745814, "grad_norm": 0.466796875, "learning_rate": 0.0006921776465852676, "loss": 2.0692, "step": 4300 }, { "epoch": 0.11541970802919709, "grad_norm": 0.46484375, "learning_rate": 0.0006923386555749363, "loss": 1.9906, "step": 4301 }, { "epoch": 0.11544654358093602, "grad_norm": 0.44921875, "learning_rate": 0.0006924996645646048, "loss": 1.9529, "step": 4302 }, { "epoch": 0.11547337913267497, "grad_norm": 0.458984375, "learning_rate": 0.0006926606735542734, "loss": 2.0053, "step": 4303 }, { "epoch": 0.11550021468441392, "grad_norm": 0.45703125, "learning_rate": 0.000692821682543942, "loss": 1.9705, "step": 4304 }, { "epoch": 0.11552705023615285, "grad_norm": 0.46875, "learning_rate": 0.0006929826915336106, "loss": 2.133, "step": 4305 }, { "epoch": 0.1155538857878918, "grad_norm": 0.47265625, "learning_rate": 0.0006931437005232793, "loss": 2.0204, "step": 4306 }, { "epoch": 0.11558072133963074, "grad_norm": 0.45703125, "learning_rate": 0.0006933047095129478, "loss": 1.9904, "step": 4307 }, { "epoch": 0.11560755689136969, "grad_norm": 0.46875, "learning_rate": 0.0006934657185026162, "loss": 2.0206, "step": 4308 }, { "epoch": 0.11563439244310864, "grad_norm": 0.458984375, "learning_rate": 0.0006936267274922849, "loss": 2.0004, "step": 4309 }, { "epoch": 0.11566122799484757, "grad_norm": 0.458984375, "learning_rate": 0.0006937877364819535, "loss": 2.0017, "step": 4310 }, { "epoch": 0.11568806354658652, "grad_norm": 0.478515625, "learning_rate": 0.000693948745471622, "loss": 2.1398, "step": 4311 }, { "epoch": 0.11571489909832545, "grad_norm": 0.466796875, "learning_rate": 0.0006941097544612907, "loss": 2.0115, "step": 4312 }, { "epoch": 0.1157417346500644, "grad_norm": 0.4609375, "learning_rate": 0.0006942707634509592, "loss": 1.949, "step": 4313 }, { "epoch": 0.11576857020180335, "grad_norm": 0.47265625, "learning_rate": 0.0006944317724406279, "loss": 2.1032, "step": 4314 }, { "epoch": 0.11579540575354229, "grad_norm": 0.46875, "learning_rate": 0.0006945927814302965, "loss": 2.0217, "step": 4315 }, { "epoch": 0.11582224130528124, "grad_norm": 0.47265625, "learning_rate": 0.000694753790419965, "loss": 2.0301, "step": 4316 }, { "epoch": 0.11584907685702019, "grad_norm": 0.478515625, "learning_rate": 0.0006949147994096337, "loss": 2.0493, "step": 4317 }, { "epoch": 0.11587591240875912, "grad_norm": 0.470703125, "learning_rate": 0.0006950758083993022, "loss": 2.053, "step": 4318 }, { "epoch": 0.11590274796049807, "grad_norm": 0.455078125, "learning_rate": 0.0006952368173889709, "loss": 1.9152, "step": 4319 }, { "epoch": 0.115929583512237, "grad_norm": 0.45703125, "learning_rate": 0.0006953978263786395, "loss": 1.9966, "step": 4320 }, { "epoch": 0.11595641906397595, "grad_norm": 0.45703125, "learning_rate": 0.000695558835368308, "loss": 1.9702, "step": 4321 }, { "epoch": 0.1159832546157149, "grad_norm": 0.4609375, "learning_rate": 0.0006957198443579767, "loss": 1.9312, "step": 4322 }, { "epoch": 0.11601009016745384, "grad_norm": 0.5703125, "learning_rate": 0.0006958808533476451, "loss": 2.0403, "step": 4323 }, { "epoch": 0.11603692571919279, "grad_norm": 0.58203125, "learning_rate": 0.0006960418623373138, "loss": 1.8155, "step": 4324 }, { "epoch": 0.11606376127093174, "grad_norm": 0.53125, "learning_rate": 0.0006962028713269823, "loss": 1.8089, "step": 4325 }, { "epoch": 0.11609059682267067, "grad_norm": 0.46484375, "learning_rate": 0.0006963638803166509, "loss": 1.6708, "step": 4326 }, { "epoch": 0.11611743237440962, "grad_norm": 0.53125, "learning_rate": 0.0006965248893063195, "loss": 1.6877, "step": 4327 }, { "epoch": 0.11614426792614856, "grad_norm": 0.42578125, "learning_rate": 0.0006966858982959881, "loss": 1.5135, "step": 4328 }, { "epoch": 0.1161711034778875, "grad_norm": 0.44921875, "learning_rate": 0.0006968469072856567, "loss": 1.6691, "step": 4329 }, { "epoch": 0.11619793902962645, "grad_norm": 0.4296875, "learning_rate": 0.0006970079162753253, "loss": 1.5796, "step": 4330 }, { "epoch": 0.11622477458136539, "grad_norm": 0.4296875, "learning_rate": 0.0006971689252649939, "loss": 1.6192, "step": 4331 }, { "epoch": 0.11625161013310434, "grad_norm": 0.439453125, "learning_rate": 0.0006973299342546625, "loss": 1.6941, "step": 4332 }, { "epoch": 0.11627844568484327, "grad_norm": 0.453125, "learning_rate": 0.0006974909432443311, "loss": 1.6868, "step": 4333 }, { "epoch": 0.11630528123658222, "grad_norm": 0.44140625, "learning_rate": 0.0006976519522339997, "loss": 1.6796, "step": 4334 }, { "epoch": 0.11633211678832117, "grad_norm": 0.451171875, "learning_rate": 0.0006978129612236683, "loss": 1.4875, "step": 4335 }, { "epoch": 0.1163589523400601, "grad_norm": 0.455078125, "learning_rate": 0.0006979739702133369, "loss": 1.6928, "step": 4336 }, { "epoch": 0.11638578789179906, "grad_norm": 0.412109375, "learning_rate": 0.0006981349792030055, "loss": 1.5293, "step": 4337 }, { "epoch": 0.116412623443538, "grad_norm": 0.43359375, "learning_rate": 0.0006982959881926741, "loss": 1.5318, "step": 4338 }, { "epoch": 0.11643945899527694, "grad_norm": 0.419921875, "learning_rate": 0.0006984569971823425, "loss": 1.68, "step": 4339 }, { "epoch": 0.11646629454701589, "grad_norm": 0.43359375, "learning_rate": 0.0006986180061720112, "loss": 1.5655, "step": 4340 }, { "epoch": 0.11649313009875482, "grad_norm": 0.421875, "learning_rate": 0.0006987790151616797, "loss": 1.5837, "step": 4341 }, { "epoch": 0.11651996565049377, "grad_norm": 0.439453125, "learning_rate": 0.0006989400241513483, "loss": 1.6844, "step": 4342 }, { "epoch": 0.11654680120223272, "grad_norm": 0.451171875, "learning_rate": 0.000699101033141017, "loss": 1.7015, "step": 4343 }, { "epoch": 0.11657363675397166, "grad_norm": 0.41015625, "learning_rate": 0.0006992620421306855, "loss": 1.5088, "step": 4344 }, { "epoch": 0.1166004723057106, "grad_norm": 0.41015625, "learning_rate": 0.0006994230511203542, "loss": 1.5659, "step": 4345 }, { "epoch": 0.11662730785744956, "grad_norm": 0.435546875, "learning_rate": 0.0006995840601100227, "loss": 1.6329, "step": 4346 }, { "epoch": 0.11665414340918849, "grad_norm": 0.43359375, "learning_rate": 0.0006997450690996913, "loss": 1.7459, "step": 4347 }, { "epoch": 0.11668097896092744, "grad_norm": 0.423828125, "learning_rate": 0.00069990607808936, "loss": 1.6093, "step": 4348 }, { "epoch": 0.11670781451266637, "grad_norm": 0.42578125, "learning_rate": 0.0007000670870790285, "loss": 1.5751, "step": 4349 }, { "epoch": 0.11673465006440532, "grad_norm": 0.41015625, "learning_rate": 0.0007002280960686972, "loss": 1.5454, "step": 4350 }, { "epoch": 0.11676148561614427, "grad_norm": 0.427734375, "learning_rate": 0.0007003891050583657, "loss": 1.6582, "step": 4351 }, { "epoch": 0.11678832116788321, "grad_norm": 0.4296875, "learning_rate": 0.0007005501140480343, "loss": 1.6885, "step": 4352 }, { "epoch": 0.11681515671962216, "grad_norm": 0.443359375, "learning_rate": 0.000700711123037703, "loss": 1.6804, "step": 4353 }, { "epoch": 0.1168419922713611, "grad_norm": 0.419921875, "learning_rate": 0.0007008721320273714, "loss": 1.521, "step": 4354 }, { "epoch": 0.11686882782310004, "grad_norm": 0.400390625, "learning_rate": 0.00070103314101704, "loss": 1.5043, "step": 4355 }, { "epoch": 0.11689566337483899, "grad_norm": 0.439453125, "learning_rate": 0.0007011941500067086, "loss": 1.7059, "step": 4356 }, { "epoch": 0.11692249892657793, "grad_norm": 0.404296875, "learning_rate": 0.0007013551589963772, "loss": 1.595, "step": 4357 }, { "epoch": 0.11694933447831687, "grad_norm": 0.419921875, "learning_rate": 0.0007015161679860458, "loss": 1.6889, "step": 4358 }, { "epoch": 0.11697617003005582, "grad_norm": 0.392578125, "learning_rate": 0.0007016771769757144, "loss": 1.5138, "step": 4359 }, { "epoch": 0.11700300558179476, "grad_norm": 0.416015625, "learning_rate": 0.0007018381859653829, "loss": 1.589, "step": 4360 }, { "epoch": 0.11702984113353371, "grad_norm": 0.423828125, "learning_rate": 0.0007019991949550516, "loss": 1.5966, "step": 4361 }, { "epoch": 0.11705667668527264, "grad_norm": 0.41796875, "learning_rate": 0.0007021602039447202, "loss": 1.575, "step": 4362 }, { "epoch": 0.11708351223701159, "grad_norm": 0.43359375, "learning_rate": 0.0007023212129343888, "loss": 1.6458, "step": 4363 }, { "epoch": 0.11711034778875054, "grad_norm": 0.431640625, "learning_rate": 0.0007024822219240574, "loss": 1.5391, "step": 4364 }, { "epoch": 0.11713718334048948, "grad_norm": 0.416015625, "learning_rate": 0.0007026432309137259, "loss": 1.6121, "step": 4365 }, { "epoch": 0.11716401889222842, "grad_norm": 0.42578125, "learning_rate": 0.0007028042399033946, "loss": 1.654, "step": 4366 }, { "epoch": 0.11719085444396737, "grad_norm": 0.423828125, "learning_rate": 0.0007029652488930632, "loss": 1.6354, "step": 4367 }, { "epoch": 0.11721768999570631, "grad_norm": 0.431640625, "learning_rate": 0.0007031262578827318, "loss": 1.5427, "step": 4368 }, { "epoch": 0.11724452554744526, "grad_norm": 0.3984375, "learning_rate": 0.0007032872668724004, "loss": 1.4484, "step": 4369 }, { "epoch": 0.1172713610991842, "grad_norm": 0.416015625, "learning_rate": 0.0007034482758620688, "loss": 1.5067, "step": 4370 }, { "epoch": 0.11729819665092314, "grad_norm": 0.412109375, "learning_rate": 0.0007036092848517375, "loss": 1.6519, "step": 4371 }, { "epoch": 0.11732503220266209, "grad_norm": 0.416015625, "learning_rate": 0.000703770293841406, "loss": 1.615, "step": 4372 }, { "epoch": 0.11735186775440103, "grad_norm": 0.423828125, "learning_rate": 0.0007039313028310746, "loss": 1.6744, "step": 4373 }, { "epoch": 0.11737870330613998, "grad_norm": 0.404296875, "learning_rate": 0.0007040923118207432, "loss": 1.6271, "step": 4374 }, { "epoch": 0.11740553885787892, "grad_norm": 0.421875, "learning_rate": 0.0007042533208104118, "loss": 1.678, "step": 4375 }, { "epoch": 0.11743237440961786, "grad_norm": 0.4375, "learning_rate": 0.0007044143298000805, "loss": 1.615, "step": 4376 }, { "epoch": 0.11745920996135681, "grad_norm": 0.39453125, "learning_rate": 0.000704575338789749, "loss": 1.5143, "step": 4377 }, { "epoch": 0.11748604551309574, "grad_norm": 0.3984375, "learning_rate": 0.0007047363477794176, "loss": 1.6314, "step": 4378 }, { "epoch": 0.11751288106483469, "grad_norm": 0.427734375, "learning_rate": 0.0007048973567690862, "loss": 1.6266, "step": 4379 }, { "epoch": 0.11753971661657364, "grad_norm": 0.42578125, "learning_rate": 0.0007050583657587548, "loss": 1.6459, "step": 4380 }, { "epoch": 0.11756655216831258, "grad_norm": 0.412109375, "learning_rate": 0.0007052193747484235, "loss": 1.6104, "step": 4381 }, { "epoch": 0.11759338772005153, "grad_norm": 0.43359375, "learning_rate": 0.000705380383738092, "loss": 1.7424, "step": 4382 }, { "epoch": 0.11762022327179046, "grad_norm": 0.400390625, "learning_rate": 0.0007055413927277606, "loss": 1.5102, "step": 4383 }, { "epoch": 0.11764705882352941, "grad_norm": 0.4296875, "learning_rate": 0.0007057024017174292, "loss": 1.5622, "step": 4384 }, { "epoch": 0.11767389437526836, "grad_norm": 0.4140625, "learning_rate": 0.0007058634107070977, "loss": 1.6443, "step": 4385 }, { "epoch": 0.1177007299270073, "grad_norm": 0.388671875, "learning_rate": 0.0007060244196967662, "loss": 1.4802, "step": 4386 }, { "epoch": 0.11772756547874624, "grad_norm": 0.400390625, "learning_rate": 0.0007061854286864349, "loss": 1.5336, "step": 4387 }, { "epoch": 0.11775440103048519, "grad_norm": 0.404296875, "learning_rate": 0.0007063464376761035, "loss": 1.6386, "step": 4388 }, { "epoch": 0.11778123658222413, "grad_norm": 0.419921875, "learning_rate": 0.0007065074466657721, "loss": 1.6491, "step": 4389 }, { "epoch": 0.11780807213396308, "grad_norm": 0.41796875, "learning_rate": 0.0007066684556554407, "loss": 1.731, "step": 4390 }, { "epoch": 0.11783490768570201, "grad_norm": 0.41796875, "learning_rate": 0.0007068294646451092, "loss": 1.6597, "step": 4391 }, { "epoch": 0.11786174323744096, "grad_norm": 0.40625, "learning_rate": 0.0007069904736347779, "loss": 1.541, "step": 4392 }, { "epoch": 0.11788857878917991, "grad_norm": 0.42578125, "learning_rate": 0.0007071514826244464, "loss": 1.6095, "step": 4393 }, { "epoch": 0.11791541434091884, "grad_norm": 0.412109375, "learning_rate": 0.0007073124916141151, "loss": 1.5816, "step": 4394 }, { "epoch": 0.1179422498926578, "grad_norm": 0.408203125, "learning_rate": 0.0007074735006037837, "loss": 1.4893, "step": 4395 }, { "epoch": 0.11796908544439674, "grad_norm": 0.412109375, "learning_rate": 0.0007076345095934522, "loss": 1.5415, "step": 4396 }, { "epoch": 0.11799592099613568, "grad_norm": 0.431640625, "learning_rate": 0.0007077955185831209, "loss": 1.6145, "step": 4397 }, { "epoch": 0.11802275654787463, "grad_norm": 0.41015625, "learning_rate": 0.0007079565275727894, "loss": 1.6174, "step": 4398 }, { "epoch": 0.11804959209961356, "grad_norm": 0.40234375, "learning_rate": 0.0007081175365624581, "loss": 1.5177, "step": 4399 }, { "epoch": 0.11807642765135251, "grad_norm": 0.4375, "learning_rate": 0.0007082785455521267, "loss": 1.7308, "step": 4400 }, { "epoch": 0.11810326320309146, "grad_norm": 0.38671875, "learning_rate": 0.0007084395545417951, "loss": 1.4706, "step": 4401 }, { "epoch": 0.1181300987548304, "grad_norm": 0.390625, "learning_rate": 0.0007086005635314638, "loss": 1.5931, "step": 4402 }, { "epoch": 0.11815693430656934, "grad_norm": 0.3828125, "learning_rate": 0.0007087615725211323, "loss": 1.4737, "step": 4403 }, { "epoch": 0.1181837698583083, "grad_norm": 0.404296875, "learning_rate": 0.000708922581510801, "loss": 1.5784, "step": 4404 }, { "epoch": 0.11821060541004723, "grad_norm": 0.39453125, "learning_rate": 0.0007090835905004695, "loss": 1.5567, "step": 4405 }, { "epoch": 0.11823744096178618, "grad_norm": 0.404296875, "learning_rate": 0.0007092445994901381, "loss": 1.5722, "step": 4406 }, { "epoch": 0.11826427651352511, "grad_norm": 0.43359375, "learning_rate": 0.0007094056084798068, "loss": 1.7455, "step": 4407 }, { "epoch": 0.11829111206526406, "grad_norm": 0.404296875, "learning_rate": 0.0007095666174694753, "loss": 1.5791, "step": 4408 }, { "epoch": 0.11831794761700301, "grad_norm": 0.408203125, "learning_rate": 0.000709727626459144, "loss": 1.4753, "step": 4409 }, { "epoch": 0.11834478316874195, "grad_norm": 0.40234375, "learning_rate": 0.0007098886354488125, "loss": 1.5273, "step": 4410 }, { "epoch": 0.1183716187204809, "grad_norm": 0.408203125, "learning_rate": 0.0007100496444384811, "loss": 1.5679, "step": 4411 }, { "epoch": 0.11839845427221983, "grad_norm": 0.416015625, "learning_rate": 0.0007102106534281497, "loss": 1.6522, "step": 4412 }, { "epoch": 0.11842528982395878, "grad_norm": 0.412109375, "learning_rate": 0.0007103716624178183, "loss": 1.6843, "step": 4413 }, { "epoch": 0.11845212537569773, "grad_norm": 0.404296875, "learning_rate": 0.000710532671407487, "loss": 1.503, "step": 4414 }, { "epoch": 0.11847896092743666, "grad_norm": 0.40234375, "learning_rate": 0.0007106936803971555, "loss": 1.572, "step": 4415 }, { "epoch": 0.11850579647917561, "grad_norm": 0.37890625, "learning_rate": 0.000710854689386824, "loss": 1.433, "step": 4416 }, { "epoch": 0.11853263203091456, "grad_norm": 0.408203125, "learning_rate": 0.0007110156983764925, "loss": 1.6308, "step": 4417 }, { "epoch": 0.1185594675826535, "grad_norm": 0.400390625, "learning_rate": 0.0007111767073661612, "loss": 1.609, "step": 4418 }, { "epoch": 0.11858630313439245, "grad_norm": 0.41015625, "learning_rate": 0.0007113377163558297, "loss": 1.6091, "step": 4419 }, { "epoch": 0.11861313868613138, "grad_norm": 0.419921875, "learning_rate": 0.0007114987253454984, "loss": 1.6383, "step": 4420 }, { "epoch": 0.11863997423787033, "grad_norm": 0.390625, "learning_rate": 0.000711659734335167, "loss": 1.5389, "step": 4421 }, { "epoch": 0.11866680978960928, "grad_norm": 0.3984375, "learning_rate": 0.0007118207433248355, "loss": 1.539, "step": 4422 }, { "epoch": 0.11869364534134821, "grad_norm": 0.390625, "learning_rate": 0.0007119817523145042, "loss": 1.5293, "step": 4423 }, { "epoch": 0.11872048089308716, "grad_norm": 0.41015625, "learning_rate": 0.0007121427613041727, "loss": 1.5996, "step": 4424 }, { "epoch": 0.11874731644482611, "grad_norm": 0.419921875, "learning_rate": 0.0007123037702938414, "loss": 1.5913, "step": 4425 }, { "epoch": 0.11877415199656505, "grad_norm": 0.404296875, "learning_rate": 0.00071246477928351, "loss": 1.4892, "step": 4426 }, { "epoch": 0.118800987548304, "grad_norm": 0.404296875, "learning_rate": 0.0007126257882731785, "loss": 1.63, "step": 4427 }, { "epoch": 0.11882782310004293, "grad_norm": 0.384765625, "learning_rate": 0.0007127867972628472, "loss": 1.4343, "step": 4428 }, { "epoch": 0.11885465865178188, "grad_norm": 0.41796875, "learning_rate": 0.0007129478062525157, "loss": 1.646, "step": 4429 }, { "epoch": 0.11888149420352083, "grad_norm": 0.41796875, "learning_rate": 0.0007131088152421844, "loss": 1.6524, "step": 4430 }, { "epoch": 0.11890832975525976, "grad_norm": 0.4140625, "learning_rate": 0.0007132698242318529, "loss": 1.6619, "step": 4431 }, { "epoch": 0.11893516530699871, "grad_norm": 0.416015625, "learning_rate": 0.0007134308332215214, "loss": 1.6111, "step": 4432 }, { "epoch": 0.11896200085873765, "grad_norm": 0.40625, "learning_rate": 0.00071359184221119, "loss": 1.6897, "step": 4433 }, { "epoch": 0.1189888364104766, "grad_norm": 0.3984375, "learning_rate": 0.0007137528512008586, "loss": 1.4717, "step": 4434 }, { "epoch": 0.11901567196221555, "grad_norm": 0.40234375, "learning_rate": 0.0007139138601905273, "loss": 1.5527, "step": 4435 }, { "epoch": 0.11904250751395448, "grad_norm": 0.4140625, "learning_rate": 0.0007140748691801958, "loss": 1.6529, "step": 4436 }, { "epoch": 0.11906934306569343, "grad_norm": 0.423828125, "learning_rate": 0.0007142358781698644, "loss": 1.6497, "step": 4437 }, { "epoch": 0.11909617861743238, "grad_norm": 0.41015625, "learning_rate": 0.000714396887159533, "loss": 1.5739, "step": 4438 }, { "epoch": 0.11912301416917132, "grad_norm": 0.3984375, "learning_rate": 0.0007145578961492016, "loss": 1.5519, "step": 4439 }, { "epoch": 0.11914984972091026, "grad_norm": 0.390625, "learning_rate": 0.0007147189051388703, "loss": 1.5112, "step": 4440 }, { "epoch": 0.1191766852726492, "grad_norm": 0.400390625, "learning_rate": 0.0007148799141285388, "loss": 1.5384, "step": 4441 }, { "epoch": 0.11920352082438815, "grad_norm": 0.400390625, "learning_rate": 0.0007150409231182074, "loss": 1.5298, "step": 4442 }, { "epoch": 0.1192303563761271, "grad_norm": 0.400390625, "learning_rate": 0.000715201932107876, "loss": 1.5854, "step": 4443 }, { "epoch": 0.11925719192786603, "grad_norm": 0.40234375, "learning_rate": 0.0007153629410975446, "loss": 1.4748, "step": 4444 }, { "epoch": 0.11928402747960498, "grad_norm": 0.400390625, "learning_rate": 0.0007155239500872131, "loss": 1.5455, "step": 4445 }, { "epoch": 0.11931086303134393, "grad_norm": 0.404296875, "learning_rate": 0.0007156849590768818, "loss": 1.6558, "step": 4446 }, { "epoch": 0.11933769858308287, "grad_norm": 0.39453125, "learning_rate": 0.0007158459680665502, "loss": 1.5487, "step": 4447 }, { "epoch": 0.11936453413482181, "grad_norm": 0.40234375, "learning_rate": 0.0007160069770562189, "loss": 1.6083, "step": 4448 }, { "epoch": 0.11939136968656075, "grad_norm": 0.390625, "learning_rate": 0.0007161679860458875, "loss": 1.5437, "step": 4449 }, { "epoch": 0.1194182052382997, "grad_norm": 0.400390625, "learning_rate": 0.000716328995035556, "loss": 1.5398, "step": 4450 }, { "epoch": 0.11944504079003865, "grad_norm": 0.41796875, "learning_rate": 0.0007164900040252247, "loss": 1.7116, "step": 4451 }, { "epoch": 0.11947187634177758, "grad_norm": 0.390625, "learning_rate": 0.0007166510130148932, "loss": 1.5677, "step": 4452 }, { "epoch": 0.11949871189351653, "grad_norm": 0.421875, "learning_rate": 0.0007168120220045618, "loss": 1.6404, "step": 4453 }, { "epoch": 0.11952554744525548, "grad_norm": 0.412109375, "learning_rate": 0.0007169730309942305, "loss": 1.6721, "step": 4454 }, { "epoch": 0.11955238299699442, "grad_norm": 0.40234375, "learning_rate": 0.000717134039983899, "loss": 1.5628, "step": 4455 }, { "epoch": 0.11957921854873337, "grad_norm": 0.404296875, "learning_rate": 0.0007172950489735677, "loss": 1.6043, "step": 4456 }, { "epoch": 0.1196060541004723, "grad_norm": 0.396484375, "learning_rate": 0.0007174560579632362, "loss": 1.6779, "step": 4457 }, { "epoch": 0.11963288965221125, "grad_norm": 0.380859375, "learning_rate": 0.0007176170669529048, "loss": 1.4732, "step": 4458 }, { "epoch": 0.1196597252039502, "grad_norm": 0.419921875, "learning_rate": 0.0007177780759425735, "loss": 1.6153, "step": 4459 }, { "epoch": 0.11968656075568913, "grad_norm": 0.380859375, "learning_rate": 0.000717939084932242, "loss": 1.3742, "step": 4460 }, { "epoch": 0.11971339630742808, "grad_norm": 0.384765625, "learning_rate": 0.0007181000939219107, "loss": 1.4855, "step": 4461 }, { "epoch": 0.11974023185916702, "grad_norm": 0.392578125, "learning_rate": 0.0007182611029115791, "loss": 1.5615, "step": 4462 }, { "epoch": 0.11976706741090597, "grad_norm": 0.40234375, "learning_rate": 0.0007184221119012477, "loss": 1.5346, "step": 4463 }, { "epoch": 0.11979390296264492, "grad_norm": 0.423828125, "learning_rate": 0.0007185831208909163, "loss": 1.6828, "step": 4464 }, { "epoch": 0.11982073851438385, "grad_norm": 0.40234375, "learning_rate": 0.0007187441298805849, "loss": 1.5742, "step": 4465 }, { "epoch": 0.1198475740661228, "grad_norm": 0.39453125, "learning_rate": 0.0007189051388702534, "loss": 1.5685, "step": 4466 }, { "epoch": 0.11987440961786175, "grad_norm": 0.388671875, "learning_rate": 0.0007190661478599221, "loss": 1.4576, "step": 4467 }, { "epoch": 0.11990124516960068, "grad_norm": 0.408203125, "learning_rate": 0.0007192271568495907, "loss": 1.6593, "step": 4468 }, { "epoch": 0.11992808072133963, "grad_norm": 0.404296875, "learning_rate": 0.0007193881658392593, "loss": 1.6661, "step": 4469 }, { "epoch": 0.11995491627307857, "grad_norm": 0.40234375, "learning_rate": 0.0007195491748289279, "loss": 1.534, "step": 4470 }, { "epoch": 0.11998175182481752, "grad_norm": 0.41015625, "learning_rate": 0.0007197101838185964, "loss": 1.5223, "step": 4471 }, { "epoch": 0.12000858737655647, "grad_norm": 0.40625, "learning_rate": 0.0007198711928082651, "loss": 1.6436, "step": 4472 }, { "epoch": 0.1200354229282954, "grad_norm": 0.392578125, "learning_rate": 0.0007200322017979337, "loss": 1.463, "step": 4473 }, { "epoch": 0.12006225848003435, "grad_norm": 0.404296875, "learning_rate": 0.0007201932107876023, "loss": 1.54, "step": 4474 }, { "epoch": 0.1200890940317733, "grad_norm": 0.39453125, "learning_rate": 0.0007203542197772709, "loss": 1.5518, "step": 4475 }, { "epoch": 0.12011592958351223, "grad_norm": 0.404296875, "learning_rate": 0.0007205152287669394, "loss": 1.5676, "step": 4476 }, { "epoch": 0.12014276513525118, "grad_norm": 0.375, "learning_rate": 0.0007206762377566081, "loss": 1.4808, "step": 4477 }, { "epoch": 0.12016960068699012, "grad_norm": 0.412109375, "learning_rate": 0.0007208372467462765, "loss": 1.5826, "step": 4478 }, { "epoch": 0.12019643623872907, "grad_norm": 0.390625, "learning_rate": 0.0007209982557359452, "loss": 1.443, "step": 4479 }, { "epoch": 0.12022327179046802, "grad_norm": 0.412109375, "learning_rate": 0.0007211592647256137, "loss": 1.5352, "step": 4480 }, { "epoch": 0.12025010734220695, "grad_norm": 0.4140625, "learning_rate": 0.0007213202737152823, "loss": 1.573, "step": 4481 }, { "epoch": 0.1202769428939459, "grad_norm": 0.40625, "learning_rate": 0.000721481282704951, "loss": 1.5214, "step": 4482 }, { "epoch": 0.12030377844568484, "grad_norm": 0.390625, "learning_rate": 0.0007216422916946195, "loss": 1.5494, "step": 4483 }, { "epoch": 0.12033061399742379, "grad_norm": 0.423828125, "learning_rate": 0.0007218033006842882, "loss": 1.5842, "step": 4484 }, { "epoch": 0.12035744954916273, "grad_norm": 0.423828125, "learning_rate": 0.0007219643096739567, "loss": 1.5965, "step": 4485 }, { "epoch": 0.12038428510090167, "grad_norm": 0.412109375, "learning_rate": 0.0007221253186636253, "loss": 1.6429, "step": 4486 }, { "epoch": 0.12041112065264062, "grad_norm": 0.41015625, "learning_rate": 0.000722286327653294, "loss": 1.6102, "step": 4487 }, { "epoch": 0.12043795620437957, "grad_norm": 0.404296875, "learning_rate": 0.0007224473366429625, "loss": 1.6233, "step": 4488 }, { "epoch": 0.1204647917561185, "grad_norm": 0.400390625, "learning_rate": 0.0007226083456326311, "loss": 1.586, "step": 4489 }, { "epoch": 0.12049162730785745, "grad_norm": 0.412109375, "learning_rate": 0.0007227693546222997, "loss": 1.6172, "step": 4490 }, { "epoch": 0.12051846285959639, "grad_norm": 0.40625, "learning_rate": 0.0007229303636119683, "loss": 1.6443, "step": 4491 }, { "epoch": 0.12054529841133534, "grad_norm": 0.412109375, "learning_rate": 0.000723091372601637, "loss": 1.5877, "step": 4492 }, { "epoch": 0.12057213396307428, "grad_norm": 0.416015625, "learning_rate": 0.0007232523815913054, "loss": 1.7057, "step": 4493 }, { "epoch": 0.12059896951481322, "grad_norm": 0.396484375, "learning_rate": 0.000723413390580974, "loss": 1.6209, "step": 4494 }, { "epoch": 0.12062580506655217, "grad_norm": 0.392578125, "learning_rate": 0.0007235743995706426, "loss": 1.4708, "step": 4495 }, { "epoch": 0.12065264061829112, "grad_norm": 0.41015625, "learning_rate": 0.0007237354085603112, "loss": 1.614, "step": 4496 }, { "epoch": 0.12067947617003005, "grad_norm": 0.41015625, "learning_rate": 0.0007238964175499797, "loss": 1.6633, "step": 4497 }, { "epoch": 0.120706311721769, "grad_norm": 0.419921875, "learning_rate": 0.0007240574265396484, "loss": 1.5993, "step": 4498 }, { "epoch": 0.12073314727350794, "grad_norm": 0.423828125, "learning_rate": 0.0007242184355293169, "loss": 1.6215, "step": 4499 }, { "epoch": 0.12075998282524689, "grad_norm": 0.388671875, "learning_rate": 0.0007243794445189856, "loss": 1.4727, "step": 4500 }, { "epoch": 0.12078681837698584, "grad_norm": 0.384765625, "learning_rate": 0.0007245404535086542, "loss": 1.4966, "step": 4501 }, { "epoch": 0.12081365392872477, "grad_norm": 0.392578125, "learning_rate": 0.0007247014624983227, "loss": 1.4819, "step": 4502 }, { "epoch": 0.12084048948046372, "grad_norm": 0.408203125, "learning_rate": 0.0007248624714879914, "loss": 1.6741, "step": 4503 }, { "epoch": 0.12086732503220267, "grad_norm": 0.404296875, "learning_rate": 0.0007250234804776599, "loss": 1.6525, "step": 4504 }, { "epoch": 0.1208941605839416, "grad_norm": 0.412109375, "learning_rate": 0.0007251844894673286, "loss": 1.5674, "step": 4505 }, { "epoch": 0.12092099613568055, "grad_norm": 0.384765625, "learning_rate": 0.0007253454984569972, "loss": 1.4698, "step": 4506 }, { "epoch": 0.12094783168741949, "grad_norm": 0.404296875, "learning_rate": 0.0007255065074466657, "loss": 1.6118, "step": 4507 }, { "epoch": 0.12097466723915844, "grad_norm": 0.396484375, "learning_rate": 0.0007256675164363344, "loss": 1.4842, "step": 4508 }, { "epoch": 0.12100150279089739, "grad_norm": 0.39453125, "learning_rate": 0.0007258285254260028, "loss": 1.6121, "step": 4509 }, { "epoch": 0.12102833834263632, "grad_norm": 0.40234375, "learning_rate": 0.0007259895344156715, "loss": 1.4784, "step": 4510 }, { "epoch": 0.12105517389437527, "grad_norm": 0.388671875, "learning_rate": 0.00072615054340534, "loss": 1.4335, "step": 4511 }, { "epoch": 0.1210820094461142, "grad_norm": 0.3828125, "learning_rate": 0.0007263115523950086, "loss": 1.5186, "step": 4512 }, { "epoch": 0.12110884499785315, "grad_norm": 0.39453125, "learning_rate": 0.0007264725613846772, "loss": 1.5842, "step": 4513 }, { "epoch": 0.1211356805495921, "grad_norm": 0.40625, "learning_rate": 0.0007266335703743458, "loss": 1.6096, "step": 4514 }, { "epoch": 0.12116251610133104, "grad_norm": 0.408203125, "learning_rate": 0.0007267945793640145, "loss": 1.5786, "step": 4515 }, { "epoch": 0.12118935165306999, "grad_norm": 0.416015625, "learning_rate": 0.000726955588353683, "loss": 1.5848, "step": 4516 }, { "epoch": 0.12121618720480894, "grad_norm": 0.38671875, "learning_rate": 0.0007271165973433516, "loss": 1.5096, "step": 4517 }, { "epoch": 0.12124302275654787, "grad_norm": 0.421875, "learning_rate": 0.0007272776063330202, "loss": 1.7085, "step": 4518 }, { "epoch": 0.12126985830828682, "grad_norm": 0.38671875, "learning_rate": 0.0007274386153226888, "loss": 1.4933, "step": 4519 }, { "epoch": 0.12129669386002576, "grad_norm": 0.388671875, "learning_rate": 0.0007275996243123575, "loss": 1.4291, "step": 4520 }, { "epoch": 0.1213235294117647, "grad_norm": 0.412109375, "learning_rate": 0.000727760633302026, "loss": 1.5233, "step": 4521 }, { "epoch": 0.12135036496350365, "grad_norm": 0.390625, "learning_rate": 0.0007279216422916946, "loss": 1.4928, "step": 4522 }, { "epoch": 0.12137720051524259, "grad_norm": 0.390625, "learning_rate": 0.0007280826512813632, "loss": 1.4505, "step": 4523 }, { "epoch": 0.12140403606698154, "grad_norm": 0.392578125, "learning_rate": 0.0007282436602710317, "loss": 1.6244, "step": 4524 }, { "epoch": 0.12143087161872049, "grad_norm": 0.41796875, "learning_rate": 0.0007284046692607002, "loss": 1.6275, "step": 4525 }, { "epoch": 0.12145770717045942, "grad_norm": 0.40234375, "learning_rate": 0.0007285656782503689, "loss": 1.5118, "step": 4526 }, { "epoch": 0.12148454272219837, "grad_norm": 0.408203125, "learning_rate": 0.0007287266872400375, "loss": 1.6632, "step": 4527 }, { "epoch": 0.1215113782739373, "grad_norm": 0.396484375, "learning_rate": 0.000728887696229706, "loss": 1.5261, "step": 4528 }, { "epoch": 0.12153821382567626, "grad_norm": 0.388671875, "learning_rate": 0.0007290487052193747, "loss": 1.5286, "step": 4529 }, { "epoch": 0.1215650493774152, "grad_norm": 0.40625, "learning_rate": 0.0007292097142090432, "loss": 1.5958, "step": 4530 }, { "epoch": 0.12159188492915414, "grad_norm": 0.4140625, "learning_rate": 0.0007293707231987119, "loss": 1.582, "step": 4531 }, { "epoch": 0.12161872048089309, "grad_norm": 0.396484375, "learning_rate": 0.0007295317321883804, "loss": 1.5972, "step": 4532 }, { "epoch": 0.12164555603263202, "grad_norm": 0.404296875, "learning_rate": 0.000729692741178049, "loss": 1.623, "step": 4533 }, { "epoch": 0.12167239158437097, "grad_norm": 0.3984375, "learning_rate": 0.0007298537501677177, "loss": 1.5676, "step": 4534 }, { "epoch": 0.12169922713610992, "grad_norm": 0.4140625, "learning_rate": 0.0007300147591573862, "loss": 1.5619, "step": 4535 }, { "epoch": 0.12172606268784886, "grad_norm": 0.38671875, "learning_rate": 0.0007301757681470549, "loss": 1.5159, "step": 4536 }, { "epoch": 0.1217528982395878, "grad_norm": 0.40234375, "learning_rate": 0.0007303367771367234, "loss": 1.4157, "step": 4537 }, { "epoch": 0.12177973379132676, "grad_norm": 0.39453125, "learning_rate": 0.000730497786126392, "loss": 1.5496, "step": 4538 }, { "epoch": 0.12180656934306569, "grad_norm": 0.40234375, "learning_rate": 0.0007306587951160607, "loss": 1.5598, "step": 4539 }, { "epoch": 0.12183340489480464, "grad_norm": 0.39453125, "learning_rate": 0.0007308198041057291, "loss": 1.4831, "step": 4540 }, { "epoch": 0.12186024044654357, "grad_norm": 0.4140625, "learning_rate": 0.0007309808130953978, "loss": 1.6374, "step": 4541 }, { "epoch": 0.12188707599828252, "grad_norm": 0.408203125, "learning_rate": 0.0007311418220850663, "loss": 1.5423, "step": 4542 }, { "epoch": 0.12191391155002147, "grad_norm": 0.412109375, "learning_rate": 0.0007313028310747349, "loss": 1.7433, "step": 4543 }, { "epoch": 0.12194074710176041, "grad_norm": 0.416015625, "learning_rate": 0.0007314638400644035, "loss": 1.6227, "step": 4544 }, { "epoch": 0.12196758265349936, "grad_norm": 0.40625, "learning_rate": 0.0007316248490540721, "loss": 1.6241, "step": 4545 }, { "epoch": 0.1219944182052383, "grad_norm": 0.41015625, "learning_rate": 0.0007317858580437408, "loss": 1.5571, "step": 4546 }, { "epoch": 0.12202125375697724, "grad_norm": 0.39453125, "learning_rate": 0.0007319468670334093, "loss": 1.5727, "step": 4547 }, { "epoch": 0.12204808930871619, "grad_norm": 0.384765625, "learning_rate": 0.0007321078760230779, "loss": 1.4668, "step": 4548 }, { "epoch": 0.12207492486045513, "grad_norm": 0.404296875, "learning_rate": 0.0007322688850127465, "loss": 1.6293, "step": 4549 }, { "epoch": 0.12210176041219407, "grad_norm": 0.400390625, "learning_rate": 0.0007324298940024151, "loss": 1.5843, "step": 4550 }, { "epoch": 0.12212859596393302, "grad_norm": 0.400390625, "learning_rate": 0.0007325909029920836, "loss": 1.5983, "step": 4551 }, { "epoch": 0.12215543151567196, "grad_norm": 0.39453125, "learning_rate": 0.0007327519119817523, "loss": 1.5229, "step": 4552 }, { "epoch": 0.12218226706741091, "grad_norm": 0.38671875, "learning_rate": 0.0007329129209714209, "loss": 1.4427, "step": 4553 }, { "epoch": 0.12220910261914986, "grad_norm": 0.380859375, "learning_rate": 0.0007330739299610895, "loss": 1.4621, "step": 4554 }, { "epoch": 0.12223593817088879, "grad_norm": 0.408203125, "learning_rate": 0.000733234938950758, "loss": 1.6411, "step": 4555 }, { "epoch": 0.12226277372262774, "grad_norm": 0.384765625, "learning_rate": 0.0007333959479404265, "loss": 1.4624, "step": 4556 }, { "epoch": 0.12228960927436668, "grad_norm": 0.408203125, "learning_rate": 0.0007335569569300952, "loss": 1.6321, "step": 4557 }, { "epoch": 0.12231644482610562, "grad_norm": 0.388671875, "learning_rate": 0.0007337179659197637, "loss": 1.4881, "step": 4558 }, { "epoch": 0.12234328037784457, "grad_norm": 0.400390625, "learning_rate": 0.0007338789749094324, "loss": 1.5282, "step": 4559 }, { "epoch": 0.12237011592958351, "grad_norm": 0.3984375, "learning_rate": 0.000734039983899101, "loss": 1.5916, "step": 4560 }, { "epoch": 0.12239695148132246, "grad_norm": 0.3828125, "learning_rate": 0.0007342009928887695, "loss": 1.5435, "step": 4561 }, { "epoch": 0.1224237870330614, "grad_norm": 0.40234375, "learning_rate": 0.0007343620018784382, "loss": 1.5938, "step": 4562 }, { "epoch": 0.12245062258480034, "grad_norm": 0.40234375, "learning_rate": 0.0007345230108681067, "loss": 1.6683, "step": 4563 }, { "epoch": 0.12247745813653929, "grad_norm": 0.421875, "learning_rate": 0.0007346840198577754, "loss": 1.579, "step": 4564 }, { "epoch": 0.12250429368827823, "grad_norm": 0.40234375, "learning_rate": 0.0007348450288474439, "loss": 1.576, "step": 4565 }, { "epoch": 0.12253112924001718, "grad_norm": 0.41015625, "learning_rate": 0.0007350060378371125, "loss": 1.5614, "step": 4566 }, { "epoch": 0.12255796479175612, "grad_norm": 0.40625, "learning_rate": 0.0007351670468267812, "loss": 1.506, "step": 4567 }, { "epoch": 0.12258480034349506, "grad_norm": 0.4140625, "learning_rate": 0.0007353280558164497, "loss": 1.5621, "step": 4568 }, { "epoch": 0.12261163589523401, "grad_norm": 0.390625, "learning_rate": 0.0007354890648061183, "loss": 1.5636, "step": 4569 }, { "epoch": 0.12263847144697294, "grad_norm": 0.40234375, "learning_rate": 0.0007356500737957869, "loss": 1.5952, "step": 4570 }, { "epoch": 0.12266530699871189, "grad_norm": 0.39453125, "learning_rate": 0.0007358110827854554, "loss": 1.5116, "step": 4571 }, { "epoch": 0.12269214255045084, "grad_norm": 0.41015625, "learning_rate": 0.000735972091775124, "loss": 1.5423, "step": 4572 }, { "epoch": 0.12271897810218978, "grad_norm": 0.404296875, "learning_rate": 0.0007361331007647926, "loss": 1.5532, "step": 4573 }, { "epoch": 0.12274581365392873, "grad_norm": 0.404296875, "learning_rate": 0.0007362941097544612, "loss": 1.5213, "step": 4574 }, { "epoch": 0.12277264920566767, "grad_norm": 0.404296875, "learning_rate": 0.0007364551187441298, "loss": 1.5322, "step": 4575 }, { "epoch": 0.12279948475740661, "grad_norm": 0.51171875, "learning_rate": 0.0007366161277337984, "loss": 1.3069, "step": 4576 }, { "epoch": 0.12282632030914556, "grad_norm": 0.43359375, "learning_rate": 0.000736777136723467, "loss": 1.3729, "step": 4577 }, { "epoch": 0.1228531558608845, "grad_norm": 0.515625, "learning_rate": 0.0007369381457131356, "loss": 1.3705, "step": 4578 }, { "epoch": 0.12287999141262344, "grad_norm": 0.3828125, "learning_rate": 0.0007370991547028042, "loss": 1.3189, "step": 4579 }, { "epoch": 0.12290682696436239, "grad_norm": 0.4140625, "learning_rate": 0.0007372601636924728, "loss": 1.4745, "step": 4580 }, { "epoch": 0.12293366251610133, "grad_norm": 0.3984375, "learning_rate": 0.0007374211726821414, "loss": 1.2296, "step": 4581 }, { "epoch": 0.12296049806784028, "grad_norm": 0.38671875, "learning_rate": 0.00073758218167181, "loss": 1.2923, "step": 4582 }, { "epoch": 0.12298733361957921, "grad_norm": 0.4140625, "learning_rate": 0.0007377431906614786, "loss": 1.3589, "step": 4583 }, { "epoch": 0.12301416917131816, "grad_norm": 0.423828125, "learning_rate": 0.0007379041996511471, "loss": 1.3755, "step": 4584 }, { "epoch": 0.12304100472305711, "grad_norm": 0.40625, "learning_rate": 0.0007380652086408158, "loss": 1.3901, "step": 4585 }, { "epoch": 0.12306784027479604, "grad_norm": 0.388671875, "learning_rate": 0.0007382262176304842, "loss": 1.3402, "step": 4586 }, { "epoch": 0.123094675826535, "grad_norm": 0.376953125, "learning_rate": 0.0007383872266201528, "loss": 1.1942, "step": 4587 }, { "epoch": 0.12312151137827394, "grad_norm": 0.376953125, "learning_rate": 0.0007385482356098215, "loss": 1.2526, "step": 4588 }, { "epoch": 0.12314834693001288, "grad_norm": 0.396484375, "learning_rate": 0.00073870924459949, "loss": 1.324, "step": 4589 }, { "epoch": 0.12317518248175183, "grad_norm": 0.37109375, "learning_rate": 0.0007388702535891587, "loss": 1.3335, "step": 4590 }, { "epoch": 0.12320201803349076, "grad_norm": 0.373046875, "learning_rate": 0.0007390312625788272, "loss": 1.3194, "step": 4591 }, { "epoch": 0.12322885358522971, "grad_norm": 0.380859375, "learning_rate": 0.0007391922715684958, "loss": 1.2805, "step": 4592 }, { "epoch": 0.12325568913696866, "grad_norm": 0.373046875, "learning_rate": 0.0007393532805581645, "loss": 1.3584, "step": 4593 }, { "epoch": 0.1232825246887076, "grad_norm": 0.3671875, "learning_rate": 0.000739514289547833, "loss": 1.2058, "step": 4594 }, { "epoch": 0.12330936024044654, "grad_norm": 0.384765625, "learning_rate": 0.0007396752985375017, "loss": 1.3997, "step": 4595 }, { "epoch": 0.1233361957921855, "grad_norm": 0.369140625, "learning_rate": 0.0007398363075271702, "loss": 1.4086, "step": 4596 }, { "epoch": 0.12336303134392443, "grad_norm": 0.359375, "learning_rate": 0.0007399973165168388, "loss": 1.2604, "step": 4597 }, { "epoch": 0.12338986689566338, "grad_norm": 0.375, "learning_rate": 0.0007401583255065075, "loss": 1.3393, "step": 4598 }, { "epoch": 0.12341670244740231, "grad_norm": 0.3671875, "learning_rate": 0.000740319334496176, "loss": 1.2211, "step": 4599 }, { "epoch": 0.12344353799914126, "grad_norm": 0.369140625, "learning_rate": 0.0007404803434858447, "loss": 1.4166, "step": 4600 }, { "epoch": 0.12347037355088021, "grad_norm": 0.359375, "learning_rate": 0.0007406413524755132, "loss": 1.2243, "step": 4601 }, { "epoch": 0.12349720910261915, "grad_norm": 0.3515625, "learning_rate": 0.0007408023614651817, "loss": 1.2338, "step": 4602 }, { "epoch": 0.1235240446543581, "grad_norm": 0.37890625, "learning_rate": 0.0007409633704548503, "loss": 1.3411, "step": 4603 }, { "epoch": 0.12355088020609704, "grad_norm": 0.35546875, "learning_rate": 0.0007411243794445189, "loss": 1.2813, "step": 4604 }, { "epoch": 0.12357771575783598, "grad_norm": 0.37109375, "learning_rate": 0.0007412853884341874, "loss": 1.3662, "step": 4605 }, { "epoch": 0.12360455130957493, "grad_norm": 0.361328125, "learning_rate": 0.0007414463974238561, "loss": 1.2414, "step": 4606 }, { "epoch": 0.12363138686131386, "grad_norm": 0.37109375, "learning_rate": 0.0007416074064135247, "loss": 1.303, "step": 4607 }, { "epoch": 0.12365822241305281, "grad_norm": 0.3671875, "learning_rate": 0.0007417684154031933, "loss": 1.2021, "step": 4608 }, { "epoch": 0.12368505796479176, "grad_norm": 0.36328125, "learning_rate": 0.0007419294243928619, "loss": 1.1815, "step": 4609 }, { "epoch": 0.1237118935165307, "grad_norm": 0.337890625, "learning_rate": 0.0007420904333825304, "loss": 1.1912, "step": 4610 }, { "epoch": 0.12373872906826965, "grad_norm": 0.375, "learning_rate": 0.0007422514423721991, "loss": 1.3667, "step": 4611 }, { "epoch": 0.12376556462000858, "grad_norm": 0.3671875, "learning_rate": 0.0007424124513618677, "loss": 1.2269, "step": 4612 }, { "epoch": 0.12379240017174753, "grad_norm": 0.375, "learning_rate": 0.0007425734603515362, "loss": 1.2898, "step": 4613 }, { "epoch": 0.12381923572348648, "grad_norm": 0.35546875, "learning_rate": 0.0007427344693412049, "loss": 1.2668, "step": 4614 }, { "epoch": 0.12384607127522541, "grad_norm": 0.384765625, "learning_rate": 0.0007428954783308734, "loss": 1.3598, "step": 4615 }, { "epoch": 0.12387290682696436, "grad_norm": 0.37109375, "learning_rate": 0.0007430564873205421, "loss": 1.2823, "step": 4616 }, { "epoch": 0.12389974237870331, "grad_norm": 0.3671875, "learning_rate": 0.0007432174963102105, "loss": 1.326, "step": 4617 }, { "epoch": 0.12392657793044225, "grad_norm": 0.37109375, "learning_rate": 0.0007433785052998791, "loss": 1.3263, "step": 4618 }, { "epoch": 0.1239534134821812, "grad_norm": 0.3515625, "learning_rate": 0.0007435395142895477, "loss": 1.2587, "step": 4619 }, { "epoch": 0.12398024903392013, "grad_norm": 0.353515625, "learning_rate": 0.0007437005232792163, "loss": 1.2486, "step": 4620 }, { "epoch": 0.12400708458565908, "grad_norm": 0.3984375, "learning_rate": 0.000743861532268885, "loss": 1.3698, "step": 4621 }, { "epoch": 0.12403392013739803, "grad_norm": 0.357421875, "learning_rate": 0.0007440225412585535, "loss": 1.1942, "step": 4622 }, { "epoch": 0.12406075568913696, "grad_norm": 0.345703125, "learning_rate": 0.0007441835502482221, "loss": 1.1558, "step": 4623 }, { "epoch": 0.12408759124087591, "grad_norm": 0.353515625, "learning_rate": 0.0007443445592378907, "loss": 1.2642, "step": 4624 }, { "epoch": 0.12411442679261486, "grad_norm": 0.359375, "learning_rate": 0.0007445055682275593, "loss": 1.3493, "step": 4625 }, { "epoch": 0.1241412623443538, "grad_norm": 0.34765625, "learning_rate": 0.000744666577217228, "loss": 1.2541, "step": 4626 }, { "epoch": 0.12416809789609275, "grad_norm": 0.3671875, "learning_rate": 0.0007448275862068965, "loss": 1.3631, "step": 4627 }, { "epoch": 0.12419493344783168, "grad_norm": 0.37109375, "learning_rate": 0.0007449885951965651, "loss": 1.2638, "step": 4628 }, { "epoch": 0.12422176899957063, "grad_norm": 0.37109375, "learning_rate": 0.0007451496041862337, "loss": 1.4151, "step": 4629 }, { "epoch": 0.12424860455130958, "grad_norm": 0.375, "learning_rate": 0.0007453106131759023, "loss": 1.3035, "step": 4630 }, { "epoch": 0.12427544010304852, "grad_norm": 0.37109375, "learning_rate": 0.000745471622165571, "loss": 1.3162, "step": 4631 }, { "epoch": 0.12430227565478746, "grad_norm": 0.349609375, "learning_rate": 0.0007456326311552395, "loss": 1.2858, "step": 4632 }, { "epoch": 0.1243291112065264, "grad_norm": 0.365234375, "learning_rate": 0.0007457936401449079, "loss": 1.338, "step": 4633 }, { "epoch": 0.12435594675826535, "grad_norm": 0.33984375, "learning_rate": 0.0007459546491345766, "loss": 1.1694, "step": 4634 }, { "epoch": 0.1243827823100043, "grad_norm": 0.341796875, "learning_rate": 0.0007461156581242452, "loss": 1.1805, "step": 4635 }, { "epoch": 0.12440961786174323, "grad_norm": 0.341796875, "learning_rate": 0.0007462766671139137, "loss": 1.2744, "step": 4636 }, { "epoch": 0.12443645341348218, "grad_norm": 0.37109375, "learning_rate": 0.0007464376761035824, "loss": 1.3576, "step": 4637 }, { "epoch": 0.12446328896522113, "grad_norm": 0.353515625, "learning_rate": 0.0007465986850932509, "loss": 1.301, "step": 4638 }, { "epoch": 0.12449012451696007, "grad_norm": 0.36328125, "learning_rate": 0.0007467596940829196, "loss": 1.3307, "step": 4639 }, { "epoch": 0.12451696006869901, "grad_norm": 0.34765625, "learning_rate": 0.0007469207030725882, "loss": 1.1727, "step": 4640 }, { "epoch": 0.12454379562043795, "grad_norm": 0.33203125, "learning_rate": 0.0007470817120622567, "loss": 1.2022, "step": 4641 }, { "epoch": 0.1245706311721769, "grad_norm": 0.3828125, "learning_rate": 0.0007472427210519254, "loss": 1.3184, "step": 4642 }, { "epoch": 0.12459746672391585, "grad_norm": 0.380859375, "learning_rate": 0.0007474037300415939, "loss": 1.2469, "step": 4643 }, { "epoch": 0.12462430227565478, "grad_norm": 0.38671875, "learning_rate": 0.0007475647390312626, "loss": 1.2945, "step": 4644 }, { "epoch": 0.12465113782739373, "grad_norm": 0.365234375, "learning_rate": 0.0007477257480209312, "loss": 1.2843, "step": 4645 }, { "epoch": 0.12467797337913268, "grad_norm": 0.365234375, "learning_rate": 0.0007478867570105997, "loss": 1.2193, "step": 4646 }, { "epoch": 0.12470480893087162, "grad_norm": 0.3671875, "learning_rate": 0.0007480477660002684, "loss": 1.2971, "step": 4647 }, { "epoch": 0.12473164448261057, "grad_norm": 0.359375, "learning_rate": 0.0007482087749899368, "loss": 1.2996, "step": 4648 }, { "epoch": 0.1247584800343495, "grad_norm": 0.375, "learning_rate": 0.0007483697839796054, "loss": 1.4089, "step": 4649 }, { "epoch": 0.12478531558608845, "grad_norm": 0.36328125, "learning_rate": 0.000748530792969274, "loss": 1.2909, "step": 4650 }, { "epoch": 0.1248121511378274, "grad_norm": 0.36328125, "learning_rate": 0.0007486918019589426, "loss": 1.2991, "step": 4651 }, { "epoch": 0.12483898668956633, "grad_norm": 0.357421875, "learning_rate": 0.0007488528109486112, "loss": 1.2696, "step": 4652 }, { "epoch": 0.12486582224130528, "grad_norm": 0.375, "learning_rate": 0.0007490138199382798, "loss": 1.3883, "step": 4653 }, { "epoch": 0.12489265779304423, "grad_norm": 0.35546875, "learning_rate": 0.0007491748289279484, "loss": 1.2381, "step": 4654 }, { "epoch": 0.12491949334478317, "grad_norm": 0.361328125, "learning_rate": 0.000749335837917617, "loss": 1.2501, "step": 4655 }, { "epoch": 0.12494632889652212, "grad_norm": 0.35546875, "learning_rate": 0.0007494968469072856, "loss": 1.3364, "step": 4656 }, { "epoch": 0.12497316444826105, "grad_norm": 0.345703125, "learning_rate": 0.0007496578558969541, "loss": 1.1572, "step": 4657 }, { "epoch": 0.125, "grad_norm": 0.33203125, "learning_rate": 0.0007498188648866228, "loss": 1.1829, "step": 4658 }, { "epoch": 0.12502683555173894, "grad_norm": 0.349609375, "learning_rate": 0.0007499798738762914, "loss": 1.2486, "step": 4659 }, { "epoch": 0.1250536711034779, "grad_norm": 0.353515625, "learning_rate": 0.00075014088286596, "loss": 1.295, "step": 4660 }, { "epoch": 0.12508050665521683, "grad_norm": 0.384765625, "learning_rate": 0.0007503018918556286, "loss": 1.3799, "step": 4661 }, { "epoch": 0.12510734220695577, "grad_norm": 0.3515625, "learning_rate": 0.0007504629008452971, "loss": 1.2961, "step": 4662 }, { "epoch": 0.12513417775869473, "grad_norm": 0.361328125, "learning_rate": 0.0007506239098349657, "loss": 1.2916, "step": 4663 }, { "epoch": 0.12516101331043367, "grad_norm": 0.3671875, "learning_rate": 0.0007507849188246342, "loss": 1.4377, "step": 4664 }, { "epoch": 0.1251878488621726, "grad_norm": 0.357421875, "learning_rate": 0.0007509459278143029, "loss": 1.3408, "step": 4665 }, { "epoch": 0.12521468441391154, "grad_norm": 0.349609375, "learning_rate": 0.0007511069368039715, "loss": 1.2113, "step": 4666 }, { "epoch": 0.1252415199656505, "grad_norm": 0.337890625, "learning_rate": 0.00075126794579364, "loss": 1.1912, "step": 4667 }, { "epoch": 0.12526835551738943, "grad_norm": 0.373046875, "learning_rate": 0.0007514289547833087, "loss": 1.3746, "step": 4668 }, { "epoch": 0.12529519106912837, "grad_norm": 0.3515625, "learning_rate": 0.0007515899637729772, "loss": 1.2496, "step": 4669 }, { "epoch": 0.12532202662086733, "grad_norm": 0.361328125, "learning_rate": 0.0007517509727626459, "loss": 1.337, "step": 4670 }, { "epoch": 0.12534886217260627, "grad_norm": 0.34765625, "learning_rate": 0.0007519119817523144, "loss": 1.2576, "step": 4671 }, { "epoch": 0.1253756977243452, "grad_norm": 0.353515625, "learning_rate": 0.000752072990741983, "loss": 1.2553, "step": 4672 }, { "epoch": 0.12540253327608417, "grad_norm": 0.3515625, "learning_rate": 0.0007522339997316517, "loss": 1.2733, "step": 4673 }, { "epoch": 0.1254293688278231, "grad_norm": 0.353515625, "learning_rate": 0.0007523950087213202, "loss": 1.2316, "step": 4674 }, { "epoch": 0.12545620437956204, "grad_norm": 0.345703125, "learning_rate": 0.0007525560177109889, "loss": 1.1709, "step": 4675 }, { "epoch": 0.125483039931301, "grad_norm": 0.359375, "learning_rate": 0.0007527170267006574, "loss": 1.2011, "step": 4676 }, { "epoch": 0.12550987548303993, "grad_norm": 0.369140625, "learning_rate": 0.000752878035690326, "loss": 1.3164, "step": 4677 }, { "epoch": 0.12553671103477887, "grad_norm": 0.376953125, "learning_rate": 0.0007530390446799947, "loss": 1.2665, "step": 4678 }, { "epoch": 0.12556354658651783, "grad_norm": 0.3671875, "learning_rate": 0.0007532000536696631, "loss": 1.333, "step": 4679 }, { "epoch": 0.12559038213825677, "grad_norm": 0.353515625, "learning_rate": 0.0007533610626593317, "loss": 1.168, "step": 4680 }, { "epoch": 0.1256172176899957, "grad_norm": 0.359375, "learning_rate": 0.0007535220716490003, "loss": 1.24, "step": 4681 }, { "epoch": 0.12564405324173464, "grad_norm": 0.357421875, "learning_rate": 0.0007536830806386689, "loss": 1.3507, "step": 4682 }, { "epoch": 0.1256708887934736, "grad_norm": 0.380859375, "learning_rate": 0.0007538440896283375, "loss": 1.3311, "step": 4683 }, { "epoch": 0.12569772434521254, "grad_norm": 0.365234375, "learning_rate": 0.0007540050986180061, "loss": 1.3622, "step": 4684 }, { "epoch": 0.12572455989695147, "grad_norm": 0.34765625, "learning_rate": 0.0007541661076076746, "loss": 1.2112, "step": 4685 }, { "epoch": 0.12575139544869043, "grad_norm": 0.373046875, "learning_rate": 0.0007543271165973433, "loss": 1.4163, "step": 4686 }, { "epoch": 0.12577823100042937, "grad_norm": 0.359375, "learning_rate": 0.0007544881255870119, "loss": 1.335, "step": 4687 }, { "epoch": 0.1258050665521683, "grad_norm": 0.3515625, "learning_rate": 0.0007546491345766805, "loss": 1.2458, "step": 4688 }, { "epoch": 0.12583190210390727, "grad_norm": 0.345703125, "learning_rate": 0.0007548101435663491, "loss": 1.1375, "step": 4689 }, { "epoch": 0.1258587376556462, "grad_norm": 0.353515625, "learning_rate": 0.0007549711525560176, "loss": 1.2797, "step": 4690 }, { "epoch": 0.12588557320738514, "grad_norm": 0.373046875, "learning_rate": 0.0007551321615456863, "loss": 1.3657, "step": 4691 }, { "epoch": 0.1259124087591241, "grad_norm": 0.3359375, "learning_rate": 0.0007552931705353549, "loss": 1.1822, "step": 4692 }, { "epoch": 0.12593924431086304, "grad_norm": 0.365234375, "learning_rate": 0.0007554541795250234, "loss": 1.3097, "step": 4693 }, { "epoch": 0.12596607986260197, "grad_norm": 0.361328125, "learning_rate": 0.000755615188514692, "loss": 1.2882, "step": 4694 }, { "epoch": 0.1259929154143409, "grad_norm": 0.3671875, "learning_rate": 0.0007557761975043605, "loss": 1.335, "step": 4695 }, { "epoch": 0.12601975096607987, "grad_norm": 0.33984375, "learning_rate": 0.0007559372064940292, "loss": 1.1768, "step": 4696 }, { "epoch": 0.1260465865178188, "grad_norm": 0.353515625, "learning_rate": 0.0007560982154836977, "loss": 1.2486, "step": 4697 }, { "epoch": 0.12607342206955774, "grad_norm": 0.37109375, "learning_rate": 0.0007562592244733663, "loss": 1.331, "step": 4698 }, { "epoch": 0.1261002576212967, "grad_norm": 0.373046875, "learning_rate": 0.000756420233463035, "loss": 1.4524, "step": 4699 }, { "epoch": 0.12612709317303564, "grad_norm": 0.357421875, "learning_rate": 0.0007565812424527035, "loss": 1.2856, "step": 4700 }, { "epoch": 0.12615392872477457, "grad_norm": 0.359375, "learning_rate": 0.0007567422514423722, "loss": 1.3108, "step": 4701 }, { "epoch": 0.12618076427651354, "grad_norm": 0.357421875, "learning_rate": 0.0007569032604320407, "loss": 1.2969, "step": 4702 }, { "epoch": 0.12620759982825247, "grad_norm": 0.3828125, "learning_rate": 0.0007570642694217093, "loss": 1.304, "step": 4703 }, { "epoch": 0.1262344353799914, "grad_norm": 0.359375, "learning_rate": 0.0007572252784113779, "loss": 1.2918, "step": 4704 }, { "epoch": 0.12626127093173037, "grad_norm": 0.373046875, "learning_rate": 0.0007573862874010465, "loss": 1.3583, "step": 4705 }, { "epoch": 0.1262881064834693, "grad_norm": 0.333984375, "learning_rate": 0.0007575472963907152, "loss": 1.1568, "step": 4706 }, { "epoch": 0.12631494203520824, "grad_norm": 0.435546875, "learning_rate": 0.0007577083053803837, "loss": 1.2075, "step": 4707 }, { "epoch": 0.1263417775869472, "grad_norm": 0.353515625, "learning_rate": 0.0007578693143700523, "loss": 1.19, "step": 4708 }, { "epoch": 0.12636861313868614, "grad_norm": 0.345703125, "learning_rate": 0.0007580303233597209, "loss": 1.2515, "step": 4709 }, { "epoch": 0.12639544869042507, "grad_norm": 0.365234375, "learning_rate": 0.0007581913323493894, "loss": 1.2553, "step": 4710 }, { "epoch": 0.126422284242164, "grad_norm": 0.44140625, "learning_rate": 0.0007583523413390579, "loss": 1.3591, "step": 4711 }, { "epoch": 0.12644911979390297, "grad_norm": 0.3359375, "learning_rate": 0.0007585133503287266, "loss": 1.2712, "step": 4712 }, { "epoch": 0.1264759553456419, "grad_norm": 0.373046875, "learning_rate": 0.0007586743593183952, "loss": 1.2933, "step": 4713 }, { "epoch": 0.12650279089738084, "grad_norm": 0.33984375, "learning_rate": 0.0007588353683080638, "loss": 1.1656, "step": 4714 }, { "epoch": 0.1265296264491198, "grad_norm": 0.34765625, "learning_rate": 0.0007589963772977324, "loss": 1.2156, "step": 4715 }, { "epoch": 0.12655646200085874, "grad_norm": 0.345703125, "learning_rate": 0.0007591573862874009, "loss": 1.1503, "step": 4716 }, { "epoch": 0.12658329755259767, "grad_norm": 0.37890625, "learning_rate": 0.0007593183952770696, "loss": 1.234, "step": 4717 }, { "epoch": 0.12661013310433664, "grad_norm": 0.361328125, "learning_rate": 0.0007594794042667382, "loss": 1.2392, "step": 4718 }, { "epoch": 0.12663696865607557, "grad_norm": 0.34375, "learning_rate": 0.0007596404132564068, "loss": 1.1389, "step": 4719 }, { "epoch": 0.1266638042078145, "grad_norm": 0.369140625, "learning_rate": 0.0007598014222460754, "loss": 1.2219, "step": 4720 }, { "epoch": 0.12669063975955347, "grad_norm": 0.35546875, "learning_rate": 0.0007599624312357439, "loss": 1.2154, "step": 4721 }, { "epoch": 0.1267174753112924, "grad_norm": 0.349609375, "learning_rate": 0.0007601234402254126, "loss": 1.2491, "step": 4722 }, { "epoch": 0.12674431086303134, "grad_norm": 0.353515625, "learning_rate": 0.0007602844492150811, "loss": 1.2472, "step": 4723 }, { "epoch": 0.12677114641477027, "grad_norm": 0.375, "learning_rate": 0.0007604454582047498, "loss": 1.3872, "step": 4724 }, { "epoch": 0.12679798196650924, "grad_norm": 0.341796875, "learning_rate": 0.0007606064671944182, "loss": 1.2711, "step": 4725 }, { "epoch": 0.12682481751824817, "grad_norm": 0.369140625, "learning_rate": 0.0007607674761840868, "loss": 1.3723, "step": 4726 }, { "epoch": 0.1268516530699871, "grad_norm": 0.369140625, "learning_rate": 0.0007609284851737555, "loss": 1.2892, "step": 4727 }, { "epoch": 0.12687848862172607, "grad_norm": 0.365234375, "learning_rate": 0.000761089494163424, "loss": 1.3788, "step": 4728 }, { "epoch": 0.126905324173465, "grad_norm": 0.330078125, "learning_rate": 0.0007612505031530926, "loss": 1.1157, "step": 4729 }, { "epoch": 0.12693215972520394, "grad_norm": 0.353515625, "learning_rate": 0.0007614115121427612, "loss": 1.2646, "step": 4730 }, { "epoch": 0.1269589952769429, "grad_norm": 0.349609375, "learning_rate": 0.0007615725211324298, "loss": 1.2738, "step": 4731 }, { "epoch": 0.12698583082868184, "grad_norm": 0.365234375, "learning_rate": 0.0007617335301220985, "loss": 1.3831, "step": 4732 }, { "epoch": 0.12701266638042077, "grad_norm": 0.349609375, "learning_rate": 0.000761894539111767, "loss": 1.2029, "step": 4733 }, { "epoch": 0.12703950193215974, "grad_norm": 0.32421875, "learning_rate": 0.0007620555481014356, "loss": 1.1137, "step": 4734 }, { "epoch": 0.12706633748389867, "grad_norm": 0.3671875, "learning_rate": 0.0007622165570911042, "loss": 1.3611, "step": 4735 }, { "epoch": 0.1270931730356376, "grad_norm": 0.33984375, "learning_rate": 0.0007623775660807728, "loss": 1.1932, "step": 4736 }, { "epoch": 0.12712000858737654, "grad_norm": 0.33203125, "learning_rate": 0.0007625385750704415, "loss": 1.1477, "step": 4737 }, { "epoch": 0.1271468441391155, "grad_norm": 0.341796875, "learning_rate": 0.00076269958406011, "loss": 1.2293, "step": 4738 }, { "epoch": 0.12717367969085444, "grad_norm": 0.35546875, "learning_rate": 0.0007628605930497786, "loss": 1.2955, "step": 4739 }, { "epoch": 0.12720051524259338, "grad_norm": 0.349609375, "learning_rate": 0.0007630216020394472, "loss": 1.2944, "step": 4740 }, { "epoch": 0.12722735079433234, "grad_norm": 0.357421875, "learning_rate": 0.0007631826110291157, "loss": 1.2333, "step": 4741 }, { "epoch": 0.12725418634607127, "grad_norm": 0.34375, "learning_rate": 0.0007633436200187842, "loss": 1.2736, "step": 4742 }, { "epoch": 0.1272810218978102, "grad_norm": 0.37109375, "learning_rate": 0.0007635046290084529, "loss": 1.3871, "step": 4743 }, { "epoch": 0.12730785744954917, "grad_norm": 0.33984375, "learning_rate": 0.0007636656379981214, "loss": 1.1725, "step": 4744 }, { "epoch": 0.1273346930012881, "grad_norm": 0.376953125, "learning_rate": 0.0007638266469877901, "loss": 1.3931, "step": 4745 }, { "epoch": 0.12736152855302704, "grad_norm": 0.365234375, "learning_rate": 0.0007639876559774587, "loss": 1.2936, "step": 4746 }, { "epoch": 0.127388364104766, "grad_norm": 0.341796875, "learning_rate": 0.0007641486649671272, "loss": 1.1906, "step": 4747 }, { "epoch": 0.12741519965650494, "grad_norm": 0.36328125, "learning_rate": 0.0007643096739567959, "loss": 1.3104, "step": 4748 }, { "epoch": 0.12744203520824388, "grad_norm": 0.345703125, "learning_rate": 0.0007644706829464644, "loss": 1.2162, "step": 4749 }, { "epoch": 0.12746887075998284, "grad_norm": 0.359375, "learning_rate": 0.0007646316919361331, "loss": 1.324, "step": 4750 }, { "epoch": 0.12749570631172177, "grad_norm": 0.33984375, "learning_rate": 0.0007647927009258017, "loss": 1.2191, "step": 4751 }, { "epoch": 0.1275225418634607, "grad_norm": 0.361328125, "learning_rate": 0.0007649537099154702, "loss": 1.2448, "step": 4752 }, { "epoch": 0.12754937741519964, "grad_norm": 0.361328125, "learning_rate": 0.0007651147189051389, "loss": 1.3241, "step": 4753 }, { "epoch": 0.1275762129669386, "grad_norm": 0.373046875, "learning_rate": 0.0007652757278948074, "loss": 1.3512, "step": 4754 }, { "epoch": 0.12760304851867754, "grad_norm": 0.3515625, "learning_rate": 0.000765436736884476, "loss": 1.2224, "step": 4755 }, { "epoch": 0.12762988407041648, "grad_norm": 0.359375, "learning_rate": 0.0007655977458741445, "loss": 1.3258, "step": 4756 }, { "epoch": 0.12765671962215544, "grad_norm": 0.33984375, "learning_rate": 0.0007657587548638131, "loss": 1.2429, "step": 4757 }, { "epoch": 0.12768355517389438, "grad_norm": 0.365234375, "learning_rate": 0.0007659197638534817, "loss": 1.2991, "step": 4758 }, { "epoch": 0.1277103907256333, "grad_norm": 0.36328125, "learning_rate": 0.0007660807728431503, "loss": 1.306, "step": 4759 }, { "epoch": 0.12773722627737227, "grad_norm": 0.349609375, "learning_rate": 0.0007662417818328189, "loss": 1.2818, "step": 4760 }, { "epoch": 0.1277640618291112, "grad_norm": 0.3515625, "learning_rate": 0.0007664027908224875, "loss": 1.2274, "step": 4761 }, { "epoch": 0.12779089738085014, "grad_norm": 0.341796875, "learning_rate": 0.0007665637998121561, "loss": 1.1758, "step": 4762 }, { "epoch": 0.1278177329325891, "grad_norm": 0.384765625, "learning_rate": 0.0007667248088018247, "loss": 1.2825, "step": 4763 }, { "epoch": 0.12784456848432804, "grad_norm": 0.326171875, "learning_rate": 0.0007668858177914933, "loss": 1.1212, "step": 4764 }, { "epoch": 0.12787140403606698, "grad_norm": 0.369140625, "learning_rate": 0.0007670468267811619, "loss": 1.3769, "step": 4765 }, { "epoch": 0.1278982395878059, "grad_norm": 0.37890625, "learning_rate": 0.0007672078357708305, "loss": 1.4374, "step": 4766 }, { "epoch": 0.12792507513954487, "grad_norm": 0.361328125, "learning_rate": 0.0007673688447604991, "loss": 1.2549, "step": 4767 }, { "epoch": 0.1279519106912838, "grad_norm": 0.353515625, "learning_rate": 0.0007675298537501677, "loss": 1.239, "step": 4768 }, { "epoch": 0.12797874624302275, "grad_norm": 0.337890625, "learning_rate": 0.0007676908627398363, "loss": 1.1547, "step": 4769 }, { "epoch": 0.1280055817947617, "grad_norm": 0.337890625, "learning_rate": 0.0007678518717295049, "loss": 1.2245, "step": 4770 }, { "epoch": 0.12803241734650064, "grad_norm": 0.35546875, "learning_rate": 0.0007680128807191735, "loss": 1.2716, "step": 4771 }, { "epoch": 0.12805925289823958, "grad_norm": 0.376953125, "learning_rate": 0.0007681738897088419, "loss": 1.3724, "step": 4772 }, { "epoch": 0.12808608844997854, "grad_norm": 0.345703125, "learning_rate": 0.0007683348986985105, "loss": 1.2514, "step": 4773 }, { "epoch": 0.12811292400171748, "grad_norm": 0.34765625, "learning_rate": 0.0007684959076881792, "loss": 1.275, "step": 4774 }, { "epoch": 0.1281397595534564, "grad_norm": 0.36328125, "learning_rate": 0.0007686569166778477, "loss": 1.3372, "step": 4775 }, { "epoch": 0.12816659510519537, "grad_norm": 0.3515625, "learning_rate": 0.0007688179256675164, "loss": 1.3312, "step": 4776 }, { "epoch": 0.1281934306569343, "grad_norm": 0.3515625, "learning_rate": 0.0007689789346571849, "loss": 1.3213, "step": 4777 }, { "epoch": 0.12822026620867324, "grad_norm": 0.36328125, "learning_rate": 0.0007691399436468535, "loss": 1.3879, "step": 4778 }, { "epoch": 0.1282471017604122, "grad_norm": 0.357421875, "learning_rate": 0.0007693009526365222, "loss": 1.2653, "step": 4779 }, { "epoch": 0.12827393731215114, "grad_norm": 0.37109375, "learning_rate": 0.0007694619616261907, "loss": 1.3339, "step": 4780 }, { "epoch": 0.12830077286389008, "grad_norm": 0.3359375, "learning_rate": 0.0007696229706158594, "loss": 1.1274, "step": 4781 }, { "epoch": 0.128327608415629, "grad_norm": 0.361328125, "learning_rate": 0.0007697839796055279, "loss": 1.2835, "step": 4782 }, { "epoch": 0.12835444396736798, "grad_norm": 0.357421875, "learning_rate": 0.0007699449885951965, "loss": 1.2873, "step": 4783 }, { "epoch": 0.1283812795191069, "grad_norm": 0.330078125, "learning_rate": 0.0007701059975848652, "loss": 1.1366, "step": 4784 }, { "epoch": 0.12840811507084585, "grad_norm": 0.36328125, "learning_rate": 0.0007702670065745337, "loss": 1.2338, "step": 4785 }, { "epoch": 0.1284349506225848, "grad_norm": 0.3515625, "learning_rate": 0.0007704280155642024, "loss": 1.2646, "step": 4786 }, { "epoch": 0.12846178617432374, "grad_norm": 0.3359375, "learning_rate": 0.0007705890245538708, "loss": 1.2438, "step": 4787 }, { "epoch": 0.12848862172606268, "grad_norm": 0.337890625, "learning_rate": 0.0007707500335435394, "loss": 1.1983, "step": 4788 }, { "epoch": 0.12851545727780164, "grad_norm": 0.3671875, "learning_rate": 0.000770911042533208, "loss": 1.3478, "step": 4789 }, { "epoch": 0.12854229282954058, "grad_norm": 0.353515625, "learning_rate": 0.0007710720515228766, "loss": 1.3079, "step": 4790 }, { "epoch": 0.1285691283812795, "grad_norm": 0.33984375, "learning_rate": 0.0007712330605125451, "loss": 1.263, "step": 4791 }, { "epoch": 0.12859596393301848, "grad_norm": 0.365234375, "learning_rate": 0.0007713940695022138, "loss": 1.2586, "step": 4792 }, { "epoch": 0.1286227994847574, "grad_norm": 0.380859375, "learning_rate": 0.0007715550784918824, "loss": 1.3381, "step": 4793 }, { "epoch": 0.12864963503649635, "grad_norm": 0.357421875, "learning_rate": 0.000771716087481551, "loss": 1.2031, "step": 4794 }, { "epoch": 0.12867647058823528, "grad_norm": 0.32421875, "learning_rate": 0.0007718770964712196, "loss": 1.1064, "step": 4795 }, { "epoch": 0.12870330613997424, "grad_norm": 0.35546875, "learning_rate": 0.0007720381054608881, "loss": 1.2909, "step": 4796 }, { "epoch": 0.12873014169171318, "grad_norm": 0.36328125, "learning_rate": 0.0007721991144505568, "loss": 1.2352, "step": 4797 }, { "epoch": 0.12875697724345211, "grad_norm": 0.341796875, "learning_rate": 0.0007723601234402254, "loss": 1.2005, "step": 4798 }, { "epoch": 0.12878381279519108, "grad_norm": 0.35546875, "learning_rate": 0.000772521132429894, "loss": 1.2793, "step": 4799 }, { "epoch": 0.12881064834693, "grad_norm": 0.3203125, "learning_rate": 0.0007726821414195626, "loss": 1.204, "step": 4800 }, { "epoch": 0.12883748389866895, "grad_norm": 0.3671875, "learning_rate": 0.0007728431504092311, "loss": 1.2453, "step": 4801 }, { "epoch": 0.1288643194504079, "grad_norm": 0.341796875, "learning_rate": 0.0007730041593988998, "loss": 1.2517, "step": 4802 }, { "epoch": 0.12889115500214685, "grad_norm": 0.357421875, "learning_rate": 0.0007731651683885682, "loss": 1.2853, "step": 4803 }, { "epoch": 0.12891799055388578, "grad_norm": 0.34375, "learning_rate": 0.0007733261773782368, "loss": 1.1786, "step": 4804 }, { "epoch": 0.12894482610562474, "grad_norm": 0.357421875, "learning_rate": 0.0007734871863679054, "loss": 1.3555, "step": 4805 }, { "epoch": 0.12897166165736368, "grad_norm": 0.35546875, "learning_rate": 0.000773648195357574, "loss": 1.2735, "step": 4806 }, { "epoch": 0.1289984972091026, "grad_norm": 0.359375, "learning_rate": 0.0007738092043472427, "loss": 1.3257, "step": 4807 }, { "epoch": 0.12902533276084158, "grad_norm": 0.361328125, "learning_rate": 0.0007739702133369112, "loss": 1.2765, "step": 4808 }, { "epoch": 0.1290521683125805, "grad_norm": 0.361328125, "learning_rate": 0.0007741312223265798, "loss": 1.3956, "step": 4809 }, { "epoch": 0.12907900386431945, "grad_norm": 0.365234375, "learning_rate": 0.0007742922313162484, "loss": 1.3171, "step": 4810 }, { "epoch": 0.12910583941605838, "grad_norm": 0.3359375, "learning_rate": 0.000774453240305917, "loss": 1.2213, "step": 4811 }, { "epoch": 0.12913267496779735, "grad_norm": 0.34375, "learning_rate": 0.0007746142492955857, "loss": 1.2624, "step": 4812 }, { "epoch": 0.12915951051953628, "grad_norm": 0.36328125, "learning_rate": 0.0007747752582852542, "loss": 1.2529, "step": 4813 }, { "epoch": 0.12918634607127522, "grad_norm": 0.3515625, "learning_rate": 0.0007749362672749228, "loss": 1.3233, "step": 4814 }, { "epoch": 0.12921318162301418, "grad_norm": 0.357421875, "learning_rate": 0.0007750972762645914, "loss": 1.2568, "step": 4815 }, { "epoch": 0.1292400171747531, "grad_norm": 0.337890625, "learning_rate": 0.00077525828525426, "loss": 1.2073, "step": 4816 }, { "epoch": 0.12926685272649205, "grad_norm": 0.34765625, "learning_rate": 0.0007754192942439287, "loss": 1.2113, "step": 4817 }, { "epoch": 0.129293688278231, "grad_norm": 0.35546875, "learning_rate": 0.0007755803032335971, "loss": 1.2505, "step": 4818 }, { "epoch": 0.12932052382996995, "grad_norm": 0.39453125, "learning_rate": 0.0007757413122232657, "loss": 1.163, "step": 4819 }, { "epoch": 0.12934735938170888, "grad_norm": 0.353515625, "learning_rate": 0.0007759023212129343, "loss": 1.2819, "step": 4820 }, { "epoch": 0.12937419493344784, "grad_norm": 0.345703125, "learning_rate": 0.0007760633302026029, "loss": 1.2533, "step": 4821 }, { "epoch": 0.12940103048518678, "grad_norm": 0.345703125, "learning_rate": 0.0007762243391922714, "loss": 1.2506, "step": 4822 }, { "epoch": 0.12942786603692572, "grad_norm": 0.3359375, "learning_rate": 0.0007763853481819401, "loss": 1.1405, "step": 4823 }, { "epoch": 0.12945470158866465, "grad_norm": 0.37109375, "learning_rate": 0.0007765463571716086, "loss": 1.4198, "step": 4824 }, { "epoch": 0.1294815371404036, "grad_norm": 0.361328125, "learning_rate": 0.0007767073661612773, "loss": 1.2699, "step": 4825 }, { "epoch": 0.12950837269214255, "grad_norm": 0.361328125, "learning_rate": 0.0007768683751509459, "loss": 1.3396, "step": 4826 }, { "epoch": 0.12953520824388148, "grad_norm": 0.36328125, "learning_rate": 0.0007770293841406144, "loss": 1.3041, "step": 4827 }, { "epoch": 0.12956204379562045, "grad_norm": 0.3359375, "learning_rate": 0.0007771903931302831, "loss": 1.257, "step": 4828 }, { "epoch": 0.12958887934735938, "grad_norm": 0.359375, "learning_rate": 0.0007773514021199516, "loss": 1.3027, "step": 4829 }, { "epoch": 0.12961571489909832, "grad_norm": 0.34765625, "learning_rate": 0.0007775124111096203, "loss": 1.1711, "step": 4830 }, { "epoch": 0.12964255045083728, "grad_norm": 0.357421875, "learning_rate": 0.0007776734200992889, "loss": 1.2597, "step": 4831 }, { "epoch": 0.12966938600257621, "grad_norm": 0.35546875, "learning_rate": 0.0007778344290889574, "loss": 1.3088, "step": 4832 }, { "epoch": 0.12969622155431515, "grad_norm": 0.376953125, "learning_rate": 0.0007779954380786261, "loss": 1.36, "step": 4833 }, { "epoch": 0.1297230571060541, "grad_norm": 0.365234375, "learning_rate": 0.0007781564470682945, "loss": 1.354, "step": 4834 }, { "epoch": 0.12974989265779305, "grad_norm": 0.3515625, "learning_rate": 0.0007783174560579631, "loss": 1.2501, "step": 4835 }, { "epoch": 0.12977672820953198, "grad_norm": 0.34375, "learning_rate": 0.0007784784650476317, "loss": 1.2187, "step": 4836 }, { "epoch": 0.12980356376127092, "grad_norm": 0.373046875, "learning_rate": 0.0007786394740373003, "loss": 1.3579, "step": 4837 }, { "epoch": 0.12983039931300988, "grad_norm": 0.35546875, "learning_rate": 0.000778800483026969, "loss": 1.2577, "step": 4838 }, { "epoch": 0.12985723486474882, "grad_norm": 0.345703125, "learning_rate": 0.0007789614920166375, "loss": 1.295, "step": 4839 }, { "epoch": 0.12988407041648775, "grad_norm": 0.3515625, "learning_rate": 0.0007791225010063061, "loss": 1.2125, "step": 4840 }, { "epoch": 0.12991090596822671, "grad_norm": 0.33984375, "learning_rate": 0.0007792835099959747, "loss": 1.1566, "step": 4841 }, { "epoch": 0.12993774151996565, "grad_norm": 0.35546875, "learning_rate": 0.0007794445189856433, "loss": 1.2883, "step": 4842 }, { "epoch": 0.12996457707170458, "grad_norm": 0.353515625, "learning_rate": 0.0007796055279753119, "loss": 1.2105, "step": 4843 }, { "epoch": 0.12999141262344355, "grad_norm": 0.365234375, "learning_rate": 0.0007797665369649805, "loss": 1.2561, "step": 4844 }, { "epoch": 0.13001824817518248, "grad_norm": 0.349609375, "learning_rate": 0.0007799275459546491, "loss": 1.2701, "step": 4845 }, { "epoch": 0.13004508372692142, "grad_norm": 0.328125, "learning_rate": 0.0007800885549443177, "loss": 1.1168, "step": 4846 }, { "epoch": 0.13007191927866038, "grad_norm": 0.357421875, "learning_rate": 0.0007802495639339863, "loss": 1.2955, "step": 4847 }, { "epoch": 0.13009875483039932, "grad_norm": 0.37109375, "learning_rate": 0.0007804105729236549, "loss": 1.327, "step": 4848 }, { "epoch": 0.13012559038213825, "grad_norm": 0.353515625, "learning_rate": 0.0007805715819133234, "loss": 1.1938, "step": 4849 }, { "epoch": 0.1301524259338772, "grad_norm": 0.3515625, "learning_rate": 0.0007807325909029919, "loss": 1.2651, "step": 4850 }, { "epoch": 0.13017926148561615, "grad_norm": 0.34375, "learning_rate": 0.0007808935998926606, "loss": 1.1473, "step": 4851 }, { "epoch": 0.13020609703735508, "grad_norm": 0.3515625, "learning_rate": 0.0007810546088823292, "loss": 1.1538, "step": 4852 }, { "epoch": 0.13023293258909402, "grad_norm": 0.345703125, "learning_rate": 0.0007812156178719977, "loss": 1.2667, "step": 4853 }, { "epoch": 0.13025976814083298, "grad_norm": 0.357421875, "learning_rate": 0.0007813766268616664, "loss": 1.2762, "step": 4854 }, { "epoch": 0.13028660369257192, "grad_norm": 0.341796875, "learning_rate": 0.0007815376358513349, "loss": 1.1051, "step": 4855 }, { "epoch": 0.13031343924431085, "grad_norm": 0.34375, "learning_rate": 0.0007816986448410036, "loss": 1.2162, "step": 4856 }, { "epoch": 0.13034027479604982, "grad_norm": 0.359375, "learning_rate": 0.0007818596538306722, "loss": 1.1701, "step": 4857 }, { "epoch": 0.13036711034778875, "grad_norm": 0.328125, "learning_rate": 0.0007820206628203407, "loss": 1.143, "step": 4858 }, { "epoch": 0.13039394589952769, "grad_norm": 0.357421875, "learning_rate": 0.0007821816718100094, "loss": 1.2124, "step": 4859 }, { "epoch": 0.13042078145126665, "grad_norm": 0.3515625, "learning_rate": 0.0007823426807996779, "loss": 1.3478, "step": 4860 }, { "epoch": 0.13044761700300558, "grad_norm": 0.357421875, "learning_rate": 0.0007825036897893466, "loss": 1.2935, "step": 4861 }, { "epoch": 0.13047445255474452, "grad_norm": 0.365234375, "learning_rate": 0.0007826646987790151, "loss": 1.2818, "step": 4862 }, { "epoch": 0.13050128810648348, "grad_norm": 0.3515625, "learning_rate": 0.0007828257077686837, "loss": 1.1756, "step": 4863 }, { "epoch": 0.13052812365822242, "grad_norm": 0.337890625, "learning_rate": 0.0007829867167583524, "loss": 1.2328, "step": 4864 }, { "epoch": 0.13055495920996135, "grad_norm": 0.359375, "learning_rate": 0.0007831477257480208, "loss": 1.3704, "step": 4865 }, { "epoch": 0.1305817947617003, "grad_norm": 0.3515625, "learning_rate": 0.0007833087347376894, "loss": 1.2537, "step": 4866 }, { "epoch": 0.13060863031343925, "grad_norm": 0.333984375, "learning_rate": 0.000783469743727358, "loss": 1.2443, "step": 4867 }, { "epoch": 0.13063546586517819, "grad_norm": 0.34765625, "learning_rate": 0.0007836307527170266, "loss": 1.2304, "step": 4868 }, { "epoch": 0.13066230141691712, "grad_norm": 0.337890625, "learning_rate": 0.0007837917617066952, "loss": 1.2181, "step": 4869 }, { "epoch": 0.13068913696865608, "grad_norm": 0.33984375, "learning_rate": 0.0007839527706963638, "loss": 1.2348, "step": 4870 }, { "epoch": 0.13071597252039502, "grad_norm": 0.349609375, "learning_rate": 0.0007841137796860324, "loss": 1.2604, "step": 4871 }, { "epoch": 0.13074280807213395, "grad_norm": 0.33203125, "learning_rate": 0.000784274788675701, "loss": 1.1469, "step": 4872 }, { "epoch": 0.13076964362387292, "grad_norm": 0.34375, "learning_rate": 0.0007844357976653696, "loss": 1.2615, "step": 4873 }, { "epoch": 0.13079647917561185, "grad_norm": 0.37890625, "learning_rate": 0.0007845968066550382, "loss": 1.3556, "step": 4874 }, { "epoch": 0.1308233147273508, "grad_norm": 0.341796875, "learning_rate": 0.0007847578156447068, "loss": 1.2227, "step": 4875 }, { "epoch": 0.13085015027908975, "grad_norm": 0.375, "learning_rate": 0.0007849188246343753, "loss": 1.2713, "step": 4876 }, { "epoch": 0.13087698583082868, "grad_norm": 0.359375, "learning_rate": 0.000785079833624044, "loss": 1.2416, "step": 4877 }, { "epoch": 0.13090382138256762, "grad_norm": 0.318359375, "learning_rate": 0.0007852408426137126, "loss": 1.1026, "step": 4878 }, { "epoch": 0.13093065693430658, "grad_norm": 0.37109375, "learning_rate": 0.0007854018516033812, "loss": 1.386, "step": 4879 }, { "epoch": 0.13095749248604552, "grad_norm": 0.3515625, "learning_rate": 0.0007855628605930497, "loss": 1.2415, "step": 4880 }, { "epoch": 0.13098432803778445, "grad_norm": 0.3515625, "learning_rate": 0.0007857238695827182, "loss": 1.2533, "step": 4881 }, { "epoch": 0.1310111635895234, "grad_norm": 0.34375, "learning_rate": 0.0007858848785723869, "loss": 1.2174, "step": 4882 }, { "epoch": 0.13103799914126235, "grad_norm": 0.357421875, "learning_rate": 0.0007860458875620554, "loss": 1.2672, "step": 4883 }, { "epoch": 0.1310648346930013, "grad_norm": 0.33984375, "learning_rate": 0.000786206896551724, "loss": 1.1834, "step": 4884 }, { "epoch": 0.13109167024474022, "grad_norm": 0.341796875, "learning_rate": 0.0007863679055413927, "loss": 1.2027, "step": 4885 }, { "epoch": 0.13111850579647918, "grad_norm": 0.359375, "learning_rate": 0.0007865289145310612, "loss": 1.2607, "step": 4886 }, { "epoch": 0.13114534134821812, "grad_norm": 0.365234375, "learning_rate": 0.0007866899235207299, "loss": 1.3683, "step": 4887 }, { "epoch": 0.13117217689995705, "grad_norm": 0.3515625, "learning_rate": 0.0007868509325103984, "loss": 1.2065, "step": 4888 }, { "epoch": 0.13119901245169602, "grad_norm": 0.3515625, "learning_rate": 0.000787011941500067, "loss": 1.3122, "step": 4889 }, { "epoch": 0.13122584800343495, "grad_norm": 0.337890625, "learning_rate": 0.0007871729504897357, "loss": 1.2073, "step": 4890 }, { "epoch": 0.1312526835551739, "grad_norm": 0.357421875, "learning_rate": 0.0007873339594794042, "loss": 1.2543, "step": 4891 }, { "epoch": 0.13127951910691285, "grad_norm": 0.349609375, "learning_rate": 0.0007874949684690729, "loss": 1.2533, "step": 4892 }, { "epoch": 0.13130635465865179, "grad_norm": 0.33984375, "learning_rate": 0.0007876559774587414, "loss": 1.2149, "step": 4893 }, { "epoch": 0.13133319021039072, "grad_norm": 0.359375, "learning_rate": 0.00078781698644841, "loss": 1.3247, "step": 4894 }, { "epoch": 0.13136002576212966, "grad_norm": 0.36328125, "learning_rate": 0.0007879779954380785, "loss": 1.2579, "step": 4895 }, { "epoch": 0.13138686131386862, "grad_norm": 0.357421875, "learning_rate": 0.0007881390044277471, "loss": 1.2884, "step": 4896 }, { "epoch": 0.13141369686560755, "grad_norm": 0.36328125, "learning_rate": 0.0007883000134174156, "loss": 1.2887, "step": 4897 }, { "epoch": 0.1314405324173465, "grad_norm": 0.349609375, "learning_rate": 0.0007884610224070843, "loss": 1.2091, "step": 4898 }, { "epoch": 0.13146736796908545, "grad_norm": 0.34375, "learning_rate": 0.0007886220313967529, "loss": 1.1713, "step": 4899 }, { "epoch": 0.1314942035208244, "grad_norm": 0.369140625, "learning_rate": 0.0007887830403864215, "loss": 1.235, "step": 4900 }, { "epoch": 0.13152103907256332, "grad_norm": 0.4140625, "learning_rate": 0.0007889440493760901, "loss": 1.2049, "step": 4901 }, { "epoch": 0.13154787462430229, "grad_norm": 0.421875, "learning_rate": 0.0007891050583657586, "loss": 1.0219, "step": 4902 }, { "epoch": 0.13157471017604122, "grad_norm": 0.63671875, "learning_rate": 0.0007892660673554273, "loss": 0.9809, "step": 4903 }, { "epoch": 0.13160154572778016, "grad_norm": 1.390625, "learning_rate": 0.0007894270763450959, "loss": 1.1494, "step": 4904 }, { "epoch": 0.13162838127951912, "grad_norm": 2.0, "learning_rate": 0.0007895880853347645, "loss": 1.2536, "step": 4905 }, { "epoch": 0.13165521683125805, "grad_norm": 1.5546875, "learning_rate": 0.0007897490943244331, "loss": 1.3673, "step": 4906 }, { "epoch": 0.131682052382997, "grad_norm": 0.8125, "learning_rate": 0.0007899101033141016, "loss": 1.158, "step": 4907 }, { "epoch": 0.13170888793473595, "grad_norm": 1.2421875, "learning_rate": 0.0007900711123037703, "loss": 1.2241, "step": 4908 }, { "epoch": 0.1317357234864749, "grad_norm": 0.7421875, "learning_rate": 0.0007902321212934389, "loss": 1.1549, "step": 4909 }, { "epoch": 0.13176255903821382, "grad_norm": 0.486328125, "learning_rate": 0.0007903931302831075, "loss": 1.0563, "step": 4910 }, { "epoch": 0.13178939458995276, "grad_norm": 1.203125, "learning_rate": 0.0007905541392727759, "loss": 1.1701, "step": 4911 }, { "epoch": 0.13181623014169172, "grad_norm": 0.640625, "learning_rate": 0.0007907151482624445, "loss": 1.0293, "step": 4912 }, { "epoch": 0.13184306569343066, "grad_norm": 0.421875, "learning_rate": 0.0007908761572521132, "loss": 1.0895, "step": 4913 }, { "epoch": 0.1318699012451696, "grad_norm": 0.34765625, "learning_rate": 0.0007910371662417817, "loss": 1.0564, "step": 4914 }, { "epoch": 0.13189673679690855, "grad_norm": 0.361328125, "learning_rate": 0.0007911981752314503, "loss": 0.9726, "step": 4915 }, { "epoch": 0.1319235723486475, "grad_norm": 0.359375, "learning_rate": 0.0007913591842211189, "loss": 1.1334, "step": 4916 }, { "epoch": 0.13195040790038642, "grad_norm": 0.353515625, "learning_rate": 0.0007915201932107875, "loss": 1.043, "step": 4917 }, { "epoch": 0.1319772434521254, "grad_norm": 0.353515625, "learning_rate": 0.0007916812022004562, "loss": 0.893, "step": 4918 }, { "epoch": 0.13200407900386432, "grad_norm": 0.330078125, "learning_rate": 0.0007918422111901247, "loss": 0.9427, "step": 4919 }, { "epoch": 0.13203091455560326, "grad_norm": 0.376953125, "learning_rate": 0.0007920032201797933, "loss": 1.208, "step": 4920 }, { "epoch": 0.13205775010734222, "grad_norm": 0.359375, "learning_rate": 0.0007921642291694619, "loss": 1.1198, "step": 4921 }, { "epoch": 0.13208458565908116, "grad_norm": 0.337890625, "learning_rate": 0.0007923252381591305, "loss": 0.9912, "step": 4922 }, { "epoch": 0.1321114212108201, "grad_norm": 0.328125, "learning_rate": 0.0007924862471487992, "loss": 0.9394, "step": 4923 }, { "epoch": 0.13213825676255903, "grad_norm": 0.345703125, "learning_rate": 0.0007926472561384677, "loss": 1.0793, "step": 4924 }, { "epoch": 0.132165092314298, "grad_norm": 0.33984375, "learning_rate": 0.0007928082651281363, "loss": 1.0418, "step": 4925 }, { "epoch": 0.13219192786603692, "grad_norm": 0.34765625, "learning_rate": 0.0007929692741178048, "loss": 1.036, "step": 4926 }, { "epoch": 0.13221876341777586, "grad_norm": 0.353515625, "learning_rate": 0.0007931302831074734, "loss": 1.1592, "step": 4927 }, { "epoch": 0.13224559896951482, "grad_norm": 0.34375, "learning_rate": 0.0007932912920971419, "loss": 1.0421, "step": 4928 }, { "epoch": 0.13227243452125376, "grad_norm": 0.330078125, "learning_rate": 0.0007934523010868106, "loss": 1.0331, "step": 4929 }, { "epoch": 0.1322992700729927, "grad_norm": 0.349609375, "learning_rate": 0.0007936133100764791, "loss": 1.1208, "step": 4930 }, { "epoch": 0.13232610562473165, "grad_norm": 0.35546875, "learning_rate": 0.0007937743190661478, "loss": 1.0864, "step": 4931 }, { "epoch": 0.1323529411764706, "grad_norm": 0.357421875, "learning_rate": 0.0007939353280558164, "loss": 1.2013, "step": 4932 }, { "epoch": 0.13237977672820953, "grad_norm": 0.328125, "learning_rate": 0.0007940963370454849, "loss": 1.0328, "step": 4933 }, { "epoch": 0.1324066122799485, "grad_norm": 0.34375, "learning_rate": 0.0007942573460351536, "loss": 1.1599, "step": 4934 }, { "epoch": 0.13243344783168742, "grad_norm": 0.333984375, "learning_rate": 0.0007944183550248221, "loss": 1.1522, "step": 4935 }, { "epoch": 0.13246028338342636, "grad_norm": 0.337890625, "learning_rate": 0.0007945793640144908, "loss": 1.1168, "step": 4936 }, { "epoch": 0.1324871189351653, "grad_norm": 0.34375, "learning_rate": 0.0007947403730041594, "loss": 1.085, "step": 4937 }, { "epoch": 0.13251395448690426, "grad_norm": 0.337890625, "learning_rate": 0.0007949013819938279, "loss": 1.1361, "step": 4938 }, { "epoch": 0.1325407900386432, "grad_norm": 0.33984375, "learning_rate": 0.0007950623909834966, "loss": 1.0536, "step": 4939 }, { "epoch": 0.13256762559038213, "grad_norm": 0.3203125, "learning_rate": 0.0007952233999731651, "loss": 0.9978, "step": 4940 }, { "epoch": 0.1325944611421211, "grad_norm": 0.31640625, "learning_rate": 0.0007953844089628338, "loss": 1.0092, "step": 4941 }, { "epoch": 0.13262129669386002, "grad_norm": 0.33984375, "learning_rate": 0.0007955454179525022, "loss": 1.1125, "step": 4942 }, { "epoch": 0.13264813224559896, "grad_norm": 0.341796875, "learning_rate": 0.0007957064269421708, "loss": 1.0973, "step": 4943 }, { "epoch": 0.13267496779733792, "grad_norm": 0.326171875, "learning_rate": 0.0007958674359318394, "loss": 1.0246, "step": 4944 }, { "epoch": 0.13270180334907686, "grad_norm": 0.318359375, "learning_rate": 0.000796028444921508, "loss": 1.0837, "step": 4945 }, { "epoch": 0.1327286389008158, "grad_norm": 0.330078125, "learning_rate": 0.0007961894539111766, "loss": 1.095, "step": 4946 }, { "epoch": 0.13275547445255476, "grad_norm": 0.341796875, "learning_rate": 0.0007963504629008452, "loss": 1.1734, "step": 4947 }, { "epoch": 0.1327823100042937, "grad_norm": 0.3203125, "learning_rate": 0.0007965114718905138, "loss": 1.099, "step": 4948 }, { "epoch": 0.13280914555603263, "grad_norm": 0.37109375, "learning_rate": 0.0007966724808801824, "loss": 1.101, "step": 4949 }, { "epoch": 0.1328359811077716, "grad_norm": 0.337890625, "learning_rate": 0.000796833489869851, "loss": 1.1151, "step": 4950 }, { "epoch": 0.13286281665951052, "grad_norm": 0.333984375, "learning_rate": 0.0007969944988595196, "loss": 1.0815, "step": 4951 }, { "epoch": 0.13288965221124946, "grad_norm": 0.326171875, "learning_rate": 0.0007971555078491882, "loss": 0.9946, "step": 4952 }, { "epoch": 0.1329164877629884, "grad_norm": 0.3359375, "learning_rate": 0.0007973165168388568, "loss": 0.9616, "step": 4953 }, { "epoch": 0.13294332331472736, "grad_norm": 0.3515625, "learning_rate": 0.0007974775258285254, "loss": 1.1136, "step": 4954 }, { "epoch": 0.1329701588664663, "grad_norm": 0.345703125, "learning_rate": 0.000797638534818194, "loss": 1.0961, "step": 4955 }, { "epoch": 0.13299699441820523, "grad_norm": 0.32421875, "learning_rate": 0.0007977995438078626, "loss": 1.0229, "step": 4956 }, { "epoch": 0.1330238299699442, "grad_norm": 0.34375, "learning_rate": 0.0007979605527975311, "loss": 1.1646, "step": 4957 }, { "epoch": 0.13305066552168313, "grad_norm": 0.33203125, "learning_rate": 0.0007981215617871997, "loss": 1.0355, "step": 4958 }, { "epoch": 0.13307750107342206, "grad_norm": 0.30078125, "learning_rate": 0.0007982825707768682, "loss": 0.9112, "step": 4959 }, { "epoch": 0.13310433662516102, "grad_norm": 0.30859375, "learning_rate": 0.0007984435797665369, "loss": 1.0345, "step": 4960 }, { "epoch": 0.13313117217689996, "grad_norm": 0.330078125, "learning_rate": 0.0007986045887562054, "loss": 1.1322, "step": 4961 }, { "epoch": 0.1331580077286389, "grad_norm": 0.31640625, "learning_rate": 0.0007987655977458741, "loss": 0.9717, "step": 4962 }, { "epoch": 0.13318484328037786, "grad_norm": 0.330078125, "learning_rate": 0.0007989266067355426, "loss": 1.0085, "step": 4963 }, { "epoch": 0.1332116788321168, "grad_norm": 0.310546875, "learning_rate": 0.0007990876157252112, "loss": 0.99, "step": 4964 }, { "epoch": 0.13323851438385573, "grad_norm": 0.3203125, "learning_rate": 0.0007992486247148799, "loss": 0.9867, "step": 4965 }, { "epoch": 0.13326534993559466, "grad_norm": 0.359375, "learning_rate": 0.0007994096337045484, "loss": 1.2859, "step": 4966 }, { "epoch": 0.13329218548733363, "grad_norm": 0.341796875, "learning_rate": 0.0007995706426942171, "loss": 1.1202, "step": 4967 }, { "epoch": 0.13331902103907256, "grad_norm": 0.30078125, "learning_rate": 0.0007997316516838856, "loss": 0.9871, "step": 4968 }, { "epoch": 0.1333458565908115, "grad_norm": 0.3203125, "learning_rate": 0.0007998926606735542, "loss": 1.0912, "step": 4969 }, { "epoch": 0.13337269214255046, "grad_norm": 0.333984375, "learning_rate": 0.0008000536696632229, "loss": 1.0276, "step": 4970 }, { "epoch": 0.1333995276942894, "grad_norm": 0.3203125, "learning_rate": 0.0008002146786528914, "loss": 1.0743, "step": 4971 }, { "epoch": 0.13342636324602833, "grad_norm": 0.337890625, "learning_rate": 0.0008003756876425601, "loss": 1.0955, "step": 4972 }, { "epoch": 0.1334531987977673, "grad_norm": 0.337890625, "learning_rate": 0.0008005366966322285, "loss": 1.0483, "step": 4973 }, { "epoch": 0.13348003434950623, "grad_norm": 0.326171875, "learning_rate": 0.0008006977056218971, "loss": 1.1339, "step": 4974 }, { "epoch": 0.13350686990124516, "grad_norm": 0.337890625, "learning_rate": 0.0008008587146115657, "loss": 1.0232, "step": 4975 }, { "epoch": 0.13353370545298412, "grad_norm": 0.337890625, "learning_rate": 0.0008010197236012343, "loss": 1.1166, "step": 4976 }, { "epoch": 0.13356054100472306, "grad_norm": 0.333984375, "learning_rate": 0.000801180732590903, "loss": 1.0704, "step": 4977 }, { "epoch": 0.133587376556462, "grad_norm": 0.32421875, "learning_rate": 0.0008013417415805715, "loss": 1.0104, "step": 4978 }, { "epoch": 0.13361421210820096, "grad_norm": 0.33203125, "learning_rate": 0.0008015027505702401, "loss": 1.0277, "step": 4979 }, { "epoch": 0.1336410476599399, "grad_norm": 0.3203125, "learning_rate": 0.0008016637595599087, "loss": 0.9922, "step": 4980 }, { "epoch": 0.13366788321167883, "grad_norm": 0.3359375, "learning_rate": 0.0008018247685495773, "loss": 0.9742, "step": 4981 }, { "epoch": 0.13369471876341776, "grad_norm": 0.3046875, "learning_rate": 0.0008019857775392458, "loss": 0.9098, "step": 4982 }, { "epoch": 0.13372155431515673, "grad_norm": 0.3203125, "learning_rate": 0.0008021467865289145, "loss": 1.0829, "step": 4983 }, { "epoch": 0.13374838986689566, "grad_norm": 0.341796875, "learning_rate": 0.0008023077955185831, "loss": 1.0027, "step": 4984 }, { "epoch": 0.1337752254186346, "grad_norm": 0.328125, "learning_rate": 0.0008024688045082517, "loss": 1.025, "step": 4985 }, { "epoch": 0.13380206097037356, "grad_norm": 0.345703125, "learning_rate": 0.0008026298134979203, "loss": 1.0125, "step": 4986 }, { "epoch": 0.1338288965221125, "grad_norm": 0.328125, "learning_rate": 0.0008027908224875888, "loss": 1.0912, "step": 4987 }, { "epoch": 0.13385573207385143, "grad_norm": 0.349609375, "learning_rate": 0.0008029518314772574, "loss": 1.1717, "step": 4988 }, { "epoch": 0.1338825676255904, "grad_norm": 0.33203125, "learning_rate": 0.0008031128404669259, "loss": 1.08, "step": 4989 }, { "epoch": 0.13390940317732933, "grad_norm": 0.318359375, "learning_rate": 0.0008032738494565945, "loss": 1.073, "step": 4990 }, { "epoch": 0.13393623872906826, "grad_norm": 0.33203125, "learning_rate": 0.0008034348584462632, "loss": 0.9981, "step": 4991 }, { "epoch": 0.13396307428080723, "grad_norm": 0.322265625, "learning_rate": 0.0008035958674359317, "loss": 1.0272, "step": 4992 }, { "epoch": 0.13398990983254616, "grad_norm": 0.369140625, "learning_rate": 0.0008037568764256004, "loss": 1.1694, "step": 4993 }, { "epoch": 0.1340167453842851, "grad_norm": 0.333984375, "learning_rate": 0.0008039178854152689, "loss": 1.0731, "step": 4994 }, { "epoch": 0.13404358093602403, "grad_norm": 0.3359375, "learning_rate": 0.0008040788944049375, "loss": 1.0273, "step": 4995 }, { "epoch": 0.134070416487763, "grad_norm": 0.34375, "learning_rate": 0.0008042399033946061, "loss": 1.0938, "step": 4996 }, { "epoch": 0.13409725203950193, "grad_norm": 0.3515625, "learning_rate": 0.0008044009123842747, "loss": 1.1476, "step": 4997 }, { "epoch": 0.13412408759124086, "grad_norm": 0.3359375, "learning_rate": 0.0008045619213739434, "loss": 1.0444, "step": 4998 }, { "epoch": 0.13415092314297983, "grad_norm": 0.330078125, "learning_rate": 0.0008047229303636119, "loss": 1.0791, "step": 4999 }, { "epoch": 0.13417775869471876, "grad_norm": 0.310546875, "learning_rate": 0.0008048839393532805, "loss": 0.9698, "step": 5000 }, { "epoch": 0.1342045942464577, "grad_norm": 0.33984375, "learning_rate": 0.0008050449483429491, "loss": 1.0448, "step": 5001 }, { "epoch": 0.13423142979819666, "grad_norm": 0.333984375, "learning_rate": 0.0008052059573326177, "loss": 1.1444, "step": 5002 }, { "epoch": 0.1342582653499356, "grad_norm": 0.333984375, "learning_rate": 0.0008053669663222864, "loss": 1.111, "step": 5003 }, { "epoch": 0.13428510090167453, "grad_norm": 0.30859375, "learning_rate": 0.0008055279753119548, "loss": 0.9761, "step": 5004 }, { "epoch": 0.1343119364534135, "grad_norm": 0.314453125, "learning_rate": 0.0008056889843016234, "loss": 1.0608, "step": 5005 }, { "epoch": 0.13433877200515243, "grad_norm": 0.306640625, "learning_rate": 0.000805849993291292, "loss": 1.0345, "step": 5006 }, { "epoch": 0.13436560755689136, "grad_norm": 0.330078125, "learning_rate": 0.0008060110022809606, "loss": 1.125, "step": 5007 }, { "epoch": 0.13439244310863033, "grad_norm": 0.306640625, "learning_rate": 0.0008061720112706291, "loss": 1.0515, "step": 5008 }, { "epoch": 0.13441927866036926, "grad_norm": 0.333984375, "learning_rate": 0.0008063330202602978, "loss": 1.146, "step": 5009 }, { "epoch": 0.1344461142121082, "grad_norm": 0.333984375, "learning_rate": 0.0008064940292499664, "loss": 1.1373, "step": 5010 }, { "epoch": 0.13447294976384713, "grad_norm": 0.3125, "learning_rate": 0.000806655038239635, "loss": 0.9437, "step": 5011 }, { "epoch": 0.1344997853155861, "grad_norm": 0.341796875, "learning_rate": 0.0008068160472293036, "loss": 1.0604, "step": 5012 }, { "epoch": 0.13452662086732503, "grad_norm": 0.35546875, "learning_rate": 0.0008069770562189721, "loss": 1.1953, "step": 5013 }, { "epoch": 0.13455345641906397, "grad_norm": 0.345703125, "learning_rate": 0.0008071380652086408, "loss": 1.1543, "step": 5014 }, { "epoch": 0.13458029197080293, "grad_norm": 0.302734375, "learning_rate": 0.0008072990741983093, "loss": 0.9442, "step": 5015 }, { "epoch": 0.13460712752254186, "grad_norm": 0.328125, "learning_rate": 0.000807460083187978, "loss": 0.9747, "step": 5016 }, { "epoch": 0.1346339630742808, "grad_norm": 0.3125, "learning_rate": 0.0008076210921776466, "loss": 1.0963, "step": 5017 }, { "epoch": 0.13466079862601976, "grad_norm": 0.33203125, "learning_rate": 0.0008077821011673151, "loss": 1.0192, "step": 5018 }, { "epoch": 0.1346876341777587, "grad_norm": 0.341796875, "learning_rate": 0.0008079431101569837, "loss": 1.0311, "step": 5019 }, { "epoch": 0.13471446972949763, "grad_norm": 0.3125, "learning_rate": 0.0008081041191466522, "loss": 1.0013, "step": 5020 }, { "epoch": 0.1347413052812366, "grad_norm": 0.333984375, "learning_rate": 0.0008082651281363208, "loss": 1.0753, "step": 5021 }, { "epoch": 0.13476814083297553, "grad_norm": 0.30078125, "learning_rate": 0.0008084261371259894, "loss": 0.9271, "step": 5022 }, { "epoch": 0.13479497638471447, "grad_norm": 0.298828125, "learning_rate": 0.000808587146115658, "loss": 0.917, "step": 5023 }, { "epoch": 0.1348218119364534, "grad_norm": 0.337890625, "learning_rate": 0.0008087481551053267, "loss": 1.1077, "step": 5024 }, { "epoch": 0.13484864748819236, "grad_norm": 0.306640625, "learning_rate": 0.0008089091640949952, "loss": 0.9508, "step": 5025 }, { "epoch": 0.1348754830399313, "grad_norm": 0.3203125, "learning_rate": 0.0008090701730846638, "loss": 1.0391, "step": 5026 }, { "epoch": 0.13490231859167023, "grad_norm": 0.3046875, "learning_rate": 0.0008092311820743324, "loss": 1.0199, "step": 5027 }, { "epoch": 0.1349291541434092, "grad_norm": 0.328125, "learning_rate": 0.000809392191064001, "loss": 1.0654, "step": 5028 }, { "epoch": 0.13495598969514813, "grad_norm": 0.33203125, "learning_rate": 0.0008095532000536697, "loss": 1.0666, "step": 5029 }, { "epoch": 0.13498282524688707, "grad_norm": 0.369140625, "learning_rate": 0.0008097142090433382, "loss": 1.1209, "step": 5030 }, { "epoch": 0.13500966079862603, "grad_norm": 0.310546875, "learning_rate": 0.0008098752180330068, "loss": 0.9621, "step": 5031 }, { "epoch": 0.13503649635036497, "grad_norm": 0.322265625, "learning_rate": 0.0008100362270226754, "loss": 1.1288, "step": 5032 }, { "epoch": 0.1350633319021039, "grad_norm": 0.318359375, "learning_rate": 0.000810197236012344, "loss": 0.9718, "step": 5033 }, { "epoch": 0.13509016745384286, "grad_norm": 0.3359375, "learning_rate": 0.0008103582450020126, "loss": 1.012, "step": 5034 }, { "epoch": 0.1351170030055818, "grad_norm": 0.310546875, "learning_rate": 0.0008105192539916811, "loss": 0.959, "step": 5035 }, { "epoch": 0.13514383855732073, "grad_norm": 0.314453125, "learning_rate": 0.0008106802629813496, "loss": 1.0626, "step": 5036 }, { "epoch": 0.13517067410905967, "grad_norm": 0.326171875, "learning_rate": 0.0008108412719710183, "loss": 1.1517, "step": 5037 }, { "epoch": 0.13519750966079863, "grad_norm": 0.333984375, "learning_rate": 0.0008110022809606869, "loss": 1.0747, "step": 5038 }, { "epoch": 0.13522434521253757, "grad_norm": 0.353515625, "learning_rate": 0.0008111632899503554, "loss": 1.1079, "step": 5039 }, { "epoch": 0.1352511807642765, "grad_norm": 0.322265625, "learning_rate": 0.0008113242989400241, "loss": 0.9968, "step": 5040 }, { "epoch": 0.13527801631601546, "grad_norm": 0.333984375, "learning_rate": 0.0008114853079296926, "loss": 1.074, "step": 5041 }, { "epoch": 0.1353048518677544, "grad_norm": 0.31640625, "learning_rate": 0.0008116463169193613, "loss": 0.9775, "step": 5042 }, { "epoch": 0.13533168741949334, "grad_norm": 0.326171875, "learning_rate": 0.0008118073259090299, "loss": 1.1284, "step": 5043 }, { "epoch": 0.1353585229712323, "grad_norm": 0.3125, "learning_rate": 0.0008119683348986984, "loss": 1.0136, "step": 5044 }, { "epoch": 0.13538535852297123, "grad_norm": 0.330078125, "learning_rate": 0.0008121293438883671, "loss": 1.0048, "step": 5045 }, { "epoch": 0.13541219407471017, "grad_norm": 0.30859375, "learning_rate": 0.0008122903528780356, "loss": 0.9855, "step": 5046 }, { "epoch": 0.13543902962644913, "grad_norm": 0.3125, "learning_rate": 0.0008124513618677043, "loss": 1.0764, "step": 5047 }, { "epoch": 0.13546586517818807, "grad_norm": 0.333984375, "learning_rate": 0.0008126123708573729, "loss": 1.0981, "step": 5048 }, { "epoch": 0.135492700729927, "grad_norm": 0.326171875, "learning_rate": 0.0008127733798470414, "loss": 0.9503, "step": 5049 }, { "epoch": 0.13551953628166596, "grad_norm": 0.333984375, "learning_rate": 0.0008129343888367099, "loss": 1.1436, "step": 5050 }, { "epoch": 0.1355463718334049, "grad_norm": 0.314453125, "learning_rate": 0.0008130953978263785, "loss": 1.1072, "step": 5051 }, { "epoch": 0.13557320738514383, "grad_norm": 0.30078125, "learning_rate": 0.0008132564068160472, "loss": 0.973, "step": 5052 }, { "epoch": 0.13560004293688277, "grad_norm": 0.322265625, "learning_rate": 0.0008134174158057157, "loss": 1.1025, "step": 5053 }, { "epoch": 0.13562687848862173, "grad_norm": 0.31640625, "learning_rate": 0.0008135784247953843, "loss": 1.0793, "step": 5054 }, { "epoch": 0.13565371404036067, "grad_norm": 0.306640625, "learning_rate": 0.0008137394337850529, "loss": 1.0397, "step": 5055 }, { "epoch": 0.1356805495920996, "grad_norm": 0.322265625, "learning_rate": 0.0008139004427747215, "loss": 0.9998, "step": 5056 }, { "epoch": 0.13570738514383857, "grad_norm": 0.326171875, "learning_rate": 0.0008140614517643901, "loss": 1.0509, "step": 5057 }, { "epoch": 0.1357342206955775, "grad_norm": 0.314453125, "learning_rate": 0.0008142224607540587, "loss": 1.0169, "step": 5058 }, { "epoch": 0.13576105624731644, "grad_norm": 0.302734375, "learning_rate": 0.0008143834697437273, "loss": 0.995, "step": 5059 }, { "epoch": 0.1357878917990554, "grad_norm": 0.306640625, "learning_rate": 0.0008145444787333959, "loss": 1.0044, "step": 5060 }, { "epoch": 0.13581472735079433, "grad_norm": 0.32421875, "learning_rate": 0.0008147054877230645, "loss": 1.051, "step": 5061 }, { "epoch": 0.13584156290253327, "grad_norm": 0.302734375, "learning_rate": 0.0008148664967127331, "loss": 0.9369, "step": 5062 }, { "epoch": 0.13586839845427223, "grad_norm": 0.3125, "learning_rate": 0.0008150275057024017, "loss": 1.0563, "step": 5063 }, { "epoch": 0.13589523400601117, "grad_norm": 0.3203125, "learning_rate": 0.0008151885146920703, "loss": 1.0524, "step": 5064 }, { "epoch": 0.1359220695577501, "grad_norm": 0.3046875, "learning_rate": 0.0008153495236817389, "loss": 0.9873, "step": 5065 }, { "epoch": 0.13594890510948904, "grad_norm": 0.333984375, "learning_rate": 0.0008155105326714074, "loss": 1.0458, "step": 5066 }, { "epoch": 0.135975740661228, "grad_norm": 0.330078125, "learning_rate": 0.0008156715416610759, "loss": 1.05, "step": 5067 }, { "epoch": 0.13600257621296694, "grad_norm": 0.30859375, "learning_rate": 0.0008158325506507446, "loss": 1.0583, "step": 5068 }, { "epoch": 0.13602941176470587, "grad_norm": 0.3359375, "learning_rate": 0.0008159935596404131, "loss": 1.1581, "step": 5069 }, { "epoch": 0.13605624731644483, "grad_norm": 0.33203125, "learning_rate": 0.0008161545686300817, "loss": 1.0544, "step": 5070 }, { "epoch": 0.13608308286818377, "grad_norm": 0.3125, "learning_rate": 0.0008163155776197504, "loss": 1.0181, "step": 5071 }, { "epoch": 0.1361099184199227, "grad_norm": 0.3203125, "learning_rate": 0.0008164765866094189, "loss": 1.0297, "step": 5072 }, { "epoch": 0.13613675397166167, "grad_norm": 0.330078125, "learning_rate": 0.0008166375955990876, "loss": 1.1542, "step": 5073 }, { "epoch": 0.1361635895234006, "grad_norm": 0.30859375, "learning_rate": 0.0008167986045887561, "loss": 0.9758, "step": 5074 }, { "epoch": 0.13619042507513954, "grad_norm": 0.337890625, "learning_rate": 0.0008169596135784247, "loss": 1.0971, "step": 5075 }, { "epoch": 0.1362172606268785, "grad_norm": 0.294921875, "learning_rate": 0.0008171206225680934, "loss": 0.9815, "step": 5076 }, { "epoch": 0.13624409617861744, "grad_norm": 0.33984375, "learning_rate": 0.0008172816315577619, "loss": 1.1403, "step": 5077 }, { "epoch": 0.13627093173035637, "grad_norm": 0.31640625, "learning_rate": 0.0008174426405474306, "loss": 1.0555, "step": 5078 }, { "epoch": 0.13629776728209533, "grad_norm": 0.314453125, "learning_rate": 0.0008176036495370991, "loss": 1.0333, "step": 5079 }, { "epoch": 0.13632460283383427, "grad_norm": 0.33984375, "learning_rate": 0.0008177646585267677, "loss": 1.1447, "step": 5080 }, { "epoch": 0.1363514383855732, "grad_norm": 0.337890625, "learning_rate": 0.0008179256675164362, "loss": 1.06, "step": 5081 }, { "epoch": 0.13637827393731214, "grad_norm": 0.328125, "learning_rate": 0.0008180866765061048, "loss": 1.0161, "step": 5082 }, { "epoch": 0.1364051094890511, "grad_norm": 0.318359375, "learning_rate": 0.0008182476854957733, "loss": 1.0645, "step": 5083 }, { "epoch": 0.13643194504079004, "grad_norm": 0.310546875, "learning_rate": 0.000818408694485442, "loss": 1.0896, "step": 5084 }, { "epoch": 0.13645878059252897, "grad_norm": 0.328125, "learning_rate": 0.0008185697034751106, "loss": 1.0273, "step": 5085 }, { "epoch": 0.13648561614426793, "grad_norm": 0.32421875, "learning_rate": 0.0008187307124647792, "loss": 1.0614, "step": 5086 }, { "epoch": 0.13651245169600687, "grad_norm": 0.30078125, "learning_rate": 0.0008188917214544478, "loss": 1.0002, "step": 5087 }, { "epoch": 0.1365392872477458, "grad_norm": 0.306640625, "learning_rate": 0.0008190527304441163, "loss": 0.9952, "step": 5088 }, { "epoch": 0.13656612279948477, "grad_norm": 0.3125, "learning_rate": 0.000819213739433785, "loss": 1.0069, "step": 5089 }, { "epoch": 0.1365929583512237, "grad_norm": 0.3125, "learning_rate": 0.0008193747484234536, "loss": 0.9618, "step": 5090 }, { "epoch": 0.13661979390296264, "grad_norm": 0.33203125, "learning_rate": 0.0008195357574131222, "loss": 1.0937, "step": 5091 }, { "epoch": 0.1366466294547016, "grad_norm": 0.318359375, "learning_rate": 0.0008196967664027908, "loss": 1.038, "step": 5092 }, { "epoch": 0.13667346500644054, "grad_norm": 0.3359375, "learning_rate": 0.0008198577753924593, "loss": 1.1263, "step": 5093 }, { "epoch": 0.13670030055817947, "grad_norm": 0.32421875, "learning_rate": 0.000820018784382128, "loss": 1.0933, "step": 5094 }, { "epoch": 0.1367271361099184, "grad_norm": 0.330078125, "learning_rate": 0.0008201797933717966, "loss": 1.1528, "step": 5095 }, { "epoch": 0.13675397166165737, "grad_norm": 0.3203125, "learning_rate": 0.000820340802361465, "loss": 1.019, "step": 5096 }, { "epoch": 0.1367808072133963, "grad_norm": 0.29296875, "learning_rate": 0.0008205018113511337, "loss": 0.9018, "step": 5097 }, { "epoch": 0.13680764276513524, "grad_norm": 0.330078125, "learning_rate": 0.0008206628203408022, "loss": 1.0245, "step": 5098 }, { "epoch": 0.1368344783168742, "grad_norm": 0.3125, "learning_rate": 0.0008208238293304709, "loss": 1.0132, "step": 5099 }, { "epoch": 0.13686131386861314, "grad_norm": 0.3203125, "learning_rate": 0.0008209848383201394, "loss": 1.0473, "step": 5100 }, { "epoch": 0.13688814942035207, "grad_norm": 0.333984375, "learning_rate": 0.000821145847309808, "loss": 0.9566, "step": 5101 }, { "epoch": 0.13691498497209104, "grad_norm": 0.310546875, "learning_rate": 0.0008213068562994766, "loss": 0.919, "step": 5102 }, { "epoch": 0.13694182052382997, "grad_norm": 0.322265625, "learning_rate": 0.0008214678652891452, "loss": 1.1185, "step": 5103 }, { "epoch": 0.1369686560755689, "grad_norm": 0.328125, "learning_rate": 0.0008216288742788139, "loss": 1.0321, "step": 5104 }, { "epoch": 0.13699549162730787, "grad_norm": 0.314453125, "learning_rate": 0.0008217898832684824, "loss": 1.0151, "step": 5105 }, { "epoch": 0.1370223271790468, "grad_norm": 0.314453125, "learning_rate": 0.000821950892258151, "loss": 1.0019, "step": 5106 }, { "epoch": 0.13704916273078574, "grad_norm": 0.30078125, "learning_rate": 0.0008221119012478196, "loss": 0.9323, "step": 5107 }, { "epoch": 0.1370759982825247, "grad_norm": 0.322265625, "learning_rate": 0.0008222729102374882, "loss": 1.0656, "step": 5108 }, { "epoch": 0.13710283383426364, "grad_norm": 0.291015625, "learning_rate": 0.0008224339192271569, "loss": 0.916, "step": 5109 }, { "epoch": 0.13712966938600257, "grad_norm": 0.32421875, "learning_rate": 0.0008225949282168254, "loss": 0.9651, "step": 5110 }, { "epoch": 0.1371565049377415, "grad_norm": 0.310546875, "learning_rate": 0.000822755937206494, "loss": 1.0372, "step": 5111 }, { "epoch": 0.13718334048948047, "grad_norm": 0.326171875, "learning_rate": 0.0008229169461961625, "loss": 1.1184, "step": 5112 }, { "epoch": 0.1372101760412194, "grad_norm": 0.31640625, "learning_rate": 0.0008230779551858311, "loss": 1.0802, "step": 5113 }, { "epoch": 0.13723701159295834, "grad_norm": 0.306640625, "learning_rate": 0.0008232389641754996, "loss": 1.0287, "step": 5114 }, { "epoch": 0.1372638471446973, "grad_norm": 0.330078125, "learning_rate": 0.0008233999731651683, "loss": 1.0747, "step": 5115 }, { "epoch": 0.13729068269643624, "grad_norm": 0.314453125, "learning_rate": 0.0008235609821548368, "loss": 1.0512, "step": 5116 }, { "epoch": 0.13731751824817517, "grad_norm": 0.31640625, "learning_rate": 0.0008237219911445055, "loss": 1.0452, "step": 5117 }, { "epoch": 0.13734435379991414, "grad_norm": 0.318359375, "learning_rate": 0.0008238830001341741, "loss": 1.0733, "step": 5118 }, { "epoch": 0.13737118935165307, "grad_norm": 0.294921875, "learning_rate": 0.0008240440091238426, "loss": 0.9969, "step": 5119 }, { "epoch": 0.137398024903392, "grad_norm": 0.330078125, "learning_rate": 0.0008242050181135113, "loss": 1.1351, "step": 5120 }, { "epoch": 0.13742486045513097, "grad_norm": 0.3046875, "learning_rate": 0.0008243660271031798, "loss": 0.9887, "step": 5121 }, { "epoch": 0.1374516960068699, "grad_norm": 0.32421875, "learning_rate": 0.0008245270360928485, "loss": 1.0389, "step": 5122 }, { "epoch": 0.13747853155860884, "grad_norm": 0.318359375, "learning_rate": 0.0008246880450825171, "loss": 1.0688, "step": 5123 }, { "epoch": 0.13750536711034778, "grad_norm": 0.30078125, "learning_rate": 0.0008248490540721856, "loss": 0.9335, "step": 5124 }, { "epoch": 0.13753220266208674, "grad_norm": 0.322265625, "learning_rate": 0.0008250100630618543, "loss": 1.0492, "step": 5125 }, { "epoch": 0.13755903821382567, "grad_norm": 0.322265625, "learning_rate": 0.0008251710720515228, "loss": 1.0613, "step": 5126 }, { "epoch": 0.1375858737655646, "grad_norm": 0.283203125, "learning_rate": 0.0008253320810411914, "loss": 0.8933, "step": 5127 }, { "epoch": 0.13761270931730357, "grad_norm": 0.337890625, "learning_rate": 0.0008254930900308599, "loss": 1.0258, "step": 5128 }, { "epoch": 0.1376395448690425, "grad_norm": 0.31640625, "learning_rate": 0.0008256540990205285, "loss": 1.0344, "step": 5129 }, { "epoch": 0.13766638042078144, "grad_norm": 0.3203125, "learning_rate": 0.0008258151080101972, "loss": 1.0779, "step": 5130 }, { "epoch": 0.1376932159725204, "grad_norm": 0.298828125, "learning_rate": 0.0008259761169998657, "loss": 0.9249, "step": 5131 }, { "epoch": 0.13772005152425934, "grad_norm": 0.32421875, "learning_rate": 0.0008261371259895344, "loss": 0.9416, "step": 5132 }, { "epoch": 0.13774688707599828, "grad_norm": 0.30859375, "learning_rate": 0.0008262981349792029, "loss": 1.0166, "step": 5133 }, { "epoch": 0.13777372262773724, "grad_norm": 0.322265625, "learning_rate": 0.0008264591439688715, "loss": 1.0537, "step": 5134 }, { "epoch": 0.13780055817947617, "grad_norm": 0.318359375, "learning_rate": 0.0008266201529585401, "loss": 0.9233, "step": 5135 }, { "epoch": 0.1378273937312151, "grad_norm": 0.33984375, "learning_rate": 0.0008267811619482087, "loss": 1.0654, "step": 5136 }, { "epoch": 0.13785422928295404, "grad_norm": 0.306640625, "learning_rate": 0.0008269421709378773, "loss": 0.9929, "step": 5137 }, { "epoch": 0.137881064834693, "grad_norm": 0.3125, "learning_rate": 0.0008271031799275459, "loss": 1.0468, "step": 5138 }, { "epoch": 0.13790790038643194, "grad_norm": 0.314453125, "learning_rate": 0.0008272641889172145, "loss": 1.0376, "step": 5139 }, { "epoch": 0.13793473593817088, "grad_norm": 0.31640625, "learning_rate": 0.0008274251979068831, "loss": 1.0225, "step": 5140 }, { "epoch": 0.13796157148990984, "grad_norm": 0.3359375, "learning_rate": 0.0008275862068965517, "loss": 1.1353, "step": 5141 }, { "epoch": 0.13798840704164878, "grad_norm": 0.3046875, "learning_rate": 0.0008277472158862203, "loss": 0.9861, "step": 5142 }, { "epoch": 0.1380152425933877, "grad_norm": 0.326171875, "learning_rate": 0.0008279082248758888, "loss": 1.0882, "step": 5143 }, { "epoch": 0.13804207814512667, "grad_norm": 0.322265625, "learning_rate": 0.0008280692338655574, "loss": 1.034, "step": 5144 }, { "epoch": 0.1380689136968656, "grad_norm": 0.326171875, "learning_rate": 0.000828230242855226, "loss": 1.0484, "step": 5145 }, { "epoch": 0.13809574924860454, "grad_norm": 0.322265625, "learning_rate": 0.0008283912518448946, "loss": 0.9581, "step": 5146 }, { "epoch": 0.1381225848003435, "grad_norm": 0.33203125, "learning_rate": 0.0008285522608345631, "loss": 1.0265, "step": 5147 }, { "epoch": 0.13814942035208244, "grad_norm": 0.30859375, "learning_rate": 0.0008287132698242318, "loss": 0.9687, "step": 5148 }, { "epoch": 0.13817625590382138, "grad_norm": 0.318359375, "learning_rate": 0.0008288742788139004, "loss": 1.0714, "step": 5149 }, { "epoch": 0.13820309145556034, "grad_norm": 0.34765625, "learning_rate": 0.000829035287803569, "loss": 1.0738, "step": 5150 }, { "epoch": 0.13822992700729927, "grad_norm": 0.322265625, "learning_rate": 0.0008291962967932376, "loss": 1.0119, "step": 5151 }, { "epoch": 0.1382567625590382, "grad_norm": 0.322265625, "learning_rate": 0.0008293573057829061, "loss": 0.9391, "step": 5152 }, { "epoch": 0.13828359811077715, "grad_norm": 0.310546875, "learning_rate": 0.0008295183147725748, "loss": 1.0255, "step": 5153 }, { "epoch": 0.1383104336625161, "grad_norm": 0.3203125, "learning_rate": 0.0008296793237622433, "loss": 1.0689, "step": 5154 }, { "epoch": 0.13833726921425504, "grad_norm": 0.31640625, "learning_rate": 0.0008298403327519119, "loss": 1.0003, "step": 5155 }, { "epoch": 0.13836410476599398, "grad_norm": 0.3046875, "learning_rate": 0.0008300013417415806, "loss": 0.9484, "step": 5156 }, { "epoch": 0.13839094031773294, "grad_norm": 0.349609375, "learning_rate": 0.0008301623507312491, "loss": 1.205, "step": 5157 }, { "epoch": 0.13841777586947188, "grad_norm": 0.330078125, "learning_rate": 0.0008303233597209177, "loss": 1.0066, "step": 5158 }, { "epoch": 0.1384446114212108, "grad_norm": 0.341796875, "learning_rate": 0.0008304843687105862, "loss": 1.1116, "step": 5159 }, { "epoch": 0.13847144697294977, "grad_norm": 0.3125, "learning_rate": 0.0008306453777002548, "loss": 1.1044, "step": 5160 }, { "epoch": 0.1384982825246887, "grad_norm": 0.30078125, "learning_rate": 0.0008308063866899234, "loss": 0.9785, "step": 5161 }, { "epoch": 0.13852511807642764, "grad_norm": 0.30859375, "learning_rate": 0.000830967395679592, "loss": 1.0092, "step": 5162 }, { "epoch": 0.1385519536281666, "grad_norm": 0.322265625, "learning_rate": 0.0008311284046692607, "loss": 0.8953, "step": 5163 }, { "epoch": 0.13857878917990554, "grad_norm": 0.3359375, "learning_rate": 0.0008312894136589292, "loss": 1.1495, "step": 5164 }, { "epoch": 0.13860562473164448, "grad_norm": 0.32421875, "learning_rate": 0.0008314504226485978, "loss": 1.0878, "step": 5165 }, { "epoch": 0.1386324602833834, "grad_norm": 0.32421875, "learning_rate": 0.0008316114316382664, "loss": 0.9645, "step": 5166 }, { "epoch": 0.13865929583512238, "grad_norm": 0.33203125, "learning_rate": 0.000831772440627935, "loss": 1.0758, "step": 5167 }, { "epoch": 0.1386861313868613, "grad_norm": 0.333984375, "learning_rate": 0.0008319334496176037, "loss": 1.0425, "step": 5168 }, { "epoch": 0.13871296693860025, "grad_norm": 0.306640625, "learning_rate": 0.0008320944586072722, "loss": 1.0145, "step": 5169 }, { "epoch": 0.1387398024903392, "grad_norm": 0.322265625, "learning_rate": 0.0008322554675969408, "loss": 0.9827, "step": 5170 }, { "epoch": 0.13876663804207814, "grad_norm": 0.310546875, "learning_rate": 0.0008324164765866094, "loss": 0.9822, "step": 5171 }, { "epoch": 0.13879347359381708, "grad_norm": 0.30859375, "learning_rate": 0.000832577485576278, "loss": 0.9663, "step": 5172 }, { "epoch": 0.13882030914555604, "grad_norm": 0.34375, "learning_rate": 0.0008327384945659465, "loss": 1.1367, "step": 5173 }, { "epoch": 0.13884714469729498, "grad_norm": 0.314453125, "learning_rate": 0.0008328995035556151, "loss": 0.9564, "step": 5174 }, { "epoch": 0.1388739802490339, "grad_norm": 0.32421875, "learning_rate": 0.0008330605125452836, "loss": 1.0127, "step": 5175 }, { "epoch": 0.13890081580077288, "grad_norm": 0.3046875, "learning_rate": 0.0008332215215349523, "loss": 1.0304, "step": 5176 }, { "epoch": 0.1389276513525118, "grad_norm": 0.318359375, "learning_rate": 0.0008333825305246209, "loss": 1.0529, "step": 5177 }, { "epoch": 0.13895448690425075, "grad_norm": 0.310546875, "learning_rate": 0.0008335435395142894, "loss": 1.101, "step": 5178 }, { "epoch": 0.1389813224559897, "grad_norm": 0.330078125, "learning_rate": 0.0008337045485039581, "loss": 1.0604, "step": 5179 }, { "epoch": 0.13900815800772864, "grad_norm": 0.310546875, "learning_rate": 0.0008338655574936266, "loss": 1.0885, "step": 5180 }, { "epoch": 0.13903499355946758, "grad_norm": 0.3203125, "learning_rate": 0.0008340265664832952, "loss": 1.0734, "step": 5181 }, { "epoch": 0.13906182911120651, "grad_norm": 0.322265625, "learning_rate": 0.0008341875754729639, "loss": 1.1298, "step": 5182 }, { "epoch": 0.13908866466294548, "grad_norm": 0.306640625, "learning_rate": 0.0008343485844626324, "loss": 0.9886, "step": 5183 }, { "epoch": 0.1391155002146844, "grad_norm": 0.3203125, "learning_rate": 0.0008345095934523011, "loss": 1.0741, "step": 5184 }, { "epoch": 0.13914233576642335, "grad_norm": 0.302734375, "learning_rate": 0.0008346706024419696, "loss": 1.063, "step": 5185 }, { "epoch": 0.1391691713181623, "grad_norm": 0.29296875, "learning_rate": 0.0008348316114316382, "loss": 0.9993, "step": 5186 }, { "epoch": 0.13919600686990125, "grad_norm": 0.306640625, "learning_rate": 0.0008349926204213068, "loss": 0.997, "step": 5187 }, { "epoch": 0.13922284242164018, "grad_norm": 0.306640625, "learning_rate": 0.0008351536294109754, "loss": 1.037, "step": 5188 }, { "epoch": 0.13924967797337914, "grad_norm": 0.31640625, "learning_rate": 0.0008353146384006439, "loss": 1.038, "step": 5189 }, { "epoch": 0.13927651352511808, "grad_norm": 0.322265625, "learning_rate": 0.0008354756473903125, "loss": 1.0353, "step": 5190 }, { "epoch": 0.139303349076857, "grad_norm": 0.3125, "learning_rate": 0.0008356366563799811, "loss": 1.0281, "step": 5191 }, { "epoch": 0.13933018462859598, "grad_norm": 0.302734375, "learning_rate": 0.0008357976653696497, "loss": 0.9486, "step": 5192 }, { "epoch": 0.1393570201803349, "grad_norm": 0.326171875, "learning_rate": 0.0008359586743593183, "loss": 1.0161, "step": 5193 }, { "epoch": 0.13938385573207385, "grad_norm": 0.283203125, "learning_rate": 0.0008361196833489868, "loss": 0.8741, "step": 5194 }, { "epoch": 0.13941069128381278, "grad_norm": 0.318359375, "learning_rate": 0.0008362806923386555, "loss": 1.0443, "step": 5195 }, { "epoch": 0.13943752683555174, "grad_norm": 0.330078125, "learning_rate": 0.0008364417013283241, "loss": 1.0984, "step": 5196 }, { "epoch": 0.13946436238729068, "grad_norm": 0.30859375, "learning_rate": 0.0008366027103179927, "loss": 0.9932, "step": 5197 }, { "epoch": 0.13949119793902962, "grad_norm": 0.341796875, "learning_rate": 0.0008367637193076613, "loss": 1.1571, "step": 5198 }, { "epoch": 0.13951803349076858, "grad_norm": 0.318359375, "learning_rate": 0.0008369247282973298, "loss": 0.9885, "step": 5199 }, { "epoch": 0.1395448690425075, "grad_norm": 0.314453125, "learning_rate": 0.0008370857372869985, "loss": 1.0171, "step": 5200 }, { "epoch": 0.13957170459424645, "grad_norm": 0.2890625, "learning_rate": 0.0008372467462766671, "loss": 0.9353, "step": 5201 }, { "epoch": 0.1395985401459854, "grad_norm": 0.326171875, "learning_rate": 0.0008374077552663357, "loss": 1.1055, "step": 5202 }, { "epoch": 0.13962537569772435, "grad_norm": 0.302734375, "learning_rate": 0.0008375687642560043, "loss": 0.9834, "step": 5203 }, { "epoch": 0.13965221124946328, "grad_norm": 0.330078125, "learning_rate": 0.0008377297732456728, "loss": 1.1079, "step": 5204 }, { "epoch": 0.13967904680120224, "grad_norm": 0.318359375, "learning_rate": 0.0008378907822353414, "loss": 1.079, "step": 5205 }, { "epoch": 0.13970588235294118, "grad_norm": 0.30859375, "learning_rate": 0.0008380517912250099, "loss": 1.1106, "step": 5206 }, { "epoch": 0.13973271790468011, "grad_norm": 0.31640625, "learning_rate": 0.0008382128002146786, "loss": 1.0722, "step": 5207 }, { "epoch": 0.13975955345641908, "grad_norm": 0.322265625, "learning_rate": 0.0008383738092043471, "loss": 1.0303, "step": 5208 }, { "epoch": 0.139786389008158, "grad_norm": 0.30078125, "learning_rate": 0.0008385348181940157, "loss": 0.9486, "step": 5209 }, { "epoch": 0.13981322455989695, "grad_norm": 0.296875, "learning_rate": 0.0008386958271836844, "loss": 0.9276, "step": 5210 }, { "epoch": 0.13984006011163588, "grad_norm": 0.3046875, "learning_rate": 0.0008388568361733529, "loss": 0.9956, "step": 5211 }, { "epoch": 0.13986689566337485, "grad_norm": 0.3359375, "learning_rate": 0.0008390178451630216, "loss": 1.0196, "step": 5212 }, { "epoch": 0.13989373121511378, "grad_norm": 0.310546875, "learning_rate": 0.0008391788541526901, "loss": 1.0175, "step": 5213 }, { "epoch": 0.13992056676685272, "grad_norm": 0.32421875, "learning_rate": 0.0008393398631423587, "loss": 1.043, "step": 5214 }, { "epoch": 0.13994740231859168, "grad_norm": 0.33203125, "learning_rate": 0.0008395008721320274, "loss": 1.1566, "step": 5215 }, { "epoch": 0.13997423787033061, "grad_norm": 0.302734375, "learning_rate": 0.0008396618811216959, "loss": 0.9852, "step": 5216 }, { "epoch": 0.14000107342206955, "grad_norm": 0.32421875, "learning_rate": 0.0008398228901113645, "loss": 1.0187, "step": 5217 }, { "epoch": 0.1400279089738085, "grad_norm": 0.330078125, "learning_rate": 0.0008399838991010331, "loss": 1.0384, "step": 5218 }, { "epoch": 0.14005474452554745, "grad_norm": 0.29296875, "learning_rate": 0.0008401449080907017, "loss": 0.9947, "step": 5219 }, { "epoch": 0.14008158007728638, "grad_norm": 0.314453125, "learning_rate": 0.0008403059170803702, "loss": 1.1407, "step": 5220 }, { "epoch": 0.14010841562902535, "grad_norm": 0.2890625, "learning_rate": 0.0008404669260700388, "loss": 0.9691, "step": 5221 }, { "epoch": 0.14013525118076428, "grad_norm": 0.318359375, "learning_rate": 0.0008406279350597073, "loss": 1.0656, "step": 5222 }, { "epoch": 0.14016208673250322, "grad_norm": 0.32421875, "learning_rate": 0.000840788944049376, "loss": 1.1472, "step": 5223 }, { "epoch": 0.14018892228424215, "grad_norm": 0.322265625, "learning_rate": 0.0008409499530390446, "loss": 1.1357, "step": 5224 }, { "epoch": 0.14021575783598111, "grad_norm": 0.314453125, "learning_rate": 0.0008411109620287132, "loss": 0.9422, "step": 5225 }, { "epoch": 0.14024259338772005, "grad_norm": 0.28515625, "learning_rate": 0.0008412719710183818, "loss": 0.8644, "step": 5226 }, { "epoch": 0.14026942893945898, "grad_norm": 0.306640625, "learning_rate": 0.0008414329800080503, "loss": 1.001, "step": 5227 }, { "epoch": 0.14029626449119795, "grad_norm": 0.296875, "learning_rate": 0.000841593988997719, "loss": 0.9688, "step": 5228 }, { "epoch": 0.14032310004293688, "grad_norm": 0.330078125, "learning_rate": 0.0008417549979873876, "loss": 1.0232, "step": 5229 }, { "epoch": 0.14034993559467582, "grad_norm": 0.31640625, "learning_rate": 0.0008419160069770561, "loss": 1.0575, "step": 5230 }, { "epoch": 0.14037677114641478, "grad_norm": 0.32421875, "learning_rate": 0.0008420770159667248, "loss": 1.0644, "step": 5231 }, { "epoch": 0.14040360669815372, "grad_norm": 0.30859375, "learning_rate": 0.0008422380249563933, "loss": 0.9766, "step": 5232 }, { "epoch": 0.14043044224989265, "grad_norm": 0.322265625, "learning_rate": 0.000842399033946062, "loss": 0.981, "step": 5233 }, { "epoch": 0.1404572778016316, "grad_norm": 0.31640625, "learning_rate": 0.0008425600429357306, "loss": 1.0756, "step": 5234 }, { "epoch": 0.14048411335337055, "grad_norm": 0.322265625, "learning_rate": 0.0008427210519253991, "loss": 1.082, "step": 5235 }, { "epoch": 0.14051094890510948, "grad_norm": 0.310546875, "learning_rate": 0.0008428820609150676, "loss": 1.0346, "step": 5236 }, { "epoch": 0.14053778445684842, "grad_norm": 0.296875, "learning_rate": 0.0008430430699047362, "loss": 0.9779, "step": 5237 }, { "epoch": 0.14056462000858738, "grad_norm": 0.3125, "learning_rate": 0.0008432040788944049, "loss": 1.0486, "step": 5238 }, { "epoch": 0.14059145556032632, "grad_norm": 0.310546875, "learning_rate": 0.0008433650878840734, "loss": 0.9754, "step": 5239 }, { "epoch": 0.14061829111206525, "grad_norm": 0.318359375, "learning_rate": 0.000843526096873742, "loss": 1.0512, "step": 5240 }, { "epoch": 0.14064512666380422, "grad_norm": 0.3125, "learning_rate": 0.0008436871058634106, "loss": 1.0442, "step": 5241 }, { "epoch": 0.14067196221554315, "grad_norm": 0.31640625, "learning_rate": 0.0008438481148530792, "loss": 1.0732, "step": 5242 }, { "epoch": 0.14069879776728209, "grad_norm": 0.314453125, "learning_rate": 0.0008440091238427479, "loss": 1.0323, "step": 5243 }, { "epoch": 0.14072563331902105, "grad_norm": 0.306640625, "learning_rate": 0.0008441701328324164, "loss": 1.0514, "step": 5244 }, { "epoch": 0.14075246887075998, "grad_norm": 0.3125, "learning_rate": 0.000844331141822085, "loss": 1.0817, "step": 5245 }, { "epoch": 0.14077930442249892, "grad_norm": 0.298828125, "learning_rate": 0.0008444921508117536, "loss": 0.9368, "step": 5246 }, { "epoch": 0.14080613997423788, "grad_norm": 0.296875, "learning_rate": 0.0008446531598014222, "loss": 0.9306, "step": 5247 }, { "epoch": 0.14083297552597682, "grad_norm": 0.322265625, "learning_rate": 0.0008448141687910909, "loss": 1.0203, "step": 5248 }, { "epoch": 0.14085981107771575, "grad_norm": 0.30078125, "learning_rate": 0.0008449751777807594, "loss": 1.0325, "step": 5249 }, { "epoch": 0.14088664662945471, "grad_norm": 0.29296875, "learning_rate": 0.000845136186770428, "loss": 0.9279, "step": 5250 }, { "epoch": 0.14091348218119365, "grad_norm": 0.310546875, "learning_rate": 0.0008452971957600965, "loss": 0.9343, "step": 5251 }, { "epoch": 0.14094031773293259, "grad_norm": 0.314453125, "learning_rate": 0.0008454582047497651, "loss": 1.0357, "step": 5252 }, { "epoch": 0.14096715328467152, "grad_norm": 0.302734375, "learning_rate": 0.0008456192137394336, "loss": 1.026, "step": 5253 }, { "epoch": 0.14099398883641048, "grad_norm": 0.330078125, "learning_rate": 0.0008457802227291023, "loss": 1.1316, "step": 5254 }, { "epoch": 0.14102082438814942, "grad_norm": 0.318359375, "learning_rate": 0.0008459412317187708, "loss": 1.0677, "step": 5255 }, { "epoch": 0.14104765993988835, "grad_norm": 0.310546875, "learning_rate": 0.0008461022407084395, "loss": 1.0147, "step": 5256 }, { "epoch": 0.14107449549162732, "grad_norm": 0.296875, "learning_rate": 0.0008462632496981081, "loss": 0.9481, "step": 5257 }, { "epoch": 0.14110133104336625, "grad_norm": 0.291015625, "learning_rate": 0.0008464242586877766, "loss": 0.9339, "step": 5258 }, { "epoch": 0.1411281665951052, "grad_norm": 0.31640625, "learning_rate": 0.0008465852676774453, "loss": 0.9908, "step": 5259 }, { "epoch": 0.14115500214684415, "grad_norm": 0.337890625, "learning_rate": 0.0008467462766671138, "loss": 1.1485, "step": 5260 }, { "epoch": 0.14118183769858308, "grad_norm": 0.3125, "learning_rate": 0.0008469072856567824, "loss": 0.9301, "step": 5261 }, { "epoch": 0.14120867325032202, "grad_norm": 0.341796875, "learning_rate": 0.0008470682946464511, "loss": 1.0957, "step": 5262 }, { "epoch": 0.14123550880206098, "grad_norm": 0.322265625, "learning_rate": 0.0008472293036361196, "loss": 1.0477, "step": 5263 }, { "epoch": 0.14126234435379992, "grad_norm": 0.322265625, "learning_rate": 0.0008473903126257883, "loss": 1.0725, "step": 5264 }, { "epoch": 0.14128917990553885, "grad_norm": 0.30078125, "learning_rate": 0.0008475513216154568, "loss": 0.9326, "step": 5265 }, { "epoch": 0.1413160154572778, "grad_norm": 0.32421875, "learning_rate": 0.0008477123306051254, "loss": 0.993, "step": 5266 }, { "epoch": 0.14134285100901675, "grad_norm": 0.3046875, "learning_rate": 0.0008478733395947939, "loss": 1.0126, "step": 5267 }, { "epoch": 0.1413696865607557, "grad_norm": 0.296875, "learning_rate": 0.0008480343485844625, "loss": 0.9032, "step": 5268 }, { "epoch": 0.14139652211249462, "grad_norm": 0.298828125, "learning_rate": 0.0008481953575741312, "loss": 0.9996, "step": 5269 }, { "epoch": 0.14142335766423358, "grad_norm": 0.314453125, "learning_rate": 0.0008483563665637997, "loss": 1.0532, "step": 5270 }, { "epoch": 0.14145019321597252, "grad_norm": 0.322265625, "learning_rate": 0.0008485173755534683, "loss": 1.0254, "step": 5271 }, { "epoch": 0.14147702876771145, "grad_norm": 0.294921875, "learning_rate": 0.0008486783845431369, "loss": 0.9112, "step": 5272 }, { "epoch": 0.14150386431945042, "grad_norm": 0.326171875, "learning_rate": 0.0008488393935328055, "loss": 1.1048, "step": 5273 }, { "epoch": 0.14153069987118935, "grad_norm": 0.33203125, "learning_rate": 0.000849000402522474, "loss": 1.0749, "step": 5274 }, { "epoch": 0.1415575354229283, "grad_norm": 0.296875, "learning_rate": 0.0008491614115121427, "loss": 0.9706, "step": 5275 }, { "epoch": 0.14158437097466725, "grad_norm": 0.310546875, "learning_rate": 0.0008493224205018113, "loss": 1.0057, "step": 5276 }, { "epoch": 0.14161120652640619, "grad_norm": 0.3359375, "learning_rate": 0.0008494834294914799, "loss": 1.096, "step": 5277 }, { "epoch": 0.14163804207814512, "grad_norm": 0.318359375, "learning_rate": 0.0008496444384811485, "loss": 1.0124, "step": 5278 }, { "epoch": 0.14166487762988408, "grad_norm": 0.291015625, "learning_rate": 0.000849805447470817, "loss": 0.9342, "step": 5279 }, { "epoch": 0.14169171318162302, "grad_norm": 0.31640625, "learning_rate": 0.0008499664564604857, "loss": 0.9801, "step": 5280 }, { "epoch": 0.14171854873336195, "grad_norm": 0.31640625, "learning_rate": 0.0008501274654501543, "loss": 1.1223, "step": 5281 }, { "epoch": 0.1417453842851009, "grad_norm": 0.310546875, "learning_rate": 0.0008502884744398228, "loss": 0.9262, "step": 5282 }, { "epoch": 0.14177221983683985, "grad_norm": 0.31640625, "learning_rate": 0.0008504494834294914, "loss": 1.0063, "step": 5283 }, { "epoch": 0.1417990553885788, "grad_norm": 0.29296875, "learning_rate": 0.0008506104924191599, "loss": 0.9195, "step": 5284 }, { "epoch": 0.14182589094031772, "grad_norm": 0.29296875, "learning_rate": 0.0008507715014088286, "loss": 0.8902, "step": 5285 }, { "epoch": 0.14185272649205669, "grad_norm": 0.291015625, "learning_rate": 0.0008509325103984971, "loss": 0.9488, "step": 5286 }, { "epoch": 0.14187956204379562, "grad_norm": 0.326171875, "learning_rate": 0.0008510935193881658, "loss": 1.0778, "step": 5287 }, { "epoch": 0.14190639759553456, "grad_norm": 0.314453125, "learning_rate": 0.0008512545283778344, "loss": 1.0552, "step": 5288 }, { "epoch": 0.14193323314727352, "grad_norm": 0.296875, "learning_rate": 0.0008514155373675029, "loss": 1.0341, "step": 5289 }, { "epoch": 0.14196006869901245, "grad_norm": 0.314453125, "learning_rate": 0.0008515765463571716, "loss": 1.035, "step": 5290 }, { "epoch": 0.1419869042507514, "grad_norm": 0.32421875, "learning_rate": 0.0008517375553468401, "loss": 1.0721, "step": 5291 }, { "epoch": 0.14201373980249035, "grad_norm": 0.31640625, "learning_rate": 0.0008518985643365088, "loss": 1.0912, "step": 5292 }, { "epoch": 0.1420405753542293, "grad_norm": 0.30859375, "learning_rate": 0.0008520595733261773, "loss": 1.0222, "step": 5293 }, { "epoch": 0.14206741090596822, "grad_norm": 0.3125, "learning_rate": 0.0008522205823158459, "loss": 1.0168, "step": 5294 }, { "epoch": 0.14209424645770716, "grad_norm": 0.3203125, "learning_rate": 0.0008523815913055146, "loss": 1.0494, "step": 5295 }, { "epoch": 0.14212108200944612, "grad_norm": 0.333984375, "learning_rate": 0.0008525426002951831, "loss": 1.1193, "step": 5296 }, { "epoch": 0.14214791756118506, "grad_norm": 0.322265625, "learning_rate": 0.0008527036092848516, "loss": 1.024, "step": 5297 }, { "epoch": 0.142174753112924, "grad_norm": 0.30859375, "learning_rate": 0.0008528646182745202, "loss": 1.0474, "step": 5298 }, { "epoch": 0.14220158866466295, "grad_norm": 0.318359375, "learning_rate": 0.0008530256272641888, "loss": 1.0377, "step": 5299 }, { "epoch": 0.1422284242164019, "grad_norm": 0.310546875, "learning_rate": 0.0008531866362538574, "loss": 0.9739, "step": 5300 }, { "epoch": 0.14225525976814082, "grad_norm": 0.3203125, "learning_rate": 0.000853347645243526, "loss": 1.0719, "step": 5301 }, { "epoch": 0.1422820953198798, "grad_norm": 0.3125, "learning_rate": 0.0008535086542331946, "loss": 0.9657, "step": 5302 }, { "epoch": 0.14230893087161872, "grad_norm": 0.30859375, "learning_rate": 0.0008536696632228632, "loss": 0.9545, "step": 5303 }, { "epoch": 0.14233576642335766, "grad_norm": 0.3359375, "learning_rate": 0.0008538306722125318, "loss": 1.0687, "step": 5304 }, { "epoch": 0.14236260197509662, "grad_norm": 0.3046875, "learning_rate": 0.0008539916812022004, "loss": 0.9994, "step": 5305 }, { "epoch": 0.14238943752683555, "grad_norm": 0.3125, "learning_rate": 0.000854152690191869, "loss": 1.0889, "step": 5306 }, { "epoch": 0.1424162730785745, "grad_norm": 0.31640625, "learning_rate": 0.0008543136991815375, "loss": 1.0316, "step": 5307 }, { "epoch": 0.14244310863031345, "grad_norm": 0.30859375, "learning_rate": 0.0008544747081712062, "loss": 1.0008, "step": 5308 }, { "epoch": 0.1424699441820524, "grad_norm": 0.30859375, "learning_rate": 0.0008546357171608748, "loss": 1.029, "step": 5309 }, { "epoch": 0.14249677973379132, "grad_norm": 0.298828125, "learning_rate": 0.0008547967261505433, "loss": 1.0115, "step": 5310 }, { "epoch": 0.14252361528553026, "grad_norm": 0.31640625, "learning_rate": 0.000854957735140212, "loss": 1.0558, "step": 5311 }, { "epoch": 0.14255045083726922, "grad_norm": 0.326171875, "learning_rate": 0.0008551187441298805, "loss": 1.0704, "step": 5312 }, { "epoch": 0.14257728638900816, "grad_norm": 0.31640625, "learning_rate": 0.0008552797531195491, "loss": 1.0544, "step": 5313 }, { "epoch": 0.1426041219407471, "grad_norm": 0.322265625, "learning_rate": 0.0008554407621092176, "loss": 1.0936, "step": 5314 }, { "epoch": 0.14263095749248605, "grad_norm": 0.3046875, "learning_rate": 0.0008556017710988862, "loss": 1.0389, "step": 5315 }, { "epoch": 0.142657793044225, "grad_norm": 0.42578125, "learning_rate": 0.0008557627800885549, "loss": 1.0113, "step": 5316 }, { "epoch": 0.14268462859596392, "grad_norm": 0.390625, "learning_rate": 0.0008559237890782234, "loss": 1.1023, "step": 5317 }, { "epoch": 0.1427114641477029, "grad_norm": 0.421875, "learning_rate": 0.0008560847980678921, "loss": 1.093, "step": 5318 }, { "epoch": 0.14273829969944182, "grad_norm": 0.53125, "learning_rate": 0.0008562458070575606, "loss": 1.049, "step": 5319 }, { "epoch": 0.14276513525118076, "grad_norm": 0.357421875, "learning_rate": 0.0008564068160472292, "loss": 0.9883, "step": 5320 }, { "epoch": 0.14279197080291972, "grad_norm": 0.375, "learning_rate": 0.0008565678250368979, "loss": 1.0151, "step": 5321 }, { "epoch": 0.14281880635465866, "grad_norm": 0.3515625, "learning_rate": 0.0008567288340265664, "loss": 0.9337, "step": 5322 }, { "epoch": 0.1428456419063976, "grad_norm": 0.392578125, "learning_rate": 0.000856889843016235, "loss": 0.9951, "step": 5323 }, { "epoch": 0.14287247745813653, "grad_norm": 0.333984375, "learning_rate": 0.0008570508520059036, "loss": 0.9904, "step": 5324 }, { "epoch": 0.1428993130098755, "grad_norm": 0.388671875, "learning_rate": 0.0008572118609955722, "loss": 1.0349, "step": 5325 }, { "epoch": 0.14292614856161442, "grad_norm": 0.388671875, "learning_rate": 0.0008573728699852408, "loss": 1.1804, "step": 5326 }, { "epoch": 0.14295298411335336, "grad_norm": 0.349609375, "learning_rate": 0.0008575338789749094, "loss": 1.146, "step": 5327 }, { "epoch": 0.14297981966509232, "grad_norm": 0.361328125, "learning_rate": 0.0008576948879645778, "loss": 1.074, "step": 5328 }, { "epoch": 0.14300665521683126, "grad_norm": 0.376953125, "learning_rate": 0.0008578558969542465, "loss": 1.0724, "step": 5329 }, { "epoch": 0.1430334907685702, "grad_norm": 0.337890625, "learning_rate": 0.0008580169059439151, "loss": 1.0768, "step": 5330 }, { "epoch": 0.14306032632030916, "grad_norm": 0.34375, "learning_rate": 0.0008581779149335837, "loss": 1.0809, "step": 5331 }, { "epoch": 0.1430871618720481, "grad_norm": 0.310546875, "learning_rate": 0.0008583389239232523, "loss": 1.0012, "step": 5332 }, { "epoch": 0.14311399742378703, "grad_norm": 0.330078125, "learning_rate": 0.0008584999329129208, "loss": 1.0931, "step": 5333 }, { "epoch": 0.143140832975526, "grad_norm": 0.32421875, "learning_rate": 0.0008586609419025895, "loss": 1.0149, "step": 5334 }, { "epoch": 0.14316766852726492, "grad_norm": 0.31640625, "learning_rate": 0.0008588219508922581, "loss": 0.9344, "step": 5335 }, { "epoch": 0.14319450407900386, "grad_norm": 0.296875, "learning_rate": 0.0008589829598819267, "loss": 0.9214, "step": 5336 }, { "epoch": 0.1432213396307428, "grad_norm": 0.3125, "learning_rate": 0.0008591439688715953, "loss": 1.017, "step": 5337 }, { "epoch": 0.14324817518248176, "grad_norm": 0.333984375, "learning_rate": 0.0008593049778612638, "loss": 1.0351, "step": 5338 }, { "epoch": 0.1432750107342207, "grad_norm": 0.330078125, "learning_rate": 0.0008594659868509325, "loss": 0.9798, "step": 5339 }, { "epoch": 0.14330184628595963, "grad_norm": 0.306640625, "learning_rate": 0.0008596269958406011, "loss": 0.9806, "step": 5340 }, { "epoch": 0.1433286818376986, "grad_norm": 0.318359375, "learning_rate": 0.0008597880048302696, "loss": 1.0556, "step": 5341 }, { "epoch": 0.14335551738943753, "grad_norm": 0.333984375, "learning_rate": 0.0008599490138199383, "loss": 1.1105, "step": 5342 }, { "epoch": 0.14338235294117646, "grad_norm": 0.318359375, "learning_rate": 0.0008601100228096068, "loss": 0.9418, "step": 5343 }, { "epoch": 0.14340918849291542, "grad_norm": 0.326171875, "learning_rate": 0.0008602710317992754, "loss": 0.9214, "step": 5344 }, { "epoch": 0.14343602404465436, "grad_norm": 0.318359375, "learning_rate": 0.0008604320407889439, "loss": 1.013, "step": 5345 }, { "epoch": 0.1434628595963933, "grad_norm": 0.306640625, "learning_rate": 0.0008605930497786125, "loss": 0.9721, "step": 5346 }, { "epoch": 0.14348969514813226, "grad_norm": 0.314453125, "learning_rate": 0.0008607540587682811, "loss": 0.9908, "step": 5347 }, { "epoch": 0.1435165306998712, "grad_norm": 0.3125, "learning_rate": 0.0008609150677579497, "loss": 0.9969, "step": 5348 }, { "epoch": 0.14354336625161013, "grad_norm": 0.3125, "learning_rate": 0.0008610760767476184, "loss": 1.1058, "step": 5349 }, { "epoch": 0.1435702018033491, "grad_norm": 0.341796875, "learning_rate": 0.0008612370857372869, "loss": 1.1637, "step": 5350 }, { "epoch": 0.14359703735508803, "grad_norm": 0.318359375, "learning_rate": 0.0008613980947269555, "loss": 1.0057, "step": 5351 }, { "epoch": 0.14362387290682696, "grad_norm": 0.29296875, "learning_rate": 0.0008615591037166241, "loss": 0.9206, "step": 5352 }, { "epoch": 0.1436507084585659, "grad_norm": 0.314453125, "learning_rate": 0.0008617201127062927, "loss": 1.0097, "step": 5353 }, { "epoch": 0.14367754401030486, "grad_norm": 0.306640625, "learning_rate": 0.0008618811216959614, "loss": 0.9654, "step": 5354 }, { "epoch": 0.1437043795620438, "grad_norm": 0.318359375, "learning_rate": 0.0008620421306856299, "loss": 1.1323, "step": 5355 }, { "epoch": 0.14373121511378273, "grad_norm": 0.314453125, "learning_rate": 0.0008622031396752985, "loss": 1.0977, "step": 5356 }, { "epoch": 0.1437580506655217, "grad_norm": 0.3125, "learning_rate": 0.0008623641486649671, "loss": 1.0618, "step": 5357 }, { "epoch": 0.14378488621726063, "grad_norm": 0.314453125, "learning_rate": 0.0008625251576546357, "loss": 1.1343, "step": 5358 }, { "epoch": 0.14381172176899956, "grad_norm": 0.29296875, "learning_rate": 0.0008626861666443041, "loss": 0.967, "step": 5359 }, { "epoch": 0.14383855732073852, "grad_norm": 0.306640625, "learning_rate": 0.0008628471756339728, "loss": 1.0118, "step": 5360 }, { "epoch": 0.14386539287247746, "grad_norm": 0.3203125, "learning_rate": 0.0008630081846236413, "loss": 1.0038, "step": 5361 }, { "epoch": 0.1438922284242164, "grad_norm": 0.330078125, "learning_rate": 0.00086316919361331, "loss": 1.1839, "step": 5362 }, { "epoch": 0.14391906397595536, "grad_norm": 0.302734375, "learning_rate": 0.0008633302026029786, "loss": 1.0103, "step": 5363 }, { "epoch": 0.1439458995276943, "grad_norm": 0.30859375, "learning_rate": 0.0008634912115926471, "loss": 1.0577, "step": 5364 }, { "epoch": 0.14397273507943323, "grad_norm": 0.322265625, "learning_rate": 0.0008636522205823158, "loss": 1.0612, "step": 5365 }, { "epoch": 0.14399957063117216, "grad_norm": 0.3125, "learning_rate": 0.0008638132295719843, "loss": 1.0784, "step": 5366 }, { "epoch": 0.14402640618291113, "grad_norm": 0.287109375, "learning_rate": 0.000863974238561653, "loss": 1.0243, "step": 5367 }, { "epoch": 0.14405324173465006, "grad_norm": 0.310546875, "learning_rate": 0.0008641352475513216, "loss": 1.0583, "step": 5368 }, { "epoch": 0.144080077286389, "grad_norm": 0.326171875, "learning_rate": 0.0008642962565409901, "loss": 1.1474, "step": 5369 }, { "epoch": 0.14410691283812796, "grad_norm": 0.283203125, "learning_rate": 0.0008644572655306588, "loss": 0.8909, "step": 5370 }, { "epoch": 0.1441337483898669, "grad_norm": 0.3046875, "learning_rate": 0.0008646182745203273, "loss": 1.0495, "step": 5371 }, { "epoch": 0.14416058394160583, "grad_norm": 0.326171875, "learning_rate": 0.000864779283509996, "loss": 1.1122, "step": 5372 }, { "epoch": 0.1441874194933448, "grad_norm": 0.337890625, "learning_rate": 0.0008649402924996646, "loss": 1.2233, "step": 5373 }, { "epoch": 0.14421425504508373, "grad_norm": 0.294921875, "learning_rate": 0.0008651013014893331, "loss": 1.0054, "step": 5374 }, { "epoch": 0.14424109059682266, "grad_norm": 0.31640625, "learning_rate": 0.0008652623104790016, "loss": 1.1004, "step": 5375 }, { "epoch": 0.14426792614856163, "grad_norm": 0.306640625, "learning_rate": 0.0008654233194686702, "loss": 0.8859, "step": 5376 }, { "epoch": 0.14429476170030056, "grad_norm": 0.306640625, "learning_rate": 0.0008655843284583388, "loss": 1.0154, "step": 5377 }, { "epoch": 0.1443215972520395, "grad_norm": 0.298828125, "learning_rate": 0.0008657453374480074, "loss": 1.0633, "step": 5378 }, { "epoch": 0.14434843280377846, "grad_norm": 0.337890625, "learning_rate": 0.000865906346437676, "loss": 1.1514, "step": 5379 }, { "epoch": 0.1443752683555174, "grad_norm": 0.298828125, "learning_rate": 0.0008660673554273446, "loss": 0.9422, "step": 5380 }, { "epoch": 0.14440210390725633, "grad_norm": 0.328125, "learning_rate": 0.0008662283644170132, "loss": 1.014, "step": 5381 }, { "epoch": 0.14442893945899526, "grad_norm": 0.302734375, "learning_rate": 0.0008663893734066818, "loss": 1.0, "step": 5382 }, { "epoch": 0.14445577501073423, "grad_norm": 0.32421875, "learning_rate": 0.0008665503823963504, "loss": 1.1473, "step": 5383 }, { "epoch": 0.14448261056247316, "grad_norm": 0.291015625, "learning_rate": 0.000866711391386019, "loss": 0.9122, "step": 5384 }, { "epoch": 0.1445094461142121, "grad_norm": 0.302734375, "learning_rate": 0.0008668724003756876, "loss": 0.9943, "step": 5385 }, { "epoch": 0.14453628166595106, "grad_norm": 0.328125, "learning_rate": 0.0008670334093653562, "loss": 1.0203, "step": 5386 }, { "epoch": 0.14456311721769, "grad_norm": 0.3125, "learning_rate": 0.0008671944183550248, "loss": 1.0596, "step": 5387 }, { "epoch": 0.14458995276942893, "grad_norm": 0.310546875, "learning_rate": 0.0008673554273446934, "loss": 1.0121, "step": 5388 }, { "epoch": 0.1446167883211679, "grad_norm": 0.294921875, "learning_rate": 0.000867516436334362, "loss": 0.9034, "step": 5389 }, { "epoch": 0.14464362387290683, "grad_norm": 0.310546875, "learning_rate": 0.0008676774453240304, "loss": 0.9839, "step": 5390 }, { "epoch": 0.14467045942464576, "grad_norm": 0.296875, "learning_rate": 0.0008678384543136991, "loss": 1.0488, "step": 5391 }, { "epoch": 0.14469729497638473, "grad_norm": 0.318359375, "learning_rate": 0.0008679994633033676, "loss": 1.13, "step": 5392 }, { "epoch": 0.14472413052812366, "grad_norm": 0.294921875, "learning_rate": 0.0008681604722930363, "loss": 1.0271, "step": 5393 }, { "epoch": 0.1447509660798626, "grad_norm": 0.29296875, "learning_rate": 0.0008683214812827048, "loss": 1.0402, "step": 5394 }, { "epoch": 0.14477780163160153, "grad_norm": 0.302734375, "learning_rate": 0.0008684824902723734, "loss": 1.0122, "step": 5395 }, { "epoch": 0.1448046371833405, "grad_norm": 0.296875, "learning_rate": 0.0008686434992620421, "loss": 1.0298, "step": 5396 }, { "epoch": 0.14483147273507943, "grad_norm": 0.326171875, "learning_rate": 0.0008688045082517106, "loss": 1.1264, "step": 5397 }, { "epoch": 0.14485830828681837, "grad_norm": 0.298828125, "learning_rate": 0.0008689655172413793, "loss": 0.9436, "step": 5398 }, { "epoch": 0.14488514383855733, "grad_norm": 0.30078125, "learning_rate": 0.0008691265262310478, "loss": 1.0479, "step": 5399 }, { "epoch": 0.14491197939029626, "grad_norm": 0.302734375, "learning_rate": 0.0008692875352207164, "loss": 0.9839, "step": 5400 }, { "epoch": 0.1449388149420352, "grad_norm": 0.330078125, "learning_rate": 0.0008694485442103851, "loss": 1.0706, "step": 5401 }, { "epoch": 0.14496565049377416, "grad_norm": 0.302734375, "learning_rate": 0.0008696095532000536, "loss": 0.9673, "step": 5402 }, { "epoch": 0.1449924860455131, "grad_norm": 0.31640625, "learning_rate": 0.0008697705621897223, "loss": 1.1156, "step": 5403 }, { "epoch": 0.14501932159725203, "grad_norm": 0.30078125, "learning_rate": 0.0008699315711793908, "loss": 1.0657, "step": 5404 }, { "epoch": 0.145046157148991, "grad_norm": 0.330078125, "learning_rate": 0.0008700925801690594, "loss": 1.133, "step": 5405 }, { "epoch": 0.14507299270072993, "grad_norm": 0.30859375, "learning_rate": 0.0008702535891587279, "loss": 1.0081, "step": 5406 }, { "epoch": 0.14509982825246887, "grad_norm": 0.302734375, "learning_rate": 0.0008704145981483965, "loss": 0.9354, "step": 5407 }, { "epoch": 0.14512666380420783, "grad_norm": 0.30859375, "learning_rate": 0.0008705756071380651, "loss": 1.0517, "step": 5408 }, { "epoch": 0.14515349935594676, "grad_norm": 0.29296875, "learning_rate": 0.0008707366161277337, "loss": 1.0073, "step": 5409 }, { "epoch": 0.1451803349076857, "grad_norm": 0.30859375, "learning_rate": 0.0008708976251174023, "loss": 1.017, "step": 5410 }, { "epoch": 0.14520717045942463, "grad_norm": 0.33984375, "learning_rate": 0.0008710586341070709, "loss": 1.1995, "step": 5411 }, { "epoch": 0.1452340060111636, "grad_norm": 0.30859375, "learning_rate": 0.0008712196430967395, "loss": 1.0696, "step": 5412 }, { "epoch": 0.14526084156290253, "grad_norm": 0.3203125, "learning_rate": 0.000871380652086408, "loss": 1.0813, "step": 5413 }, { "epoch": 0.14528767711464147, "grad_norm": 0.294921875, "learning_rate": 0.0008715416610760767, "loss": 1.0118, "step": 5414 }, { "epoch": 0.14531451266638043, "grad_norm": 0.267578125, "learning_rate": 0.0008717026700657453, "loss": 0.8587, "step": 5415 }, { "epoch": 0.14534134821811936, "grad_norm": 0.333984375, "learning_rate": 0.0008718636790554139, "loss": 1.1531, "step": 5416 }, { "epoch": 0.1453681837698583, "grad_norm": 0.306640625, "learning_rate": 0.0008720246880450825, "loss": 1.1669, "step": 5417 }, { "epoch": 0.14539501932159726, "grad_norm": 0.326171875, "learning_rate": 0.000872185697034751, "loss": 1.0955, "step": 5418 }, { "epoch": 0.1454218548733362, "grad_norm": 0.314453125, "learning_rate": 0.0008723467060244197, "loss": 1.0538, "step": 5419 }, { "epoch": 0.14544869042507513, "grad_norm": 0.2890625, "learning_rate": 0.0008725077150140883, "loss": 0.9745, "step": 5420 }, { "epoch": 0.1454755259768141, "grad_norm": 0.296875, "learning_rate": 0.0008726687240037567, "loss": 1.0032, "step": 5421 }, { "epoch": 0.14550236152855303, "grad_norm": 0.318359375, "learning_rate": 0.0008728297329934254, "loss": 1.1054, "step": 5422 }, { "epoch": 0.14552919708029197, "grad_norm": 0.306640625, "learning_rate": 0.0008729907419830939, "loss": 1.0387, "step": 5423 }, { "epoch": 0.1455560326320309, "grad_norm": 0.318359375, "learning_rate": 0.0008731517509727626, "loss": 1.1524, "step": 5424 }, { "epoch": 0.14558286818376986, "grad_norm": 0.306640625, "learning_rate": 0.0008733127599624311, "loss": 1.0632, "step": 5425 }, { "epoch": 0.1456097037355088, "grad_norm": 0.326171875, "learning_rate": 0.0008734737689520997, "loss": 1.0903, "step": 5426 }, { "epoch": 0.14563653928724773, "grad_norm": 0.302734375, "learning_rate": 0.0008736347779417683, "loss": 0.9938, "step": 5427 }, { "epoch": 0.1456633748389867, "grad_norm": 0.294921875, "learning_rate": 0.0008737957869314369, "loss": 0.9472, "step": 5428 }, { "epoch": 0.14569021039072563, "grad_norm": 0.3125, "learning_rate": 0.0008739567959211056, "loss": 1.0646, "step": 5429 }, { "epoch": 0.14571704594246457, "grad_norm": 0.30078125, "learning_rate": 0.0008741178049107741, "loss": 0.8996, "step": 5430 }, { "epoch": 0.14574388149420353, "grad_norm": 0.298828125, "learning_rate": 0.0008742788139004427, "loss": 0.956, "step": 5431 }, { "epoch": 0.14577071704594247, "grad_norm": 0.2890625, "learning_rate": 0.0008744398228901113, "loss": 0.908, "step": 5432 }, { "epoch": 0.1457975525976814, "grad_norm": 0.314453125, "learning_rate": 0.0008746008318797799, "loss": 1.0457, "step": 5433 }, { "epoch": 0.14582438814942036, "grad_norm": 0.310546875, "learning_rate": 0.0008747618408694486, "loss": 0.9552, "step": 5434 }, { "epoch": 0.1458512237011593, "grad_norm": 0.33203125, "learning_rate": 0.0008749228498591171, "loss": 1.0548, "step": 5435 }, { "epoch": 0.14587805925289823, "grad_norm": 0.3203125, "learning_rate": 0.0008750838588487857, "loss": 1.0304, "step": 5436 }, { "epoch": 0.14590489480463717, "grad_norm": 0.3125, "learning_rate": 0.0008752448678384542, "loss": 1.0309, "step": 5437 }, { "epoch": 0.14593173035637613, "grad_norm": 0.302734375, "learning_rate": 0.0008754058768281228, "loss": 0.9844, "step": 5438 }, { "epoch": 0.14595856590811507, "grad_norm": 0.3125, "learning_rate": 0.0008755668858177913, "loss": 1.0697, "step": 5439 }, { "epoch": 0.145985401459854, "grad_norm": 0.30859375, "learning_rate": 0.00087572789480746, "loss": 1.0611, "step": 5440 }, { "epoch": 0.14601223701159297, "grad_norm": 0.3046875, "learning_rate": 0.0008758889037971286, "loss": 1.0091, "step": 5441 }, { "epoch": 0.1460390725633319, "grad_norm": 0.310546875, "learning_rate": 0.0008760499127867972, "loss": 1.0762, "step": 5442 }, { "epoch": 0.14606590811507084, "grad_norm": 0.2890625, "learning_rate": 0.0008762109217764658, "loss": 0.989, "step": 5443 }, { "epoch": 0.1460927436668098, "grad_norm": 0.30859375, "learning_rate": 0.0008763719307661343, "loss": 0.9442, "step": 5444 }, { "epoch": 0.14611957921854873, "grad_norm": 0.28515625, "learning_rate": 0.000876532939755803, "loss": 0.9047, "step": 5445 }, { "epoch": 0.14614641477028767, "grad_norm": 0.333984375, "learning_rate": 0.0008766939487454715, "loss": 1.1322, "step": 5446 }, { "epoch": 0.14617325032202663, "grad_norm": 0.3125, "learning_rate": 0.0008768549577351402, "loss": 1.0493, "step": 5447 }, { "epoch": 0.14620008587376557, "grad_norm": 0.306640625, "learning_rate": 0.0008770159667248088, "loss": 1.0037, "step": 5448 }, { "epoch": 0.1462269214255045, "grad_norm": 0.302734375, "learning_rate": 0.0008771769757144773, "loss": 1.0154, "step": 5449 }, { "epoch": 0.14625375697724347, "grad_norm": 0.330078125, "learning_rate": 0.000877337984704146, "loss": 1.1315, "step": 5450 }, { "epoch": 0.1462805925289824, "grad_norm": 0.302734375, "learning_rate": 0.0008774989936938145, "loss": 0.9646, "step": 5451 }, { "epoch": 0.14630742808072134, "grad_norm": 0.322265625, "learning_rate": 0.000877660002683483, "loss": 1.0969, "step": 5452 }, { "epoch": 0.14633426363246027, "grad_norm": 0.30078125, "learning_rate": 0.0008778210116731516, "loss": 0.9759, "step": 5453 }, { "epoch": 0.14636109918419923, "grad_norm": 0.30078125, "learning_rate": 0.0008779820206628202, "loss": 0.9499, "step": 5454 }, { "epoch": 0.14638793473593817, "grad_norm": 0.30078125, "learning_rate": 0.0008781430296524889, "loss": 0.9618, "step": 5455 }, { "epoch": 0.1464147702876771, "grad_norm": 0.330078125, "learning_rate": 0.0008783040386421574, "loss": 1.0497, "step": 5456 }, { "epoch": 0.14644160583941607, "grad_norm": 0.326171875, "learning_rate": 0.000878465047631826, "loss": 1.0954, "step": 5457 }, { "epoch": 0.146468441391155, "grad_norm": 0.29296875, "learning_rate": 0.0008786260566214946, "loss": 0.9085, "step": 5458 }, { "epoch": 0.14649527694289394, "grad_norm": 0.314453125, "learning_rate": 0.0008787870656111632, "loss": 1.0903, "step": 5459 }, { "epoch": 0.1465221124946329, "grad_norm": 0.298828125, "learning_rate": 0.0008789480746008319, "loss": 0.9773, "step": 5460 }, { "epoch": 0.14654894804637184, "grad_norm": 0.322265625, "learning_rate": 0.0008791090835905004, "loss": 0.9822, "step": 5461 }, { "epoch": 0.14657578359811077, "grad_norm": 0.31640625, "learning_rate": 0.000879270092580169, "loss": 1.1126, "step": 5462 }, { "epoch": 0.14660261914984973, "grad_norm": 0.29296875, "learning_rate": 0.0008794311015698376, "loss": 0.9839, "step": 5463 }, { "epoch": 0.14662945470158867, "grad_norm": 0.310546875, "learning_rate": 0.0008795921105595062, "loss": 1.0779, "step": 5464 }, { "epoch": 0.1466562902533276, "grad_norm": 0.3046875, "learning_rate": 0.0008797531195491748, "loss": 0.9608, "step": 5465 }, { "epoch": 0.14668312580506654, "grad_norm": 0.318359375, "learning_rate": 0.0008799141285388434, "loss": 1.1055, "step": 5466 }, { "epoch": 0.1467099613568055, "grad_norm": 0.302734375, "learning_rate": 0.000880075137528512, "loss": 0.9071, "step": 5467 }, { "epoch": 0.14673679690854444, "grad_norm": 0.32421875, "learning_rate": 0.0008802361465181805, "loss": 1.1035, "step": 5468 }, { "epoch": 0.14676363246028337, "grad_norm": 0.298828125, "learning_rate": 0.0008803971555078491, "loss": 0.9978, "step": 5469 }, { "epoch": 0.14679046801202233, "grad_norm": 0.333984375, "learning_rate": 0.0008805581644975176, "loss": 1.1537, "step": 5470 }, { "epoch": 0.14681730356376127, "grad_norm": 0.314453125, "learning_rate": 0.0008807191734871863, "loss": 1.1468, "step": 5471 }, { "epoch": 0.1468441391155002, "grad_norm": 0.33203125, "learning_rate": 0.0008808801824768548, "loss": 1.1091, "step": 5472 }, { "epoch": 0.14687097466723917, "grad_norm": 0.31640625, "learning_rate": 0.0008810411914665235, "loss": 1.1149, "step": 5473 }, { "epoch": 0.1468978102189781, "grad_norm": 0.296875, "learning_rate": 0.0008812022004561921, "loss": 0.9293, "step": 5474 }, { "epoch": 0.14692464577071704, "grad_norm": 0.3125, "learning_rate": 0.0008813632094458606, "loss": 1.1253, "step": 5475 }, { "epoch": 0.146951481322456, "grad_norm": 0.30859375, "learning_rate": 0.0008815242184355293, "loss": 0.9413, "step": 5476 }, { "epoch": 0.14697831687419494, "grad_norm": 0.33203125, "learning_rate": 0.0008816852274251978, "loss": 1.0999, "step": 5477 }, { "epoch": 0.14700515242593387, "grad_norm": 0.3125, "learning_rate": 0.0008818462364148665, "loss": 1.0832, "step": 5478 }, { "epoch": 0.14703198797767283, "grad_norm": 0.322265625, "learning_rate": 0.0008820072454045351, "loss": 1.0612, "step": 5479 }, { "epoch": 0.14705882352941177, "grad_norm": 0.3203125, "learning_rate": 0.0008821682543942036, "loss": 1.1378, "step": 5480 }, { "epoch": 0.1470856590811507, "grad_norm": 0.28515625, "learning_rate": 0.0008823292633838723, "loss": 0.9274, "step": 5481 }, { "epoch": 0.14711249463288964, "grad_norm": 0.294921875, "learning_rate": 0.0008824902723735408, "loss": 0.9448, "step": 5482 }, { "epoch": 0.1471393301846286, "grad_norm": 0.314453125, "learning_rate": 0.0008826512813632093, "loss": 1.0063, "step": 5483 }, { "epoch": 0.14716616573636754, "grad_norm": 0.328125, "learning_rate": 0.0008828122903528779, "loss": 1.2244, "step": 5484 }, { "epoch": 0.14719300128810647, "grad_norm": 0.3125, "learning_rate": 0.0008829732993425465, "loss": 0.9839, "step": 5485 }, { "epoch": 0.14721983683984544, "grad_norm": 0.3125, "learning_rate": 0.0008831343083322151, "loss": 1.0111, "step": 5486 }, { "epoch": 0.14724667239158437, "grad_norm": 0.30859375, "learning_rate": 0.0008832953173218837, "loss": 1.1085, "step": 5487 }, { "epoch": 0.1472735079433233, "grad_norm": 0.326171875, "learning_rate": 0.0008834563263115523, "loss": 1.1347, "step": 5488 }, { "epoch": 0.14730034349506227, "grad_norm": 0.302734375, "learning_rate": 0.0008836173353012209, "loss": 0.979, "step": 5489 }, { "epoch": 0.1473271790468012, "grad_norm": 0.330078125, "learning_rate": 0.0008837783442908895, "loss": 1.1036, "step": 5490 }, { "epoch": 0.14735401459854014, "grad_norm": 0.291015625, "learning_rate": 0.0008839393532805581, "loss": 0.9376, "step": 5491 }, { "epoch": 0.1473808501502791, "grad_norm": 0.322265625, "learning_rate": 0.0008841003622702267, "loss": 1.0833, "step": 5492 }, { "epoch": 0.14740768570201804, "grad_norm": 0.3046875, "learning_rate": 0.0008842613712598953, "loss": 1.0052, "step": 5493 }, { "epoch": 0.14743452125375697, "grad_norm": 0.30078125, "learning_rate": 0.0008844223802495639, "loss": 1.0027, "step": 5494 }, { "epoch": 0.1474613568054959, "grad_norm": 0.3046875, "learning_rate": 0.0008845833892392325, "loss": 0.9459, "step": 5495 }, { "epoch": 0.14748819235723487, "grad_norm": 0.314453125, "learning_rate": 0.000884744398228901, "loss": 1.1411, "step": 5496 }, { "epoch": 0.1475150279089738, "grad_norm": 0.296875, "learning_rate": 0.0008849054072185697, "loss": 0.9337, "step": 5497 }, { "epoch": 0.14754186346071274, "grad_norm": 0.31640625, "learning_rate": 0.0008850664162082381, "loss": 1.1132, "step": 5498 }, { "epoch": 0.1475686990124517, "grad_norm": 0.294921875, "learning_rate": 0.0008852274251979068, "loss": 0.9764, "step": 5499 }, { "epoch": 0.14759553456419064, "grad_norm": 0.291015625, "learning_rate": 0.0008853884341875753, "loss": 0.9617, "step": 5500 }, { "epoch": 0.14762237011592957, "grad_norm": 0.302734375, "learning_rate": 0.0008855494431772439, "loss": 1.018, "step": 5501 }, { "epoch": 0.14764920566766854, "grad_norm": 0.3125, "learning_rate": 0.0008857104521669126, "loss": 0.9936, "step": 5502 }, { "epoch": 0.14767604121940747, "grad_norm": 0.30859375, "learning_rate": 0.0008858714611565811, "loss": 1.0209, "step": 5503 }, { "epoch": 0.1477028767711464, "grad_norm": 0.296875, "learning_rate": 0.0008860324701462498, "loss": 0.9568, "step": 5504 }, { "epoch": 0.14772971232288537, "grad_norm": 0.30078125, "learning_rate": 0.0008861934791359183, "loss": 0.9815, "step": 5505 }, { "epoch": 0.1477565478746243, "grad_norm": 0.31640625, "learning_rate": 0.0008863544881255869, "loss": 1.0223, "step": 5506 }, { "epoch": 0.14778338342636324, "grad_norm": 0.30859375, "learning_rate": 0.0008865154971152556, "loss": 1.0645, "step": 5507 }, { "epoch": 0.1478102189781022, "grad_norm": 0.30859375, "learning_rate": 0.0008866765061049241, "loss": 1.0642, "step": 5508 }, { "epoch": 0.14783705452984114, "grad_norm": 0.3046875, "learning_rate": 0.0008868375150945928, "loss": 1.0802, "step": 5509 }, { "epoch": 0.14786389008158007, "grad_norm": 0.31640625, "learning_rate": 0.0008869985240842613, "loss": 0.9764, "step": 5510 }, { "epoch": 0.147890725633319, "grad_norm": 0.310546875, "learning_rate": 0.0008871595330739299, "loss": 1.0087, "step": 5511 }, { "epoch": 0.14791756118505797, "grad_norm": 0.314453125, "learning_rate": 0.0008873205420635986, "loss": 1.0307, "step": 5512 }, { "epoch": 0.1479443967367969, "grad_norm": 0.30078125, "learning_rate": 0.0008874815510532671, "loss": 0.9828, "step": 5513 }, { "epoch": 0.14797123228853584, "grad_norm": 0.314453125, "learning_rate": 0.0008876425600429355, "loss": 1.059, "step": 5514 }, { "epoch": 0.1479980678402748, "grad_norm": 0.298828125, "learning_rate": 0.0008878035690326042, "loss": 1.0116, "step": 5515 }, { "epoch": 0.14802490339201374, "grad_norm": 0.291015625, "learning_rate": 0.0008879645780222728, "loss": 0.9419, "step": 5516 }, { "epoch": 0.14805173894375268, "grad_norm": 0.310546875, "learning_rate": 0.0008881255870119414, "loss": 1.0182, "step": 5517 }, { "epoch": 0.14807857449549164, "grad_norm": 0.294921875, "learning_rate": 0.00088828659600161, "loss": 0.8976, "step": 5518 }, { "epoch": 0.14810541004723057, "grad_norm": 0.294921875, "learning_rate": 0.0008884476049912785, "loss": 1.0071, "step": 5519 }, { "epoch": 0.1481322455989695, "grad_norm": 0.310546875, "learning_rate": 0.0008886086139809472, "loss": 1.0629, "step": 5520 }, { "epoch": 0.14815908115070847, "grad_norm": 0.30078125, "learning_rate": 0.0008887696229706158, "loss": 0.9992, "step": 5521 }, { "epoch": 0.1481859167024474, "grad_norm": 0.291015625, "learning_rate": 0.0008889306319602844, "loss": 0.8851, "step": 5522 }, { "epoch": 0.14821275225418634, "grad_norm": 0.3359375, "learning_rate": 0.000889091640949953, "loss": 1.099, "step": 5523 }, { "epoch": 0.14823958780592528, "grad_norm": 0.306640625, "learning_rate": 0.0008892526499396215, "loss": 1.0299, "step": 5524 }, { "epoch": 0.14826642335766424, "grad_norm": 0.294921875, "learning_rate": 0.0008894136589292902, "loss": 0.9978, "step": 5525 }, { "epoch": 0.14829325890940318, "grad_norm": 0.298828125, "learning_rate": 0.0008895746679189588, "loss": 1.0171, "step": 5526 }, { "epoch": 0.1483200944611421, "grad_norm": 0.296875, "learning_rate": 0.0008897356769086274, "loss": 1.0169, "step": 5527 }, { "epoch": 0.14834693001288107, "grad_norm": 0.326171875, "learning_rate": 0.000889896685898296, "loss": 1.1288, "step": 5528 }, { "epoch": 0.14837376556462, "grad_norm": 0.298828125, "learning_rate": 0.0008900576948879644, "loss": 0.9813, "step": 5529 }, { "epoch": 0.14840060111635894, "grad_norm": 0.33203125, "learning_rate": 0.0008902187038776331, "loss": 1.0909, "step": 5530 }, { "epoch": 0.1484274366680979, "grad_norm": 0.328125, "learning_rate": 0.0008903797128673016, "loss": 1.0987, "step": 5531 }, { "epoch": 0.14845427221983684, "grad_norm": 0.3125, "learning_rate": 0.0008905407218569702, "loss": 1.1027, "step": 5532 }, { "epoch": 0.14848110777157578, "grad_norm": 0.3125, "learning_rate": 0.0008907017308466388, "loss": 1.0409, "step": 5533 }, { "epoch": 0.14850794332331474, "grad_norm": 0.318359375, "learning_rate": 0.0008908627398363074, "loss": 1.07, "step": 5534 }, { "epoch": 0.14853477887505367, "grad_norm": 0.3046875, "learning_rate": 0.0008910237488259761, "loss": 1.0136, "step": 5535 }, { "epoch": 0.1485616144267926, "grad_norm": 0.298828125, "learning_rate": 0.0008911847578156446, "loss": 0.9769, "step": 5536 }, { "epoch": 0.14858844997853154, "grad_norm": 0.30859375, "learning_rate": 0.0008913457668053132, "loss": 1.1142, "step": 5537 }, { "epoch": 0.1486152855302705, "grad_norm": 0.283203125, "learning_rate": 0.0008915067757949818, "loss": 0.9532, "step": 5538 }, { "epoch": 0.14864212108200944, "grad_norm": 0.30078125, "learning_rate": 0.0008916677847846504, "loss": 1.0714, "step": 5539 }, { "epoch": 0.14866895663374838, "grad_norm": 0.306640625, "learning_rate": 0.0008918287937743191, "loss": 1.1067, "step": 5540 }, { "epoch": 0.14869579218548734, "grad_norm": 0.306640625, "learning_rate": 0.0008919898027639876, "loss": 1.0259, "step": 5541 }, { "epoch": 0.14872262773722628, "grad_norm": 0.298828125, "learning_rate": 0.0008921508117536562, "loss": 1.0319, "step": 5542 }, { "epoch": 0.1487494632889652, "grad_norm": 0.31640625, "learning_rate": 0.0008923118207433248, "loss": 1.0855, "step": 5543 }, { "epoch": 0.14877629884070417, "grad_norm": 0.30859375, "learning_rate": 0.0008924728297329934, "loss": 1.046, "step": 5544 }, { "epoch": 0.1488031343924431, "grad_norm": 0.294921875, "learning_rate": 0.0008926338387226618, "loss": 0.9662, "step": 5545 }, { "epoch": 0.14882996994418204, "grad_norm": 0.29296875, "learning_rate": 0.0008927948477123305, "loss": 0.988, "step": 5546 }, { "epoch": 0.148856805495921, "grad_norm": 0.322265625, "learning_rate": 0.000892955856701999, "loss": 1.0509, "step": 5547 }, { "epoch": 0.14888364104765994, "grad_norm": 0.3125, "learning_rate": 0.0008931168656916677, "loss": 0.9769, "step": 5548 }, { "epoch": 0.14891047659939888, "grad_norm": 0.298828125, "learning_rate": 0.0008932778746813363, "loss": 0.9956, "step": 5549 }, { "epoch": 0.14893731215113784, "grad_norm": 0.298828125, "learning_rate": 0.0008934388836710048, "loss": 0.9337, "step": 5550 }, { "epoch": 0.14896414770287678, "grad_norm": 0.306640625, "learning_rate": 0.0008935998926606735, "loss": 1.1145, "step": 5551 }, { "epoch": 0.1489909832546157, "grad_norm": 0.31640625, "learning_rate": 0.000893760901650342, "loss": 1.1122, "step": 5552 }, { "epoch": 0.14901781880635465, "grad_norm": 0.32421875, "learning_rate": 0.0008939219106400107, "loss": 1.1065, "step": 5553 }, { "epoch": 0.1490446543580936, "grad_norm": 0.296875, "learning_rate": 0.0008940829196296793, "loss": 0.9506, "step": 5554 }, { "epoch": 0.14907148990983254, "grad_norm": 0.310546875, "learning_rate": 0.0008942439286193478, "loss": 1.0905, "step": 5555 }, { "epoch": 0.14909832546157148, "grad_norm": 0.33203125, "learning_rate": 0.0008944049376090165, "loss": 1.1589, "step": 5556 }, { "epoch": 0.14912516101331044, "grad_norm": 0.302734375, "learning_rate": 0.000894565946598685, "loss": 1.0447, "step": 5557 }, { "epoch": 0.14915199656504938, "grad_norm": 0.32421875, "learning_rate": 0.0008947269555883537, "loss": 1.0631, "step": 5558 }, { "epoch": 0.1491788321167883, "grad_norm": 0.298828125, "learning_rate": 0.0008948879645780223, "loss": 0.9754, "step": 5559 }, { "epoch": 0.14920566766852728, "grad_norm": 0.310546875, "learning_rate": 0.0008950489735676907, "loss": 1.0501, "step": 5560 }, { "epoch": 0.1492325032202662, "grad_norm": 0.306640625, "learning_rate": 0.0008952099825573594, "loss": 1.0225, "step": 5561 }, { "epoch": 0.14925933877200515, "grad_norm": 0.287109375, "learning_rate": 0.0008953709915470279, "loss": 0.9345, "step": 5562 }, { "epoch": 0.1492861743237441, "grad_norm": 0.29296875, "learning_rate": 0.0008955320005366965, "loss": 0.9033, "step": 5563 }, { "epoch": 0.14931300987548304, "grad_norm": 0.27734375, "learning_rate": 0.0008956930095263651, "loss": 0.8324, "step": 5564 }, { "epoch": 0.14933984542722198, "grad_norm": 0.306640625, "learning_rate": 0.0008958540185160337, "loss": 1.0752, "step": 5565 }, { "epoch": 0.14936668097896091, "grad_norm": 0.306640625, "learning_rate": 0.0008960150275057023, "loss": 1.0567, "step": 5566 }, { "epoch": 0.14939351653069988, "grad_norm": 0.291015625, "learning_rate": 0.0008961760364953709, "loss": 0.9911, "step": 5567 }, { "epoch": 0.1494203520824388, "grad_norm": 0.298828125, "learning_rate": 0.0008963370454850395, "loss": 1.0079, "step": 5568 }, { "epoch": 0.14944718763417775, "grad_norm": 0.318359375, "learning_rate": 0.0008964980544747081, "loss": 1.0065, "step": 5569 }, { "epoch": 0.1494740231859167, "grad_norm": 0.29296875, "learning_rate": 0.0008966590634643767, "loss": 0.9715, "step": 5570 }, { "epoch": 0.14950085873765565, "grad_norm": 0.271484375, "learning_rate": 0.0008968200724540453, "loss": 0.9202, "step": 5571 }, { "epoch": 0.14952769428939458, "grad_norm": 0.2890625, "learning_rate": 0.0008969810814437139, "loss": 0.9409, "step": 5572 }, { "epoch": 0.14955452984113354, "grad_norm": 0.3046875, "learning_rate": 0.0008971420904333825, "loss": 1.0723, "step": 5573 }, { "epoch": 0.14958136539287248, "grad_norm": 0.287109375, "learning_rate": 0.0008973030994230511, "loss": 0.856, "step": 5574 }, { "epoch": 0.1496082009446114, "grad_norm": 0.2890625, "learning_rate": 0.0008974641084127197, "loss": 1.0048, "step": 5575 }, { "epoch": 0.14963503649635038, "grad_norm": 0.296875, "learning_rate": 0.0008976251174023881, "loss": 0.9966, "step": 5576 }, { "epoch": 0.1496618720480893, "grad_norm": 0.322265625, "learning_rate": 0.0008977861263920568, "loss": 1.1067, "step": 5577 }, { "epoch": 0.14968870759982825, "grad_norm": 0.32421875, "learning_rate": 0.0008979471353817253, "loss": 1.0157, "step": 5578 }, { "epoch": 0.1497155431515672, "grad_norm": 0.294921875, "learning_rate": 0.000898108144371394, "loss": 1.0008, "step": 5579 }, { "epoch": 0.14974237870330614, "grad_norm": 0.310546875, "learning_rate": 0.0008982691533610626, "loss": 1.0253, "step": 5580 }, { "epoch": 0.14976921425504508, "grad_norm": 0.306640625, "learning_rate": 0.0008984301623507311, "loss": 1.0247, "step": 5581 }, { "epoch": 0.14979604980678402, "grad_norm": 0.28125, "learning_rate": 0.0008985911713403998, "loss": 0.9886, "step": 5582 }, { "epoch": 0.14982288535852298, "grad_norm": 0.3125, "learning_rate": 0.0008987521803300683, "loss": 0.9902, "step": 5583 }, { "epoch": 0.1498497209102619, "grad_norm": 0.28515625, "learning_rate": 0.000898913189319737, "loss": 0.9998, "step": 5584 }, { "epoch": 0.14987655646200085, "grad_norm": 0.314453125, "learning_rate": 0.0008990741983094055, "loss": 1.0367, "step": 5585 }, { "epoch": 0.1499033920137398, "grad_norm": 0.322265625, "learning_rate": 0.0008992352072990741, "loss": 1.1757, "step": 5586 }, { "epoch": 0.14993022756547875, "grad_norm": 0.298828125, "learning_rate": 0.0008993962162887428, "loss": 0.9815, "step": 5587 }, { "epoch": 0.14995706311721768, "grad_norm": 0.328125, "learning_rate": 0.0008995572252784113, "loss": 1.1751, "step": 5588 }, { "epoch": 0.14998389866895664, "grad_norm": 0.298828125, "learning_rate": 0.00089971823426808, "loss": 0.9613, "step": 5589 }, { "epoch": 0.15001073422069558, "grad_norm": 0.314453125, "learning_rate": 0.0008998792432577485, "loss": 0.9812, "step": 5590 }, { "epoch": 0.15003756977243451, "grad_norm": 0.31640625, "learning_rate": 0.000900040252247417, "loss": 1.0592, "step": 5591 }, { "epoch": 0.15006440532417348, "grad_norm": 0.328125, "learning_rate": 0.0009002012612370856, "loss": 1.0716, "step": 5592 }, { "epoch": 0.1500912408759124, "grad_norm": 0.310546875, "learning_rate": 0.0009003622702267542, "loss": 1.0621, "step": 5593 }, { "epoch": 0.15011807642765135, "grad_norm": 0.322265625, "learning_rate": 0.0009005232792164228, "loss": 0.9979, "step": 5594 }, { "epoch": 0.15014491197939028, "grad_norm": 0.314453125, "learning_rate": 0.0009006842882060914, "loss": 1.0881, "step": 5595 }, { "epoch": 0.15017174753112925, "grad_norm": 0.306640625, "learning_rate": 0.00090084529719576, "loss": 0.9595, "step": 5596 }, { "epoch": 0.15019858308286818, "grad_norm": 0.3125, "learning_rate": 0.0009010063061854286, "loss": 1.0444, "step": 5597 }, { "epoch": 0.15022541863460712, "grad_norm": 0.302734375, "learning_rate": 0.0009011673151750972, "loss": 1.0362, "step": 5598 }, { "epoch": 0.15025225418634608, "grad_norm": 0.322265625, "learning_rate": 0.0009013283241647658, "loss": 1.0601, "step": 5599 }, { "epoch": 0.15027908973808501, "grad_norm": 0.283203125, "learning_rate": 0.0009014893331544344, "loss": 0.9166, "step": 5600 }, { "epoch": 0.15030592528982395, "grad_norm": 0.306640625, "learning_rate": 0.000901650342144103, "loss": 1.0566, "step": 5601 }, { "epoch": 0.1503327608415629, "grad_norm": 0.306640625, "learning_rate": 0.0009018113511337716, "loss": 1.0321, "step": 5602 }, { "epoch": 0.15035959639330185, "grad_norm": 0.306640625, "learning_rate": 0.0009019723601234402, "loss": 1.012, "step": 5603 }, { "epoch": 0.15038643194504078, "grad_norm": 0.341796875, "learning_rate": 0.0009021333691131087, "loss": 1.1396, "step": 5604 }, { "epoch": 0.15041326749677975, "grad_norm": 0.328125, "learning_rate": 0.0009022943781027774, "loss": 1.1076, "step": 5605 }, { "epoch": 0.15044010304851868, "grad_norm": 0.302734375, "learning_rate": 0.000902455387092446, "loss": 1.0089, "step": 5606 }, { "epoch": 0.15046693860025762, "grad_norm": 0.296875, "learning_rate": 0.0009026163960821144, "loss": 0.9213, "step": 5607 }, { "epoch": 0.15049377415199658, "grad_norm": 0.3046875, "learning_rate": 0.0009027774050717831, "loss": 1.0083, "step": 5608 }, { "epoch": 0.15052060970373551, "grad_norm": 0.30859375, "learning_rate": 0.0009029384140614516, "loss": 1.023, "step": 5609 }, { "epoch": 0.15054744525547445, "grad_norm": 0.314453125, "learning_rate": 0.0009030994230511203, "loss": 1.0521, "step": 5610 }, { "epoch": 0.15057428080721338, "grad_norm": 0.310546875, "learning_rate": 0.0009032604320407888, "loss": 1.039, "step": 5611 }, { "epoch": 0.15060111635895235, "grad_norm": 0.314453125, "learning_rate": 0.0009034214410304574, "loss": 1.0633, "step": 5612 }, { "epoch": 0.15062795191069128, "grad_norm": 0.3125, "learning_rate": 0.0009035824500201261, "loss": 0.9931, "step": 5613 }, { "epoch": 0.15065478746243022, "grad_norm": 0.294921875, "learning_rate": 0.0009037434590097946, "loss": 0.9082, "step": 5614 }, { "epoch": 0.15068162301416918, "grad_norm": 0.314453125, "learning_rate": 0.0009039044679994633, "loss": 1.0357, "step": 5615 }, { "epoch": 0.15070845856590812, "grad_norm": 0.3125, "learning_rate": 0.0009040654769891318, "loss": 0.9837, "step": 5616 }, { "epoch": 0.15073529411764705, "grad_norm": 0.30859375, "learning_rate": 0.0009042264859788004, "loss": 1.0275, "step": 5617 }, { "epoch": 0.150762129669386, "grad_norm": 0.296875, "learning_rate": 0.000904387494968469, "loss": 0.9817, "step": 5618 }, { "epoch": 0.15078896522112495, "grad_norm": 0.333984375, "learning_rate": 0.0009045485039581376, "loss": 1.1798, "step": 5619 }, { "epoch": 0.15081580077286388, "grad_norm": 0.30078125, "learning_rate": 0.0009047095129478063, "loss": 0.9941, "step": 5620 }, { "epoch": 0.15084263632460285, "grad_norm": 0.302734375, "learning_rate": 0.0009048705219374748, "loss": 1.0386, "step": 5621 }, { "epoch": 0.15086947187634178, "grad_norm": 0.3046875, "learning_rate": 0.0009050315309271433, "loss": 0.9796, "step": 5622 }, { "epoch": 0.15089630742808072, "grad_norm": 0.314453125, "learning_rate": 0.0009051925399168119, "loss": 1.0666, "step": 5623 }, { "epoch": 0.15092314297981965, "grad_norm": 0.3046875, "learning_rate": 0.0009053535489064805, "loss": 0.9997, "step": 5624 }, { "epoch": 0.15094997853155862, "grad_norm": 0.294921875, "learning_rate": 0.000905514557896149, "loss": 0.9187, "step": 5625 }, { "epoch": 0.15097681408329755, "grad_norm": 0.310546875, "learning_rate": 0.0009056755668858177, "loss": 1.043, "step": 5626 }, { "epoch": 0.15100364963503649, "grad_norm": 0.2890625, "learning_rate": 0.0009058365758754863, "loss": 0.9188, "step": 5627 }, { "epoch": 0.15103048518677545, "grad_norm": 0.31640625, "learning_rate": 0.0009059975848651549, "loss": 1.0254, "step": 5628 }, { "epoch": 0.15105732073851438, "grad_norm": 0.287109375, "learning_rate": 0.0009061585938548235, "loss": 0.9356, "step": 5629 }, { "epoch": 0.15108415629025332, "grad_norm": 0.3046875, "learning_rate": 0.000906319602844492, "loss": 0.984, "step": 5630 }, { "epoch": 0.15111099184199228, "grad_norm": 0.314453125, "learning_rate": 0.0009064806118341607, "loss": 1.0595, "step": 5631 }, { "epoch": 0.15113782739373122, "grad_norm": 0.30859375, "learning_rate": 0.0009066416208238293, "loss": 1.0337, "step": 5632 }, { "epoch": 0.15116466294547015, "grad_norm": 0.30859375, "learning_rate": 0.0009068026298134979, "loss": 1.0433, "step": 5633 }, { "epoch": 0.15119149849720911, "grad_norm": 0.306640625, "learning_rate": 0.0009069636388031665, "loss": 0.9898, "step": 5634 }, { "epoch": 0.15121833404894805, "grad_norm": 0.302734375, "learning_rate": 0.000907124647792835, "loss": 0.9485, "step": 5635 }, { "epoch": 0.15124516960068699, "grad_norm": 0.31640625, "learning_rate": 0.0009072856567825037, "loss": 1.0635, "step": 5636 }, { "epoch": 0.15127200515242592, "grad_norm": 0.322265625, "learning_rate": 0.0009074466657721722, "loss": 1.1111, "step": 5637 }, { "epoch": 0.15129884070416488, "grad_norm": 0.306640625, "learning_rate": 0.0009076076747618407, "loss": 0.964, "step": 5638 }, { "epoch": 0.15132567625590382, "grad_norm": 0.306640625, "learning_rate": 0.0009077686837515093, "loss": 1.0745, "step": 5639 }, { "epoch": 0.15135251180764275, "grad_norm": 0.302734375, "learning_rate": 0.0009079296927411779, "loss": 1.0762, "step": 5640 }, { "epoch": 0.15137934735938172, "grad_norm": 0.29296875, "learning_rate": 0.0009080907017308466, "loss": 0.922, "step": 5641 }, { "epoch": 0.15140618291112065, "grad_norm": 0.314453125, "learning_rate": 0.0009082517107205151, "loss": 1.0826, "step": 5642 }, { "epoch": 0.1514330184628596, "grad_norm": 0.3203125, "learning_rate": 0.0009084127197101837, "loss": 1.1062, "step": 5643 }, { "epoch": 0.15145985401459855, "grad_norm": 0.3125, "learning_rate": 0.0009085737286998523, "loss": 1.0898, "step": 5644 }, { "epoch": 0.15148668956633748, "grad_norm": 0.310546875, "learning_rate": 0.0009087347376895209, "loss": 1.0025, "step": 5645 }, { "epoch": 0.15151352511807642, "grad_norm": 0.306640625, "learning_rate": 0.0009088957466791896, "loss": 1.0116, "step": 5646 }, { "epoch": 0.15154036066981538, "grad_norm": 0.296875, "learning_rate": 0.0009090567556688581, "loss": 0.962, "step": 5647 }, { "epoch": 0.15156719622155432, "grad_norm": 0.29296875, "learning_rate": 0.0009092177646585267, "loss": 0.9962, "step": 5648 }, { "epoch": 0.15159403177329325, "grad_norm": 0.306640625, "learning_rate": 0.0009093787736481953, "loss": 0.9536, "step": 5649 }, { "epoch": 0.15162086732503222, "grad_norm": 0.310546875, "learning_rate": 0.0009095397826378639, "loss": 1.0563, "step": 5650 }, { "epoch": 0.15164770287677115, "grad_norm": 0.30078125, "learning_rate": 0.0009097007916275326, "loss": 0.9895, "step": 5651 }, { "epoch": 0.1516745384285101, "grad_norm": 0.330078125, "learning_rate": 0.0009098618006172011, "loss": 1.1723, "step": 5652 }, { "epoch": 0.15170137398024902, "grad_norm": 0.330078125, "learning_rate": 0.0009100228096068695, "loss": 1.0817, "step": 5653 }, { "epoch": 0.15172820953198798, "grad_norm": 0.302734375, "learning_rate": 0.0009101838185965382, "loss": 0.9251, "step": 5654 }, { "epoch": 0.15175504508372692, "grad_norm": 0.314453125, "learning_rate": 0.0009103448275862068, "loss": 1.0383, "step": 5655 }, { "epoch": 0.15178188063546585, "grad_norm": 0.3125, "learning_rate": 0.0009105058365758753, "loss": 1.1749, "step": 5656 }, { "epoch": 0.15180871618720482, "grad_norm": 0.28515625, "learning_rate": 0.000910666845565544, "loss": 0.9732, "step": 5657 }, { "epoch": 0.15183555173894375, "grad_norm": 0.310546875, "learning_rate": 0.0009108278545552125, "loss": 1.0269, "step": 5658 }, { "epoch": 0.1518623872906827, "grad_norm": 0.31640625, "learning_rate": 0.0009109888635448812, "loss": 1.0227, "step": 5659 }, { "epoch": 0.15188922284242165, "grad_norm": 0.3046875, "learning_rate": 0.0009111498725345498, "loss": 0.9484, "step": 5660 }, { "epoch": 0.15191605839416059, "grad_norm": 0.302734375, "learning_rate": 0.0009113108815242183, "loss": 0.9926, "step": 5661 }, { "epoch": 0.15194289394589952, "grad_norm": 0.30078125, "learning_rate": 0.000911471890513887, "loss": 0.9977, "step": 5662 }, { "epoch": 0.15196972949763848, "grad_norm": 0.306640625, "learning_rate": 0.0009116328995035555, "loss": 0.9547, "step": 5663 }, { "epoch": 0.15199656504937742, "grad_norm": 0.287109375, "learning_rate": 0.0009117939084932242, "loss": 0.9506, "step": 5664 }, { "epoch": 0.15202340060111635, "grad_norm": 0.3046875, "learning_rate": 0.0009119549174828928, "loss": 1.0045, "step": 5665 }, { "epoch": 0.1520502361528553, "grad_norm": 0.306640625, "learning_rate": 0.0009121159264725613, "loss": 1.0497, "step": 5666 }, { "epoch": 0.15207707170459425, "grad_norm": 0.3046875, "learning_rate": 0.00091227693546223, "loss": 1.0256, "step": 5667 }, { "epoch": 0.1521039072563332, "grad_norm": 0.30078125, "learning_rate": 0.0009124379444518985, "loss": 0.9965, "step": 5668 }, { "epoch": 0.15213074280807212, "grad_norm": 0.296875, "learning_rate": 0.000912598953441567, "loss": 0.9997, "step": 5669 }, { "epoch": 0.15215757835981109, "grad_norm": 0.32421875, "learning_rate": 0.0009127599624312356, "loss": 1.1429, "step": 5670 }, { "epoch": 0.15218441391155002, "grad_norm": 0.3125, "learning_rate": 0.0009129209714209042, "loss": 1.0349, "step": 5671 }, { "epoch": 0.15221124946328896, "grad_norm": 0.296875, "learning_rate": 0.0009130819804105728, "loss": 0.9625, "step": 5672 }, { "epoch": 0.15223808501502792, "grad_norm": 0.279296875, "learning_rate": 0.0009132429894002414, "loss": 0.8926, "step": 5673 }, { "epoch": 0.15226492056676685, "grad_norm": 0.318359375, "learning_rate": 0.00091340399838991, "loss": 1.0518, "step": 5674 }, { "epoch": 0.1522917561185058, "grad_norm": 0.322265625, "learning_rate": 0.0009135650073795786, "loss": 1.0465, "step": 5675 }, { "epoch": 0.15231859167024475, "grad_norm": 0.302734375, "learning_rate": 0.0009137260163692472, "loss": 0.9414, "step": 5676 }, { "epoch": 0.1523454272219837, "grad_norm": 0.314453125, "learning_rate": 0.0009138870253589158, "loss": 1.0503, "step": 5677 }, { "epoch": 0.15237226277372262, "grad_norm": 0.322265625, "learning_rate": 0.0009140480343485844, "loss": 1.1514, "step": 5678 }, { "epoch": 0.15239909832546158, "grad_norm": 0.291015625, "learning_rate": 0.000914209043338253, "loss": 0.9616, "step": 5679 }, { "epoch": 0.15242593387720052, "grad_norm": 0.326171875, "learning_rate": 0.0009143700523279216, "loss": 1.1174, "step": 5680 }, { "epoch": 0.15245276942893946, "grad_norm": 0.30859375, "learning_rate": 0.0009145310613175902, "loss": 0.9946, "step": 5681 }, { "epoch": 0.1524796049806784, "grad_norm": 0.291015625, "learning_rate": 0.0009146920703072588, "loss": 0.9663, "step": 5682 }, { "epoch": 0.15250644053241735, "grad_norm": 0.31640625, "learning_rate": 0.0009148530792969274, "loss": 1.0643, "step": 5683 }, { "epoch": 0.1525332760841563, "grad_norm": 0.306640625, "learning_rate": 0.0009150140882865958, "loss": 1.0219, "step": 5684 }, { "epoch": 0.15256011163589522, "grad_norm": 0.314453125, "learning_rate": 0.0009151750972762645, "loss": 1.1258, "step": 5685 }, { "epoch": 0.1525869471876342, "grad_norm": 0.3046875, "learning_rate": 0.000915336106265933, "loss": 0.9758, "step": 5686 }, { "epoch": 0.15261378273937312, "grad_norm": 0.30859375, "learning_rate": 0.0009154971152556016, "loss": 1.0868, "step": 5687 }, { "epoch": 0.15264061829111206, "grad_norm": 0.287109375, "learning_rate": 0.0009156581242452703, "loss": 0.9195, "step": 5688 }, { "epoch": 0.15266745384285102, "grad_norm": 0.30859375, "learning_rate": 0.0009158191332349388, "loss": 1.0577, "step": 5689 }, { "epoch": 0.15269428939458995, "grad_norm": 0.3125, "learning_rate": 0.0009159801422246075, "loss": 0.9878, "step": 5690 }, { "epoch": 0.1527211249463289, "grad_norm": 0.333984375, "learning_rate": 0.000916141151214276, "loss": 1.2033, "step": 5691 }, { "epoch": 0.15274796049806785, "grad_norm": 0.33203125, "learning_rate": 0.0009163021602039446, "loss": 1.1561, "step": 5692 }, { "epoch": 0.1527747960498068, "grad_norm": 0.3125, "learning_rate": 0.0009164631691936133, "loss": 1.0268, "step": 5693 }, { "epoch": 0.15280163160154572, "grad_norm": 0.30078125, "learning_rate": 0.0009166241781832818, "loss": 1.0076, "step": 5694 }, { "epoch": 0.15282846715328466, "grad_norm": 0.30078125, "learning_rate": 0.0009167851871729505, "loss": 0.9973, "step": 5695 }, { "epoch": 0.15285530270502362, "grad_norm": 0.314453125, "learning_rate": 0.000916946196162619, "loss": 0.9714, "step": 5696 }, { "epoch": 0.15288213825676256, "grad_norm": 0.3203125, "learning_rate": 0.0009171072051522876, "loss": 1.1094, "step": 5697 }, { "epoch": 0.1529089738085015, "grad_norm": 0.3125, "learning_rate": 0.0009172682141419563, "loss": 1.09, "step": 5698 }, { "epoch": 0.15293580936024045, "grad_norm": 0.32421875, "learning_rate": 0.0009174292231316247, "loss": 1.1124, "step": 5699 }, { "epoch": 0.1529626449119794, "grad_norm": 0.310546875, "learning_rate": 0.0009175902321212934, "loss": 1.1053, "step": 5700 }, { "epoch": 0.15298948046371832, "grad_norm": 0.2890625, "learning_rate": 0.0009177512411109619, "loss": 0.9117, "step": 5701 }, { "epoch": 0.1530163160154573, "grad_norm": 0.287109375, "learning_rate": 0.0009179122501006305, "loss": 0.9048, "step": 5702 }, { "epoch": 0.15304315156719622, "grad_norm": 0.32421875, "learning_rate": 0.0009180732590902991, "loss": 1.1122, "step": 5703 }, { "epoch": 0.15306998711893516, "grad_norm": 0.31640625, "learning_rate": 0.0009182342680799677, "loss": 1.0228, "step": 5704 }, { "epoch": 0.15309682267067412, "grad_norm": 0.32421875, "learning_rate": 0.0009183952770696362, "loss": 1.019, "step": 5705 }, { "epoch": 0.15312365822241306, "grad_norm": 0.3125, "learning_rate": 0.0009185562860593049, "loss": 1.0174, "step": 5706 }, { "epoch": 0.153150493774152, "grad_norm": 0.314453125, "learning_rate": 0.0009187172950489735, "loss": 1.1333, "step": 5707 }, { "epoch": 0.15317732932589095, "grad_norm": 0.30078125, "learning_rate": 0.0009188783040386421, "loss": 1.0508, "step": 5708 }, { "epoch": 0.1532041648776299, "grad_norm": 0.341796875, "learning_rate": 0.0009190393130283107, "loss": 1.1705, "step": 5709 }, { "epoch": 0.15323100042936882, "grad_norm": 0.31640625, "learning_rate": 0.0009192003220179792, "loss": 1.1073, "step": 5710 }, { "epoch": 0.15325783598110776, "grad_norm": 0.291015625, "learning_rate": 0.0009193613310076479, "loss": 0.9715, "step": 5711 }, { "epoch": 0.15328467153284672, "grad_norm": 0.302734375, "learning_rate": 0.0009195223399973165, "loss": 0.9574, "step": 5712 }, { "epoch": 0.15331150708458566, "grad_norm": 0.298828125, "learning_rate": 0.0009196833489869851, "loss": 1.0015, "step": 5713 }, { "epoch": 0.1533383426363246, "grad_norm": 0.314453125, "learning_rate": 0.0009198443579766537, "loss": 1.0061, "step": 5714 }, { "epoch": 0.15336517818806356, "grad_norm": 0.32421875, "learning_rate": 0.0009200053669663221, "loss": 1.137, "step": 5715 }, { "epoch": 0.1533920137398025, "grad_norm": 0.298828125, "learning_rate": 0.0009201663759559908, "loss": 0.922, "step": 5716 }, { "epoch": 0.15341884929154143, "grad_norm": 0.333984375, "learning_rate": 0.0009203273849456593, "loss": 1.1425, "step": 5717 }, { "epoch": 0.1534456848432804, "grad_norm": 0.3203125, "learning_rate": 0.000920488393935328, "loss": 1.1032, "step": 5718 }, { "epoch": 0.15347252039501932, "grad_norm": 0.310546875, "learning_rate": 0.0009206494029249966, "loss": 0.9568, "step": 5719 }, { "epoch": 0.15349935594675826, "grad_norm": 0.302734375, "learning_rate": 0.0009208104119146651, "loss": 1.0109, "step": 5720 }, { "epoch": 0.15352619149849722, "grad_norm": 0.318359375, "learning_rate": 0.0009209714209043338, "loss": 1.1044, "step": 5721 }, { "epoch": 0.15355302705023616, "grad_norm": 0.302734375, "learning_rate": 0.0009211324298940023, "loss": 0.9986, "step": 5722 }, { "epoch": 0.1535798626019751, "grad_norm": 0.306640625, "learning_rate": 0.0009212934388836709, "loss": 1.0201, "step": 5723 }, { "epoch": 0.15360669815371403, "grad_norm": 0.291015625, "learning_rate": 0.0009214544478733395, "loss": 0.9679, "step": 5724 }, { "epoch": 0.153633533705453, "grad_norm": 0.30859375, "learning_rate": 0.0009216154568630081, "loss": 0.9809, "step": 5725 }, { "epoch": 0.15366036925719193, "grad_norm": 0.30078125, "learning_rate": 0.0009217764658526768, "loss": 1.0136, "step": 5726 }, { "epoch": 0.15368720480893086, "grad_norm": 0.302734375, "learning_rate": 0.0009219374748423453, "loss": 0.9483, "step": 5727 }, { "epoch": 0.15371404036066982, "grad_norm": 0.306640625, "learning_rate": 0.0009220984838320139, "loss": 1.094, "step": 5728 }, { "epoch": 0.15374087591240876, "grad_norm": 0.30859375, "learning_rate": 0.0009222594928216825, "loss": 0.9786, "step": 5729 }, { "epoch": 0.1537677114641477, "grad_norm": 0.283203125, "learning_rate": 0.000922420501811351, "loss": 0.9359, "step": 5730 }, { "epoch": 0.15379454701588666, "grad_norm": 0.333984375, "learning_rate": 0.0009225815108010195, "loss": 1.086, "step": 5731 }, { "epoch": 0.1538213825676256, "grad_norm": 0.44921875, "learning_rate": 0.0009227425197906882, "loss": 1.3032, "step": 5732 }, { "epoch": 0.15384821811936453, "grad_norm": 0.369140625, "learning_rate": 0.0009229035287803568, "loss": 1.0998, "step": 5733 }, { "epoch": 0.1538750536711035, "grad_norm": 0.51953125, "learning_rate": 0.0009230645377700254, "loss": 1.0931, "step": 5734 }, { "epoch": 0.15390188922284243, "grad_norm": 0.359375, "learning_rate": 0.000923225546759694, "loss": 1.0252, "step": 5735 }, { "epoch": 0.15392872477458136, "grad_norm": 0.365234375, "learning_rate": 0.0009233865557493625, "loss": 1.1258, "step": 5736 }, { "epoch": 0.1539555603263203, "grad_norm": 0.3515625, "learning_rate": 0.0009235475647390312, "loss": 1.1817, "step": 5737 }, { "epoch": 0.15398239587805926, "grad_norm": 0.71484375, "learning_rate": 0.0009237085737286997, "loss": 1.0149, "step": 5738 }, { "epoch": 0.1540092314297982, "grad_norm": 0.5859375, "learning_rate": 0.0009238695827183684, "loss": 1.2021, "step": 5739 }, { "epoch": 0.15403606698153713, "grad_norm": 0.37109375, "learning_rate": 0.000924030591708037, "loss": 1.1598, "step": 5740 }, { "epoch": 0.1540629025332761, "grad_norm": 1.9765625, "learning_rate": 0.0009241916006977055, "loss": 0.9804, "step": 5741 }, { "epoch": 0.15408973808501503, "grad_norm": 0.578125, "learning_rate": 0.0009243526096873742, "loss": 1.1144, "step": 5742 }, { "epoch": 0.15411657363675396, "grad_norm": 0.439453125, "learning_rate": 0.0009245136186770427, "loss": 1.1037, "step": 5743 }, { "epoch": 0.15414340918849292, "grad_norm": 0.330078125, "learning_rate": 0.0009246746276667114, "loss": 1.165, "step": 5744 }, { "epoch": 0.15417024474023186, "grad_norm": 0.349609375, "learning_rate": 0.00092483563665638, "loss": 1.1193, "step": 5745 }, { "epoch": 0.1541970802919708, "grad_norm": 0.34375, "learning_rate": 0.0009249966456460484, "loss": 1.017, "step": 5746 }, { "epoch": 0.15422391584370976, "grad_norm": 0.31640625, "learning_rate": 0.0009251576546357171, "loss": 0.905, "step": 5747 }, { "epoch": 0.1542507513954487, "grad_norm": 0.337890625, "learning_rate": 0.0009253186636253856, "loss": 1.0439, "step": 5748 }, { "epoch": 0.15427758694718763, "grad_norm": 0.318359375, "learning_rate": 0.0009254796726150543, "loss": 1.0145, "step": 5749 }, { "epoch": 0.1543044224989266, "grad_norm": 0.353515625, "learning_rate": 0.0009256406816047228, "loss": 1.1867, "step": 5750 }, { "epoch": 0.15433125805066553, "grad_norm": 0.337890625, "learning_rate": 0.0009258016905943914, "loss": 1.0095, "step": 5751 }, { "epoch": 0.15435809360240446, "grad_norm": 0.337890625, "learning_rate": 0.0009259626995840601, "loss": 1.0867, "step": 5752 }, { "epoch": 0.1543849291541434, "grad_norm": 0.32421875, "learning_rate": 0.0009261237085737286, "loss": 1.012, "step": 5753 }, { "epoch": 0.15441176470588236, "grad_norm": 0.326171875, "learning_rate": 0.0009262847175633972, "loss": 1.0793, "step": 5754 }, { "epoch": 0.1544386002576213, "grad_norm": 0.318359375, "learning_rate": 0.0009264457265530658, "loss": 1.0489, "step": 5755 }, { "epoch": 0.15446543580936023, "grad_norm": 0.318359375, "learning_rate": 0.0009266067355427344, "loss": 1.0459, "step": 5756 }, { "epoch": 0.1544922713610992, "grad_norm": 0.31640625, "learning_rate": 0.000926767744532403, "loss": 0.9379, "step": 5757 }, { "epoch": 0.15451910691283813, "grad_norm": 0.310546875, "learning_rate": 0.0009269287535220716, "loss": 1.0136, "step": 5758 }, { "epoch": 0.15454594246457706, "grad_norm": 0.32421875, "learning_rate": 0.0009270897625117402, "loss": 0.9829, "step": 5759 }, { "epoch": 0.15457277801631603, "grad_norm": 0.318359375, "learning_rate": 0.0009272507715014088, "loss": 0.9494, "step": 5760 }, { "epoch": 0.15459961356805496, "grad_norm": 0.33203125, "learning_rate": 0.0009274117804910773, "loss": 1.1535, "step": 5761 }, { "epoch": 0.1546264491197939, "grad_norm": 0.326171875, "learning_rate": 0.0009275727894807458, "loss": 1.1863, "step": 5762 }, { "epoch": 0.15465328467153286, "grad_norm": 0.3125, "learning_rate": 0.0009277337984704145, "loss": 1.0868, "step": 5763 }, { "epoch": 0.1546801202232718, "grad_norm": 0.322265625, "learning_rate": 0.000927894807460083, "loss": 1.1056, "step": 5764 }, { "epoch": 0.15470695577501073, "grad_norm": 0.310546875, "learning_rate": 0.0009280558164497517, "loss": 0.9334, "step": 5765 }, { "epoch": 0.15473379132674966, "grad_norm": 0.322265625, "learning_rate": 0.0009282168254394203, "loss": 1.058, "step": 5766 }, { "epoch": 0.15476062687848863, "grad_norm": 0.3359375, "learning_rate": 0.0009283778344290888, "loss": 1.129, "step": 5767 }, { "epoch": 0.15478746243022756, "grad_norm": 0.326171875, "learning_rate": 0.0009285388434187575, "loss": 1.1108, "step": 5768 }, { "epoch": 0.1548142979819665, "grad_norm": 0.302734375, "learning_rate": 0.000928699852408426, "loss": 0.906, "step": 5769 }, { "epoch": 0.15484113353370546, "grad_norm": 0.326171875, "learning_rate": 0.0009288608613980947, "loss": 1.0717, "step": 5770 }, { "epoch": 0.1548679690854444, "grad_norm": 0.333984375, "learning_rate": 0.0009290218703877633, "loss": 1.1641, "step": 5771 }, { "epoch": 0.15489480463718333, "grad_norm": 0.296875, "learning_rate": 0.0009291828793774318, "loss": 0.9733, "step": 5772 }, { "epoch": 0.1549216401889223, "grad_norm": 0.32421875, "learning_rate": 0.0009293438883671005, "loss": 1.1427, "step": 5773 }, { "epoch": 0.15494847574066123, "grad_norm": 0.31640625, "learning_rate": 0.000929504897356769, "loss": 1.0481, "step": 5774 }, { "epoch": 0.15497531129240016, "grad_norm": 0.310546875, "learning_rate": 0.0009296659063464377, "loss": 1.1383, "step": 5775 }, { "epoch": 0.15500214684413913, "grad_norm": 0.318359375, "learning_rate": 0.0009298269153361062, "loss": 1.037, "step": 5776 }, { "epoch": 0.15502898239587806, "grad_norm": 0.30859375, "learning_rate": 0.0009299879243257747, "loss": 1.0004, "step": 5777 }, { "epoch": 0.155055817947617, "grad_norm": 0.318359375, "learning_rate": 0.0009301489333154433, "loss": 1.08, "step": 5778 }, { "epoch": 0.15508265349935596, "grad_norm": 0.314453125, "learning_rate": 0.0009303099423051119, "loss": 1.012, "step": 5779 }, { "epoch": 0.1551094890510949, "grad_norm": 0.3125, "learning_rate": 0.0009304709512947806, "loss": 1.0426, "step": 5780 }, { "epoch": 0.15513632460283383, "grad_norm": 0.34375, "learning_rate": 0.0009306319602844491, "loss": 1.1649, "step": 5781 }, { "epoch": 0.15516316015457277, "grad_norm": 0.33203125, "learning_rate": 0.0009307929692741177, "loss": 1.1363, "step": 5782 }, { "epoch": 0.15518999570631173, "grad_norm": 0.310546875, "learning_rate": 0.0009309539782637863, "loss": 1.0091, "step": 5783 }, { "epoch": 0.15521683125805066, "grad_norm": 0.318359375, "learning_rate": 0.0009311149872534549, "loss": 1.1119, "step": 5784 }, { "epoch": 0.1552436668097896, "grad_norm": 0.322265625, "learning_rate": 0.0009312759962431235, "loss": 1.0269, "step": 5785 }, { "epoch": 0.15527050236152856, "grad_norm": 0.32421875, "learning_rate": 0.0009314370052327921, "loss": 1.1386, "step": 5786 }, { "epoch": 0.1552973379132675, "grad_norm": 0.298828125, "learning_rate": 0.0009315980142224607, "loss": 1.0403, "step": 5787 }, { "epoch": 0.15532417346500643, "grad_norm": 0.30859375, "learning_rate": 0.0009317590232121293, "loss": 0.9948, "step": 5788 }, { "epoch": 0.1553510090167454, "grad_norm": 0.33203125, "learning_rate": 0.0009319200322017979, "loss": 1.129, "step": 5789 }, { "epoch": 0.15537784456848433, "grad_norm": 0.30859375, "learning_rate": 0.0009320810411914665, "loss": 1.0707, "step": 5790 }, { "epoch": 0.15540468012022327, "grad_norm": 0.330078125, "learning_rate": 0.0009322420501811351, "loss": 1.1305, "step": 5791 }, { "epoch": 0.15543151567196223, "grad_norm": 0.306640625, "learning_rate": 0.0009324030591708035, "loss": 1.058, "step": 5792 }, { "epoch": 0.15545835122370116, "grad_norm": 0.328125, "learning_rate": 0.0009325640681604722, "loss": 1.1221, "step": 5793 }, { "epoch": 0.1554851867754401, "grad_norm": 0.306640625, "learning_rate": 0.0009327250771501408, "loss": 1.0074, "step": 5794 }, { "epoch": 0.15551202232717903, "grad_norm": 0.30078125, "learning_rate": 0.0009328860861398093, "loss": 1.0193, "step": 5795 }, { "epoch": 0.155538857878918, "grad_norm": 0.341796875, "learning_rate": 0.000933047095129478, "loss": 1.2078, "step": 5796 }, { "epoch": 0.15556569343065693, "grad_norm": 0.33203125, "learning_rate": 0.0009332081041191465, "loss": 1.181, "step": 5797 }, { "epoch": 0.15559252898239587, "grad_norm": 0.302734375, "learning_rate": 0.0009333691131088151, "loss": 0.9748, "step": 5798 }, { "epoch": 0.15561936453413483, "grad_norm": 0.296875, "learning_rate": 0.0009335301220984838, "loss": 1.0478, "step": 5799 }, { "epoch": 0.15564620008587376, "grad_norm": 0.296875, "learning_rate": 0.0009336911310881523, "loss": 0.9756, "step": 5800 }, { "epoch": 0.1556730356376127, "grad_norm": 0.3359375, "learning_rate": 0.000933852140077821, "loss": 1.1168, "step": 5801 }, { "epoch": 0.15569987118935166, "grad_norm": 0.33203125, "learning_rate": 0.0009340131490674895, "loss": 1.1779, "step": 5802 }, { "epoch": 0.1557267067410906, "grad_norm": 0.318359375, "learning_rate": 0.0009341741580571581, "loss": 1.0816, "step": 5803 }, { "epoch": 0.15575354229282953, "grad_norm": 0.302734375, "learning_rate": 0.0009343351670468268, "loss": 0.9347, "step": 5804 }, { "epoch": 0.1557803778445685, "grad_norm": 0.3046875, "learning_rate": 0.0009344961760364953, "loss": 1.0682, "step": 5805 }, { "epoch": 0.15580721339630743, "grad_norm": 0.31640625, "learning_rate": 0.000934657185026164, "loss": 1.1192, "step": 5806 }, { "epoch": 0.15583404894804637, "grad_norm": 0.3046875, "learning_rate": 0.0009348181940158325, "loss": 1.0055, "step": 5807 }, { "epoch": 0.15586088449978533, "grad_norm": 0.302734375, "learning_rate": 0.000934979203005501, "loss": 1.0642, "step": 5808 }, { "epoch": 0.15588772005152426, "grad_norm": 0.318359375, "learning_rate": 0.0009351402119951696, "loss": 1.1124, "step": 5809 }, { "epoch": 0.1559145556032632, "grad_norm": 0.29296875, "learning_rate": 0.0009353012209848382, "loss": 1.0091, "step": 5810 }, { "epoch": 0.15594139115500213, "grad_norm": 0.298828125, "learning_rate": 0.0009354622299745067, "loss": 0.9987, "step": 5811 }, { "epoch": 0.1559682267067411, "grad_norm": 0.296875, "learning_rate": 0.0009356232389641754, "loss": 1.0722, "step": 5812 }, { "epoch": 0.15599506225848003, "grad_norm": 0.30859375, "learning_rate": 0.000935784247953844, "loss": 1.025, "step": 5813 }, { "epoch": 0.15602189781021897, "grad_norm": 0.291015625, "learning_rate": 0.0009359452569435126, "loss": 0.9583, "step": 5814 }, { "epoch": 0.15604873336195793, "grad_norm": 0.314453125, "learning_rate": 0.0009361062659331812, "loss": 1.0698, "step": 5815 }, { "epoch": 0.15607556891369687, "grad_norm": 0.306640625, "learning_rate": 0.0009362672749228497, "loss": 0.9783, "step": 5816 }, { "epoch": 0.1561024044654358, "grad_norm": 0.306640625, "learning_rate": 0.0009364282839125184, "loss": 1.0445, "step": 5817 }, { "epoch": 0.15612924001717476, "grad_norm": 0.298828125, "learning_rate": 0.000936589292902187, "loss": 0.9982, "step": 5818 }, { "epoch": 0.1561560755689137, "grad_norm": 0.322265625, "learning_rate": 0.0009367503018918556, "loss": 1.1713, "step": 5819 }, { "epoch": 0.15618291112065263, "grad_norm": 0.30859375, "learning_rate": 0.0009369113108815242, "loss": 1.0062, "step": 5820 }, { "epoch": 0.1562097466723916, "grad_norm": 0.30078125, "learning_rate": 0.0009370723198711927, "loss": 1.0803, "step": 5821 }, { "epoch": 0.15623658222413053, "grad_norm": 0.30859375, "learning_rate": 0.0009372333288608614, "loss": 1.1003, "step": 5822 }, { "epoch": 0.15626341777586947, "grad_norm": 0.30859375, "learning_rate": 0.0009373943378505298, "loss": 1.0551, "step": 5823 }, { "epoch": 0.1562902533276084, "grad_norm": 0.29296875, "learning_rate": 0.0009375553468401985, "loss": 1.0152, "step": 5824 }, { "epoch": 0.15631708887934737, "grad_norm": 0.310546875, "learning_rate": 0.000937716355829867, "loss": 1.1099, "step": 5825 }, { "epoch": 0.1563439244310863, "grad_norm": 0.31640625, "learning_rate": 0.0009378773648195356, "loss": 1.2008, "step": 5826 }, { "epoch": 0.15637075998282524, "grad_norm": 0.306640625, "learning_rate": 0.0009380383738092043, "loss": 1.0318, "step": 5827 }, { "epoch": 0.1563975955345642, "grad_norm": 0.279296875, "learning_rate": 0.0009381993827988728, "loss": 0.912, "step": 5828 }, { "epoch": 0.15642443108630313, "grad_norm": 0.298828125, "learning_rate": 0.0009383603917885415, "loss": 1.0442, "step": 5829 }, { "epoch": 0.15645126663804207, "grad_norm": 0.283203125, "learning_rate": 0.00093852140077821, "loss": 0.9526, "step": 5830 }, { "epoch": 0.15647810218978103, "grad_norm": 0.326171875, "learning_rate": 0.0009386824097678786, "loss": 1.1758, "step": 5831 }, { "epoch": 0.15650493774151997, "grad_norm": 0.294921875, "learning_rate": 0.0009388434187575473, "loss": 1.0421, "step": 5832 }, { "epoch": 0.1565317732932589, "grad_norm": 0.30078125, "learning_rate": 0.0009390044277472158, "loss": 1.0384, "step": 5833 }, { "epoch": 0.15655860884499787, "grad_norm": 0.263671875, "learning_rate": 0.0009391654367368844, "loss": 0.8661, "step": 5834 }, { "epoch": 0.1565854443967368, "grad_norm": 0.310546875, "learning_rate": 0.000939326445726553, "loss": 1.0741, "step": 5835 }, { "epoch": 0.15661227994847574, "grad_norm": 0.30859375, "learning_rate": 0.0009394874547162216, "loss": 1.0567, "step": 5836 }, { "epoch": 0.15663911550021467, "grad_norm": 0.279296875, "learning_rate": 0.0009396484637058903, "loss": 0.9551, "step": 5837 }, { "epoch": 0.15666595105195363, "grad_norm": 0.296875, "learning_rate": 0.0009398094726955588, "loss": 1.0106, "step": 5838 }, { "epoch": 0.15669278660369257, "grad_norm": 0.3125, "learning_rate": 0.0009399704816852273, "loss": 1.048, "step": 5839 }, { "epoch": 0.1567196221554315, "grad_norm": 0.318359375, "learning_rate": 0.0009401314906748959, "loss": 1.0513, "step": 5840 }, { "epoch": 0.15674645770717047, "grad_norm": 0.326171875, "learning_rate": 0.0009402924996645645, "loss": 1.0959, "step": 5841 }, { "epoch": 0.1567732932589094, "grad_norm": 0.291015625, "learning_rate": 0.000940453508654233, "loss": 0.9347, "step": 5842 }, { "epoch": 0.15680012881064834, "grad_norm": 0.30859375, "learning_rate": 0.0009406145176439017, "loss": 1.0661, "step": 5843 }, { "epoch": 0.1568269643623873, "grad_norm": 0.31640625, "learning_rate": 0.0009407755266335702, "loss": 1.0358, "step": 5844 }, { "epoch": 0.15685379991412624, "grad_norm": 0.296875, "learning_rate": 0.0009409365356232389, "loss": 0.9974, "step": 5845 }, { "epoch": 0.15688063546586517, "grad_norm": 0.28515625, "learning_rate": 0.0009410975446129075, "loss": 0.9361, "step": 5846 }, { "epoch": 0.15690747101760413, "grad_norm": 0.287109375, "learning_rate": 0.000941258553602576, "loss": 0.92, "step": 5847 }, { "epoch": 0.15693430656934307, "grad_norm": 0.322265625, "learning_rate": 0.0009414195625922447, "loss": 1.1006, "step": 5848 }, { "epoch": 0.156961142121082, "grad_norm": 0.328125, "learning_rate": 0.0009415805715819132, "loss": 1.0873, "step": 5849 }, { "epoch": 0.15698797767282097, "grad_norm": 0.3125, "learning_rate": 0.0009417415805715819, "loss": 1.1181, "step": 5850 }, { "epoch": 0.1570148132245599, "grad_norm": 0.318359375, "learning_rate": 0.0009419025895612505, "loss": 1.0715, "step": 5851 }, { "epoch": 0.15704164877629884, "grad_norm": 0.28125, "learning_rate": 0.000942063598550919, "loss": 0.9013, "step": 5852 }, { "epoch": 0.15706848432803777, "grad_norm": 0.296875, "learning_rate": 0.0009422246075405877, "loss": 1.1003, "step": 5853 }, { "epoch": 0.15709531987977673, "grad_norm": 0.294921875, "learning_rate": 0.0009423856165302561, "loss": 1.0879, "step": 5854 }, { "epoch": 0.15712215543151567, "grad_norm": 0.287109375, "learning_rate": 0.0009425466255199248, "loss": 0.9807, "step": 5855 }, { "epoch": 0.1571489909832546, "grad_norm": 0.306640625, "learning_rate": 0.0009427076345095933, "loss": 1.0019, "step": 5856 }, { "epoch": 0.15717582653499357, "grad_norm": 0.318359375, "learning_rate": 0.0009428686434992619, "loss": 1.0533, "step": 5857 }, { "epoch": 0.1572026620867325, "grad_norm": 0.30859375, "learning_rate": 0.0009430296524889305, "loss": 1.1181, "step": 5858 }, { "epoch": 0.15722949763847144, "grad_norm": 0.2890625, "learning_rate": 0.0009431906614785991, "loss": 0.9367, "step": 5859 }, { "epoch": 0.1572563331902104, "grad_norm": 0.31640625, "learning_rate": 0.0009433516704682678, "loss": 1.0747, "step": 5860 }, { "epoch": 0.15728316874194934, "grad_norm": 0.310546875, "learning_rate": 0.0009435126794579363, "loss": 1.0703, "step": 5861 }, { "epoch": 0.15731000429368827, "grad_norm": 0.3046875, "learning_rate": 0.0009436736884476049, "loss": 1.0702, "step": 5862 }, { "epoch": 0.15733683984542723, "grad_norm": 0.28125, "learning_rate": 0.0009438346974372735, "loss": 0.96, "step": 5863 }, { "epoch": 0.15736367539716617, "grad_norm": 0.3203125, "learning_rate": 0.0009439957064269421, "loss": 1.1022, "step": 5864 }, { "epoch": 0.1573905109489051, "grad_norm": 0.326171875, "learning_rate": 0.0009441567154166107, "loss": 1.1857, "step": 5865 }, { "epoch": 0.15741734650064404, "grad_norm": 0.3203125, "learning_rate": 0.0009443177244062793, "loss": 1.1532, "step": 5866 }, { "epoch": 0.157444182052383, "grad_norm": 0.31640625, "learning_rate": 0.0009444787333959479, "loss": 1.1647, "step": 5867 }, { "epoch": 0.15747101760412194, "grad_norm": 0.326171875, "learning_rate": 0.0009446397423856165, "loss": 1.1937, "step": 5868 }, { "epoch": 0.15749785315586087, "grad_norm": 0.310546875, "learning_rate": 0.0009448007513752851, "loss": 1.0439, "step": 5869 }, { "epoch": 0.15752468870759984, "grad_norm": 0.318359375, "learning_rate": 0.0009449617603649535, "loss": 1.1274, "step": 5870 }, { "epoch": 0.15755152425933877, "grad_norm": 0.32421875, "learning_rate": 0.0009451227693546222, "loss": 1.1912, "step": 5871 }, { "epoch": 0.1575783598110777, "grad_norm": 0.310546875, "learning_rate": 0.0009452837783442908, "loss": 1.0967, "step": 5872 }, { "epoch": 0.15760519536281667, "grad_norm": 0.30859375, "learning_rate": 0.0009454447873339594, "loss": 1.0637, "step": 5873 }, { "epoch": 0.1576320309145556, "grad_norm": 0.322265625, "learning_rate": 0.000945605796323628, "loss": 1.1844, "step": 5874 }, { "epoch": 0.15765886646629454, "grad_norm": 0.30078125, "learning_rate": 0.0009457668053132965, "loss": 1.0477, "step": 5875 }, { "epoch": 0.1576857020180335, "grad_norm": 0.318359375, "learning_rate": 0.0009459278143029652, "loss": 1.1684, "step": 5876 }, { "epoch": 0.15771253756977244, "grad_norm": 0.2890625, "learning_rate": 0.0009460888232926337, "loss": 0.9453, "step": 5877 }, { "epoch": 0.15773937312151137, "grad_norm": 0.283203125, "learning_rate": 0.0009462498322823023, "loss": 0.9706, "step": 5878 }, { "epoch": 0.15776620867325034, "grad_norm": 0.330078125, "learning_rate": 0.000946410841271971, "loss": 1.1496, "step": 5879 }, { "epoch": 0.15779304422498927, "grad_norm": 0.3046875, "learning_rate": 0.0009465718502616395, "loss": 1.0513, "step": 5880 }, { "epoch": 0.1578198797767282, "grad_norm": 0.28515625, "learning_rate": 0.0009467328592513082, "loss": 0.8971, "step": 5881 }, { "epoch": 0.15784671532846714, "grad_norm": 0.298828125, "learning_rate": 0.0009468938682409767, "loss": 1.0111, "step": 5882 }, { "epoch": 0.1578735508802061, "grad_norm": 0.3125, "learning_rate": 0.0009470548772306453, "loss": 1.0288, "step": 5883 }, { "epoch": 0.15790038643194504, "grad_norm": 0.326171875, "learning_rate": 0.000947215886220314, "loss": 1.1063, "step": 5884 }, { "epoch": 0.15792722198368397, "grad_norm": 0.306640625, "learning_rate": 0.0009473768952099824, "loss": 1.0116, "step": 5885 }, { "epoch": 0.15795405753542294, "grad_norm": 0.291015625, "learning_rate": 0.0009475379041996511, "loss": 1.0051, "step": 5886 }, { "epoch": 0.15798089308716187, "grad_norm": 0.310546875, "learning_rate": 0.0009476989131893196, "loss": 1.0085, "step": 5887 }, { "epoch": 0.1580077286389008, "grad_norm": 0.3203125, "learning_rate": 0.0009478599221789882, "loss": 1.1696, "step": 5888 }, { "epoch": 0.15803456419063977, "grad_norm": 0.306640625, "learning_rate": 0.0009480209311686568, "loss": 1.0417, "step": 5889 }, { "epoch": 0.1580613997423787, "grad_norm": 0.3203125, "learning_rate": 0.0009481819401583254, "loss": 1.076, "step": 5890 }, { "epoch": 0.15808823529411764, "grad_norm": 0.30859375, "learning_rate": 0.0009483429491479941, "loss": 1.058, "step": 5891 }, { "epoch": 0.1581150708458566, "grad_norm": 0.30859375, "learning_rate": 0.0009485039581376626, "loss": 1.0778, "step": 5892 }, { "epoch": 0.15814190639759554, "grad_norm": 0.318359375, "learning_rate": 0.0009486649671273312, "loss": 1.0911, "step": 5893 }, { "epoch": 0.15816874194933447, "grad_norm": 0.314453125, "learning_rate": 0.0009488259761169998, "loss": 1.0698, "step": 5894 }, { "epoch": 0.1581955775010734, "grad_norm": 0.29296875, "learning_rate": 0.0009489869851066684, "loss": 1.0122, "step": 5895 }, { "epoch": 0.15822241305281237, "grad_norm": 0.30859375, "learning_rate": 0.0009491479940963369, "loss": 1.0513, "step": 5896 }, { "epoch": 0.1582492486045513, "grad_norm": 0.296875, "learning_rate": 0.0009493090030860056, "loss": 0.9915, "step": 5897 }, { "epoch": 0.15827608415629024, "grad_norm": 0.2890625, "learning_rate": 0.0009494700120756742, "loss": 0.9682, "step": 5898 }, { "epoch": 0.1583029197080292, "grad_norm": 0.30078125, "learning_rate": 0.0009496310210653428, "loss": 1.0942, "step": 5899 }, { "epoch": 0.15832975525976814, "grad_norm": 0.302734375, "learning_rate": 0.0009497920300550114, "loss": 1.0467, "step": 5900 }, { "epoch": 0.15835659081150708, "grad_norm": 0.296875, "learning_rate": 0.0009499530390446798, "loss": 0.9925, "step": 5901 }, { "epoch": 0.15838342636324604, "grad_norm": 0.30078125, "learning_rate": 0.0009501140480343485, "loss": 0.9946, "step": 5902 }, { "epoch": 0.15841026191498497, "grad_norm": 0.30859375, "learning_rate": 0.000950275057024017, "loss": 1.0413, "step": 5903 }, { "epoch": 0.1584370974667239, "grad_norm": 0.287109375, "learning_rate": 0.0009504360660136857, "loss": 0.9846, "step": 5904 }, { "epoch": 0.15846393301846287, "grad_norm": 0.294921875, "learning_rate": 0.0009505970750033543, "loss": 0.9658, "step": 5905 }, { "epoch": 0.1584907685702018, "grad_norm": 0.296875, "learning_rate": 0.0009507580839930228, "loss": 1.0407, "step": 5906 }, { "epoch": 0.15851760412194074, "grad_norm": 0.3046875, "learning_rate": 0.0009509190929826915, "loss": 1.0155, "step": 5907 }, { "epoch": 0.1585444396736797, "grad_norm": 0.283203125, "learning_rate": 0.00095108010197236, "loss": 0.964, "step": 5908 }, { "epoch": 0.15857127522541864, "grad_norm": 0.310546875, "learning_rate": 0.0009512411109620287, "loss": 1.1068, "step": 5909 }, { "epoch": 0.15859811077715757, "grad_norm": 0.3046875, "learning_rate": 0.0009514021199516973, "loss": 1.0934, "step": 5910 }, { "epoch": 0.1586249463288965, "grad_norm": 0.314453125, "learning_rate": 0.0009515631289413658, "loss": 1.0573, "step": 5911 }, { "epoch": 0.15865178188063547, "grad_norm": 0.296875, "learning_rate": 0.0009517241379310345, "loss": 1.0637, "step": 5912 }, { "epoch": 0.1586786174323744, "grad_norm": 0.3125, "learning_rate": 0.000951885146920703, "loss": 1.0691, "step": 5913 }, { "epoch": 0.15870545298411334, "grad_norm": 0.322265625, "learning_rate": 0.0009520461559103716, "loss": 1.1526, "step": 5914 }, { "epoch": 0.1587322885358523, "grad_norm": 0.310546875, "learning_rate": 0.0009522071649000402, "loss": 1.08, "step": 5915 }, { "epoch": 0.15875912408759124, "grad_norm": 0.287109375, "learning_rate": 0.0009523681738897087, "loss": 0.8855, "step": 5916 }, { "epoch": 0.15878595963933018, "grad_norm": 0.2890625, "learning_rate": 0.0009525291828793773, "loss": 1.0228, "step": 5917 }, { "epoch": 0.15881279519106914, "grad_norm": 0.3046875, "learning_rate": 0.0009526901918690459, "loss": 1.0589, "step": 5918 }, { "epoch": 0.15883963074280807, "grad_norm": 0.298828125, "learning_rate": 0.0009528512008587145, "loss": 1.0357, "step": 5919 }, { "epoch": 0.158866466294547, "grad_norm": 0.30859375, "learning_rate": 0.0009530122098483831, "loss": 1.0701, "step": 5920 }, { "epoch": 0.15889330184628597, "grad_norm": 0.318359375, "learning_rate": 0.0009531732188380517, "loss": 1.1443, "step": 5921 }, { "epoch": 0.1589201373980249, "grad_norm": 0.291015625, "learning_rate": 0.0009533342278277202, "loss": 1.0212, "step": 5922 }, { "epoch": 0.15894697294976384, "grad_norm": 0.30078125, "learning_rate": 0.0009534952368173889, "loss": 1.0529, "step": 5923 }, { "epoch": 0.15897380850150278, "grad_norm": 0.328125, "learning_rate": 0.0009536562458070575, "loss": 1.1812, "step": 5924 }, { "epoch": 0.15900064405324174, "grad_norm": 0.294921875, "learning_rate": 0.0009538172547967261, "loss": 0.979, "step": 5925 }, { "epoch": 0.15902747960498068, "grad_norm": 0.318359375, "learning_rate": 0.0009539782637863947, "loss": 1.1652, "step": 5926 }, { "epoch": 0.1590543151567196, "grad_norm": 0.287109375, "learning_rate": 0.0009541392727760632, "loss": 1.0093, "step": 5927 }, { "epoch": 0.15908115070845857, "grad_norm": 0.306640625, "learning_rate": 0.0009543002817657319, "loss": 1.1132, "step": 5928 }, { "epoch": 0.1591079862601975, "grad_norm": 0.322265625, "learning_rate": 0.0009544612907554004, "loss": 1.1392, "step": 5929 }, { "epoch": 0.15913482181193644, "grad_norm": 0.32421875, "learning_rate": 0.0009546222997450691, "loss": 1.1295, "step": 5930 }, { "epoch": 0.1591616573636754, "grad_norm": 0.271484375, "learning_rate": 0.0009547833087347375, "loss": 0.9838, "step": 5931 }, { "epoch": 0.15918849291541434, "grad_norm": 0.31640625, "learning_rate": 0.0009549443177244061, "loss": 1.0831, "step": 5932 }, { "epoch": 0.15921532846715328, "grad_norm": 0.29296875, "learning_rate": 0.0009551053267140748, "loss": 0.9894, "step": 5933 }, { "epoch": 0.15924216401889224, "grad_norm": 0.29296875, "learning_rate": 0.0009552663357037433, "loss": 1.0942, "step": 5934 }, { "epoch": 0.15926899957063118, "grad_norm": 0.29296875, "learning_rate": 0.000955427344693412, "loss": 0.971, "step": 5935 }, { "epoch": 0.1592958351223701, "grad_norm": 0.330078125, "learning_rate": 0.0009555883536830805, "loss": 1.2422, "step": 5936 }, { "epoch": 0.15932267067410905, "grad_norm": 0.283203125, "learning_rate": 0.0009557493626727491, "loss": 0.9099, "step": 5937 }, { "epoch": 0.159349506225848, "grad_norm": 0.271484375, "learning_rate": 0.0009559103716624178, "loss": 0.8856, "step": 5938 }, { "epoch": 0.15937634177758694, "grad_norm": 0.298828125, "learning_rate": 0.0009560713806520863, "loss": 1.054, "step": 5939 }, { "epoch": 0.15940317732932588, "grad_norm": 0.32421875, "learning_rate": 0.000956232389641755, "loss": 1.0935, "step": 5940 }, { "epoch": 0.15943001288106484, "grad_norm": 0.310546875, "learning_rate": 0.0009563933986314235, "loss": 1.0043, "step": 5941 }, { "epoch": 0.15945684843280378, "grad_norm": 0.294921875, "learning_rate": 0.0009565544076210921, "loss": 0.948, "step": 5942 }, { "epoch": 0.1594836839845427, "grad_norm": 0.296875, "learning_rate": 0.0009567154166107608, "loss": 1.0381, "step": 5943 }, { "epoch": 0.15951051953628168, "grad_norm": 0.322265625, "learning_rate": 0.0009568764256004293, "loss": 1.113, "step": 5944 }, { "epoch": 0.1595373550880206, "grad_norm": 0.31640625, "learning_rate": 0.000957037434590098, "loss": 0.9809, "step": 5945 }, { "epoch": 0.15956419063975955, "grad_norm": 0.322265625, "learning_rate": 0.0009571984435797665, "loss": 1.1632, "step": 5946 }, { "epoch": 0.1595910261914985, "grad_norm": 0.310546875, "learning_rate": 0.000957359452569435, "loss": 0.9557, "step": 5947 }, { "epoch": 0.15961786174323744, "grad_norm": 0.3125, "learning_rate": 0.0009575204615591036, "loss": 1.0646, "step": 5948 }, { "epoch": 0.15964469729497638, "grad_norm": 0.3203125, "learning_rate": 0.0009576814705487722, "loss": 1.127, "step": 5949 }, { "epoch": 0.15967153284671534, "grad_norm": 0.306640625, "learning_rate": 0.0009578424795384407, "loss": 1.1166, "step": 5950 }, { "epoch": 0.15969836839845428, "grad_norm": 0.306640625, "learning_rate": 0.0009580034885281094, "loss": 1.0241, "step": 5951 }, { "epoch": 0.1597252039501932, "grad_norm": 0.3046875, "learning_rate": 0.000958164497517778, "loss": 1.0236, "step": 5952 }, { "epoch": 0.15975203950193215, "grad_norm": 0.318359375, "learning_rate": 0.0009583255065074466, "loss": 1.0648, "step": 5953 }, { "epoch": 0.1597788750536711, "grad_norm": 0.3203125, "learning_rate": 0.0009584865154971152, "loss": 1.0916, "step": 5954 }, { "epoch": 0.15980571060541005, "grad_norm": 0.302734375, "learning_rate": 0.0009586475244867837, "loss": 1.0569, "step": 5955 }, { "epoch": 0.15983254615714898, "grad_norm": 0.314453125, "learning_rate": 0.0009588085334764524, "loss": 0.9923, "step": 5956 }, { "epoch": 0.15985938170888794, "grad_norm": 0.322265625, "learning_rate": 0.000958969542466121, "loss": 1.1045, "step": 5957 }, { "epoch": 0.15988621726062688, "grad_norm": 0.3125, "learning_rate": 0.0009591305514557895, "loss": 1.0894, "step": 5958 }, { "epoch": 0.1599130528123658, "grad_norm": 0.30078125, "learning_rate": 0.0009592915604454582, "loss": 0.9553, "step": 5959 }, { "epoch": 0.15993988836410478, "grad_norm": 0.3046875, "learning_rate": 0.0009594525694351267, "loss": 1.1128, "step": 5960 }, { "epoch": 0.1599667239158437, "grad_norm": 0.291015625, "learning_rate": 0.0009596135784247954, "loss": 1.0643, "step": 5961 }, { "epoch": 0.15999355946758265, "grad_norm": 0.322265625, "learning_rate": 0.0009597745874144638, "loss": 1.1243, "step": 5962 }, { "epoch": 0.1600203950193216, "grad_norm": 0.287109375, "learning_rate": 0.0009599355964041324, "loss": 0.9336, "step": 5963 }, { "epoch": 0.1600203950193216, "eval_loss": 3.5207624435424805, "eval_runtime": 587.1235, "eval_samples_per_second": 81.649, "eval_steps_per_second": 20.413, "step": 5963 }, { "epoch": 0.16004723057106054, "grad_norm": 0.291015625, "learning_rate": 0.000960096605393801, "loss": 0.8888, "step": 5964 }, { "epoch": 0.16007406612279948, "grad_norm": 0.287109375, "learning_rate": 0.0009602576143834696, "loss": 1.0097, "step": 5965 }, { "epoch": 0.16010090167453842, "grad_norm": 0.29296875, "learning_rate": 0.0009604186233731383, "loss": 1.1009, "step": 5966 }, { "epoch": 0.16012773722627738, "grad_norm": 0.30078125, "learning_rate": 0.0009605796323628068, "loss": 1.0623, "step": 5967 }, { "epoch": 0.1601545727780163, "grad_norm": 0.291015625, "learning_rate": 0.0009607406413524754, "loss": 1.0198, "step": 5968 }, { "epoch": 0.16018140832975525, "grad_norm": 0.287109375, "learning_rate": 0.000960901650342144, "loss": 0.9608, "step": 5969 }, { "epoch": 0.1602082438814942, "grad_norm": 0.322265625, "learning_rate": 0.0009610626593318126, "loss": 1.0648, "step": 5970 }, { "epoch": 0.16023507943323315, "grad_norm": 0.29296875, "learning_rate": 0.0009612236683214813, "loss": 0.9952, "step": 5971 }, { "epoch": 0.16026191498497208, "grad_norm": 0.296875, "learning_rate": 0.0009613846773111498, "loss": 1.0109, "step": 5972 }, { "epoch": 0.16028875053671104, "grad_norm": 0.298828125, "learning_rate": 0.0009615456863008184, "loss": 1.0462, "step": 5973 }, { "epoch": 0.16031558608844998, "grad_norm": 0.296875, "learning_rate": 0.000961706695290487, "loss": 0.9712, "step": 5974 }, { "epoch": 0.16034242164018891, "grad_norm": 0.296875, "learning_rate": 0.0009618677042801556, "loss": 0.938, "step": 5975 }, { "epoch": 0.16036925719192788, "grad_norm": 0.3046875, "learning_rate": 0.0009620287132698243, "loss": 1.0337, "step": 5976 }, { "epoch": 0.1603960927436668, "grad_norm": 0.310546875, "learning_rate": 0.0009621897222594928, "loss": 1.1431, "step": 5977 }, { "epoch": 0.16042292829540575, "grad_norm": 0.3125, "learning_rate": 0.0009623507312491612, "loss": 1.0509, "step": 5978 }, { "epoch": 0.1604497638471447, "grad_norm": 0.302734375, "learning_rate": 0.0009625117402388299, "loss": 1.0203, "step": 5979 }, { "epoch": 0.16047659939888365, "grad_norm": 0.322265625, "learning_rate": 0.0009626727492284985, "loss": 1.1413, "step": 5980 }, { "epoch": 0.16050343495062258, "grad_norm": 0.298828125, "learning_rate": 0.000962833758218167, "loss": 1.0043, "step": 5981 }, { "epoch": 0.16053027050236152, "grad_norm": 0.294921875, "learning_rate": 0.0009629947672078357, "loss": 1.0476, "step": 5982 }, { "epoch": 0.16055710605410048, "grad_norm": 0.3046875, "learning_rate": 0.0009631557761975042, "loss": 0.9442, "step": 5983 }, { "epoch": 0.16058394160583941, "grad_norm": 0.306640625, "learning_rate": 0.0009633167851871729, "loss": 1.1108, "step": 5984 }, { "epoch": 0.16061077715757835, "grad_norm": 0.306640625, "learning_rate": 0.0009634777941768415, "loss": 1.0594, "step": 5985 }, { "epoch": 0.1606376127093173, "grad_norm": 0.302734375, "learning_rate": 0.00096363880316651, "loss": 1.0204, "step": 5986 }, { "epoch": 0.16066444826105625, "grad_norm": 0.310546875, "learning_rate": 0.0009637998121561787, "loss": 1.088, "step": 5987 }, { "epoch": 0.16069128381279518, "grad_norm": 0.318359375, "learning_rate": 0.0009639608211458472, "loss": 1.1753, "step": 5988 }, { "epoch": 0.16071811936453415, "grad_norm": 0.28515625, "learning_rate": 0.0009641218301355159, "loss": 0.9778, "step": 5989 }, { "epoch": 0.16074495491627308, "grad_norm": 0.318359375, "learning_rate": 0.0009642828391251845, "loss": 1.0283, "step": 5990 }, { "epoch": 0.16077179046801202, "grad_norm": 0.30078125, "learning_rate": 0.000964443848114853, "loss": 1.1409, "step": 5991 }, { "epoch": 0.16079862601975098, "grad_norm": 0.287109375, "learning_rate": 0.0009646048571045217, "loss": 1.045, "step": 5992 }, { "epoch": 0.1608254615714899, "grad_norm": 0.294921875, "learning_rate": 0.0009647658660941901, "loss": 1.0656, "step": 5993 }, { "epoch": 0.16085229712322885, "grad_norm": 0.31640625, "learning_rate": 0.0009649268750838587, "loss": 1.1828, "step": 5994 }, { "epoch": 0.16087913267496778, "grad_norm": 0.3125, "learning_rate": 0.0009650878840735273, "loss": 1.1217, "step": 5995 }, { "epoch": 0.16090596822670675, "grad_norm": 0.306640625, "learning_rate": 0.0009652488930631959, "loss": 1.1791, "step": 5996 }, { "epoch": 0.16093280377844568, "grad_norm": 0.302734375, "learning_rate": 0.0009654099020528645, "loss": 1.1226, "step": 5997 }, { "epoch": 0.16095963933018462, "grad_norm": 0.30859375, "learning_rate": 0.0009655709110425331, "loss": 1.1224, "step": 5998 }, { "epoch": 0.16098647488192358, "grad_norm": 0.306640625, "learning_rate": 0.0009657319200322017, "loss": 1.0764, "step": 5999 }, { "epoch": 0.16101331043366252, "grad_norm": 0.30078125, "learning_rate": 0.0009658929290218703, "loss": 1.0114, "step": 6000 }, { "epoch": 0.16104014598540145, "grad_norm": 0.296875, "learning_rate": 0.0009660539380115389, "loss": 1.093, "step": 6001 }, { "epoch": 0.1610669815371404, "grad_norm": 0.302734375, "learning_rate": 0.0009662149470012074, "loss": 1.0849, "step": 6002 }, { "epoch": 0.16109381708887935, "grad_norm": 0.287109375, "learning_rate": 0.0009663759559908761, "loss": 0.9549, "step": 6003 }, { "epoch": 0.16112065264061828, "grad_norm": 0.31640625, "learning_rate": 0.0009665369649805447, "loss": 1.0716, "step": 6004 }, { "epoch": 0.16114748819235725, "grad_norm": 0.3046875, "learning_rate": 0.0009666979739702133, "loss": 1.0902, "step": 6005 }, { "epoch": 0.16117432374409618, "grad_norm": 0.296875, "learning_rate": 0.0009668589829598819, "loss": 1.0261, "step": 6006 }, { "epoch": 0.16120115929583512, "grad_norm": 0.3203125, "learning_rate": 0.0009670199919495504, "loss": 1.1732, "step": 6007 }, { "epoch": 0.16122799484757408, "grad_norm": 0.333984375, "learning_rate": 0.0009671810009392191, "loss": 1.0235, "step": 6008 }, { "epoch": 0.16125483039931301, "grad_norm": 0.326171875, "learning_rate": 0.0009673420099288875, "loss": 1.1649, "step": 6009 }, { "epoch": 0.16128166595105195, "grad_norm": 0.28515625, "learning_rate": 0.0009675030189185562, "loss": 0.9445, "step": 6010 }, { "epoch": 0.16130850150279089, "grad_norm": 0.30859375, "learning_rate": 0.0009676640279082248, "loss": 1.0535, "step": 6011 }, { "epoch": 0.16133533705452985, "grad_norm": 0.318359375, "learning_rate": 0.0009678250368978933, "loss": 1.0502, "step": 6012 }, { "epoch": 0.16136217260626878, "grad_norm": 0.310546875, "learning_rate": 0.000967986045887562, "loss": 1.0158, "step": 6013 }, { "epoch": 0.16138900815800772, "grad_norm": 0.318359375, "learning_rate": 0.0009681470548772305, "loss": 1.0828, "step": 6014 }, { "epoch": 0.16141584370974668, "grad_norm": 0.306640625, "learning_rate": 0.0009683080638668992, "loss": 1.0392, "step": 6015 }, { "epoch": 0.16144267926148562, "grad_norm": 0.2890625, "learning_rate": 0.0009684690728565677, "loss": 0.9825, "step": 6016 }, { "epoch": 0.16146951481322455, "grad_norm": 0.2890625, "learning_rate": 0.0009686300818462363, "loss": 0.9609, "step": 6017 }, { "epoch": 0.16149635036496351, "grad_norm": 0.28515625, "learning_rate": 0.000968791090835905, "loss": 0.9941, "step": 6018 }, { "epoch": 0.16152318591670245, "grad_norm": 0.298828125, "learning_rate": 0.0009689520998255735, "loss": 1.0479, "step": 6019 }, { "epoch": 0.16155002146844138, "grad_norm": 0.31640625, "learning_rate": 0.0009691131088152422, "loss": 1.1667, "step": 6020 }, { "epoch": 0.16157685702018035, "grad_norm": 0.296875, "learning_rate": 0.0009692741178049107, "loss": 1.0068, "step": 6021 }, { "epoch": 0.16160369257191928, "grad_norm": 0.328125, "learning_rate": 0.0009694351267945793, "loss": 0.9795, "step": 6022 }, { "epoch": 0.16163052812365822, "grad_norm": 0.294921875, "learning_rate": 0.000969596135784248, "loss": 0.9984, "step": 6023 }, { "epoch": 0.16165736367539715, "grad_norm": 0.294921875, "learning_rate": 0.0009697571447739164, "loss": 0.9567, "step": 6024 }, { "epoch": 0.16168419922713612, "grad_norm": 0.298828125, "learning_rate": 0.000969918153763585, "loss": 1.099, "step": 6025 }, { "epoch": 0.16171103477887505, "grad_norm": 0.32421875, "learning_rate": 0.0009700791627532536, "loss": 1.1823, "step": 6026 }, { "epoch": 0.161737870330614, "grad_norm": 0.32421875, "learning_rate": 0.0009702401717429222, "loss": 1.1649, "step": 6027 }, { "epoch": 0.16176470588235295, "grad_norm": 0.294921875, "learning_rate": 0.0009704011807325908, "loss": 0.9701, "step": 6028 }, { "epoch": 0.16179154143409188, "grad_norm": 0.3046875, "learning_rate": 0.0009705621897222594, "loss": 1.0803, "step": 6029 }, { "epoch": 0.16181837698583082, "grad_norm": 0.294921875, "learning_rate": 0.000970723198711928, "loss": 0.9686, "step": 6030 }, { "epoch": 0.16184521253756978, "grad_norm": 0.298828125, "learning_rate": 0.0009708842077015966, "loss": 1.0102, "step": 6031 }, { "epoch": 0.16187204808930872, "grad_norm": 0.27734375, "learning_rate": 0.0009710452166912652, "loss": 0.9428, "step": 6032 }, { "epoch": 0.16189888364104765, "grad_norm": 0.314453125, "learning_rate": 0.0009712062256809338, "loss": 1.1577, "step": 6033 }, { "epoch": 0.16192571919278662, "grad_norm": 0.2890625, "learning_rate": 0.0009713672346706024, "loss": 0.9917, "step": 6034 }, { "epoch": 0.16195255474452555, "grad_norm": 0.30078125, "learning_rate": 0.0009715282436602709, "loss": 1.0665, "step": 6035 }, { "epoch": 0.16197939029626449, "grad_norm": 0.310546875, "learning_rate": 0.0009716892526499396, "loss": 1.0813, "step": 6036 }, { "epoch": 0.16200622584800342, "grad_norm": 0.298828125, "learning_rate": 0.0009718502616396082, "loss": 1.0512, "step": 6037 }, { "epoch": 0.16203306139974238, "grad_norm": 0.31640625, "learning_rate": 0.0009720112706292767, "loss": 1.1686, "step": 6038 }, { "epoch": 0.16205989695148132, "grad_norm": 0.291015625, "learning_rate": 0.0009721722796189454, "loss": 1.0966, "step": 6039 }, { "epoch": 0.16208673250322025, "grad_norm": 0.30859375, "learning_rate": 0.0009723332886086138, "loss": 1.0446, "step": 6040 }, { "epoch": 0.16211356805495922, "grad_norm": 0.318359375, "learning_rate": 0.0009724942975982825, "loss": 1.1414, "step": 6041 }, { "epoch": 0.16214040360669815, "grad_norm": 0.298828125, "learning_rate": 0.000972655306587951, "loss": 1.022, "step": 6042 }, { "epoch": 0.1621672391584371, "grad_norm": 0.29296875, "learning_rate": 0.0009728163155776196, "loss": 0.9879, "step": 6043 }, { "epoch": 0.16219407471017605, "grad_norm": 0.3046875, "learning_rate": 0.0009729773245672883, "loss": 0.995, "step": 6044 }, { "epoch": 0.16222091026191499, "grad_norm": 0.3125, "learning_rate": 0.0009731383335569568, "loss": 1.1113, "step": 6045 }, { "epoch": 0.16224774581365392, "grad_norm": 0.302734375, "learning_rate": 0.0009732993425466255, "loss": 1.0412, "step": 6046 }, { "epoch": 0.16227458136539288, "grad_norm": 0.298828125, "learning_rate": 0.000973460351536294, "loss": 1.0041, "step": 6047 }, { "epoch": 0.16230141691713182, "grad_norm": 0.29296875, "learning_rate": 0.0009736213605259626, "loss": 0.9069, "step": 6048 }, { "epoch": 0.16232825246887075, "grad_norm": 0.294921875, "learning_rate": 0.0009737823695156312, "loss": 1.0029, "step": 6049 }, { "epoch": 0.16235508802060972, "grad_norm": 0.3203125, "learning_rate": 0.0009739433785052998, "loss": 1.1444, "step": 6050 }, { "epoch": 0.16238192357234865, "grad_norm": 0.3125, "learning_rate": 0.0009741043874949685, "loss": 1.0846, "step": 6051 }, { "epoch": 0.1624087591240876, "grad_norm": 0.2890625, "learning_rate": 0.000974265396484637, "loss": 0.9823, "step": 6052 }, { "epoch": 0.16243559467582652, "grad_norm": 0.31640625, "learning_rate": 0.0009744264054743056, "loss": 1.1614, "step": 6053 }, { "epoch": 0.16246243022756549, "grad_norm": 0.3046875, "learning_rate": 0.0009745874144639742, "loss": 1.0741, "step": 6054 }, { "epoch": 0.16248926577930442, "grad_norm": 0.302734375, "learning_rate": 0.0009747484234536427, "loss": 0.9717, "step": 6055 }, { "epoch": 0.16251610133104336, "grad_norm": 0.318359375, "learning_rate": 0.0009749094324433112, "loss": 1.1028, "step": 6056 }, { "epoch": 0.16254293688278232, "grad_norm": 0.30078125, "learning_rate": 0.0009750704414329799, "loss": 1.0356, "step": 6057 }, { "epoch": 0.16256977243452125, "grad_norm": 0.310546875, "learning_rate": 0.0009752314504226485, "loss": 1.0509, "step": 6058 }, { "epoch": 0.1625966079862602, "grad_norm": 0.28515625, "learning_rate": 0.0009753924594123171, "loss": 0.9917, "step": 6059 }, { "epoch": 0.16262344353799915, "grad_norm": 0.29296875, "learning_rate": 0.0009755534684019857, "loss": 0.9908, "step": 6060 }, { "epoch": 0.1626502790897381, "grad_norm": 0.322265625, "learning_rate": 0.0009757144773916542, "loss": 1.1403, "step": 6061 }, { "epoch": 0.16267711464147702, "grad_norm": 0.275390625, "learning_rate": 0.0009758754863813229, "loss": 0.9207, "step": 6062 }, { "epoch": 0.16270395019321598, "grad_norm": 0.330078125, "learning_rate": 0.0009760364953709915, "loss": 1.1386, "step": 6063 }, { "epoch": 0.16273078574495492, "grad_norm": 0.310546875, "learning_rate": 0.00097619750436066, "loss": 1.0564, "step": 6064 }, { "epoch": 0.16275762129669386, "grad_norm": 0.287109375, "learning_rate": 0.0009763585133503287, "loss": 0.9782, "step": 6065 }, { "epoch": 0.1627844568484328, "grad_norm": 0.30078125, "learning_rate": 0.0009765195223399972, "loss": 1.0729, "step": 6066 }, { "epoch": 0.16281129240017175, "grad_norm": 0.32421875, "learning_rate": 0.0009766805313296658, "loss": 1.1771, "step": 6067 }, { "epoch": 0.1628381279519107, "grad_norm": 0.310546875, "learning_rate": 0.0009768415403193344, "loss": 1.1023, "step": 6068 }, { "epoch": 0.16286496350364962, "grad_norm": 0.318359375, "learning_rate": 0.000977002549309003, "loss": 1.0691, "step": 6069 }, { "epoch": 0.1628917990553886, "grad_norm": 0.302734375, "learning_rate": 0.0009771635582986717, "loss": 1.0644, "step": 6070 }, { "epoch": 0.16291863460712752, "grad_norm": 0.287109375, "learning_rate": 0.0009773245672883401, "loss": 0.9659, "step": 6071 }, { "epoch": 0.16294547015886646, "grad_norm": 0.291015625, "learning_rate": 0.0009774855762780088, "loss": 1.0335, "step": 6072 }, { "epoch": 0.16297230571060542, "grad_norm": 0.333984375, "learning_rate": 0.0009776465852676774, "loss": 1.112, "step": 6073 }, { "epoch": 0.16299914126234435, "grad_norm": 0.302734375, "learning_rate": 0.0009778075942573458, "loss": 1.0289, "step": 6074 }, { "epoch": 0.1630259768140833, "grad_norm": 0.310546875, "learning_rate": 0.0009779686032470145, "loss": 1.1513, "step": 6075 }, { "epoch": 0.16305281236582225, "grad_norm": 0.306640625, "learning_rate": 0.0009781296122366831, "loss": 1.0483, "step": 6076 }, { "epoch": 0.1630796479175612, "grad_norm": 0.298828125, "learning_rate": 0.0009782906212263518, "loss": 1.0936, "step": 6077 }, { "epoch": 0.16310648346930012, "grad_norm": 0.298828125, "learning_rate": 0.0009784516302160204, "loss": 1.0289, "step": 6078 }, { "epoch": 0.16313331902103909, "grad_norm": 0.30078125, "learning_rate": 0.0009786126392056888, "loss": 1.0772, "step": 6079 }, { "epoch": 0.16316015457277802, "grad_norm": 0.423828125, "learning_rate": 0.0009787736481953575, "loss": 1.0309, "step": 6080 }, { "epoch": 0.16318699012451696, "grad_norm": 0.2890625, "learning_rate": 0.0009789346571850261, "loss": 1.0042, "step": 6081 }, { "epoch": 0.1632138256762559, "grad_norm": 0.3203125, "learning_rate": 0.0009790956661746948, "loss": 1.1974, "step": 6082 }, { "epoch": 0.16324066122799485, "grad_norm": 0.296875, "learning_rate": 0.0009792566751643634, "loss": 1.0444, "step": 6083 }, { "epoch": 0.1632674967797338, "grad_norm": 0.29296875, "learning_rate": 0.0009794176841540318, "loss": 1.0478, "step": 6084 }, { "epoch": 0.16329433233147272, "grad_norm": 0.322265625, "learning_rate": 0.0009795786931437005, "loss": 1.1133, "step": 6085 }, { "epoch": 0.1633211678832117, "grad_norm": 0.2890625, "learning_rate": 0.000979739702133369, "loss": 0.9567, "step": 6086 }, { "epoch": 0.16334800343495062, "grad_norm": 0.3046875, "learning_rate": 0.0009799007111230375, "loss": 1.0492, "step": 6087 }, { "epoch": 0.16337483898668956, "grad_norm": 0.322265625, "learning_rate": 0.0009800617201127062, "loss": 1.1535, "step": 6088 }, { "epoch": 0.16340167453842852, "grad_norm": 0.3203125, "learning_rate": 0.0009802227291023748, "loss": 1.1105, "step": 6089 }, { "epoch": 0.16342851009016746, "grad_norm": 0.287109375, "learning_rate": 0.0009803837380920435, "loss": 0.9088, "step": 6090 }, { "epoch": 0.1634553456419064, "grad_norm": 0.294921875, "learning_rate": 0.000980544747081712, "loss": 1.048, "step": 6091 }, { "epoch": 0.16348218119364535, "grad_norm": 0.302734375, "learning_rate": 0.0009807057560713805, "loss": 1.0579, "step": 6092 }, { "epoch": 0.1635090167453843, "grad_norm": 0.333984375, "learning_rate": 0.0009808667650610492, "loss": 1.1558, "step": 6093 }, { "epoch": 0.16353585229712322, "grad_norm": 0.30859375, "learning_rate": 0.0009810277740507178, "loss": 1.0828, "step": 6094 }, { "epoch": 0.16356268784886216, "grad_norm": 0.306640625, "learning_rate": 0.0009811887830403862, "loss": 1.0778, "step": 6095 }, { "epoch": 0.16358952340060112, "grad_norm": 0.291015625, "learning_rate": 0.0009813497920300549, "loss": 1.0776, "step": 6096 }, { "epoch": 0.16361635895234006, "grad_norm": 0.310546875, "learning_rate": 0.0009815108010197235, "loss": 1.1169, "step": 6097 }, { "epoch": 0.163643194504079, "grad_norm": 0.298828125, "learning_rate": 0.0009816718100093922, "loss": 1.0213, "step": 6098 }, { "epoch": 0.16367003005581796, "grad_norm": 0.31640625, "learning_rate": 0.0009818328189990608, "loss": 1.0594, "step": 6099 }, { "epoch": 0.1636968656075569, "grad_norm": 0.3203125, "learning_rate": 0.0009819938279887292, "loss": 1.1063, "step": 6100 }, { "epoch": 0.16372370115929583, "grad_norm": 0.3046875, "learning_rate": 0.0009821548369783979, "loss": 1.0446, "step": 6101 }, { "epoch": 0.1637505367110348, "grad_norm": 0.328125, "learning_rate": 0.0009823158459680663, "loss": 1.1628, "step": 6102 }, { "epoch": 0.16377737226277372, "grad_norm": 0.294921875, "learning_rate": 0.000982476854957735, "loss": 0.9642, "step": 6103 }, { "epoch": 0.16380420781451266, "grad_norm": 0.330078125, "learning_rate": 0.0009826378639474036, "loss": 1.1259, "step": 6104 }, { "epoch": 0.16383104336625162, "grad_norm": 0.298828125, "learning_rate": 0.0009827988729370722, "loss": 1.0028, "step": 6105 }, { "epoch": 0.16385787891799056, "grad_norm": 0.30859375, "learning_rate": 0.0009829598819267409, "loss": 1.0598, "step": 6106 }, { "epoch": 0.1638847144697295, "grad_norm": 0.3046875, "learning_rate": 0.0009831208909164093, "loss": 1.0582, "step": 6107 }, { "epoch": 0.16391155002146846, "grad_norm": 0.298828125, "learning_rate": 0.000983281899906078, "loss": 0.9509, "step": 6108 }, { "epoch": 0.1639383855732074, "grad_norm": 0.306640625, "learning_rate": 0.0009834429088957466, "loss": 1.0456, "step": 6109 }, { "epoch": 0.16396522112494633, "grad_norm": 0.302734375, "learning_rate": 0.0009836039178854152, "loss": 1.0544, "step": 6110 }, { "epoch": 0.16399205667668526, "grad_norm": 0.296875, "learning_rate": 0.0009837649268750839, "loss": 1.0421, "step": 6111 }, { "epoch": 0.16401889222842422, "grad_norm": 0.30859375, "learning_rate": 0.0009839259358647523, "loss": 1.1332, "step": 6112 }, { "epoch": 0.16404572778016316, "grad_norm": 0.302734375, "learning_rate": 0.000984086944854421, "loss": 1.0912, "step": 6113 }, { "epoch": 0.1640725633319021, "grad_norm": 0.298828125, "learning_rate": 0.0009842479538440896, "loss": 1.0366, "step": 6114 }, { "epoch": 0.16409939888364106, "grad_norm": 0.3203125, "learning_rate": 0.0009844089628337582, "loss": 1.128, "step": 6115 }, { "epoch": 0.16412623443538, "grad_norm": 0.314453125, "learning_rate": 0.0009845699718234269, "loss": 1.1246, "step": 6116 }, { "epoch": 0.16415306998711893, "grad_norm": 0.298828125, "learning_rate": 0.0009847309808130953, "loss": 0.9404, "step": 6117 }, { "epoch": 0.1641799055388579, "grad_norm": 0.3125, "learning_rate": 0.000984891989802764, "loss": 1.0628, "step": 6118 }, { "epoch": 0.16420674109059682, "grad_norm": 0.30859375, "learning_rate": 0.0009850529987924324, "loss": 1.1115, "step": 6119 }, { "epoch": 0.16423357664233576, "grad_norm": 0.310546875, "learning_rate": 0.000985214007782101, "loss": 1.0764, "step": 6120 }, { "epoch": 0.16426041219407472, "grad_norm": 0.298828125, "learning_rate": 0.0009853750167717697, "loss": 0.9848, "step": 6121 }, { "epoch": 0.16428724774581366, "grad_norm": 0.291015625, "learning_rate": 0.0009855360257614383, "loss": 0.996, "step": 6122 }, { "epoch": 0.1643140832975526, "grad_norm": 0.3203125, "learning_rate": 0.000985697034751107, "loss": 1.0957, "step": 6123 }, { "epoch": 0.16434091884929153, "grad_norm": 0.28125, "learning_rate": 0.0009858580437407754, "loss": 0.9479, "step": 6124 }, { "epoch": 0.1643677544010305, "grad_norm": 0.294921875, "learning_rate": 0.000986019052730444, "loss": 0.9597, "step": 6125 }, { "epoch": 0.16439458995276943, "grad_norm": 0.318359375, "learning_rate": 0.0009861800617201127, "loss": 1.1143, "step": 6126 }, { "epoch": 0.16442142550450836, "grad_norm": 0.296875, "learning_rate": 0.0009863410707097813, "loss": 0.9966, "step": 6127 }, { "epoch": 0.16444826105624732, "grad_norm": 0.294921875, "learning_rate": 0.0009865020796994497, "loss": 1.0514, "step": 6128 }, { "epoch": 0.16447509660798626, "grad_norm": 0.322265625, "learning_rate": 0.0009866630886891184, "loss": 1.1625, "step": 6129 }, { "epoch": 0.1645019321597252, "grad_norm": 0.427734375, "learning_rate": 0.000986824097678787, "loss": 1.3193, "step": 6130 }, { "epoch": 0.16452876771146416, "grad_norm": 0.37109375, "learning_rate": 0.0009869851066684557, "loss": 1.204, "step": 6131 }, { "epoch": 0.1645556032632031, "grad_norm": 0.361328125, "learning_rate": 0.000987146115658124, "loss": 1.1306, "step": 6132 }, { "epoch": 0.16458243881494203, "grad_norm": 0.37109375, "learning_rate": 0.0009873071246477927, "loss": 1.1979, "step": 6133 }, { "epoch": 0.164609274366681, "grad_norm": 0.384765625, "learning_rate": 0.0009874681336374614, "loss": 1.2493, "step": 6134 }, { "epoch": 0.16463610991841993, "grad_norm": 0.310546875, "learning_rate": 0.0009876291426271298, "loss": 1.1629, "step": 6135 }, { "epoch": 0.16466294547015886, "grad_norm": 0.3203125, "learning_rate": 0.0009877901516167984, "loss": 1.0943, "step": 6136 }, { "epoch": 0.1646897810218978, "grad_norm": 0.34765625, "learning_rate": 0.000987951160606467, "loss": 1.1812, "step": 6137 }, { "epoch": 0.16471661657363676, "grad_norm": 0.33984375, "learning_rate": 0.0009881121695961357, "loss": 1.1779, "step": 6138 }, { "epoch": 0.1647434521253757, "grad_norm": 0.32421875, "learning_rate": 0.0009882731785858044, "loss": 1.1542, "step": 6139 }, { "epoch": 0.16477028767711463, "grad_norm": 0.306640625, "learning_rate": 0.0009884341875754728, "loss": 1.0811, "step": 6140 }, { "epoch": 0.1647971232288536, "grad_norm": 0.33203125, "learning_rate": 0.0009885951965651414, "loss": 1.2103, "step": 6141 }, { "epoch": 0.16482395878059253, "grad_norm": 0.32421875, "learning_rate": 0.00098875620555481, "loss": 1.1048, "step": 6142 }, { "epoch": 0.16485079433233146, "grad_norm": 0.33984375, "learning_rate": 0.0009889172145444787, "loss": 1.1577, "step": 6143 }, { "epoch": 0.16487762988407043, "grad_norm": 0.322265625, "learning_rate": 0.0009890782235341474, "loss": 1.1083, "step": 6144 }, { "epoch": 0.16490446543580936, "grad_norm": 0.333984375, "learning_rate": 0.0009892392325238158, "loss": 1.1447, "step": 6145 }, { "epoch": 0.1649313009875483, "grad_norm": 0.3203125, "learning_rate": 0.0009894002415134844, "loss": 1.1593, "step": 6146 }, { "epoch": 0.16495813653928726, "grad_norm": 0.3125, "learning_rate": 0.000989561250503153, "loss": 1.0508, "step": 6147 }, { "epoch": 0.1649849720910262, "grad_norm": 0.326171875, "learning_rate": 0.0009897222594928215, "loss": 1.1517, "step": 6148 }, { "epoch": 0.16501180764276513, "grad_norm": 0.33984375, "learning_rate": 0.0009898832684824901, "loss": 1.2209, "step": 6149 }, { "epoch": 0.1650386431945041, "grad_norm": 0.3125, "learning_rate": 0.0009900442774721588, "loss": 1.0889, "step": 6150 }, { "epoch": 0.16506547874624303, "grad_norm": 0.3203125, "learning_rate": 0.0009902052864618274, "loss": 1.1817, "step": 6151 }, { "epoch": 0.16509231429798196, "grad_norm": 0.298828125, "learning_rate": 0.0009903662954514959, "loss": 1.0622, "step": 6152 }, { "epoch": 0.1651191498497209, "grad_norm": 0.29296875, "learning_rate": 0.0009905273044411645, "loss": 0.9562, "step": 6153 }, { "epoch": 0.16514598540145986, "grad_norm": 0.326171875, "learning_rate": 0.0009906883134308331, "loss": 1.2269, "step": 6154 }, { "epoch": 0.1651728209531988, "grad_norm": 0.322265625, "learning_rate": 0.0009908493224205018, "loss": 1.155, "step": 6155 }, { "epoch": 0.16519965650493773, "grad_norm": 0.31640625, "learning_rate": 0.0009910103314101704, "loss": 1.2176, "step": 6156 }, { "epoch": 0.1652264920566767, "grad_norm": 0.3046875, "learning_rate": 0.0009911713403998389, "loss": 1.0892, "step": 6157 }, { "epoch": 0.16525332760841563, "grad_norm": 0.296875, "learning_rate": 0.0009913323493895075, "loss": 1.0553, "step": 6158 }, { "epoch": 0.16528016316015456, "grad_norm": 0.3046875, "learning_rate": 0.0009914933583791761, "loss": 1.041, "step": 6159 }, { "epoch": 0.16530699871189353, "grad_norm": 0.3125, "learning_rate": 0.0009916543673688448, "loss": 1.0595, "step": 6160 }, { "epoch": 0.16533383426363246, "grad_norm": 0.283203125, "learning_rate": 0.0009918153763585134, "loss": 0.9722, "step": 6161 }, { "epoch": 0.1653606698153714, "grad_norm": 0.330078125, "learning_rate": 0.0009919763853481818, "loss": 1.269, "step": 6162 }, { "epoch": 0.16538750536711036, "grad_norm": 0.322265625, "learning_rate": 0.0009921373943378503, "loss": 1.2342, "step": 6163 }, { "epoch": 0.1654143409188493, "grad_norm": 0.310546875, "learning_rate": 0.000992298403327519, "loss": 1.1563, "step": 6164 }, { "epoch": 0.16544117647058823, "grad_norm": 0.32421875, "learning_rate": 0.0009924594123171876, "loss": 1.2069, "step": 6165 }, { "epoch": 0.16546801202232717, "grad_norm": 0.30078125, "learning_rate": 0.0009926204213068562, "loss": 1.0493, "step": 6166 }, { "epoch": 0.16549484757406613, "grad_norm": 0.3125, "learning_rate": 0.0009927814302965248, "loss": 1.1785, "step": 6167 }, { "epoch": 0.16552168312580506, "grad_norm": 0.296875, "learning_rate": 0.0009929424392861933, "loss": 1.0431, "step": 6168 }, { "epoch": 0.165548518677544, "grad_norm": 0.3359375, "learning_rate": 0.000993103448275862, "loss": 1.22, "step": 6169 }, { "epoch": 0.16557535422928296, "grad_norm": 0.287109375, "learning_rate": 0.0009932644572655306, "loss": 1.0562, "step": 6170 }, { "epoch": 0.1656021897810219, "grad_norm": 0.306640625, "learning_rate": 0.0009934254662551992, "loss": 1.0711, "step": 6171 }, { "epoch": 0.16562902533276083, "grad_norm": 0.32421875, "learning_rate": 0.0009935864752448678, "loss": 1.168, "step": 6172 }, { "epoch": 0.1656558608844998, "grad_norm": 0.296875, "learning_rate": 0.0009937474842345363, "loss": 1.0026, "step": 6173 }, { "epoch": 0.16568269643623873, "grad_norm": 0.306640625, "learning_rate": 0.000993908493224205, "loss": 1.0825, "step": 6174 }, { "epoch": 0.16570953198797767, "grad_norm": 0.3046875, "learning_rate": 0.0009940695022138736, "loss": 1.1095, "step": 6175 }, { "epoch": 0.16573636753971663, "grad_norm": 0.318359375, "learning_rate": 0.0009942305112035422, "loss": 1.1719, "step": 6176 }, { "epoch": 0.16576320309145556, "grad_norm": 0.298828125, "learning_rate": 0.0009943915201932108, "loss": 1.0818, "step": 6177 }, { "epoch": 0.1657900386431945, "grad_norm": 0.294921875, "learning_rate": 0.0009945525291828793, "loss": 1.0789, "step": 6178 }, { "epoch": 0.16581687419493346, "grad_norm": 0.32421875, "learning_rate": 0.000994713538172548, "loss": 1.1982, "step": 6179 }, { "epoch": 0.1658437097466724, "grad_norm": 0.296875, "learning_rate": 0.0009948745471622163, "loss": 1.0206, "step": 6180 }, { "epoch": 0.16587054529841133, "grad_norm": 0.30859375, "learning_rate": 0.000995035556151885, "loss": 1.0811, "step": 6181 }, { "epoch": 0.16589738085015027, "grad_norm": 0.30859375, "learning_rate": 0.0009951965651415536, "loss": 1.0794, "step": 6182 }, { "epoch": 0.16592421640188923, "grad_norm": 0.3203125, "learning_rate": 0.0009953575741312223, "loss": 1.2183, "step": 6183 }, { "epoch": 0.16595105195362816, "grad_norm": 0.310546875, "learning_rate": 0.000995518583120891, "loss": 1.134, "step": 6184 }, { "epoch": 0.1659778875053671, "grad_norm": 0.314453125, "learning_rate": 0.0009956795921105593, "loss": 1.0981, "step": 6185 }, { "epoch": 0.16600472305710606, "grad_norm": 0.310546875, "learning_rate": 0.000995840601100228, "loss": 1.0612, "step": 6186 }, { "epoch": 0.166031558608845, "grad_norm": 0.318359375, "learning_rate": 0.0009960016100898966, "loss": 1.1673, "step": 6187 }, { "epoch": 0.16605839416058393, "grad_norm": 0.3046875, "learning_rate": 0.0009961626190795653, "loss": 1.103, "step": 6188 }, { "epoch": 0.1660852297123229, "grad_norm": 0.3125, "learning_rate": 0.000996323628069234, "loss": 1.1131, "step": 6189 }, { "epoch": 0.16611206526406183, "grad_norm": 0.3359375, "learning_rate": 0.0009964846370589023, "loss": 1.1719, "step": 6190 }, { "epoch": 0.16613890081580077, "grad_norm": 0.31640625, "learning_rate": 0.000996645646048571, "loss": 1.1549, "step": 6191 }, { "epoch": 0.16616573636753973, "grad_norm": 0.306640625, "learning_rate": 0.0009968066550382396, "loss": 1.0918, "step": 6192 }, { "epoch": 0.16619257191927866, "grad_norm": 0.2890625, "learning_rate": 0.0009969676640279083, "loss": 1.1006, "step": 6193 }, { "epoch": 0.1662194074710176, "grad_norm": 0.322265625, "learning_rate": 0.0009971286730175767, "loss": 1.1469, "step": 6194 }, { "epoch": 0.16624624302275653, "grad_norm": 0.3125, "learning_rate": 0.0009972896820072453, "loss": 1.0598, "step": 6195 }, { "epoch": 0.1662730785744955, "grad_norm": 0.291015625, "learning_rate": 0.0009974506909969138, "loss": 1.0582, "step": 6196 }, { "epoch": 0.16629991412623443, "grad_norm": 0.306640625, "learning_rate": 0.0009976116999865824, "loss": 1.0902, "step": 6197 }, { "epoch": 0.16632674967797337, "grad_norm": 0.30859375, "learning_rate": 0.000997772708976251, "loss": 1.117, "step": 6198 }, { "epoch": 0.16635358522971233, "grad_norm": 0.30078125, "learning_rate": 0.0009979337179659197, "loss": 1.1272, "step": 6199 }, { "epoch": 0.16638042078145127, "grad_norm": 0.296875, "learning_rate": 0.0009980947269555883, "loss": 1.0165, "step": 6200 }, { "epoch": 0.1664072563331902, "grad_norm": 0.3203125, "learning_rate": 0.0009982557359452568, "loss": 1.1922, "step": 6201 }, { "epoch": 0.16643409188492916, "grad_norm": 0.296875, "learning_rate": 0.0009984167449349254, "loss": 1.0253, "step": 6202 }, { "epoch": 0.1664609274366681, "grad_norm": 0.328125, "learning_rate": 0.000998577753924594, "loss": 1.2262, "step": 6203 }, { "epoch": 0.16648776298840703, "grad_norm": 0.3125, "learning_rate": 0.0009987387629142627, "loss": 1.0738, "step": 6204 }, { "epoch": 0.166514598540146, "grad_norm": 0.30859375, "learning_rate": 0.0009988997719039313, "loss": 1.1686, "step": 6205 }, { "epoch": 0.16654143409188493, "grad_norm": 0.310546875, "learning_rate": 0.0009990607808935997, "loss": 1.1121, "step": 6206 }, { "epoch": 0.16656826964362387, "grad_norm": 0.302734375, "learning_rate": 0.0009992217898832684, "loss": 1.0619, "step": 6207 }, { "epoch": 0.16659510519536283, "grad_norm": 0.29296875, "learning_rate": 0.000999382798872937, "loss": 1.0089, "step": 6208 }, { "epoch": 0.16662194074710177, "grad_norm": 0.306640625, "learning_rate": 0.0009995438078626057, "loss": 1.1038, "step": 6209 }, { "epoch": 0.1666487762988407, "grad_norm": 0.310546875, "learning_rate": 0.000999704816852274, "loss": 1.1262, "step": 6210 }, { "epoch": 0.16667561185057964, "grad_norm": 0.318359375, "learning_rate": 0.0009998658258419427, "loss": 1.1641, "step": 6211 }, { "epoch": 0.1667024474023186, "grad_norm": 0.31640625, "learning_rate": 0.0010000268348316114, "loss": 1.2202, "step": 6212 }, { "epoch": 0.16672928295405753, "grad_norm": 0.287109375, "learning_rate": 0.0010001878438212798, "loss": 1.0249, "step": 6213 }, { "epoch": 0.16675611850579647, "grad_norm": 0.287109375, "learning_rate": 0.0010003488528109485, "loss": 0.9857, "step": 6214 }, { "epoch": 0.16678295405753543, "grad_norm": 0.310546875, "learning_rate": 0.001000509861800617, "loss": 1.1794, "step": 6215 }, { "epoch": 0.16680978960927437, "grad_norm": 0.322265625, "learning_rate": 0.0010006708707902857, "loss": 1.2196, "step": 6216 }, { "epoch": 0.1668366251610133, "grad_norm": 0.322265625, "learning_rate": 0.0010008318797799544, "loss": 1.2172, "step": 6217 }, { "epoch": 0.16686346071275227, "grad_norm": 0.30859375, "learning_rate": 0.0010009928887696228, "loss": 1.1678, "step": 6218 }, { "epoch": 0.1668902962644912, "grad_norm": 0.3046875, "learning_rate": 0.0010011538977592915, "loss": 1.1595, "step": 6219 }, { "epoch": 0.16691713181623014, "grad_norm": 0.314453125, "learning_rate": 0.00100131490674896, "loss": 1.1216, "step": 6220 }, { "epoch": 0.1669439673679691, "grad_norm": 0.314453125, "learning_rate": 0.0010014759157386287, "loss": 1.1428, "step": 6221 }, { "epoch": 0.16697080291970803, "grad_norm": 0.298828125, "learning_rate": 0.0010016369247282974, "loss": 1.1067, "step": 6222 }, { "epoch": 0.16699763847144697, "grad_norm": 0.3203125, "learning_rate": 0.0010017979337179658, "loss": 1.1206, "step": 6223 }, { "epoch": 0.1670244740231859, "grad_norm": 0.310546875, "learning_rate": 0.0010019589427076345, "loss": 1.1465, "step": 6224 }, { "epoch": 0.16705130957492487, "grad_norm": 0.302734375, "learning_rate": 0.0010021199516973029, "loss": 1.1344, "step": 6225 }, { "epoch": 0.1670781451266638, "grad_norm": 0.3203125, "learning_rate": 0.0010022809606869715, "loss": 1.1978, "step": 6226 }, { "epoch": 0.16710498067840274, "grad_norm": 0.3125, "learning_rate": 0.0010024419696766402, "loss": 1.1637, "step": 6227 }, { "epoch": 0.1671318162301417, "grad_norm": 0.302734375, "learning_rate": 0.0010026029786663088, "loss": 1.0992, "step": 6228 }, { "epoch": 0.16715865178188064, "grad_norm": 0.314453125, "learning_rate": 0.0010027639876559772, "loss": 1.1868, "step": 6229 }, { "epoch": 0.16718548733361957, "grad_norm": 0.306640625, "learning_rate": 0.0010029249966456459, "loss": 1.0871, "step": 6230 }, { "epoch": 0.16721232288535853, "grad_norm": 0.3125, "learning_rate": 0.0010030860056353145, "loss": 1.0555, "step": 6231 }, { "epoch": 0.16723915843709747, "grad_norm": 0.318359375, "learning_rate": 0.0010032470146249832, "loss": 1.1465, "step": 6232 }, { "epoch": 0.1672659939888364, "grad_norm": 0.310546875, "learning_rate": 0.0010034080236146518, "loss": 1.1391, "step": 6233 }, { "epoch": 0.16729282954057537, "grad_norm": 0.310546875, "learning_rate": 0.0010035690326043202, "loss": 1.0432, "step": 6234 }, { "epoch": 0.1673196650923143, "grad_norm": 0.314453125, "learning_rate": 0.0010037300415939889, "loss": 1.1192, "step": 6235 }, { "epoch": 0.16734650064405324, "grad_norm": 0.3203125, "learning_rate": 0.0010038910505836575, "loss": 1.1779, "step": 6236 }, { "epoch": 0.16737333619579217, "grad_norm": 0.28515625, "learning_rate": 0.0010040520595733262, "loss": 1.0317, "step": 6237 }, { "epoch": 0.16740017174753113, "grad_norm": 0.322265625, "learning_rate": 0.0010042130685629948, "loss": 1.1494, "step": 6238 }, { "epoch": 0.16742700729927007, "grad_norm": 0.28125, "learning_rate": 0.0010043740775526632, "loss": 0.9686, "step": 6239 }, { "epoch": 0.167453842851009, "grad_norm": 0.3125, "learning_rate": 0.0010045350865423319, "loss": 1.1245, "step": 6240 }, { "epoch": 0.16748067840274797, "grad_norm": 0.3203125, "learning_rate": 0.0010046960955320003, "loss": 1.0889, "step": 6241 }, { "epoch": 0.1675075139544869, "grad_norm": 0.294921875, "learning_rate": 0.001004857104521669, "loss": 1.0254, "step": 6242 }, { "epoch": 0.16753434950622584, "grad_norm": 0.32421875, "learning_rate": 0.0010050181135113376, "loss": 1.1429, "step": 6243 }, { "epoch": 0.1675611850579648, "grad_norm": 0.28515625, "learning_rate": 0.0010051791225010062, "loss": 1.0413, "step": 6244 }, { "epoch": 0.16758802060970374, "grad_norm": 0.322265625, "learning_rate": 0.0010053401314906749, "loss": 1.0855, "step": 6245 }, { "epoch": 0.16761485616144267, "grad_norm": 0.302734375, "learning_rate": 0.0010055011404803433, "loss": 1.0744, "step": 6246 }, { "epoch": 0.16764169171318163, "grad_norm": 0.310546875, "learning_rate": 0.001005662149470012, "loss": 1.064, "step": 6247 }, { "epoch": 0.16766852726492057, "grad_norm": 0.310546875, "learning_rate": 0.0010058231584596806, "loss": 1.2131, "step": 6248 }, { "epoch": 0.1676953628166595, "grad_norm": 0.302734375, "learning_rate": 0.0010059841674493492, "loss": 1.0086, "step": 6249 }, { "epoch": 0.16772219836839847, "grad_norm": 0.318359375, "learning_rate": 0.0010061451764390179, "loss": 1.1654, "step": 6250 }, { "epoch": 0.1677490339201374, "grad_norm": 0.291015625, "learning_rate": 0.0010063061854286863, "loss": 1.014, "step": 6251 }, { "epoch": 0.16777586947187634, "grad_norm": 0.302734375, "learning_rate": 0.001006467194418355, "loss": 1.1001, "step": 6252 }, { "epoch": 0.16780270502361527, "grad_norm": 0.283203125, "learning_rate": 0.0010066282034080236, "loss": 1.0087, "step": 6253 }, { "epoch": 0.16782954057535424, "grad_norm": 0.333984375, "learning_rate": 0.0010067892123976922, "loss": 1.2223, "step": 6254 }, { "epoch": 0.16785637612709317, "grad_norm": 0.30859375, "learning_rate": 0.0010069502213873609, "loss": 1.1411, "step": 6255 }, { "epoch": 0.1678832116788321, "grad_norm": 0.30078125, "learning_rate": 0.0010071112303770293, "loss": 1.0473, "step": 6256 }, { "epoch": 0.16791004723057107, "grad_norm": 0.30078125, "learning_rate": 0.001007272239366698, "loss": 1.1312, "step": 6257 }, { "epoch": 0.16793688278231, "grad_norm": 0.31640625, "learning_rate": 0.0010074332483563664, "loss": 1.1461, "step": 6258 }, { "epoch": 0.16796371833404894, "grad_norm": 0.302734375, "learning_rate": 0.001007594257346035, "loss": 1.0855, "step": 6259 }, { "epoch": 0.1679905538857879, "grad_norm": 0.32421875, "learning_rate": 0.0010077552663357036, "loss": 1.1649, "step": 6260 }, { "epoch": 0.16801738943752684, "grad_norm": 0.298828125, "learning_rate": 0.0010079162753253723, "loss": 1.0526, "step": 6261 }, { "epoch": 0.16804422498926577, "grad_norm": 0.298828125, "learning_rate": 0.001008077284315041, "loss": 1.0455, "step": 6262 }, { "epoch": 0.16807106054100474, "grad_norm": 0.310546875, "learning_rate": 0.0010082382933047094, "loss": 1.0354, "step": 6263 }, { "epoch": 0.16809789609274367, "grad_norm": 0.31640625, "learning_rate": 0.001008399302294378, "loss": 1.1234, "step": 6264 }, { "epoch": 0.1681247316444826, "grad_norm": 0.2890625, "learning_rate": 0.0010085603112840466, "loss": 1.0388, "step": 6265 }, { "epoch": 0.16815156719622154, "grad_norm": 0.326171875, "learning_rate": 0.0010087213202737153, "loss": 1.1787, "step": 6266 }, { "epoch": 0.1681784027479605, "grad_norm": 0.2890625, "learning_rate": 0.0010088823292633837, "loss": 1.0588, "step": 6267 }, { "epoch": 0.16820523829969944, "grad_norm": 0.298828125, "learning_rate": 0.0010090433382530524, "loss": 1.1173, "step": 6268 }, { "epoch": 0.16823207385143837, "grad_norm": 0.318359375, "learning_rate": 0.001009204347242721, "loss": 1.0886, "step": 6269 }, { "epoch": 0.16825890940317734, "grad_norm": 0.296875, "learning_rate": 0.0010093653562323896, "loss": 1.0122, "step": 6270 }, { "epoch": 0.16828574495491627, "grad_norm": 0.33203125, "learning_rate": 0.0010095263652220583, "loss": 1.2703, "step": 6271 }, { "epoch": 0.1683125805066552, "grad_norm": 0.30859375, "learning_rate": 0.0010096873742117267, "loss": 1.0492, "step": 6272 }, { "epoch": 0.16833941605839417, "grad_norm": 0.3046875, "learning_rate": 0.0010098483832013954, "loss": 1.0416, "step": 6273 }, { "epoch": 0.1683662516101331, "grad_norm": 0.322265625, "learning_rate": 0.0010100093921910638, "loss": 1.1282, "step": 6274 }, { "epoch": 0.16839308716187204, "grad_norm": 0.330078125, "learning_rate": 0.0010101704011807324, "loss": 1.198, "step": 6275 }, { "epoch": 0.168419922713611, "grad_norm": 0.3125, "learning_rate": 0.001010331410170401, "loss": 1.1109, "step": 6276 }, { "epoch": 0.16844675826534994, "grad_norm": 0.30859375, "learning_rate": 0.0010104924191600697, "loss": 1.1702, "step": 6277 }, { "epoch": 0.16847359381708887, "grad_norm": 0.30859375, "learning_rate": 0.0010106534281497383, "loss": 1.0723, "step": 6278 }, { "epoch": 0.16850042936882784, "grad_norm": 0.302734375, "learning_rate": 0.0010108144371394068, "loss": 1.0737, "step": 6279 }, { "epoch": 0.16852726492056677, "grad_norm": 0.314453125, "learning_rate": 0.0010109754461290754, "loss": 1.1385, "step": 6280 }, { "epoch": 0.1685541004723057, "grad_norm": 0.298828125, "learning_rate": 0.001011136455118744, "loss": 1.1045, "step": 6281 }, { "epoch": 0.16858093602404464, "grad_norm": 0.3203125, "learning_rate": 0.0010112974641084127, "loss": 1.1724, "step": 6282 }, { "epoch": 0.1686077715757836, "grad_norm": 0.32421875, "learning_rate": 0.0010114584730980813, "loss": 1.2097, "step": 6283 }, { "epoch": 0.16863460712752254, "grad_norm": 0.2890625, "learning_rate": 0.0010116194820877498, "loss": 1.0465, "step": 6284 }, { "epoch": 0.16866144267926148, "grad_norm": 0.30078125, "learning_rate": 0.0010117804910774184, "loss": 1.1449, "step": 6285 }, { "epoch": 0.16868827823100044, "grad_norm": 0.3125, "learning_rate": 0.001011941500067087, "loss": 1.1274, "step": 6286 }, { "epoch": 0.16871511378273937, "grad_norm": 0.287109375, "learning_rate": 0.0010121025090567555, "loss": 0.9724, "step": 6287 }, { "epoch": 0.1687419493344783, "grad_norm": 0.3046875, "learning_rate": 0.0010122635180464241, "loss": 1.0991, "step": 6288 }, { "epoch": 0.16876878488621727, "grad_norm": 0.330078125, "learning_rate": 0.0010124245270360928, "loss": 1.1912, "step": 6289 }, { "epoch": 0.1687956204379562, "grad_norm": 0.2890625, "learning_rate": 0.0010125855360257614, "loss": 1.0118, "step": 6290 }, { "epoch": 0.16882245598969514, "grad_norm": 0.30078125, "learning_rate": 0.0010127465450154298, "loss": 1.141, "step": 6291 }, { "epoch": 0.1688492915414341, "grad_norm": 0.318359375, "learning_rate": 0.0010129075540050985, "loss": 1.2349, "step": 6292 }, { "epoch": 0.16887612709317304, "grad_norm": 0.314453125, "learning_rate": 0.0010130685629947671, "loss": 1.0607, "step": 6293 }, { "epoch": 0.16890296264491197, "grad_norm": 0.314453125, "learning_rate": 0.0010132295719844358, "loss": 1.13, "step": 6294 }, { "epoch": 0.1689297981966509, "grad_norm": 0.287109375, "learning_rate": 0.0010133905809741044, "loss": 1.0012, "step": 6295 }, { "epoch": 0.16895663374838987, "grad_norm": 0.291015625, "learning_rate": 0.0010135515899637728, "loss": 1.0456, "step": 6296 }, { "epoch": 0.1689834693001288, "grad_norm": 0.3046875, "learning_rate": 0.0010137125989534415, "loss": 1.0547, "step": 6297 }, { "epoch": 0.16901030485186774, "grad_norm": 0.296875, "learning_rate": 0.0010138736079431101, "loss": 1.0339, "step": 6298 }, { "epoch": 0.1690371404036067, "grad_norm": 0.30078125, "learning_rate": 0.0010140346169327788, "loss": 1.0671, "step": 6299 }, { "epoch": 0.16906397595534564, "grad_norm": 0.322265625, "learning_rate": 0.0010141956259224472, "loss": 1.1675, "step": 6300 }, { "epoch": 0.16909081150708458, "grad_norm": 0.298828125, "learning_rate": 0.0010143566349121158, "loss": 1.1378, "step": 6301 }, { "epoch": 0.16911764705882354, "grad_norm": 0.296875, "learning_rate": 0.0010145176439017845, "loss": 1.0527, "step": 6302 }, { "epoch": 0.16914448261056247, "grad_norm": 0.294921875, "learning_rate": 0.001014678652891453, "loss": 1.0201, "step": 6303 }, { "epoch": 0.1691713181623014, "grad_norm": 0.298828125, "learning_rate": 0.0010148396618811215, "loss": 0.9569, "step": 6304 }, { "epoch": 0.16919815371404037, "grad_norm": 0.29296875, "learning_rate": 0.0010150006708707902, "loss": 1.0578, "step": 6305 }, { "epoch": 0.1692249892657793, "grad_norm": 0.3125, "learning_rate": 0.0010151616798604588, "loss": 1.095, "step": 6306 }, { "epoch": 0.16925182481751824, "grad_norm": 0.310546875, "learning_rate": 0.0010153226888501273, "loss": 1.084, "step": 6307 }, { "epoch": 0.1692786603692572, "grad_norm": 0.3046875, "learning_rate": 0.001015483697839796, "loss": 1.0988, "step": 6308 }, { "epoch": 0.16930549592099614, "grad_norm": 0.30078125, "learning_rate": 0.0010156447068294645, "loss": 1.0335, "step": 6309 }, { "epoch": 0.16933233147273508, "grad_norm": 0.298828125, "learning_rate": 0.0010158057158191332, "loss": 1.0068, "step": 6310 }, { "epoch": 0.169359167024474, "grad_norm": 0.2890625, "learning_rate": 0.0010159667248088018, "loss": 1.0052, "step": 6311 }, { "epoch": 0.16938600257621297, "grad_norm": 0.31640625, "learning_rate": 0.0010161277337984703, "loss": 1.1743, "step": 6312 }, { "epoch": 0.1694128381279519, "grad_norm": 0.29296875, "learning_rate": 0.001016288742788139, "loss": 1.033, "step": 6313 }, { "epoch": 0.16943967367969084, "grad_norm": 0.3046875, "learning_rate": 0.0010164497517778075, "loss": 1.1223, "step": 6314 }, { "epoch": 0.1694665092314298, "grad_norm": 0.283203125, "learning_rate": 0.0010166107607674762, "loss": 1.0482, "step": 6315 }, { "epoch": 0.16949334478316874, "grad_norm": 0.302734375, "learning_rate": 0.0010167717697571448, "loss": 0.9984, "step": 6316 }, { "epoch": 0.16952018033490768, "grad_norm": 0.294921875, "learning_rate": 0.0010169327787468133, "loss": 1.0356, "step": 6317 }, { "epoch": 0.16954701588664664, "grad_norm": 0.31640625, "learning_rate": 0.001017093787736482, "loss": 1.0942, "step": 6318 }, { "epoch": 0.16957385143838558, "grad_norm": 0.294921875, "learning_rate": 0.0010172547967261503, "loss": 1.1202, "step": 6319 }, { "epoch": 0.1696006869901245, "grad_norm": 0.267578125, "learning_rate": 0.001017415805715819, "loss": 0.9706, "step": 6320 }, { "epoch": 0.16962752254186347, "grad_norm": 0.326171875, "learning_rate": 0.0010175768147054876, "loss": 1.2328, "step": 6321 }, { "epoch": 0.1696543580936024, "grad_norm": 0.33984375, "learning_rate": 0.0010177378236951562, "loss": 1.1404, "step": 6322 }, { "epoch": 0.16968119364534134, "grad_norm": 0.3046875, "learning_rate": 0.0010178988326848249, "loss": 1.1004, "step": 6323 }, { "epoch": 0.16970802919708028, "grad_norm": 0.3125, "learning_rate": 0.0010180598416744933, "loss": 1.1534, "step": 6324 }, { "epoch": 0.16973486474881924, "grad_norm": 0.3046875, "learning_rate": 0.001018220850664162, "loss": 1.051, "step": 6325 }, { "epoch": 0.16976170030055818, "grad_norm": 0.298828125, "learning_rate": 0.0010183818596538306, "loss": 1.0273, "step": 6326 }, { "epoch": 0.1697885358522971, "grad_norm": 0.328125, "learning_rate": 0.0010185428686434992, "loss": 1.3448, "step": 6327 }, { "epoch": 0.16981537140403608, "grad_norm": 0.314453125, "learning_rate": 0.0010187038776331679, "loss": 1.1549, "step": 6328 }, { "epoch": 0.169842206955775, "grad_norm": 0.30859375, "learning_rate": 0.0010188648866228363, "loss": 1.1201, "step": 6329 }, { "epoch": 0.16986904250751395, "grad_norm": 0.29296875, "learning_rate": 0.001019025895612505, "loss": 1.0275, "step": 6330 }, { "epoch": 0.1698958780592529, "grad_norm": 0.31640625, "learning_rate": 0.0010191869046021736, "loss": 1.0403, "step": 6331 }, { "epoch": 0.16992271361099184, "grad_norm": 0.3203125, "learning_rate": 0.0010193479135918422, "loss": 1.1465, "step": 6332 }, { "epoch": 0.16994954916273078, "grad_norm": 0.306640625, "learning_rate": 0.0010195089225815107, "loss": 1.1354, "step": 6333 }, { "epoch": 0.16997638471446974, "grad_norm": 0.2890625, "learning_rate": 0.0010196699315711793, "loss": 0.9537, "step": 6334 }, { "epoch": 0.17000322026620868, "grad_norm": 0.314453125, "learning_rate": 0.0010198309405608477, "loss": 1.1966, "step": 6335 }, { "epoch": 0.1700300558179476, "grad_norm": 0.28515625, "learning_rate": 0.0010199919495505164, "loss": 1.0116, "step": 6336 }, { "epoch": 0.17005689136968655, "grad_norm": 0.30859375, "learning_rate": 0.001020152958540185, "loss": 1.0766, "step": 6337 }, { "epoch": 0.1700837269214255, "grad_norm": 0.30859375, "learning_rate": 0.0010203139675298537, "loss": 1.0428, "step": 6338 }, { "epoch": 0.17011056247316445, "grad_norm": 0.314453125, "learning_rate": 0.0010204749765195223, "loss": 1.1086, "step": 6339 }, { "epoch": 0.17013739802490338, "grad_norm": 0.29296875, "learning_rate": 0.0010206359855091907, "loss": 1.0962, "step": 6340 }, { "epoch": 0.17016423357664234, "grad_norm": 0.30078125, "learning_rate": 0.0010207969944988594, "loss": 1.1747, "step": 6341 }, { "epoch": 0.17019106912838128, "grad_norm": 0.3046875, "learning_rate": 0.001020958003488528, "loss": 0.9751, "step": 6342 }, { "epoch": 0.1702179046801202, "grad_norm": 0.29296875, "learning_rate": 0.0010211190124781967, "loss": 1.026, "step": 6343 }, { "epoch": 0.17024474023185918, "grad_norm": 0.322265625, "learning_rate": 0.0010212800214678653, "loss": 1.1283, "step": 6344 }, { "epoch": 0.1702715757835981, "grad_norm": 0.28515625, "learning_rate": 0.0010214410304575337, "loss": 1.0387, "step": 6345 }, { "epoch": 0.17029841133533705, "grad_norm": 0.3046875, "learning_rate": 0.0010216020394472024, "loss": 1.0788, "step": 6346 }, { "epoch": 0.170325246887076, "grad_norm": 0.314453125, "learning_rate": 0.001021763048436871, "loss": 1.1312, "step": 6347 }, { "epoch": 0.17035208243881494, "grad_norm": 0.28515625, "learning_rate": 0.0010219240574265397, "loss": 0.9733, "step": 6348 }, { "epoch": 0.17037891799055388, "grad_norm": 0.322265625, "learning_rate": 0.001022085066416208, "loss": 1.208, "step": 6349 }, { "epoch": 0.17040575354229284, "grad_norm": 0.296875, "learning_rate": 0.0010222460754058767, "loss": 1.0884, "step": 6350 }, { "epoch": 0.17043258909403178, "grad_norm": 0.28125, "learning_rate": 0.0010224070843955454, "loss": 1.0026, "step": 6351 }, { "epoch": 0.1704594246457707, "grad_norm": 0.310546875, "learning_rate": 0.0010225680933852138, "loss": 1.1056, "step": 6352 }, { "epoch": 0.17048626019750965, "grad_norm": 0.328125, "learning_rate": 0.0010227291023748824, "loss": 1.1615, "step": 6353 }, { "epoch": 0.1705130957492486, "grad_norm": 0.32421875, "learning_rate": 0.001022890111364551, "loss": 1.1072, "step": 6354 }, { "epoch": 0.17053993130098755, "grad_norm": 0.302734375, "learning_rate": 0.0010230511203542197, "loss": 1.0729, "step": 6355 }, { "epoch": 0.17056676685272648, "grad_norm": 0.30859375, "learning_rate": 0.0010232121293438884, "loss": 1.2079, "step": 6356 }, { "epoch": 0.17059360240446544, "grad_norm": 0.30078125, "learning_rate": 0.0010233731383335568, "loss": 1.0927, "step": 6357 }, { "epoch": 0.17062043795620438, "grad_norm": 0.28125, "learning_rate": 0.0010235341473232254, "loss": 1.0097, "step": 6358 }, { "epoch": 0.17064727350794331, "grad_norm": 0.283203125, "learning_rate": 0.001023695156312894, "loss": 1.0035, "step": 6359 }, { "epoch": 0.17067410905968228, "grad_norm": 0.28515625, "learning_rate": 0.0010238561653025627, "loss": 1.0052, "step": 6360 }, { "epoch": 0.1707009446114212, "grad_norm": 0.322265625, "learning_rate": 0.0010240171742922314, "loss": 1.2222, "step": 6361 }, { "epoch": 0.17072778016316015, "grad_norm": 0.306640625, "learning_rate": 0.0010241781832818998, "loss": 1.1494, "step": 6362 }, { "epoch": 0.1707546157148991, "grad_norm": 0.29296875, "learning_rate": 0.0010243391922715684, "loss": 1.0977, "step": 6363 }, { "epoch": 0.17078145126663805, "grad_norm": 0.29296875, "learning_rate": 0.0010245002012612369, "loss": 1.0772, "step": 6364 }, { "epoch": 0.17080828681837698, "grad_norm": 0.306640625, "learning_rate": 0.0010246612102509055, "loss": 1.2111, "step": 6365 }, { "epoch": 0.17083512237011592, "grad_norm": 0.30859375, "learning_rate": 0.0010248222192405741, "loss": 1.0747, "step": 6366 }, { "epoch": 0.17086195792185488, "grad_norm": 0.314453125, "learning_rate": 0.0010249832282302428, "loss": 1.1526, "step": 6367 }, { "epoch": 0.17088879347359381, "grad_norm": 0.314453125, "learning_rate": 0.0010251442372199112, "loss": 1.1301, "step": 6368 }, { "epoch": 0.17091562902533275, "grad_norm": 0.31640625, "learning_rate": 0.0010253052462095799, "loss": 1.2011, "step": 6369 }, { "epoch": 0.1709424645770717, "grad_norm": 0.298828125, "learning_rate": 0.0010254662551992485, "loss": 1.0736, "step": 6370 }, { "epoch": 0.17096930012881065, "grad_norm": 0.3046875, "learning_rate": 0.0010256272641889171, "loss": 1.1052, "step": 6371 }, { "epoch": 0.17099613568054958, "grad_norm": 0.314453125, "learning_rate": 0.0010257882731785858, "loss": 1.1294, "step": 6372 }, { "epoch": 0.17102297123228855, "grad_norm": 0.3203125, "learning_rate": 0.0010259492821682542, "loss": 1.166, "step": 6373 }, { "epoch": 0.17104980678402748, "grad_norm": 0.318359375, "learning_rate": 0.0010261102911579229, "loss": 1.1663, "step": 6374 }, { "epoch": 0.17107664233576642, "grad_norm": 0.30078125, "learning_rate": 0.0010262713001475915, "loss": 1.09, "step": 6375 }, { "epoch": 0.17110347788750538, "grad_norm": 0.31640625, "learning_rate": 0.0010264323091372601, "loss": 1.1865, "step": 6376 }, { "epoch": 0.1711303134392443, "grad_norm": 0.318359375, "learning_rate": 0.0010265933181269288, "loss": 1.0142, "step": 6377 }, { "epoch": 0.17115714899098325, "grad_norm": 0.296875, "learning_rate": 0.0010267543271165972, "loss": 1.0735, "step": 6378 }, { "epoch": 0.1711839845427222, "grad_norm": 0.3125, "learning_rate": 0.0010269153361062659, "loss": 1.111, "step": 6379 }, { "epoch": 0.17121082009446115, "grad_norm": 0.30859375, "learning_rate": 0.0010270763450959343, "loss": 1.1185, "step": 6380 }, { "epoch": 0.17123765564620008, "grad_norm": 0.287109375, "learning_rate": 0.001027237354085603, "loss": 0.9265, "step": 6381 }, { "epoch": 0.17126449119793902, "grad_norm": 0.298828125, "learning_rate": 0.0010273983630752716, "loss": 1.0949, "step": 6382 }, { "epoch": 0.17129132674967798, "grad_norm": 0.3125, "learning_rate": 0.0010275593720649402, "loss": 1.1259, "step": 6383 }, { "epoch": 0.17131816230141692, "grad_norm": 0.3046875, "learning_rate": 0.0010277203810546089, "loss": 1.1185, "step": 6384 }, { "epoch": 0.17134499785315585, "grad_norm": 0.310546875, "learning_rate": 0.0010278813900442773, "loss": 1.1802, "step": 6385 }, { "epoch": 0.1713718334048948, "grad_norm": 0.30859375, "learning_rate": 0.001028042399033946, "loss": 1.2258, "step": 6386 }, { "epoch": 0.17139866895663375, "grad_norm": 0.298828125, "learning_rate": 0.0010282034080236146, "loss": 1.0894, "step": 6387 }, { "epoch": 0.17142550450837268, "grad_norm": 0.314453125, "learning_rate": 0.0010283644170132832, "loss": 1.138, "step": 6388 }, { "epoch": 0.17145234006011165, "grad_norm": 0.296875, "learning_rate": 0.0010285254260029519, "loss": 1.0108, "step": 6389 }, { "epoch": 0.17147917561185058, "grad_norm": 0.294921875, "learning_rate": 0.0010286864349926203, "loss": 1.0161, "step": 6390 }, { "epoch": 0.17150601116358952, "grad_norm": 0.296875, "learning_rate": 0.001028847443982289, "loss": 1.0208, "step": 6391 }, { "epoch": 0.17153284671532848, "grad_norm": 0.3046875, "learning_rate": 0.0010290084529719576, "loss": 1.1075, "step": 6392 }, { "epoch": 0.17155968226706741, "grad_norm": 0.296875, "learning_rate": 0.0010291694619616262, "loss": 1.0549, "step": 6393 }, { "epoch": 0.17158651781880635, "grad_norm": 0.314453125, "learning_rate": 0.0010293304709512948, "loss": 1.1641, "step": 6394 }, { "epoch": 0.17161335337054529, "grad_norm": 0.302734375, "learning_rate": 0.0010294914799409633, "loss": 1.0872, "step": 6395 }, { "epoch": 0.17164018892228425, "grad_norm": 0.318359375, "learning_rate": 0.001029652488930632, "loss": 1.2096, "step": 6396 }, { "epoch": 0.17166702447402318, "grad_norm": 0.30078125, "learning_rate": 0.0010298134979203003, "loss": 1.0974, "step": 6397 }, { "epoch": 0.17169386002576212, "grad_norm": 0.328125, "learning_rate": 0.001029974506909969, "loss": 1.2497, "step": 6398 }, { "epoch": 0.17172069557750108, "grad_norm": 0.296875, "learning_rate": 0.0010301355158996376, "loss": 1.0629, "step": 6399 }, { "epoch": 0.17174753112924002, "grad_norm": 0.306640625, "learning_rate": 0.0010302965248893063, "loss": 1.1421, "step": 6400 }, { "epoch": 0.17177436668097895, "grad_norm": 0.3125, "learning_rate": 0.001030457533878975, "loss": 1.1732, "step": 6401 }, { "epoch": 0.17180120223271791, "grad_norm": 0.29296875, "learning_rate": 0.0010306185428686433, "loss": 1.0, "step": 6402 }, { "epoch": 0.17182803778445685, "grad_norm": 0.322265625, "learning_rate": 0.001030779551858312, "loss": 1.1211, "step": 6403 }, { "epoch": 0.17185487333619578, "grad_norm": 0.314453125, "learning_rate": 0.0010309405608479806, "loss": 1.1955, "step": 6404 }, { "epoch": 0.17188170888793475, "grad_norm": 0.30078125, "learning_rate": 0.0010311015698376493, "loss": 1.035, "step": 6405 }, { "epoch": 0.17190854443967368, "grad_norm": 0.3046875, "learning_rate": 0.0010312625788273177, "loss": 1.0966, "step": 6406 }, { "epoch": 0.17193537999141262, "grad_norm": 0.287109375, "learning_rate": 0.0010314235878169863, "loss": 1.0238, "step": 6407 }, { "epoch": 0.17196221554315158, "grad_norm": 0.29296875, "learning_rate": 0.001031584596806655, "loss": 1.0085, "step": 6408 }, { "epoch": 0.17198905109489052, "grad_norm": 0.322265625, "learning_rate": 0.0010317456057963236, "loss": 1.1378, "step": 6409 }, { "epoch": 0.17201588664662945, "grad_norm": 0.26953125, "learning_rate": 0.0010319066147859923, "loss": 0.9012, "step": 6410 }, { "epoch": 0.1720427221983684, "grad_norm": 0.31640625, "learning_rate": 0.0010320676237756607, "loss": 1.157, "step": 6411 }, { "epoch": 0.17206955775010735, "grad_norm": 0.298828125, "learning_rate": 0.0010322286327653293, "loss": 1.0659, "step": 6412 }, { "epoch": 0.17209639330184628, "grad_norm": 0.30078125, "learning_rate": 0.0010323896417549978, "loss": 1.1184, "step": 6413 }, { "epoch": 0.17212322885358522, "grad_norm": 0.306640625, "learning_rate": 0.0010325506507446664, "loss": 1.1037, "step": 6414 }, { "epoch": 0.17215006440532418, "grad_norm": 0.296875, "learning_rate": 0.001032711659734335, "loss": 1.0579, "step": 6415 }, { "epoch": 0.17217689995706312, "grad_norm": 0.287109375, "learning_rate": 0.0010328726687240037, "loss": 1.0312, "step": 6416 }, { "epoch": 0.17220373550880205, "grad_norm": 0.3125, "learning_rate": 0.0010330336777136723, "loss": 1.1159, "step": 6417 }, { "epoch": 0.17223057106054102, "grad_norm": 0.2890625, "learning_rate": 0.0010331946867033408, "loss": 1.0681, "step": 6418 }, { "epoch": 0.17225740661227995, "grad_norm": 0.30859375, "learning_rate": 0.0010333556956930094, "loss": 1.1778, "step": 6419 }, { "epoch": 0.17228424216401889, "grad_norm": 0.291015625, "learning_rate": 0.001033516704682678, "loss": 1.0522, "step": 6420 }, { "epoch": 0.17231107771575785, "grad_norm": 0.310546875, "learning_rate": 0.0010336777136723467, "loss": 1.1444, "step": 6421 }, { "epoch": 0.17233791326749678, "grad_norm": 0.31640625, "learning_rate": 0.0010338387226620153, "loss": 1.1521, "step": 6422 }, { "epoch": 0.17236474881923572, "grad_norm": 0.314453125, "learning_rate": 0.0010339997316516838, "loss": 1.1246, "step": 6423 }, { "epoch": 0.17239158437097465, "grad_norm": 0.310546875, "learning_rate": 0.0010341607406413524, "loss": 1.2076, "step": 6424 }, { "epoch": 0.17241841992271362, "grad_norm": 0.28515625, "learning_rate": 0.001034321749631021, "loss": 1.1017, "step": 6425 }, { "epoch": 0.17244525547445255, "grad_norm": 0.2890625, "learning_rate": 0.0010344827586206895, "loss": 0.9889, "step": 6426 }, { "epoch": 0.1724720910261915, "grad_norm": 0.314453125, "learning_rate": 0.0010346437676103581, "loss": 1.0389, "step": 6427 }, { "epoch": 0.17249892657793045, "grad_norm": 0.2734375, "learning_rate": 0.0010348047766000268, "loss": 0.9316, "step": 6428 }, { "epoch": 0.17252576212966939, "grad_norm": 0.3125, "learning_rate": 0.0010349657855896954, "loss": 1.1747, "step": 6429 }, { "epoch": 0.17255259768140832, "grad_norm": 0.318359375, "learning_rate": 0.0010351267945793638, "loss": 1.2005, "step": 6430 }, { "epoch": 0.17257943323314728, "grad_norm": 0.306640625, "learning_rate": 0.0010352878035690325, "loss": 1.0727, "step": 6431 }, { "epoch": 0.17260626878488622, "grad_norm": 0.283203125, "learning_rate": 0.001035448812558701, "loss": 0.9754, "step": 6432 }, { "epoch": 0.17263310433662515, "grad_norm": 0.33203125, "learning_rate": 0.0010356098215483698, "loss": 1.1326, "step": 6433 }, { "epoch": 0.17265993988836412, "grad_norm": 0.3125, "learning_rate": 0.0010357708305380384, "loss": 1.1179, "step": 6434 }, { "epoch": 0.17268677544010305, "grad_norm": 0.291015625, "learning_rate": 0.0010359318395277068, "loss": 1.0663, "step": 6435 }, { "epoch": 0.172713610991842, "grad_norm": 0.31640625, "learning_rate": 0.0010360928485173755, "loss": 1.2759, "step": 6436 }, { "epoch": 0.17274044654358092, "grad_norm": 0.3046875, "learning_rate": 0.001036253857507044, "loss": 1.0857, "step": 6437 }, { "epoch": 0.17276728209531989, "grad_norm": 0.29296875, "learning_rate": 0.0010364148664967127, "loss": 1.0611, "step": 6438 }, { "epoch": 0.17279411764705882, "grad_norm": 0.306640625, "learning_rate": 0.0010365758754863812, "loss": 1.1577, "step": 6439 }, { "epoch": 0.17282095319879776, "grad_norm": 0.3125, "learning_rate": 0.0010367368844760498, "loss": 1.1447, "step": 6440 }, { "epoch": 0.17284778875053672, "grad_norm": 0.30859375, "learning_rate": 0.0010368978934657185, "loss": 1.1316, "step": 6441 }, { "epoch": 0.17287462430227565, "grad_norm": 0.314453125, "learning_rate": 0.0010370589024553869, "loss": 1.0745, "step": 6442 }, { "epoch": 0.1729014598540146, "grad_norm": 0.3203125, "learning_rate": 0.0010372199114450555, "loss": 1.1825, "step": 6443 }, { "epoch": 0.17292829540575355, "grad_norm": 0.30859375, "learning_rate": 0.0010373809204347242, "loss": 1.0613, "step": 6444 }, { "epoch": 0.1729551309574925, "grad_norm": 0.291015625, "learning_rate": 0.0010375419294243928, "loss": 1.0641, "step": 6445 }, { "epoch": 0.17298196650923142, "grad_norm": 0.3046875, "learning_rate": 0.0010377029384140612, "loss": 1.0731, "step": 6446 }, { "epoch": 0.17300880206097038, "grad_norm": 0.29296875, "learning_rate": 0.0010378639474037299, "loss": 1.0398, "step": 6447 }, { "epoch": 0.17303563761270932, "grad_norm": 0.326171875, "learning_rate": 0.0010380249563933985, "loss": 1.0076, "step": 6448 }, { "epoch": 0.17306247316444826, "grad_norm": 0.306640625, "learning_rate": 0.0010381859653830672, "loss": 1.1161, "step": 6449 }, { "epoch": 0.17308930871618722, "grad_norm": 0.2890625, "learning_rate": 0.0010383469743727358, "loss": 1.0565, "step": 6450 }, { "epoch": 0.17311614426792615, "grad_norm": 0.2890625, "learning_rate": 0.0010385079833624042, "loss": 1.0193, "step": 6451 }, { "epoch": 0.1731429798196651, "grad_norm": 0.32421875, "learning_rate": 0.0010386689923520729, "loss": 1.1319, "step": 6452 }, { "epoch": 0.17316981537140402, "grad_norm": 0.2890625, "learning_rate": 0.0010388300013417415, "loss": 1.0424, "step": 6453 }, { "epoch": 0.173196650923143, "grad_norm": 0.326171875, "learning_rate": 0.0010389910103314102, "loss": 1.196, "step": 6454 }, { "epoch": 0.17322348647488192, "grad_norm": 0.3125, "learning_rate": 0.0010391520193210788, "loss": 1.1822, "step": 6455 }, { "epoch": 0.17325032202662086, "grad_norm": 0.294921875, "learning_rate": 0.0010393130283107472, "loss": 0.9856, "step": 6456 }, { "epoch": 0.17327715757835982, "grad_norm": 0.302734375, "learning_rate": 0.0010394740373004159, "loss": 1.168, "step": 6457 }, { "epoch": 0.17330399313009875, "grad_norm": 0.306640625, "learning_rate": 0.0010396350462900843, "loss": 1.1129, "step": 6458 }, { "epoch": 0.1733308286818377, "grad_norm": 0.314453125, "learning_rate": 0.001039796055279753, "loss": 1.1697, "step": 6459 }, { "epoch": 0.17335766423357665, "grad_norm": 0.298828125, "learning_rate": 0.0010399570642694216, "loss": 1.1026, "step": 6460 }, { "epoch": 0.1733844997853156, "grad_norm": 0.30859375, "learning_rate": 0.0010401180732590902, "loss": 1.1593, "step": 6461 }, { "epoch": 0.17341133533705452, "grad_norm": 0.3046875, "learning_rate": 0.0010402790822487589, "loss": 1.0929, "step": 6462 }, { "epoch": 0.17343817088879349, "grad_norm": 0.326171875, "learning_rate": 0.0010404400912384273, "loss": 1.2716, "step": 6463 }, { "epoch": 0.17346500644053242, "grad_norm": 0.3125, "learning_rate": 0.001040601100228096, "loss": 1.1378, "step": 6464 }, { "epoch": 0.17349184199227136, "grad_norm": 0.30078125, "learning_rate": 0.0010407621092177646, "loss": 1.1046, "step": 6465 }, { "epoch": 0.1735186775440103, "grad_norm": 0.3046875, "learning_rate": 0.0010409231182074332, "loss": 1.1586, "step": 6466 }, { "epoch": 0.17354551309574925, "grad_norm": 0.296875, "learning_rate": 0.0010410841271971019, "loss": 1.0808, "step": 6467 }, { "epoch": 0.1735723486474882, "grad_norm": 0.302734375, "learning_rate": 0.0010412451361867703, "loss": 1.1193, "step": 6468 }, { "epoch": 0.17359918419922712, "grad_norm": 0.306640625, "learning_rate": 0.001041406145176439, "loss": 1.0573, "step": 6469 }, { "epoch": 0.1736260197509661, "grad_norm": 0.3125, "learning_rate": 0.0010415671541661076, "loss": 1.2052, "step": 6470 }, { "epoch": 0.17365285530270502, "grad_norm": 0.302734375, "learning_rate": 0.0010417281631557762, "loss": 1.0966, "step": 6471 }, { "epoch": 0.17367969085444396, "grad_norm": 0.302734375, "learning_rate": 0.0010418891721454447, "loss": 1.1819, "step": 6472 }, { "epoch": 0.17370652640618292, "grad_norm": 0.29296875, "learning_rate": 0.0010420501811351133, "loss": 1.0812, "step": 6473 }, { "epoch": 0.17373336195792186, "grad_norm": 0.302734375, "learning_rate": 0.0010422111901247817, "loss": 1.0237, "step": 6474 }, { "epoch": 0.1737601975096608, "grad_norm": 0.310546875, "learning_rate": 0.0010423721991144504, "loss": 1.082, "step": 6475 }, { "epoch": 0.17378703306139975, "grad_norm": 0.298828125, "learning_rate": 0.001042533208104119, "loss": 1.1189, "step": 6476 }, { "epoch": 0.1738138686131387, "grad_norm": 0.291015625, "learning_rate": 0.0010426942170937877, "loss": 1.0359, "step": 6477 }, { "epoch": 0.17384070416487762, "grad_norm": 0.296875, "learning_rate": 0.0010428552260834563, "loss": 1.0539, "step": 6478 }, { "epoch": 0.1738675397166166, "grad_norm": 0.318359375, "learning_rate": 0.0010430162350731247, "loss": 1.1371, "step": 6479 }, { "epoch": 0.17389437526835552, "grad_norm": 0.310546875, "learning_rate": 0.0010431772440627934, "loss": 1.1562, "step": 6480 }, { "epoch": 0.17392121082009446, "grad_norm": 0.30078125, "learning_rate": 0.001043338253052462, "loss": 1.143, "step": 6481 }, { "epoch": 0.1739480463718334, "grad_norm": 0.30078125, "learning_rate": 0.0010434992620421306, "loss": 1.1571, "step": 6482 }, { "epoch": 0.17397488192357236, "grad_norm": 0.32421875, "learning_rate": 0.0010436602710317993, "loss": 1.1547, "step": 6483 }, { "epoch": 0.1740017174753113, "grad_norm": 0.298828125, "learning_rate": 0.0010438212800214677, "loss": 1.069, "step": 6484 }, { "epoch": 0.17402855302705023, "grad_norm": 0.31640625, "learning_rate": 0.0010439822890111364, "loss": 1.1389, "step": 6485 }, { "epoch": 0.1740553885787892, "grad_norm": 0.296875, "learning_rate": 0.001044143298000805, "loss": 1.0432, "step": 6486 }, { "epoch": 0.17408222413052812, "grad_norm": 0.298828125, "learning_rate": 0.0010443043069904736, "loss": 1.0386, "step": 6487 }, { "epoch": 0.17410905968226706, "grad_norm": 0.30078125, "learning_rate": 0.001044465315980142, "loss": 1.0716, "step": 6488 }, { "epoch": 0.17413589523400602, "grad_norm": 0.3046875, "learning_rate": 0.0010446263249698107, "loss": 1.1532, "step": 6489 }, { "epoch": 0.17416273078574496, "grad_norm": 0.310546875, "learning_rate": 0.0010447873339594794, "loss": 1.0486, "step": 6490 }, { "epoch": 0.1741895663374839, "grad_norm": 0.318359375, "learning_rate": 0.0010449483429491478, "loss": 1.1098, "step": 6491 }, { "epoch": 0.17421640188922285, "grad_norm": 0.294921875, "learning_rate": 0.0010451093519388164, "loss": 1.0161, "step": 6492 }, { "epoch": 0.1742432374409618, "grad_norm": 0.30859375, "learning_rate": 0.001045270360928485, "loss": 1.1115, "step": 6493 }, { "epoch": 0.17427007299270073, "grad_norm": 0.287109375, "learning_rate": 0.0010454313699181537, "loss": 1.1079, "step": 6494 }, { "epoch": 0.17429690854443966, "grad_norm": 0.291015625, "learning_rate": 0.0010455923789078224, "loss": 0.9834, "step": 6495 }, { "epoch": 0.17432374409617862, "grad_norm": 0.30859375, "learning_rate": 0.0010457533878974908, "loss": 1.2095, "step": 6496 }, { "epoch": 0.17435057964791756, "grad_norm": 0.314453125, "learning_rate": 0.0010459143968871594, "loss": 1.2452, "step": 6497 }, { "epoch": 0.1743774151996565, "grad_norm": 0.29296875, "learning_rate": 0.001046075405876828, "loss": 1.0944, "step": 6498 }, { "epoch": 0.17440425075139546, "grad_norm": 0.3046875, "learning_rate": 0.0010462364148664967, "loss": 1.0926, "step": 6499 }, { "epoch": 0.1744310863031344, "grad_norm": 0.291015625, "learning_rate": 0.0010463974238561654, "loss": 1.0567, "step": 6500 }, { "epoch": 0.17445792185487333, "grad_norm": 0.298828125, "learning_rate": 0.0010465584328458338, "loss": 1.0602, "step": 6501 }, { "epoch": 0.1744847574066123, "grad_norm": 0.32421875, "learning_rate": 0.0010467194418355024, "loss": 1.0814, "step": 6502 }, { "epoch": 0.17451159295835122, "grad_norm": 0.298828125, "learning_rate": 0.001046880450825171, "loss": 1.0815, "step": 6503 }, { "epoch": 0.17453842851009016, "grad_norm": 0.3203125, "learning_rate": 0.0010470414598148395, "loss": 1.1929, "step": 6504 }, { "epoch": 0.17456526406182912, "grad_norm": 0.486328125, "learning_rate": 0.0010472024688045081, "loss": 1.0481, "step": 6505 }, { "epoch": 0.17459209961356806, "grad_norm": 0.546875, "learning_rate": 0.0010473634777941768, "loss": 0.9762, "step": 6506 }, { "epoch": 0.174618935165307, "grad_norm": 0.85546875, "learning_rate": 0.0010475244867838452, "loss": 0.9189, "step": 6507 }, { "epoch": 0.17464577071704596, "grad_norm": 1.046875, "learning_rate": 0.0010476854957735138, "loss": 0.8962, "step": 6508 }, { "epoch": 0.1746726062687849, "grad_norm": 1.1640625, "learning_rate": 0.0010478465047631825, "loss": 1.0523, "step": 6509 }, { "epoch": 0.17469944182052383, "grad_norm": 1.140625, "learning_rate": 0.0010480075137528511, "loss": 0.9977, "step": 6510 }, { "epoch": 0.17472627737226276, "grad_norm": 1.1484375, "learning_rate": 0.0010481685227425198, "loss": 1.1569, "step": 6511 }, { "epoch": 0.17475311292400172, "grad_norm": 1.0390625, "learning_rate": 0.0010483295317321882, "loss": 1.1012, "step": 6512 }, { "epoch": 0.17477994847574066, "grad_norm": 1.171875, "learning_rate": 0.0010484905407218568, "loss": 0.952, "step": 6513 }, { "epoch": 0.1748067840274796, "grad_norm": 0.69921875, "learning_rate": 0.0010486515497115255, "loss": 1.0221, "step": 6514 }, { "epoch": 0.17483361957921856, "grad_norm": 1.21875, "learning_rate": 0.0010488125587011941, "loss": 1.2902, "step": 6515 }, { "epoch": 0.1748604551309575, "grad_norm": 0.359375, "learning_rate": 0.0010489735676908628, "loss": 0.8753, "step": 6516 }, { "epoch": 0.17488729068269643, "grad_norm": 0.357421875, "learning_rate": 0.0010491345766805312, "loss": 0.9602, "step": 6517 }, { "epoch": 0.1749141262344354, "grad_norm": 0.796875, "learning_rate": 0.0010492955856701998, "loss": 0.874, "step": 6518 }, { "epoch": 0.17494096178617433, "grad_norm": 0.3828125, "learning_rate": 0.0010494565946598683, "loss": 0.9562, "step": 6519 }, { "epoch": 0.17496779733791326, "grad_norm": 0.435546875, "learning_rate": 0.001049617603649537, "loss": 0.9704, "step": 6520 }, { "epoch": 0.17499463288965222, "grad_norm": 1.0078125, "learning_rate": 0.0010497786126392056, "loss": 0.9801, "step": 6521 }, { "epoch": 0.17502146844139116, "grad_norm": 0.34375, "learning_rate": 0.0010499396216288742, "loss": 0.9525, "step": 6522 }, { "epoch": 0.1750483039931301, "grad_norm": 1.4765625, "learning_rate": 0.0010501006306185428, "loss": 1.0654, "step": 6523 }, { "epoch": 0.17507513954486903, "grad_norm": 0.62109375, "learning_rate": 0.0010502616396082113, "loss": 1.0962, "step": 6524 }, { "epoch": 0.175101975096608, "grad_norm": 2.5, "learning_rate": 0.00105042264859788, "loss": 0.9667, "step": 6525 }, { "epoch": 0.17512881064834693, "grad_norm": 0.455078125, "learning_rate": 0.0010505836575875485, "loss": 1.0575, "step": 6526 }, { "epoch": 0.17515564620008586, "grad_norm": 1.4453125, "learning_rate": 0.0010507446665772172, "loss": 1.1368, "step": 6527 }, { "epoch": 0.17518248175182483, "grad_norm": 0.431640625, "learning_rate": 0.0010509056755668858, "loss": 1.0094, "step": 6528 }, { "epoch": 0.17520931730356376, "grad_norm": 0.419921875, "learning_rate": 0.0010510666845565543, "loss": 0.9135, "step": 6529 }, { "epoch": 0.1752361528553027, "grad_norm": 0.349609375, "learning_rate": 0.001051227693546223, "loss": 0.9576, "step": 6530 }, { "epoch": 0.17526298840704166, "grad_norm": 0.34765625, "learning_rate": 0.0010513887025358915, "loss": 1.0372, "step": 6531 }, { "epoch": 0.1752898239587806, "grad_norm": 0.359375, "learning_rate": 0.0010515497115255602, "loss": 1.0082, "step": 6532 }, { "epoch": 0.17531665951051953, "grad_norm": 0.341796875, "learning_rate": 0.0010517107205152288, "loss": 0.9999, "step": 6533 }, { "epoch": 0.1753434950622585, "grad_norm": 0.296875, "learning_rate": 0.0010518717295048973, "loss": 0.8555, "step": 6534 }, { "epoch": 0.17537033061399743, "grad_norm": 0.5703125, "learning_rate": 0.001052032738494566, "loss": 0.8569, "step": 6535 }, { "epoch": 0.17539716616573636, "grad_norm": 0.30859375, "learning_rate": 0.0010521937474842343, "loss": 0.9665, "step": 6536 }, { "epoch": 0.1754240017174753, "grad_norm": 0.3125, "learning_rate": 0.001052354756473903, "loss": 0.8617, "step": 6537 }, { "epoch": 0.17545083726921426, "grad_norm": 0.31640625, "learning_rate": 0.0010525157654635716, "loss": 0.9174, "step": 6538 }, { "epoch": 0.1754776728209532, "grad_norm": 0.296875, "learning_rate": 0.0010526767744532403, "loss": 0.8756, "step": 6539 }, { "epoch": 0.17550450837269213, "grad_norm": 0.30859375, "learning_rate": 0.0010528377834429087, "loss": 0.9779, "step": 6540 }, { "epoch": 0.1755313439244311, "grad_norm": 0.318359375, "learning_rate": 0.0010529987924325773, "loss": 0.8944, "step": 6541 }, { "epoch": 0.17555817947617003, "grad_norm": 2.140625, "learning_rate": 0.001053159801422246, "loss": 0.8385, "step": 6542 }, { "epoch": 0.17558501502790896, "grad_norm": 0.39453125, "learning_rate": 0.0010533208104119146, "loss": 0.9596, "step": 6543 }, { "epoch": 0.17561185057964793, "grad_norm": 0.68359375, "learning_rate": 0.0010534818194015833, "loss": 0.9913, "step": 6544 }, { "epoch": 0.17563868613138686, "grad_norm": 0.51953125, "learning_rate": 0.0010536428283912517, "loss": 0.9552, "step": 6545 }, { "epoch": 0.1756655216831258, "grad_norm": 0.57421875, "learning_rate": 0.0010538038373809203, "loss": 0.8274, "step": 6546 }, { "epoch": 0.17569235723486476, "grad_norm": 1.328125, "learning_rate": 0.001053964846370589, "loss": 0.9556, "step": 6547 }, { "epoch": 0.1757191927866037, "grad_norm": 0.703125, "learning_rate": 0.0010541258553602576, "loss": 0.9439, "step": 6548 }, { "epoch": 0.17574602833834263, "grad_norm": 0.322265625, "learning_rate": 0.0010542868643499263, "loss": 0.9393, "step": 6549 }, { "epoch": 0.1757728638900816, "grad_norm": 0.3125, "learning_rate": 0.0010544478733395947, "loss": 0.9443, "step": 6550 }, { "epoch": 0.17579969944182053, "grad_norm": 0.33203125, "learning_rate": 0.0010546088823292633, "loss": 0.9399, "step": 6551 }, { "epoch": 0.17582653499355946, "grad_norm": 0.328125, "learning_rate": 0.0010547698913189317, "loss": 0.8738, "step": 6552 }, { "epoch": 0.1758533705452984, "grad_norm": 0.31640625, "learning_rate": 0.0010549309003086004, "loss": 0.9937, "step": 6553 }, { "epoch": 0.17588020609703736, "grad_norm": 0.30078125, "learning_rate": 0.001055091909298269, "loss": 0.8036, "step": 6554 }, { "epoch": 0.1759070416487763, "grad_norm": 0.3203125, "learning_rate": 0.0010552529182879377, "loss": 0.996, "step": 6555 }, { "epoch": 0.17593387720051523, "grad_norm": 0.341796875, "learning_rate": 0.0010554139272776063, "loss": 0.9597, "step": 6556 }, { "epoch": 0.1759607127522542, "grad_norm": 0.33984375, "learning_rate": 0.0010555749362672747, "loss": 0.9753, "step": 6557 }, { "epoch": 0.17598754830399313, "grad_norm": 0.33203125, "learning_rate": 0.0010557359452569434, "loss": 0.8541, "step": 6558 }, { "epoch": 0.17601438385573207, "grad_norm": 0.291015625, "learning_rate": 0.001055896954246612, "loss": 0.8536, "step": 6559 }, { "epoch": 0.17604121940747103, "grad_norm": 0.30859375, "learning_rate": 0.0010560579632362807, "loss": 0.9177, "step": 6560 }, { "epoch": 0.17606805495920996, "grad_norm": 0.30859375, "learning_rate": 0.0010562189722259493, "loss": 0.9173, "step": 6561 }, { "epoch": 0.1760948905109489, "grad_norm": 0.3046875, "learning_rate": 0.0010563799812156177, "loss": 0.9377, "step": 6562 }, { "epoch": 0.17612172606268786, "grad_norm": 0.291015625, "learning_rate": 0.0010565409902052864, "loss": 0.9083, "step": 6563 }, { "epoch": 0.1761485616144268, "grad_norm": 0.30859375, "learning_rate": 0.001056701999194955, "loss": 0.9404, "step": 6564 }, { "epoch": 0.17617539716616573, "grad_norm": 0.314453125, "learning_rate": 0.0010568630081846235, "loss": 1.0044, "step": 6565 }, { "epoch": 0.17620223271790467, "grad_norm": 0.291015625, "learning_rate": 0.001057024017174292, "loss": 0.8767, "step": 6566 }, { "epoch": 0.17622906826964363, "grad_norm": 0.30078125, "learning_rate": 0.0010571850261639607, "loss": 0.9949, "step": 6567 }, { "epoch": 0.17625590382138256, "grad_norm": 0.30859375, "learning_rate": 0.0010573460351536294, "loss": 1.0246, "step": 6568 }, { "epoch": 0.1762827393731215, "grad_norm": 0.28515625, "learning_rate": 0.0010575070441432978, "loss": 0.9143, "step": 6569 }, { "epoch": 0.17630957492486046, "grad_norm": 0.302734375, "learning_rate": 0.0010576680531329664, "loss": 1.0123, "step": 6570 }, { "epoch": 0.1763364104765994, "grad_norm": 0.298828125, "learning_rate": 0.001057829062122635, "loss": 0.9749, "step": 6571 }, { "epoch": 0.17636324602833833, "grad_norm": 0.296875, "learning_rate": 0.0010579900711123037, "loss": 1.0154, "step": 6572 }, { "epoch": 0.1763900815800773, "grad_norm": 0.298828125, "learning_rate": 0.0010581510801019724, "loss": 0.9444, "step": 6573 }, { "epoch": 0.17641691713181623, "grad_norm": 0.31640625, "learning_rate": 0.0010583120890916408, "loss": 0.9925, "step": 6574 }, { "epoch": 0.17644375268355517, "grad_norm": 0.322265625, "learning_rate": 0.0010584730980813094, "loss": 1.0914, "step": 6575 }, { "epoch": 0.17647058823529413, "grad_norm": 0.3046875, "learning_rate": 0.001058634107070978, "loss": 0.8839, "step": 6576 }, { "epoch": 0.17649742378703306, "grad_norm": 0.298828125, "learning_rate": 0.0010587951160606467, "loss": 0.9597, "step": 6577 }, { "epoch": 0.176524259338772, "grad_norm": 0.302734375, "learning_rate": 0.0010589561250503152, "loss": 1.0035, "step": 6578 }, { "epoch": 0.17655109489051096, "grad_norm": 0.2890625, "learning_rate": 0.0010591171340399838, "loss": 0.851, "step": 6579 }, { "epoch": 0.1765779304422499, "grad_norm": 0.2890625, "learning_rate": 0.0010592781430296524, "loss": 0.873, "step": 6580 }, { "epoch": 0.17660476599398883, "grad_norm": 0.296875, "learning_rate": 0.0010594391520193209, "loss": 0.9867, "step": 6581 }, { "epoch": 0.17663160154572777, "grad_norm": 0.29296875, "learning_rate": 0.0010596001610089895, "loss": 0.9943, "step": 6582 }, { "epoch": 0.17665843709746673, "grad_norm": 0.302734375, "learning_rate": 0.0010597611699986582, "loss": 1.0072, "step": 6583 }, { "epoch": 0.17668527264920567, "grad_norm": 0.30859375, "learning_rate": 0.0010599221789883268, "loss": 0.9987, "step": 6584 }, { "epoch": 0.1767121082009446, "grad_norm": 0.296875, "learning_rate": 0.0010600831879779952, "loss": 0.9508, "step": 6585 }, { "epoch": 0.17673894375268356, "grad_norm": 0.27734375, "learning_rate": 0.0010602441969676639, "loss": 0.8788, "step": 6586 }, { "epoch": 0.1767657793044225, "grad_norm": 0.27734375, "learning_rate": 0.0010604052059573325, "loss": 0.8743, "step": 6587 }, { "epoch": 0.17679261485616143, "grad_norm": 0.306640625, "learning_rate": 0.0010605662149470012, "loss": 1.0053, "step": 6588 }, { "epoch": 0.1768194504079004, "grad_norm": 0.26953125, "learning_rate": 0.0010607272239366698, "loss": 0.8171, "step": 6589 }, { "epoch": 0.17684628595963933, "grad_norm": 0.291015625, "learning_rate": 0.0010608882329263382, "loss": 0.9634, "step": 6590 }, { "epoch": 0.17687312151137827, "grad_norm": 0.291015625, "learning_rate": 0.0010610492419160069, "loss": 0.943, "step": 6591 }, { "epoch": 0.17689995706311723, "grad_norm": 0.29296875, "learning_rate": 0.0010612102509056755, "loss": 0.9248, "step": 6592 }, { "epoch": 0.17692679261485617, "grad_norm": 0.30859375, "learning_rate": 0.0010613712598953442, "loss": 1.0824, "step": 6593 }, { "epoch": 0.1769536281665951, "grad_norm": 0.296875, "learning_rate": 0.0010615322688850128, "loss": 0.9326, "step": 6594 }, { "epoch": 0.17698046371833404, "grad_norm": 0.28515625, "learning_rate": 0.0010616932778746812, "loss": 0.9106, "step": 6595 }, { "epoch": 0.177007299270073, "grad_norm": 0.29296875, "learning_rate": 0.0010618542868643499, "loss": 0.9255, "step": 6596 }, { "epoch": 0.17703413482181193, "grad_norm": 0.294921875, "learning_rate": 0.0010620152958540183, "loss": 0.9454, "step": 6597 }, { "epoch": 0.17706097037355087, "grad_norm": 0.2890625, "learning_rate": 0.001062176304843687, "loss": 0.9074, "step": 6598 }, { "epoch": 0.17708780592528983, "grad_norm": 0.296875, "learning_rate": 0.0010623373138333556, "loss": 1.0399, "step": 6599 }, { "epoch": 0.17711464147702877, "grad_norm": 0.314453125, "learning_rate": 0.0010624983228230242, "loss": 1.0512, "step": 6600 }, { "epoch": 0.1771414770287677, "grad_norm": 0.287109375, "learning_rate": 0.0010626593318126929, "loss": 0.9481, "step": 6601 }, { "epoch": 0.17716831258050666, "grad_norm": 0.296875, "learning_rate": 0.0010628203408023613, "loss": 0.894, "step": 6602 }, { "epoch": 0.1771951481322456, "grad_norm": 0.30078125, "learning_rate": 0.00106298134979203, "loss": 1.0647, "step": 6603 }, { "epoch": 0.17722198368398454, "grad_norm": 0.30078125, "learning_rate": 0.0010631423587816986, "loss": 1.0204, "step": 6604 }, { "epoch": 0.1772488192357235, "grad_norm": 0.306640625, "learning_rate": 0.0010633033677713672, "loss": 0.9752, "step": 6605 }, { "epoch": 0.17727565478746243, "grad_norm": 0.322265625, "learning_rate": 0.0010634643767610359, "loss": 1.0251, "step": 6606 }, { "epoch": 0.17730249033920137, "grad_norm": 0.283203125, "learning_rate": 0.0010636253857507043, "loss": 0.9056, "step": 6607 }, { "epoch": 0.17732932589094033, "grad_norm": 0.28125, "learning_rate": 0.001063786394740373, "loss": 0.8927, "step": 6608 }, { "epoch": 0.17735616144267927, "grad_norm": 0.287109375, "learning_rate": 0.0010639474037300416, "loss": 0.9135, "step": 6609 }, { "epoch": 0.1773829969944182, "grad_norm": 0.3046875, "learning_rate": 0.0010641084127197102, "loss": 0.9271, "step": 6610 }, { "epoch": 0.17740983254615714, "grad_norm": 0.294921875, "learning_rate": 0.0010642694217093786, "loss": 0.9002, "step": 6611 }, { "epoch": 0.1774366680978961, "grad_norm": 0.3046875, "learning_rate": 0.0010644304306990473, "loss": 0.9801, "step": 6612 }, { "epoch": 0.17746350364963503, "grad_norm": 0.27734375, "learning_rate": 0.0010645914396887157, "loss": 0.9076, "step": 6613 }, { "epoch": 0.17749033920137397, "grad_norm": 0.2890625, "learning_rate": 0.0010647524486783843, "loss": 0.9684, "step": 6614 }, { "epoch": 0.17751717475311293, "grad_norm": 0.2890625, "learning_rate": 0.001064913457668053, "loss": 0.9614, "step": 6615 }, { "epoch": 0.17754401030485187, "grad_norm": 0.298828125, "learning_rate": 0.0010650744666577216, "loss": 0.9583, "step": 6616 }, { "epoch": 0.1775708458565908, "grad_norm": 0.263671875, "learning_rate": 0.0010652354756473903, "loss": 0.8583, "step": 6617 }, { "epoch": 0.17759768140832977, "grad_norm": 0.294921875, "learning_rate": 0.0010653964846370587, "loss": 0.9501, "step": 6618 }, { "epoch": 0.1776245169600687, "grad_norm": 0.30078125, "learning_rate": 0.0010655574936267273, "loss": 0.9854, "step": 6619 }, { "epoch": 0.17765135251180764, "grad_norm": 0.25390625, "learning_rate": 0.001065718502616396, "loss": 0.8187, "step": 6620 }, { "epoch": 0.1776781880635466, "grad_norm": 0.267578125, "learning_rate": 0.0010658795116060646, "loss": 0.8173, "step": 6621 }, { "epoch": 0.17770502361528553, "grad_norm": 0.30859375, "learning_rate": 0.0010660405205957333, "loss": 1.0452, "step": 6622 }, { "epoch": 0.17773185916702447, "grad_norm": 0.287109375, "learning_rate": 0.0010662015295854017, "loss": 0.9777, "step": 6623 }, { "epoch": 0.1777586947187634, "grad_norm": 0.291015625, "learning_rate": 0.0010663625385750703, "loss": 0.9191, "step": 6624 }, { "epoch": 0.17778553027050237, "grad_norm": 0.28125, "learning_rate": 0.001066523547564739, "loss": 0.9631, "step": 6625 }, { "epoch": 0.1778123658222413, "grad_norm": 0.287109375, "learning_rate": 0.0010666845565544076, "loss": 0.8723, "step": 6626 }, { "epoch": 0.17783920137398024, "grad_norm": 0.287109375, "learning_rate": 0.001066845565544076, "loss": 0.9179, "step": 6627 }, { "epoch": 0.1778660369257192, "grad_norm": 0.271484375, "learning_rate": 0.0010670065745337447, "loss": 0.8541, "step": 6628 }, { "epoch": 0.17789287247745814, "grad_norm": 0.296875, "learning_rate": 0.0010671675835234133, "loss": 0.97, "step": 6629 }, { "epoch": 0.17791970802919707, "grad_norm": 0.318359375, "learning_rate": 0.0010673285925130818, "loss": 1.0744, "step": 6630 }, { "epoch": 0.17794654358093603, "grad_norm": 0.27734375, "learning_rate": 0.0010674896015027504, "loss": 0.9649, "step": 6631 }, { "epoch": 0.17797337913267497, "grad_norm": 0.27734375, "learning_rate": 0.001067650610492419, "loss": 0.8755, "step": 6632 }, { "epoch": 0.1780002146844139, "grad_norm": 0.283203125, "learning_rate": 0.0010678116194820877, "loss": 0.8894, "step": 6633 }, { "epoch": 0.17802705023615287, "grad_norm": 0.294921875, "learning_rate": 0.0010679726284717563, "loss": 1.0041, "step": 6634 }, { "epoch": 0.1780538857878918, "grad_norm": 0.287109375, "learning_rate": 0.0010681336374614248, "loss": 0.9859, "step": 6635 }, { "epoch": 0.17808072133963074, "grad_norm": 0.283203125, "learning_rate": 0.0010682946464510934, "loss": 0.8242, "step": 6636 }, { "epoch": 0.17810755689136967, "grad_norm": 0.275390625, "learning_rate": 0.001068455655440762, "loss": 0.8879, "step": 6637 }, { "epoch": 0.17813439244310864, "grad_norm": 0.251953125, "learning_rate": 0.0010686166644304307, "loss": 0.8301, "step": 6638 }, { "epoch": 0.17816122799484757, "grad_norm": 0.26171875, "learning_rate": 0.0010687776734200993, "loss": 0.8805, "step": 6639 }, { "epoch": 0.1781880635465865, "grad_norm": 0.275390625, "learning_rate": 0.0010689386824097678, "loss": 0.9261, "step": 6640 }, { "epoch": 0.17821489909832547, "grad_norm": 0.287109375, "learning_rate": 0.0010690996913994364, "loss": 0.9829, "step": 6641 }, { "epoch": 0.1782417346500644, "grad_norm": 0.296875, "learning_rate": 0.001069260700389105, "loss": 1.0477, "step": 6642 }, { "epoch": 0.17826857020180334, "grad_norm": 0.310546875, "learning_rate": 0.0010694217093787735, "loss": 0.9496, "step": 6643 }, { "epoch": 0.1782954057535423, "grad_norm": 0.2890625, "learning_rate": 0.0010695827183684421, "loss": 1.0589, "step": 6644 }, { "epoch": 0.17832224130528124, "grad_norm": 0.287109375, "learning_rate": 0.0010697437273581108, "loss": 1.008, "step": 6645 }, { "epoch": 0.17834907685702017, "grad_norm": 0.32421875, "learning_rate": 0.0010699047363477792, "loss": 1.0371, "step": 6646 }, { "epoch": 0.17837591240875914, "grad_norm": 0.291015625, "learning_rate": 0.0010700657453374478, "loss": 0.9946, "step": 6647 }, { "epoch": 0.17840274796049807, "grad_norm": 0.26953125, "learning_rate": 0.0010702267543271165, "loss": 0.7931, "step": 6648 }, { "epoch": 0.178429583512237, "grad_norm": 0.259765625, "learning_rate": 0.0010703877633167851, "loss": 0.8503, "step": 6649 }, { "epoch": 0.17845641906397597, "grad_norm": 0.259765625, "learning_rate": 0.0010705487723064538, "loss": 0.8338, "step": 6650 }, { "epoch": 0.1784832546157149, "grad_norm": 0.291015625, "learning_rate": 0.0010707097812961222, "loss": 0.9978, "step": 6651 }, { "epoch": 0.17851009016745384, "grad_norm": 0.26953125, "learning_rate": 0.0010708707902857908, "loss": 0.8991, "step": 6652 }, { "epoch": 0.17853692571919277, "grad_norm": 0.2734375, "learning_rate": 0.0010710317992754595, "loss": 0.9309, "step": 6653 }, { "epoch": 0.17856376127093174, "grad_norm": 0.2734375, "learning_rate": 0.0010711928082651281, "loss": 0.9056, "step": 6654 }, { "epoch": 0.17859059682267067, "grad_norm": 0.259765625, "learning_rate": 0.0010713538172547968, "loss": 0.8345, "step": 6655 }, { "epoch": 0.1786174323744096, "grad_norm": 0.28515625, "learning_rate": 0.0010715148262444652, "loss": 0.9107, "step": 6656 }, { "epoch": 0.17864426792614857, "grad_norm": 0.271484375, "learning_rate": 0.0010716758352341338, "loss": 0.8789, "step": 6657 }, { "epoch": 0.1786711034778875, "grad_norm": 0.28125, "learning_rate": 0.0010718368442238022, "loss": 0.8698, "step": 6658 }, { "epoch": 0.17869793902962644, "grad_norm": 0.28515625, "learning_rate": 0.001071997853213471, "loss": 0.9318, "step": 6659 }, { "epoch": 0.1787247745813654, "grad_norm": 0.275390625, "learning_rate": 0.0010721588622031395, "loss": 0.9217, "step": 6660 }, { "epoch": 0.17875161013310434, "grad_norm": 0.271484375, "learning_rate": 0.0010723198711928082, "loss": 0.8982, "step": 6661 }, { "epoch": 0.17877844568484327, "grad_norm": 0.29296875, "learning_rate": 0.0010724808801824768, "loss": 1.0508, "step": 6662 }, { "epoch": 0.17880528123658224, "grad_norm": 0.2734375, "learning_rate": 0.0010726418891721452, "loss": 0.8806, "step": 6663 }, { "epoch": 0.17883211678832117, "grad_norm": 0.2421875, "learning_rate": 0.0010728028981618139, "loss": 0.7542, "step": 6664 }, { "epoch": 0.1788589523400601, "grad_norm": 0.265625, "learning_rate": 0.0010729639071514825, "loss": 0.8538, "step": 6665 }, { "epoch": 0.17888578789179904, "grad_norm": 0.26171875, "learning_rate": 0.0010731249161411512, "loss": 0.8507, "step": 6666 }, { "epoch": 0.178912623443538, "grad_norm": 0.255859375, "learning_rate": 0.0010732859251308198, "loss": 0.8247, "step": 6667 }, { "epoch": 0.17893945899527694, "grad_norm": 0.255859375, "learning_rate": 0.0010734469341204882, "loss": 0.8171, "step": 6668 }, { "epoch": 0.17896629454701588, "grad_norm": 0.267578125, "learning_rate": 0.0010736079431101569, "loss": 0.8061, "step": 6669 }, { "epoch": 0.17899313009875484, "grad_norm": 0.287109375, "learning_rate": 0.0010737689520998255, "loss": 0.918, "step": 6670 }, { "epoch": 0.17901996565049377, "grad_norm": 0.271484375, "learning_rate": 0.0010739299610894942, "loss": 0.8996, "step": 6671 }, { "epoch": 0.1790468012022327, "grad_norm": 0.29296875, "learning_rate": 0.0010740909700791628, "loss": 1.0152, "step": 6672 }, { "epoch": 0.17907363675397167, "grad_norm": 0.28125, "learning_rate": 0.0010742519790688312, "loss": 0.922, "step": 6673 }, { "epoch": 0.1791004723057106, "grad_norm": 0.25390625, "learning_rate": 0.0010744129880584999, "loss": 0.803, "step": 6674 }, { "epoch": 0.17912730785744954, "grad_norm": 0.302734375, "learning_rate": 0.0010745739970481683, "loss": 1.0622, "step": 6675 }, { "epoch": 0.1791541434091885, "grad_norm": 0.27734375, "learning_rate": 0.001074735006037837, "loss": 0.9624, "step": 6676 }, { "epoch": 0.17918097896092744, "grad_norm": 0.283203125, "learning_rate": 0.0010748960150275056, "loss": 0.9883, "step": 6677 }, { "epoch": 0.17920781451266637, "grad_norm": 0.2890625, "learning_rate": 0.0010750570240171742, "loss": 1.0054, "step": 6678 }, { "epoch": 0.17923465006440534, "grad_norm": 0.27734375, "learning_rate": 0.0010752180330068427, "loss": 0.9879, "step": 6679 }, { "epoch": 0.17926148561614427, "grad_norm": 0.265625, "learning_rate": 0.0010753790419965113, "loss": 0.8026, "step": 6680 }, { "epoch": 0.1792883211678832, "grad_norm": 0.287109375, "learning_rate": 0.00107554005098618, "loss": 0.965, "step": 6681 }, { "epoch": 0.17931515671962214, "grad_norm": 0.30078125, "learning_rate": 0.0010757010599758486, "loss": 1.1373, "step": 6682 }, { "epoch": 0.1793419922713611, "grad_norm": 0.2734375, "learning_rate": 0.0010758620689655172, "loss": 0.9558, "step": 6683 }, { "epoch": 0.17936882782310004, "grad_norm": 0.294921875, "learning_rate": 0.0010760230779551857, "loss": 0.963, "step": 6684 }, { "epoch": 0.17939566337483898, "grad_norm": 0.279296875, "learning_rate": 0.0010761840869448543, "loss": 0.9063, "step": 6685 }, { "epoch": 0.17942249892657794, "grad_norm": 0.27734375, "learning_rate": 0.001076345095934523, "loss": 0.9374, "step": 6686 }, { "epoch": 0.17944933447831687, "grad_norm": 0.2734375, "learning_rate": 0.0010765061049241916, "loss": 0.9342, "step": 6687 }, { "epoch": 0.1794761700300558, "grad_norm": 0.294921875, "learning_rate": 0.0010766671139138602, "loss": 0.9709, "step": 6688 }, { "epoch": 0.17950300558179477, "grad_norm": 0.27734375, "learning_rate": 0.0010768281229035287, "loss": 0.8943, "step": 6689 }, { "epoch": 0.1795298411335337, "grad_norm": 0.29296875, "learning_rate": 0.0010769891318931973, "loss": 0.9614, "step": 6690 }, { "epoch": 0.17955667668527264, "grad_norm": 0.314453125, "learning_rate": 0.0010771501408828657, "loss": 1.1333, "step": 6691 }, { "epoch": 0.1795835122370116, "grad_norm": 0.283203125, "learning_rate": 0.0010773111498725344, "loss": 0.9642, "step": 6692 }, { "epoch": 0.17961034778875054, "grad_norm": 0.265625, "learning_rate": 0.001077472158862203, "loss": 0.8376, "step": 6693 }, { "epoch": 0.17963718334048948, "grad_norm": 0.26953125, "learning_rate": 0.0010776331678518717, "loss": 0.901, "step": 6694 }, { "epoch": 0.1796640188922284, "grad_norm": 0.275390625, "learning_rate": 0.0010777941768415403, "loss": 0.8449, "step": 6695 }, { "epoch": 0.17969085444396737, "grad_norm": 0.279296875, "learning_rate": 0.0010779551858312087, "loss": 0.9172, "step": 6696 }, { "epoch": 0.1797176899957063, "grad_norm": 0.28515625, "learning_rate": 0.0010781161948208774, "loss": 0.9215, "step": 6697 }, { "epoch": 0.17974452554744524, "grad_norm": 0.30078125, "learning_rate": 0.001078277203810546, "loss": 0.9582, "step": 6698 }, { "epoch": 0.1797713610991842, "grad_norm": 0.26171875, "learning_rate": 0.0010784382128002147, "loss": 0.7986, "step": 6699 }, { "epoch": 0.17979819665092314, "grad_norm": 0.306640625, "learning_rate": 0.0010785992217898833, "loss": 1.0163, "step": 6700 }, { "epoch": 0.17982503220266208, "grad_norm": 0.27734375, "learning_rate": 0.0010787602307795517, "loss": 0.8968, "step": 6701 }, { "epoch": 0.17985186775440104, "grad_norm": 0.296875, "learning_rate": 0.0010789212397692204, "loss": 1.0407, "step": 6702 }, { "epoch": 0.17987870330613998, "grad_norm": 0.27734375, "learning_rate": 0.001079082248758889, "loss": 0.9224, "step": 6703 }, { "epoch": 0.1799055388578789, "grad_norm": 0.27734375, "learning_rate": 0.0010792432577485577, "loss": 0.9348, "step": 6704 }, { "epoch": 0.17993237440961787, "grad_norm": 0.29296875, "learning_rate": 0.001079404266738226, "loss": 0.9892, "step": 6705 }, { "epoch": 0.1799592099613568, "grad_norm": 0.26953125, "learning_rate": 0.0010795652757278947, "loss": 0.8076, "step": 6706 }, { "epoch": 0.17998604551309574, "grad_norm": 0.283203125, "learning_rate": 0.0010797262847175634, "loss": 0.923, "step": 6707 }, { "epoch": 0.1800128810648347, "grad_norm": 0.275390625, "learning_rate": 0.0010798872937072318, "loss": 0.8833, "step": 6708 }, { "epoch": 0.18003971661657364, "grad_norm": 0.271484375, "learning_rate": 0.0010800483026969004, "loss": 0.8916, "step": 6709 }, { "epoch": 0.18006655216831258, "grad_norm": 0.267578125, "learning_rate": 0.001080209311686569, "loss": 0.7924, "step": 6710 }, { "epoch": 0.1800933877200515, "grad_norm": 0.275390625, "learning_rate": 0.0010803703206762377, "loss": 0.9364, "step": 6711 }, { "epoch": 0.18012022327179047, "grad_norm": 0.2890625, "learning_rate": 0.0010805313296659064, "loss": 0.942, "step": 6712 }, { "epoch": 0.1801470588235294, "grad_norm": 0.267578125, "learning_rate": 0.0010806923386555748, "loss": 0.8228, "step": 6713 }, { "epoch": 0.18017389437526835, "grad_norm": 0.28125, "learning_rate": 0.0010808533476452434, "loss": 0.9003, "step": 6714 }, { "epoch": 0.1802007299270073, "grad_norm": 0.28515625, "learning_rate": 0.001081014356634912, "loss": 0.9844, "step": 6715 }, { "epoch": 0.18022756547874624, "grad_norm": 0.28515625, "learning_rate": 0.0010811753656245807, "loss": 0.9627, "step": 6716 }, { "epoch": 0.18025440103048518, "grad_norm": 0.302734375, "learning_rate": 0.0010813363746142491, "loss": 1.048, "step": 6717 }, { "epoch": 0.18028123658222414, "grad_norm": 0.279296875, "learning_rate": 0.0010814973836039178, "loss": 0.9403, "step": 6718 }, { "epoch": 0.18030807213396308, "grad_norm": 0.279296875, "learning_rate": 0.0010816583925935864, "loss": 0.9555, "step": 6719 }, { "epoch": 0.180334907685702, "grad_norm": 0.2890625, "learning_rate": 0.0010818194015832549, "loss": 0.9164, "step": 6720 }, { "epoch": 0.18036174323744097, "grad_norm": 0.25, "learning_rate": 0.0010819804105729235, "loss": 0.8461, "step": 6721 }, { "epoch": 0.1803885787891799, "grad_norm": 0.2734375, "learning_rate": 0.0010821414195625921, "loss": 0.8786, "step": 6722 }, { "epoch": 0.18041541434091884, "grad_norm": 0.287109375, "learning_rate": 0.0010823024285522608, "loss": 0.9611, "step": 6723 }, { "epoch": 0.18044224989265778, "grad_norm": 0.29296875, "learning_rate": 0.0010824634375419292, "loss": 0.8777, "step": 6724 }, { "epoch": 0.18046908544439674, "grad_norm": 0.263671875, "learning_rate": 0.0010826244465315979, "loss": 0.9205, "step": 6725 }, { "epoch": 0.18049592099613568, "grad_norm": 0.259765625, "learning_rate": 0.0010827854555212665, "loss": 0.7966, "step": 6726 }, { "epoch": 0.1805227565478746, "grad_norm": 0.2890625, "learning_rate": 0.0010829464645109351, "loss": 0.9542, "step": 6727 }, { "epoch": 0.18054959209961358, "grad_norm": 0.271484375, "learning_rate": 0.0010831074735006038, "loss": 0.8244, "step": 6728 }, { "epoch": 0.1805764276513525, "grad_norm": 0.28125, "learning_rate": 0.0010832684824902722, "loss": 0.856, "step": 6729 }, { "epoch": 0.18060326320309145, "grad_norm": 0.2890625, "learning_rate": 0.0010834294914799408, "loss": 0.9908, "step": 6730 }, { "epoch": 0.1806300987548304, "grad_norm": 0.279296875, "learning_rate": 0.0010835905004696095, "loss": 0.9542, "step": 6731 }, { "epoch": 0.18065693430656934, "grad_norm": 0.29296875, "learning_rate": 0.0010837515094592781, "loss": 0.9662, "step": 6732 }, { "epoch": 0.18068376985830828, "grad_norm": 0.26171875, "learning_rate": 0.0010839125184489468, "loss": 0.8596, "step": 6733 }, { "epoch": 0.18071060541004724, "grad_norm": 0.283203125, "learning_rate": 0.0010840735274386152, "loss": 0.933, "step": 6734 }, { "epoch": 0.18073744096178618, "grad_norm": 0.26953125, "learning_rate": 0.0010842345364282838, "loss": 0.8583, "step": 6735 }, { "epoch": 0.1807642765135251, "grad_norm": 0.306640625, "learning_rate": 0.0010843955454179523, "loss": 1.0691, "step": 6736 }, { "epoch": 0.18079111206526405, "grad_norm": 0.2734375, "learning_rate": 0.001084556554407621, "loss": 0.8424, "step": 6737 }, { "epoch": 0.180817947617003, "grad_norm": 0.27734375, "learning_rate": 0.0010847175633972896, "loss": 0.9579, "step": 6738 }, { "epoch": 0.18084478316874195, "grad_norm": 0.298828125, "learning_rate": 0.0010848785723869582, "loss": 1.1484, "step": 6739 }, { "epoch": 0.18087161872048088, "grad_norm": 0.291015625, "learning_rate": 0.0010850395813766268, "loss": 0.8038, "step": 6740 }, { "epoch": 0.18089845427221984, "grad_norm": 0.2890625, "learning_rate": 0.0010852005903662953, "loss": 0.8959, "step": 6741 }, { "epoch": 0.18092528982395878, "grad_norm": 0.26171875, "learning_rate": 0.001085361599355964, "loss": 0.8752, "step": 6742 }, { "epoch": 0.18095212537569771, "grad_norm": 0.26953125, "learning_rate": 0.0010855226083456326, "loss": 0.9242, "step": 6743 }, { "epoch": 0.18097896092743668, "grad_norm": 0.2734375, "learning_rate": 0.0010856836173353012, "loss": 0.9079, "step": 6744 }, { "epoch": 0.1810057964791756, "grad_norm": 0.29296875, "learning_rate": 0.0010858446263249698, "loss": 1.0266, "step": 6745 }, { "epoch": 0.18103263203091455, "grad_norm": 0.265625, "learning_rate": 0.0010860056353146383, "loss": 0.8097, "step": 6746 }, { "epoch": 0.1810594675826535, "grad_norm": 0.267578125, "learning_rate": 0.001086166644304307, "loss": 0.9513, "step": 6747 }, { "epoch": 0.18108630313439245, "grad_norm": 0.294921875, "learning_rate": 0.0010863276532939756, "loss": 1.0467, "step": 6748 }, { "epoch": 0.18111313868613138, "grad_norm": 0.271484375, "learning_rate": 0.0010864886622836442, "loss": 0.9404, "step": 6749 }, { "epoch": 0.18113997423787034, "grad_norm": 0.267578125, "learning_rate": 0.0010866496712733126, "loss": 0.8662, "step": 6750 }, { "epoch": 0.18116680978960928, "grad_norm": 0.28125, "learning_rate": 0.0010868106802629813, "loss": 0.8815, "step": 6751 }, { "epoch": 0.18119364534134821, "grad_norm": 0.271484375, "learning_rate": 0.0010869716892526497, "loss": 0.9512, "step": 6752 }, { "epoch": 0.18122048089308715, "grad_norm": 0.28125, "learning_rate": 0.0010871326982423183, "loss": 0.8937, "step": 6753 }, { "epoch": 0.1812473164448261, "grad_norm": 0.2578125, "learning_rate": 0.001087293707231987, "loss": 0.8981, "step": 6754 }, { "epoch": 0.18127415199656505, "grad_norm": 0.2890625, "learning_rate": 0.0010874547162216556, "loss": 1.0026, "step": 6755 }, { "epoch": 0.18130098754830398, "grad_norm": 0.26953125, "learning_rate": 0.0010876157252113243, "loss": 0.9169, "step": 6756 }, { "epoch": 0.18132782310004295, "grad_norm": 0.275390625, "learning_rate": 0.0010877767342009927, "loss": 0.8892, "step": 6757 }, { "epoch": 0.18135465865178188, "grad_norm": 0.294921875, "learning_rate": 0.0010879377431906613, "loss": 0.8223, "step": 6758 }, { "epoch": 0.18138149420352082, "grad_norm": 0.23828125, "learning_rate": 0.00108809875218033, "loss": 0.725, "step": 6759 }, { "epoch": 0.18140832975525978, "grad_norm": 0.27734375, "learning_rate": 0.0010882597611699986, "loss": 0.9358, "step": 6760 }, { "epoch": 0.1814351653069987, "grad_norm": 0.28125, "learning_rate": 0.0010884207701596673, "loss": 0.9926, "step": 6761 }, { "epoch": 0.18146200085873765, "grad_norm": 0.27734375, "learning_rate": 0.0010885817791493357, "loss": 0.9402, "step": 6762 }, { "epoch": 0.1814888364104766, "grad_norm": 0.263671875, "learning_rate": 0.0010887427881390043, "loss": 0.8453, "step": 6763 }, { "epoch": 0.18151567196221555, "grad_norm": 0.27734375, "learning_rate": 0.001088903797128673, "loss": 0.9184, "step": 6764 }, { "epoch": 0.18154250751395448, "grad_norm": 0.265625, "learning_rate": 0.0010890648061183416, "loss": 0.9283, "step": 6765 }, { "epoch": 0.18156934306569342, "grad_norm": 0.291015625, "learning_rate": 0.00108922581510801, "loss": 1.0641, "step": 6766 }, { "epoch": 0.18159617861743238, "grad_norm": 0.283203125, "learning_rate": 0.0010893868240976787, "loss": 0.8706, "step": 6767 }, { "epoch": 0.18162301416917132, "grad_norm": 0.267578125, "learning_rate": 0.0010895478330873473, "loss": 0.8464, "step": 6768 }, { "epoch": 0.18164984972091025, "grad_norm": 0.27734375, "learning_rate": 0.0010897088420770158, "loss": 0.9241, "step": 6769 }, { "epoch": 0.1816766852726492, "grad_norm": 0.291015625, "learning_rate": 0.0010898698510666844, "loss": 1.0072, "step": 6770 }, { "epoch": 0.18170352082438815, "grad_norm": 0.2578125, "learning_rate": 0.001090030860056353, "loss": 0.8643, "step": 6771 }, { "epoch": 0.18173035637612708, "grad_norm": 0.296875, "learning_rate": 0.0010901918690460217, "loss": 1.012, "step": 6772 }, { "epoch": 0.18175719192786605, "grad_norm": 0.25, "learning_rate": 0.0010903528780356903, "loss": 0.7926, "step": 6773 }, { "epoch": 0.18178402747960498, "grad_norm": 0.251953125, "learning_rate": 0.0010905138870253587, "loss": 0.7886, "step": 6774 }, { "epoch": 0.18181086303134392, "grad_norm": 0.28125, "learning_rate": 0.0010906748960150274, "loss": 0.8987, "step": 6775 }, { "epoch": 0.18183769858308288, "grad_norm": 0.271484375, "learning_rate": 0.001090835905004696, "loss": 1.0021, "step": 6776 }, { "epoch": 0.18186453413482181, "grad_norm": 0.2578125, "learning_rate": 0.0010909969139943647, "loss": 0.8187, "step": 6777 }, { "epoch": 0.18189136968656075, "grad_norm": 0.265625, "learning_rate": 0.0010911579229840333, "loss": 0.8507, "step": 6778 }, { "epoch": 0.1819182052382997, "grad_norm": 0.294921875, "learning_rate": 0.0010913189319737017, "loss": 0.932, "step": 6779 }, { "epoch": 0.18194504079003865, "grad_norm": 0.28125, "learning_rate": 0.0010914799409633704, "loss": 0.9496, "step": 6780 }, { "epoch": 0.18197187634177758, "grad_norm": 0.251953125, "learning_rate": 0.001091640949953039, "loss": 0.8565, "step": 6781 }, { "epoch": 0.18199871189351652, "grad_norm": 0.275390625, "learning_rate": 0.0010918019589427075, "loss": 0.9698, "step": 6782 }, { "epoch": 0.18202554744525548, "grad_norm": 0.27734375, "learning_rate": 0.001091962967932376, "loss": 0.9735, "step": 6783 }, { "epoch": 0.18205238299699442, "grad_norm": 0.275390625, "learning_rate": 0.0010921239769220447, "loss": 0.9208, "step": 6784 }, { "epoch": 0.18207921854873335, "grad_norm": 0.279296875, "learning_rate": 0.0010922849859117132, "loss": 0.903, "step": 6785 }, { "epoch": 0.18210605410047231, "grad_norm": 0.275390625, "learning_rate": 0.0010924459949013818, "loss": 0.914, "step": 6786 }, { "epoch": 0.18213288965221125, "grad_norm": 0.2734375, "learning_rate": 0.0010926070038910505, "loss": 0.9657, "step": 6787 }, { "epoch": 0.18215972520395018, "grad_norm": 0.2578125, "learning_rate": 0.001092768012880719, "loss": 0.9304, "step": 6788 }, { "epoch": 0.18218656075568915, "grad_norm": 0.279296875, "learning_rate": 0.0010929290218703877, "loss": 0.9378, "step": 6789 }, { "epoch": 0.18221339630742808, "grad_norm": 0.267578125, "learning_rate": 0.0010930900308600562, "loss": 0.9284, "step": 6790 }, { "epoch": 0.18224023185916702, "grad_norm": 0.27734375, "learning_rate": 0.0010932510398497248, "loss": 0.9087, "step": 6791 }, { "epoch": 0.18226706741090598, "grad_norm": 0.275390625, "learning_rate": 0.0010934120488393935, "loss": 0.883, "step": 6792 }, { "epoch": 0.18229390296264492, "grad_norm": 0.26171875, "learning_rate": 0.001093573057829062, "loss": 0.8579, "step": 6793 }, { "epoch": 0.18232073851438385, "grad_norm": 0.2734375, "learning_rate": 0.0010937340668187307, "loss": 0.9192, "step": 6794 }, { "epoch": 0.1823475740661228, "grad_norm": 0.283203125, "learning_rate": 0.0010938950758083992, "loss": 0.941, "step": 6795 }, { "epoch": 0.18237440961786175, "grad_norm": 0.2451171875, "learning_rate": 0.0010940560847980678, "loss": 0.8573, "step": 6796 }, { "epoch": 0.18240124516960068, "grad_norm": 0.26953125, "learning_rate": 0.0010942170937877362, "loss": 0.9104, "step": 6797 }, { "epoch": 0.18242808072133962, "grad_norm": 0.267578125, "learning_rate": 0.0010943781027774049, "loss": 0.8791, "step": 6798 }, { "epoch": 0.18245491627307858, "grad_norm": 0.263671875, "learning_rate": 0.0010945391117670735, "loss": 0.856, "step": 6799 }, { "epoch": 0.18248175182481752, "grad_norm": 0.294921875, "learning_rate": 0.0010947001207567422, "loss": 0.9868, "step": 6800 }, { "epoch": 0.18250858737655645, "grad_norm": 0.26171875, "learning_rate": 0.0010948611297464108, "loss": 0.9437, "step": 6801 }, { "epoch": 0.18253542292829542, "grad_norm": 0.271484375, "learning_rate": 0.0010950221387360792, "loss": 0.8681, "step": 6802 }, { "epoch": 0.18256225848003435, "grad_norm": 0.267578125, "learning_rate": 0.0010951831477257479, "loss": 0.9112, "step": 6803 }, { "epoch": 0.18258909403177329, "grad_norm": 0.275390625, "learning_rate": 0.0010953441567154165, "loss": 0.9882, "step": 6804 }, { "epoch": 0.18261592958351225, "grad_norm": 0.255859375, "learning_rate": 0.0010955051657050852, "loss": 0.8798, "step": 6805 }, { "epoch": 0.18264276513525118, "grad_norm": 0.2734375, "learning_rate": 0.0010956661746947538, "loss": 0.967, "step": 6806 }, { "epoch": 0.18266960068699012, "grad_norm": 0.279296875, "learning_rate": 0.0010958271836844222, "loss": 0.9762, "step": 6807 }, { "epoch": 0.18269643623872908, "grad_norm": 0.2734375, "learning_rate": 0.0010959881926740909, "loss": 0.9376, "step": 6808 }, { "epoch": 0.18272327179046802, "grad_norm": 0.291015625, "learning_rate": 0.0010961492016637595, "loss": 1.0002, "step": 6809 }, { "epoch": 0.18275010734220695, "grad_norm": 0.26171875, "learning_rate": 0.0010963102106534282, "loss": 0.9394, "step": 6810 }, { "epoch": 0.1827769428939459, "grad_norm": 0.267578125, "learning_rate": 0.0010964712196430968, "loss": 0.9268, "step": 6811 }, { "epoch": 0.18280377844568485, "grad_norm": 0.275390625, "learning_rate": 0.0010966322286327652, "loss": 0.9351, "step": 6812 }, { "epoch": 0.18283061399742379, "grad_norm": 0.275390625, "learning_rate": 0.0010967932376224339, "loss": 0.9836, "step": 6813 }, { "epoch": 0.18285744954916272, "grad_norm": 0.26953125, "learning_rate": 0.0010969542466121023, "loss": 0.8413, "step": 6814 }, { "epoch": 0.18288428510090168, "grad_norm": 0.27734375, "learning_rate": 0.001097115255601771, "loss": 0.963, "step": 6815 }, { "epoch": 0.18291112065264062, "grad_norm": 0.267578125, "learning_rate": 0.0010972762645914396, "loss": 0.8038, "step": 6816 }, { "epoch": 0.18293795620437955, "grad_norm": 0.2734375, "learning_rate": 0.0010974372735811082, "loss": 0.8482, "step": 6817 }, { "epoch": 0.18296479175611852, "grad_norm": 0.25390625, "learning_rate": 0.0010975982825707766, "loss": 0.7908, "step": 6818 }, { "epoch": 0.18299162730785745, "grad_norm": 0.279296875, "learning_rate": 0.0010977592915604453, "loss": 0.9186, "step": 6819 }, { "epoch": 0.1830184628595964, "grad_norm": 0.29296875, "learning_rate": 0.001097920300550114, "loss": 0.9525, "step": 6820 }, { "epoch": 0.18304529841133535, "grad_norm": 0.29296875, "learning_rate": 0.0010980813095397826, "loss": 1.0078, "step": 6821 }, { "epoch": 0.18307213396307428, "grad_norm": 0.26953125, "learning_rate": 0.0010982423185294512, "loss": 0.9152, "step": 6822 }, { "epoch": 0.18309896951481322, "grad_norm": 0.275390625, "learning_rate": 0.0010984033275191196, "loss": 0.8742, "step": 6823 }, { "epoch": 0.18312580506655216, "grad_norm": 0.279296875, "learning_rate": 0.0010985643365087883, "loss": 0.7972, "step": 6824 }, { "epoch": 0.18315264061829112, "grad_norm": 0.31640625, "learning_rate": 0.001098725345498457, "loss": 0.9789, "step": 6825 }, { "epoch": 0.18317947617003005, "grad_norm": 0.283203125, "learning_rate": 0.0010988863544881256, "loss": 0.9706, "step": 6826 }, { "epoch": 0.183206311721769, "grad_norm": 0.287109375, "learning_rate": 0.0010990473634777942, "loss": 0.9823, "step": 6827 }, { "epoch": 0.18323314727350795, "grad_norm": 0.265625, "learning_rate": 0.0010992083724674626, "loss": 0.9023, "step": 6828 }, { "epoch": 0.1832599828252469, "grad_norm": 0.275390625, "learning_rate": 0.0010993693814571313, "loss": 0.8774, "step": 6829 }, { "epoch": 0.18328681837698582, "grad_norm": 0.279296875, "learning_rate": 0.0010995303904467997, "loss": 0.921, "step": 6830 }, { "epoch": 0.18331365392872478, "grad_norm": 0.267578125, "learning_rate": 0.0010996913994364684, "loss": 0.9201, "step": 6831 }, { "epoch": 0.18334048948046372, "grad_norm": 0.2734375, "learning_rate": 0.001099852408426137, "loss": 0.9123, "step": 6832 }, { "epoch": 0.18336732503220265, "grad_norm": 0.2734375, "learning_rate": 0.0011000134174158056, "loss": 0.9794, "step": 6833 }, { "epoch": 0.18339416058394162, "grad_norm": 0.2890625, "learning_rate": 0.0011001744264054743, "loss": 0.9243, "step": 6834 }, { "epoch": 0.18342099613568055, "grad_norm": 0.26953125, "learning_rate": 0.0011003354353951427, "loss": 0.9198, "step": 6835 }, { "epoch": 0.1834478316874195, "grad_norm": 0.287109375, "learning_rate": 0.0011004964443848114, "loss": 0.9102, "step": 6836 }, { "epoch": 0.18347466723915842, "grad_norm": 0.263671875, "learning_rate": 0.00110065745337448, "loss": 0.8038, "step": 6837 }, { "epoch": 0.1835015027908974, "grad_norm": 0.2890625, "learning_rate": 0.0011008184623641486, "loss": 1.0481, "step": 6838 }, { "epoch": 0.18352833834263632, "grad_norm": 0.291015625, "learning_rate": 0.0011009794713538173, "loss": 1.0272, "step": 6839 }, { "epoch": 0.18355517389437526, "grad_norm": 0.279296875, "learning_rate": 0.0011011404803434857, "loss": 0.9092, "step": 6840 }, { "epoch": 0.18358200944611422, "grad_norm": 0.263671875, "learning_rate": 0.0011013014893331544, "loss": 0.8779, "step": 6841 }, { "epoch": 0.18360884499785315, "grad_norm": 0.28515625, "learning_rate": 0.001101462498322823, "loss": 0.9691, "step": 6842 }, { "epoch": 0.1836356805495921, "grad_norm": 0.265625, "learning_rate": 0.0011016235073124916, "loss": 0.8984, "step": 6843 }, { "epoch": 0.18366251610133105, "grad_norm": 0.271484375, "learning_rate": 0.00110178451630216, "loss": 0.876, "step": 6844 }, { "epoch": 0.18368935165307, "grad_norm": 0.2890625, "learning_rate": 0.0011019455252918287, "loss": 1.0816, "step": 6845 }, { "epoch": 0.18371618720480892, "grad_norm": 0.25390625, "learning_rate": 0.0011021065342814973, "loss": 0.8554, "step": 6846 }, { "epoch": 0.18374302275654789, "grad_norm": 0.255859375, "learning_rate": 0.0011022675432711658, "loss": 0.8264, "step": 6847 }, { "epoch": 0.18376985830828682, "grad_norm": 0.279296875, "learning_rate": 0.0011024285522608344, "loss": 1.0235, "step": 6848 }, { "epoch": 0.18379669386002576, "grad_norm": 0.275390625, "learning_rate": 0.001102589561250503, "loss": 0.9118, "step": 6849 }, { "epoch": 0.18382352941176472, "grad_norm": 0.2734375, "learning_rate": 0.0011027505702401717, "loss": 0.9818, "step": 6850 }, { "epoch": 0.18385036496350365, "grad_norm": 0.2470703125, "learning_rate": 0.0011029115792298401, "loss": 0.7996, "step": 6851 }, { "epoch": 0.1838772005152426, "grad_norm": 0.26171875, "learning_rate": 0.0011030725882195088, "loss": 0.9086, "step": 6852 }, { "epoch": 0.18390403606698152, "grad_norm": 0.279296875, "learning_rate": 0.0011032335972091774, "loss": 0.9242, "step": 6853 }, { "epoch": 0.1839308716187205, "grad_norm": 0.271484375, "learning_rate": 0.001103394606198846, "loss": 0.8802, "step": 6854 }, { "epoch": 0.18395770717045942, "grad_norm": 0.259765625, "learning_rate": 0.0011035556151885147, "loss": 0.7803, "step": 6855 }, { "epoch": 0.18398454272219836, "grad_norm": 0.2578125, "learning_rate": 0.0011037166241781831, "loss": 0.8528, "step": 6856 }, { "epoch": 0.18401137827393732, "grad_norm": 0.2490234375, "learning_rate": 0.0011038776331678518, "loss": 0.8154, "step": 6857 }, { "epoch": 0.18403821382567626, "grad_norm": 0.26953125, "learning_rate": 0.0011040386421575204, "loss": 0.8782, "step": 6858 }, { "epoch": 0.1840650493774152, "grad_norm": 0.29296875, "learning_rate": 0.0011041996511471888, "loss": 0.9894, "step": 6859 }, { "epoch": 0.18409188492915415, "grad_norm": 0.255859375, "learning_rate": 0.0011043606601368575, "loss": 0.8479, "step": 6860 }, { "epoch": 0.1841187204808931, "grad_norm": 0.283203125, "learning_rate": 0.0011045216691265261, "loss": 0.9966, "step": 6861 }, { "epoch": 0.18414555603263202, "grad_norm": 0.2734375, "learning_rate": 0.0011046826781161948, "loss": 0.9533, "step": 6862 }, { "epoch": 0.184172391584371, "grad_norm": 0.279296875, "learning_rate": 0.0011048436871058632, "loss": 0.9264, "step": 6863 }, { "epoch": 0.18419922713610992, "grad_norm": 0.294921875, "learning_rate": 0.0011050046960955318, "loss": 0.9889, "step": 6864 }, { "epoch": 0.18422606268784886, "grad_norm": 0.26171875, "learning_rate": 0.0011051657050852005, "loss": 0.8696, "step": 6865 }, { "epoch": 0.1842528982395878, "grad_norm": 0.263671875, "learning_rate": 0.0011053267140748691, "loss": 0.8242, "step": 6866 }, { "epoch": 0.18427973379132676, "grad_norm": 0.259765625, "learning_rate": 0.0011054877230645378, "loss": 0.8993, "step": 6867 }, { "epoch": 0.1843065693430657, "grad_norm": 0.251953125, "learning_rate": 0.0011056487320542062, "loss": 0.8107, "step": 6868 }, { "epoch": 0.18433340489480463, "grad_norm": 0.27734375, "learning_rate": 0.0011058097410438748, "loss": 0.9538, "step": 6869 }, { "epoch": 0.1843602404465436, "grad_norm": 0.2578125, "learning_rate": 0.0011059707500335435, "loss": 0.8174, "step": 6870 }, { "epoch": 0.18438707599828252, "grad_norm": 0.279296875, "learning_rate": 0.0011061317590232121, "loss": 0.9651, "step": 6871 }, { "epoch": 0.18441391155002146, "grad_norm": 0.265625, "learning_rate": 0.0011062927680128808, "loss": 0.9049, "step": 6872 }, { "epoch": 0.18444074710176042, "grad_norm": 0.27734375, "learning_rate": 0.0011064537770025492, "loss": 0.9461, "step": 6873 }, { "epoch": 0.18446758265349936, "grad_norm": 0.2578125, "learning_rate": 0.0011066147859922178, "loss": 0.9053, "step": 6874 }, { "epoch": 0.1844944182052383, "grad_norm": 0.287109375, "learning_rate": 0.0011067757949818863, "loss": 1.0134, "step": 6875 }, { "epoch": 0.18452125375697725, "grad_norm": 0.2578125, "learning_rate": 0.001106936803971555, "loss": 0.8453, "step": 6876 }, { "epoch": 0.1845480893087162, "grad_norm": 0.275390625, "learning_rate": 0.0011070978129612235, "loss": 0.9892, "step": 6877 }, { "epoch": 0.18457492486045513, "grad_norm": 0.267578125, "learning_rate": 0.0011072588219508922, "loss": 0.9512, "step": 6878 }, { "epoch": 0.1846017604121941, "grad_norm": 0.2392578125, "learning_rate": 0.0011074198309405608, "loss": 0.7862, "step": 6879 }, { "epoch": 0.18462859596393302, "grad_norm": 0.265625, "learning_rate": 0.0011075808399302293, "loss": 0.8589, "step": 6880 }, { "epoch": 0.18465543151567196, "grad_norm": 0.28125, "learning_rate": 0.001107741848919898, "loss": 0.9875, "step": 6881 }, { "epoch": 0.1846822670674109, "grad_norm": 0.28515625, "learning_rate": 0.0011079028579095665, "loss": 1.0129, "step": 6882 }, { "epoch": 0.18470910261914986, "grad_norm": 0.2890625, "learning_rate": 0.0011080638668992352, "loss": 0.9857, "step": 6883 }, { "epoch": 0.1847359381708888, "grad_norm": 0.291015625, "learning_rate": 0.0011082248758889038, "loss": 0.9987, "step": 6884 }, { "epoch": 0.18476277372262773, "grad_norm": 0.271484375, "learning_rate": 0.0011083858848785723, "loss": 0.9221, "step": 6885 }, { "epoch": 0.1847896092743667, "grad_norm": 0.267578125, "learning_rate": 0.001108546893868241, "loss": 0.915, "step": 6886 }, { "epoch": 0.18481644482610562, "grad_norm": 0.255859375, "learning_rate": 0.0011087079028579095, "loss": 0.7976, "step": 6887 }, { "epoch": 0.18484328037784456, "grad_norm": 0.28515625, "learning_rate": 0.0011088689118475782, "loss": 1.0084, "step": 6888 }, { "epoch": 0.18487011592958352, "grad_norm": 0.28515625, "learning_rate": 0.0011090299208372466, "loss": 0.942, "step": 6889 }, { "epoch": 0.18489695148132246, "grad_norm": 0.283203125, "learning_rate": 0.0011091909298269152, "loss": 0.8899, "step": 6890 }, { "epoch": 0.1849237870330614, "grad_norm": 0.26953125, "learning_rate": 0.0011093519388165837, "loss": 0.9335, "step": 6891 }, { "epoch": 0.18495062258480036, "grad_norm": 0.2412109375, "learning_rate": 0.0011095129478062523, "loss": 0.8079, "step": 6892 }, { "epoch": 0.1849774581365393, "grad_norm": 0.259765625, "learning_rate": 0.001109673956795921, "loss": 0.8399, "step": 6893 }, { "epoch": 0.18500429368827823, "grad_norm": 0.291015625, "learning_rate": 0.0011098349657855896, "loss": 0.9241, "step": 6894 }, { "epoch": 0.18503112924001716, "grad_norm": 0.271484375, "learning_rate": 0.0011099959747752582, "loss": 0.8778, "step": 6895 }, { "epoch": 0.18505796479175612, "grad_norm": 0.251953125, "learning_rate": 0.0011101569837649267, "loss": 0.8966, "step": 6896 }, { "epoch": 0.18508480034349506, "grad_norm": 0.26953125, "learning_rate": 0.0011103179927545953, "loss": 0.9444, "step": 6897 }, { "epoch": 0.185111635895234, "grad_norm": 0.271484375, "learning_rate": 0.001110479001744264, "loss": 0.9201, "step": 6898 }, { "epoch": 0.18513847144697296, "grad_norm": 0.2578125, "learning_rate": 0.0011106400107339326, "loss": 0.8031, "step": 6899 }, { "epoch": 0.1851653069987119, "grad_norm": 0.25390625, "learning_rate": 0.0011108010197236012, "loss": 0.7236, "step": 6900 }, { "epoch": 0.18519214255045083, "grad_norm": 0.2294921875, "learning_rate": 0.0011109620287132697, "loss": 0.7756, "step": 6901 }, { "epoch": 0.1852189781021898, "grad_norm": 0.26953125, "learning_rate": 0.0011111230377029383, "loss": 0.9182, "step": 6902 }, { "epoch": 0.18524581365392873, "grad_norm": 0.26953125, "learning_rate": 0.001111284046692607, "loss": 0.8944, "step": 6903 }, { "epoch": 0.18527264920566766, "grad_norm": 0.2578125, "learning_rate": 0.0011114450556822756, "loss": 0.8383, "step": 6904 }, { "epoch": 0.18529948475740662, "grad_norm": 0.259765625, "learning_rate": 0.0011116060646719442, "loss": 0.8128, "step": 6905 }, { "epoch": 0.18532632030914556, "grad_norm": 0.271484375, "learning_rate": 0.0011117670736616127, "loss": 0.7969, "step": 6906 }, { "epoch": 0.1853531558608845, "grad_norm": 0.25390625, "learning_rate": 0.0011119280826512813, "loss": 0.8011, "step": 6907 }, { "epoch": 0.18537999141262346, "grad_norm": 0.26171875, "learning_rate": 0.0011120890916409497, "loss": 0.815, "step": 6908 }, { "epoch": 0.1854068269643624, "grad_norm": 0.26953125, "learning_rate": 0.0011122501006306184, "loss": 0.9099, "step": 6909 }, { "epoch": 0.18543366251610133, "grad_norm": 0.267578125, "learning_rate": 0.001112411109620287, "loss": 0.9217, "step": 6910 }, { "epoch": 0.18546049806784026, "grad_norm": 0.259765625, "learning_rate": 0.0011125721186099557, "loss": 0.9001, "step": 6911 }, { "epoch": 0.18548733361957923, "grad_norm": 0.265625, "learning_rate": 0.0011127331275996243, "loss": 0.8544, "step": 6912 }, { "epoch": 0.18551416917131816, "grad_norm": 0.27734375, "learning_rate": 0.0011128941365892927, "loss": 0.957, "step": 6913 }, { "epoch": 0.1855410047230571, "grad_norm": 0.259765625, "learning_rate": 0.0011130551455789614, "loss": 0.8087, "step": 6914 }, { "epoch": 0.18556784027479606, "grad_norm": 0.275390625, "learning_rate": 0.00111321615456863, "loss": 0.9593, "step": 6915 }, { "epoch": 0.185594675826535, "grad_norm": 0.287109375, "learning_rate": 0.0011133771635582987, "loss": 1.0077, "step": 6916 }, { "epoch": 0.18562151137827393, "grad_norm": 0.265625, "learning_rate": 0.0011135381725479673, "loss": 0.8476, "step": 6917 }, { "epoch": 0.1856483469300129, "grad_norm": 0.263671875, "learning_rate": 0.0011136991815376357, "loss": 0.9275, "step": 6918 }, { "epoch": 0.18567518248175183, "grad_norm": 0.267578125, "learning_rate": 0.0011138601905273044, "loss": 0.8328, "step": 6919 }, { "epoch": 0.18570201803349076, "grad_norm": 0.27734375, "learning_rate": 0.001114021199516973, "loss": 0.8426, "step": 6920 }, { "epoch": 0.18572885358522973, "grad_norm": 0.271484375, "learning_rate": 0.0011141822085066414, "loss": 0.8843, "step": 6921 }, { "epoch": 0.18575568913696866, "grad_norm": 0.271484375, "learning_rate": 0.00111434321749631, "loss": 0.9664, "step": 6922 }, { "epoch": 0.1857825246887076, "grad_norm": 0.263671875, "learning_rate": 0.0011145042264859787, "loss": 0.862, "step": 6923 }, { "epoch": 0.18580936024044653, "grad_norm": 0.26171875, "learning_rate": 0.0011146652354756472, "loss": 0.8012, "step": 6924 }, { "epoch": 0.1858361957921855, "grad_norm": 0.25, "learning_rate": 0.0011148262444653158, "loss": 0.8012, "step": 6925 }, { "epoch": 0.18586303134392443, "grad_norm": 0.26953125, "learning_rate": 0.0011149872534549844, "loss": 0.9497, "step": 6926 }, { "epoch": 0.18588986689566336, "grad_norm": 0.2412109375, "learning_rate": 0.001115148262444653, "loss": 0.7692, "step": 6927 }, { "epoch": 0.18591670244740233, "grad_norm": 0.279296875, "learning_rate": 0.0011153092714343217, "loss": 0.9076, "step": 6928 }, { "epoch": 0.18594353799914126, "grad_norm": 0.28125, "learning_rate": 0.0011154702804239902, "loss": 0.8917, "step": 6929 }, { "epoch": 0.1859703735508802, "grad_norm": 0.2412109375, "learning_rate": 0.0011156312894136588, "loss": 0.7115, "step": 6930 }, { "epoch": 0.18599720910261916, "grad_norm": 0.263671875, "learning_rate": 0.0011157922984033274, "loss": 0.9084, "step": 6931 }, { "epoch": 0.1860240446543581, "grad_norm": 0.275390625, "learning_rate": 0.001115953307392996, "loss": 0.9216, "step": 6932 }, { "epoch": 0.18605088020609703, "grad_norm": 0.25390625, "learning_rate": 0.0011161143163826647, "loss": 0.832, "step": 6933 }, { "epoch": 0.186077715757836, "grad_norm": 0.298828125, "learning_rate": 0.0011162753253723331, "loss": 0.9877, "step": 6934 }, { "epoch": 0.18610455130957493, "grad_norm": 0.265625, "learning_rate": 0.0011164363343620018, "loss": 0.8772, "step": 6935 }, { "epoch": 0.18613138686131386, "grad_norm": 0.271484375, "learning_rate": 0.0011165973433516704, "loss": 0.8671, "step": 6936 }, { "epoch": 0.1861582224130528, "grad_norm": 0.25390625, "learning_rate": 0.0011167583523413389, "loss": 0.8292, "step": 6937 }, { "epoch": 0.18618505796479176, "grad_norm": 0.28515625, "learning_rate": 0.0011169193613310075, "loss": 0.8254, "step": 6938 }, { "epoch": 0.1862118935165307, "grad_norm": 0.2890625, "learning_rate": 0.0011170803703206761, "loss": 1.0219, "step": 6939 }, { "epoch": 0.18623872906826963, "grad_norm": 0.263671875, "learning_rate": 0.0011172413793103448, "loss": 0.85, "step": 6940 }, { "epoch": 0.1862655646200086, "grad_norm": 0.259765625, "learning_rate": 0.0011174023883000132, "loss": 0.8215, "step": 6941 }, { "epoch": 0.18629240017174753, "grad_norm": 0.279296875, "learning_rate": 0.0011175633972896819, "loss": 1.0047, "step": 6942 }, { "epoch": 0.18631923572348646, "grad_norm": 0.27734375, "learning_rate": 0.0011177244062793505, "loss": 0.9596, "step": 6943 }, { "epoch": 0.18634607127522543, "grad_norm": 0.263671875, "learning_rate": 0.0011178854152690191, "loss": 0.8987, "step": 6944 }, { "epoch": 0.18637290682696436, "grad_norm": 0.275390625, "learning_rate": 0.0011180464242586878, "loss": 0.9244, "step": 6945 }, { "epoch": 0.1863997423787033, "grad_norm": 0.255859375, "learning_rate": 0.0011182074332483562, "loss": 0.8192, "step": 6946 }, { "epoch": 0.18642657793044226, "grad_norm": 0.263671875, "learning_rate": 0.0011183684422380249, "loss": 0.8316, "step": 6947 }, { "epoch": 0.1864534134821812, "grad_norm": 0.27734375, "learning_rate": 0.0011185294512276935, "loss": 0.9215, "step": 6948 }, { "epoch": 0.18648024903392013, "grad_norm": 0.279296875, "learning_rate": 0.0011186904602173621, "loss": 0.9862, "step": 6949 }, { "epoch": 0.1865070845856591, "grad_norm": 0.26171875, "learning_rate": 0.0011188514692070308, "loss": 0.8448, "step": 6950 }, { "epoch": 0.18653392013739803, "grad_norm": 0.28125, "learning_rate": 0.0011190124781966992, "loss": 0.9817, "step": 6951 }, { "epoch": 0.18656075568913696, "grad_norm": 0.265625, "learning_rate": 0.0011191734871863679, "loss": 0.8513, "step": 6952 }, { "epoch": 0.1865875912408759, "grad_norm": 0.28515625, "learning_rate": 0.0011193344961760363, "loss": 1.0033, "step": 6953 }, { "epoch": 0.18661442679261486, "grad_norm": 0.255859375, "learning_rate": 0.001119495505165705, "loss": 0.8894, "step": 6954 }, { "epoch": 0.1866412623443538, "grad_norm": 0.28125, "learning_rate": 0.0011196565141553736, "loss": 0.9052, "step": 6955 }, { "epoch": 0.18666809789609273, "grad_norm": 0.2734375, "learning_rate": 0.0011198175231450422, "loss": 0.8457, "step": 6956 }, { "epoch": 0.1866949334478317, "grad_norm": 0.2734375, "learning_rate": 0.0011199785321347106, "loss": 0.945, "step": 6957 }, { "epoch": 0.18672176899957063, "grad_norm": 0.2734375, "learning_rate": 0.0011201395411243793, "loss": 0.795, "step": 6958 }, { "epoch": 0.18674860455130957, "grad_norm": 0.265625, "learning_rate": 0.001120300550114048, "loss": 0.8749, "step": 6959 }, { "epoch": 0.18677544010304853, "grad_norm": 0.248046875, "learning_rate": 0.0011204615591037166, "loss": 0.8131, "step": 6960 }, { "epoch": 0.18680227565478746, "grad_norm": 0.267578125, "learning_rate": 0.0011206225680933852, "loss": 0.8925, "step": 6961 }, { "epoch": 0.1868291112065264, "grad_norm": 0.259765625, "learning_rate": 0.0011207835770830536, "loss": 0.8357, "step": 6962 }, { "epoch": 0.18685594675826536, "grad_norm": 0.287109375, "learning_rate": 0.0011209445860727223, "loss": 1.0323, "step": 6963 }, { "epoch": 0.1868827823100043, "grad_norm": 0.263671875, "learning_rate": 0.001121105595062391, "loss": 0.9376, "step": 6964 }, { "epoch": 0.18690961786174323, "grad_norm": 0.287109375, "learning_rate": 0.0011212666040520596, "loss": 1.0335, "step": 6965 }, { "epoch": 0.18693645341348217, "grad_norm": 0.265625, "learning_rate": 0.0011214276130417282, "loss": 0.9216, "step": 6966 }, { "epoch": 0.18696328896522113, "grad_norm": 0.26171875, "learning_rate": 0.0011215886220313966, "loss": 0.8643, "step": 6967 }, { "epoch": 0.18699012451696007, "grad_norm": 0.275390625, "learning_rate": 0.0011217496310210653, "loss": 0.9674, "step": 6968 }, { "epoch": 0.187016960068699, "grad_norm": 0.259765625, "learning_rate": 0.0011219106400107337, "loss": 0.8793, "step": 6969 }, { "epoch": 0.18704379562043796, "grad_norm": 0.2734375, "learning_rate": 0.0011220716490004023, "loss": 1.0194, "step": 6970 }, { "epoch": 0.1870706311721769, "grad_norm": 0.2734375, "learning_rate": 0.001122232657990071, "loss": 0.9227, "step": 6971 }, { "epoch": 0.18709746672391583, "grad_norm": 0.271484375, "learning_rate": 0.0011223936669797396, "loss": 0.9645, "step": 6972 }, { "epoch": 0.1871243022756548, "grad_norm": 0.248046875, "learning_rate": 0.0011225546759694083, "loss": 0.7863, "step": 6973 }, { "epoch": 0.18715113782739373, "grad_norm": 0.267578125, "learning_rate": 0.0011227156849590767, "loss": 0.8654, "step": 6974 }, { "epoch": 0.18717797337913267, "grad_norm": 0.263671875, "learning_rate": 0.0011228766939487453, "loss": 0.8118, "step": 6975 }, { "epoch": 0.18720480893087163, "grad_norm": 0.275390625, "learning_rate": 0.001123037702938414, "loss": 0.8806, "step": 6976 }, { "epoch": 0.18723164448261057, "grad_norm": 0.265625, "learning_rate": 0.0011231987119280826, "loss": 0.8999, "step": 6977 }, { "epoch": 0.1872584800343495, "grad_norm": 0.271484375, "learning_rate": 0.0011233597209177513, "loss": 0.8722, "step": 6978 }, { "epoch": 0.18728531558608846, "grad_norm": 0.275390625, "learning_rate": 0.0011235207299074197, "loss": 0.9625, "step": 6979 }, { "epoch": 0.1873121511378274, "grad_norm": 0.28125, "learning_rate": 0.0011236817388970883, "loss": 0.9588, "step": 6980 }, { "epoch": 0.18733898668956633, "grad_norm": 0.271484375, "learning_rate": 0.001123842747886757, "loss": 0.8762, "step": 6981 }, { "epoch": 0.18736582224130527, "grad_norm": 0.2490234375, "learning_rate": 0.0011240037568764256, "loss": 0.8159, "step": 6982 }, { "epoch": 0.18739265779304423, "grad_norm": 0.287109375, "learning_rate": 0.001124164765866094, "loss": 0.9667, "step": 6983 }, { "epoch": 0.18741949334478317, "grad_norm": 0.373046875, "learning_rate": 0.0011243257748557627, "loss": 0.9514, "step": 6984 }, { "epoch": 0.1874463288965221, "grad_norm": 0.310546875, "learning_rate": 0.0011244867838454313, "loss": 0.7914, "step": 6985 }, { "epoch": 0.18747316444826106, "grad_norm": 0.345703125, "learning_rate": 0.0011246477928350998, "loss": 0.9404, "step": 6986 }, { "epoch": 0.1875, "grad_norm": 0.37109375, "learning_rate": 0.0011248088018247684, "loss": 0.8112, "step": 6987 }, { "epoch": 0.18752683555173894, "grad_norm": 0.302734375, "learning_rate": 0.001124969810814437, "loss": 0.8011, "step": 6988 }, { "epoch": 0.1875536711034779, "grad_norm": 0.265625, "learning_rate": 0.0011251308198041057, "loss": 0.8104, "step": 6989 }, { "epoch": 0.18758050665521683, "grad_norm": 0.298828125, "learning_rate": 0.0011252918287937741, "loss": 0.7766, "step": 6990 }, { "epoch": 0.18760734220695577, "grad_norm": 0.30078125, "learning_rate": 0.0011254528377834428, "loss": 0.7291, "step": 6991 }, { "epoch": 0.18763417775869473, "grad_norm": 0.275390625, "learning_rate": 0.0011256138467731114, "loss": 0.8413, "step": 6992 }, { "epoch": 0.18766101331043367, "grad_norm": 0.283203125, "learning_rate": 0.00112577485576278, "loss": 0.8667, "step": 6993 }, { "epoch": 0.1876878488621726, "grad_norm": 0.314453125, "learning_rate": 0.0011259358647524487, "loss": 0.7098, "step": 6994 }, { "epoch": 0.18771468441391154, "grad_norm": 0.29296875, "learning_rate": 0.0011260968737421171, "loss": 0.7736, "step": 6995 }, { "epoch": 0.1877415199656505, "grad_norm": 0.302734375, "learning_rate": 0.0011262578827317858, "loss": 0.9358, "step": 6996 }, { "epoch": 0.18776835551738943, "grad_norm": 0.26171875, "learning_rate": 0.0011264188917214544, "loss": 0.769, "step": 6997 }, { "epoch": 0.18779519106912837, "grad_norm": 0.353515625, "learning_rate": 0.0011265799007111228, "loss": 0.9107, "step": 6998 }, { "epoch": 0.18782202662086733, "grad_norm": 0.294921875, "learning_rate": 0.0011267409097007915, "loss": 0.7484, "step": 6999 }, { "epoch": 0.18784886217260627, "grad_norm": 0.291015625, "learning_rate": 0.00112690191869046, "loss": 0.8659, "step": 7000 }, { "epoch": 0.1878756977243452, "grad_norm": 0.2578125, "learning_rate": 0.0011270629276801288, "loss": 0.8479, "step": 7001 }, { "epoch": 0.18790253327608417, "grad_norm": 0.263671875, "learning_rate": 0.0011272239366697972, "loss": 0.7445, "step": 7002 }, { "epoch": 0.1879293688278231, "grad_norm": 0.28515625, "learning_rate": 0.0011273849456594658, "loss": 0.9463, "step": 7003 }, { "epoch": 0.18795620437956204, "grad_norm": 0.28125, "learning_rate": 0.0011275459546491345, "loss": 0.9391, "step": 7004 }, { "epoch": 0.187983039931301, "grad_norm": 0.259765625, "learning_rate": 0.001127706963638803, "loss": 0.8535, "step": 7005 }, { "epoch": 0.18800987548303993, "grad_norm": 0.26171875, "learning_rate": 0.0011278679726284717, "loss": 0.7418, "step": 7006 }, { "epoch": 0.18803671103477887, "grad_norm": 0.265625, "learning_rate": 0.0011280289816181402, "loss": 0.7913, "step": 7007 }, { "epoch": 0.18806354658651783, "grad_norm": 0.271484375, "learning_rate": 0.0011281899906078088, "loss": 0.8774, "step": 7008 }, { "epoch": 0.18809038213825677, "grad_norm": 0.255859375, "learning_rate": 0.0011283509995974775, "loss": 0.7415, "step": 7009 }, { "epoch": 0.1881172176899957, "grad_norm": 0.265625, "learning_rate": 0.001128512008587146, "loss": 0.8151, "step": 7010 }, { "epoch": 0.18814405324173464, "grad_norm": 0.251953125, "learning_rate": 0.0011286730175768147, "loss": 0.7568, "step": 7011 }, { "epoch": 0.1881708887934736, "grad_norm": 0.255859375, "learning_rate": 0.0011288340265664832, "loss": 0.8224, "step": 7012 }, { "epoch": 0.18819772434521254, "grad_norm": 0.2470703125, "learning_rate": 0.0011289950355561518, "loss": 0.7517, "step": 7013 }, { "epoch": 0.18822455989695147, "grad_norm": 0.26953125, "learning_rate": 0.0011291560445458202, "loss": 0.8396, "step": 7014 }, { "epoch": 0.18825139544869043, "grad_norm": 0.26171875, "learning_rate": 0.0011293170535354889, "loss": 0.8935, "step": 7015 }, { "epoch": 0.18827823100042937, "grad_norm": 0.251953125, "learning_rate": 0.0011294780625251575, "loss": 0.8259, "step": 7016 }, { "epoch": 0.1883050665521683, "grad_norm": 0.26953125, "learning_rate": 0.0011296390715148262, "loss": 0.8491, "step": 7017 }, { "epoch": 0.18833190210390727, "grad_norm": 0.279296875, "learning_rate": 0.0011298000805044948, "loss": 0.8548, "step": 7018 }, { "epoch": 0.1883587376556462, "grad_norm": 0.263671875, "learning_rate": 0.0011299610894941632, "loss": 0.8555, "step": 7019 }, { "epoch": 0.18838557320738514, "grad_norm": 0.27734375, "learning_rate": 0.0011301220984838319, "loss": 0.8998, "step": 7020 }, { "epoch": 0.1884124087591241, "grad_norm": 0.25390625, "learning_rate": 0.0011302831074735005, "loss": 0.7083, "step": 7021 }, { "epoch": 0.18843924431086304, "grad_norm": 0.271484375, "learning_rate": 0.0011304441164631692, "loss": 0.8576, "step": 7022 }, { "epoch": 0.18846607986260197, "grad_norm": 0.267578125, "learning_rate": 0.0011306051254528378, "loss": 0.8716, "step": 7023 }, { "epoch": 0.1884929154143409, "grad_norm": 0.2353515625, "learning_rate": 0.0011307661344425062, "loss": 0.7441, "step": 7024 }, { "epoch": 0.18851975096607987, "grad_norm": 0.26953125, "learning_rate": 0.0011309271434321749, "loss": 0.8981, "step": 7025 }, { "epoch": 0.1885465865178188, "grad_norm": 0.26171875, "learning_rate": 0.0011310881524218435, "loss": 0.8562, "step": 7026 }, { "epoch": 0.18857342206955774, "grad_norm": 0.263671875, "learning_rate": 0.0011312491614115122, "loss": 0.7798, "step": 7027 }, { "epoch": 0.1886002576212967, "grad_norm": 0.265625, "learning_rate": 0.0011314101704011806, "loss": 0.8355, "step": 7028 }, { "epoch": 0.18862709317303564, "grad_norm": 0.2451171875, "learning_rate": 0.0011315711793908492, "loss": 0.7763, "step": 7029 }, { "epoch": 0.18865392872477457, "grad_norm": 0.267578125, "learning_rate": 0.0011317321883805177, "loss": 0.8498, "step": 7030 }, { "epoch": 0.18868076427651354, "grad_norm": 0.26953125, "learning_rate": 0.0011318931973701863, "loss": 0.7894, "step": 7031 }, { "epoch": 0.18870759982825247, "grad_norm": 0.248046875, "learning_rate": 0.001132054206359855, "loss": 0.7649, "step": 7032 }, { "epoch": 0.1887344353799914, "grad_norm": 0.2373046875, "learning_rate": 0.0011322152153495236, "loss": 0.7156, "step": 7033 }, { "epoch": 0.18876127093173037, "grad_norm": 0.248046875, "learning_rate": 0.0011323762243391922, "loss": 0.846, "step": 7034 }, { "epoch": 0.1887881064834693, "grad_norm": 0.26171875, "learning_rate": 0.0011325372333288607, "loss": 0.8074, "step": 7035 }, { "epoch": 0.18881494203520824, "grad_norm": 0.255859375, "learning_rate": 0.0011326982423185293, "loss": 0.7845, "step": 7036 }, { "epoch": 0.1888417775869472, "grad_norm": 0.26171875, "learning_rate": 0.001132859251308198, "loss": 0.8598, "step": 7037 }, { "epoch": 0.18886861313868614, "grad_norm": 0.263671875, "learning_rate": 0.0011330202602978666, "loss": 0.9242, "step": 7038 }, { "epoch": 0.18889544869042507, "grad_norm": 0.2490234375, "learning_rate": 0.0011331812692875352, "loss": 0.7945, "step": 7039 }, { "epoch": 0.188922284242164, "grad_norm": 0.255859375, "learning_rate": 0.0011333422782772037, "loss": 0.8718, "step": 7040 }, { "epoch": 0.18894911979390297, "grad_norm": 0.271484375, "learning_rate": 0.0011335032872668723, "loss": 0.8805, "step": 7041 }, { "epoch": 0.1889759553456419, "grad_norm": 0.232421875, "learning_rate": 0.001133664296256541, "loss": 0.7302, "step": 7042 }, { "epoch": 0.18900279089738084, "grad_norm": 0.2421875, "learning_rate": 0.0011338253052462096, "loss": 0.7456, "step": 7043 }, { "epoch": 0.1890296264491198, "grad_norm": 0.26171875, "learning_rate": 0.0011339863142358782, "loss": 0.8433, "step": 7044 }, { "epoch": 0.18905646200085874, "grad_norm": 0.255859375, "learning_rate": 0.0011341473232255467, "loss": 0.896, "step": 7045 }, { "epoch": 0.18908329755259767, "grad_norm": 0.251953125, "learning_rate": 0.0011343083322152153, "loss": 0.8362, "step": 7046 }, { "epoch": 0.18911013310433664, "grad_norm": 0.255859375, "learning_rate": 0.0011344693412048837, "loss": 0.7467, "step": 7047 }, { "epoch": 0.18913696865607557, "grad_norm": 0.283203125, "learning_rate": 0.0011346303501945524, "loss": 0.9998, "step": 7048 }, { "epoch": 0.1891638042078145, "grad_norm": 0.26171875, "learning_rate": 0.001134791359184221, "loss": 0.8168, "step": 7049 }, { "epoch": 0.18919063975955347, "grad_norm": 0.2451171875, "learning_rate": 0.0011349523681738896, "loss": 0.8819, "step": 7050 }, { "epoch": 0.1892174753112924, "grad_norm": 0.251953125, "learning_rate": 0.0011351133771635583, "loss": 0.784, "step": 7051 }, { "epoch": 0.18924431086303134, "grad_norm": 0.2373046875, "learning_rate": 0.0011352743861532267, "loss": 0.725, "step": 7052 }, { "epoch": 0.18927114641477027, "grad_norm": 0.2451171875, "learning_rate": 0.0011354353951428954, "loss": 0.7267, "step": 7053 }, { "epoch": 0.18929798196650924, "grad_norm": 0.283203125, "learning_rate": 0.001135596404132564, "loss": 0.8648, "step": 7054 }, { "epoch": 0.18932481751824817, "grad_norm": 0.240234375, "learning_rate": 0.0011357574131222326, "loss": 0.7201, "step": 7055 }, { "epoch": 0.1893516530699871, "grad_norm": 0.251953125, "learning_rate": 0.0011359184221119013, "loss": 0.8355, "step": 7056 }, { "epoch": 0.18937848862172607, "grad_norm": 0.26171875, "learning_rate": 0.0011360794311015697, "loss": 0.8971, "step": 7057 }, { "epoch": 0.189405324173465, "grad_norm": 0.2578125, "learning_rate": 0.0011362404400912384, "loss": 0.762, "step": 7058 }, { "epoch": 0.18943215972520394, "grad_norm": 0.26171875, "learning_rate": 0.001136401449080907, "loss": 0.7637, "step": 7059 }, { "epoch": 0.1894589952769429, "grad_norm": 0.25, "learning_rate": 0.0011365624580705754, "loss": 0.7054, "step": 7060 }, { "epoch": 0.18948583082868184, "grad_norm": 0.267578125, "learning_rate": 0.001136723467060244, "loss": 0.8897, "step": 7061 }, { "epoch": 0.18951266638042077, "grad_norm": 0.251953125, "learning_rate": 0.0011368844760499127, "loss": 0.8354, "step": 7062 }, { "epoch": 0.18953950193215974, "grad_norm": 0.26171875, "learning_rate": 0.0011370454850395811, "loss": 0.8475, "step": 7063 }, { "epoch": 0.18956633748389867, "grad_norm": 0.259765625, "learning_rate": 0.0011372064940292498, "loss": 0.9255, "step": 7064 }, { "epoch": 0.1895931730356376, "grad_norm": 0.2431640625, "learning_rate": 0.0011373675030189184, "loss": 0.7983, "step": 7065 }, { "epoch": 0.18962000858737654, "grad_norm": 0.255859375, "learning_rate": 0.001137528512008587, "loss": 0.8467, "step": 7066 }, { "epoch": 0.1896468441391155, "grad_norm": 0.26171875, "learning_rate": 0.0011376895209982557, "loss": 0.789, "step": 7067 }, { "epoch": 0.18967367969085444, "grad_norm": 0.2314453125, "learning_rate": 0.0011378505299879241, "loss": 0.7636, "step": 7068 }, { "epoch": 0.18970051524259338, "grad_norm": 0.24609375, "learning_rate": 0.0011380115389775928, "loss": 0.8263, "step": 7069 }, { "epoch": 0.18972735079433234, "grad_norm": 0.248046875, "learning_rate": 0.0011381725479672614, "loss": 0.7941, "step": 7070 }, { "epoch": 0.18975418634607127, "grad_norm": 0.2490234375, "learning_rate": 0.00113833355695693, "loss": 0.7284, "step": 7071 }, { "epoch": 0.1897810218978102, "grad_norm": 0.248046875, "learning_rate": 0.0011384945659465987, "loss": 0.8155, "step": 7072 }, { "epoch": 0.18980785744954917, "grad_norm": 0.255859375, "learning_rate": 0.0011386555749362671, "loss": 0.87, "step": 7073 }, { "epoch": 0.1898346930012881, "grad_norm": 0.25, "learning_rate": 0.0011388165839259358, "loss": 0.7791, "step": 7074 }, { "epoch": 0.18986152855302704, "grad_norm": 0.259765625, "learning_rate": 0.0011389775929156044, "loss": 0.8533, "step": 7075 }, { "epoch": 0.189888364104766, "grad_norm": 0.2578125, "learning_rate": 0.0011391386019052728, "loss": 0.8394, "step": 7076 }, { "epoch": 0.18991519965650494, "grad_norm": 0.23046875, "learning_rate": 0.0011392996108949415, "loss": 0.6611, "step": 7077 }, { "epoch": 0.18994203520824388, "grad_norm": 0.2490234375, "learning_rate": 0.0011394606198846101, "loss": 0.785, "step": 7078 }, { "epoch": 0.18996887075998284, "grad_norm": 0.240234375, "learning_rate": 0.0011396216288742788, "loss": 0.7623, "step": 7079 }, { "epoch": 0.18999570631172177, "grad_norm": 0.25390625, "learning_rate": 0.0011397826378639472, "loss": 0.8422, "step": 7080 }, { "epoch": 0.1900225418634607, "grad_norm": 0.255859375, "learning_rate": 0.0011399436468536158, "loss": 0.8016, "step": 7081 }, { "epoch": 0.19004937741519964, "grad_norm": 0.26953125, "learning_rate": 0.0011401046558432845, "loss": 0.8073, "step": 7082 }, { "epoch": 0.1900762129669386, "grad_norm": 0.271484375, "learning_rate": 0.0011402656648329531, "loss": 0.8795, "step": 7083 }, { "epoch": 0.19010304851867754, "grad_norm": 0.24609375, "learning_rate": 0.0011404266738226218, "loss": 0.7945, "step": 7084 }, { "epoch": 0.19012988407041648, "grad_norm": 0.275390625, "learning_rate": 0.0011405876828122902, "loss": 0.9376, "step": 7085 }, { "epoch": 0.19015671962215544, "grad_norm": 0.27734375, "learning_rate": 0.0011407486918019588, "loss": 0.9858, "step": 7086 }, { "epoch": 0.19018355517389438, "grad_norm": 0.25390625, "learning_rate": 0.0011409097007916275, "loss": 0.8648, "step": 7087 }, { "epoch": 0.1902103907256333, "grad_norm": 0.24609375, "learning_rate": 0.0011410707097812961, "loss": 0.7983, "step": 7088 }, { "epoch": 0.19023722627737227, "grad_norm": 0.2353515625, "learning_rate": 0.0011412317187709648, "loss": 0.8025, "step": 7089 }, { "epoch": 0.1902640618291112, "grad_norm": 0.265625, "learning_rate": 0.0011413927277606332, "loss": 0.8719, "step": 7090 }, { "epoch": 0.19029089738085014, "grad_norm": 0.26171875, "learning_rate": 0.0011415537367503016, "loss": 0.968, "step": 7091 }, { "epoch": 0.1903177329325891, "grad_norm": 0.259765625, "learning_rate": 0.0011417147457399703, "loss": 0.795, "step": 7092 }, { "epoch": 0.19034456848432804, "grad_norm": 0.2421875, "learning_rate": 0.001141875754729639, "loss": 0.787, "step": 7093 }, { "epoch": 0.19037140403606698, "grad_norm": 0.25, "learning_rate": 0.0011420367637193075, "loss": 0.7401, "step": 7094 }, { "epoch": 0.1903982395878059, "grad_norm": 0.251953125, "learning_rate": 0.0011421977727089762, "loss": 0.8113, "step": 7095 }, { "epoch": 0.19042507513954487, "grad_norm": 0.2373046875, "learning_rate": 0.0011423587816986446, "loss": 0.7606, "step": 7096 }, { "epoch": 0.1904519106912838, "grad_norm": 0.259765625, "learning_rate": 0.0011425197906883133, "loss": 0.7858, "step": 7097 }, { "epoch": 0.19047874624302275, "grad_norm": 0.265625, "learning_rate": 0.001142680799677982, "loss": 0.9452, "step": 7098 }, { "epoch": 0.1905055817947617, "grad_norm": 0.251953125, "learning_rate": 0.0011428418086676505, "loss": 0.781, "step": 7099 }, { "epoch": 0.19053241734650064, "grad_norm": 0.23828125, "learning_rate": 0.0011430028176573192, "loss": 0.7167, "step": 7100 }, { "epoch": 0.19055925289823958, "grad_norm": 0.228515625, "learning_rate": 0.0011431638266469876, "loss": 0.7024, "step": 7101 }, { "epoch": 0.19058608844997854, "grad_norm": 0.2431640625, "learning_rate": 0.0011433248356366563, "loss": 0.7181, "step": 7102 }, { "epoch": 0.19061292400171748, "grad_norm": 0.26171875, "learning_rate": 0.001143485844626325, "loss": 0.8843, "step": 7103 }, { "epoch": 0.1906397595534564, "grad_norm": 0.267578125, "learning_rate": 0.0011436468536159935, "loss": 0.8772, "step": 7104 }, { "epoch": 0.19066659510519537, "grad_norm": 0.259765625, "learning_rate": 0.0011438078626056622, "loss": 0.8529, "step": 7105 }, { "epoch": 0.1906934306569343, "grad_norm": 0.24609375, "learning_rate": 0.0011439688715953306, "loss": 0.7022, "step": 7106 }, { "epoch": 0.19072026620867324, "grad_norm": 0.25390625, "learning_rate": 0.0011441298805849993, "loss": 0.8816, "step": 7107 }, { "epoch": 0.1907471017604122, "grad_norm": 0.25, "learning_rate": 0.0011442908895746677, "loss": 0.7204, "step": 7108 }, { "epoch": 0.19077393731215114, "grad_norm": 0.251953125, "learning_rate": 0.0011444518985643363, "loss": 0.8506, "step": 7109 }, { "epoch": 0.19080077286389008, "grad_norm": 0.23828125, "learning_rate": 0.001144612907554005, "loss": 0.8284, "step": 7110 }, { "epoch": 0.190827608415629, "grad_norm": 0.251953125, "learning_rate": 0.0011447739165436736, "loss": 0.7946, "step": 7111 }, { "epoch": 0.19085444396736798, "grad_norm": 0.265625, "learning_rate": 0.0011449349255333423, "loss": 0.8353, "step": 7112 }, { "epoch": 0.1908812795191069, "grad_norm": 0.251953125, "learning_rate": 0.0011450959345230107, "loss": 0.786, "step": 7113 }, { "epoch": 0.19090811507084585, "grad_norm": 0.26171875, "learning_rate": 0.0011452569435126793, "loss": 0.8453, "step": 7114 }, { "epoch": 0.1909349506225848, "grad_norm": 0.2412109375, "learning_rate": 0.001145417952502348, "loss": 0.7603, "step": 7115 }, { "epoch": 0.19096178617432374, "grad_norm": 0.251953125, "learning_rate": 0.0011455789614920166, "loss": 0.7655, "step": 7116 }, { "epoch": 0.19098862172606268, "grad_norm": 0.25, "learning_rate": 0.0011457399704816853, "loss": 0.8334, "step": 7117 }, { "epoch": 0.19101545727780164, "grad_norm": 0.236328125, "learning_rate": 0.0011459009794713537, "loss": 0.7849, "step": 7118 }, { "epoch": 0.19104229282954058, "grad_norm": 0.2421875, "learning_rate": 0.0011460619884610223, "loss": 0.7997, "step": 7119 }, { "epoch": 0.1910691283812795, "grad_norm": 0.255859375, "learning_rate": 0.001146222997450691, "loss": 0.8192, "step": 7120 }, { "epoch": 0.19109596393301848, "grad_norm": 0.248046875, "learning_rate": 0.0011463840064403596, "loss": 0.8045, "step": 7121 }, { "epoch": 0.1911227994847574, "grad_norm": 0.2578125, "learning_rate": 0.001146545015430028, "loss": 0.8442, "step": 7122 }, { "epoch": 0.19114963503649635, "grad_norm": 0.224609375, "learning_rate": 0.0011467060244196967, "loss": 0.7531, "step": 7123 }, { "epoch": 0.19117647058823528, "grad_norm": 0.23828125, "learning_rate": 0.0011468670334093653, "loss": 0.8127, "step": 7124 }, { "epoch": 0.19120330613997424, "grad_norm": 0.25390625, "learning_rate": 0.0011470280423990337, "loss": 0.8267, "step": 7125 }, { "epoch": 0.19123014169171318, "grad_norm": 0.263671875, "learning_rate": 0.0011471890513887024, "loss": 0.8573, "step": 7126 }, { "epoch": 0.19125697724345211, "grad_norm": 0.25390625, "learning_rate": 0.001147350060378371, "loss": 0.8848, "step": 7127 }, { "epoch": 0.19128381279519108, "grad_norm": 0.251953125, "learning_rate": 0.0011475110693680397, "loss": 0.83, "step": 7128 }, { "epoch": 0.19131064834693, "grad_norm": 0.255859375, "learning_rate": 0.001147672078357708, "loss": 0.9277, "step": 7129 }, { "epoch": 0.19133748389866895, "grad_norm": 0.236328125, "learning_rate": 0.0011478330873473767, "loss": 0.7598, "step": 7130 }, { "epoch": 0.1913643194504079, "grad_norm": 0.220703125, "learning_rate": 0.0011479940963370454, "loss": 0.6658, "step": 7131 }, { "epoch": 0.19139115500214685, "grad_norm": 0.2392578125, "learning_rate": 0.001148155105326714, "loss": 0.7657, "step": 7132 }, { "epoch": 0.19141799055388578, "grad_norm": 0.2294921875, "learning_rate": 0.0011483161143163827, "loss": 0.7863, "step": 7133 }, { "epoch": 0.19144482610562474, "grad_norm": 0.2353515625, "learning_rate": 0.001148477123306051, "loss": 0.8002, "step": 7134 }, { "epoch": 0.19147166165736368, "grad_norm": 0.267578125, "learning_rate": 0.0011486381322957197, "loss": 0.8413, "step": 7135 }, { "epoch": 0.1914984972091026, "grad_norm": 0.224609375, "learning_rate": 0.0011487991412853884, "loss": 0.6661, "step": 7136 }, { "epoch": 0.19152533276084158, "grad_norm": 0.255859375, "learning_rate": 0.001148960150275057, "loss": 0.8638, "step": 7137 }, { "epoch": 0.1915521683125805, "grad_norm": 0.25, "learning_rate": 0.0011491211592647254, "loss": 0.8542, "step": 7138 }, { "epoch": 0.19157900386431945, "grad_norm": 0.2255859375, "learning_rate": 0.001149282168254394, "loss": 0.6718, "step": 7139 }, { "epoch": 0.19160583941605838, "grad_norm": 0.2451171875, "learning_rate": 0.0011494431772440627, "loss": 0.8481, "step": 7140 }, { "epoch": 0.19163267496779735, "grad_norm": 0.267578125, "learning_rate": 0.0011496041862337312, "loss": 0.8145, "step": 7141 }, { "epoch": 0.19165951051953628, "grad_norm": 0.251953125, "learning_rate": 0.0011497651952233998, "loss": 0.7885, "step": 7142 }, { "epoch": 0.19168634607127522, "grad_norm": 0.251953125, "learning_rate": 0.0011499262042130684, "loss": 0.7058, "step": 7143 }, { "epoch": 0.19171318162301418, "grad_norm": 0.2421875, "learning_rate": 0.001150087213202737, "loss": 0.8206, "step": 7144 }, { "epoch": 0.1917400171747531, "grad_norm": 0.2470703125, "learning_rate": 0.0011502482221924057, "loss": 0.8404, "step": 7145 }, { "epoch": 0.19176685272649205, "grad_norm": 0.27734375, "learning_rate": 0.0011504092311820742, "loss": 0.9405, "step": 7146 }, { "epoch": 0.191793688278231, "grad_norm": 0.265625, "learning_rate": 0.0011505702401717428, "loss": 0.8305, "step": 7147 }, { "epoch": 0.19182052382996995, "grad_norm": 0.248046875, "learning_rate": 0.0011507312491614114, "loss": 0.8193, "step": 7148 }, { "epoch": 0.19184735938170888, "grad_norm": 0.263671875, "learning_rate": 0.00115089225815108, "loss": 0.8428, "step": 7149 }, { "epoch": 0.19187419493344784, "grad_norm": 0.287109375, "learning_rate": 0.0011510532671407487, "loss": 1.0424, "step": 7150 }, { "epoch": 0.19190103048518678, "grad_norm": 0.25390625, "learning_rate": 0.0011512142761304172, "loss": 0.7573, "step": 7151 }, { "epoch": 0.19192786603692572, "grad_norm": 0.236328125, "learning_rate": 0.0011513752851200858, "loss": 0.7022, "step": 7152 }, { "epoch": 0.19195470158866465, "grad_norm": 0.25, "learning_rate": 0.0011515362941097542, "loss": 0.7105, "step": 7153 }, { "epoch": 0.1919815371404036, "grad_norm": 0.244140625, "learning_rate": 0.0011516973030994229, "loss": 0.7244, "step": 7154 }, { "epoch": 0.19200837269214255, "grad_norm": 0.259765625, "learning_rate": 0.0011518583120890915, "loss": 0.7914, "step": 7155 }, { "epoch": 0.19203520824388148, "grad_norm": 0.25390625, "learning_rate": 0.0011520193210787602, "loss": 0.8338, "step": 7156 }, { "epoch": 0.19206204379562045, "grad_norm": 0.2236328125, "learning_rate": 0.0011521803300684288, "loss": 0.714, "step": 7157 }, { "epoch": 0.19208887934735938, "grad_norm": 0.2470703125, "learning_rate": 0.0011523413390580972, "loss": 0.754, "step": 7158 }, { "epoch": 0.19211571489909832, "grad_norm": 0.2578125, "learning_rate": 0.0011525023480477659, "loss": 0.8686, "step": 7159 }, { "epoch": 0.19214255045083728, "grad_norm": 0.232421875, "learning_rate": 0.0011526633570374345, "loss": 0.7523, "step": 7160 }, { "epoch": 0.19216938600257621, "grad_norm": 0.2265625, "learning_rate": 0.0011528243660271032, "loss": 0.7411, "step": 7161 }, { "epoch": 0.19219622155431515, "grad_norm": 0.24609375, "learning_rate": 0.0011529853750167716, "loss": 0.8486, "step": 7162 }, { "epoch": 0.1922230571060541, "grad_norm": 0.2421875, "learning_rate": 0.0011531463840064402, "loss": 0.731, "step": 7163 }, { "epoch": 0.19224989265779305, "grad_norm": 0.228515625, "learning_rate": 0.0011533073929961089, "loss": 0.7269, "step": 7164 }, { "epoch": 0.19227672820953198, "grad_norm": 0.2431640625, "learning_rate": 0.0011534684019857775, "loss": 0.8523, "step": 7165 }, { "epoch": 0.19230356376127092, "grad_norm": 0.265625, "learning_rate": 0.0011536294109754461, "loss": 0.9227, "step": 7166 }, { "epoch": 0.19233039931300988, "grad_norm": 0.251953125, "learning_rate": 0.0011537904199651146, "loss": 0.8082, "step": 7167 }, { "epoch": 0.19235723486474882, "grad_norm": 0.248046875, "learning_rate": 0.0011539514289547832, "loss": 0.8141, "step": 7168 }, { "epoch": 0.19238407041648775, "grad_norm": 0.25390625, "learning_rate": 0.0011541124379444516, "loss": 0.961, "step": 7169 }, { "epoch": 0.19241090596822671, "grad_norm": 0.2353515625, "learning_rate": 0.0011542734469341203, "loss": 0.7888, "step": 7170 }, { "epoch": 0.19243774151996565, "grad_norm": 0.2451171875, "learning_rate": 0.001154434455923789, "loss": 0.8132, "step": 7171 }, { "epoch": 0.19246457707170458, "grad_norm": 0.265625, "learning_rate": 0.0011545954649134576, "loss": 0.9812, "step": 7172 }, { "epoch": 0.19249141262344355, "grad_norm": 0.25390625, "learning_rate": 0.0011547564739031262, "loss": 0.7248, "step": 7173 }, { "epoch": 0.19251824817518248, "grad_norm": 0.251953125, "learning_rate": 0.0011549174828927946, "loss": 0.8586, "step": 7174 }, { "epoch": 0.19254508372692142, "grad_norm": 0.2333984375, "learning_rate": 0.0011550784918824633, "loss": 0.7179, "step": 7175 }, { "epoch": 0.19257191927866038, "grad_norm": 0.2421875, "learning_rate": 0.001155239500872132, "loss": 0.8044, "step": 7176 }, { "epoch": 0.19259875483039932, "grad_norm": 0.25390625, "learning_rate": 0.0011554005098618006, "loss": 0.846, "step": 7177 }, { "epoch": 0.19262559038213825, "grad_norm": 0.267578125, "learning_rate": 0.0011555615188514692, "loss": 0.7893, "step": 7178 }, { "epoch": 0.1926524259338772, "grad_norm": 0.259765625, "learning_rate": 0.0011557225278411376, "loss": 0.7329, "step": 7179 }, { "epoch": 0.19267926148561615, "grad_norm": 0.240234375, "learning_rate": 0.0011558835368308063, "loss": 0.7548, "step": 7180 }, { "epoch": 0.19270609703735508, "grad_norm": 0.2421875, "learning_rate": 0.001156044545820475, "loss": 0.7849, "step": 7181 }, { "epoch": 0.19273293258909402, "grad_norm": 0.25, "learning_rate": 0.0011562055548101436, "loss": 0.7342, "step": 7182 }, { "epoch": 0.19275976814083298, "grad_norm": 0.25390625, "learning_rate": 0.0011563665637998122, "loss": 0.8099, "step": 7183 }, { "epoch": 0.19278660369257192, "grad_norm": 0.2578125, "learning_rate": 0.0011565275727894806, "loss": 0.8301, "step": 7184 }, { "epoch": 0.19281343924431085, "grad_norm": 0.244140625, "learning_rate": 0.0011566885817791493, "loss": 0.7583, "step": 7185 }, { "epoch": 0.19284027479604982, "grad_norm": 0.26171875, "learning_rate": 0.0011568495907688177, "loss": 0.8652, "step": 7186 }, { "epoch": 0.19286711034778875, "grad_norm": 0.2177734375, "learning_rate": 0.0011570105997584863, "loss": 0.6605, "step": 7187 }, { "epoch": 0.19289394589952769, "grad_norm": 0.248046875, "learning_rate": 0.001157171608748155, "loss": 0.7451, "step": 7188 }, { "epoch": 0.19292078145126665, "grad_norm": 0.251953125, "learning_rate": 0.0011573326177378236, "loss": 0.829, "step": 7189 }, { "epoch": 0.19294761700300558, "grad_norm": 0.2890625, "learning_rate": 0.0011574936267274923, "loss": 0.878, "step": 7190 }, { "epoch": 0.19297445255474452, "grad_norm": 0.25390625, "learning_rate": 0.0011576546357171607, "loss": 0.863, "step": 7191 }, { "epoch": 0.19300128810648348, "grad_norm": 0.263671875, "learning_rate": 0.0011578156447068293, "loss": 0.954, "step": 7192 }, { "epoch": 0.19302812365822242, "grad_norm": 0.25, "learning_rate": 0.001157976653696498, "loss": 0.8438, "step": 7193 }, { "epoch": 0.19305495920996135, "grad_norm": 0.2451171875, "learning_rate": 0.0011581376626861666, "loss": 0.7306, "step": 7194 }, { "epoch": 0.1930817947617003, "grad_norm": 0.26953125, "learning_rate": 0.0011582986716758353, "loss": 0.8473, "step": 7195 }, { "epoch": 0.19310863031343925, "grad_norm": 0.265625, "learning_rate": 0.0011584596806655037, "loss": 0.8949, "step": 7196 }, { "epoch": 0.19313546586517819, "grad_norm": 0.251953125, "learning_rate": 0.0011586206896551723, "loss": 0.8662, "step": 7197 }, { "epoch": 0.19316230141691712, "grad_norm": 0.2333984375, "learning_rate": 0.001158781698644841, "loss": 0.719, "step": 7198 }, { "epoch": 0.19318913696865608, "grad_norm": 0.2578125, "learning_rate": 0.0011589427076345094, "loss": 0.885, "step": 7199 }, { "epoch": 0.19321597252039502, "grad_norm": 0.240234375, "learning_rate": 0.001159103716624178, "loss": 0.772, "step": 7200 }, { "epoch": 0.19324280807213395, "grad_norm": 0.27734375, "learning_rate": 0.0011592647256138467, "loss": 0.9614, "step": 7201 }, { "epoch": 0.19326964362387292, "grad_norm": 0.267578125, "learning_rate": 0.0011594257346035151, "loss": 0.8947, "step": 7202 }, { "epoch": 0.19329647917561185, "grad_norm": 0.25, "learning_rate": 0.0011595867435931838, "loss": 0.8537, "step": 7203 }, { "epoch": 0.1933233147273508, "grad_norm": 0.240234375, "learning_rate": 0.0011597477525828524, "loss": 0.7276, "step": 7204 }, { "epoch": 0.19335015027908975, "grad_norm": 0.23828125, "learning_rate": 0.001159908761572521, "loss": 0.7261, "step": 7205 }, { "epoch": 0.19337698583082868, "grad_norm": 0.2490234375, "learning_rate": 0.0011600697705621897, "loss": 0.8117, "step": 7206 }, { "epoch": 0.19340382138256762, "grad_norm": 0.2392578125, "learning_rate": 0.0011602307795518581, "loss": 0.8347, "step": 7207 }, { "epoch": 0.19343065693430658, "grad_norm": 0.25390625, "learning_rate": 0.0011603917885415268, "loss": 0.6779, "step": 7208 }, { "epoch": 0.19345749248604552, "grad_norm": 0.267578125, "learning_rate": 0.0011605527975311954, "loss": 0.7773, "step": 7209 }, { "epoch": 0.19348432803778445, "grad_norm": 0.25, "learning_rate": 0.001160713806520864, "loss": 0.8141, "step": 7210 }, { "epoch": 0.1935111635895234, "grad_norm": 0.259765625, "learning_rate": 0.0011608748155105327, "loss": 0.8214, "step": 7211 }, { "epoch": 0.19353799914126235, "grad_norm": 0.23828125, "learning_rate": 0.0011610358245002011, "loss": 0.8467, "step": 7212 }, { "epoch": 0.1935648346930013, "grad_norm": 0.248046875, "learning_rate": 0.0011611968334898698, "loss": 0.8137, "step": 7213 }, { "epoch": 0.19359167024474022, "grad_norm": 0.2470703125, "learning_rate": 0.0011613578424795384, "loss": 0.8198, "step": 7214 }, { "epoch": 0.19361850579647918, "grad_norm": 0.251953125, "learning_rate": 0.0011615188514692068, "loss": 0.8076, "step": 7215 }, { "epoch": 0.19364534134821812, "grad_norm": 0.25390625, "learning_rate": 0.0011616798604588755, "loss": 0.7513, "step": 7216 }, { "epoch": 0.19367217689995705, "grad_norm": 0.244140625, "learning_rate": 0.0011618408694485441, "loss": 0.8085, "step": 7217 }, { "epoch": 0.19369901245169602, "grad_norm": 0.259765625, "learning_rate": 0.0011620018784382128, "loss": 0.8392, "step": 7218 }, { "epoch": 0.19372584800343495, "grad_norm": 0.27734375, "learning_rate": 0.0011621628874278812, "loss": 0.8013, "step": 7219 }, { "epoch": 0.1937526835551739, "grad_norm": 0.26953125, "learning_rate": 0.0011623238964175498, "loss": 0.8324, "step": 7220 }, { "epoch": 0.19377951910691285, "grad_norm": 0.248046875, "learning_rate": 0.0011624849054072185, "loss": 0.7903, "step": 7221 }, { "epoch": 0.19380635465865179, "grad_norm": 0.2431640625, "learning_rate": 0.0011626459143968871, "loss": 0.7232, "step": 7222 }, { "epoch": 0.19383319021039072, "grad_norm": 0.24609375, "learning_rate": 0.0011628069233865558, "loss": 0.8605, "step": 7223 }, { "epoch": 0.19386002576212966, "grad_norm": 0.26953125, "learning_rate": 0.0011629679323762242, "loss": 0.8205, "step": 7224 }, { "epoch": 0.19388686131386862, "grad_norm": 0.244140625, "learning_rate": 0.0011631289413658928, "loss": 0.7226, "step": 7225 }, { "epoch": 0.19391369686560755, "grad_norm": 0.236328125, "learning_rate": 0.0011632899503555615, "loss": 0.8646, "step": 7226 }, { "epoch": 0.1939405324173465, "grad_norm": 0.2373046875, "learning_rate": 0.0011634509593452301, "loss": 0.8083, "step": 7227 }, { "epoch": 0.19396736796908545, "grad_norm": 0.2470703125, "learning_rate": 0.0011636119683348988, "loss": 0.8283, "step": 7228 }, { "epoch": 0.1939942035208244, "grad_norm": 0.271484375, "learning_rate": 0.0011637729773245672, "loss": 0.8826, "step": 7229 }, { "epoch": 0.19402103907256332, "grad_norm": 0.251953125, "learning_rate": 0.0011639339863142356, "loss": 0.7836, "step": 7230 }, { "epoch": 0.19404787462430229, "grad_norm": 0.251953125, "learning_rate": 0.0011640949953039042, "loss": 0.8353, "step": 7231 }, { "epoch": 0.19407471017604122, "grad_norm": 0.24609375, "learning_rate": 0.0011642560042935729, "loss": 0.7906, "step": 7232 }, { "epoch": 0.19410154572778016, "grad_norm": 0.2578125, "learning_rate": 0.0011644170132832415, "loss": 0.8429, "step": 7233 }, { "epoch": 0.19412838127951912, "grad_norm": 0.26953125, "learning_rate": 0.0011645780222729102, "loss": 0.7641, "step": 7234 }, { "epoch": 0.19415521683125805, "grad_norm": 0.2392578125, "learning_rate": 0.0011647390312625786, "loss": 0.7389, "step": 7235 }, { "epoch": 0.194182052382997, "grad_norm": 0.2421875, "learning_rate": 0.0011649000402522472, "loss": 0.7596, "step": 7236 }, { "epoch": 0.19420888793473595, "grad_norm": 0.2353515625, "learning_rate": 0.0011650610492419159, "loss": 0.7634, "step": 7237 }, { "epoch": 0.1942357234864749, "grad_norm": 0.24609375, "learning_rate": 0.0011652220582315845, "loss": 0.8063, "step": 7238 }, { "epoch": 0.19426255903821382, "grad_norm": 0.259765625, "learning_rate": 0.0011653830672212532, "loss": 0.8112, "step": 7239 }, { "epoch": 0.19428939458995276, "grad_norm": 0.2412109375, "learning_rate": 0.0011655440762109216, "loss": 0.7708, "step": 7240 }, { "epoch": 0.19431623014169172, "grad_norm": 0.2333984375, "learning_rate": 0.0011657050852005902, "loss": 0.7262, "step": 7241 }, { "epoch": 0.19434306569343066, "grad_norm": 0.216796875, "learning_rate": 0.0011658660941902589, "loss": 0.6812, "step": 7242 }, { "epoch": 0.1943699012451696, "grad_norm": 0.2333984375, "learning_rate": 0.0011660271031799275, "loss": 0.7251, "step": 7243 }, { "epoch": 0.19439673679690855, "grad_norm": 0.240234375, "learning_rate": 0.0011661881121695962, "loss": 0.7278, "step": 7244 }, { "epoch": 0.1944235723486475, "grad_norm": 0.2451171875, "learning_rate": 0.0011663491211592646, "loss": 0.8639, "step": 7245 }, { "epoch": 0.19445040790038642, "grad_norm": 0.2470703125, "learning_rate": 0.0011665101301489332, "loss": 0.7502, "step": 7246 }, { "epoch": 0.1944772434521254, "grad_norm": 0.275390625, "learning_rate": 0.0011666711391386017, "loss": 0.9098, "step": 7247 }, { "epoch": 0.19450407900386432, "grad_norm": 0.240234375, "learning_rate": 0.0011668321481282703, "loss": 0.7338, "step": 7248 }, { "epoch": 0.19453091455560326, "grad_norm": 0.23828125, "learning_rate": 0.001166993157117939, "loss": 0.8154, "step": 7249 }, { "epoch": 0.19455775010734222, "grad_norm": 0.2490234375, "learning_rate": 0.0011671541661076076, "loss": 0.79, "step": 7250 }, { "epoch": 0.19458458565908116, "grad_norm": 0.2373046875, "learning_rate": 0.0011673151750972762, "loss": 0.7171, "step": 7251 }, { "epoch": 0.1946114212108201, "grad_norm": 0.27734375, "learning_rate": 0.0011674761840869447, "loss": 0.8123, "step": 7252 }, { "epoch": 0.19463825676255903, "grad_norm": 0.248046875, "learning_rate": 0.0011676371930766133, "loss": 0.8275, "step": 7253 }, { "epoch": 0.194665092314298, "grad_norm": 0.259765625, "learning_rate": 0.001167798202066282, "loss": 0.8644, "step": 7254 }, { "epoch": 0.19469192786603692, "grad_norm": 0.2392578125, "learning_rate": 0.0011679592110559506, "loss": 0.7565, "step": 7255 }, { "epoch": 0.19471876341777586, "grad_norm": 0.2412109375, "learning_rate": 0.0011681202200456192, "loss": 0.7779, "step": 7256 }, { "epoch": 0.19474559896951482, "grad_norm": 0.240234375, "learning_rate": 0.0011682812290352877, "loss": 0.7649, "step": 7257 }, { "epoch": 0.19477243452125376, "grad_norm": 0.2470703125, "learning_rate": 0.0011684422380249563, "loss": 0.8502, "step": 7258 }, { "epoch": 0.1947992700729927, "grad_norm": 0.251953125, "learning_rate": 0.001168603247014625, "loss": 0.7889, "step": 7259 }, { "epoch": 0.19482610562473165, "grad_norm": 0.234375, "learning_rate": 0.0011687642560042936, "loss": 0.7685, "step": 7260 }, { "epoch": 0.1948529411764706, "grad_norm": 0.234375, "learning_rate": 0.001168925264993962, "loss": 0.7299, "step": 7261 }, { "epoch": 0.19487977672820953, "grad_norm": 0.251953125, "learning_rate": 0.0011690862739836307, "loss": 0.8186, "step": 7262 }, { "epoch": 0.1949066122799485, "grad_norm": 0.2490234375, "learning_rate": 0.0011692472829732993, "loss": 0.8359, "step": 7263 }, { "epoch": 0.19493344783168742, "grad_norm": 0.275390625, "learning_rate": 0.0011694082919629677, "loss": 0.8332, "step": 7264 }, { "epoch": 0.19496028338342636, "grad_norm": 0.23828125, "learning_rate": 0.0011695693009526364, "loss": 0.7631, "step": 7265 }, { "epoch": 0.1949871189351653, "grad_norm": 0.255859375, "learning_rate": 0.001169730309942305, "loss": 0.8515, "step": 7266 }, { "epoch": 0.19501395448690426, "grad_norm": 0.240234375, "learning_rate": 0.0011698913189319737, "loss": 0.8753, "step": 7267 }, { "epoch": 0.1950407900386432, "grad_norm": 0.248046875, "learning_rate": 0.001170052327921642, "loss": 0.8108, "step": 7268 }, { "epoch": 0.19506762559038213, "grad_norm": 0.263671875, "learning_rate": 0.0011702133369113107, "loss": 0.8292, "step": 7269 }, { "epoch": 0.1950944611421211, "grad_norm": 0.244140625, "learning_rate": 0.0011703743459009794, "loss": 0.7518, "step": 7270 }, { "epoch": 0.19512129669386002, "grad_norm": 0.259765625, "learning_rate": 0.001170535354890648, "loss": 0.794, "step": 7271 }, { "epoch": 0.19514813224559896, "grad_norm": 0.265625, "learning_rate": 0.0011706963638803167, "loss": 0.8428, "step": 7272 }, { "epoch": 0.19517496779733792, "grad_norm": 0.25390625, "learning_rate": 0.001170857372869985, "loss": 0.859, "step": 7273 }, { "epoch": 0.19520180334907686, "grad_norm": 0.2412109375, "learning_rate": 0.0011710183818596537, "loss": 0.8145, "step": 7274 }, { "epoch": 0.1952286389008158, "grad_norm": 0.271484375, "learning_rate": 0.0011711793908493224, "loss": 0.8486, "step": 7275 }, { "epoch": 0.19525547445255476, "grad_norm": 0.251953125, "learning_rate": 0.001171340399838991, "loss": 0.8531, "step": 7276 }, { "epoch": 0.1952823100042937, "grad_norm": 0.232421875, "learning_rate": 0.0011715014088286594, "loss": 0.6836, "step": 7277 }, { "epoch": 0.19530914555603263, "grad_norm": 0.2421875, "learning_rate": 0.001171662417818328, "loss": 0.7133, "step": 7278 }, { "epoch": 0.1953359811077716, "grad_norm": 0.240234375, "learning_rate": 0.0011718234268079967, "loss": 0.7776, "step": 7279 }, { "epoch": 0.19536281665951052, "grad_norm": 0.251953125, "learning_rate": 0.0011719844357976651, "loss": 0.8775, "step": 7280 }, { "epoch": 0.19538965221124946, "grad_norm": 0.265625, "learning_rate": 0.0011721454447873338, "loss": 0.8974, "step": 7281 }, { "epoch": 0.1954164877629884, "grad_norm": 0.23828125, "learning_rate": 0.0011723064537770024, "loss": 0.696, "step": 7282 }, { "epoch": 0.19544332331472736, "grad_norm": 0.2236328125, "learning_rate": 0.001172467462766671, "loss": 0.6707, "step": 7283 }, { "epoch": 0.1954701588664663, "grad_norm": 0.2431640625, "learning_rate": 0.0011726284717563397, "loss": 0.7457, "step": 7284 }, { "epoch": 0.19549699441820523, "grad_norm": 0.244140625, "learning_rate": 0.0011727894807460081, "loss": 0.8413, "step": 7285 }, { "epoch": 0.1955238299699442, "grad_norm": 0.2470703125, "learning_rate": 0.0011729504897356768, "loss": 0.7425, "step": 7286 }, { "epoch": 0.19555066552168313, "grad_norm": 0.2734375, "learning_rate": 0.0011731114987253454, "loss": 0.8906, "step": 7287 }, { "epoch": 0.19557750107342206, "grad_norm": 0.2275390625, "learning_rate": 0.001173272507715014, "loss": 0.7214, "step": 7288 }, { "epoch": 0.19560433662516102, "grad_norm": 0.2373046875, "learning_rate": 0.0011734335167046827, "loss": 0.7208, "step": 7289 }, { "epoch": 0.19563117217689996, "grad_norm": 0.263671875, "learning_rate": 0.0011735945256943511, "loss": 0.8999, "step": 7290 }, { "epoch": 0.1956580077286389, "grad_norm": 0.2431640625, "learning_rate": 0.0011737555346840198, "loss": 0.6619, "step": 7291 }, { "epoch": 0.19568484328037786, "grad_norm": 0.2578125, "learning_rate": 0.0011739165436736882, "loss": 0.8584, "step": 7292 }, { "epoch": 0.1957116788321168, "grad_norm": 0.2470703125, "learning_rate": 0.0011740775526633569, "loss": 0.7681, "step": 7293 }, { "epoch": 0.19573851438385573, "grad_norm": 0.21484375, "learning_rate": 0.0011742385616530255, "loss": 0.6755, "step": 7294 }, { "epoch": 0.19576534993559466, "grad_norm": 0.25390625, "learning_rate": 0.0011743995706426941, "loss": 0.8097, "step": 7295 }, { "epoch": 0.19579218548733363, "grad_norm": 0.267578125, "learning_rate": 0.0011745605796323628, "loss": 0.9353, "step": 7296 }, { "epoch": 0.19581902103907256, "grad_norm": 0.2431640625, "learning_rate": 0.0011747215886220312, "loss": 0.7641, "step": 7297 }, { "epoch": 0.1958458565908115, "grad_norm": 0.2353515625, "learning_rate": 0.0011748825976116998, "loss": 0.7738, "step": 7298 }, { "epoch": 0.19587269214255046, "grad_norm": 0.2412109375, "learning_rate": 0.0011750436066013685, "loss": 0.8343, "step": 7299 }, { "epoch": 0.1958995276942894, "grad_norm": 0.244140625, "learning_rate": 0.0011752046155910371, "loss": 0.86, "step": 7300 }, { "epoch": 0.19592636324602833, "grad_norm": 0.2294921875, "learning_rate": 0.0011753656245807056, "loss": 0.6888, "step": 7301 }, { "epoch": 0.1959531987977673, "grad_norm": 0.2451171875, "learning_rate": 0.0011755266335703742, "loss": 0.7914, "step": 7302 }, { "epoch": 0.19598003434950623, "grad_norm": 0.251953125, "learning_rate": 0.0011756876425600428, "loss": 0.8169, "step": 7303 }, { "epoch": 0.19600686990124516, "grad_norm": 0.2353515625, "learning_rate": 0.0011758486515497115, "loss": 0.7732, "step": 7304 }, { "epoch": 0.19603370545298412, "grad_norm": 0.2197265625, "learning_rate": 0.0011760096605393801, "loss": 0.6982, "step": 7305 }, { "epoch": 0.19606054100472306, "grad_norm": 0.259765625, "learning_rate": 0.0011761706695290486, "loss": 0.8838, "step": 7306 }, { "epoch": 0.196087376556462, "grad_norm": 0.2294921875, "learning_rate": 0.0011763316785187172, "loss": 0.7273, "step": 7307 }, { "epoch": 0.19611421210820096, "grad_norm": 0.25, "learning_rate": 0.0011764926875083856, "loss": 0.8388, "step": 7308 }, { "epoch": 0.1961410476599399, "grad_norm": 0.255859375, "learning_rate": 0.0011766536964980543, "loss": 0.8526, "step": 7309 }, { "epoch": 0.19616788321167883, "grad_norm": 0.2412109375, "learning_rate": 0.001176814705487723, "loss": 0.7777, "step": 7310 }, { "epoch": 0.19619471876341776, "grad_norm": 0.255859375, "learning_rate": 0.0011769757144773916, "loss": 0.8567, "step": 7311 }, { "epoch": 0.19622155431515673, "grad_norm": 0.255859375, "learning_rate": 0.0011771367234670602, "loss": 0.8641, "step": 7312 }, { "epoch": 0.19624838986689566, "grad_norm": 0.2451171875, "learning_rate": 0.0011772977324567286, "loss": 0.7703, "step": 7313 }, { "epoch": 0.1962752254186346, "grad_norm": 0.25, "learning_rate": 0.0011774587414463973, "loss": 0.8449, "step": 7314 }, { "epoch": 0.19630206097037356, "grad_norm": 0.263671875, "learning_rate": 0.001177619750436066, "loss": 0.7914, "step": 7315 }, { "epoch": 0.1963288965221125, "grad_norm": 0.244140625, "learning_rate": 0.0011777807594257346, "loss": 0.8702, "step": 7316 }, { "epoch": 0.19635573207385143, "grad_norm": 0.255859375, "learning_rate": 0.0011779417684154032, "loss": 0.8893, "step": 7317 }, { "epoch": 0.1963825676255904, "grad_norm": 0.2392578125, "learning_rate": 0.0011781027774050716, "loss": 0.8361, "step": 7318 }, { "epoch": 0.19640940317732933, "grad_norm": 0.25390625, "learning_rate": 0.0011782637863947403, "loss": 0.7772, "step": 7319 }, { "epoch": 0.19643623872906826, "grad_norm": 0.275390625, "learning_rate": 0.001178424795384409, "loss": 0.8407, "step": 7320 }, { "epoch": 0.19646307428080723, "grad_norm": 0.2578125, "learning_rate": 0.0011785858043740776, "loss": 0.7875, "step": 7321 }, { "epoch": 0.19648990983254616, "grad_norm": 0.2490234375, "learning_rate": 0.0011787468133637462, "loss": 0.8021, "step": 7322 }, { "epoch": 0.1965167453842851, "grad_norm": 0.2578125, "learning_rate": 0.0011789078223534146, "loss": 0.9045, "step": 7323 }, { "epoch": 0.19654358093602403, "grad_norm": 0.267578125, "learning_rate": 0.0011790688313430833, "loss": 0.7932, "step": 7324 }, { "epoch": 0.196570416487763, "grad_norm": 0.2412109375, "learning_rate": 0.0011792298403327517, "loss": 0.754, "step": 7325 }, { "epoch": 0.19659725203950193, "grad_norm": 0.2490234375, "learning_rate": 0.0011793908493224203, "loss": 0.8154, "step": 7326 }, { "epoch": 0.19662408759124086, "grad_norm": 0.234375, "learning_rate": 0.001179551858312089, "loss": 0.7516, "step": 7327 }, { "epoch": 0.19665092314297983, "grad_norm": 0.2421875, "learning_rate": 0.0011797128673017576, "loss": 0.646, "step": 7328 }, { "epoch": 0.19667775869471876, "grad_norm": 0.228515625, "learning_rate": 0.0011798738762914263, "loss": 0.7098, "step": 7329 }, { "epoch": 0.1967045942464577, "grad_norm": 0.2421875, "learning_rate": 0.0011800348852810947, "loss": 0.8091, "step": 7330 }, { "epoch": 0.19673142979819666, "grad_norm": 0.2451171875, "learning_rate": 0.0011801958942707633, "loss": 0.7276, "step": 7331 }, { "epoch": 0.1967582653499356, "grad_norm": 0.26171875, "learning_rate": 0.001180356903260432, "loss": 0.8199, "step": 7332 }, { "epoch": 0.19678510090167453, "grad_norm": 0.2470703125, "learning_rate": 0.0011805179122501006, "loss": 0.8604, "step": 7333 }, { "epoch": 0.1968119364534135, "grad_norm": 0.244140625, "learning_rate": 0.001180678921239769, "loss": 0.8625, "step": 7334 }, { "epoch": 0.19683877200515243, "grad_norm": 0.2431640625, "learning_rate": 0.0011808399302294377, "loss": 0.8126, "step": 7335 }, { "epoch": 0.19686560755689136, "grad_norm": 0.2490234375, "learning_rate": 0.0011810009392191063, "loss": 0.7666, "step": 7336 }, { "epoch": 0.19689244310863033, "grad_norm": 0.234375, "learning_rate": 0.001181161948208775, "loss": 0.7615, "step": 7337 }, { "epoch": 0.19691927866036926, "grad_norm": 0.267578125, "learning_rate": 0.0011813229571984436, "loss": 0.896, "step": 7338 }, { "epoch": 0.1969461142121082, "grad_norm": 0.2421875, "learning_rate": 0.001181483966188112, "loss": 0.7308, "step": 7339 }, { "epoch": 0.19697294976384713, "grad_norm": 0.265625, "learning_rate": 0.0011816449751777807, "loss": 0.8893, "step": 7340 }, { "epoch": 0.1969997853155861, "grad_norm": 0.251953125, "learning_rate": 0.001181805984167449, "loss": 0.9115, "step": 7341 }, { "epoch": 0.19702662086732503, "grad_norm": 0.2392578125, "learning_rate": 0.0011819669931571177, "loss": 0.7705, "step": 7342 }, { "epoch": 0.19705345641906397, "grad_norm": 0.236328125, "learning_rate": 0.0011821280021467864, "loss": 0.7168, "step": 7343 }, { "epoch": 0.19708029197080293, "grad_norm": 0.21484375, "learning_rate": 0.001182289011136455, "loss": 0.637, "step": 7344 }, { "epoch": 0.19710712752254186, "grad_norm": 0.25390625, "learning_rate": 0.0011824500201261237, "loss": 0.874, "step": 7345 }, { "epoch": 0.1971339630742808, "grad_norm": 0.2412109375, "learning_rate": 0.001182611029115792, "loss": 0.7843, "step": 7346 }, { "epoch": 0.19716079862601976, "grad_norm": 0.2431640625, "learning_rate": 0.0011827720381054607, "loss": 0.8145, "step": 7347 }, { "epoch": 0.1971876341777587, "grad_norm": 0.236328125, "learning_rate": 0.0011829330470951294, "loss": 0.7383, "step": 7348 }, { "epoch": 0.19721446972949763, "grad_norm": 0.2373046875, "learning_rate": 0.001183094056084798, "loss": 0.7536, "step": 7349 }, { "epoch": 0.1972413052812366, "grad_norm": 0.244140625, "learning_rate": 0.0011832550650744667, "loss": 0.7968, "step": 7350 }, { "epoch": 0.19726814083297553, "grad_norm": 0.2431640625, "learning_rate": 0.001183416074064135, "loss": 0.7871, "step": 7351 }, { "epoch": 0.19729497638471447, "grad_norm": 0.2578125, "learning_rate": 0.0011835770830538037, "loss": 0.868, "step": 7352 }, { "epoch": 0.1973218119364534, "grad_norm": 0.26171875, "learning_rate": 0.0011837380920434724, "loss": 0.6878, "step": 7353 }, { "epoch": 0.19734864748819236, "grad_norm": 0.248046875, "learning_rate": 0.0011838991010331408, "loss": 0.7881, "step": 7354 }, { "epoch": 0.1973754830399313, "grad_norm": 0.275390625, "learning_rate": 0.0011840601100228095, "loss": 0.9713, "step": 7355 }, { "epoch": 0.19740231859167023, "grad_norm": 0.2392578125, "learning_rate": 0.001184221119012478, "loss": 0.8397, "step": 7356 }, { "epoch": 0.1974291541434092, "grad_norm": 0.2451171875, "learning_rate": 0.0011843821280021467, "loss": 0.8093, "step": 7357 }, { "epoch": 0.19745598969514813, "grad_norm": 0.267578125, "learning_rate": 0.0011845431369918152, "loss": 0.8816, "step": 7358 }, { "epoch": 0.19748282524688707, "grad_norm": 0.263671875, "learning_rate": 0.0011847041459814838, "loss": 0.8802, "step": 7359 }, { "epoch": 0.19750966079862603, "grad_norm": 0.25, "learning_rate": 0.0011848651549711525, "loss": 0.7594, "step": 7360 }, { "epoch": 0.19753649635036497, "grad_norm": 0.2431640625, "learning_rate": 0.001185026163960821, "loss": 0.7428, "step": 7361 }, { "epoch": 0.1975633319021039, "grad_norm": 0.2451171875, "learning_rate": 0.0011851871729504897, "loss": 0.7647, "step": 7362 }, { "epoch": 0.19759016745384286, "grad_norm": 0.259765625, "learning_rate": 0.0011853481819401582, "loss": 0.8964, "step": 7363 }, { "epoch": 0.1976170030055818, "grad_norm": 0.2578125, "learning_rate": 0.0011855091909298268, "loss": 0.9005, "step": 7364 }, { "epoch": 0.19764383855732073, "grad_norm": 0.263671875, "learning_rate": 0.0011856701999194955, "loss": 0.917, "step": 7365 }, { "epoch": 0.19767067410905967, "grad_norm": 0.2294921875, "learning_rate": 0.001185831208909164, "loss": 0.6834, "step": 7366 }, { "epoch": 0.19769750966079863, "grad_norm": 0.240234375, "learning_rate": 0.0011859922178988327, "loss": 0.8328, "step": 7367 }, { "epoch": 0.19772434521253757, "grad_norm": 0.251953125, "learning_rate": 0.0011861532268885012, "loss": 0.7958, "step": 7368 }, { "epoch": 0.1977511807642765, "grad_norm": 0.25390625, "learning_rate": 0.0011863142358781696, "loss": 0.8188, "step": 7369 }, { "epoch": 0.19777801631601546, "grad_norm": 0.240234375, "learning_rate": 0.0011864752448678382, "loss": 0.7349, "step": 7370 }, { "epoch": 0.1978048518677544, "grad_norm": 0.251953125, "learning_rate": 0.0011866362538575069, "loss": 0.7115, "step": 7371 }, { "epoch": 0.19783168741949334, "grad_norm": 0.255859375, "learning_rate": 0.0011867972628471755, "loss": 0.8039, "step": 7372 }, { "epoch": 0.1978585229712323, "grad_norm": 0.275390625, "learning_rate": 0.0011869582718368442, "loss": 0.8496, "step": 7373 }, { "epoch": 0.19788535852297123, "grad_norm": 0.279296875, "learning_rate": 0.0011871192808265126, "loss": 0.8468, "step": 7374 }, { "epoch": 0.19791219407471017, "grad_norm": 0.263671875, "learning_rate": 0.0011872802898161812, "loss": 0.7508, "step": 7375 }, { "epoch": 0.19793902962644913, "grad_norm": 0.2578125, "learning_rate": 0.0011874412988058499, "loss": 0.91, "step": 7376 }, { "epoch": 0.19796586517818807, "grad_norm": 0.2197265625, "learning_rate": 0.0011876023077955185, "loss": 0.634, "step": 7377 }, { "epoch": 0.197992700729927, "grad_norm": 0.2373046875, "learning_rate": 0.0011877633167851872, "loss": 0.7546, "step": 7378 }, { "epoch": 0.19801953628166596, "grad_norm": 0.24609375, "learning_rate": 0.0011879243257748556, "loss": 0.7846, "step": 7379 }, { "epoch": 0.1980463718334049, "grad_norm": 0.25390625, "learning_rate": 0.0011880853347645242, "loss": 0.791, "step": 7380 }, { "epoch": 0.19807320738514383, "grad_norm": 0.255859375, "learning_rate": 0.0011882463437541929, "loss": 0.8565, "step": 7381 }, { "epoch": 0.19810004293688277, "grad_norm": 0.255859375, "learning_rate": 0.0011884073527438615, "loss": 0.8353, "step": 7382 }, { "epoch": 0.19812687848862173, "grad_norm": 0.24609375, "learning_rate": 0.0011885683617335302, "loss": 0.8373, "step": 7383 }, { "epoch": 0.19815371404036067, "grad_norm": 0.2412109375, "learning_rate": 0.0011887293707231986, "loss": 0.7676, "step": 7384 }, { "epoch": 0.1981805495920996, "grad_norm": 0.26171875, "learning_rate": 0.0011888903797128672, "loss": 0.9047, "step": 7385 }, { "epoch": 0.19820738514383857, "grad_norm": 0.2255859375, "learning_rate": 0.0011890513887025357, "loss": 0.7324, "step": 7386 }, { "epoch": 0.1982342206955775, "grad_norm": 0.25390625, "learning_rate": 0.0011892123976922043, "loss": 0.858, "step": 7387 }, { "epoch": 0.19826105624731644, "grad_norm": 0.263671875, "learning_rate": 0.001189373406681873, "loss": 0.9251, "step": 7388 }, { "epoch": 0.1982878917990554, "grad_norm": 0.228515625, "learning_rate": 0.0011895344156715416, "loss": 0.6763, "step": 7389 }, { "epoch": 0.19831472735079433, "grad_norm": 0.24609375, "learning_rate": 0.0011896954246612102, "loss": 0.7965, "step": 7390 }, { "epoch": 0.19834156290253327, "grad_norm": 0.2392578125, "learning_rate": 0.0011898564336508786, "loss": 0.787, "step": 7391 }, { "epoch": 0.19836839845427223, "grad_norm": 0.255859375, "learning_rate": 0.0011900174426405473, "loss": 0.8328, "step": 7392 }, { "epoch": 0.19839523400601117, "grad_norm": 0.25390625, "learning_rate": 0.001190178451630216, "loss": 0.8163, "step": 7393 }, { "epoch": 0.1984220695577501, "grad_norm": 0.25, "learning_rate": 0.0011903394606198846, "loss": 0.8085, "step": 7394 }, { "epoch": 0.19844890510948904, "grad_norm": 0.2392578125, "learning_rate": 0.0011905004696095532, "loss": 0.7655, "step": 7395 }, { "epoch": 0.198475740661228, "grad_norm": 0.259765625, "learning_rate": 0.0011906614785992216, "loss": 0.9184, "step": 7396 }, { "epoch": 0.19850257621296694, "grad_norm": 0.255859375, "learning_rate": 0.0011908224875888903, "loss": 0.8867, "step": 7397 }, { "epoch": 0.19852941176470587, "grad_norm": 0.255859375, "learning_rate": 0.001190983496578559, "loss": 0.8537, "step": 7398 }, { "epoch": 0.19855624731644483, "grad_norm": 0.2451171875, "learning_rate": 0.0011911445055682276, "loss": 0.8023, "step": 7399 }, { "epoch": 0.19858308286818377, "grad_norm": 0.2578125, "learning_rate": 0.001191305514557896, "loss": 0.8522, "step": 7400 }, { "epoch": 0.1986099184199227, "grad_norm": 0.248046875, "learning_rate": 0.0011914665235475646, "loss": 0.8369, "step": 7401 }, { "epoch": 0.19863675397166167, "grad_norm": 0.24609375, "learning_rate": 0.001191627532537233, "loss": 0.8298, "step": 7402 }, { "epoch": 0.1986635895234006, "grad_norm": 0.255859375, "learning_rate": 0.0011917885415269017, "loss": 0.751, "step": 7403 }, { "epoch": 0.19869042507513954, "grad_norm": 0.23046875, "learning_rate": 0.0011919495505165704, "loss": 0.7115, "step": 7404 }, { "epoch": 0.1987172606268785, "grad_norm": 0.251953125, "learning_rate": 0.001192110559506239, "loss": 0.8354, "step": 7405 }, { "epoch": 0.19874409617861744, "grad_norm": 0.21484375, "learning_rate": 0.0011922715684959076, "loss": 0.6294, "step": 7406 }, { "epoch": 0.19877093173035637, "grad_norm": 0.2392578125, "learning_rate": 0.001192432577485576, "loss": 0.7753, "step": 7407 }, { "epoch": 0.19879776728209533, "grad_norm": 0.23828125, "learning_rate": 0.0011925935864752447, "loss": 0.7165, "step": 7408 }, { "epoch": 0.19882460283383427, "grad_norm": 0.2373046875, "learning_rate": 0.0011927545954649134, "loss": 0.7329, "step": 7409 }, { "epoch": 0.1988514383855732, "grad_norm": 0.244140625, "learning_rate": 0.001192915604454582, "loss": 0.7598, "step": 7410 }, { "epoch": 0.19887827393731214, "grad_norm": 0.23828125, "learning_rate": 0.0011930766134442506, "loss": 0.7365, "step": 7411 }, { "epoch": 0.1989051094890511, "grad_norm": 0.2373046875, "learning_rate": 0.001193237622433919, "loss": 0.7764, "step": 7412 }, { "epoch": 0.19893194504079004, "grad_norm": 0.265625, "learning_rate": 0.0011933986314235877, "loss": 0.8584, "step": 7413 }, { "epoch": 0.19895878059252897, "grad_norm": 0.2412109375, "learning_rate": 0.0011935596404132563, "loss": 0.7942, "step": 7414 }, { "epoch": 0.19898561614426793, "grad_norm": 0.248046875, "learning_rate": 0.001193720649402925, "loss": 0.7679, "step": 7415 }, { "epoch": 0.19901245169600687, "grad_norm": 0.2412109375, "learning_rate": 0.0011938816583925934, "loss": 0.8004, "step": 7416 }, { "epoch": 0.1990392872477458, "grad_norm": 0.244140625, "learning_rate": 0.001194042667382262, "loss": 0.8444, "step": 7417 }, { "epoch": 0.19906612279948477, "grad_norm": 0.2734375, "learning_rate": 0.0011942036763719307, "loss": 0.8734, "step": 7418 }, { "epoch": 0.1990929583512237, "grad_norm": 0.23046875, "learning_rate": 0.0011943646853615991, "loss": 0.7161, "step": 7419 }, { "epoch": 0.19911979390296264, "grad_norm": 0.251953125, "learning_rate": 0.0011945256943512678, "loss": 0.7976, "step": 7420 }, { "epoch": 0.1991466294547016, "grad_norm": 0.2392578125, "learning_rate": 0.0011946867033409364, "loss": 0.7262, "step": 7421 }, { "epoch": 0.19917346500644054, "grad_norm": 0.251953125, "learning_rate": 0.001194847712330605, "loss": 0.8307, "step": 7422 }, { "epoch": 0.19920030055817947, "grad_norm": 0.251953125, "learning_rate": 0.0011950087213202737, "loss": 0.7322, "step": 7423 }, { "epoch": 0.1992271361099184, "grad_norm": 0.2451171875, "learning_rate": 0.0011951697303099421, "loss": 0.7859, "step": 7424 }, { "epoch": 0.19925397166165737, "grad_norm": 0.24609375, "learning_rate": 0.0011953307392996108, "loss": 0.8001, "step": 7425 }, { "epoch": 0.1992808072133963, "grad_norm": 0.2490234375, "learning_rate": 0.0011954917482892794, "loss": 0.8515, "step": 7426 }, { "epoch": 0.19930764276513524, "grad_norm": 0.251953125, "learning_rate": 0.001195652757278948, "loss": 0.7972, "step": 7427 }, { "epoch": 0.1993344783168742, "grad_norm": 0.267578125, "learning_rate": 0.0011958137662686167, "loss": 0.8626, "step": 7428 }, { "epoch": 0.19936131386861314, "grad_norm": 0.267578125, "learning_rate": 0.0011959747752582851, "loss": 0.8561, "step": 7429 }, { "epoch": 0.19938814942035207, "grad_norm": 0.2734375, "learning_rate": 0.0011961357842479538, "loss": 0.9275, "step": 7430 }, { "epoch": 0.19941498497209104, "grad_norm": 0.2353515625, "learning_rate": 0.0011962967932376222, "loss": 0.7743, "step": 7431 }, { "epoch": 0.19944182052382997, "grad_norm": 0.23828125, "learning_rate": 0.0011964578022272908, "loss": 0.7718, "step": 7432 }, { "epoch": 0.1994686560755689, "grad_norm": 0.265625, "learning_rate": 0.0011966188112169595, "loss": 0.9454, "step": 7433 }, { "epoch": 0.19949549162730787, "grad_norm": 0.2490234375, "learning_rate": 0.0011967798202066281, "loss": 0.8046, "step": 7434 }, { "epoch": 0.1995223271790468, "grad_norm": 0.2734375, "learning_rate": 0.0011969408291962968, "loss": 0.9174, "step": 7435 }, { "epoch": 0.19954916273078574, "grad_norm": 0.25390625, "learning_rate": 0.0011971018381859652, "loss": 0.9635, "step": 7436 }, { "epoch": 0.1995759982825247, "grad_norm": 0.23828125, "learning_rate": 0.0011972628471756338, "loss": 0.7679, "step": 7437 }, { "epoch": 0.19960283383426364, "grad_norm": 0.2490234375, "learning_rate": 0.0011974238561653025, "loss": 0.8492, "step": 7438 }, { "epoch": 0.19962966938600257, "grad_norm": 0.2490234375, "learning_rate": 0.0011975848651549711, "loss": 0.8787, "step": 7439 }, { "epoch": 0.1996565049377415, "grad_norm": 0.2578125, "learning_rate": 0.0011977458741446395, "loss": 0.894, "step": 7440 }, { "epoch": 0.19968334048948047, "grad_norm": 0.2392578125, "learning_rate": 0.0011979068831343082, "loss": 0.6652, "step": 7441 }, { "epoch": 0.1997101760412194, "grad_norm": 0.259765625, "learning_rate": 0.0011980678921239768, "loss": 0.8047, "step": 7442 }, { "epoch": 0.19973701159295834, "grad_norm": 0.236328125, "learning_rate": 0.0011982289011136455, "loss": 0.784, "step": 7443 }, { "epoch": 0.1997638471446973, "grad_norm": 0.24609375, "learning_rate": 0.0011983899101033141, "loss": 0.7975, "step": 7444 }, { "epoch": 0.19979068269643624, "grad_norm": 0.2421875, "learning_rate": 0.0011985509190929825, "loss": 0.7578, "step": 7445 }, { "epoch": 0.19981751824817517, "grad_norm": 0.2392578125, "learning_rate": 0.0011987119280826512, "loss": 0.8018, "step": 7446 }, { "epoch": 0.19984435379991414, "grad_norm": 0.2451171875, "learning_rate": 0.0011988729370723196, "loss": 0.6809, "step": 7447 }, { "epoch": 0.19987118935165307, "grad_norm": 0.251953125, "learning_rate": 0.0011990339460619883, "loss": 0.8295, "step": 7448 }, { "epoch": 0.199898024903392, "grad_norm": 0.2578125, "learning_rate": 0.001199194955051657, "loss": 0.887, "step": 7449 }, { "epoch": 0.19992486045513097, "grad_norm": 0.2578125, "learning_rate": 0.0011993559640413255, "loss": 0.8201, "step": 7450 }, { "epoch": 0.1999516960068699, "grad_norm": 0.251953125, "learning_rate": 0.0011995169730309942, "loss": 0.7295, "step": 7451 }, { "epoch": 0.19997853155860884, "grad_norm": 0.26171875, "learning_rate": 0.0011996779820206626, "loss": 0.9121, "step": 7452 }, { "epoch": 0.20000536711034778, "grad_norm": 0.22265625, "learning_rate": 0.0011998389910103313, "loss": 0.6833, "step": 7453 }, { "epoch": 0.20003220266208674, "grad_norm": 0.267578125, "learning_rate": 0.0012, "loss": 0.9379, "step": 7454 }, { "epoch": 0.20005903821382567, "grad_norm": 0.23828125, "learning_rate": 0.001199999999341888, "loss": 0.6979, "step": 7455 }, { "epoch": 0.2000858737655646, "grad_norm": 0.279296875, "learning_rate": 0.001199999997367553, "loss": 0.9385, "step": 7456 }, { "epoch": 0.20011270931730357, "grad_norm": 0.25390625, "learning_rate": 0.0011999999940769943, "loss": 0.8556, "step": 7457 }, { "epoch": 0.2001395448690425, "grad_norm": 0.25390625, "learning_rate": 0.0011999999894702121, "loss": 0.8266, "step": 7458 }, { "epoch": 0.20016638042078144, "grad_norm": 0.2734375, "learning_rate": 0.0011999999835472066, "loss": 0.9827, "step": 7459 }, { "epoch": 0.2001932159725204, "grad_norm": 0.2451171875, "learning_rate": 0.0011999999763079774, "loss": 0.8178, "step": 7460 }, { "epoch": 0.20022005152425934, "grad_norm": 0.25390625, "learning_rate": 0.001199999967752525, "loss": 0.7853, "step": 7461 }, { "epoch": 0.20024688707599828, "grad_norm": 0.2470703125, "learning_rate": 0.001199999957880849, "loss": 0.7937, "step": 7462 }, { "epoch": 0.20027372262773724, "grad_norm": 0.25, "learning_rate": 0.00119999994669295, "loss": 0.7982, "step": 7463 }, { "epoch": 0.20030055817947617, "grad_norm": 0.2578125, "learning_rate": 0.0011999999341888272, "loss": 0.7835, "step": 7464 }, { "epoch": 0.2003273937312151, "grad_norm": 0.251953125, "learning_rate": 0.0011999999203684813, "loss": 0.8256, "step": 7465 }, { "epoch": 0.20035422928295404, "grad_norm": 0.2421875, "learning_rate": 0.001199999905231912, "loss": 0.8241, "step": 7466 }, { "epoch": 0.200381064834693, "grad_norm": 0.25390625, "learning_rate": 0.0011999998887791193, "loss": 0.7728, "step": 7467 }, { "epoch": 0.20040790038643194, "grad_norm": 0.2470703125, "learning_rate": 0.0011999998710101039, "loss": 0.7682, "step": 7468 }, { "epoch": 0.20043473593817088, "grad_norm": 0.25390625, "learning_rate": 0.0011999998519248648, "loss": 0.8132, "step": 7469 }, { "epoch": 0.20046157148990984, "grad_norm": 0.25, "learning_rate": 0.0011999998315234026, "loss": 0.7986, "step": 7470 }, { "epoch": 0.20048840704164878, "grad_norm": 0.271484375, "learning_rate": 0.0011999998098057175, "loss": 0.8812, "step": 7471 }, { "epoch": 0.2005152425933877, "grad_norm": 0.234375, "learning_rate": 0.0011999997867718091, "loss": 0.7094, "step": 7472 }, { "epoch": 0.20054207814512667, "grad_norm": 0.2392578125, "learning_rate": 0.0011999997624216779, "loss": 0.7559, "step": 7473 }, { "epoch": 0.2005689136968656, "grad_norm": 0.2333984375, "learning_rate": 0.0011999997367553236, "loss": 0.7408, "step": 7474 }, { "epoch": 0.20059574924860454, "grad_norm": 0.25, "learning_rate": 0.0011999997097727464, "loss": 0.814, "step": 7475 }, { "epoch": 0.2006225848003435, "grad_norm": 0.2431640625, "learning_rate": 0.0011999996814739465, "loss": 0.7879, "step": 7476 }, { "epoch": 0.20064942035208244, "grad_norm": 0.255859375, "learning_rate": 0.0011999996518589236, "loss": 0.8142, "step": 7477 }, { "epoch": 0.20067625590382138, "grad_norm": 0.2421875, "learning_rate": 0.0011999996209276782, "loss": 0.8419, "step": 7478 }, { "epoch": 0.20070309145556034, "grad_norm": 0.271484375, "learning_rate": 0.0011999995886802103, "loss": 0.8718, "step": 7479 }, { "epoch": 0.20072992700729927, "grad_norm": 0.251953125, "learning_rate": 0.0011999995551165194, "loss": 0.8496, "step": 7480 }, { "epoch": 0.2007567625590382, "grad_norm": 0.2373046875, "learning_rate": 0.0011999995202366062, "loss": 0.7817, "step": 7481 }, { "epoch": 0.20078359811077715, "grad_norm": 0.240234375, "learning_rate": 0.0011999994840404707, "loss": 0.7794, "step": 7482 }, { "epoch": 0.2008104336625161, "grad_norm": 0.25390625, "learning_rate": 0.0011999994465281126, "loss": 0.8878, "step": 7483 }, { "epoch": 0.20083726921425504, "grad_norm": 0.2578125, "learning_rate": 0.0011999994076995323, "loss": 0.7579, "step": 7484 }, { "epoch": 0.20086410476599398, "grad_norm": 0.248046875, "learning_rate": 0.00119999936755473, "loss": 0.8049, "step": 7485 }, { "epoch": 0.20089094031773294, "grad_norm": 0.26171875, "learning_rate": 0.0011999993260937056, "loss": 0.8917, "step": 7486 }, { "epoch": 0.20091777586947188, "grad_norm": 0.25, "learning_rate": 0.001199999283316459, "loss": 0.8096, "step": 7487 }, { "epoch": 0.2009446114212108, "grad_norm": 0.251953125, "learning_rate": 0.0011999992392229906, "loss": 0.8737, "step": 7488 }, { "epoch": 0.20097144697294977, "grad_norm": 0.25, "learning_rate": 0.0011999991938133005, "loss": 0.8419, "step": 7489 }, { "epoch": 0.2009982825246887, "grad_norm": 0.2431640625, "learning_rate": 0.0011999991470873885, "loss": 0.7697, "step": 7490 }, { "epoch": 0.20102511807642764, "grad_norm": 0.259765625, "learning_rate": 0.001199999099045255, "loss": 0.8065, "step": 7491 }, { "epoch": 0.2010519536281666, "grad_norm": 0.236328125, "learning_rate": 0.0011999990496869, "loss": 0.6529, "step": 7492 }, { "epoch": 0.20107878917990554, "grad_norm": 0.251953125, "learning_rate": 0.0011999989990123235, "loss": 0.7693, "step": 7493 }, { "epoch": 0.20110562473164448, "grad_norm": 0.267578125, "learning_rate": 0.001199998947021526, "loss": 0.7955, "step": 7494 }, { "epoch": 0.2011324602833834, "grad_norm": 0.2734375, "learning_rate": 0.001199998893714507, "loss": 0.9377, "step": 7495 }, { "epoch": 0.20115929583512238, "grad_norm": 0.248046875, "learning_rate": 0.001199998839091267, "loss": 0.8681, "step": 7496 }, { "epoch": 0.2011861313868613, "grad_norm": 0.2470703125, "learning_rate": 0.0011999987831518063, "loss": 0.863, "step": 7497 }, { "epoch": 0.20121296693860025, "grad_norm": 0.240234375, "learning_rate": 0.0011999987258961243, "loss": 0.813, "step": 7498 }, { "epoch": 0.2012398024903392, "grad_norm": 0.248046875, "learning_rate": 0.0011999986673242222, "loss": 0.8104, "step": 7499 }, { "epoch": 0.20126663804207814, "grad_norm": 0.259765625, "learning_rate": 0.0011999986074360993, "loss": 0.8832, "step": 7500 }, { "epoch": 0.20129347359381708, "grad_norm": 0.2421875, "learning_rate": 0.0011999985462317559, "loss": 0.761, "step": 7501 }, { "epoch": 0.20132030914555604, "grad_norm": 0.234375, "learning_rate": 0.0011999984837111923, "loss": 0.7805, "step": 7502 }, { "epoch": 0.20134714469729498, "grad_norm": 0.2353515625, "learning_rate": 0.0011999984198744085, "loss": 0.7245, "step": 7503 }, { "epoch": 0.2013739802490339, "grad_norm": 0.267578125, "learning_rate": 0.0011999983547214049, "loss": 0.8038, "step": 7504 }, { "epoch": 0.20140081580077288, "grad_norm": 0.24609375, "learning_rate": 0.001199998288252181, "loss": 0.7811, "step": 7505 }, { "epoch": 0.2014276513525118, "grad_norm": 0.23828125, "learning_rate": 0.0011999982204667378, "loss": 0.7399, "step": 7506 }, { "epoch": 0.20145448690425075, "grad_norm": 0.2119140625, "learning_rate": 0.0011999981513650749, "loss": 0.5906, "step": 7507 }, { "epoch": 0.2014813224559897, "grad_norm": 0.2333984375, "learning_rate": 0.0011999980809471926, "loss": 0.7023, "step": 7508 }, { "epoch": 0.20150815800772864, "grad_norm": 0.2451171875, "learning_rate": 0.0011999980092130909, "loss": 0.7499, "step": 7509 }, { "epoch": 0.20153499355946758, "grad_norm": 0.259765625, "learning_rate": 0.0011999979361627703, "loss": 0.8743, "step": 7510 }, { "epoch": 0.20156182911120651, "grad_norm": 0.240234375, "learning_rate": 0.0011999978617962305, "loss": 0.7283, "step": 7511 }, { "epoch": 0.20158866466294548, "grad_norm": 0.240234375, "learning_rate": 0.001199997786113472, "loss": 0.7669, "step": 7512 }, { "epoch": 0.2016155002146844, "grad_norm": 0.25390625, "learning_rate": 0.001199997709114495, "loss": 0.7739, "step": 7513 }, { "epoch": 0.20164233576642335, "grad_norm": 0.2470703125, "learning_rate": 0.0011999976307992995, "loss": 0.7596, "step": 7514 }, { "epoch": 0.2016691713181623, "grad_norm": 0.240234375, "learning_rate": 0.0011999975511678856, "loss": 0.7562, "step": 7515 }, { "epoch": 0.20169600686990125, "grad_norm": 0.240234375, "learning_rate": 0.0011999974702202537, "loss": 0.7488, "step": 7516 }, { "epoch": 0.20172284242164018, "grad_norm": 0.251953125, "learning_rate": 0.0011999973879564037, "loss": 0.8231, "step": 7517 }, { "epoch": 0.20174967797337914, "grad_norm": 0.248046875, "learning_rate": 0.0011999973043763362, "loss": 0.8488, "step": 7518 }, { "epoch": 0.20177651352511808, "grad_norm": 0.2333984375, "learning_rate": 0.001199997219480051, "loss": 0.7729, "step": 7519 }, { "epoch": 0.201803349076857, "grad_norm": 0.25390625, "learning_rate": 0.0011999971332675484, "loss": 0.8901, "step": 7520 }, { "epoch": 0.20183018462859598, "grad_norm": 0.24609375, "learning_rate": 0.0011999970457388287, "loss": 0.8493, "step": 7521 }, { "epoch": 0.2018570201803349, "grad_norm": 0.2421875, "learning_rate": 0.001199996956893892, "loss": 0.7483, "step": 7522 }, { "epoch": 0.20188385573207385, "grad_norm": 0.25, "learning_rate": 0.0011999968667327386, "loss": 0.7989, "step": 7523 }, { "epoch": 0.20191069128381278, "grad_norm": 0.263671875, "learning_rate": 0.0011999967752553682, "loss": 0.9319, "step": 7524 }, { "epoch": 0.20193752683555174, "grad_norm": 0.240234375, "learning_rate": 0.0011999966824617818, "loss": 0.7501, "step": 7525 }, { "epoch": 0.20196436238729068, "grad_norm": 0.24609375, "learning_rate": 0.0011999965883519792, "loss": 0.691, "step": 7526 }, { "epoch": 0.20199119793902962, "grad_norm": 0.25390625, "learning_rate": 0.0011999964929259605, "loss": 0.7809, "step": 7527 }, { "epoch": 0.20201803349076858, "grad_norm": 0.2451171875, "learning_rate": 0.001199996396183726, "loss": 0.8267, "step": 7528 }, { "epoch": 0.2020448690425075, "grad_norm": 0.2490234375, "learning_rate": 0.0011999962981252758, "loss": 0.817, "step": 7529 }, { "epoch": 0.20207170459424645, "grad_norm": 0.255859375, "learning_rate": 0.0011999961987506104, "loss": 0.8469, "step": 7530 }, { "epoch": 0.2020985401459854, "grad_norm": 0.236328125, "learning_rate": 0.0011999960980597299, "loss": 0.7374, "step": 7531 }, { "epoch": 0.20212537569772435, "grad_norm": 0.25, "learning_rate": 0.0011999959960526342, "loss": 0.8238, "step": 7532 }, { "epoch": 0.20215221124946328, "grad_norm": 0.25390625, "learning_rate": 0.001199995892729324, "loss": 0.8406, "step": 7533 }, { "epoch": 0.20217904680120224, "grad_norm": 0.349609375, "learning_rate": 0.0011999957880897995, "loss": 0.8004, "step": 7534 }, { "epoch": 0.20220588235294118, "grad_norm": 0.337890625, "learning_rate": 0.0011999956821340605, "loss": 0.7834, "step": 7535 }, { "epoch": 0.20223271790468011, "grad_norm": 0.333984375, "learning_rate": 0.0011999955748621077, "loss": 0.8674, "step": 7536 }, { "epoch": 0.20225955345641908, "grad_norm": 0.33984375, "learning_rate": 0.0011999954662739408, "loss": 0.7704, "step": 7537 }, { "epoch": 0.202286389008158, "grad_norm": 0.27734375, "learning_rate": 0.0011999953563695607, "loss": 0.7312, "step": 7538 }, { "epoch": 0.20231322455989695, "grad_norm": 0.267578125, "learning_rate": 0.0011999952451489672, "loss": 0.7559, "step": 7539 }, { "epoch": 0.20234006011163588, "grad_norm": 0.27734375, "learning_rate": 0.0011999951326121608, "loss": 0.7746, "step": 7540 }, { "epoch": 0.20236689566337485, "grad_norm": 0.30078125, "learning_rate": 0.0011999950187591415, "loss": 0.7855, "step": 7541 }, { "epoch": 0.20239373121511378, "grad_norm": 0.2734375, "learning_rate": 0.0011999949035899095, "loss": 0.7666, "step": 7542 }, { "epoch": 0.20242056676685272, "grad_norm": 0.25390625, "learning_rate": 0.0011999947871044652, "loss": 0.7801, "step": 7543 }, { "epoch": 0.20244740231859168, "grad_norm": 0.259765625, "learning_rate": 0.001199994669302809, "loss": 0.862, "step": 7544 }, { "epoch": 0.20247423787033061, "grad_norm": 0.275390625, "learning_rate": 0.001199994550184941, "loss": 0.8483, "step": 7545 }, { "epoch": 0.20250107342206955, "grad_norm": 0.283203125, "learning_rate": 0.0011999944297508614, "loss": 0.807, "step": 7546 }, { "epoch": 0.2025279089738085, "grad_norm": 0.248046875, "learning_rate": 0.0011999943080005708, "loss": 0.7905, "step": 7547 }, { "epoch": 0.20255474452554745, "grad_norm": 0.244140625, "learning_rate": 0.001199994184934069, "loss": 0.7691, "step": 7548 }, { "epoch": 0.20258158007728638, "grad_norm": 0.251953125, "learning_rate": 0.0011999940605513565, "loss": 0.7803, "step": 7549 }, { "epoch": 0.20260841562902535, "grad_norm": 0.2470703125, "learning_rate": 0.0011999939348524334, "loss": 0.7203, "step": 7550 }, { "epoch": 0.20263525118076428, "grad_norm": 0.244140625, "learning_rate": 0.0011999938078373004, "loss": 0.7455, "step": 7551 }, { "epoch": 0.20266208673250322, "grad_norm": 0.244140625, "learning_rate": 0.0011999936795059573, "loss": 0.6786, "step": 7552 }, { "epoch": 0.20268892228424215, "grad_norm": 0.2412109375, "learning_rate": 0.0011999935498584046, "loss": 0.7502, "step": 7553 }, { "epoch": 0.20271575783598111, "grad_norm": 0.279296875, "learning_rate": 0.0011999934188946426, "loss": 0.8631, "step": 7554 }, { "epoch": 0.20274259338772005, "grad_norm": 0.2431640625, "learning_rate": 0.0011999932866146718, "loss": 0.7651, "step": 7555 }, { "epoch": 0.20276942893945898, "grad_norm": 0.2314453125, "learning_rate": 0.001199993153018492, "loss": 0.7131, "step": 7556 }, { "epoch": 0.20279626449119795, "grad_norm": 0.265625, "learning_rate": 0.0011999930181061037, "loss": 0.8279, "step": 7557 }, { "epoch": 0.20282310004293688, "grad_norm": 0.2392578125, "learning_rate": 0.0011999928818775074, "loss": 0.7201, "step": 7558 }, { "epoch": 0.20284993559467582, "grad_norm": 0.2431640625, "learning_rate": 0.001199992744332703, "loss": 0.7227, "step": 7559 }, { "epoch": 0.20287677114641478, "grad_norm": 0.240234375, "learning_rate": 0.0011999926054716914, "loss": 0.7345, "step": 7560 }, { "epoch": 0.20290360669815372, "grad_norm": 0.2265625, "learning_rate": 0.0011999924652944724, "loss": 0.7415, "step": 7561 }, { "epoch": 0.20293044224989265, "grad_norm": 0.24609375, "learning_rate": 0.0011999923238010463, "loss": 0.801, "step": 7562 }, { "epoch": 0.2029572778016316, "grad_norm": 0.25, "learning_rate": 0.0011999921809914137, "loss": 0.7425, "step": 7563 }, { "epoch": 0.20298411335337055, "grad_norm": 0.2333984375, "learning_rate": 0.0011999920368655746, "loss": 0.7649, "step": 7564 }, { "epoch": 0.20301094890510948, "grad_norm": 0.234375, "learning_rate": 0.00119999189142353, "loss": 0.684, "step": 7565 }, { "epoch": 0.20303778445684842, "grad_norm": 0.259765625, "learning_rate": 0.0011999917446652792, "loss": 0.8617, "step": 7566 }, { "epoch": 0.20306462000858738, "grad_norm": 0.2412109375, "learning_rate": 0.001199991596590823, "loss": 0.7269, "step": 7567 }, { "epoch": 0.20309145556032632, "grad_norm": 0.26171875, "learning_rate": 0.001199991447200162, "loss": 0.82, "step": 7568 }, { "epoch": 0.20311829111206525, "grad_norm": 0.2431640625, "learning_rate": 0.0011999912964932964, "loss": 0.7874, "step": 7569 }, { "epoch": 0.20314512666380422, "grad_norm": 0.2431640625, "learning_rate": 0.0011999911444702262, "loss": 0.7903, "step": 7570 }, { "epoch": 0.20317196221554315, "grad_norm": 0.2216796875, "learning_rate": 0.001199990991130952, "loss": 0.6902, "step": 7571 }, { "epoch": 0.20319879776728209, "grad_norm": 0.2373046875, "learning_rate": 0.0011999908364754741, "loss": 0.7222, "step": 7572 }, { "epoch": 0.20322563331902105, "grad_norm": 0.2373046875, "learning_rate": 0.0011999906805037926, "loss": 0.6238, "step": 7573 }, { "epoch": 0.20325246887075998, "grad_norm": 0.26953125, "learning_rate": 0.0011999905232159083, "loss": 0.9684, "step": 7574 }, { "epoch": 0.20327930442249892, "grad_norm": 0.2314453125, "learning_rate": 0.0011999903646118214, "loss": 0.7658, "step": 7575 }, { "epoch": 0.20330613997423788, "grad_norm": 0.25390625, "learning_rate": 0.001199990204691532, "loss": 0.793, "step": 7576 }, { "epoch": 0.20333297552597682, "grad_norm": 0.2333984375, "learning_rate": 0.0011999900434550405, "loss": 0.7256, "step": 7577 }, { "epoch": 0.20335981107771575, "grad_norm": 0.2294921875, "learning_rate": 0.0011999898809023476, "loss": 0.767, "step": 7578 }, { "epoch": 0.20338664662945471, "grad_norm": 0.2197265625, "learning_rate": 0.0011999897170334533, "loss": 0.6673, "step": 7579 }, { "epoch": 0.20341348218119365, "grad_norm": 0.2236328125, "learning_rate": 0.001199989551848358, "loss": 0.7048, "step": 7580 }, { "epoch": 0.20344031773293259, "grad_norm": 0.2216796875, "learning_rate": 0.0011999893853470625, "loss": 0.7186, "step": 7581 }, { "epoch": 0.20346715328467152, "grad_norm": 0.2275390625, "learning_rate": 0.0011999892175295665, "loss": 0.7182, "step": 7582 }, { "epoch": 0.20349398883641048, "grad_norm": 0.224609375, "learning_rate": 0.0011999890483958707, "loss": 0.7742, "step": 7583 }, { "epoch": 0.20352082438814942, "grad_norm": 0.25, "learning_rate": 0.0011999888779459754, "loss": 0.7979, "step": 7584 }, { "epoch": 0.20354765993988835, "grad_norm": 0.2197265625, "learning_rate": 0.0011999887061798812, "loss": 0.6794, "step": 7585 }, { "epoch": 0.20357449549162732, "grad_norm": 0.2294921875, "learning_rate": 0.0011999885330975881, "loss": 0.7199, "step": 7586 }, { "epoch": 0.20360133104336625, "grad_norm": 0.25, "learning_rate": 0.0011999883586990968, "loss": 0.7931, "step": 7587 }, { "epoch": 0.2036281665951052, "grad_norm": 0.25, "learning_rate": 0.0011999881829844077, "loss": 0.8658, "step": 7588 }, { "epoch": 0.20365500214684415, "grad_norm": 0.2490234375, "learning_rate": 0.0011999880059535208, "loss": 0.8617, "step": 7589 }, { "epoch": 0.20368183769858308, "grad_norm": 0.2451171875, "learning_rate": 0.0011999878276064367, "loss": 0.7793, "step": 7590 }, { "epoch": 0.20370867325032202, "grad_norm": 0.248046875, "learning_rate": 0.0011999876479431562, "loss": 0.8315, "step": 7591 }, { "epoch": 0.20373550880206098, "grad_norm": 0.2421875, "learning_rate": 0.001199987466963679, "loss": 0.7535, "step": 7592 }, { "epoch": 0.20376234435379992, "grad_norm": 0.2236328125, "learning_rate": 0.001199987284668006, "loss": 0.6183, "step": 7593 }, { "epoch": 0.20378917990553885, "grad_norm": 0.240234375, "learning_rate": 0.0011999871010561372, "loss": 0.7711, "step": 7594 }, { "epoch": 0.2038160154572778, "grad_norm": 0.234375, "learning_rate": 0.0011999869161280733, "loss": 0.7502, "step": 7595 }, { "epoch": 0.20384285100901675, "grad_norm": 0.2373046875, "learning_rate": 0.0011999867298838145, "loss": 0.7466, "step": 7596 }, { "epoch": 0.2038696865607557, "grad_norm": 0.2314453125, "learning_rate": 0.0011999865423233615, "loss": 0.6791, "step": 7597 }, { "epoch": 0.20389652211249462, "grad_norm": 0.2373046875, "learning_rate": 0.0011999863534467147, "loss": 0.7732, "step": 7598 }, { "epoch": 0.20392335766423358, "grad_norm": 0.2412109375, "learning_rate": 0.0011999861632538739, "loss": 0.732, "step": 7599 }, { "epoch": 0.20395019321597252, "grad_norm": 0.2265625, "learning_rate": 0.0011999859717448404, "loss": 0.7647, "step": 7600 }, { "epoch": 0.20397702876771145, "grad_norm": 0.2421875, "learning_rate": 0.0011999857789196141, "loss": 0.7853, "step": 7601 }, { "epoch": 0.20400386431945042, "grad_norm": 0.2451171875, "learning_rate": 0.0011999855847781955, "loss": 0.8154, "step": 7602 }, { "epoch": 0.20403069987118935, "grad_norm": 0.240234375, "learning_rate": 0.001199985389320585, "loss": 0.7601, "step": 7603 }, { "epoch": 0.2040575354229283, "grad_norm": 0.255859375, "learning_rate": 0.001199985192546783, "loss": 0.7803, "step": 7604 }, { "epoch": 0.20408437097466725, "grad_norm": 0.255859375, "learning_rate": 0.00119998499445679, "loss": 0.8365, "step": 7605 }, { "epoch": 0.20411120652640619, "grad_norm": 0.244140625, "learning_rate": 0.0011999847950506068, "loss": 0.7917, "step": 7606 }, { "epoch": 0.20413804207814512, "grad_norm": 0.2314453125, "learning_rate": 0.0011999845943282332, "loss": 0.7037, "step": 7607 }, { "epoch": 0.20416487762988408, "grad_norm": 0.2275390625, "learning_rate": 0.0011999843922896698, "loss": 0.7012, "step": 7608 }, { "epoch": 0.20419171318162302, "grad_norm": 0.2353515625, "learning_rate": 0.0011999841889349173, "loss": 0.7714, "step": 7609 }, { "epoch": 0.20421854873336195, "grad_norm": 0.234375, "learning_rate": 0.001199983984263976, "loss": 0.8149, "step": 7610 }, { "epoch": 0.2042453842851009, "grad_norm": 0.2392578125, "learning_rate": 0.0011999837782768464, "loss": 0.7639, "step": 7611 }, { "epoch": 0.20427221983683985, "grad_norm": 0.2255859375, "learning_rate": 0.0011999835709735288, "loss": 0.6482, "step": 7612 }, { "epoch": 0.2042990553885788, "grad_norm": 0.2333984375, "learning_rate": 0.001199983362354024, "loss": 0.7509, "step": 7613 }, { "epoch": 0.20432589094031772, "grad_norm": 0.2373046875, "learning_rate": 0.001199983152418332, "loss": 0.708, "step": 7614 }, { "epoch": 0.20435272649205669, "grad_norm": 0.259765625, "learning_rate": 0.0011999829411664534, "loss": 0.8542, "step": 7615 }, { "epoch": 0.20437956204379562, "grad_norm": 0.2255859375, "learning_rate": 0.001199982728598389, "loss": 0.7629, "step": 7616 }, { "epoch": 0.20440639759553456, "grad_norm": 0.2431640625, "learning_rate": 0.0011999825147141388, "loss": 0.7943, "step": 7617 }, { "epoch": 0.20443323314727352, "grad_norm": 0.251953125, "learning_rate": 0.0011999822995137036, "loss": 0.8261, "step": 7618 }, { "epoch": 0.20446006869901245, "grad_norm": 0.240234375, "learning_rate": 0.0011999820829970836, "loss": 0.7943, "step": 7619 }, { "epoch": 0.2044869042507514, "grad_norm": 0.25390625, "learning_rate": 0.0011999818651642795, "loss": 0.7332, "step": 7620 }, { "epoch": 0.20451373980249035, "grad_norm": 0.2392578125, "learning_rate": 0.0011999816460152917, "loss": 0.8312, "step": 7621 }, { "epoch": 0.2045405753542293, "grad_norm": 0.2431640625, "learning_rate": 0.0011999814255501209, "loss": 0.7872, "step": 7622 }, { "epoch": 0.20456741090596822, "grad_norm": 0.248046875, "learning_rate": 0.0011999812037687674, "loss": 0.8145, "step": 7623 }, { "epoch": 0.20459424645770716, "grad_norm": 0.2392578125, "learning_rate": 0.0011999809806712313, "loss": 0.7826, "step": 7624 }, { "epoch": 0.20462108200944612, "grad_norm": 0.234375, "learning_rate": 0.0011999807562575137, "loss": 0.7111, "step": 7625 }, { "epoch": 0.20464791756118506, "grad_norm": 0.2314453125, "learning_rate": 0.0011999805305276147, "loss": 0.6841, "step": 7626 }, { "epoch": 0.204674753112924, "grad_norm": 0.236328125, "learning_rate": 0.001199980303481535, "loss": 0.7669, "step": 7627 }, { "epoch": 0.20470158866466295, "grad_norm": 0.23046875, "learning_rate": 0.001199980075119275, "loss": 0.6829, "step": 7628 }, { "epoch": 0.2047284242164019, "grad_norm": 0.2236328125, "learning_rate": 0.0011999798454408353, "loss": 0.7275, "step": 7629 }, { "epoch": 0.20475525976814082, "grad_norm": 0.2314453125, "learning_rate": 0.0011999796144462162, "loss": 0.7705, "step": 7630 }, { "epoch": 0.2047820953198798, "grad_norm": 0.2412109375, "learning_rate": 0.0011999793821354185, "loss": 0.8029, "step": 7631 }, { "epoch": 0.20480893087161872, "grad_norm": 0.2314453125, "learning_rate": 0.0011999791485084424, "loss": 0.7852, "step": 7632 }, { "epoch": 0.20483576642335766, "grad_norm": 0.2421875, "learning_rate": 0.0011999789135652887, "loss": 0.8213, "step": 7633 }, { "epoch": 0.20486260197509662, "grad_norm": 0.2314453125, "learning_rate": 0.0011999786773059578, "loss": 0.7341, "step": 7634 }, { "epoch": 0.20488943752683555, "grad_norm": 0.251953125, "learning_rate": 0.00119997843973045, "loss": 0.8345, "step": 7635 }, { "epoch": 0.2049162730785745, "grad_norm": 0.251953125, "learning_rate": 0.0011999782008387662, "loss": 0.8291, "step": 7636 }, { "epoch": 0.20494310863031345, "grad_norm": 0.236328125, "learning_rate": 0.0011999779606309065, "loss": 0.8231, "step": 7637 }, { "epoch": 0.2049699441820524, "grad_norm": 0.244140625, "learning_rate": 0.001199977719106872, "loss": 0.8328, "step": 7638 }, { "epoch": 0.20499677973379132, "grad_norm": 0.240234375, "learning_rate": 0.0011999774762666627, "loss": 0.806, "step": 7639 }, { "epoch": 0.20502361528553026, "grad_norm": 0.22265625, "learning_rate": 0.0011999772321102794, "loss": 0.7138, "step": 7640 }, { "epoch": 0.20505045083726922, "grad_norm": 0.23046875, "learning_rate": 0.0011999769866377224, "loss": 0.7822, "step": 7641 }, { "epoch": 0.20507728638900816, "grad_norm": 0.255859375, "learning_rate": 0.0011999767398489928, "loss": 0.8791, "step": 7642 }, { "epoch": 0.2051041219407471, "grad_norm": 0.23828125, "learning_rate": 0.0011999764917440903, "loss": 0.7633, "step": 7643 }, { "epoch": 0.20513095749248605, "grad_norm": 0.2421875, "learning_rate": 0.0011999762423230162, "loss": 0.7761, "step": 7644 }, { "epoch": 0.205157793044225, "grad_norm": 0.22265625, "learning_rate": 0.0011999759915857708, "loss": 0.6564, "step": 7645 }, { "epoch": 0.20518462859596392, "grad_norm": 0.2099609375, "learning_rate": 0.0011999757395323542, "loss": 0.6452, "step": 7646 }, { "epoch": 0.2052114641477029, "grad_norm": 0.244140625, "learning_rate": 0.0011999754861627676, "loss": 0.729, "step": 7647 }, { "epoch": 0.20523829969944182, "grad_norm": 0.259765625, "learning_rate": 0.0011999752314770112, "loss": 0.8806, "step": 7648 }, { "epoch": 0.20526513525118076, "grad_norm": 0.23828125, "learning_rate": 0.0011999749754750858, "loss": 0.8043, "step": 7649 }, { "epoch": 0.20529197080291972, "grad_norm": 0.24609375, "learning_rate": 0.0011999747181569917, "loss": 0.7639, "step": 7650 }, { "epoch": 0.20531880635465866, "grad_norm": 0.24609375, "learning_rate": 0.0011999744595227297, "loss": 0.8222, "step": 7651 }, { "epoch": 0.2053456419063976, "grad_norm": 0.2451171875, "learning_rate": 0.0011999741995723002, "loss": 0.6534, "step": 7652 }, { "epoch": 0.20537247745813653, "grad_norm": 0.2412109375, "learning_rate": 0.001199973938305704, "loss": 0.7761, "step": 7653 }, { "epoch": 0.2053993130098755, "grad_norm": 0.224609375, "learning_rate": 0.0011999736757229412, "loss": 0.7121, "step": 7654 }, { "epoch": 0.20542614856161442, "grad_norm": 0.2265625, "learning_rate": 0.0011999734118240126, "loss": 0.6856, "step": 7655 }, { "epoch": 0.20545298411335336, "grad_norm": 0.2333984375, "learning_rate": 0.001199973146608919, "loss": 0.7891, "step": 7656 }, { "epoch": 0.20547981966509232, "grad_norm": 0.2470703125, "learning_rate": 0.001199972880077661, "loss": 0.7666, "step": 7657 }, { "epoch": 0.20550665521683126, "grad_norm": 0.26171875, "learning_rate": 0.0011999726122302388, "loss": 0.8314, "step": 7658 }, { "epoch": 0.2055334907685702, "grad_norm": 0.25390625, "learning_rate": 0.0011999723430666533, "loss": 0.7882, "step": 7659 }, { "epoch": 0.20556032632030916, "grad_norm": 0.251953125, "learning_rate": 0.001199972072586905, "loss": 0.7542, "step": 7660 }, { "epoch": 0.2055871618720481, "grad_norm": 0.21875, "learning_rate": 0.0011999718007909945, "loss": 0.7217, "step": 7661 }, { "epoch": 0.20561399742378703, "grad_norm": 0.23828125, "learning_rate": 0.0011999715276789222, "loss": 0.7306, "step": 7662 }, { "epoch": 0.205640832975526, "grad_norm": 0.2470703125, "learning_rate": 0.001199971253250689, "loss": 0.7278, "step": 7663 }, { "epoch": 0.20566766852726492, "grad_norm": 0.24609375, "learning_rate": 0.0011999709775062954, "loss": 0.8141, "step": 7664 }, { "epoch": 0.20569450407900386, "grad_norm": 0.208984375, "learning_rate": 0.0011999707004457423, "loss": 0.6147, "step": 7665 }, { "epoch": 0.2057213396307428, "grad_norm": 0.25390625, "learning_rate": 0.0011999704220690295, "loss": 0.7942, "step": 7666 }, { "epoch": 0.20574817518248176, "grad_norm": 0.2294921875, "learning_rate": 0.0011999701423761582, "loss": 0.7623, "step": 7667 }, { "epoch": 0.2057750107342207, "grad_norm": 0.251953125, "learning_rate": 0.001199969861367129, "loss": 0.7849, "step": 7668 }, { "epoch": 0.20580184628595963, "grad_norm": 0.28125, "learning_rate": 0.0011999695790419424, "loss": 0.854, "step": 7669 }, { "epoch": 0.2058286818376986, "grad_norm": 0.2314453125, "learning_rate": 0.0011999692954005993, "loss": 0.7433, "step": 7670 }, { "epoch": 0.20585551738943753, "grad_norm": 0.23828125, "learning_rate": 0.0011999690104431, "loss": 0.811, "step": 7671 }, { "epoch": 0.20588235294117646, "grad_norm": 0.228515625, "learning_rate": 0.001199968724169445, "loss": 0.6892, "step": 7672 }, { "epoch": 0.20590918849291542, "grad_norm": 0.236328125, "learning_rate": 0.0011999684365796351, "loss": 0.753, "step": 7673 }, { "epoch": 0.20593602404465436, "grad_norm": 0.2431640625, "learning_rate": 0.001199968147673671, "loss": 0.7475, "step": 7674 }, { "epoch": 0.2059628595963933, "grad_norm": 0.2421875, "learning_rate": 0.0011999678574515535, "loss": 0.7398, "step": 7675 }, { "epoch": 0.20598969514813226, "grad_norm": 0.25390625, "learning_rate": 0.001199967565913283, "loss": 0.8461, "step": 7676 }, { "epoch": 0.2060165306998712, "grad_norm": 0.244140625, "learning_rate": 0.0011999672730588602, "loss": 0.7147, "step": 7677 }, { "epoch": 0.20604336625161013, "grad_norm": 0.2392578125, "learning_rate": 0.0011999669788882855, "loss": 0.8113, "step": 7678 }, { "epoch": 0.2060702018033491, "grad_norm": 0.2451171875, "learning_rate": 0.0011999666834015597, "loss": 0.7448, "step": 7679 }, { "epoch": 0.20609703735508803, "grad_norm": 0.23828125, "learning_rate": 0.0011999663865986838, "loss": 0.7658, "step": 7680 }, { "epoch": 0.20612387290682696, "grad_norm": 0.251953125, "learning_rate": 0.001199966088479658, "loss": 0.8556, "step": 7681 }, { "epoch": 0.2061507084585659, "grad_norm": 0.2158203125, "learning_rate": 0.001199965789044483, "loss": 0.69, "step": 7682 }, { "epoch": 0.20617754401030486, "grad_norm": 0.26171875, "learning_rate": 0.00119996548829316, "loss": 0.8187, "step": 7683 }, { "epoch": 0.2062043795620438, "grad_norm": 0.248046875, "learning_rate": 0.0011999651862256888, "loss": 0.8241, "step": 7684 }, { "epoch": 0.20623121511378273, "grad_norm": 0.25390625, "learning_rate": 0.001199964882842071, "loss": 0.8486, "step": 7685 }, { "epoch": 0.2062580506655217, "grad_norm": 0.2392578125, "learning_rate": 0.0011999645781423061, "loss": 0.8139, "step": 7686 }, { "epoch": 0.20628488621726063, "grad_norm": 0.23046875, "learning_rate": 0.0011999642721263958, "loss": 0.7406, "step": 7687 }, { "epoch": 0.20631172176899956, "grad_norm": 0.22265625, "learning_rate": 0.0011999639647943404, "loss": 0.7343, "step": 7688 }, { "epoch": 0.20633855732073852, "grad_norm": 0.2216796875, "learning_rate": 0.0011999636561461405, "loss": 0.7826, "step": 7689 }, { "epoch": 0.20636539287247746, "grad_norm": 0.248046875, "learning_rate": 0.0011999633461817968, "loss": 0.8079, "step": 7690 }, { "epoch": 0.2063922284242164, "grad_norm": 0.2412109375, "learning_rate": 0.0011999630349013102, "loss": 0.7821, "step": 7691 }, { "epoch": 0.20641906397595536, "grad_norm": 0.2392578125, "learning_rate": 0.001199962722304681, "loss": 0.787, "step": 7692 }, { "epoch": 0.2064458995276943, "grad_norm": 0.20703125, "learning_rate": 0.0011999624083919103, "loss": 0.6403, "step": 7693 }, { "epoch": 0.20647273507943323, "grad_norm": 0.251953125, "learning_rate": 0.0011999620931629985, "loss": 0.7618, "step": 7694 }, { "epoch": 0.20649957063117216, "grad_norm": 0.2431640625, "learning_rate": 0.0011999617766179464, "loss": 0.7515, "step": 7695 }, { "epoch": 0.20652640618291113, "grad_norm": 0.2314453125, "learning_rate": 0.0011999614587567546, "loss": 0.711, "step": 7696 }, { "epoch": 0.20655324173465006, "grad_norm": 0.2265625, "learning_rate": 0.001199961139579424, "loss": 0.7289, "step": 7697 }, { "epoch": 0.206580077286389, "grad_norm": 0.2412109375, "learning_rate": 0.001199960819085955, "loss": 0.786, "step": 7698 }, { "epoch": 0.20660691283812796, "grad_norm": 0.224609375, "learning_rate": 0.0011999604972763485, "loss": 0.6887, "step": 7699 }, { "epoch": 0.2066337483898669, "grad_norm": 0.263671875, "learning_rate": 0.0011999601741506053, "loss": 0.8025, "step": 7700 }, { "epoch": 0.20666058394160583, "grad_norm": 0.23046875, "learning_rate": 0.001199959849708726, "loss": 0.6995, "step": 7701 }, { "epoch": 0.2066874194933448, "grad_norm": 0.240234375, "learning_rate": 0.0011999595239507112, "loss": 0.7458, "step": 7702 }, { "epoch": 0.20671425504508373, "grad_norm": 0.271484375, "learning_rate": 0.0011999591968765617, "loss": 0.9298, "step": 7703 }, { "epoch": 0.20674109059682266, "grad_norm": 0.234375, "learning_rate": 0.0011999588684862782, "loss": 0.7654, "step": 7704 }, { "epoch": 0.20676792614856163, "grad_norm": 0.22265625, "learning_rate": 0.0011999585387798618, "loss": 0.7912, "step": 7705 }, { "epoch": 0.20679476170030056, "grad_norm": 0.2353515625, "learning_rate": 0.0011999582077573125, "loss": 0.688, "step": 7706 }, { "epoch": 0.2068215972520395, "grad_norm": 0.236328125, "learning_rate": 0.0011999578754186314, "loss": 0.7557, "step": 7707 }, { "epoch": 0.20684843280377846, "grad_norm": 0.2158203125, "learning_rate": 0.0011999575417638193, "loss": 0.6697, "step": 7708 }, { "epoch": 0.2068752683555174, "grad_norm": 0.228515625, "learning_rate": 0.001199957206792877, "loss": 0.7209, "step": 7709 }, { "epoch": 0.20690210390725633, "grad_norm": 0.2255859375, "learning_rate": 0.001199956870505805, "loss": 0.7411, "step": 7710 }, { "epoch": 0.20692893945899526, "grad_norm": 0.240234375, "learning_rate": 0.0011999565329026043, "loss": 0.7735, "step": 7711 }, { "epoch": 0.20695577501073423, "grad_norm": 0.248046875, "learning_rate": 0.0011999561939832753, "loss": 0.81, "step": 7712 }, { "epoch": 0.20698261056247316, "grad_norm": 0.220703125, "learning_rate": 0.001199955853747819, "loss": 0.6921, "step": 7713 }, { "epoch": 0.2070094461142121, "grad_norm": 0.232421875, "learning_rate": 0.0011999555121962359, "loss": 0.7094, "step": 7714 }, { "epoch": 0.20703628166595106, "grad_norm": 0.251953125, "learning_rate": 0.0011999551693285268, "loss": 0.82, "step": 7715 }, { "epoch": 0.20706311721769, "grad_norm": 0.23828125, "learning_rate": 0.001199954825144693, "loss": 0.7449, "step": 7716 }, { "epoch": 0.20708995276942893, "grad_norm": 0.23828125, "learning_rate": 0.0011999544796447346, "loss": 0.7879, "step": 7717 }, { "epoch": 0.2071167883211679, "grad_norm": 0.2353515625, "learning_rate": 0.0011999541328286524, "loss": 0.7167, "step": 7718 }, { "epoch": 0.20714362387290683, "grad_norm": 0.2294921875, "learning_rate": 0.0011999537846964477, "loss": 0.7413, "step": 7719 }, { "epoch": 0.20717045942464576, "grad_norm": 0.2353515625, "learning_rate": 0.0011999534352481207, "loss": 0.786, "step": 7720 }, { "epoch": 0.20719729497638473, "grad_norm": 0.23828125, "learning_rate": 0.0011999530844836724, "loss": 0.7135, "step": 7721 }, { "epoch": 0.20722413052812366, "grad_norm": 0.240234375, "learning_rate": 0.0011999527324031037, "loss": 0.7866, "step": 7722 }, { "epoch": 0.2072509660798626, "grad_norm": 0.2412109375, "learning_rate": 0.0011999523790064148, "loss": 0.8034, "step": 7723 }, { "epoch": 0.20727780163160153, "grad_norm": 0.2373046875, "learning_rate": 0.0011999520242936073, "loss": 0.7791, "step": 7724 }, { "epoch": 0.2073046371833405, "grad_norm": 0.25, "learning_rate": 0.0011999516682646815, "loss": 0.7395, "step": 7725 }, { "epoch": 0.20733147273507943, "grad_norm": 0.25390625, "learning_rate": 0.0011999513109196382, "loss": 0.8398, "step": 7726 }, { "epoch": 0.20735830828681837, "grad_norm": 0.2275390625, "learning_rate": 0.0011999509522584782, "loss": 0.7058, "step": 7727 }, { "epoch": 0.20738514383855733, "grad_norm": 0.244140625, "learning_rate": 0.0011999505922812024, "loss": 0.8385, "step": 7728 }, { "epoch": 0.20741197939029626, "grad_norm": 0.23046875, "learning_rate": 0.0011999502309878113, "loss": 0.7526, "step": 7729 }, { "epoch": 0.2074388149420352, "grad_norm": 0.244140625, "learning_rate": 0.001199949868378306, "loss": 0.8281, "step": 7730 }, { "epoch": 0.20746565049377416, "grad_norm": 0.2265625, "learning_rate": 0.0011999495044526874, "loss": 0.6679, "step": 7731 }, { "epoch": 0.2074924860455131, "grad_norm": 0.23046875, "learning_rate": 0.0011999491392109558, "loss": 0.7402, "step": 7732 }, { "epoch": 0.20751932159725203, "grad_norm": 0.228515625, "learning_rate": 0.0011999487726531124, "loss": 0.7401, "step": 7733 }, { "epoch": 0.207546157148991, "grad_norm": 0.2177734375, "learning_rate": 0.001199948404779158, "loss": 0.6772, "step": 7734 }, { "epoch": 0.20757299270072993, "grad_norm": 0.2177734375, "learning_rate": 0.001199948035589093, "loss": 0.5963, "step": 7735 }, { "epoch": 0.20759982825246887, "grad_norm": 0.2373046875, "learning_rate": 0.001199947665082919, "loss": 0.8011, "step": 7736 }, { "epoch": 0.20762666380420783, "grad_norm": 0.2412109375, "learning_rate": 0.001199947293260636, "loss": 0.7613, "step": 7737 }, { "epoch": 0.20765349935594676, "grad_norm": 0.2119140625, "learning_rate": 0.0011999469201222452, "loss": 0.7287, "step": 7738 }, { "epoch": 0.2076803349076857, "grad_norm": 0.22265625, "learning_rate": 0.0011999465456677472, "loss": 0.6923, "step": 7739 }, { "epoch": 0.20770717045942463, "grad_norm": 0.2451171875, "learning_rate": 0.0011999461698971434, "loss": 0.8033, "step": 7740 }, { "epoch": 0.2077340060111636, "grad_norm": 0.2490234375, "learning_rate": 0.001199945792810434, "loss": 0.8081, "step": 7741 }, { "epoch": 0.20776084156290253, "grad_norm": 0.2392578125, "learning_rate": 0.00119994541440762, "loss": 0.7886, "step": 7742 }, { "epoch": 0.20778767711464147, "grad_norm": 0.23046875, "learning_rate": 0.0011999450346887022, "loss": 0.7594, "step": 7743 }, { "epoch": 0.20781451266638043, "grad_norm": 0.2236328125, "learning_rate": 0.0011999446536536816, "loss": 0.7445, "step": 7744 }, { "epoch": 0.20784134821811936, "grad_norm": 0.2333984375, "learning_rate": 0.0011999442713025589, "loss": 0.7895, "step": 7745 }, { "epoch": 0.2078681837698583, "grad_norm": 0.2099609375, "learning_rate": 0.001199943887635335, "loss": 0.6525, "step": 7746 }, { "epoch": 0.20789501932159726, "grad_norm": 0.2431640625, "learning_rate": 0.001199943502652011, "loss": 0.7151, "step": 7747 }, { "epoch": 0.2079218548733362, "grad_norm": 0.220703125, "learning_rate": 0.001199943116352587, "loss": 0.7029, "step": 7748 }, { "epoch": 0.20794869042507513, "grad_norm": 0.2490234375, "learning_rate": 0.0011999427287370645, "loss": 0.8395, "step": 7749 }, { "epoch": 0.2079755259768141, "grad_norm": 0.240234375, "learning_rate": 0.001199942339805444, "loss": 0.7494, "step": 7750 }, { "epoch": 0.20800236152855303, "grad_norm": 0.2412109375, "learning_rate": 0.0011999419495577267, "loss": 0.8463, "step": 7751 }, { "epoch": 0.20802919708029197, "grad_norm": 0.236328125, "learning_rate": 0.0011999415579939132, "loss": 0.7696, "step": 7752 }, { "epoch": 0.2080560326320309, "grad_norm": 0.2373046875, "learning_rate": 0.0011999411651140044, "loss": 0.7244, "step": 7753 }, { "epoch": 0.20808286818376986, "grad_norm": 0.244140625, "learning_rate": 0.0011999407709180013, "loss": 0.8473, "step": 7754 }, { "epoch": 0.2081097037355088, "grad_norm": 0.248046875, "learning_rate": 0.0011999403754059047, "loss": 0.7783, "step": 7755 }, { "epoch": 0.20813653928724773, "grad_norm": 0.2255859375, "learning_rate": 0.0011999399785777153, "loss": 0.723, "step": 7756 }, { "epoch": 0.2081633748389867, "grad_norm": 0.2265625, "learning_rate": 0.001199939580433434, "loss": 0.7855, "step": 7757 }, { "epoch": 0.20819021039072563, "grad_norm": 0.2275390625, "learning_rate": 0.001199939180973062, "loss": 0.7944, "step": 7758 }, { "epoch": 0.20821704594246457, "grad_norm": 0.23046875, "learning_rate": 0.0011999387801965998, "loss": 0.7269, "step": 7759 }, { "epoch": 0.20824388149420353, "grad_norm": 0.23828125, "learning_rate": 0.0011999383781040483, "loss": 0.7792, "step": 7760 }, { "epoch": 0.20827071704594247, "grad_norm": 0.2197265625, "learning_rate": 0.001199937974695409, "loss": 0.6784, "step": 7761 }, { "epoch": 0.2082975525976814, "grad_norm": 0.248046875, "learning_rate": 0.0011999375699706816, "loss": 0.7438, "step": 7762 }, { "epoch": 0.20832438814942036, "grad_norm": 0.248046875, "learning_rate": 0.001199937163929868, "loss": 0.7421, "step": 7763 }, { "epoch": 0.2083512237011593, "grad_norm": 0.240234375, "learning_rate": 0.0011999367565729687, "loss": 0.7996, "step": 7764 }, { "epoch": 0.20837805925289823, "grad_norm": 0.234375, "learning_rate": 0.0011999363478999848, "loss": 0.779, "step": 7765 }, { "epoch": 0.20840489480463717, "grad_norm": 0.25, "learning_rate": 0.001199935937910917, "loss": 0.8237, "step": 7766 }, { "epoch": 0.20843173035637613, "grad_norm": 0.2265625, "learning_rate": 0.0011999355266057664, "loss": 0.655, "step": 7767 }, { "epoch": 0.20845856590811507, "grad_norm": 0.236328125, "learning_rate": 0.0011999351139845335, "loss": 0.7809, "step": 7768 }, { "epoch": 0.208485401459854, "grad_norm": 0.2197265625, "learning_rate": 0.0011999347000472196, "loss": 0.7724, "step": 7769 }, { "epoch": 0.20851223701159297, "grad_norm": 0.23828125, "learning_rate": 0.0011999342847938254, "loss": 0.7982, "step": 7770 }, { "epoch": 0.2085390725633319, "grad_norm": 0.2216796875, "learning_rate": 0.001199933868224352, "loss": 0.696, "step": 7771 }, { "epoch": 0.20856590811507084, "grad_norm": 0.2265625, "learning_rate": 0.0011999334503388, "loss": 0.7423, "step": 7772 }, { "epoch": 0.2085927436668098, "grad_norm": 0.2216796875, "learning_rate": 0.0011999330311371708, "loss": 0.735, "step": 7773 }, { "epoch": 0.20861957921854873, "grad_norm": 0.220703125, "learning_rate": 0.0011999326106194646, "loss": 0.6973, "step": 7774 }, { "epoch": 0.20864641477028767, "grad_norm": 0.228515625, "learning_rate": 0.0011999321887856832, "loss": 0.8364, "step": 7775 }, { "epoch": 0.20867325032202663, "grad_norm": 0.22265625, "learning_rate": 0.0011999317656358269, "loss": 0.7223, "step": 7776 }, { "epoch": 0.20870008587376557, "grad_norm": 0.240234375, "learning_rate": 0.0011999313411698968, "loss": 0.7298, "step": 7777 }, { "epoch": 0.2087269214255045, "grad_norm": 0.2314453125, "learning_rate": 0.0011999309153878936, "loss": 0.7203, "step": 7778 }, { "epoch": 0.20875375697724347, "grad_norm": 0.2177734375, "learning_rate": 0.001199930488289819, "loss": 0.6999, "step": 7779 }, { "epoch": 0.2087805925289824, "grad_norm": 0.2392578125, "learning_rate": 0.001199930059875673, "loss": 0.77, "step": 7780 }, { "epoch": 0.20880742808072134, "grad_norm": 0.234375, "learning_rate": 0.001199929630145457, "loss": 0.7386, "step": 7781 }, { "epoch": 0.20883426363246027, "grad_norm": 0.2470703125, "learning_rate": 0.0011999291990991722, "loss": 0.8299, "step": 7782 }, { "epoch": 0.20886109918419923, "grad_norm": 0.2275390625, "learning_rate": 0.0011999287667368188, "loss": 0.7512, "step": 7783 }, { "epoch": 0.20888793473593817, "grad_norm": 0.2275390625, "learning_rate": 0.0011999283330583984, "loss": 0.7179, "step": 7784 }, { "epoch": 0.2089147702876771, "grad_norm": 0.228515625, "learning_rate": 0.0011999278980639118, "loss": 0.7938, "step": 7785 }, { "epoch": 0.20894160583941607, "grad_norm": 0.2490234375, "learning_rate": 0.0011999274617533599, "loss": 0.8231, "step": 7786 }, { "epoch": 0.208968441391155, "grad_norm": 0.240234375, "learning_rate": 0.0011999270241267437, "loss": 0.832, "step": 7787 }, { "epoch": 0.20899527694289394, "grad_norm": 0.240234375, "learning_rate": 0.0011999265851840638, "loss": 0.7867, "step": 7788 }, { "epoch": 0.2090221124946329, "grad_norm": 0.23046875, "learning_rate": 0.0011999261449253218, "loss": 0.7858, "step": 7789 }, { "epoch": 0.20904894804637184, "grad_norm": 0.251953125, "learning_rate": 0.0011999257033505181, "loss": 0.877, "step": 7790 }, { "epoch": 0.20907578359811077, "grad_norm": 0.2255859375, "learning_rate": 0.001199925260459654, "loss": 0.7696, "step": 7791 }, { "epoch": 0.20910261914984973, "grad_norm": 0.2216796875, "learning_rate": 0.0011999248162527303, "loss": 0.7081, "step": 7792 }, { "epoch": 0.20912945470158867, "grad_norm": 0.224609375, "learning_rate": 0.0011999243707297481, "loss": 0.7526, "step": 7793 }, { "epoch": 0.2091562902533276, "grad_norm": 0.2470703125, "learning_rate": 0.0011999239238907086, "loss": 0.8558, "step": 7794 }, { "epoch": 0.20918312580506654, "grad_norm": 0.2255859375, "learning_rate": 0.0011999234757356122, "loss": 0.6989, "step": 7795 }, { "epoch": 0.2092099613568055, "grad_norm": 0.216796875, "learning_rate": 0.0011999230262644603, "loss": 0.6701, "step": 7796 }, { "epoch": 0.20923679690854444, "grad_norm": 0.232421875, "learning_rate": 0.0011999225754772537, "loss": 0.7222, "step": 7797 }, { "epoch": 0.20926363246028337, "grad_norm": 0.234375, "learning_rate": 0.0011999221233739935, "loss": 0.7751, "step": 7798 }, { "epoch": 0.20929046801202233, "grad_norm": 0.2412109375, "learning_rate": 0.0011999216699546808, "loss": 0.7562, "step": 7799 }, { "epoch": 0.20931730356376127, "grad_norm": 0.2490234375, "learning_rate": 0.0011999212152193162, "loss": 0.7523, "step": 7800 }, { "epoch": 0.2093441391155002, "grad_norm": 0.234375, "learning_rate": 0.0011999207591679012, "loss": 0.7915, "step": 7801 }, { "epoch": 0.20937097466723917, "grad_norm": 0.21875, "learning_rate": 0.0011999203018004366, "loss": 0.6761, "step": 7802 }, { "epoch": 0.2093978102189781, "grad_norm": 0.23046875, "learning_rate": 0.001199919843116923, "loss": 0.7064, "step": 7803 }, { "epoch": 0.20942464577071704, "grad_norm": 0.25390625, "learning_rate": 0.001199919383117362, "loss": 0.9129, "step": 7804 }, { "epoch": 0.209451481322456, "grad_norm": 0.236328125, "learning_rate": 0.0011999189218017544, "loss": 0.7594, "step": 7805 }, { "epoch": 0.20947831687419494, "grad_norm": 0.24609375, "learning_rate": 0.0011999184591701012, "loss": 0.7895, "step": 7806 }, { "epoch": 0.20950515242593387, "grad_norm": 0.224609375, "learning_rate": 0.0011999179952224034, "loss": 0.7191, "step": 7807 }, { "epoch": 0.20953198797767283, "grad_norm": 0.2197265625, "learning_rate": 0.0011999175299586618, "loss": 0.6911, "step": 7808 }, { "epoch": 0.20955882352941177, "grad_norm": 0.21875, "learning_rate": 0.0011999170633788779, "loss": 0.6616, "step": 7809 }, { "epoch": 0.2095856590811507, "grad_norm": 0.240234375, "learning_rate": 0.0011999165954830523, "loss": 0.7358, "step": 7810 }, { "epoch": 0.20961249463288964, "grad_norm": 0.240234375, "learning_rate": 0.001199916126271186, "loss": 0.7358, "step": 7811 }, { "epoch": 0.2096393301846286, "grad_norm": 0.22265625, "learning_rate": 0.0011999156557432806, "loss": 0.621, "step": 7812 }, { "epoch": 0.20966616573636754, "grad_norm": 0.228515625, "learning_rate": 0.0011999151838993365, "loss": 0.7092, "step": 7813 }, { "epoch": 0.20969300128810647, "grad_norm": 0.2333984375, "learning_rate": 0.0011999147107393552, "loss": 0.7651, "step": 7814 }, { "epoch": 0.20971983683984544, "grad_norm": 0.255859375, "learning_rate": 0.0011999142362633375, "loss": 0.8003, "step": 7815 }, { "epoch": 0.20974667239158437, "grad_norm": 0.234375, "learning_rate": 0.0011999137604712844, "loss": 0.7893, "step": 7816 }, { "epoch": 0.2097735079433233, "grad_norm": 0.23828125, "learning_rate": 0.001199913283363197, "loss": 0.7595, "step": 7817 }, { "epoch": 0.20980034349506227, "grad_norm": 0.224609375, "learning_rate": 0.0011999128049390761, "loss": 0.7038, "step": 7818 }, { "epoch": 0.2098271790468012, "grad_norm": 0.234375, "learning_rate": 0.0011999123251989234, "loss": 0.7477, "step": 7819 }, { "epoch": 0.20985401459854014, "grad_norm": 0.21484375, "learning_rate": 0.0011999118441427393, "loss": 0.6592, "step": 7820 }, { "epoch": 0.2098808501502791, "grad_norm": 0.251953125, "learning_rate": 0.0011999113617705253, "loss": 0.7788, "step": 7821 }, { "epoch": 0.20990768570201804, "grad_norm": 0.23046875, "learning_rate": 0.0011999108780822822, "loss": 0.7292, "step": 7822 }, { "epoch": 0.20993452125375697, "grad_norm": 0.2216796875, "learning_rate": 0.0011999103930780111, "loss": 0.8025, "step": 7823 }, { "epoch": 0.2099613568054959, "grad_norm": 0.2333984375, "learning_rate": 0.001199909906757713, "loss": 0.7355, "step": 7824 }, { "epoch": 0.20998819235723487, "grad_norm": 0.2255859375, "learning_rate": 0.0011999094191213892, "loss": 0.6972, "step": 7825 }, { "epoch": 0.2100150279089738, "grad_norm": 0.2421875, "learning_rate": 0.0011999089301690405, "loss": 0.7955, "step": 7826 }, { "epoch": 0.21004186346071274, "grad_norm": 0.251953125, "learning_rate": 0.0011999084399006682, "loss": 0.8459, "step": 7827 }, { "epoch": 0.2100686990124517, "grad_norm": 0.2431640625, "learning_rate": 0.0011999079483162732, "loss": 0.8061, "step": 7828 }, { "epoch": 0.21009553456419064, "grad_norm": 0.2373046875, "learning_rate": 0.0011999074554158568, "loss": 0.8446, "step": 7829 }, { "epoch": 0.21012237011592957, "grad_norm": 0.234375, "learning_rate": 0.00119990696119942, "loss": 0.7574, "step": 7830 }, { "epoch": 0.21014920566766854, "grad_norm": 0.236328125, "learning_rate": 0.0011999064656669635, "loss": 0.7643, "step": 7831 }, { "epoch": 0.21017604121940747, "grad_norm": 0.2421875, "learning_rate": 0.0011999059688184888, "loss": 0.7697, "step": 7832 }, { "epoch": 0.2102028767711464, "grad_norm": 0.259765625, "learning_rate": 0.0011999054706539971, "loss": 0.7705, "step": 7833 }, { "epoch": 0.21022971232288537, "grad_norm": 0.244140625, "learning_rate": 0.0011999049711734892, "loss": 0.7156, "step": 7834 }, { "epoch": 0.2102565478746243, "grad_norm": 0.22265625, "learning_rate": 0.0011999044703769662, "loss": 0.7444, "step": 7835 }, { "epoch": 0.21028338342636324, "grad_norm": 0.2314453125, "learning_rate": 0.0011999039682644293, "loss": 0.7844, "step": 7836 }, { "epoch": 0.2103102189781022, "grad_norm": 0.23828125, "learning_rate": 0.0011999034648358799, "loss": 0.7403, "step": 7837 }, { "epoch": 0.21033705452984114, "grad_norm": 0.25390625, "learning_rate": 0.0011999029600913186, "loss": 0.8509, "step": 7838 }, { "epoch": 0.21036389008158007, "grad_norm": 0.259765625, "learning_rate": 0.0011999024540307465, "loss": 0.7739, "step": 7839 }, { "epoch": 0.210390725633319, "grad_norm": 0.2333984375, "learning_rate": 0.001199901946654165, "loss": 0.7302, "step": 7840 }, { "epoch": 0.21041756118505797, "grad_norm": 0.2392578125, "learning_rate": 0.0011999014379615752, "loss": 0.8087, "step": 7841 }, { "epoch": 0.2104443967367969, "grad_norm": 0.236328125, "learning_rate": 0.0011999009279529782, "loss": 0.7059, "step": 7842 }, { "epoch": 0.21047123228853584, "grad_norm": 0.25390625, "learning_rate": 0.0011999004166283749, "loss": 0.7919, "step": 7843 }, { "epoch": 0.2104980678402748, "grad_norm": 0.2333984375, "learning_rate": 0.0011998999039877667, "loss": 0.7997, "step": 7844 }, { "epoch": 0.21052490339201374, "grad_norm": 0.2412109375, "learning_rate": 0.0011998993900311544, "loss": 0.7117, "step": 7845 }, { "epoch": 0.21055173894375268, "grad_norm": 0.2392578125, "learning_rate": 0.0011998988747585395, "loss": 0.7389, "step": 7846 }, { "epoch": 0.21057857449549164, "grad_norm": 0.2255859375, "learning_rate": 0.001199898358169923, "loss": 0.739, "step": 7847 }, { "epoch": 0.21060541004723057, "grad_norm": 0.2275390625, "learning_rate": 0.0011998978402653058, "loss": 0.6596, "step": 7848 }, { "epoch": 0.2106322455989695, "grad_norm": 0.2333984375, "learning_rate": 0.0011998973210446894, "loss": 0.6579, "step": 7849 }, { "epoch": 0.21065908115070847, "grad_norm": 0.25390625, "learning_rate": 0.0011998968005080748, "loss": 0.8165, "step": 7850 }, { "epoch": 0.2106859167024474, "grad_norm": 0.236328125, "learning_rate": 0.001199896278655463, "loss": 0.7797, "step": 7851 }, { "epoch": 0.21071275225418634, "grad_norm": 0.255859375, "learning_rate": 0.0011998957554868555, "loss": 0.8151, "step": 7852 }, { "epoch": 0.21073958780592528, "grad_norm": 0.2451171875, "learning_rate": 0.001199895231002253, "loss": 0.8055, "step": 7853 }, { "epoch": 0.21076642335766424, "grad_norm": 0.2373046875, "learning_rate": 0.0011998947052016568, "loss": 0.7776, "step": 7854 }, { "epoch": 0.21079325890940318, "grad_norm": 0.251953125, "learning_rate": 0.001199894178085068, "loss": 0.7532, "step": 7855 }, { "epoch": 0.2108200944611421, "grad_norm": 0.25, "learning_rate": 0.001199893649652488, "loss": 0.7912, "step": 7856 }, { "epoch": 0.21084693001288107, "grad_norm": 0.2333984375, "learning_rate": 0.001199893119903918, "loss": 0.7515, "step": 7857 }, { "epoch": 0.21087376556462, "grad_norm": 0.2177734375, "learning_rate": 0.001199892588839359, "loss": 0.6776, "step": 7858 }, { "epoch": 0.21090060111635894, "grad_norm": 0.224609375, "learning_rate": 0.001199892056458812, "loss": 0.669, "step": 7859 }, { "epoch": 0.2109274366680979, "grad_norm": 0.240234375, "learning_rate": 0.0011998915227622781, "loss": 0.8282, "step": 7860 }, { "epoch": 0.21095427221983684, "grad_norm": 0.2265625, "learning_rate": 0.001199890987749759, "loss": 0.6759, "step": 7861 }, { "epoch": 0.21098110777157578, "grad_norm": 0.2578125, "learning_rate": 0.0011998904514212555, "loss": 0.7928, "step": 7862 }, { "epoch": 0.21100794332331474, "grad_norm": 0.232421875, "learning_rate": 0.0011998899137767687, "loss": 0.7046, "step": 7863 }, { "epoch": 0.21103477887505367, "grad_norm": 0.2421875, "learning_rate": 0.0011998893748163002, "loss": 0.7469, "step": 7864 }, { "epoch": 0.2110616144267926, "grad_norm": 0.236328125, "learning_rate": 0.0011998888345398507, "loss": 0.7158, "step": 7865 }, { "epoch": 0.21108844997853154, "grad_norm": 0.234375, "learning_rate": 0.0011998882929474214, "loss": 0.7413, "step": 7866 }, { "epoch": 0.2111152855302705, "grad_norm": 0.2177734375, "learning_rate": 0.0011998877500390141, "loss": 0.6825, "step": 7867 }, { "epoch": 0.21114212108200944, "grad_norm": 0.2216796875, "learning_rate": 0.0011998872058146292, "loss": 0.7027, "step": 7868 }, { "epoch": 0.21116895663374838, "grad_norm": 0.255859375, "learning_rate": 0.0011998866602742685, "loss": 0.7928, "step": 7869 }, { "epoch": 0.21119579218548734, "grad_norm": 0.244140625, "learning_rate": 0.0011998861134179329, "loss": 0.7213, "step": 7870 }, { "epoch": 0.21122262773722628, "grad_norm": 0.244140625, "learning_rate": 0.0011998855652456237, "loss": 0.8277, "step": 7871 }, { "epoch": 0.2112494632889652, "grad_norm": 0.2255859375, "learning_rate": 0.001199885015757342, "loss": 0.6909, "step": 7872 }, { "epoch": 0.21127629884070417, "grad_norm": 0.236328125, "learning_rate": 0.001199884464953089, "loss": 0.7843, "step": 7873 }, { "epoch": 0.2113031343924431, "grad_norm": 0.2265625, "learning_rate": 0.001199883912832866, "loss": 0.7409, "step": 7874 }, { "epoch": 0.21132996994418204, "grad_norm": 0.240234375, "learning_rate": 0.0011998833593966744, "loss": 0.7662, "step": 7875 }, { "epoch": 0.211356805495921, "grad_norm": 0.2353515625, "learning_rate": 0.0011998828046445149, "loss": 0.7033, "step": 7876 }, { "epoch": 0.21138364104765994, "grad_norm": 0.22265625, "learning_rate": 0.001199882248576389, "loss": 0.6802, "step": 7877 }, { "epoch": 0.21141047659939888, "grad_norm": 0.2255859375, "learning_rate": 0.0011998816911922981, "loss": 0.7414, "step": 7878 }, { "epoch": 0.21143731215113784, "grad_norm": 0.2470703125, "learning_rate": 0.0011998811324922434, "loss": 0.7534, "step": 7879 }, { "epoch": 0.21146414770287678, "grad_norm": 0.2353515625, "learning_rate": 0.0011998805724762257, "loss": 0.6964, "step": 7880 }, { "epoch": 0.2114909832546157, "grad_norm": 0.255859375, "learning_rate": 0.0011998800111442465, "loss": 0.8124, "step": 7881 }, { "epoch": 0.21151781880635465, "grad_norm": 0.25390625, "learning_rate": 0.0011998794484963074, "loss": 0.8238, "step": 7882 }, { "epoch": 0.2115446543580936, "grad_norm": 0.234375, "learning_rate": 0.0011998788845324088, "loss": 0.7321, "step": 7883 }, { "epoch": 0.21157148990983254, "grad_norm": 0.251953125, "learning_rate": 0.001199878319252553, "loss": 0.7902, "step": 7884 }, { "epoch": 0.21159832546157148, "grad_norm": 0.228515625, "learning_rate": 0.0011998777526567401, "loss": 0.6832, "step": 7885 }, { "epoch": 0.21162516101331044, "grad_norm": 0.2353515625, "learning_rate": 0.001199877184744972, "loss": 0.7629, "step": 7886 }, { "epoch": 0.21165199656504938, "grad_norm": 0.23828125, "learning_rate": 0.00119987661551725, "loss": 0.7184, "step": 7887 }, { "epoch": 0.2116788321167883, "grad_norm": 0.2392578125, "learning_rate": 0.001199876044973575, "loss": 0.7438, "step": 7888 }, { "epoch": 0.21170566766852728, "grad_norm": 0.2294921875, "learning_rate": 0.0011998754731139486, "loss": 0.6421, "step": 7889 }, { "epoch": 0.2117325032202662, "grad_norm": 0.232421875, "learning_rate": 0.001199874899938372, "loss": 0.7184, "step": 7890 }, { "epoch": 0.21175933877200515, "grad_norm": 0.23828125, "learning_rate": 0.0011998743254468462, "loss": 0.727, "step": 7891 }, { "epoch": 0.2117861743237441, "grad_norm": 0.240234375, "learning_rate": 0.0011998737496393727, "loss": 0.7768, "step": 7892 }, { "epoch": 0.21181300987548304, "grad_norm": 0.228515625, "learning_rate": 0.0011998731725159527, "loss": 0.7119, "step": 7893 }, { "epoch": 0.21183984542722198, "grad_norm": 0.23828125, "learning_rate": 0.0011998725940765872, "loss": 0.7912, "step": 7894 }, { "epoch": 0.21186668097896091, "grad_norm": 0.21875, "learning_rate": 0.001199872014321278, "loss": 0.6793, "step": 7895 }, { "epoch": 0.21189351653069988, "grad_norm": 0.2353515625, "learning_rate": 0.001199871433250026, "loss": 0.749, "step": 7896 }, { "epoch": 0.2119203520824388, "grad_norm": 0.2470703125, "learning_rate": 0.0011998708508628325, "loss": 0.7928, "step": 7897 }, { "epoch": 0.21194718763417775, "grad_norm": 0.234375, "learning_rate": 0.0011998702671596987, "loss": 0.7501, "step": 7898 }, { "epoch": 0.2119740231859167, "grad_norm": 0.2275390625, "learning_rate": 0.0011998696821406264, "loss": 0.7367, "step": 7899 }, { "epoch": 0.21200085873765565, "grad_norm": 0.2333984375, "learning_rate": 0.0011998690958056162, "loss": 0.7124, "step": 7900 }, { "epoch": 0.21202769428939458, "grad_norm": 0.2197265625, "learning_rate": 0.0011998685081546698, "loss": 0.6409, "step": 7901 }, { "epoch": 0.21205452984113354, "grad_norm": 0.2333984375, "learning_rate": 0.0011998679191877882, "loss": 0.7641, "step": 7902 }, { "epoch": 0.21208136539287248, "grad_norm": 0.248046875, "learning_rate": 0.001199867328904973, "loss": 0.8362, "step": 7903 }, { "epoch": 0.2121082009446114, "grad_norm": 0.220703125, "learning_rate": 0.0011998667373062256, "loss": 0.7005, "step": 7904 }, { "epoch": 0.21213503649635038, "grad_norm": 0.2216796875, "learning_rate": 0.0011998661443915468, "loss": 0.719, "step": 7905 }, { "epoch": 0.2121618720480893, "grad_norm": 0.2294921875, "learning_rate": 0.0011998655501609381, "loss": 0.7136, "step": 7906 }, { "epoch": 0.21218870759982825, "grad_norm": 0.2392578125, "learning_rate": 0.0011998649546144011, "loss": 0.7823, "step": 7907 }, { "epoch": 0.2122155431515672, "grad_norm": 0.2412109375, "learning_rate": 0.0011998643577519367, "loss": 0.7004, "step": 7908 }, { "epoch": 0.21224237870330614, "grad_norm": 0.255859375, "learning_rate": 0.0011998637595735464, "loss": 0.8421, "step": 7909 }, { "epoch": 0.21226921425504508, "grad_norm": 0.259765625, "learning_rate": 0.0011998631600792316, "loss": 0.9221, "step": 7910 }, { "epoch": 0.21229604980678402, "grad_norm": 0.244140625, "learning_rate": 0.0011998625592689932, "loss": 0.7954, "step": 7911 }, { "epoch": 0.21232288535852298, "grad_norm": 0.2177734375, "learning_rate": 0.001199861957142833, "loss": 0.6493, "step": 7912 }, { "epoch": 0.2123497209102619, "grad_norm": 0.24609375, "learning_rate": 0.0011998613537007525, "loss": 0.7757, "step": 7913 }, { "epoch": 0.21237655646200085, "grad_norm": 0.234375, "learning_rate": 0.0011998607489427523, "loss": 0.7704, "step": 7914 }, { "epoch": 0.2124033920137398, "grad_norm": 0.20703125, "learning_rate": 0.0011998601428688342, "loss": 0.6212, "step": 7915 }, { "epoch": 0.21243022756547875, "grad_norm": 0.2431640625, "learning_rate": 0.0011998595354789994, "loss": 0.8502, "step": 7916 }, { "epoch": 0.21245706311721768, "grad_norm": 0.2275390625, "learning_rate": 0.0011998589267732493, "loss": 0.7795, "step": 7917 }, { "epoch": 0.21248389866895664, "grad_norm": 0.2421875, "learning_rate": 0.0011998583167515853, "loss": 0.8033, "step": 7918 }, { "epoch": 0.21251073422069558, "grad_norm": 0.228515625, "learning_rate": 0.0011998577054140084, "loss": 0.704, "step": 7919 }, { "epoch": 0.21253756977243451, "grad_norm": 0.2099609375, "learning_rate": 0.0011998570927605203, "loss": 0.5969, "step": 7920 }, { "epoch": 0.21256440532417348, "grad_norm": 0.2255859375, "learning_rate": 0.001199856478791122, "loss": 0.6992, "step": 7921 }, { "epoch": 0.2125912408759124, "grad_norm": 0.2333984375, "learning_rate": 0.0011998558635058154, "loss": 0.7273, "step": 7922 }, { "epoch": 0.21261807642765135, "grad_norm": 0.23046875, "learning_rate": 0.0011998552469046015, "loss": 0.7364, "step": 7923 }, { "epoch": 0.21264491197939028, "grad_norm": 0.2353515625, "learning_rate": 0.0011998546289874812, "loss": 0.6803, "step": 7924 }, { "epoch": 0.21267174753112925, "grad_norm": 0.23828125, "learning_rate": 0.0011998540097544568, "loss": 0.6902, "step": 7925 }, { "epoch": 0.21269858308286818, "grad_norm": 0.25, "learning_rate": 0.0011998533892055292, "loss": 0.822, "step": 7926 }, { "epoch": 0.21272541863460712, "grad_norm": 0.2412109375, "learning_rate": 0.0011998527673406996, "loss": 0.7819, "step": 7927 }, { "epoch": 0.21275225418634608, "grad_norm": 0.240234375, "learning_rate": 0.0011998521441599695, "loss": 0.7848, "step": 7928 }, { "epoch": 0.21277908973808501, "grad_norm": 0.234375, "learning_rate": 0.0011998515196633402, "loss": 0.7805, "step": 7929 }, { "epoch": 0.21280592528982395, "grad_norm": 0.22265625, "learning_rate": 0.0011998508938508132, "loss": 0.7049, "step": 7930 }, { "epoch": 0.2128327608415629, "grad_norm": 0.2314453125, "learning_rate": 0.0011998502667223898, "loss": 0.6862, "step": 7931 }, { "epoch": 0.21285959639330185, "grad_norm": 0.2109375, "learning_rate": 0.0011998496382780715, "loss": 0.6584, "step": 7932 }, { "epoch": 0.21288643194504078, "grad_norm": 0.25, "learning_rate": 0.0011998490085178594, "loss": 0.7813, "step": 7933 }, { "epoch": 0.21291326749677975, "grad_norm": 0.2490234375, "learning_rate": 0.0011998483774417553, "loss": 0.8347, "step": 7934 }, { "epoch": 0.21294010304851868, "grad_norm": 0.2333984375, "learning_rate": 0.0011998477450497601, "loss": 0.7654, "step": 7935 }, { "epoch": 0.21296693860025762, "grad_norm": 0.2265625, "learning_rate": 0.0011998471113418756, "loss": 0.693, "step": 7936 }, { "epoch": 0.21299377415199658, "grad_norm": 0.2294921875, "learning_rate": 0.0011998464763181029, "loss": 0.6782, "step": 7937 }, { "epoch": 0.21302060970373551, "grad_norm": 0.2255859375, "learning_rate": 0.0011998458399784435, "loss": 0.7363, "step": 7938 }, { "epoch": 0.21304744525547445, "grad_norm": 0.2451171875, "learning_rate": 0.0011998452023228988, "loss": 0.7155, "step": 7939 }, { "epoch": 0.21307428080721338, "grad_norm": 0.2109375, "learning_rate": 0.0011998445633514703, "loss": 0.6561, "step": 7940 }, { "epoch": 0.21310111635895235, "grad_norm": 0.2392578125, "learning_rate": 0.0011998439230641593, "loss": 0.7679, "step": 7941 }, { "epoch": 0.21312795191069128, "grad_norm": 0.23828125, "learning_rate": 0.001199843281460967, "loss": 0.8518, "step": 7942 }, { "epoch": 0.21315478746243022, "grad_norm": 0.2490234375, "learning_rate": 0.0011998426385418954, "loss": 0.8464, "step": 7943 }, { "epoch": 0.21318162301416918, "grad_norm": 0.240234375, "learning_rate": 0.0011998419943069453, "loss": 0.8284, "step": 7944 }, { "epoch": 0.21320845856590812, "grad_norm": 0.2294921875, "learning_rate": 0.0011998413487561183, "loss": 0.713, "step": 7945 }, { "epoch": 0.21323529411764705, "grad_norm": 0.2373046875, "learning_rate": 0.001199840701889416, "loss": 0.8047, "step": 7946 }, { "epoch": 0.213262129669386, "grad_norm": 0.23046875, "learning_rate": 0.0011998400537068395, "loss": 0.725, "step": 7947 }, { "epoch": 0.21328896522112495, "grad_norm": 0.2431640625, "learning_rate": 0.0011998394042083906, "loss": 0.8297, "step": 7948 }, { "epoch": 0.21331580077286388, "grad_norm": 0.23828125, "learning_rate": 0.0011998387533940702, "loss": 0.7766, "step": 7949 }, { "epoch": 0.21334263632460285, "grad_norm": 0.244140625, "learning_rate": 0.0011998381012638805, "loss": 0.8341, "step": 7950 }, { "epoch": 0.21336947187634178, "grad_norm": 0.267578125, "learning_rate": 0.0011998374478178221, "loss": 0.8805, "step": 7951 }, { "epoch": 0.21339630742808072, "grad_norm": 0.2392578125, "learning_rate": 0.001199836793055897, "loss": 0.7678, "step": 7952 }, { "epoch": 0.21342314297981965, "grad_norm": 0.2373046875, "learning_rate": 0.0011998361369781063, "loss": 0.7414, "step": 7953 }, { "epoch": 0.21344997853155862, "grad_norm": 0.2431640625, "learning_rate": 0.0011998354795844516, "loss": 0.7224, "step": 7954 }, { "epoch": 0.21347681408329755, "grad_norm": 0.224609375, "learning_rate": 0.0011998348208749345, "loss": 0.7134, "step": 7955 }, { "epoch": 0.21350364963503649, "grad_norm": 0.2578125, "learning_rate": 0.0011998341608495562, "loss": 0.8717, "step": 7956 }, { "epoch": 0.21353048518677545, "grad_norm": 0.234375, "learning_rate": 0.0011998334995083181, "loss": 0.773, "step": 7957 }, { "epoch": 0.21355732073851438, "grad_norm": 0.228515625, "learning_rate": 0.001199832836851222, "loss": 0.7243, "step": 7958 }, { "epoch": 0.21358415629025332, "grad_norm": 0.251953125, "learning_rate": 0.001199832172878269, "loss": 0.8232, "step": 7959 }, { "epoch": 0.21361099184199228, "grad_norm": 0.224609375, "learning_rate": 0.0011998315075894609, "loss": 0.6754, "step": 7960 }, { "epoch": 0.21363782739373122, "grad_norm": 0.24609375, "learning_rate": 0.0011998308409847988, "loss": 0.821, "step": 7961 }, { "epoch": 0.21366466294547015, "grad_norm": 0.22265625, "learning_rate": 0.001199830173064284, "loss": 0.7481, "step": 7962 }, { "epoch": 0.21369149849720911, "grad_norm": 0.2353515625, "learning_rate": 0.0011998295038279185, "loss": 0.7627, "step": 7963 }, { "epoch": 0.21371833404894805, "grad_norm": 0.224609375, "learning_rate": 0.0011998288332757037, "loss": 0.6884, "step": 7964 }, { "epoch": 0.21374516960068699, "grad_norm": 0.2490234375, "learning_rate": 0.0011998281614076407, "loss": 0.7034, "step": 7965 }, { "epoch": 0.21377200515242592, "grad_norm": 0.23828125, "learning_rate": 0.0011998274882237315, "loss": 0.7226, "step": 7966 }, { "epoch": 0.21379884070416488, "grad_norm": 0.2412109375, "learning_rate": 0.0011998268137239768, "loss": 0.7577, "step": 7967 }, { "epoch": 0.21382567625590382, "grad_norm": 0.23828125, "learning_rate": 0.001199826137908379, "loss": 0.724, "step": 7968 }, { "epoch": 0.21385251180764275, "grad_norm": 0.259765625, "learning_rate": 0.0011998254607769388, "loss": 0.8677, "step": 7969 }, { "epoch": 0.21387934735938172, "grad_norm": 0.244140625, "learning_rate": 0.0011998247823296582, "loss": 0.664, "step": 7970 }, { "epoch": 0.21390618291112065, "grad_norm": 0.234375, "learning_rate": 0.0011998241025665383, "loss": 0.7372, "step": 7971 }, { "epoch": 0.2139330184628596, "grad_norm": 0.2353515625, "learning_rate": 0.001199823421487581, "loss": 0.76, "step": 7972 }, { "epoch": 0.21395985401459855, "grad_norm": 0.2412109375, "learning_rate": 0.0011998227390927875, "loss": 0.7568, "step": 7973 }, { "epoch": 0.21398668956633748, "grad_norm": 0.259765625, "learning_rate": 0.0011998220553821593, "loss": 0.8023, "step": 7974 }, { "epoch": 0.21401352511807642, "grad_norm": 0.22265625, "learning_rate": 0.001199821370355698, "loss": 0.7489, "step": 7975 }, { "epoch": 0.21404036066981538, "grad_norm": 0.234375, "learning_rate": 0.0011998206840134053, "loss": 0.8007, "step": 7976 }, { "epoch": 0.21406719622155432, "grad_norm": 0.26171875, "learning_rate": 0.001199819996355282, "loss": 0.8038, "step": 7977 }, { "epoch": 0.21409403177329325, "grad_norm": 0.2236328125, "learning_rate": 0.0011998193073813306, "loss": 0.7143, "step": 7978 }, { "epoch": 0.21412086732503222, "grad_norm": 0.2236328125, "learning_rate": 0.001199818617091552, "loss": 0.664, "step": 7979 }, { "epoch": 0.21414770287677115, "grad_norm": 0.251953125, "learning_rate": 0.0011998179254859478, "loss": 0.8419, "step": 7980 }, { "epoch": 0.2141745384285101, "grad_norm": 0.212890625, "learning_rate": 0.0011998172325645194, "loss": 0.5769, "step": 7981 }, { "epoch": 0.21420137398024902, "grad_norm": 0.240234375, "learning_rate": 0.0011998165383272686, "loss": 0.7874, "step": 7982 }, { "epoch": 0.21422820953198798, "grad_norm": 0.2470703125, "learning_rate": 0.0011998158427741967, "loss": 0.8896, "step": 7983 }, { "epoch": 0.21425504508372692, "grad_norm": 0.2470703125, "learning_rate": 0.0011998151459053053, "loss": 0.7721, "step": 7984 }, { "epoch": 0.21428188063546585, "grad_norm": 0.2216796875, "learning_rate": 0.001199814447720596, "loss": 0.6804, "step": 7985 }, { "epoch": 0.21430871618720482, "grad_norm": 0.2314453125, "learning_rate": 0.0011998137482200702, "loss": 0.7292, "step": 7986 }, { "epoch": 0.21433555173894375, "grad_norm": 0.2392578125, "learning_rate": 0.0011998130474037297, "loss": 0.7534, "step": 7987 }, { "epoch": 0.2143623872906827, "grad_norm": 0.251953125, "learning_rate": 0.0011998123452715756, "loss": 0.8594, "step": 7988 }, { "epoch": 0.21438922284242165, "grad_norm": 0.2314453125, "learning_rate": 0.00119981164182361, "loss": 0.6743, "step": 7989 }, { "epoch": 0.21441605839416059, "grad_norm": 0.248046875, "learning_rate": 0.0011998109370598337, "loss": 0.78, "step": 7990 }, { "epoch": 0.21444289394589952, "grad_norm": 0.232421875, "learning_rate": 0.001199810230980249, "loss": 0.74, "step": 7991 }, { "epoch": 0.21446972949763848, "grad_norm": 0.259765625, "learning_rate": 0.001199809523584857, "loss": 0.798, "step": 7992 }, { "epoch": 0.21449656504937742, "grad_norm": 0.2490234375, "learning_rate": 0.0011998088148736596, "loss": 0.7537, "step": 7993 }, { "epoch": 0.21452340060111635, "grad_norm": 0.2314453125, "learning_rate": 0.0011998081048466577, "loss": 0.7496, "step": 7994 }, { "epoch": 0.2145502361528553, "grad_norm": 0.24609375, "learning_rate": 0.0011998073935038534, "loss": 0.8341, "step": 7995 }, { "epoch": 0.21457707170459425, "grad_norm": 0.271484375, "learning_rate": 0.0011998066808452483, "loss": 0.9459, "step": 7996 }, { "epoch": 0.2146039072563332, "grad_norm": 0.2734375, "learning_rate": 0.0011998059668708438, "loss": 0.8118, "step": 7997 }, { "epoch": 0.21463074280807212, "grad_norm": 0.244140625, "learning_rate": 0.0011998052515806413, "loss": 0.8364, "step": 7998 }, { "epoch": 0.21465757835981109, "grad_norm": 0.22265625, "learning_rate": 0.0011998045349746426, "loss": 0.6996, "step": 7999 }, { "epoch": 0.21468441391155002, "grad_norm": 0.236328125, "learning_rate": 0.0011998038170528494, "loss": 0.7338, "step": 8000 }, { "epoch": 0.21471124946328896, "grad_norm": 0.2138671875, "learning_rate": 0.001199803097815263, "loss": 0.6588, "step": 8001 }, { "epoch": 0.21473808501502792, "grad_norm": 0.21875, "learning_rate": 0.001199802377261885, "loss": 0.7157, "step": 8002 }, { "epoch": 0.21476492056676685, "grad_norm": 0.236328125, "learning_rate": 0.001199801655392717, "loss": 0.7617, "step": 8003 }, { "epoch": 0.2147917561185058, "grad_norm": 0.2158203125, "learning_rate": 0.0011998009322077609, "loss": 0.7533, "step": 8004 }, { "epoch": 0.21481859167024475, "grad_norm": 0.2197265625, "learning_rate": 0.0011998002077070176, "loss": 0.687, "step": 8005 }, { "epoch": 0.2148454272219837, "grad_norm": 0.2216796875, "learning_rate": 0.0011997994818904894, "loss": 0.6876, "step": 8006 }, { "epoch": 0.21487226277372262, "grad_norm": 0.255859375, "learning_rate": 0.0011997987547581776, "loss": 0.8315, "step": 8007 }, { "epoch": 0.21489909832546158, "grad_norm": 0.2333984375, "learning_rate": 0.0011997980263100836, "loss": 0.6857, "step": 8008 }, { "epoch": 0.21492593387720052, "grad_norm": 0.2314453125, "learning_rate": 0.0011997972965462095, "loss": 0.7696, "step": 8009 }, { "epoch": 0.21495276942893946, "grad_norm": 0.2412109375, "learning_rate": 0.0011997965654665563, "loss": 0.837, "step": 8010 }, { "epoch": 0.2149796049806784, "grad_norm": 0.23046875, "learning_rate": 0.001199795833071126, "loss": 0.7534, "step": 8011 }, { "epoch": 0.21500644053241735, "grad_norm": 0.2265625, "learning_rate": 0.0011997950993599203, "loss": 0.7403, "step": 8012 }, { "epoch": 0.2150332760841563, "grad_norm": 0.2236328125, "learning_rate": 0.0011997943643329405, "loss": 0.7622, "step": 8013 }, { "epoch": 0.21506011163589522, "grad_norm": 0.2275390625, "learning_rate": 0.001199793627990188, "loss": 0.7227, "step": 8014 }, { "epoch": 0.2150869471876342, "grad_norm": 0.240234375, "learning_rate": 0.001199792890331665, "loss": 0.7881, "step": 8015 }, { "epoch": 0.21511378273937312, "grad_norm": 0.216796875, "learning_rate": 0.001199792151357373, "loss": 0.6854, "step": 8016 }, { "epoch": 0.21514061829111206, "grad_norm": 0.2158203125, "learning_rate": 0.0011997914110673132, "loss": 0.6612, "step": 8017 }, { "epoch": 0.21516745384285102, "grad_norm": 0.2431640625, "learning_rate": 0.0011997906694614876, "loss": 0.8849, "step": 8018 }, { "epoch": 0.21519428939458995, "grad_norm": 0.251953125, "learning_rate": 0.001199789926539898, "loss": 0.7703, "step": 8019 }, { "epoch": 0.2152211249463289, "grad_norm": 0.236328125, "learning_rate": 0.0011997891823025456, "loss": 0.7979, "step": 8020 }, { "epoch": 0.21524796049806785, "grad_norm": 0.2470703125, "learning_rate": 0.001199788436749432, "loss": 0.7702, "step": 8021 }, { "epoch": 0.2152747960498068, "grad_norm": 0.2177734375, "learning_rate": 0.0011997876898805593, "loss": 0.6663, "step": 8022 }, { "epoch": 0.21530163160154572, "grad_norm": 0.2265625, "learning_rate": 0.0011997869416959287, "loss": 0.7511, "step": 8023 }, { "epoch": 0.21532846715328466, "grad_norm": 0.248046875, "learning_rate": 0.001199786192195542, "loss": 0.8742, "step": 8024 }, { "epoch": 0.21535530270502362, "grad_norm": 0.2216796875, "learning_rate": 0.001199785441379401, "loss": 0.7382, "step": 8025 }, { "epoch": 0.21538213825676256, "grad_norm": 0.244140625, "learning_rate": 0.0011997846892475071, "loss": 0.8959, "step": 8026 }, { "epoch": 0.2154089738085015, "grad_norm": 0.2373046875, "learning_rate": 0.0011997839357998621, "loss": 0.6896, "step": 8027 }, { "epoch": 0.21543580936024045, "grad_norm": 0.23828125, "learning_rate": 0.0011997831810364676, "loss": 0.8294, "step": 8028 }, { "epoch": 0.2154626449119794, "grad_norm": 0.2216796875, "learning_rate": 0.0011997824249573251, "loss": 0.7029, "step": 8029 }, { "epoch": 0.21548948046371832, "grad_norm": 0.220703125, "learning_rate": 0.0011997816675624366, "loss": 0.6695, "step": 8030 }, { "epoch": 0.2155163160154573, "grad_norm": 0.2177734375, "learning_rate": 0.0011997809088518034, "loss": 0.7335, "step": 8031 }, { "epoch": 0.21554315156719622, "grad_norm": 0.232421875, "learning_rate": 0.0011997801488254276, "loss": 0.7387, "step": 8032 }, { "epoch": 0.21556998711893516, "grad_norm": 0.2421875, "learning_rate": 0.0011997793874833104, "loss": 0.7901, "step": 8033 }, { "epoch": 0.21559682267067412, "grad_norm": 0.2216796875, "learning_rate": 0.0011997786248254538, "loss": 0.7048, "step": 8034 }, { "epoch": 0.21562365822241306, "grad_norm": 0.234375, "learning_rate": 0.0011997778608518594, "loss": 0.7752, "step": 8035 }, { "epoch": 0.215650493774152, "grad_norm": 0.236328125, "learning_rate": 0.0011997770955625286, "loss": 0.7216, "step": 8036 }, { "epoch": 0.21567732932589095, "grad_norm": 0.2294921875, "learning_rate": 0.0011997763289574635, "loss": 0.6912, "step": 8037 }, { "epoch": 0.2157041648776299, "grad_norm": 0.2265625, "learning_rate": 0.0011997755610366656, "loss": 0.6959, "step": 8038 }, { "epoch": 0.21573100042936882, "grad_norm": 0.208984375, "learning_rate": 0.0011997747918001363, "loss": 0.6246, "step": 8039 }, { "epoch": 0.21575783598110776, "grad_norm": 0.236328125, "learning_rate": 0.0011997740212478778, "loss": 0.7276, "step": 8040 }, { "epoch": 0.21578467153284672, "grad_norm": 0.2294921875, "learning_rate": 0.0011997732493798916, "loss": 0.7879, "step": 8041 }, { "epoch": 0.21581150708458566, "grad_norm": 0.216796875, "learning_rate": 0.0011997724761961793, "loss": 0.7164, "step": 8042 }, { "epoch": 0.2158383426363246, "grad_norm": 0.2392578125, "learning_rate": 0.0011997717016967427, "loss": 0.7909, "step": 8043 }, { "epoch": 0.21586517818806356, "grad_norm": 0.2294921875, "learning_rate": 0.0011997709258815834, "loss": 0.7187, "step": 8044 }, { "epoch": 0.2158920137398025, "grad_norm": 0.228515625, "learning_rate": 0.001199770148750703, "loss": 0.6996, "step": 8045 }, { "epoch": 0.21591884929154143, "grad_norm": 0.2333984375, "learning_rate": 0.0011997693703041034, "loss": 0.6905, "step": 8046 }, { "epoch": 0.2159456848432804, "grad_norm": 0.23828125, "learning_rate": 0.0011997685905417864, "loss": 0.7861, "step": 8047 }, { "epoch": 0.21597252039501932, "grad_norm": 0.2392578125, "learning_rate": 0.0011997678094637534, "loss": 0.7941, "step": 8048 }, { "epoch": 0.21599935594675826, "grad_norm": 0.2265625, "learning_rate": 0.0011997670270700064, "loss": 0.7228, "step": 8049 }, { "epoch": 0.21602619149849722, "grad_norm": 0.251953125, "learning_rate": 0.001199766243360547, "loss": 0.8317, "step": 8050 }, { "epoch": 0.21605302705023616, "grad_norm": 0.2421875, "learning_rate": 0.001199765458335377, "loss": 0.7139, "step": 8051 }, { "epoch": 0.2160798626019751, "grad_norm": 0.244140625, "learning_rate": 0.0011997646719944979, "loss": 0.7282, "step": 8052 }, { "epoch": 0.21610669815371403, "grad_norm": 0.25, "learning_rate": 0.0011997638843379115, "loss": 0.7656, "step": 8053 }, { "epoch": 0.216133533705453, "grad_norm": 0.2119140625, "learning_rate": 0.0011997630953656197, "loss": 0.6944, "step": 8054 }, { "epoch": 0.21616036925719193, "grad_norm": 0.21875, "learning_rate": 0.001199762305077624, "loss": 0.7154, "step": 8055 }, { "epoch": 0.21618720480893086, "grad_norm": 0.251953125, "learning_rate": 0.0011997615134739263, "loss": 0.8766, "step": 8056 }, { "epoch": 0.21621404036066982, "grad_norm": 0.248046875, "learning_rate": 0.0011997607205545284, "loss": 0.807, "step": 8057 }, { "epoch": 0.21624087591240876, "grad_norm": 0.263671875, "learning_rate": 0.0011997599263194319, "loss": 0.7222, "step": 8058 }, { "epoch": 0.2162677114641477, "grad_norm": 0.25, "learning_rate": 0.0011997591307686384, "loss": 0.7956, "step": 8059 }, { "epoch": 0.21629454701588666, "grad_norm": 0.251953125, "learning_rate": 0.00119975833390215, "loss": 0.8523, "step": 8060 }, { "epoch": 0.2163213825676256, "grad_norm": 0.23828125, "learning_rate": 0.0011997575357199682, "loss": 0.7813, "step": 8061 }, { "epoch": 0.21634821811936453, "grad_norm": 0.2490234375, "learning_rate": 0.001199756736222095, "loss": 0.7847, "step": 8062 }, { "epoch": 0.2163750536711035, "grad_norm": 0.2451171875, "learning_rate": 0.0011997559354085317, "loss": 0.6579, "step": 8063 }, { "epoch": 0.21640188922284243, "grad_norm": 0.248046875, "learning_rate": 0.0011997551332792805, "loss": 0.7066, "step": 8064 }, { "epoch": 0.21642872477458136, "grad_norm": 0.2255859375, "learning_rate": 0.001199754329834343, "loss": 0.7648, "step": 8065 }, { "epoch": 0.2164555603263203, "grad_norm": 0.2216796875, "learning_rate": 0.0011997535250737208, "loss": 0.7444, "step": 8066 }, { "epoch": 0.21648239587805926, "grad_norm": 0.234375, "learning_rate": 0.001199752718997416, "loss": 0.6883, "step": 8067 }, { "epoch": 0.2165092314297982, "grad_norm": 0.263671875, "learning_rate": 0.00119975191160543, "loss": 0.9265, "step": 8068 }, { "epoch": 0.21653606698153713, "grad_norm": 0.2353515625, "learning_rate": 0.0011997511028977647, "loss": 0.6827, "step": 8069 }, { "epoch": 0.2165629025332761, "grad_norm": 0.2265625, "learning_rate": 0.0011997502928744223, "loss": 0.7161, "step": 8070 }, { "epoch": 0.21658973808501503, "grad_norm": 0.228515625, "learning_rate": 0.001199749481535404, "loss": 0.8246, "step": 8071 }, { "epoch": 0.21661657363675396, "grad_norm": 0.26953125, "learning_rate": 0.0011997486688807117, "loss": 0.8449, "step": 8072 }, { "epoch": 0.21664340918849292, "grad_norm": 0.2470703125, "learning_rate": 0.0011997478549103473, "loss": 0.7443, "step": 8073 }, { "epoch": 0.21667024474023186, "grad_norm": 0.255859375, "learning_rate": 0.0011997470396243127, "loss": 0.7281, "step": 8074 }, { "epoch": 0.2166970802919708, "grad_norm": 0.2333984375, "learning_rate": 0.0011997462230226093, "loss": 0.7619, "step": 8075 }, { "epoch": 0.21672391584370976, "grad_norm": 0.2431640625, "learning_rate": 0.0011997454051052394, "loss": 0.7238, "step": 8076 }, { "epoch": 0.2167507513954487, "grad_norm": 0.2470703125, "learning_rate": 0.0011997445858722043, "loss": 0.7213, "step": 8077 }, { "epoch": 0.21677758694718763, "grad_norm": 0.2373046875, "learning_rate": 0.001199743765323506, "loss": 0.7152, "step": 8078 }, { "epoch": 0.2168044224989266, "grad_norm": 0.23828125, "learning_rate": 0.0011997429434591467, "loss": 0.7084, "step": 8079 }, { "epoch": 0.21683125805066553, "grad_norm": 0.244140625, "learning_rate": 0.0011997421202791273, "loss": 0.8229, "step": 8080 }, { "epoch": 0.21685809360240446, "grad_norm": 0.2177734375, "learning_rate": 0.0011997412957834505, "loss": 0.6722, "step": 8081 }, { "epoch": 0.2168849291541434, "grad_norm": 0.2421875, "learning_rate": 0.0011997404699721178, "loss": 0.8162, "step": 8082 }, { "epoch": 0.21691176470588236, "grad_norm": 0.251953125, "learning_rate": 0.0011997396428451307, "loss": 0.6784, "step": 8083 }, { "epoch": 0.2169386002576213, "grad_norm": 0.2451171875, "learning_rate": 0.0011997388144024913, "loss": 0.8586, "step": 8084 }, { "epoch": 0.21696543580936023, "grad_norm": 0.255859375, "learning_rate": 0.0011997379846442014, "loss": 0.8574, "step": 8085 }, { "epoch": 0.2169922713610992, "grad_norm": 0.25, "learning_rate": 0.001199737153570263, "loss": 0.7696, "step": 8086 }, { "epoch": 0.21701910691283813, "grad_norm": 0.2451171875, "learning_rate": 0.0011997363211806776, "loss": 0.7896, "step": 8087 }, { "epoch": 0.21704594246457706, "grad_norm": 0.2294921875, "learning_rate": 0.0011997354874754471, "loss": 0.7479, "step": 8088 }, { "epoch": 0.21707277801631603, "grad_norm": 0.244140625, "learning_rate": 0.0011997346524545735, "loss": 0.7484, "step": 8089 }, { "epoch": 0.21709961356805496, "grad_norm": 0.2470703125, "learning_rate": 0.0011997338161180585, "loss": 0.7593, "step": 8090 }, { "epoch": 0.2171264491197939, "grad_norm": 0.232421875, "learning_rate": 0.0011997329784659038, "loss": 0.775, "step": 8091 }, { "epoch": 0.21715328467153286, "grad_norm": 0.216796875, "learning_rate": 0.0011997321394981116, "loss": 0.6978, "step": 8092 }, { "epoch": 0.2171801202232718, "grad_norm": 0.23828125, "learning_rate": 0.0011997312992146835, "loss": 0.8109, "step": 8093 }, { "epoch": 0.21720695577501073, "grad_norm": 0.2373046875, "learning_rate": 0.0011997304576156212, "loss": 0.7926, "step": 8094 }, { "epoch": 0.21723379132674966, "grad_norm": 0.2294921875, "learning_rate": 0.0011997296147009267, "loss": 0.7126, "step": 8095 }, { "epoch": 0.21726062687848863, "grad_norm": 0.2265625, "learning_rate": 0.0011997287704706022, "loss": 0.6798, "step": 8096 }, { "epoch": 0.21728746243022756, "grad_norm": 0.2255859375, "learning_rate": 0.001199727924924649, "loss": 0.7097, "step": 8097 }, { "epoch": 0.2173142979819665, "grad_norm": 0.2392578125, "learning_rate": 0.0011997270780630693, "loss": 0.7654, "step": 8098 }, { "epoch": 0.21734113353370546, "grad_norm": 0.2578125, "learning_rate": 0.0011997262298858646, "loss": 0.8986, "step": 8099 }, { "epoch": 0.2173679690854444, "grad_norm": 0.240234375, "learning_rate": 0.0011997253803930372, "loss": 0.7804, "step": 8100 }, { "epoch": 0.21739480463718333, "grad_norm": 0.234375, "learning_rate": 0.0011997245295845887, "loss": 0.7409, "step": 8101 }, { "epoch": 0.2174216401889223, "grad_norm": 0.23046875, "learning_rate": 0.0011997236774605208, "loss": 0.7011, "step": 8102 }, { "epoch": 0.21744847574066123, "grad_norm": 0.2294921875, "learning_rate": 0.001199722824020836, "loss": 0.7534, "step": 8103 }, { "epoch": 0.21747531129240016, "grad_norm": 0.23828125, "learning_rate": 0.0011997219692655354, "loss": 0.7816, "step": 8104 }, { "epoch": 0.21750214684413913, "grad_norm": 0.23828125, "learning_rate": 0.0011997211131946214, "loss": 0.7415, "step": 8105 }, { "epoch": 0.21752898239587806, "grad_norm": 0.234375, "learning_rate": 0.0011997202558080957, "loss": 0.7581, "step": 8106 }, { "epoch": 0.217555817947617, "grad_norm": 0.2373046875, "learning_rate": 0.0011997193971059602, "loss": 0.7852, "step": 8107 }, { "epoch": 0.21758265349935596, "grad_norm": 0.2197265625, "learning_rate": 0.0011997185370882168, "loss": 0.6986, "step": 8108 }, { "epoch": 0.2176094890510949, "grad_norm": 0.224609375, "learning_rate": 0.0011997176757548675, "loss": 0.6892, "step": 8109 }, { "epoch": 0.21763632460283383, "grad_norm": 0.2265625, "learning_rate": 0.0011997168131059137, "loss": 0.7075, "step": 8110 }, { "epoch": 0.21766316015457277, "grad_norm": 0.2431640625, "learning_rate": 0.001199715949141358, "loss": 0.7777, "step": 8111 }, { "epoch": 0.21768999570631173, "grad_norm": 0.212890625, "learning_rate": 0.0011997150838612017, "loss": 0.6684, "step": 8112 }, { "epoch": 0.21771683125805066, "grad_norm": 0.2451171875, "learning_rate": 0.0011997142172654471, "loss": 0.7974, "step": 8113 }, { "epoch": 0.2177436668097896, "grad_norm": 0.23046875, "learning_rate": 0.001199713349354096, "loss": 0.7618, "step": 8114 }, { "epoch": 0.21777050236152856, "grad_norm": 0.2197265625, "learning_rate": 0.0011997124801271502, "loss": 0.7336, "step": 8115 }, { "epoch": 0.2177973379132675, "grad_norm": 0.2060546875, "learning_rate": 0.0011997116095846115, "loss": 0.6722, "step": 8116 }, { "epoch": 0.21782417346500643, "grad_norm": 0.2216796875, "learning_rate": 0.0011997107377264822, "loss": 0.6884, "step": 8117 }, { "epoch": 0.2178510090167454, "grad_norm": 0.279296875, "learning_rate": 0.0011997098645527638, "loss": 0.8217, "step": 8118 }, { "epoch": 0.21787784456848433, "grad_norm": 0.328125, "learning_rate": 0.0011997089900634586, "loss": 0.8754, "step": 8119 }, { "epoch": 0.21790468012022327, "grad_norm": 0.28515625, "learning_rate": 0.001199708114258568, "loss": 0.9649, "step": 8120 }, { "epoch": 0.21793151567196223, "grad_norm": 0.3359375, "learning_rate": 0.0011997072371380945, "loss": 0.8145, "step": 8121 }, { "epoch": 0.21795835122370116, "grad_norm": 0.353515625, "learning_rate": 0.0011997063587020397, "loss": 0.7548, "step": 8122 }, { "epoch": 0.2179851867754401, "grad_norm": 0.27734375, "learning_rate": 0.0011997054789504055, "loss": 0.7659, "step": 8123 }, { "epoch": 0.21801202232717903, "grad_norm": 0.28125, "learning_rate": 0.001199704597883194, "loss": 0.9206, "step": 8124 }, { "epoch": 0.218038857878918, "grad_norm": 0.298828125, "learning_rate": 0.001199703715500407, "loss": 0.7998, "step": 8125 }, { "epoch": 0.21806569343065693, "grad_norm": 0.298828125, "learning_rate": 0.0011997028318020465, "loss": 0.7876, "step": 8126 }, { "epoch": 0.21809252898239587, "grad_norm": 0.267578125, "learning_rate": 0.0011997019467881143, "loss": 0.7407, "step": 8127 }, { "epoch": 0.21811936453413483, "grad_norm": 0.26171875, "learning_rate": 0.0011997010604586128, "loss": 0.9476, "step": 8128 }, { "epoch": 0.21814620008587376, "grad_norm": 0.236328125, "learning_rate": 0.0011997001728135434, "loss": 0.6997, "step": 8129 }, { "epoch": 0.2181730356376127, "grad_norm": 0.271484375, "learning_rate": 0.001199699283852908, "loss": 0.7116, "step": 8130 }, { "epoch": 0.21819987118935166, "grad_norm": 0.28515625, "learning_rate": 0.0011996983935767091, "loss": 0.8704, "step": 8131 }, { "epoch": 0.2182267067410906, "grad_norm": 0.24609375, "learning_rate": 0.0011996975019849484, "loss": 0.7035, "step": 8132 }, { "epoch": 0.21825354229282953, "grad_norm": 0.24609375, "learning_rate": 0.0011996966090776276, "loss": 0.8372, "step": 8133 }, { "epoch": 0.2182803778445685, "grad_norm": 0.259765625, "learning_rate": 0.0011996957148547492, "loss": 0.8587, "step": 8134 }, { "epoch": 0.21830721339630743, "grad_norm": 0.265625, "learning_rate": 0.0011996948193163145, "loss": 0.8976, "step": 8135 }, { "epoch": 0.21833404894804637, "grad_norm": 0.2451171875, "learning_rate": 0.001199693922462326, "loss": 0.6961, "step": 8136 }, { "epoch": 0.21836088449978533, "grad_norm": 0.25390625, "learning_rate": 0.0011996930242927854, "loss": 0.9021, "step": 8137 }, { "epoch": 0.21838772005152426, "grad_norm": 0.2451171875, "learning_rate": 0.0011996921248076948, "loss": 0.7895, "step": 8138 }, { "epoch": 0.2184145556032632, "grad_norm": 0.263671875, "learning_rate": 0.0011996912240070559, "loss": 0.8569, "step": 8139 }, { "epoch": 0.21844139115500213, "grad_norm": 0.251953125, "learning_rate": 0.001199690321890871, "loss": 0.7771, "step": 8140 }, { "epoch": 0.2184682267067411, "grad_norm": 0.259765625, "learning_rate": 0.0011996894184591422, "loss": 0.9052, "step": 8141 }, { "epoch": 0.21849506225848003, "grad_norm": 0.2734375, "learning_rate": 0.001199688513711871, "loss": 0.8543, "step": 8142 }, { "epoch": 0.21852189781021897, "grad_norm": 0.24609375, "learning_rate": 0.0011996876076490596, "loss": 0.7991, "step": 8143 }, { "epoch": 0.21854873336195793, "grad_norm": 0.2490234375, "learning_rate": 0.0011996867002707103, "loss": 0.7605, "step": 8144 }, { "epoch": 0.21857556891369687, "grad_norm": 0.224609375, "learning_rate": 0.0011996857915768245, "loss": 0.6946, "step": 8145 }, { "epoch": 0.2186024044654358, "grad_norm": 0.255859375, "learning_rate": 0.0011996848815674045, "loss": 0.762, "step": 8146 }, { "epoch": 0.21862924001717476, "grad_norm": 0.244140625, "learning_rate": 0.0011996839702424524, "loss": 0.7935, "step": 8147 }, { "epoch": 0.2186560755689137, "grad_norm": 0.2421875, "learning_rate": 0.0011996830576019703, "loss": 0.879, "step": 8148 }, { "epoch": 0.21868291112065263, "grad_norm": 0.236328125, "learning_rate": 0.0011996821436459596, "loss": 0.8256, "step": 8149 }, { "epoch": 0.2187097466723916, "grad_norm": 0.2333984375, "learning_rate": 0.0011996812283744229, "loss": 0.7731, "step": 8150 }, { "epoch": 0.21873658222413053, "grad_norm": 0.244140625, "learning_rate": 0.001199680311787362, "loss": 0.854, "step": 8151 }, { "epoch": 0.21876341777586947, "grad_norm": 0.26171875, "learning_rate": 0.001199679393884779, "loss": 0.828, "step": 8152 }, { "epoch": 0.2187902533276084, "grad_norm": 0.2373046875, "learning_rate": 0.0011996784746666758, "loss": 0.8472, "step": 8153 }, { "epoch": 0.21881708887934737, "grad_norm": 0.25390625, "learning_rate": 0.0011996775541330545, "loss": 0.8279, "step": 8154 }, { "epoch": 0.2188439244310863, "grad_norm": 0.22265625, "learning_rate": 0.001199676632283917, "loss": 0.7348, "step": 8155 }, { "epoch": 0.21887075998282524, "grad_norm": 0.25390625, "learning_rate": 0.0011996757091192656, "loss": 0.91, "step": 8156 }, { "epoch": 0.2188975955345642, "grad_norm": 0.232421875, "learning_rate": 0.0011996747846391017, "loss": 0.7371, "step": 8157 }, { "epoch": 0.21892443108630313, "grad_norm": 0.234375, "learning_rate": 0.001199673858843428, "loss": 0.7519, "step": 8158 }, { "epoch": 0.21895126663804207, "grad_norm": 0.2255859375, "learning_rate": 0.0011996729317322462, "loss": 0.775, "step": 8159 }, { "epoch": 0.21897810218978103, "grad_norm": 0.2431640625, "learning_rate": 0.0011996720033055587, "loss": 0.8024, "step": 8160 }, { "epoch": 0.21900493774151997, "grad_norm": 0.2080078125, "learning_rate": 0.0011996710735633668, "loss": 0.6327, "step": 8161 }, { "epoch": 0.2190317732932589, "grad_norm": 0.2294921875, "learning_rate": 0.0011996701425056732, "loss": 0.7517, "step": 8162 }, { "epoch": 0.21905860884499787, "grad_norm": 0.2470703125, "learning_rate": 0.00119966921013248, "loss": 0.8454, "step": 8163 }, { "epoch": 0.2190854443967368, "grad_norm": 0.2421875, "learning_rate": 0.0011996682764437887, "loss": 0.7864, "step": 8164 }, { "epoch": 0.21911227994847574, "grad_norm": 0.263671875, "learning_rate": 0.0011996673414396016, "loss": 0.9299, "step": 8165 }, { "epoch": 0.21913911550021467, "grad_norm": 0.2412109375, "learning_rate": 0.001199666405119921, "loss": 0.8244, "step": 8166 }, { "epoch": 0.21916595105195363, "grad_norm": 0.234375, "learning_rate": 0.0011996654674847485, "loss": 0.7847, "step": 8167 }, { "epoch": 0.21919278660369257, "grad_norm": 0.2490234375, "learning_rate": 0.0011996645285340865, "loss": 0.8286, "step": 8168 }, { "epoch": 0.2192196221554315, "grad_norm": 0.2314453125, "learning_rate": 0.0011996635882679367, "loss": 0.6837, "step": 8169 }, { "epoch": 0.21924645770717047, "grad_norm": 0.2578125, "learning_rate": 0.0011996626466863017, "loss": 0.8356, "step": 8170 }, { "epoch": 0.2192732932589094, "grad_norm": 0.2236328125, "learning_rate": 0.001199661703789183, "loss": 0.6409, "step": 8171 }, { "epoch": 0.21930012881064834, "grad_norm": 0.216796875, "learning_rate": 0.0011996607595765832, "loss": 0.7224, "step": 8172 }, { "epoch": 0.2193269643623873, "grad_norm": 0.251953125, "learning_rate": 0.0011996598140485042, "loss": 0.8298, "step": 8173 }, { "epoch": 0.21935379991412624, "grad_norm": 0.251953125, "learning_rate": 0.001199658867204948, "loss": 0.8583, "step": 8174 }, { "epoch": 0.21938063546586517, "grad_norm": 0.26171875, "learning_rate": 0.0011996579190459164, "loss": 0.9037, "step": 8175 }, { "epoch": 0.21940747101760413, "grad_norm": 0.255859375, "learning_rate": 0.0011996569695714118, "loss": 0.8294, "step": 8176 }, { "epoch": 0.21943430656934307, "grad_norm": 0.232421875, "learning_rate": 0.0011996560187814364, "loss": 0.7446, "step": 8177 }, { "epoch": 0.219461142121082, "grad_norm": 0.2255859375, "learning_rate": 0.0011996550666759918, "loss": 0.7168, "step": 8178 }, { "epoch": 0.21948797767282097, "grad_norm": 0.2373046875, "learning_rate": 0.0011996541132550807, "loss": 0.8183, "step": 8179 }, { "epoch": 0.2195148132245599, "grad_norm": 0.2255859375, "learning_rate": 0.0011996531585187049, "loss": 0.7463, "step": 8180 }, { "epoch": 0.21954164877629884, "grad_norm": 0.2470703125, "learning_rate": 0.0011996522024668665, "loss": 0.8592, "step": 8181 }, { "epoch": 0.21956848432803777, "grad_norm": 0.2421875, "learning_rate": 0.0011996512450995673, "loss": 0.7965, "step": 8182 }, { "epoch": 0.21959531987977673, "grad_norm": 0.224609375, "learning_rate": 0.0011996502864168098, "loss": 0.6923, "step": 8183 }, { "epoch": 0.21962215543151567, "grad_norm": 0.2333984375, "learning_rate": 0.001199649326418596, "loss": 0.8104, "step": 8184 }, { "epoch": 0.2196489909832546, "grad_norm": 0.23828125, "learning_rate": 0.001199648365104928, "loss": 0.7693, "step": 8185 }, { "epoch": 0.21967582653499357, "grad_norm": 0.244140625, "learning_rate": 0.0011996474024758082, "loss": 0.8711, "step": 8186 }, { "epoch": 0.2197026620867325, "grad_norm": 0.2294921875, "learning_rate": 0.0011996464385312382, "loss": 0.691, "step": 8187 }, { "epoch": 0.21972949763847144, "grad_norm": 0.234375, "learning_rate": 0.0011996454732712202, "loss": 0.7178, "step": 8188 }, { "epoch": 0.2197563331902104, "grad_norm": 0.224609375, "learning_rate": 0.0011996445066957567, "loss": 0.7251, "step": 8189 }, { "epoch": 0.21978316874194934, "grad_norm": 0.228515625, "learning_rate": 0.0011996435388048493, "loss": 0.7614, "step": 8190 }, { "epoch": 0.21981000429368827, "grad_norm": 0.23828125, "learning_rate": 0.0011996425695985006, "loss": 0.7187, "step": 8191 }, { "epoch": 0.21983683984542723, "grad_norm": 0.2333984375, "learning_rate": 0.0011996415990767125, "loss": 0.7927, "step": 8192 }, { "epoch": 0.21986367539716617, "grad_norm": 0.224609375, "learning_rate": 0.0011996406272394872, "loss": 0.6902, "step": 8193 }, { "epoch": 0.2198905109489051, "grad_norm": 0.23828125, "learning_rate": 0.0011996396540868265, "loss": 0.8051, "step": 8194 }, { "epoch": 0.21991734650064404, "grad_norm": 0.2412109375, "learning_rate": 0.001199638679618733, "loss": 0.8664, "step": 8195 }, { "epoch": 0.219944182052383, "grad_norm": 0.2255859375, "learning_rate": 0.0011996377038352088, "loss": 0.7506, "step": 8196 }, { "epoch": 0.21997101760412194, "grad_norm": 0.2421875, "learning_rate": 0.0011996367267362556, "loss": 0.8468, "step": 8197 }, { "epoch": 0.21999785315586087, "grad_norm": 0.2431640625, "learning_rate": 0.0011996357483218761, "loss": 0.8051, "step": 8198 }, { "epoch": 0.22002468870759984, "grad_norm": 0.25390625, "learning_rate": 0.001199634768592072, "loss": 0.8495, "step": 8199 }, { "epoch": 0.22005152425933877, "grad_norm": 0.244140625, "learning_rate": 0.0011996337875468457, "loss": 0.855, "step": 8200 }, { "epoch": 0.2200783598110777, "grad_norm": 0.2265625, "learning_rate": 0.0011996328051861994, "loss": 0.8125, "step": 8201 }, { "epoch": 0.22010519536281667, "grad_norm": 0.25, "learning_rate": 0.001199631821510135, "loss": 0.8692, "step": 8202 }, { "epoch": 0.2201320309145556, "grad_norm": 0.220703125, "learning_rate": 0.0011996308365186548, "loss": 0.6905, "step": 8203 }, { "epoch": 0.22015886646629454, "grad_norm": 0.2490234375, "learning_rate": 0.001199629850211761, "loss": 0.8863, "step": 8204 }, { "epoch": 0.2201857020180335, "grad_norm": 0.2275390625, "learning_rate": 0.0011996288625894557, "loss": 0.7881, "step": 8205 }, { "epoch": 0.22021253756977244, "grad_norm": 0.2265625, "learning_rate": 0.001199627873651741, "loss": 0.7322, "step": 8206 }, { "epoch": 0.22023937312151137, "grad_norm": 0.2470703125, "learning_rate": 0.0011996268833986195, "loss": 0.9111, "step": 8207 }, { "epoch": 0.22026620867325034, "grad_norm": 0.22265625, "learning_rate": 0.0011996258918300926, "loss": 0.7085, "step": 8208 }, { "epoch": 0.22029304422498927, "grad_norm": 0.232421875, "learning_rate": 0.0011996248989461632, "loss": 0.706, "step": 8209 }, { "epoch": 0.2203198797767282, "grad_norm": 0.23046875, "learning_rate": 0.001199623904746833, "loss": 0.7374, "step": 8210 }, { "epoch": 0.22034671532846714, "grad_norm": 0.2373046875, "learning_rate": 0.0011996229092321043, "loss": 0.8272, "step": 8211 }, { "epoch": 0.2203735508802061, "grad_norm": 0.23046875, "learning_rate": 0.0011996219124019796, "loss": 0.7246, "step": 8212 }, { "epoch": 0.22040038643194504, "grad_norm": 0.248046875, "learning_rate": 0.0011996209142564607, "loss": 0.7929, "step": 8213 }, { "epoch": 0.22042722198368397, "grad_norm": 0.2392578125, "learning_rate": 0.00119961991479555, "loss": 0.7994, "step": 8214 }, { "epoch": 0.22045405753542294, "grad_norm": 0.232421875, "learning_rate": 0.0011996189140192496, "loss": 0.746, "step": 8215 }, { "epoch": 0.22048089308716187, "grad_norm": 0.2236328125, "learning_rate": 0.0011996179119275617, "loss": 0.7429, "step": 8216 }, { "epoch": 0.2205077286389008, "grad_norm": 0.2421875, "learning_rate": 0.0011996169085204886, "loss": 0.8375, "step": 8217 }, { "epoch": 0.22053456419063977, "grad_norm": 0.2578125, "learning_rate": 0.001199615903798032, "loss": 0.856, "step": 8218 }, { "epoch": 0.2205613997423787, "grad_norm": 0.2421875, "learning_rate": 0.001199614897760195, "loss": 0.672, "step": 8219 }, { "epoch": 0.22058823529411764, "grad_norm": 0.2314453125, "learning_rate": 0.001199613890406979, "loss": 0.6658, "step": 8220 }, { "epoch": 0.2206150708458566, "grad_norm": 0.224609375, "learning_rate": 0.0011996128817383868, "loss": 0.7091, "step": 8221 }, { "epoch": 0.22064190639759554, "grad_norm": 0.2373046875, "learning_rate": 0.0011996118717544203, "loss": 0.768, "step": 8222 }, { "epoch": 0.22066874194933447, "grad_norm": 0.232421875, "learning_rate": 0.0011996108604550816, "loss": 0.7564, "step": 8223 }, { "epoch": 0.2206955775010734, "grad_norm": 0.212890625, "learning_rate": 0.0011996098478403733, "loss": 0.6834, "step": 8224 }, { "epoch": 0.22072241305281237, "grad_norm": 0.2392578125, "learning_rate": 0.0011996088339102972, "loss": 0.8634, "step": 8225 }, { "epoch": 0.2207492486045513, "grad_norm": 0.2412109375, "learning_rate": 0.0011996078186648558, "loss": 0.7878, "step": 8226 }, { "epoch": 0.22077608415629024, "grad_norm": 0.2421875, "learning_rate": 0.0011996068021040511, "loss": 0.9007, "step": 8227 }, { "epoch": 0.2208029197080292, "grad_norm": 0.240234375, "learning_rate": 0.0011996057842278856, "loss": 0.8572, "step": 8228 }, { "epoch": 0.22082975525976814, "grad_norm": 0.236328125, "learning_rate": 0.0011996047650363615, "loss": 0.8678, "step": 8229 }, { "epoch": 0.22085659081150708, "grad_norm": 0.2353515625, "learning_rate": 0.0011996037445294809, "loss": 0.7689, "step": 8230 }, { "epoch": 0.22088342636324604, "grad_norm": 0.255859375, "learning_rate": 0.0011996027227072462, "loss": 0.8184, "step": 8231 }, { "epoch": 0.22091026191498497, "grad_norm": 0.25, "learning_rate": 0.0011996016995696594, "loss": 0.8931, "step": 8232 }, { "epoch": 0.2209370974667239, "grad_norm": 0.2138671875, "learning_rate": 0.0011996006751167228, "loss": 0.7027, "step": 8233 }, { "epoch": 0.22096393301846287, "grad_norm": 0.236328125, "learning_rate": 0.0011995996493484387, "loss": 0.7986, "step": 8234 }, { "epoch": 0.2209907685702018, "grad_norm": 0.2421875, "learning_rate": 0.0011995986222648098, "loss": 0.848, "step": 8235 }, { "epoch": 0.22101760412194074, "grad_norm": 0.25390625, "learning_rate": 0.0011995975938658375, "loss": 0.916, "step": 8236 }, { "epoch": 0.2210444396736797, "grad_norm": 0.255859375, "learning_rate": 0.0011995965641515246, "loss": 0.7906, "step": 8237 }, { "epoch": 0.22107127522541864, "grad_norm": 0.2294921875, "learning_rate": 0.0011995955331218732, "loss": 0.763, "step": 8238 }, { "epoch": 0.22109811077715757, "grad_norm": 0.2197265625, "learning_rate": 0.0011995945007768857, "loss": 0.7449, "step": 8239 }, { "epoch": 0.2211249463288965, "grad_norm": 0.24609375, "learning_rate": 0.001199593467116564, "loss": 0.8946, "step": 8240 }, { "epoch": 0.22115178188063547, "grad_norm": 0.2578125, "learning_rate": 0.0011995924321409111, "loss": 0.856, "step": 8241 }, { "epoch": 0.2211786174323744, "grad_norm": 0.2333984375, "learning_rate": 0.0011995913958499286, "loss": 0.7295, "step": 8242 }, { "epoch": 0.22120545298411334, "grad_norm": 0.26171875, "learning_rate": 0.001199590358243619, "loss": 0.9548, "step": 8243 }, { "epoch": 0.2212322885358523, "grad_norm": 0.25, "learning_rate": 0.0011995893193219843, "loss": 0.878, "step": 8244 }, { "epoch": 0.22125912408759124, "grad_norm": 0.2314453125, "learning_rate": 0.0011995882790850271, "loss": 0.823, "step": 8245 }, { "epoch": 0.22128595963933018, "grad_norm": 0.2333984375, "learning_rate": 0.00119958723753275, "loss": 0.7648, "step": 8246 }, { "epoch": 0.22131279519106914, "grad_norm": 0.25, "learning_rate": 0.0011995861946651545, "loss": 0.8086, "step": 8247 }, { "epoch": 0.22133963074280807, "grad_norm": 0.2333984375, "learning_rate": 0.0011995851504822436, "loss": 0.7282, "step": 8248 }, { "epoch": 0.221366466294547, "grad_norm": 0.2314453125, "learning_rate": 0.001199584104984019, "loss": 0.7631, "step": 8249 }, { "epoch": 0.22139330184628597, "grad_norm": 0.234375, "learning_rate": 0.0011995830581704836, "loss": 0.7384, "step": 8250 }, { "epoch": 0.2214201373980249, "grad_norm": 0.2197265625, "learning_rate": 0.001199582010041639, "loss": 0.7083, "step": 8251 }, { "epoch": 0.22144697294976384, "grad_norm": 0.2353515625, "learning_rate": 0.001199580960597488, "loss": 0.8448, "step": 8252 }, { "epoch": 0.22147380850150278, "grad_norm": 0.2421875, "learning_rate": 0.001199579909838033, "loss": 0.8072, "step": 8253 }, { "epoch": 0.22150064405324174, "grad_norm": 0.2275390625, "learning_rate": 0.0011995788577632757, "loss": 0.7396, "step": 8254 }, { "epoch": 0.22152747960498068, "grad_norm": 0.2373046875, "learning_rate": 0.001199577804373219, "loss": 0.7413, "step": 8255 }, { "epoch": 0.2215543151567196, "grad_norm": 0.220703125, "learning_rate": 0.001199576749667865, "loss": 0.7005, "step": 8256 }, { "epoch": 0.22158115070845857, "grad_norm": 0.2490234375, "learning_rate": 0.0011995756936472159, "loss": 0.8362, "step": 8257 }, { "epoch": 0.2216079862601975, "grad_norm": 0.2412109375, "learning_rate": 0.0011995746363112744, "loss": 0.8216, "step": 8258 }, { "epoch": 0.22163482181193644, "grad_norm": 0.2470703125, "learning_rate": 0.0011995735776600422, "loss": 0.825, "step": 8259 }, { "epoch": 0.2216616573636754, "grad_norm": 0.220703125, "learning_rate": 0.0011995725176935223, "loss": 0.7022, "step": 8260 }, { "epoch": 0.22168849291541434, "grad_norm": 0.23828125, "learning_rate": 0.0011995714564117163, "loss": 0.8651, "step": 8261 }, { "epoch": 0.22171532846715328, "grad_norm": 0.2265625, "learning_rate": 0.0011995703938146272, "loss": 0.7736, "step": 8262 }, { "epoch": 0.22174216401889224, "grad_norm": 0.2294921875, "learning_rate": 0.001199569329902257, "loss": 0.7045, "step": 8263 }, { "epoch": 0.22176899957063118, "grad_norm": 0.263671875, "learning_rate": 0.001199568264674608, "loss": 0.8925, "step": 8264 }, { "epoch": 0.2217958351223701, "grad_norm": 0.23828125, "learning_rate": 0.0011995671981316826, "loss": 0.8273, "step": 8265 }, { "epoch": 0.22182267067410905, "grad_norm": 0.2255859375, "learning_rate": 0.0011995661302734834, "loss": 0.8253, "step": 8266 }, { "epoch": 0.221849506225848, "grad_norm": 0.25, "learning_rate": 0.0011995650611000123, "loss": 0.8107, "step": 8267 }, { "epoch": 0.22187634177758694, "grad_norm": 0.2392578125, "learning_rate": 0.0011995639906112721, "loss": 0.8015, "step": 8268 }, { "epoch": 0.22190317732932588, "grad_norm": 0.2373046875, "learning_rate": 0.0011995629188072646, "loss": 0.8121, "step": 8269 }, { "epoch": 0.22193001288106484, "grad_norm": 0.23046875, "learning_rate": 0.0011995618456879925, "loss": 0.7415, "step": 8270 }, { "epoch": 0.22195684843280378, "grad_norm": 0.2451171875, "learning_rate": 0.0011995607712534581, "loss": 0.7995, "step": 8271 }, { "epoch": 0.2219836839845427, "grad_norm": 0.2109375, "learning_rate": 0.001199559695503664, "loss": 0.6174, "step": 8272 }, { "epoch": 0.22201051953628168, "grad_norm": 0.23046875, "learning_rate": 0.001199558618438612, "loss": 0.7448, "step": 8273 }, { "epoch": 0.2220373550880206, "grad_norm": 0.22265625, "learning_rate": 0.0011995575400583051, "loss": 0.7444, "step": 8274 }, { "epoch": 0.22206419063975955, "grad_norm": 0.2177734375, "learning_rate": 0.001199556460362745, "loss": 0.731, "step": 8275 }, { "epoch": 0.2220910261914985, "grad_norm": 0.2392578125, "learning_rate": 0.0011995553793519348, "loss": 0.7006, "step": 8276 }, { "epoch": 0.22211786174323744, "grad_norm": 0.2294921875, "learning_rate": 0.0011995542970258761, "loss": 0.6914, "step": 8277 }, { "epoch": 0.22214469729497638, "grad_norm": 0.2158203125, "learning_rate": 0.0011995532133845719, "loss": 0.6662, "step": 8278 }, { "epoch": 0.22217153284671534, "grad_norm": 0.2412109375, "learning_rate": 0.0011995521284280243, "loss": 0.8323, "step": 8279 }, { "epoch": 0.22219836839845428, "grad_norm": 0.21875, "learning_rate": 0.0011995510421562356, "loss": 0.705, "step": 8280 }, { "epoch": 0.2222252039501932, "grad_norm": 0.23046875, "learning_rate": 0.0011995499545692085, "loss": 0.7383, "step": 8281 }, { "epoch": 0.22225203950193215, "grad_norm": 0.25, "learning_rate": 0.0011995488656669452, "loss": 0.8769, "step": 8282 }, { "epoch": 0.2222788750536711, "grad_norm": 0.2470703125, "learning_rate": 0.001199547775449448, "loss": 0.8102, "step": 8283 }, { "epoch": 0.22230571060541005, "grad_norm": 0.2353515625, "learning_rate": 0.0011995466839167192, "loss": 0.7856, "step": 8284 }, { "epoch": 0.22233254615714898, "grad_norm": 0.236328125, "learning_rate": 0.0011995455910687616, "loss": 0.7934, "step": 8285 }, { "epoch": 0.22235938170888794, "grad_norm": 0.2197265625, "learning_rate": 0.0011995444969055773, "loss": 0.6584, "step": 8286 }, { "epoch": 0.22238621726062688, "grad_norm": 0.2470703125, "learning_rate": 0.0011995434014271688, "loss": 0.8543, "step": 8287 }, { "epoch": 0.2224130528123658, "grad_norm": 0.2138671875, "learning_rate": 0.0011995423046335383, "loss": 0.6466, "step": 8288 }, { "epoch": 0.22243988836410478, "grad_norm": 0.2392578125, "learning_rate": 0.0011995412065246886, "loss": 0.7878, "step": 8289 }, { "epoch": 0.2224667239158437, "grad_norm": 0.2294921875, "learning_rate": 0.0011995401071006216, "loss": 0.7504, "step": 8290 }, { "epoch": 0.22249355946758265, "grad_norm": 0.2294921875, "learning_rate": 0.0011995390063613402, "loss": 0.755, "step": 8291 }, { "epoch": 0.2225203950193216, "grad_norm": 0.244140625, "learning_rate": 0.0011995379043068467, "loss": 0.8724, "step": 8292 }, { "epoch": 0.22254723057106054, "grad_norm": 0.2412109375, "learning_rate": 0.0011995368009371432, "loss": 0.8687, "step": 8293 }, { "epoch": 0.22257406612279948, "grad_norm": 0.240234375, "learning_rate": 0.0011995356962522324, "loss": 0.7691, "step": 8294 }, { "epoch": 0.22260090167453842, "grad_norm": 0.259765625, "learning_rate": 0.0011995345902521166, "loss": 0.7664, "step": 8295 }, { "epoch": 0.22262773722627738, "grad_norm": 0.259765625, "learning_rate": 0.0011995334829367984, "loss": 0.8478, "step": 8296 }, { "epoch": 0.2226545727780163, "grad_norm": 0.240234375, "learning_rate": 0.00119953237430628, "loss": 0.7444, "step": 8297 }, { "epoch": 0.22268140832975525, "grad_norm": 0.2431640625, "learning_rate": 0.0011995312643605642, "loss": 0.8445, "step": 8298 }, { "epoch": 0.2227082438814942, "grad_norm": 0.244140625, "learning_rate": 0.0011995301530996533, "loss": 0.9037, "step": 8299 }, { "epoch": 0.22273507943323315, "grad_norm": 0.265625, "learning_rate": 0.001199529040523549, "loss": 0.815, "step": 8300 }, { "epoch": 0.22276191498497208, "grad_norm": 0.236328125, "learning_rate": 0.001199527926632255, "loss": 0.7332, "step": 8301 }, { "epoch": 0.22278875053671104, "grad_norm": 0.25, "learning_rate": 0.001199526811425773, "loss": 0.8643, "step": 8302 }, { "epoch": 0.22281558608844998, "grad_norm": 0.2333984375, "learning_rate": 0.0011995256949041053, "loss": 0.7831, "step": 8303 }, { "epoch": 0.22284242164018891, "grad_norm": 0.23046875, "learning_rate": 0.0011995245770672546, "loss": 0.6887, "step": 8304 }, { "epoch": 0.22286925719192788, "grad_norm": 0.2177734375, "learning_rate": 0.0011995234579152236, "loss": 0.6875, "step": 8305 }, { "epoch": 0.2228960927436668, "grad_norm": 0.25390625, "learning_rate": 0.0011995223374480146, "loss": 0.7899, "step": 8306 }, { "epoch": 0.22292292829540575, "grad_norm": 0.259765625, "learning_rate": 0.0011995212156656297, "loss": 0.8263, "step": 8307 }, { "epoch": 0.2229497638471447, "grad_norm": 0.255859375, "learning_rate": 0.0011995200925680716, "loss": 0.8491, "step": 8308 }, { "epoch": 0.22297659939888365, "grad_norm": 0.2197265625, "learning_rate": 0.0011995189681553431, "loss": 0.6617, "step": 8309 }, { "epoch": 0.22300343495062258, "grad_norm": 0.232421875, "learning_rate": 0.0011995178424274461, "loss": 0.749, "step": 8310 }, { "epoch": 0.22303027050236152, "grad_norm": 0.2412109375, "learning_rate": 0.0011995167153843833, "loss": 0.7029, "step": 8311 }, { "epoch": 0.22305710605410048, "grad_norm": 0.25390625, "learning_rate": 0.0011995155870261575, "loss": 0.8078, "step": 8312 }, { "epoch": 0.22308394160583941, "grad_norm": 0.2421875, "learning_rate": 0.0011995144573527707, "loss": 0.7777, "step": 8313 }, { "epoch": 0.22311077715757835, "grad_norm": 0.2177734375, "learning_rate": 0.0011995133263642255, "loss": 0.7429, "step": 8314 }, { "epoch": 0.2231376127093173, "grad_norm": 0.2490234375, "learning_rate": 0.0011995121940605246, "loss": 0.8731, "step": 8315 }, { "epoch": 0.22316444826105625, "grad_norm": 0.2490234375, "learning_rate": 0.0011995110604416702, "loss": 0.8089, "step": 8316 }, { "epoch": 0.22319128381279518, "grad_norm": 0.240234375, "learning_rate": 0.001199509925507665, "loss": 0.835, "step": 8317 }, { "epoch": 0.22321811936453415, "grad_norm": 0.23046875, "learning_rate": 0.0011995087892585114, "loss": 0.763, "step": 8318 }, { "epoch": 0.22324495491627308, "grad_norm": 0.23828125, "learning_rate": 0.0011995076516942119, "loss": 0.758, "step": 8319 }, { "epoch": 0.22327179046801202, "grad_norm": 0.255859375, "learning_rate": 0.0011995065128147689, "loss": 0.8785, "step": 8320 }, { "epoch": 0.22329862601975098, "grad_norm": 0.23828125, "learning_rate": 0.001199505372620185, "loss": 0.7889, "step": 8321 }, { "epoch": 0.2233254615714899, "grad_norm": 0.25390625, "learning_rate": 0.0011995042311104627, "loss": 0.8748, "step": 8322 }, { "epoch": 0.22335229712322885, "grad_norm": 0.248046875, "learning_rate": 0.0011995030882856045, "loss": 0.7371, "step": 8323 }, { "epoch": 0.22337913267496778, "grad_norm": 0.2451171875, "learning_rate": 0.0011995019441456128, "loss": 0.8128, "step": 8324 }, { "epoch": 0.22340596822670675, "grad_norm": 0.232421875, "learning_rate": 0.0011995007986904903, "loss": 0.8142, "step": 8325 }, { "epoch": 0.22343280377844568, "grad_norm": 0.232421875, "learning_rate": 0.0011994996519202395, "loss": 0.821, "step": 8326 }, { "epoch": 0.22345963933018462, "grad_norm": 0.23046875, "learning_rate": 0.001199498503834863, "loss": 0.735, "step": 8327 }, { "epoch": 0.22348647488192358, "grad_norm": 0.255859375, "learning_rate": 0.0011994973544343628, "loss": 0.8249, "step": 8328 }, { "epoch": 0.22351331043366252, "grad_norm": 0.25390625, "learning_rate": 0.001199496203718742, "loss": 0.8246, "step": 8329 }, { "epoch": 0.22354014598540145, "grad_norm": 0.2353515625, "learning_rate": 0.001199495051688003, "loss": 0.7787, "step": 8330 }, { "epoch": 0.2235669815371404, "grad_norm": 0.2353515625, "learning_rate": 0.0011994938983421477, "loss": 0.7478, "step": 8331 }, { "epoch": 0.22359381708887935, "grad_norm": 0.2197265625, "learning_rate": 0.0011994927436811797, "loss": 0.726, "step": 8332 }, { "epoch": 0.22362065264061828, "grad_norm": 0.25, "learning_rate": 0.0011994915877051007, "loss": 0.7993, "step": 8333 }, { "epoch": 0.22364748819235725, "grad_norm": 0.2421875, "learning_rate": 0.0011994904304139138, "loss": 0.7401, "step": 8334 }, { "epoch": 0.22367432374409618, "grad_norm": 0.26171875, "learning_rate": 0.001199489271807621, "loss": 0.8682, "step": 8335 }, { "epoch": 0.22370115929583512, "grad_norm": 0.232421875, "learning_rate": 0.0011994881118862252, "loss": 0.6932, "step": 8336 }, { "epoch": 0.22372799484757408, "grad_norm": 0.22265625, "learning_rate": 0.001199486950649729, "loss": 0.6983, "step": 8337 }, { "epoch": 0.22375483039931301, "grad_norm": 0.2392578125, "learning_rate": 0.0011994857880981346, "loss": 0.7892, "step": 8338 }, { "epoch": 0.22378166595105195, "grad_norm": 0.251953125, "learning_rate": 0.0011994846242314448, "loss": 0.7485, "step": 8339 }, { "epoch": 0.22380850150279089, "grad_norm": 0.2392578125, "learning_rate": 0.001199483459049662, "loss": 0.7889, "step": 8340 }, { "epoch": 0.22383533705452985, "grad_norm": 0.2353515625, "learning_rate": 0.0011994822925527893, "loss": 0.7523, "step": 8341 }, { "epoch": 0.22386217260626878, "grad_norm": 0.25, "learning_rate": 0.0011994811247408284, "loss": 0.8841, "step": 8342 }, { "epoch": 0.22388900815800772, "grad_norm": 0.2333984375, "learning_rate": 0.0011994799556137824, "loss": 0.7954, "step": 8343 }, { "epoch": 0.22391584370974668, "grad_norm": 0.2412109375, "learning_rate": 0.0011994787851716537, "loss": 0.8466, "step": 8344 }, { "epoch": 0.22394267926148562, "grad_norm": 0.2373046875, "learning_rate": 0.001199477613414445, "loss": 0.8378, "step": 8345 }, { "epoch": 0.22396951481322455, "grad_norm": 0.2197265625, "learning_rate": 0.0011994764403421587, "loss": 0.7275, "step": 8346 }, { "epoch": 0.22399635036496351, "grad_norm": 0.2294921875, "learning_rate": 0.0011994752659547974, "loss": 0.7816, "step": 8347 }, { "epoch": 0.22402318591670245, "grad_norm": 0.21875, "learning_rate": 0.0011994740902523638, "loss": 0.7117, "step": 8348 }, { "epoch": 0.22405002146844138, "grad_norm": 0.2333984375, "learning_rate": 0.0011994729132348606, "loss": 0.7971, "step": 8349 }, { "epoch": 0.22407685702018035, "grad_norm": 0.24609375, "learning_rate": 0.00119947173490229, "loss": 0.8249, "step": 8350 }, { "epoch": 0.22410369257191928, "grad_norm": 0.2421875, "learning_rate": 0.001199470555254655, "loss": 0.7578, "step": 8351 }, { "epoch": 0.22413052812365822, "grad_norm": 0.2265625, "learning_rate": 0.0011994693742919578, "loss": 0.7466, "step": 8352 }, { "epoch": 0.22415736367539715, "grad_norm": 0.2734375, "learning_rate": 0.001199468192014201, "loss": 0.9306, "step": 8353 }, { "epoch": 0.22418419922713612, "grad_norm": 0.224609375, "learning_rate": 0.0011994670084213876, "loss": 0.7381, "step": 8354 }, { "epoch": 0.22421103477887505, "grad_norm": 0.2294921875, "learning_rate": 0.0011994658235135198, "loss": 0.7599, "step": 8355 }, { "epoch": 0.224237870330614, "grad_norm": 0.2333984375, "learning_rate": 0.0011994646372906004, "loss": 0.7825, "step": 8356 }, { "epoch": 0.22426470588235295, "grad_norm": 0.234375, "learning_rate": 0.0011994634497526318, "loss": 0.8209, "step": 8357 }, { "epoch": 0.22429154143409188, "grad_norm": 0.2421875, "learning_rate": 0.0011994622608996171, "loss": 0.8192, "step": 8358 }, { "epoch": 0.22431837698583082, "grad_norm": 0.2294921875, "learning_rate": 0.0011994610707315584, "loss": 0.6798, "step": 8359 }, { "epoch": 0.22434521253756978, "grad_norm": 0.24609375, "learning_rate": 0.0011994598792484585, "loss": 0.8863, "step": 8360 }, { "epoch": 0.22437204808930872, "grad_norm": 0.2451171875, "learning_rate": 0.0011994586864503198, "loss": 0.8621, "step": 8361 }, { "epoch": 0.22439888364104765, "grad_norm": 0.2265625, "learning_rate": 0.0011994574923371454, "loss": 0.7254, "step": 8362 }, { "epoch": 0.22442571919278662, "grad_norm": 0.2412109375, "learning_rate": 0.0011994562969089373, "loss": 0.7849, "step": 8363 }, { "epoch": 0.22445255474452555, "grad_norm": 0.248046875, "learning_rate": 0.0011994551001656987, "loss": 0.8332, "step": 8364 }, { "epoch": 0.22447939029626449, "grad_norm": 0.2392578125, "learning_rate": 0.0011994539021074318, "loss": 0.7144, "step": 8365 }, { "epoch": 0.22450622584800342, "grad_norm": 0.25, "learning_rate": 0.0011994527027341395, "loss": 0.751, "step": 8366 }, { "epoch": 0.22453306139974238, "grad_norm": 0.2451171875, "learning_rate": 0.0011994515020458244, "loss": 0.8027, "step": 8367 }, { "epoch": 0.22455989695148132, "grad_norm": 0.2236328125, "learning_rate": 0.001199450300042489, "loss": 0.7536, "step": 8368 }, { "epoch": 0.22458673250322025, "grad_norm": 0.236328125, "learning_rate": 0.001199449096724136, "loss": 0.757, "step": 8369 }, { "epoch": 0.22461356805495922, "grad_norm": 0.2255859375, "learning_rate": 0.001199447892090768, "loss": 0.7028, "step": 8370 }, { "epoch": 0.22464040360669815, "grad_norm": 0.2353515625, "learning_rate": 0.0011994466861423878, "loss": 0.6917, "step": 8371 }, { "epoch": 0.2246672391584371, "grad_norm": 0.2275390625, "learning_rate": 0.0011994454788789978, "loss": 0.709, "step": 8372 }, { "epoch": 0.22469407471017605, "grad_norm": 0.248046875, "learning_rate": 0.0011994442703006007, "loss": 0.8626, "step": 8373 }, { "epoch": 0.22472091026191499, "grad_norm": 0.228515625, "learning_rate": 0.0011994430604071995, "loss": 0.748, "step": 8374 }, { "epoch": 0.22474774581365392, "grad_norm": 0.240234375, "learning_rate": 0.0011994418491987964, "loss": 0.765, "step": 8375 }, { "epoch": 0.22477458136539288, "grad_norm": 0.2373046875, "learning_rate": 0.0011994406366753944, "loss": 0.8426, "step": 8376 }, { "epoch": 0.22480141691713182, "grad_norm": 0.216796875, "learning_rate": 0.001199439422836996, "loss": 0.7072, "step": 8377 }, { "epoch": 0.22482825246887075, "grad_norm": 0.2412109375, "learning_rate": 0.0011994382076836035, "loss": 0.801, "step": 8378 }, { "epoch": 0.22485508802060972, "grad_norm": 0.232421875, "learning_rate": 0.0011994369912152203, "loss": 0.7865, "step": 8379 }, { "epoch": 0.22488192357234865, "grad_norm": 0.2373046875, "learning_rate": 0.0011994357734318485, "loss": 0.7702, "step": 8380 }, { "epoch": 0.2249087591240876, "grad_norm": 0.25390625, "learning_rate": 0.001199434554333491, "loss": 0.9354, "step": 8381 }, { "epoch": 0.22493559467582652, "grad_norm": 0.228515625, "learning_rate": 0.0011994333339201506, "loss": 0.8079, "step": 8382 }, { "epoch": 0.22496243022756549, "grad_norm": 0.2412109375, "learning_rate": 0.0011994321121918297, "loss": 0.8168, "step": 8383 }, { "epoch": 0.22498926577930442, "grad_norm": 0.2421875, "learning_rate": 0.001199430889148531, "loss": 0.7861, "step": 8384 }, { "epoch": 0.22501610133104336, "grad_norm": 0.23828125, "learning_rate": 0.0011994296647902575, "loss": 0.7184, "step": 8385 }, { "epoch": 0.22504293688278232, "grad_norm": 0.2353515625, "learning_rate": 0.0011994284391170116, "loss": 0.7888, "step": 8386 }, { "epoch": 0.22506977243452125, "grad_norm": 0.23046875, "learning_rate": 0.001199427212128796, "loss": 0.7637, "step": 8387 }, { "epoch": 0.2250966079862602, "grad_norm": 0.2275390625, "learning_rate": 0.0011994259838256135, "loss": 0.7245, "step": 8388 }, { "epoch": 0.22512344353799915, "grad_norm": 0.25, "learning_rate": 0.0011994247542074668, "loss": 0.8992, "step": 8389 }, { "epoch": 0.2251502790897381, "grad_norm": 0.251953125, "learning_rate": 0.0011994235232743583, "loss": 0.8106, "step": 8390 }, { "epoch": 0.22517711464147702, "grad_norm": 0.2353515625, "learning_rate": 0.001199422291026291, "loss": 0.7809, "step": 8391 }, { "epoch": 0.22520395019321598, "grad_norm": 0.2431640625, "learning_rate": 0.0011994210574632679, "loss": 0.7786, "step": 8392 }, { "epoch": 0.22523078574495492, "grad_norm": 0.2138671875, "learning_rate": 0.001199419822585291, "loss": 0.6422, "step": 8393 }, { "epoch": 0.22525762129669386, "grad_norm": 0.2451171875, "learning_rate": 0.0011994185863923635, "loss": 0.7968, "step": 8394 }, { "epoch": 0.2252844568484328, "grad_norm": 0.23828125, "learning_rate": 0.0011994173488844879, "loss": 0.7821, "step": 8395 }, { "epoch": 0.22531129240017175, "grad_norm": 0.25, "learning_rate": 0.001199416110061667, "loss": 0.8616, "step": 8396 }, { "epoch": 0.2253381279519107, "grad_norm": 0.240234375, "learning_rate": 0.0011994148699239037, "loss": 0.8584, "step": 8397 }, { "epoch": 0.22536496350364962, "grad_norm": 0.244140625, "learning_rate": 0.0011994136284712003, "loss": 0.8395, "step": 8398 }, { "epoch": 0.2253917990553886, "grad_norm": 0.2138671875, "learning_rate": 0.0011994123857035598, "loss": 0.6808, "step": 8399 }, { "epoch": 0.22541863460712752, "grad_norm": 0.205078125, "learning_rate": 0.001199411141620985, "loss": 0.627, "step": 8400 }, { "epoch": 0.22544547015886646, "grad_norm": 0.232421875, "learning_rate": 0.0011994098962234783, "loss": 0.8112, "step": 8401 }, { "epoch": 0.22547230571060542, "grad_norm": 0.23046875, "learning_rate": 0.0011994086495110427, "loss": 0.8013, "step": 8402 }, { "epoch": 0.22549914126234435, "grad_norm": 0.2490234375, "learning_rate": 0.001199407401483681, "loss": 0.85, "step": 8403 }, { "epoch": 0.2255259768140833, "grad_norm": 0.236328125, "learning_rate": 0.0011994061521413957, "loss": 0.795, "step": 8404 }, { "epoch": 0.22555281236582225, "grad_norm": 0.23046875, "learning_rate": 0.0011994049014841896, "loss": 0.7734, "step": 8405 }, { "epoch": 0.2255796479175612, "grad_norm": 0.2353515625, "learning_rate": 0.0011994036495120656, "loss": 0.8514, "step": 8406 }, { "epoch": 0.22560648346930012, "grad_norm": 0.2333984375, "learning_rate": 0.0011994023962250263, "loss": 0.7737, "step": 8407 }, { "epoch": 0.22563331902103909, "grad_norm": 0.2294921875, "learning_rate": 0.0011994011416230745, "loss": 0.7507, "step": 8408 }, { "epoch": 0.22566015457277802, "grad_norm": 0.294921875, "learning_rate": 0.0011993998857062129, "loss": 0.7503, "step": 8409 }, { "epoch": 0.22568699012451696, "grad_norm": 0.2353515625, "learning_rate": 0.001199398628474444, "loss": 0.7823, "step": 8410 }, { "epoch": 0.2257138256762559, "grad_norm": 0.2236328125, "learning_rate": 0.0011993973699277714, "loss": 0.7314, "step": 8411 }, { "epoch": 0.22574066122799485, "grad_norm": 0.2255859375, "learning_rate": 0.0011993961100661972, "loss": 0.7834, "step": 8412 }, { "epoch": 0.2257674967797338, "grad_norm": 0.2236328125, "learning_rate": 0.001199394848889724, "loss": 0.7181, "step": 8413 }, { "epoch": 0.22579433233147272, "grad_norm": 0.2265625, "learning_rate": 0.0011993935863983552, "loss": 0.7, "step": 8414 }, { "epoch": 0.2258211678832117, "grad_norm": 0.2412109375, "learning_rate": 0.0011993923225920929, "loss": 0.7801, "step": 8415 }, { "epoch": 0.22584800343495062, "grad_norm": 0.21875, "learning_rate": 0.0011993910574709403, "loss": 0.6959, "step": 8416 }, { "epoch": 0.22587483898668956, "grad_norm": 0.2431640625, "learning_rate": 0.0011993897910349, "loss": 0.8026, "step": 8417 }, { "epoch": 0.22590167453842852, "grad_norm": 0.2197265625, "learning_rate": 0.0011993885232839752, "loss": 0.8454, "step": 8418 }, { "epoch": 0.22592851009016746, "grad_norm": 0.244140625, "learning_rate": 0.001199387254218168, "loss": 0.8396, "step": 8419 }, { "epoch": 0.2259553456419064, "grad_norm": 0.234375, "learning_rate": 0.0011993859838374814, "loss": 0.7317, "step": 8420 }, { "epoch": 0.22598218119364535, "grad_norm": 0.2158203125, "learning_rate": 0.0011993847121419185, "loss": 0.7195, "step": 8421 }, { "epoch": 0.2260090167453843, "grad_norm": 0.240234375, "learning_rate": 0.0011993834391314819, "loss": 0.8411, "step": 8422 }, { "epoch": 0.22603585229712322, "grad_norm": 0.2333984375, "learning_rate": 0.0011993821648061743, "loss": 0.788, "step": 8423 }, { "epoch": 0.22606268784886216, "grad_norm": 0.2421875, "learning_rate": 0.0011993808891659985, "loss": 0.8786, "step": 8424 }, { "epoch": 0.22608952340060112, "grad_norm": 0.203125, "learning_rate": 0.0011993796122109577, "loss": 0.6493, "step": 8425 }, { "epoch": 0.22611635895234006, "grad_norm": 0.2236328125, "learning_rate": 0.001199378333941054, "loss": 0.7233, "step": 8426 }, { "epoch": 0.226143194504079, "grad_norm": 0.228515625, "learning_rate": 0.0011993770543562907, "loss": 0.7871, "step": 8427 }, { "epoch": 0.22617003005581796, "grad_norm": 0.259765625, "learning_rate": 0.0011993757734566704, "loss": 0.9041, "step": 8428 }, { "epoch": 0.2261968656075569, "grad_norm": 0.255859375, "learning_rate": 0.001199374491242196, "loss": 0.7676, "step": 8429 }, { "epoch": 0.22622370115929583, "grad_norm": 0.21875, "learning_rate": 0.0011993732077128704, "loss": 0.7183, "step": 8430 }, { "epoch": 0.2262505367110348, "grad_norm": 0.22265625, "learning_rate": 0.0011993719228686963, "loss": 0.723, "step": 8431 }, { "epoch": 0.22627737226277372, "grad_norm": 0.2431640625, "learning_rate": 0.0011993706367096767, "loss": 0.87, "step": 8432 }, { "epoch": 0.22630420781451266, "grad_norm": 0.25, "learning_rate": 0.001199369349235814, "loss": 0.8271, "step": 8433 }, { "epoch": 0.22633104336625162, "grad_norm": 0.224609375, "learning_rate": 0.0011993680604471114, "loss": 0.7529, "step": 8434 }, { "epoch": 0.22635787891799056, "grad_norm": 0.23828125, "learning_rate": 0.0011993667703435716, "loss": 0.7568, "step": 8435 }, { "epoch": 0.2263847144697295, "grad_norm": 0.2421875, "learning_rate": 0.0011993654789251974, "loss": 0.8003, "step": 8436 }, { "epoch": 0.22641155002146846, "grad_norm": 0.25390625, "learning_rate": 0.0011993641861919916, "loss": 0.8877, "step": 8437 }, { "epoch": 0.2264383855732074, "grad_norm": 0.24609375, "learning_rate": 0.0011993628921439573, "loss": 0.8382, "step": 8438 }, { "epoch": 0.22646522112494633, "grad_norm": 0.21484375, "learning_rate": 0.0011993615967810971, "loss": 0.6938, "step": 8439 }, { "epoch": 0.22649205667668526, "grad_norm": 0.2353515625, "learning_rate": 0.001199360300103414, "loss": 0.8694, "step": 8440 }, { "epoch": 0.22651889222842422, "grad_norm": 0.23828125, "learning_rate": 0.0011993590021109105, "loss": 0.8252, "step": 8441 }, { "epoch": 0.22654572778016316, "grad_norm": 0.2392578125, "learning_rate": 0.0011993577028035897, "loss": 0.781, "step": 8442 }, { "epoch": 0.2265725633319021, "grad_norm": 0.244140625, "learning_rate": 0.0011993564021814545, "loss": 0.8891, "step": 8443 }, { "epoch": 0.22659939888364106, "grad_norm": 0.23046875, "learning_rate": 0.0011993551002445076, "loss": 0.7775, "step": 8444 }, { "epoch": 0.22662623443538, "grad_norm": 0.232421875, "learning_rate": 0.0011993537969927522, "loss": 0.7812, "step": 8445 }, { "epoch": 0.22665306998711893, "grad_norm": 0.2333984375, "learning_rate": 0.0011993524924261906, "loss": 0.7673, "step": 8446 }, { "epoch": 0.2266799055388579, "grad_norm": 0.2421875, "learning_rate": 0.0011993511865448262, "loss": 0.7889, "step": 8447 }, { "epoch": 0.22670674109059682, "grad_norm": 0.228515625, "learning_rate": 0.0011993498793486615, "loss": 0.7714, "step": 8448 }, { "epoch": 0.22673357664233576, "grad_norm": 0.236328125, "learning_rate": 0.0011993485708376996, "loss": 0.7718, "step": 8449 }, { "epoch": 0.22676041219407472, "grad_norm": 0.23046875, "learning_rate": 0.0011993472610119433, "loss": 0.7539, "step": 8450 }, { "epoch": 0.22678724774581366, "grad_norm": 0.2294921875, "learning_rate": 0.0011993459498713952, "loss": 0.7935, "step": 8451 }, { "epoch": 0.2268140832975526, "grad_norm": 0.24609375, "learning_rate": 0.0011993446374160585, "loss": 0.8507, "step": 8452 }, { "epoch": 0.22684091884929153, "grad_norm": 0.240234375, "learning_rate": 0.001199343323645936, "loss": 0.8464, "step": 8453 }, { "epoch": 0.2268677544010305, "grad_norm": 0.2578125, "learning_rate": 0.0011993420085610307, "loss": 0.9112, "step": 8454 }, { "epoch": 0.22689458995276943, "grad_norm": 0.23828125, "learning_rate": 0.0011993406921613455, "loss": 0.8071, "step": 8455 }, { "epoch": 0.22692142550450836, "grad_norm": 0.2373046875, "learning_rate": 0.0011993393744468828, "loss": 0.7649, "step": 8456 }, { "epoch": 0.22694826105624732, "grad_norm": 0.236328125, "learning_rate": 0.001199338055417646, "loss": 0.8289, "step": 8457 }, { "epoch": 0.22697509660798626, "grad_norm": 0.2197265625, "learning_rate": 0.0011993367350736379, "loss": 0.7682, "step": 8458 }, { "epoch": 0.2270019321597252, "grad_norm": 0.2236328125, "learning_rate": 0.0011993354134148613, "loss": 0.6944, "step": 8459 }, { "epoch": 0.22702876771146416, "grad_norm": 0.24609375, "learning_rate": 0.001199334090441319, "loss": 0.8626, "step": 8460 }, { "epoch": 0.2270556032632031, "grad_norm": 0.25390625, "learning_rate": 0.001199332766153014, "loss": 0.8175, "step": 8461 }, { "epoch": 0.22708243881494203, "grad_norm": 0.2216796875, "learning_rate": 0.0011993314405499495, "loss": 0.6826, "step": 8462 }, { "epoch": 0.227109274366681, "grad_norm": 0.2451171875, "learning_rate": 0.001199330113632128, "loss": 0.8454, "step": 8463 }, { "epoch": 0.22713610991841993, "grad_norm": 0.244140625, "learning_rate": 0.0011993287853995524, "loss": 0.6951, "step": 8464 }, { "epoch": 0.22716294547015886, "grad_norm": 0.255859375, "learning_rate": 0.001199327455852226, "loss": 0.8606, "step": 8465 }, { "epoch": 0.2271897810218978, "grad_norm": 0.2392578125, "learning_rate": 0.0011993261249901514, "loss": 0.8145, "step": 8466 }, { "epoch": 0.22721661657363676, "grad_norm": 0.26953125, "learning_rate": 0.0011993247928133316, "loss": 0.8381, "step": 8467 }, { "epoch": 0.2272434521253757, "grad_norm": 0.220703125, "learning_rate": 0.0011993234593217696, "loss": 0.7633, "step": 8468 }, { "epoch": 0.22727028767711463, "grad_norm": 0.248046875, "learning_rate": 0.001199322124515468, "loss": 0.8003, "step": 8469 }, { "epoch": 0.2272971232288536, "grad_norm": 0.220703125, "learning_rate": 0.0011993207883944303, "loss": 0.737, "step": 8470 }, { "epoch": 0.22732395878059253, "grad_norm": 0.263671875, "learning_rate": 0.001199319450958659, "loss": 0.8258, "step": 8471 }, { "epoch": 0.22735079433233146, "grad_norm": 0.2333984375, "learning_rate": 0.001199318112208157, "loss": 0.7758, "step": 8472 }, { "epoch": 0.22737762988407043, "grad_norm": 0.2314453125, "learning_rate": 0.0011993167721429277, "loss": 0.7469, "step": 8473 }, { "epoch": 0.22740446543580936, "grad_norm": 0.2412109375, "learning_rate": 0.0011993154307629735, "loss": 0.8412, "step": 8474 }, { "epoch": 0.2274313009875483, "grad_norm": 0.2255859375, "learning_rate": 0.0011993140880682976, "loss": 0.7714, "step": 8475 }, { "epoch": 0.22745813653928726, "grad_norm": 0.21875, "learning_rate": 0.001199312744058903, "loss": 0.6958, "step": 8476 }, { "epoch": 0.2274849720910262, "grad_norm": 0.236328125, "learning_rate": 0.0011993113987347924, "loss": 0.7168, "step": 8477 }, { "epoch": 0.22751180764276513, "grad_norm": 0.2216796875, "learning_rate": 0.0011993100520959691, "loss": 0.713, "step": 8478 }, { "epoch": 0.2275386431945041, "grad_norm": 0.25390625, "learning_rate": 0.0011993087041424358, "loss": 0.8696, "step": 8479 }, { "epoch": 0.22756547874624303, "grad_norm": 0.279296875, "learning_rate": 0.0011993073548741953, "loss": 0.8383, "step": 8480 }, { "epoch": 0.22759231429798196, "grad_norm": 0.255859375, "learning_rate": 0.001199306004291251, "loss": 0.933, "step": 8481 }, { "epoch": 0.2276191498497209, "grad_norm": 0.236328125, "learning_rate": 0.0011993046523936057, "loss": 0.7406, "step": 8482 }, { "epoch": 0.22764598540145986, "grad_norm": 0.271484375, "learning_rate": 0.001199303299181262, "loss": 0.9801, "step": 8483 }, { "epoch": 0.2276728209531988, "grad_norm": 0.2412109375, "learning_rate": 0.0011993019446542236, "loss": 0.7868, "step": 8484 }, { "epoch": 0.22769965650493773, "grad_norm": 0.25390625, "learning_rate": 0.0011993005888124928, "loss": 0.8766, "step": 8485 }, { "epoch": 0.2277264920566767, "grad_norm": 0.2236328125, "learning_rate": 0.001199299231656073, "loss": 0.7582, "step": 8486 }, { "epoch": 0.22775332760841563, "grad_norm": 0.2578125, "learning_rate": 0.0011992978731849666, "loss": 0.838, "step": 8487 }, { "epoch": 0.22778016316015456, "grad_norm": 0.234375, "learning_rate": 0.0011992965133991774, "loss": 0.709, "step": 8488 }, { "epoch": 0.22780699871189353, "grad_norm": 0.255859375, "learning_rate": 0.001199295152298708, "loss": 0.8362, "step": 8489 }, { "epoch": 0.22783383426363246, "grad_norm": 0.232421875, "learning_rate": 0.001199293789883561, "loss": 0.6754, "step": 8490 }, { "epoch": 0.2278606698153714, "grad_norm": 0.244140625, "learning_rate": 0.00119929242615374, "loss": 0.7631, "step": 8491 }, { "epoch": 0.22788750536711036, "grad_norm": 0.23828125, "learning_rate": 0.0011992910611092476, "loss": 0.7793, "step": 8492 }, { "epoch": 0.2279143409188493, "grad_norm": 0.2138671875, "learning_rate": 0.001199289694750087, "loss": 0.6944, "step": 8493 }, { "epoch": 0.22794117647058823, "grad_norm": 0.2275390625, "learning_rate": 0.001199288327076261, "loss": 0.764, "step": 8494 }, { "epoch": 0.22796801202232717, "grad_norm": 0.2373046875, "learning_rate": 0.0011992869580877729, "loss": 0.7058, "step": 8495 }, { "epoch": 0.22799484757406613, "grad_norm": 0.25, "learning_rate": 0.0011992855877846255, "loss": 0.8623, "step": 8496 }, { "epoch": 0.22802168312580506, "grad_norm": 0.259765625, "learning_rate": 0.0011992842161668218, "loss": 0.8523, "step": 8497 }, { "epoch": 0.228048518677544, "grad_norm": 0.2333984375, "learning_rate": 0.0011992828432343648, "loss": 0.7997, "step": 8498 }, { "epoch": 0.22807535422928296, "grad_norm": 0.2373046875, "learning_rate": 0.0011992814689872574, "loss": 0.7191, "step": 8499 }, { "epoch": 0.2281021897810219, "grad_norm": 0.240234375, "learning_rate": 0.001199280093425503, "loss": 0.7502, "step": 8500 }, { "epoch": 0.22812902533276083, "grad_norm": 0.2412109375, "learning_rate": 0.0011992787165491044, "loss": 0.8643, "step": 8501 }, { "epoch": 0.2281558608844998, "grad_norm": 0.2314453125, "learning_rate": 0.0011992773383580646, "loss": 0.788, "step": 8502 }, { "epoch": 0.22818269643623873, "grad_norm": 0.2353515625, "learning_rate": 0.0011992759588523865, "loss": 0.7565, "step": 8503 }, { "epoch": 0.22820953198797767, "grad_norm": 0.2451171875, "learning_rate": 0.0011992745780320733, "loss": 0.8012, "step": 8504 }, { "epoch": 0.22823636753971663, "grad_norm": 0.224609375, "learning_rate": 0.001199273195897128, "loss": 0.6799, "step": 8505 }, { "epoch": 0.22826320309145556, "grad_norm": 0.25, "learning_rate": 0.0011992718124475535, "loss": 0.8753, "step": 8506 }, { "epoch": 0.2282900386431945, "grad_norm": 0.25, "learning_rate": 0.0011992704276833533, "loss": 0.8465, "step": 8507 }, { "epoch": 0.22831687419493346, "grad_norm": 0.23046875, "learning_rate": 0.0011992690416045297, "loss": 0.757, "step": 8508 }, { "epoch": 0.2283437097466724, "grad_norm": 0.232421875, "learning_rate": 0.0011992676542110863, "loss": 0.8461, "step": 8509 }, { "epoch": 0.22837054529841133, "grad_norm": 0.2353515625, "learning_rate": 0.001199266265503026, "loss": 0.7856, "step": 8510 }, { "epoch": 0.22839738085015027, "grad_norm": 0.234375, "learning_rate": 0.0011992648754803519, "loss": 0.7821, "step": 8511 }, { "epoch": 0.22842421640188923, "grad_norm": 0.2451171875, "learning_rate": 0.0011992634841430667, "loss": 0.8575, "step": 8512 }, { "epoch": 0.22845105195362816, "grad_norm": 0.22265625, "learning_rate": 0.001199262091491174, "loss": 0.7333, "step": 8513 }, { "epoch": 0.2284778875053671, "grad_norm": 0.2001953125, "learning_rate": 0.0011992606975246764, "loss": 0.6244, "step": 8514 }, { "epoch": 0.22850472305710606, "grad_norm": 0.2255859375, "learning_rate": 0.0011992593022435772, "loss": 0.7692, "step": 8515 }, { "epoch": 0.228531558608845, "grad_norm": 0.2470703125, "learning_rate": 0.0011992579056478794, "loss": 0.8288, "step": 8516 }, { "epoch": 0.22855839416058393, "grad_norm": 0.220703125, "learning_rate": 0.001199256507737586, "loss": 0.7147, "step": 8517 }, { "epoch": 0.2285852297123229, "grad_norm": 0.267578125, "learning_rate": 0.0011992551085127001, "loss": 0.8003, "step": 8518 }, { "epoch": 0.22861206526406183, "grad_norm": 0.2236328125, "learning_rate": 0.0011992537079732251, "loss": 0.7405, "step": 8519 }, { "epoch": 0.22863890081580077, "grad_norm": 0.2138671875, "learning_rate": 0.0011992523061191635, "loss": 0.6859, "step": 8520 }, { "epoch": 0.22866573636753973, "grad_norm": 0.23046875, "learning_rate": 0.0011992509029505186, "loss": 0.7913, "step": 8521 }, { "epoch": 0.22869257191927866, "grad_norm": 0.234375, "learning_rate": 0.0011992494984672934, "loss": 0.7928, "step": 8522 }, { "epoch": 0.2287194074710176, "grad_norm": 0.259765625, "learning_rate": 0.0011992480926694914, "loss": 0.8443, "step": 8523 }, { "epoch": 0.22874624302275653, "grad_norm": 0.240234375, "learning_rate": 0.0011992466855571154, "loss": 0.7919, "step": 8524 }, { "epoch": 0.2287730785744955, "grad_norm": 0.2353515625, "learning_rate": 0.0011992452771301682, "loss": 0.8763, "step": 8525 }, { "epoch": 0.22879991412623443, "grad_norm": 0.251953125, "learning_rate": 0.0011992438673886535, "loss": 0.9539, "step": 8526 }, { "epoch": 0.22882674967797337, "grad_norm": 0.25390625, "learning_rate": 0.0011992424563325738, "loss": 0.9235, "step": 8527 }, { "epoch": 0.22885358522971233, "grad_norm": 0.23828125, "learning_rate": 0.0011992410439619326, "loss": 0.8223, "step": 8528 }, { "epoch": 0.22888042078145127, "grad_norm": 0.2412109375, "learning_rate": 0.0011992396302767329, "loss": 0.8039, "step": 8529 }, { "epoch": 0.2289072563331902, "grad_norm": 0.22265625, "learning_rate": 0.0011992382152769775, "loss": 0.7565, "step": 8530 }, { "epoch": 0.22893409188492916, "grad_norm": 0.2373046875, "learning_rate": 0.00119923679896267, "loss": 0.8099, "step": 8531 }, { "epoch": 0.2289609274366681, "grad_norm": 0.2490234375, "learning_rate": 0.001199235381333813, "loss": 0.8015, "step": 8532 }, { "epoch": 0.22898776298840703, "grad_norm": 0.2109375, "learning_rate": 0.00119923396239041, "loss": 0.688, "step": 8533 }, { "epoch": 0.229014598540146, "grad_norm": 0.240234375, "learning_rate": 0.0011992325421324639, "loss": 0.8404, "step": 8534 }, { "epoch": 0.22904143409188493, "grad_norm": 0.2294921875, "learning_rate": 0.001199231120559978, "loss": 0.8354, "step": 8535 }, { "epoch": 0.22906826964362387, "grad_norm": 0.2275390625, "learning_rate": 0.0011992296976729554, "loss": 0.7641, "step": 8536 }, { "epoch": 0.22909510519536283, "grad_norm": 0.2294921875, "learning_rate": 0.001199228273471399, "loss": 0.7617, "step": 8537 }, { "epoch": 0.22912194074710177, "grad_norm": 0.2216796875, "learning_rate": 0.0011992268479553122, "loss": 0.6951, "step": 8538 }, { "epoch": 0.2291487762988407, "grad_norm": 0.22265625, "learning_rate": 0.0011992254211246978, "loss": 0.7508, "step": 8539 }, { "epoch": 0.22917561185057964, "grad_norm": 0.2255859375, "learning_rate": 0.0011992239929795594, "loss": 0.7239, "step": 8540 }, { "epoch": 0.2292024474023186, "grad_norm": 0.232421875, "learning_rate": 0.0011992225635198996, "loss": 0.7673, "step": 8541 }, { "epoch": 0.22922928295405753, "grad_norm": 0.232421875, "learning_rate": 0.0011992211327457218, "loss": 0.7793, "step": 8542 }, { "epoch": 0.22925611850579647, "grad_norm": 0.2265625, "learning_rate": 0.0011992197006570292, "loss": 0.7694, "step": 8543 }, { "epoch": 0.22928295405753543, "grad_norm": 0.224609375, "learning_rate": 0.0011992182672538251, "loss": 0.7726, "step": 8544 }, { "epoch": 0.22930978960927437, "grad_norm": 0.216796875, "learning_rate": 0.001199216832536112, "loss": 0.7126, "step": 8545 }, { "epoch": 0.2293366251610133, "grad_norm": 0.2373046875, "learning_rate": 0.0011992153965038937, "loss": 0.8314, "step": 8546 }, { "epoch": 0.22936346071275227, "grad_norm": 0.2490234375, "learning_rate": 0.0011992139591571731, "loss": 0.7798, "step": 8547 }, { "epoch": 0.2293902962644912, "grad_norm": 0.2236328125, "learning_rate": 0.0011992125204959533, "loss": 0.7518, "step": 8548 }, { "epoch": 0.22941713181623014, "grad_norm": 0.251953125, "learning_rate": 0.0011992110805202376, "loss": 0.8596, "step": 8549 }, { "epoch": 0.2294439673679691, "grad_norm": 0.2294921875, "learning_rate": 0.0011992096392300288, "loss": 0.8206, "step": 8550 }, { "epoch": 0.22947080291970803, "grad_norm": 0.2255859375, "learning_rate": 0.0011992081966253308, "loss": 0.764, "step": 8551 }, { "epoch": 0.22949763847144697, "grad_norm": 0.2578125, "learning_rate": 0.0011992067527061459, "loss": 0.9007, "step": 8552 }, { "epoch": 0.2295244740231859, "grad_norm": 0.251953125, "learning_rate": 0.0011992053074724779, "loss": 0.9103, "step": 8553 }, { "epoch": 0.22955130957492487, "grad_norm": 0.25390625, "learning_rate": 0.0011992038609243297, "loss": 0.8848, "step": 8554 }, { "epoch": 0.2295781451266638, "grad_norm": 0.2333984375, "learning_rate": 0.0011992024130617045, "loss": 0.7728, "step": 8555 }, { "epoch": 0.22960498067840274, "grad_norm": 0.2431640625, "learning_rate": 0.0011992009638846055, "loss": 0.7981, "step": 8556 }, { "epoch": 0.2296318162301417, "grad_norm": 0.251953125, "learning_rate": 0.001199199513393036, "loss": 0.7592, "step": 8557 }, { "epoch": 0.22965865178188064, "grad_norm": 0.25, "learning_rate": 0.0011991980615869988, "loss": 0.7728, "step": 8558 }, { "epoch": 0.22968548733361957, "grad_norm": 0.2177734375, "learning_rate": 0.0011991966084664975, "loss": 0.7805, "step": 8559 }, { "epoch": 0.22971232288535853, "grad_norm": 0.2265625, "learning_rate": 0.001199195154031535, "loss": 0.6714, "step": 8560 }, { "epoch": 0.22973915843709747, "grad_norm": 0.2451171875, "learning_rate": 0.001199193698282115, "loss": 0.832, "step": 8561 }, { "epoch": 0.2297659939888364, "grad_norm": 0.232421875, "learning_rate": 0.0011991922412182398, "loss": 0.7643, "step": 8562 }, { "epoch": 0.22979282954057537, "grad_norm": 0.2265625, "learning_rate": 0.0011991907828399133, "loss": 0.7424, "step": 8563 }, { "epoch": 0.2298196650923143, "grad_norm": 0.2412109375, "learning_rate": 0.0011991893231471386, "loss": 0.8179, "step": 8564 }, { "epoch": 0.22984650064405324, "grad_norm": 0.21875, "learning_rate": 0.0011991878621399187, "loss": 0.6618, "step": 8565 }, { "epoch": 0.22987333619579217, "grad_norm": 0.232421875, "learning_rate": 0.0011991863998182571, "loss": 0.7335, "step": 8566 }, { "epoch": 0.22990017174753113, "grad_norm": 0.232421875, "learning_rate": 0.0011991849361821566, "loss": 0.775, "step": 8567 }, { "epoch": 0.22992700729927007, "grad_norm": 0.224609375, "learning_rate": 0.0011991834712316207, "loss": 0.6915, "step": 8568 }, { "epoch": 0.229953842851009, "grad_norm": 0.2470703125, "learning_rate": 0.0011991820049666525, "loss": 0.862, "step": 8569 }, { "epoch": 0.22998067840274797, "grad_norm": 0.232421875, "learning_rate": 0.0011991805373872553, "loss": 0.7019, "step": 8570 }, { "epoch": 0.2300075139544869, "grad_norm": 0.23828125, "learning_rate": 0.0011991790684934326, "loss": 0.695, "step": 8571 }, { "epoch": 0.23003434950622584, "grad_norm": 0.2490234375, "learning_rate": 0.0011991775982851869, "loss": 0.8955, "step": 8572 }, { "epoch": 0.2300611850579648, "grad_norm": 0.2490234375, "learning_rate": 0.0011991761267625219, "loss": 0.7886, "step": 8573 }, { "epoch": 0.23008802060970374, "grad_norm": 0.2255859375, "learning_rate": 0.0011991746539254408, "loss": 0.7103, "step": 8574 }, { "epoch": 0.23011485616144267, "grad_norm": 0.2431640625, "learning_rate": 0.0011991731797739468, "loss": 0.84, "step": 8575 }, { "epoch": 0.23014169171318163, "grad_norm": 0.2412109375, "learning_rate": 0.001199171704308043, "loss": 0.8477, "step": 8576 }, { "epoch": 0.23016852726492057, "grad_norm": 0.2373046875, "learning_rate": 0.001199170227527733, "loss": 0.7007, "step": 8577 }, { "epoch": 0.2301953628166595, "grad_norm": 0.2216796875, "learning_rate": 0.0011991687494330198, "loss": 0.7991, "step": 8578 }, { "epoch": 0.23022219836839847, "grad_norm": 0.2265625, "learning_rate": 0.0011991672700239063, "loss": 0.7839, "step": 8579 }, { "epoch": 0.2302490339201374, "grad_norm": 0.2451171875, "learning_rate": 0.0011991657893003964, "loss": 0.8597, "step": 8580 }, { "epoch": 0.23027586947187634, "grad_norm": 0.236328125, "learning_rate": 0.001199164307262493, "loss": 0.7949, "step": 8581 }, { "epoch": 0.23030270502361527, "grad_norm": 0.2314453125, "learning_rate": 0.0011991628239101993, "loss": 0.8026, "step": 8582 }, { "epoch": 0.23032954057535424, "grad_norm": 0.23828125, "learning_rate": 0.0011991613392435185, "loss": 0.8124, "step": 8583 }, { "epoch": 0.23035637612709317, "grad_norm": 0.234375, "learning_rate": 0.0011991598532624542, "loss": 0.7777, "step": 8584 }, { "epoch": 0.2303832116788321, "grad_norm": 0.25, "learning_rate": 0.0011991583659670094, "loss": 0.8754, "step": 8585 }, { "epoch": 0.23041004723057107, "grad_norm": 0.244140625, "learning_rate": 0.0011991568773571876, "loss": 0.8524, "step": 8586 }, { "epoch": 0.23043688278231, "grad_norm": 0.255859375, "learning_rate": 0.0011991553874329916, "loss": 0.8487, "step": 8587 }, { "epoch": 0.23046371833404894, "grad_norm": 0.2353515625, "learning_rate": 0.0011991538961944252, "loss": 0.7778, "step": 8588 }, { "epoch": 0.2304905538857879, "grad_norm": 0.234375, "learning_rate": 0.0011991524036414913, "loss": 0.8095, "step": 8589 }, { "epoch": 0.23051738943752684, "grad_norm": 0.2333984375, "learning_rate": 0.0011991509097741932, "loss": 0.8307, "step": 8590 }, { "epoch": 0.23054422498926577, "grad_norm": 0.2421875, "learning_rate": 0.0011991494145925344, "loss": 0.8216, "step": 8591 }, { "epoch": 0.23057106054100474, "grad_norm": 0.22265625, "learning_rate": 0.001199147918096518, "loss": 0.7443, "step": 8592 }, { "epoch": 0.23059789609274367, "grad_norm": 0.2431640625, "learning_rate": 0.0011991464202861476, "loss": 0.8658, "step": 8593 }, { "epoch": 0.2306247316444826, "grad_norm": 0.21875, "learning_rate": 0.0011991449211614259, "loss": 0.7032, "step": 8594 }, { "epoch": 0.23065156719622154, "grad_norm": 0.2294921875, "learning_rate": 0.0011991434207223566, "loss": 0.7667, "step": 8595 }, { "epoch": 0.2306784027479605, "grad_norm": 0.2431640625, "learning_rate": 0.001199141918968943, "loss": 0.8451, "step": 8596 }, { "epoch": 0.23070523829969944, "grad_norm": 0.23828125, "learning_rate": 0.001199140415901188, "loss": 0.7914, "step": 8597 }, { "epoch": 0.23073207385143837, "grad_norm": 0.2431640625, "learning_rate": 0.0011991389115190954, "loss": 0.7543, "step": 8598 }, { "epoch": 0.23075890940317734, "grad_norm": 0.232421875, "learning_rate": 0.0011991374058226685, "loss": 0.8195, "step": 8599 }, { "epoch": 0.23078574495491627, "grad_norm": 0.2412109375, "learning_rate": 0.0011991358988119101, "loss": 0.8502, "step": 8600 }, { "epoch": 0.2308125805066552, "grad_norm": 0.236328125, "learning_rate": 0.001199134390486824, "loss": 0.8461, "step": 8601 }, { "epoch": 0.23083941605839417, "grad_norm": 0.2216796875, "learning_rate": 0.0011991328808474131, "loss": 0.7741, "step": 8602 }, { "epoch": 0.2308662516101331, "grad_norm": 0.263671875, "learning_rate": 0.0011991313698936812, "loss": 0.8757, "step": 8603 }, { "epoch": 0.23089308716187204, "grad_norm": 0.2392578125, "learning_rate": 0.0011991298576256313, "loss": 0.8311, "step": 8604 }, { "epoch": 0.230919922713611, "grad_norm": 0.2353515625, "learning_rate": 0.0011991283440432668, "loss": 0.8037, "step": 8605 }, { "epoch": 0.23094675826534994, "grad_norm": 0.23828125, "learning_rate": 0.0011991268291465906, "loss": 0.7892, "step": 8606 }, { "epoch": 0.23097359381708887, "grad_norm": 0.228515625, "learning_rate": 0.0011991253129356068, "loss": 0.7479, "step": 8607 }, { "epoch": 0.23100042936882784, "grad_norm": 0.279296875, "learning_rate": 0.0011991237954103182, "loss": 0.9678, "step": 8608 }, { "epoch": 0.23102726492056677, "grad_norm": 0.228515625, "learning_rate": 0.0011991222765707283, "loss": 0.825, "step": 8609 }, { "epoch": 0.2310541004723057, "grad_norm": 0.232421875, "learning_rate": 0.0011991207564168403, "loss": 0.8058, "step": 8610 }, { "epoch": 0.23108093602404464, "grad_norm": 0.232421875, "learning_rate": 0.0011991192349486575, "loss": 0.8199, "step": 8611 }, { "epoch": 0.2311077715757836, "grad_norm": 0.2353515625, "learning_rate": 0.0011991177121661836, "loss": 0.7567, "step": 8612 }, { "epoch": 0.23113460712752254, "grad_norm": 0.212890625, "learning_rate": 0.0011991161880694217, "loss": 0.6458, "step": 8613 }, { "epoch": 0.23116144267926148, "grad_norm": 0.234375, "learning_rate": 0.0011991146626583748, "loss": 0.8211, "step": 8614 }, { "epoch": 0.23118827823100044, "grad_norm": 0.2431640625, "learning_rate": 0.001199113135933047, "loss": 0.8056, "step": 8615 }, { "epoch": 0.23121511378273937, "grad_norm": 0.22265625, "learning_rate": 0.0011991116078934412, "loss": 0.7708, "step": 8616 }, { "epoch": 0.2312419493344783, "grad_norm": 0.2470703125, "learning_rate": 0.0011991100785395607, "loss": 0.8053, "step": 8617 }, { "epoch": 0.23126878488621727, "grad_norm": 0.2421875, "learning_rate": 0.0011991085478714088, "loss": 0.8674, "step": 8618 }, { "epoch": 0.2312956204379562, "grad_norm": 0.2392578125, "learning_rate": 0.0011991070158889892, "loss": 0.7396, "step": 8619 }, { "epoch": 0.23132245598969514, "grad_norm": 0.216796875, "learning_rate": 0.001199105482592305, "loss": 0.728, "step": 8620 }, { "epoch": 0.2313492915414341, "grad_norm": 0.2470703125, "learning_rate": 0.0011991039479813596, "loss": 0.8587, "step": 8621 }, { "epoch": 0.23137612709317304, "grad_norm": 0.2373046875, "learning_rate": 0.0011991024120561566, "loss": 0.806, "step": 8622 }, { "epoch": 0.23140296264491197, "grad_norm": 0.2451171875, "learning_rate": 0.001199100874816699, "loss": 0.7671, "step": 8623 }, { "epoch": 0.2314297981966509, "grad_norm": 0.224609375, "learning_rate": 0.0011990993362629902, "loss": 0.6637, "step": 8624 }, { "epoch": 0.23145663374838987, "grad_norm": 0.24609375, "learning_rate": 0.001199097796395034, "loss": 0.8505, "step": 8625 }, { "epoch": 0.2314834693001288, "grad_norm": 0.2314453125, "learning_rate": 0.0011990962552128333, "loss": 0.8023, "step": 8626 }, { "epoch": 0.23151030485186774, "grad_norm": 0.2333984375, "learning_rate": 0.0011990947127163915, "loss": 0.7349, "step": 8627 }, { "epoch": 0.2315371404036067, "grad_norm": 0.240234375, "learning_rate": 0.0011990931689057126, "loss": 0.7942, "step": 8628 }, { "epoch": 0.23156397595534564, "grad_norm": 0.2236328125, "learning_rate": 0.0011990916237807992, "loss": 0.7457, "step": 8629 }, { "epoch": 0.23159081150708458, "grad_norm": 0.240234375, "learning_rate": 0.0011990900773416553, "loss": 0.7662, "step": 8630 }, { "epoch": 0.23161764705882354, "grad_norm": 0.2431640625, "learning_rate": 0.0011990885295882839, "loss": 0.7775, "step": 8631 }, { "epoch": 0.23164448261056247, "grad_norm": 0.228515625, "learning_rate": 0.0011990869805206883, "loss": 0.7469, "step": 8632 }, { "epoch": 0.2316713181623014, "grad_norm": 0.2353515625, "learning_rate": 0.0011990854301388723, "loss": 0.8172, "step": 8633 }, { "epoch": 0.23169815371404037, "grad_norm": 0.2236328125, "learning_rate": 0.0011990838784428392, "loss": 0.7859, "step": 8634 }, { "epoch": 0.2317249892657793, "grad_norm": 0.2255859375, "learning_rate": 0.0011990823254325923, "loss": 0.7456, "step": 8635 }, { "epoch": 0.23175182481751824, "grad_norm": 0.251953125, "learning_rate": 0.001199080771108135, "loss": 0.8408, "step": 8636 }, { "epoch": 0.2317786603692572, "grad_norm": 0.2294921875, "learning_rate": 0.0011990792154694709, "loss": 0.7396, "step": 8637 }, { "epoch": 0.23180549592099614, "grad_norm": 0.224609375, "learning_rate": 0.001199077658516603, "loss": 0.6785, "step": 8638 }, { "epoch": 0.23183233147273508, "grad_norm": 0.228515625, "learning_rate": 0.0011990761002495351, "loss": 0.7902, "step": 8639 }, { "epoch": 0.231859167024474, "grad_norm": 0.2333984375, "learning_rate": 0.0011990745406682704, "loss": 0.8622, "step": 8640 }, { "epoch": 0.23188600257621297, "grad_norm": 0.26171875, "learning_rate": 0.0011990729797728124, "loss": 0.9084, "step": 8641 }, { "epoch": 0.2319128381279519, "grad_norm": 0.26171875, "learning_rate": 0.0011990714175631647, "loss": 0.9356, "step": 8642 }, { "epoch": 0.23193967367969084, "grad_norm": 0.240234375, "learning_rate": 0.0011990698540393302, "loss": 0.8116, "step": 8643 }, { "epoch": 0.2319665092314298, "grad_norm": 0.2197265625, "learning_rate": 0.001199068289201313, "loss": 0.7373, "step": 8644 }, { "epoch": 0.23199334478316874, "grad_norm": 0.2294921875, "learning_rate": 0.0011990667230491162, "loss": 0.7885, "step": 8645 }, { "epoch": 0.23202018033490768, "grad_norm": 0.2138671875, "learning_rate": 0.0011990651555827433, "loss": 0.697, "step": 8646 }, { "epoch": 0.23204701588664664, "grad_norm": 0.23046875, "learning_rate": 0.0011990635868021975, "loss": 0.7086, "step": 8647 }, { "epoch": 0.23207385143838558, "grad_norm": 0.23828125, "learning_rate": 0.0011990620167074827, "loss": 0.7814, "step": 8648 }, { "epoch": 0.2321006869901245, "grad_norm": 0.212890625, "learning_rate": 0.0011990604452986019, "loss": 0.6989, "step": 8649 }, { "epoch": 0.23212752254186347, "grad_norm": 0.2294921875, "learning_rate": 0.0011990588725755585, "loss": 0.8234, "step": 8650 }, { "epoch": 0.2321543580936024, "grad_norm": 0.25, "learning_rate": 0.0011990572985383567, "loss": 0.9531, "step": 8651 }, { "epoch": 0.23218119364534134, "grad_norm": 0.2373046875, "learning_rate": 0.001199055723186999, "loss": 0.7774, "step": 8652 }, { "epoch": 0.23220802919708028, "grad_norm": 0.255859375, "learning_rate": 0.0011990541465214896, "loss": 0.8175, "step": 8653 }, { "epoch": 0.23223486474881924, "grad_norm": 0.2451171875, "learning_rate": 0.0011990525685418314, "loss": 0.8463, "step": 8654 }, { "epoch": 0.23226170030055818, "grad_norm": 0.2451171875, "learning_rate": 0.0011990509892480284, "loss": 0.8297, "step": 8655 }, { "epoch": 0.2322885358522971, "grad_norm": 0.291015625, "learning_rate": 0.0011990494086400834, "loss": 0.8106, "step": 8656 }, { "epoch": 0.23231537140403608, "grad_norm": 0.306640625, "learning_rate": 0.0011990478267180004, "loss": 0.8638, "step": 8657 }, { "epoch": 0.232342206955775, "grad_norm": 0.267578125, "learning_rate": 0.0011990462434817827, "loss": 0.9037, "step": 8658 }, { "epoch": 0.23236904250751395, "grad_norm": 0.296875, "learning_rate": 0.001199044658931434, "loss": 0.8969, "step": 8659 }, { "epoch": 0.2323958780592529, "grad_norm": 0.291015625, "learning_rate": 0.0011990430730669573, "loss": 0.8192, "step": 8660 }, { "epoch": 0.23242271361099184, "grad_norm": 0.279296875, "learning_rate": 0.0011990414858883563, "loss": 0.9161, "step": 8661 }, { "epoch": 0.23244954916273078, "grad_norm": 0.25390625, "learning_rate": 0.0011990398973956346, "loss": 0.8293, "step": 8662 }, { "epoch": 0.23247638471446974, "grad_norm": 0.2734375, "learning_rate": 0.0011990383075887957, "loss": 0.8637, "step": 8663 }, { "epoch": 0.23250322026620868, "grad_norm": 0.26953125, "learning_rate": 0.001199036716467843, "loss": 0.8525, "step": 8664 }, { "epoch": 0.2325300558179476, "grad_norm": 0.2734375, "learning_rate": 0.00119903512403278, "loss": 0.802, "step": 8665 }, { "epoch": 0.23255689136968655, "grad_norm": 0.263671875, "learning_rate": 0.0011990335302836102, "loss": 0.8692, "step": 8666 }, { "epoch": 0.2325837269214255, "grad_norm": 0.26171875, "learning_rate": 0.0011990319352203369, "loss": 0.8189, "step": 8667 }, { "epoch": 0.23261056247316445, "grad_norm": 0.25390625, "learning_rate": 0.0011990303388429637, "loss": 0.8663, "step": 8668 }, { "epoch": 0.23263739802490338, "grad_norm": 0.25390625, "learning_rate": 0.0011990287411514945, "loss": 0.7982, "step": 8669 }, { "epoch": 0.23266423357664234, "grad_norm": 0.2333984375, "learning_rate": 0.001199027142145932, "loss": 0.756, "step": 8670 }, { "epoch": 0.23269106912838128, "grad_norm": 0.2412109375, "learning_rate": 0.0011990255418262807, "loss": 0.8077, "step": 8671 }, { "epoch": 0.2327179046801202, "grad_norm": 0.25390625, "learning_rate": 0.0011990239401925434, "loss": 0.8449, "step": 8672 }, { "epoch": 0.23274474023185918, "grad_norm": 0.2412109375, "learning_rate": 0.0011990223372447238, "loss": 0.7879, "step": 8673 }, { "epoch": 0.2327715757835981, "grad_norm": 0.236328125, "learning_rate": 0.0011990207329828253, "loss": 0.7723, "step": 8674 }, { "epoch": 0.23279841133533705, "grad_norm": 0.2431640625, "learning_rate": 0.0011990191274068517, "loss": 0.8279, "step": 8675 }, { "epoch": 0.232825246887076, "grad_norm": 0.234375, "learning_rate": 0.0011990175205168064, "loss": 0.8426, "step": 8676 }, { "epoch": 0.23285208243881494, "grad_norm": 0.2412109375, "learning_rate": 0.0011990159123126927, "loss": 0.8113, "step": 8677 }, { "epoch": 0.23287891799055388, "grad_norm": 0.251953125, "learning_rate": 0.0011990143027945147, "loss": 0.8679, "step": 8678 }, { "epoch": 0.23290575354229284, "grad_norm": 0.240234375, "learning_rate": 0.0011990126919622752, "loss": 0.8551, "step": 8679 }, { "epoch": 0.23293258909403178, "grad_norm": 0.232421875, "learning_rate": 0.001199011079815978, "loss": 0.8203, "step": 8680 }, { "epoch": 0.2329594246457707, "grad_norm": 0.2421875, "learning_rate": 0.001199009466355627, "loss": 0.787, "step": 8681 }, { "epoch": 0.23298626019750965, "grad_norm": 0.224609375, "learning_rate": 0.0011990078515812254, "loss": 0.764, "step": 8682 }, { "epoch": 0.2330130957492486, "grad_norm": 0.2294921875, "learning_rate": 0.0011990062354927768, "loss": 0.7245, "step": 8683 }, { "epoch": 0.23303993130098755, "grad_norm": 0.23828125, "learning_rate": 0.0011990046180902847, "loss": 0.8179, "step": 8684 }, { "epoch": 0.23306676685272648, "grad_norm": 0.2333984375, "learning_rate": 0.0011990029993737528, "loss": 0.8066, "step": 8685 }, { "epoch": 0.23309360240446544, "grad_norm": 0.208984375, "learning_rate": 0.0011990013793431846, "loss": 0.7165, "step": 8686 }, { "epoch": 0.23312043795620438, "grad_norm": 0.2470703125, "learning_rate": 0.0011989997579985832, "loss": 0.8279, "step": 8687 }, { "epoch": 0.23314727350794331, "grad_norm": 0.2431640625, "learning_rate": 0.0011989981353399527, "loss": 0.9191, "step": 8688 }, { "epoch": 0.23317410905968228, "grad_norm": 0.25, "learning_rate": 0.001198996511367297, "loss": 0.8708, "step": 8689 }, { "epoch": 0.2332009446114212, "grad_norm": 0.228515625, "learning_rate": 0.0011989948860806187, "loss": 0.7657, "step": 8690 }, { "epoch": 0.23322778016316015, "grad_norm": 0.224609375, "learning_rate": 0.001198993259479922, "loss": 0.7521, "step": 8691 }, { "epoch": 0.2332546157148991, "grad_norm": 0.25390625, "learning_rate": 0.0011989916315652104, "loss": 0.9217, "step": 8692 }, { "epoch": 0.23328145126663805, "grad_norm": 0.2734375, "learning_rate": 0.0011989900023364872, "loss": 0.933, "step": 8693 }, { "epoch": 0.23330828681837698, "grad_norm": 0.2265625, "learning_rate": 0.0011989883717937562, "loss": 0.7196, "step": 8694 }, { "epoch": 0.23333512237011592, "grad_norm": 0.25390625, "learning_rate": 0.001198986739937021, "loss": 0.9307, "step": 8695 }, { "epoch": 0.23336195792185488, "grad_norm": 0.236328125, "learning_rate": 0.0011989851067662851, "loss": 0.7832, "step": 8696 }, { "epoch": 0.23338879347359381, "grad_norm": 0.2373046875, "learning_rate": 0.001198983472281552, "loss": 0.7686, "step": 8697 }, { "epoch": 0.23341562902533275, "grad_norm": 0.2412109375, "learning_rate": 0.0011989818364828255, "loss": 0.7583, "step": 8698 }, { "epoch": 0.2334424645770717, "grad_norm": 0.24609375, "learning_rate": 0.001198980199370109, "loss": 0.8548, "step": 8699 }, { "epoch": 0.23346930012881065, "grad_norm": 0.21875, "learning_rate": 0.0011989785609434062, "loss": 0.7063, "step": 8700 }, { "epoch": 0.23349613568054958, "grad_norm": 0.234375, "learning_rate": 0.001198976921202721, "loss": 0.8368, "step": 8701 }, { "epoch": 0.23352297123228855, "grad_norm": 0.216796875, "learning_rate": 0.0011989752801480561, "loss": 0.7149, "step": 8702 }, { "epoch": 0.23354980678402748, "grad_norm": 0.2265625, "learning_rate": 0.001198973637779416, "loss": 0.7191, "step": 8703 }, { "epoch": 0.23357664233576642, "grad_norm": 0.228515625, "learning_rate": 0.0011989719940968039, "loss": 0.7605, "step": 8704 }, { "epoch": 0.23360347788750538, "grad_norm": 0.2333984375, "learning_rate": 0.0011989703491002233, "loss": 0.7409, "step": 8705 }, { "epoch": 0.2336303134392443, "grad_norm": 0.23046875, "learning_rate": 0.0011989687027896781, "loss": 0.7893, "step": 8706 }, { "epoch": 0.23365714899098325, "grad_norm": 0.24609375, "learning_rate": 0.001198967055165172, "loss": 0.8604, "step": 8707 }, { "epoch": 0.2336839845427222, "grad_norm": 0.251953125, "learning_rate": 0.0011989654062267078, "loss": 0.8911, "step": 8708 }, { "epoch": 0.23371082009446115, "grad_norm": 0.2353515625, "learning_rate": 0.0011989637559742903, "loss": 0.7842, "step": 8709 }, { "epoch": 0.23373765564620008, "grad_norm": 0.2294921875, "learning_rate": 0.0011989621044079222, "loss": 0.8056, "step": 8710 }, { "epoch": 0.23376449119793902, "grad_norm": 0.240234375, "learning_rate": 0.0011989604515276076, "loss": 0.7834, "step": 8711 }, { "epoch": 0.23379132674967798, "grad_norm": 0.2275390625, "learning_rate": 0.00119895879733335, "loss": 0.7291, "step": 8712 }, { "epoch": 0.23381816230141692, "grad_norm": 0.23046875, "learning_rate": 0.001198957141825153, "loss": 0.809, "step": 8713 }, { "epoch": 0.23384499785315585, "grad_norm": 0.2177734375, "learning_rate": 0.0011989554850030202, "loss": 0.7574, "step": 8714 }, { "epoch": 0.2338718334048948, "grad_norm": 0.240234375, "learning_rate": 0.0011989538268669553, "loss": 0.8377, "step": 8715 }, { "epoch": 0.23389866895663375, "grad_norm": 0.23046875, "learning_rate": 0.0011989521674169621, "loss": 0.7357, "step": 8716 }, { "epoch": 0.23392550450837268, "grad_norm": 0.2265625, "learning_rate": 0.0011989505066530438, "loss": 0.757, "step": 8717 }, { "epoch": 0.23395234006011165, "grad_norm": 0.2236328125, "learning_rate": 0.0011989488445752043, "loss": 0.7179, "step": 8718 }, { "epoch": 0.23397917561185058, "grad_norm": 0.228515625, "learning_rate": 0.0011989471811834475, "loss": 0.7398, "step": 8719 }, { "epoch": 0.23400601116358952, "grad_norm": 0.2431640625, "learning_rate": 0.0011989455164777767, "loss": 0.8459, "step": 8720 }, { "epoch": 0.23403284671532848, "grad_norm": 0.2578125, "learning_rate": 0.0011989438504581957, "loss": 0.8775, "step": 8721 }, { "epoch": 0.23405968226706741, "grad_norm": 0.2333984375, "learning_rate": 0.0011989421831247078, "loss": 0.7408, "step": 8722 }, { "epoch": 0.23408651781880635, "grad_norm": 0.2412109375, "learning_rate": 0.0011989405144773172, "loss": 0.8137, "step": 8723 }, { "epoch": 0.23411335337054529, "grad_norm": 0.2421875, "learning_rate": 0.0011989388445160273, "loss": 0.8226, "step": 8724 }, { "epoch": 0.23414018892228425, "grad_norm": 0.2392578125, "learning_rate": 0.0011989371732408416, "loss": 0.9016, "step": 8725 }, { "epoch": 0.23416702447402318, "grad_norm": 0.2412109375, "learning_rate": 0.0011989355006517643, "loss": 0.7683, "step": 8726 }, { "epoch": 0.23419386002576212, "grad_norm": 0.23828125, "learning_rate": 0.0011989338267487984, "loss": 0.7484, "step": 8727 }, { "epoch": 0.23422069557750108, "grad_norm": 0.2392578125, "learning_rate": 0.0011989321515319482, "loss": 0.7133, "step": 8728 }, { "epoch": 0.23424753112924002, "grad_norm": 0.2373046875, "learning_rate": 0.001198930475001217, "loss": 0.8231, "step": 8729 }, { "epoch": 0.23427436668097895, "grad_norm": 0.2314453125, "learning_rate": 0.0011989287971566082, "loss": 0.8203, "step": 8730 }, { "epoch": 0.23430120223271791, "grad_norm": 0.21875, "learning_rate": 0.001198927117998126, "loss": 0.7554, "step": 8731 }, { "epoch": 0.23432803778445685, "grad_norm": 0.232421875, "learning_rate": 0.0011989254375257743, "loss": 0.7274, "step": 8732 }, { "epoch": 0.23435487333619578, "grad_norm": 0.2392578125, "learning_rate": 0.001198923755739556, "loss": 0.7915, "step": 8733 }, { "epoch": 0.23438170888793475, "grad_norm": 0.2431640625, "learning_rate": 0.0011989220726394752, "loss": 0.827, "step": 8734 }, { "epoch": 0.23440854443967368, "grad_norm": 0.2255859375, "learning_rate": 0.0011989203882255357, "loss": 0.7235, "step": 8735 }, { "epoch": 0.23443537999141262, "grad_norm": 0.21484375, "learning_rate": 0.001198918702497741, "loss": 0.6873, "step": 8736 }, { "epoch": 0.23446221554315158, "grad_norm": 0.232421875, "learning_rate": 0.001198917015456095, "loss": 0.7536, "step": 8737 }, { "epoch": 0.23448905109489052, "grad_norm": 0.2490234375, "learning_rate": 0.0011989153271006012, "loss": 0.8854, "step": 8738 }, { "epoch": 0.23451588664662945, "grad_norm": 0.232421875, "learning_rate": 0.0011989136374312632, "loss": 0.7349, "step": 8739 }, { "epoch": 0.2345427221983684, "grad_norm": 0.2490234375, "learning_rate": 0.001198911946448085, "loss": 0.7602, "step": 8740 }, { "epoch": 0.23456955775010735, "grad_norm": 0.2177734375, "learning_rate": 0.0011989102541510703, "loss": 0.6695, "step": 8741 }, { "epoch": 0.23459639330184628, "grad_norm": 0.251953125, "learning_rate": 0.0011989085605402224, "loss": 0.9141, "step": 8742 }, { "epoch": 0.23462322885358522, "grad_norm": 0.2255859375, "learning_rate": 0.0011989068656155456, "loss": 0.7481, "step": 8743 }, { "epoch": 0.23465006440532418, "grad_norm": 0.2412109375, "learning_rate": 0.0011989051693770433, "loss": 0.8516, "step": 8744 }, { "epoch": 0.23467689995706312, "grad_norm": 0.23046875, "learning_rate": 0.0011989034718247193, "loss": 0.7051, "step": 8745 }, { "epoch": 0.23470373550880205, "grad_norm": 0.2421875, "learning_rate": 0.0011989017729585772, "loss": 0.7959, "step": 8746 }, { "epoch": 0.23473057106054102, "grad_norm": 0.255859375, "learning_rate": 0.001198900072778621, "loss": 0.9242, "step": 8747 }, { "epoch": 0.23475740661227995, "grad_norm": 0.2294921875, "learning_rate": 0.0011988983712848539, "loss": 0.808, "step": 8748 }, { "epoch": 0.23478424216401889, "grad_norm": 0.21875, "learning_rate": 0.0011988966684772801, "loss": 0.79, "step": 8749 }, { "epoch": 0.23481107771575785, "grad_norm": 0.228515625, "learning_rate": 0.0011988949643559033, "loss": 0.8322, "step": 8750 }, { "epoch": 0.23483791326749678, "grad_norm": 0.2236328125, "learning_rate": 0.001198893258920727, "loss": 0.7435, "step": 8751 }, { "epoch": 0.23486474881923572, "grad_norm": 0.1904296875, "learning_rate": 0.0011988915521717553, "loss": 0.6293, "step": 8752 }, { "epoch": 0.23489158437097465, "grad_norm": 0.2158203125, "learning_rate": 0.0011988898441089915, "loss": 0.7184, "step": 8753 }, { "epoch": 0.23491841992271362, "grad_norm": 0.228515625, "learning_rate": 0.0011988881347324398, "loss": 0.7832, "step": 8754 }, { "epoch": 0.23494525547445255, "grad_norm": 0.2236328125, "learning_rate": 0.0011988864240421036, "loss": 0.7692, "step": 8755 }, { "epoch": 0.2349720910261915, "grad_norm": 0.2392578125, "learning_rate": 0.0011988847120379868, "loss": 0.8447, "step": 8756 }, { "epoch": 0.23499892657793045, "grad_norm": 0.25390625, "learning_rate": 0.001198882998720093, "loss": 0.9541, "step": 8757 }, { "epoch": 0.23502576212966939, "grad_norm": 0.2451171875, "learning_rate": 0.0011988812840884263, "loss": 0.9017, "step": 8758 }, { "epoch": 0.23505259768140832, "grad_norm": 0.2314453125, "learning_rate": 0.00119887956814299, "loss": 0.7826, "step": 8759 }, { "epoch": 0.23507943323314728, "grad_norm": 0.2353515625, "learning_rate": 0.0011988778508837884, "loss": 0.7793, "step": 8760 }, { "epoch": 0.23510626878488622, "grad_norm": 0.2412109375, "learning_rate": 0.001198876132310825, "loss": 0.7925, "step": 8761 }, { "epoch": 0.23513310433662515, "grad_norm": 0.2451171875, "learning_rate": 0.0011988744124241033, "loss": 0.8548, "step": 8762 }, { "epoch": 0.23515993988836412, "grad_norm": 0.2294921875, "learning_rate": 0.0011988726912236277, "loss": 0.7888, "step": 8763 }, { "epoch": 0.23518677544010305, "grad_norm": 0.2392578125, "learning_rate": 0.0011988709687094012, "loss": 0.8402, "step": 8764 }, { "epoch": 0.235213610991842, "grad_norm": 0.2314453125, "learning_rate": 0.0011988692448814283, "loss": 0.8313, "step": 8765 }, { "epoch": 0.23524044654358092, "grad_norm": 0.228515625, "learning_rate": 0.0011988675197397124, "loss": 0.7399, "step": 8766 }, { "epoch": 0.23526728209531989, "grad_norm": 0.2314453125, "learning_rate": 0.0011988657932842572, "loss": 0.7387, "step": 8767 }, { "epoch": 0.23529411764705882, "grad_norm": 0.244140625, "learning_rate": 0.0011988640655150667, "loss": 0.8379, "step": 8768 }, { "epoch": 0.23532095319879776, "grad_norm": 0.2197265625, "learning_rate": 0.0011988623364321447, "loss": 0.766, "step": 8769 }, { "epoch": 0.23534778875053672, "grad_norm": 0.2333984375, "learning_rate": 0.0011988606060354947, "loss": 0.828, "step": 8770 }, { "epoch": 0.23537462430227565, "grad_norm": 0.2333984375, "learning_rate": 0.001198858874325121, "loss": 0.8086, "step": 8771 }, { "epoch": 0.2354014598540146, "grad_norm": 0.2451171875, "learning_rate": 0.001198857141301027, "loss": 0.8928, "step": 8772 }, { "epoch": 0.23542829540575355, "grad_norm": 0.2431640625, "learning_rate": 0.0011988554069632168, "loss": 0.779, "step": 8773 }, { "epoch": 0.2354551309574925, "grad_norm": 0.228515625, "learning_rate": 0.0011988536713116937, "loss": 0.7698, "step": 8774 }, { "epoch": 0.23548196650923142, "grad_norm": 0.2470703125, "learning_rate": 0.001198851934346462, "loss": 0.8584, "step": 8775 }, { "epoch": 0.23550880206097038, "grad_norm": 0.2470703125, "learning_rate": 0.0011988501960675253, "loss": 0.8633, "step": 8776 }, { "epoch": 0.23553563761270932, "grad_norm": 0.265625, "learning_rate": 0.0011988484564748876, "loss": 0.9657, "step": 8777 }, { "epoch": 0.23556247316444826, "grad_norm": 0.224609375, "learning_rate": 0.0011988467155685523, "loss": 0.7806, "step": 8778 }, { "epoch": 0.23558930871618722, "grad_norm": 0.2578125, "learning_rate": 0.0011988449733485237, "loss": 0.7782, "step": 8779 }, { "epoch": 0.23561614426792615, "grad_norm": 0.224609375, "learning_rate": 0.0011988432298148054, "loss": 0.7179, "step": 8780 }, { "epoch": 0.2356429798196651, "grad_norm": 0.24609375, "learning_rate": 0.001198841484967401, "loss": 0.8315, "step": 8781 }, { "epoch": 0.23566981537140402, "grad_norm": 0.2490234375, "learning_rate": 0.001198839738806315, "loss": 0.8836, "step": 8782 }, { "epoch": 0.235696650923143, "grad_norm": 0.2392578125, "learning_rate": 0.0011988379913315504, "loss": 0.8426, "step": 8783 }, { "epoch": 0.23572348647488192, "grad_norm": 0.236328125, "learning_rate": 0.0011988362425431117, "loss": 0.8295, "step": 8784 }, { "epoch": 0.23575032202662086, "grad_norm": 0.2275390625, "learning_rate": 0.0011988344924410022, "loss": 0.7679, "step": 8785 }, { "epoch": 0.23577715757835982, "grad_norm": 0.2255859375, "learning_rate": 0.0011988327410252261, "loss": 0.7762, "step": 8786 }, { "epoch": 0.23580399313009875, "grad_norm": 0.21875, "learning_rate": 0.0011988309882957873, "loss": 0.7195, "step": 8787 }, { "epoch": 0.2358308286818377, "grad_norm": 0.22265625, "learning_rate": 0.0011988292342526893, "loss": 0.6959, "step": 8788 }, { "epoch": 0.23585766423357665, "grad_norm": 0.251953125, "learning_rate": 0.0011988274788959363, "loss": 0.8952, "step": 8789 }, { "epoch": 0.2358844997853156, "grad_norm": 0.2431640625, "learning_rate": 0.001198825722225532, "loss": 0.8602, "step": 8790 }, { "epoch": 0.23591133533705452, "grad_norm": 0.234375, "learning_rate": 0.00119882396424148, "loss": 0.7551, "step": 8791 }, { "epoch": 0.23593817088879349, "grad_norm": 0.236328125, "learning_rate": 0.0011988222049437846, "loss": 0.8891, "step": 8792 }, { "epoch": 0.23596500644053242, "grad_norm": 0.23828125, "learning_rate": 0.0011988204443324495, "loss": 0.8359, "step": 8793 }, { "epoch": 0.23599184199227136, "grad_norm": 0.240234375, "learning_rate": 0.0011988186824074783, "loss": 0.879, "step": 8794 }, { "epoch": 0.2360186775440103, "grad_norm": 0.228515625, "learning_rate": 0.0011988169191688752, "loss": 0.8184, "step": 8795 }, { "epoch": 0.23604551309574925, "grad_norm": 0.2255859375, "learning_rate": 0.001198815154616644, "loss": 0.7822, "step": 8796 }, { "epoch": 0.2360723486474882, "grad_norm": 0.23828125, "learning_rate": 0.0011988133887507882, "loss": 0.8208, "step": 8797 }, { "epoch": 0.23609918419922712, "grad_norm": 0.234375, "learning_rate": 0.0011988116215713126, "loss": 0.7951, "step": 8798 }, { "epoch": 0.2361260197509661, "grad_norm": 0.216796875, "learning_rate": 0.00119880985307822, "loss": 0.722, "step": 8799 }, { "epoch": 0.23615285530270502, "grad_norm": 0.2578125, "learning_rate": 0.001198808083271515, "loss": 0.8999, "step": 8800 }, { "epoch": 0.23617969085444396, "grad_norm": 0.2490234375, "learning_rate": 0.0011988063121512011, "loss": 0.8486, "step": 8801 }, { "epoch": 0.23620652640618292, "grad_norm": 0.2255859375, "learning_rate": 0.0011988045397172823, "loss": 0.7827, "step": 8802 }, { "epoch": 0.23623336195792186, "grad_norm": 0.2197265625, "learning_rate": 0.0011988027659697628, "loss": 0.7161, "step": 8803 }, { "epoch": 0.2362601975096608, "grad_norm": 0.2353515625, "learning_rate": 0.0011988009909086458, "loss": 0.8129, "step": 8804 }, { "epoch": 0.23628703306139975, "grad_norm": 0.23046875, "learning_rate": 0.001198799214533936, "loss": 0.8238, "step": 8805 }, { "epoch": 0.2363138686131387, "grad_norm": 0.2373046875, "learning_rate": 0.0011987974368456366, "loss": 0.7971, "step": 8806 }, { "epoch": 0.23634070416487762, "grad_norm": 0.234375, "learning_rate": 0.0011987956578437518, "loss": 0.7207, "step": 8807 }, { "epoch": 0.2363675397166166, "grad_norm": 0.2373046875, "learning_rate": 0.0011987938775282856, "loss": 0.8459, "step": 8808 }, { "epoch": 0.23639437526835552, "grad_norm": 0.2294921875, "learning_rate": 0.0011987920958992418, "loss": 0.7797, "step": 8809 }, { "epoch": 0.23642121082009446, "grad_norm": 0.2216796875, "learning_rate": 0.0011987903129566243, "loss": 0.6894, "step": 8810 }, { "epoch": 0.2364480463718334, "grad_norm": 0.2265625, "learning_rate": 0.001198788528700437, "loss": 0.8098, "step": 8811 }, { "epoch": 0.23647488192357236, "grad_norm": 0.2265625, "learning_rate": 0.0011987867431306838, "loss": 0.8225, "step": 8812 }, { "epoch": 0.2365017174753113, "grad_norm": 0.22265625, "learning_rate": 0.0011987849562473687, "loss": 0.744, "step": 8813 }, { "epoch": 0.23652855302705023, "grad_norm": 0.2275390625, "learning_rate": 0.0011987831680504955, "loss": 0.7833, "step": 8814 }, { "epoch": 0.2365553885787892, "grad_norm": 0.236328125, "learning_rate": 0.0011987813785400683, "loss": 0.8335, "step": 8815 }, { "epoch": 0.23658222413052812, "grad_norm": 0.23046875, "learning_rate": 0.0011987795877160907, "loss": 0.8034, "step": 8816 }, { "epoch": 0.23660905968226706, "grad_norm": 0.240234375, "learning_rate": 0.001198777795578567, "loss": 0.9024, "step": 8817 }, { "epoch": 0.23663589523400602, "grad_norm": 0.2421875, "learning_rate": 0.001198776002127501, "loss": 0.8373, "step": 8818 }, { "epoch": 0.23666273078574496, "grad_norm": 0.2294921875, "learning_rate": 0.0011987742073628967, "loss": 0.8128, "step": 8819 }, { "epoch": 0.2366895663374839, "grad_norm": 0.2041015625, "learning_rate": 0.0011987724112847578, "loss": 0.6231, "step": 8820 }, { "epoch": 0.23671640188922285, "grad_norm": 0.25390625, "learning_rate": 0.0011987706138930885, "loss": 0.9221, "step": 8821 }, { "epoch": 0.2367432374409618, "grad_norm": 0.2255859375, "learning_rate": 0.0011987688151878927, "loss": 0.8063, "step": 8822 }, { "epoch": 0.23677007299270073, "grad_norm": 0.2197265625, "learning_rate": 0.0011987670151691742, "loss": 0.7926, "step": 8823 }, { "epoch": 0.23679690854443966, "grad_norm": 0.2216796875, "learning_rate": 0.001198765213836937, "loss": 0.7438, "step": 8824 }, { "epoch": 0.23682374409617862, "grad_norm": 0.23828125, "learning_rate": 0.001198763411191185, "loss": 0.8343, "step": 8825 }, { "epoch": 0.23685057964791756, "grad_norm": 0.2265625, "learning_rate": 0.0011987616072319223, "loss": 0.7791, "step": 8826 }, { "epoch": 0.2368774151996565, "grad_norm": 0.2177734375, "learning_rate": 0.0011987598019591527, "loss": 0.682, "step": 8827 }, { "epoch": 0.23690425075139546, "grad_norm": 0.2412109375, "learning_rate": 0.0011987579953728804, "loss": 0.766, "step": 8828 }, { "epoch": 0.2369310863031344, "grad_norm": 0.21875, "learning_rate": 0.0011987561874731091, "loss": 0.7613, "step": 8829 }, { "epoch": 0.23695792185487333, "grad_norm": 0.2177734375, "learning_rate": 0.001198754378259843, "loss": 0.7069, "step": 8830 }, { "epoch": 0.2369847574066123, "grad_norm": 0.2060546875, "learning_rate": 0.0011987525677330858, "loss": 0.6177, "step": 8831 }, { "epoch": 0.23701159295835122, "grad_norm": 0.24609375, "learning_rate": 0.001198750755892842, "loss": 0.8139, "step": 8832 }, { "epoch": 0.23703842851009016, "grad_norm": 0.212890625, "learning_rate": 0.0011987489427391148, "loss": 0.6732, "step": 8833 }, { "epoch": 0.23706526406182912, "grad_norm": 0.2216796875, "learning_rate": 0.0011987471282719087, "loss": 0.6923, "step": 8834 }, { "epoch": 0.23709209961356806, "grad_norm": 0.21484375, "learning_rate": 0.0011987453124912275, "loss": 0.7461, "step": 8835 }, { "epoch": 0.237118935165307, "grad_norm": 0.23046875, "learning_rate": 0.001198743495397075, "loss": 0.8097, "step": 8836 }, { "epoch": 0.23714577071704596, "grad_norm": 0.2265625, "learning_rate": 0.001198741676989456, "loss": 0.7712, "step": 8837 }, { "epoch": 0.2371726062687849, "grad_norm": 0.2099609375, "learning_rate": 0.0011987398572683736, "loss": 0.7351, "step": 8838 }, { "epoch": 0.23719944182052383, "grad_norm": 0.21875, "learning_rate": 0.001198738036233832, "loss": 0.6956, "step": 8839 }, { "epoch": 0.23722627737226276, "grad_norm": 0.21875, "learning_rate": 0.0011987362138858356, "loss": 0.826, "step": 8840 }, { "epoch": 0.23725311292400172, "grad_norm": 0.2236328125, "learning_rate": 0.0011987343902243879, "loss": 0.7818, "step": 8841 }, { "epoch": 0.23727994847574066, "grad_norm": 0.2236328125, "learning_rate": 0.0011987325652494931, "loss": 0.7479, "step": 8842 }, { "epoch": 0.2373067840274796, "grad_norm": 0.23046875, "learning_rate": 0.0011987307389611551, "loss": 0.802, "step": 8843 }, { "epoch": 0.23733361957921856, "grad_norm": 0.21875, "learning_rate": 0.0011987289113593782, "loss": 0.6813, "step": 8844 }, { "epoch": 0.2373604551309575, "grad_norm": 0.2353515625, "learning_rate": 0.0011987270824441662, "loss": 0.818, "step": 8845 }, { "epoch": 0.23738729068269643, "grad_norm": 0.2216796875, "learning_rate": 0.001198725252215523, "loss": 0.7271, "step": 8846 }, { "epoch": 0.2374141262344354, "grad_norm": 0.2353515625, "learning_rate": 0.0011987234206734527, "loss": 0.8018, "step": 8847 }, { "epoch": 0.23744096178617433, "grad_norm": 0.22265625, "learning_rate": 0.0011987215878179595, "loss": 0.8506, "step": 8848 }, { "epoch": 0.23746779733791326, "grad_norm": 0.216796875, "learning_rate": 0.0011987197536490472, "loss": 0.7144, "step": 8849 }, { "epoch": 0.23749463288965222, "grad_norm": 0.220703125, "learning_rate": 0.0011987179181667201, "loss": 0.7381, "step": 8850 }, { "epoch": 0.23752146844139116, "grad_norm": 0.228515625, "learning_rate": 0.001198716081370982, "loss": 0.7056, "step": 8851 }, { "epoch": 0.2375483039931301, "grad_norm": 0.2109375, "learning_rate": 0.0011987142432618367, "loss": 0.725, "step": 8852 }, { "epoch": 0.23757513954486903, "grad_norm": 0.21875, "learning_rate": 0.0011987124038392888, "loss": 0.7333, "step": 8853 }, { "epoch": 0.237601975096608, "grad_norm": 0.2275390625, "learning_rate": 0.001198710563103342, "loss": 0.7949, "step": 8854 }, { "epoch": 0.23762881064834693, "grad_norm": 0.2236328125, "learning_rate": 0.0011987087210540003, "loss": 0.7717, "step": 8855 }, { "epoch": 0.23765564620008586, "grad_norm": 0.2138671875, "learning_rate": 0.0011987068776912677, "loss": 0.7433, "step": 8856 }, { "epoch": 0.23768248175182483, "grad_norm": 0.2041015625, "learning_rate": 0.0011987050330151483, "loss": 0.6726, "step": 8857 }, { "epoch": 0.23770931730356376, "grad_norm": 0.2353515625, "learning_rate": 0.0011987031870256465, "loss": 0.7307, "step": 8858 }, { "epoch": 0.2377361528553027, "grad_norm": 0.2255859375, "learning_rate": 0.0011987013397227657, "loss": 0.7514, "step": 8859 }, { "epoch": 0.23776298840704166, "grad_norm": 0.23046875, "learning_rate": 0.0011986994911065106, "loss": 0.7994, "step": 8860 }, { "epoch": 0.2377898239587806, "grad_norm": 0.25390625, "learning_rate": 0.0011986976411768847, "loss": 0.9296, "step": 8861 }, { "epoch": 0.23781665951051953, "grad_norm": 0.2412109375, "learning_rate": 0.0011986957899338924, "loss": 0.883, "step": 8862 }, { "epoch": 0.2378434950622585, "grad_norm": 0.2412109375, "learning_rate": 0.0011986939373775378, "loss": 0.872, "step": 8863 }, { "epoch": 0.23787033061399743, "grad_norm": 0.2392578125, "learning_rate": 0.0011986920835078247, "loss": 0.7573, "step": 8864 }, { "epoch": 0.23789716616573636, "grad_norm": 0.228515625, "learning_rate": 0.0011986902283247574, "loss": 0.7636, "step": 8865 }, { "epoch": 0.2379240017174753, "grad_norm": 0.23828125, "learning_rate": 0.0011986883718283399, "loss": 0.8212, "step": 8866 }, { "epoch": 0.23795083726921426, "grad_norm": 0.2421875, "learning_rate": 0.0011986865140185762, "loss": 0.8251, "step": 8867 }, { "epoch": 0.2379776728209532, "grad_norm": 0.224609375, "learning_rate": 0.0011986846548954705, "loss": 0.7873, "step": 8868 }, { "epoch": 0.23800450837269213, "grad_norm": 0.2470703125, "learning_rate": 0.0011986827944590265, "loss": 0.8736, "step": 8869 }, { "epoch": 0.2380313439244311, "grad_norm": 0.2138671875, "learning_rate": 0.0011986809327092487, "loss": 0.7297, "step": 8870 }, { "epoch": 0.23805817947617003, "grad_norm": 0.23046875, "learning_rate": 0.0011986790696461413, "loss": 0.8179, "step": 8871 }, { "epoch": 0.23808501502790896, "grad_norm": 0.2353515625, "learning_rate": 0.0011986772052697081, "loss": 0.7711, "step": 8872 }, { "epoch": 0.23811185057964793, "grad_norm": 0.240234375, "learning_rate": 0.001198675339579953, "loss": 0.8171, "step": 8873 }, { "epoch": 0.23813868613138686, "grad_norm": 0.220703125, "learning_rate": 0.0011986734725768808, "loss": 0.7319, "step": 8874 }, { "epoch": 0.2381655216831258, "grad_norm": 0.234375, "learning_rate": 0.0011986716042604946, "loss": 0.8814, "step": 8875 }, { "epoch": 0.23819235723486476, "grad_norm": 0.224609375, "learning_rate": 0.0011986697346307994, "loss": 0.7447, "step": 8876 }, { "epoch": 0.2382191927866037, "grad_norm": 0.2275390625, "learning_rate": 0.0011986678636877986, "loss": 0.7701, "step": 8877 }, { "epoch": 0.23824602833834263, "grad_norm": 0.22265625, "learning_rate": 0.001198665991431497, "loss": 0.7744, "step": 8878 }, { "epoch": 0.2382728638900816, "grad_norm": 0.232421875, "learning_rate": 0.0011986641178618983, "loss": 0.6867, "step": 8879 }, { "epoch": 0.23829969944182053, "grad_norm": 0.21875, "learning_rate": 0.0011986622429790064, "loss": 0.7617, "step": 8880 }, { "epoch": 0.23832653499355946, "grad_norm": 0.22265625, "learning_rate": 0.001198660366782826, "loss": 0.6826, "step": 8881 }, { "epoch": 0.2383533705452984, "grad_norm": 0.228515625, "learning_rate": 0.0011986584892733606, "loss": 0.6841, "step": 8882 }, { "epoch": 0.23838020609703736, "grad_norm": 0.255859375, "learning_rate": 0.0011986566104506147, "loss": 0.8794, "step": 8883 }, { "epoch": 0.2384070416487763, "grad_norm": 0.23828125, "learning_rate": 0.0011986547303145922, "loss": 0.806, "step": 8884 }, { "epoch": 0.23843387720051523, "grad_norm": 0.2373046875, "learning_rate": 0.0011986528488652976, "loss": 0.8549, "step": 8885 }, { "epoch": 0.2384607127522542, "grad_norm": 0.212890625, "learning_rate": 0.0011986509661027346, "loss": 0.6941, "step": 8886 }, { "epoch": 0.23848754830399313, "grad_norm": 0.2490234375, "learning_rate": 0.0011986490820269076, "loss": 0.8942, "step": 8887 }, { "epoch": 0.23851438385573207, "grad_norm": 0.2470703125, "learning_rate": 0.0011986471966378206, "loss": 0.8432, "step": 8888 }, { "epoch": 0.23854121940747103, "grad_norm": 0.2373046875, "learning_rate": 0.0011986453099354778, "loss": 0.804, "step": 8889 }, { "epoch": 0.23856805495920996, "grad_norm": 0.234375, "learning_rate": 0.0011986434219198833, "loss": 0.7678, "step": 8890 }, { "epoch": 0.2385948905109489, "grad_norm": 0.2275390625, "learning_rate": 0.001198641532591041, "loss": 0.8602, "step": 8891 }, { "epoch": 0.23862172606268786, "grad_norm": 0.2265625, "learning_rate": 0.0011986396419489558, "loss": 0.7996, "step": 8892 }, { "epoch": 0.2386485616144268, "grad_norm": 0.244140625, "learning_rate": 0.001198637749993631, "loss": 0.8716, "step": 8893 }, { "epoch": 0.23867539716616573, "grad_norm": 0.2470703125, "learning_rate": 0.001198635856725071, "loss": 0.8331, "step": 8894 }, { "epoch": 0.23870223271790467, "grad_norm": 0.2412109375, "learning_rate": 0.0011986339621432804, "loss": 0.7939, "step": 8895 }, { "epoch": 0.23872906826964363, "grad_norm": 0.2431640625, "learning_rate": 0.0011986320662482628, "loss": 0.905, "step": 8896 }, { "epoch": 0.23875590382138256, "grad_norm": 0.22265625, "learning_rate": 0.0011986301690400227, "loss": 0.7551, "step": 8897 }, { "epoch": 0.2387827393731215, "grad_norm": 0.2275390625, "learning_rate": 0.0011986282705185641, "loss": 0.7733, "step": 8898 }, { "epoch": 0.23880957492486046, "grad_norm": 0.208984375, "learning_rate": 0.001198626370683891, "loss": 0.6653, "step": 8899 }, { "epoch": 0.2388364104765994, "grad_norm": 0.224609375, "learning_rate": 0.0011986244695360079, "loss": 0.7157, "step": 8900 }, { "epoch": 0.23886324602833833, "grad_norm": 0.24609375, "learning_rate": 0.0011986225670749188, "loss": 0.8035, "step": 8901 }, { "epoch": 0.2388900815800773, "grad_norm": 0.2412109375, "learning_rate": 0.001198620663300628, "loss": 0.8688, "step": 8902 }, { "epoch": 0.23891691713181623, "grad_norm": 0.2412109375, "learning_rate": 0.0011986187582131394, "loss": 0.8389, "step": 8903 }, { "epoch": 0.23894375268355517, "grad_norm": 0.2412109375, "learning_rate": 0.0011986168518124576, "loss": 0.7778, "step": 8904 }, { "epoch": 0.23897058823529413, "grad_norm": 0.2275390625, "learning_rate": 0.0011986149440985864, "loss": 0.7545, "step": 8905 }, { "epoch": 0.23899742378703306, "grad_norm": 0.2412109375, "learning_rate": 0.0011986130350715302, "loss": 0.8529, "step": 8906 }, { "epoch": 0.239024259338772, "grad_norm": 0.212890625, "learning_rate": 0.001198611124731293, "loss": 0.6598, "step": 8907 }, { "epoch": 0.23905109489051096, "grad_norm": 0.23046875, "learning_rate": 0.0011986092130778791, "loss": 0.7269, "step": 8908 }, { "epoch": 0.2390779304422499, "grad_norm": 0.2353515625, "learning_rate": 0.0011986073001112928, "loss": 0.772, "step": 8909 }, { "epoch": 0.23910476599398883, "grad_norm": 0.248046875, "learning_rate": 0.001198605385831538, "loss": 0.8466, "step": 8910 }, { "epoch": 0.23913160154572777, "grad_norm": 0.236328125, "learning_rate": 0.0011986034702386194, "loss": 0.821, "step": 8911 }, { "epoch": 0.23915843709746673, "grad_norm": 0.2216796875, "learning_rate": 0.0011986015533325407, "loss": 0.7952, "step": 8912 }, { "epoch": 0.23918527264920567, "grad_norm": 0.2294921875, "learning_rate": 0.0011985996351133064, "loss": 0.7969, "step": 8913 }, { "epoch": 0.2392121082009446, "grad_norm": 0.2197265625, "learning_rate": 0.0011985977155809205, "loss": 0.7162, "step": 8914 }, { "epoch": 0.23923894375268356, "grad_norm": 0.234375, "learning_rate": 0.0011985957947353875, "loss": 0.8122, "step": 8915 }, { "epoch": 0.2392657793044225, "grad_norm": 0.21875, "learning_rate": 0.0011985938725767114, "loss": 0.7618, "step": 8916 }, { "epoch": 0.23929261485616143, "grad_norm": 0.2333984375, "learning_rate": 0.0011985919491048964, "loss": 0.8516, "step": 8917 }, { "epoch": 0.2393194504079004, "grad_norm": 0.21875, "learning_rate": 0.0011985900243199465, "loss": 0.7712, "step": 8918 }, { "epoch": 0.23934628595963933, "grad_norm": 0.2255859375, "learning_rate": 0.0011985880982218666, "loss": 0.7791, "step": 8919 }, { "epoch": 0.23937312151137827, "grad_norm": 0.234375, "learning_rate": 0.0011985861708106603, "loss": 0.8003, "step": 8920 }, { "epoch": 0.23939995706311723, "grad_norm": 0.22265625, "learning_rate": 0.0011985842420863323, "loss": 0.7526, "step": 8921 }, { "epoch": 0.23942679261485617, "grad_norm": 0.2216796875, "learning_rate": 0.0011985823120488862, "loss": 0.6979, "step": 8922 }, { "epoch": 0.2394536281665951, "grad_norm": 0.2373046875, "learning_rate": 0.001198580380698327, "loss": 0.8029, "step": 8923 }, { "epoch": 0.23948046371833404, "grad_norm": 0.2265625, "learning_rate": 0.0011985784480346583, "loss": 0.7961, "step": 8924 }, { "epoch": 0.239507299270073, "grad_norm": 0.216796875, "learning_rate": 0.0011985765140578846, "loss": 0.7262, "step": 8925 }, { "epoch": 0.23953413482181193, "grad_norm": 0.232421875, "learning_rate": 0.0011985745787680103, "loss": 0.8064, "step": 8926 }, { "epoch": 0.23956097037355087, "grad_norm": 0.259765625, "learning_rate": 0.0011985726421650392, "loss": 0.8989, "step": 8927 }, { "epoch": 0.23958780592528983, "grad_norm": 0.2216796875, "learning_rate": 0.0011985707042489761, "loss": 0.8165, "step": 8928 }, { "epoch": 0.23961464147702877, "grad_norm": 0.2373046875, "learning_rate": 0.0011985687650198247, "loss": 0.7796, "step": 8929 }, { "epoch": 0.2396414770287677, "grad_norm": 0.2333984375, "learning_rate": 0.0011985668244775898, "loss": 0.7438, "step": 8930 }, { "epoch": 0.23966831258050666, "grad_norm": 0.2333984375, "learning_rate": 0.0011985648826222752, "loss": 0.8418, "step": 8931 }, { "epoch": 0.2396951481322456, "grad_norm": 0.25390625, "learning_rate": 0.0011985629394538854, "loss": 0.9812, "step": 8932 }, { "epoch": 0.23972198368398454, "grad_norm": 0.2265625, "learning_rate": 0.0011985609949724248, "loss": 0.7399, "step": 8933 }, { "epoch": 0.2397488192357235, "grad_norm": 0.216796875, "learning_rate": 0.001198559049177897, "loss": 0.7452, "step": 8934 }, { "epoch": 0.23977565478746243, "grad_norm": 0.20703125, "learning_rate": 0.0011985571020703073, "loss": 0.6639, "step": 8935 }, { "epoch": 0.23980249033920137, "grad_norm": 0.2294921875, "learning_rate": 0.0011985551536496592, "loss": 0.7499, "step": 8936 }, { "epoch": 0.23982932589094033, "grad_norm": 0.2353515625, "learning_rate": 0.001198553203915957, "loss": 0.7931, "step": 8937 }, { "epoch": 0.23985616144267927, "grad_norm": 0.2314453125, "learning_rate": 0.0011985512528692053, "loss": 0.7935, "step": 8938 }, { "epoch": 0.2398829969944182, "grad_norm": 0.2099609375, "learning_rate": 0.0011985493005094083, "loss": 0.6635, "step": 8939 }, { "epoch": 0.23990983254615714, "grad_norm": 0.2236328125, "learning_rate": 0.0011985473468365702, "loss": 0.7363, "step": 8940 }, { "epoch": 0.2399366680978961, "grad_norm": 0.22265625, "learning_rate": 0.0011985453918506954, "loss": 0.7396, "step": 8941 }, { "epoch": 0.23996350364963503, "grad_norm": 0.248046875, "learning_rate": 0.001198543435551788, "loss": 0.8919, "step": 8942 }, { "epoch": 0.23999033920137397, "grad_norm": 0.2431640625, "learning_rate": 0.0011985414779398526, "loss": 0.7763, "step": 8943 }, { "epoch": 0.24001717475311293, "grad_norm": 0.2255859375, "learning_rate": 0.001198539519014893, "loss": 0.6829, "step": 8944 }, { "epoch": 0.24004401030485187, "grad_norm": 0.2236328125, "learning_rate": 0.0011985375587769138, "loss": 0.7306, "step": 8945 }, { "epoch": 0.2400708458565908, "grad_norm": 0.2255859375, "learning_rate": 0.0011985355972259195, "loss": 0.7409, "step": 8946 }, { "epoch": 0.24009768140832977, "grad_norm": 0.2490234375, "learning_rate": 0.0011985336343619142, "loss": 0.8618, "step": 8947 }, { "epoch": 0.2401245169600687, "grad_norm": 0.2314453125, "learning_rate": 0.001198531670184902, "loss": 0.7859, "step": 8948 }, { "epoch": 0.24015135251180764, "grad_norm": 0.23828125, "learning_rate": 0.0011985297046948876, "loss": 0.7609, "step": 8949 }, { "epoch": 0.2401781880635466, "grad_norm": 0.234375, "learning_rate": 0.001198527737891875, "loss": 0.7374, "step": 8950 }, { "epoch": 0.24020502361528553, "grad_norm": 0.2353515625, "learning_rate": 0.0011985257697758687, "loss": 0.7692, "step": 8951 }, { "epoch": 0.24023185916702447, "grad_norm": 0.244140625, "learning_rate": 0.0011985238003468727, "loss": 0.8047, "step": 8952 }, { "epoch": 0.2402586947187634, "grad_norm": 0.2294921875, "learning_rate": 0.0011985218296048919, "loss": 0.7913, "step": 8953 }, { "epoch": 0.24028553027050237, "grad_norm": 0.2294921875, "learning_rate": 0.00119851985754993, "loss": 0.7341, "step": 8954 }, { "epoch": 0.2403123658222413, "grad_norm": 0.25390625, "learning_rate": 0.0011985178841819918, "loss": 0.8071, "step": 8955 }, { "epoch": 0.24033920137398024, "grad_norm": 0.2265625, "learning_rate": 0.0011985159095010815, "loss": 0.7971, "step": 8956 }, { "epoch": 0.2403660369257192, "grad_norm": 0.224609375, "learning_rate": 0.0011985139335072032, "loss": 0.7436, "step": 8957 }, { "epoch": 0.24039287247745814, "grad_norm": 0.2294921875, "learning_rate": 0.0011985119562003615, "loss": 0.8325, "step": 8958 }, { "epoch": 0.24041970802919707, "grad_norm": 0.2490234375, "learning_rate": 0.0011985099775805607, "loss": 0.7713, "step": 8959 }, { "epoch": 0.24044654358093603, "grad_norm": 0.2353515625, "learning_rate": 0.0011985079976478052, "loss": 0.8311, "step": 8960 }, { "epoch": 0.24047337913267497, "grad_norm": 0.2275390625, "learning_rate": 0.001198506016402099, "loss": 0.7699, "step": 8961 }, { "epoch": 0.2405002146844139, "grad_norm": 0.23828125, "learning_rate": 0.001198504033843447, "loss": 0.7759, "step": 8962 }, { "epoch": 0.24052705023615287, "grad_norm": 0.2314453125, "learning_rate": 0.0011985020499718528, "loss": 0.772, "step": 8963 }, { "epoch": 0.2405538857878918, "grad_norm": 0.216796875, "learning_rate": 0.0011985000647873215, "loss": 0.6728, "step": 8964 }, { "epoch": 0.24058072133963074, "grad_norm": 0.234375, "learning_rate": 0.001198498078289857, "loss": 0.8322, "step": 8965 }, { "epoch": 0.24060755689136967, "grad_norm": 0.2412109375, "learning_rate": 0.0011984960904794638, "loss": 0.8544, "step": 8966 }, { "epoch": 0.24063439244310864, "grad_norm": 0.2265625, "learning_rate": 0.0011984941013561464, "loss": 0.7468, "step": 8967 }, { "epoch": 0.24066122799484757, "grad_norm": 0.2216796875, "learning_rate": 0.001198492110919909, "loss": 0.7366, "step": 8968 }, { "epoch": 0.2406880635465865, "grad_norm": 0.228515625, "learning_rate": 0.0011984901191707557, "loss": 0.7869, "step": 8969 }, { "epoch": 0.24071489909832547, "grad_norm": 0.23046875, "learning_rate": 0.0011984881261086914, "loss": 0.797, "step": 8970 }, { "epoch": 0.2407417346500644, "grad_norm": 0.23046875, "learning_rate": 0.0011984861317337202, "loss": 0.7655, "step": 8971 }, { "epoch": 0.24076857020180334, "grad_norm": 0.2236328125, "learning_rate": 0.0011984841360458463, "loss": 0.7906, "step": 8972 }, { "epoch": 0.2407954057535423, "grad_norm": 0.2216796875, "learning_rate": 0.0011984821390450748, "loss": 0.729, "step": 8973 }, { "epoch": 0.24082224130528124, "grad_norm": 0.2314453125, "learning_rate": 0.001198480140731409, "loss": 0.7751, "step": 8974 }, { "epoch": 0.24084907685702017, "grad_norm": 0.216796875, "learning_rate": 0.0011984781411048541, "loss": 0.7016, "step": 8975 }, { "epoch": 0.24087591240875914, "grad_norm": 0.2294921875, "learning_rate": 0.0011984761401654142, "loss": 0.8337, "step": 8976 }, { "epoch": 0.24090274796049807, "grad_norm": 0.2255859375, "learning_rate": 0.0011984741379130939, "loss": 0.7418, "step": 8977 }, { "epoch": 0.240929583512237, "grad_norm": 0.21875, "learning_rate": 0.0011984721343478973, "loss": 0.7282, "step": 8978 }, { "epoch": 0.24095641906397597, "grad_norm": 0.21875, "learning_rate": 0.0011984701294698288, "loss": 0.7307, "step": 8979 }, { "epoch": 0.2409832546157149, "grad_norm": 0.2431640625, "learning_rate": 0.001198468123278893, "loss": 0.8182, "step": 8980 }, { "epoch": 0.24101009016745384, "grad_norm": 0.220703125, "learning_rate": 0.0011984661157750942, "loss": 0.8228, "step": 8981 }, { "epoch": 0.24103692571919277, "grad_norm": 0.234375, "learning_rate": 0.0011984641069584367, "loss": 0.8123, "step": 8982 }, { "epoch": 0.24106376127093174, "grad_norm": 0.232421875, "learning_rate": 0.001198462096828925, "loss": 0.8797, "step": 8983 }, { "epoch": 0.24109059682267067, "grad_norm": 0.234375, "learning_rate": 0.0011984600853865639, "loss": 0.8215, "step": 8984 }, { "epoch": 0.2411174323744096, "grad_norm": 0.2373046875, "learning_rate": 0.0011984580726313572, "loss": 0.8582, "step": 8985 }, { "epoch": 0.24114426792614857, "grad_norm": 0.26171875, "learning_rate": 0.0011984560585633095, "loss": 0.9012, "step": 8986 }, { "epoch": 0.2411711034778875, "grad_norm": 0.2392578125, "learning_rate": 0.0011984540431824254, "loss": 0.8119, "step": 8987 }, { "epoch": 0.24119793902962644, "grad_norm": 0.23046875, "learning_rate": 0.001198452026488709, "loss": 0.8207, "step": 8988 }, { "epoch": 0.2412247745813654, "grad_norm": 0.2431640625, "learning_rate": 0.001198450008482165, "loss": 0.8345, "step": 8989 }, { "epoch": 0.24125161013310434, "grad_norm": 0.23828125, "learning_rate": 0.0011984479891627977, "loss": 0.8165, "step": 8990 }, { "epoch": 0.24127844568484327, "grad_norm": 0.2255859375, "learning_rate": 0.0011984459685306119, "loss": 0.824, "step": 8991 }, { "epoch": 0.24130528123658224, "grad_norm": 0.23828125, "learning_rate": 0.0011984439465856115, "loss": 0.843, "step": 8992 }, { "epoch": 0.24133211678832117, "grad_norm": 0.2421875, "learning_rate": 0.0011984419233278011, "loss": 0.8101, "step": 8993 }, { "epoch": 0.2413589523400601, "grad_norm": 0.21875, "learning_rate": 0.0011984398987571851, "loss": 0.8032, "step": 8994 }, { "epoch": 0.24138578789179904, "grad_norm": 0.2275390625, "learning_rate": 0.0011984378728737682, "loss": 0.7747, "step": 8995 }, { "epoch": 0.241412623443538, "grad_norm": 0.232421875, "learning_rate": 0.0011984358456775545, "loss": 0.8096, "step": 8996 }, { "epoch": 0.24143945899527694, "grad_norm": 0.24609375, "learning_rate": 0.0011984338171685488, "loss": 0.8337, "step": 8997 }, { "epoch": 0.24146629454701588, "grad_norm": 0.23046875, "learning_rate": 0.0011984317873467553, "loss": 0.7525, "step": 8998 }, { "epoch": 0.24149313009875484, "grad_norm": 0.2412109375, "learning_rate": 0.0011984297562121784, "loss": 0.8888, "step": 8999 }, { "epoch": 0.24151996565049377, "grad_norm": 0.224609375, "learning_rate": 0.0011984277237648228, "loss": 0.7912, "step": 9000 }, { "epoch": 0.2415468012022327, "grad_norm": 0.2265625, "learning_rate": 0.0011984256900046928, "loss": 0.7581, "step": 9001 }, { "epoch": 0.24157363675397167, "grad_norm": 0.251953125, "learning_rate": 0.001198423654931793, "loss": 0.8603, "step": 9002 }, { "epoch": 0.2416004723057106, "grad_norm": 0.216796875, "learning_rate": 0.0011984216185461278, "loss": 0.7226, "step": 9003 }, { "epoch": 0.24162730785744954, "grad_norm": 0.228515625, "learning_rate": 0.0011984195808477014, "loss": 0.8094, "step": 9004 }, { "epoch": 0.2416541434091885, "grad_norm": 0.2431640625, "learning_rate": 0.0011984175418365186, "loss": 0.7751, "step": 9005 }, { "epoch": 0.24168097896092744, "grad_norm": 0.2431640625, "learning_rate": 0.0011984155015125838, "loss": 0.8324, "step": 9006 }, { "epoch": 0.24170781451266637, "grad_norm": 0.25390625, "learning_rate": 0.0011984134598759014, "loss": 0.9061, "step": 9007 }, { "epoch": 0.24173465006440534, "grad_norm": 0.220703125, "learning_rate": 0.001198411416926476, "loss": 0.7565, "step": 9008 }, { "epoch": 0.24176148561614427, "grad_norm": 0.23046875, "learning_rate": 0.0011984093726643118, "loss": 0.8387, "step": 9009 }, { "epoch": 0.2417883211678832, "grad_norm": 0.21875, "learning_rate": 0.0011984073270894136, "loss": 0.7816, "step": 9010 }, { "epoch": 0.24181515671962214, "grad_norm": 0.2275390625, "learning_rate": 0.001198405280201786, "loss": 0.7672, "step": 9011 }, { "epoch": 0.2418419922713611, "grad_norm": 0.23046875, "learning_rate": 0.001198403232001433, "loss": 0.8168, "step": 9012 }, { "epoch": 0.24186882782310004, "grad_norm": 0.224609375, "learning_rate": 0.0011984011824883595, "loss": 0.6865, "step": 9013 }, { "epoch": 0.24189566337483898, "grad_norm": 0.2431640625, "learning_rate": 0.00119839913166257, "loss": 0.8465, "step": 9014 }, { "epoch": 0.24192249892657794, "grad_norm": 0.2392578125, "learning_rate": 0.0011983970795240685, "loss": 0.7992, "step": 9015 }, { "epoch": 0.24194933447831687, "grad_norm": 0.22265625, "learning_rate": 0.00119839502607286, "loss": 0.7081, "step": 9016 }, { "epoch": 0.2419761700300558, "grad_norm": 0.2236328125, "learning_rate": 0.0011983929713089488, "loss": 0.7505, "step": 9017 }, { "epoch": 0.24200300558179477, "grad_norm": 0.216796875, "learning_rate": 0.0011983909152323395, "loss": 0.7613, "step": 9018 }, { "epoch": 0.2420298411335337, "grad_norm": 0.255859375, "learning_rate": 0.0011983888578430367, "loss": 0.7804, "step": 9019 }, { "epoch": 0.24205667668527264, "grad_norm": 0.2373046875, "learning_rate": 0.0011983867991410449, "loss": 0.8014, "step": 9020 }, { "epoch": 0.2420835122370116, "grad_norm": 0.2177734375, "learning_rate": 0.0011983847391263682, "loss": 0.6808, "step": 9021 }, { "epoch": 0.24211034778875054, "grad_norm": 0.240234375, "learning_rate": 0.0011983826777990116, "loss": 0.8232, "step": 9022 }, { "epoch": 0.24213718334048948, "grad_norm": 0.232421875, "learning_rate": 0.0011983806151589795, "loss": 0.8092, "step": 9023 }, { "epoch": 0.2421640188922284, "grad_norm": 0.3203125, "learning_rate": 0.0011983785512062763, "loss": 0.6747, "step": 9024 }, { "epoch": 0.24219085444396737, "grad_norm": 0.228515625, "learning_rate": 0.0011983764859409066, "loss": 0.7989, "step": 9025 }, { "epoch": 0.2422176899957063, "grad_norm": 0.236328125, "learning_rate": 0.0011983744193628748, "loss": 0.7456, "step": 9026 }, { "epoch": 0.24224452554744524, "grad_norm": 0.2314453125, "learning_rate": 0.0011983723514721856, "loss": 0.7549, "step": 9027 }, { "epoch": 0.2422713610991842, "grad_norm": 0.2490234375, "learning_rate": 0.0011983702822688437, "loss": 0.8539, "step": 9028 }, { "epoch": 0.24229819665092314, "grad_norm": 0.236328125, "learning_rate": 0.0011983682117528533, "loss": 0.811, "step": 9029 }, { "epoch": 0.24232503220266208, "grad_norm": 0.208984375, "learning_rate": 0.0011983661399242193, "loss": 0.61, "step": 9030 }, { "epoch": 0.24235186775440104, "grad_norm": 0.2216796875, "learning_rate": 0.0011983640667829459, "loss": 0.7108, "step": 9031 }, { "epoch": 0.24237870330613998, "grad_norm": 0.2333984375, "learning_rate": 0.0011983619923290377, "loss": 0.7872, "step": 9032 }, { "epoch": 0.2424055388578789, "grad_norm": 0.2216796875, "learning_rate": 0.0011983599165624995, "loss": 0.7048, "step": 9033 }, { "epoch": 0.24243237440961787, "grad_norm": 0.216796875, "learning_rate": 0.0011983578394833354, "loss": 0.736, "step": 9034 }, { "epoch": 0.2424592099613568, "grad_norm": 0.24609375, "learning_rate": 0.0011983557610915506, "loss": 0.8021, "step": 9035 }, { "epoch": 0.24248604551309574, "grad_norm": 0.228515625, "learning_rate": 0.001198353681387149, "loss": 0.7914, "step": 9036 }, { "epoch": 0.2425128810648347, "grad_norm": 0.2314453125, "learning_rate": 0.0011983516003701354, "loss": 0.7936, "step": 9037 }, { "epoch": 0.24253971661657364, "grad_norm": 0.244140625, "learning_rate": 0.0011983495180405146, "loss": 0.8513, "step": 9038 }, { "epoch": 0.24256655216831258, "grad_norm": 0.22265625, "learning_rate": 0.001198347434398291, "loss": 0.7487, "step": 9039 }, { "epoch": 0.2425933877200515, "grad_norm": 0.2275390625, "learning_rate": 0.0011983453494434691, "loss": 0.7554, "step": 9040 }, { "epoch": 0.24262022327179047, "grad_norm": 0.220703125, "learning_rate": 0.0011983432631760538, "loss": 0.6631, "step": 9041 }, { "epoch": 0.2426470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.001198341175596049, "loss": 0.7347, "step": 9042 }, { "epoch": 0.24267389437526835, "grad_norm": 0.22265625, "learning_rate": 0.00119833908670346, "loss": 0.7171, "step": 9043 }, { "epoch": 0.2427007299270073, "grad_norm": 0.236328125, "learning_rate": 0.001198336996498291, "loss": 0.7898, "step": 9044 }, { "epoch": 0.24272756547874624, "grad_norm": 0.2333984375, "learning_rate": 0.0011983349049805466, "loss": 0.7535, "step": 9045 }, { "epoch": 0.24275440103048518, "grad_norm": 0.216796875, "learning_rate": 0.0011983328121502315, "loss": 0.7189, "step": 9046 }, { "epoch": 0.24278123658222414, "grad_norm": 0.2470703125, "learning_rate": 0.0011983307180073502, "loss": 0.8385, "step": 9047 }, { "epoch": 0.24280807213396308, "grad_norm": 0.228515625, "learning_rate": 0.0011983286225519074, "loss": 0.769, "step": 9048 }, { "epoch": 0.242834907685702, "grad_norm": 0.2216796875, "learning_rate": 0.0011983265257839077, "loss": 0.7711, "step": 9049 }, { "epoch": 0.24286174323744097, "grad_norm": 0.25, "learning_rate": 0.0011983244277033555, "loss": 0.9298, "step": 9050 }, { "epoch": 0.2428885787891799, "grad_norm": 0.22265625, "learning_rate": 0.0011983223283102556, "loss": 0.6916, "step": 9051 }, { "epoch": 0.24291541434091884, "grad_norm": 0.2294921875, "learning_rate": 0.0011983202276046124, "loss": 0.7714, "step": 9052 }, { "epoch": 0.24294224989265778, "grad_norm": 0.2451171875, "learning_rate": 0.0011983181255864309, "loss": 0.8517, "step": 9053 }, { "epoch": 0.24296908544439674, "grad_norm": 0.2099609375, "learning_rate": 0.0011983160222557151, "loss": 0.7246, "step": 9054 }, { "epoch": 0.24299592099613568, "grad_norm": 0.251953125, "learning_rate": 0.0011983139176124705, "loss": 0.8397, "step": 9055 }, { "epoch": 0.2430227565478746, "grad_norm": 0.205078125, "learning_rate": 0.0011983118116567007, "loss": 0.6611, "step": 9056 }, { "epoch": 0.24304959209961358, "grad_norm": 0.2275390625, "learning_rate": 0.001198309704388411, "loss": 0.785, "step": 9057 }, { "epoch": 0.2430764276513525, "grad_norm": 0.228515625, "learning_rate": 0.0011983075958076057, "loss": 0.7114, "step": 9058 }, { "epoch": 0.24310326320309145, "grad_norm": 0.2294921875, "learning_rate": 0.00119830548591429, "loss": 0.7679, "step": 9059 }, { "epoch": 0.2431300987548304, "grad_norm": 0.23828125, "learning_rate": 0.0011983033747084676, "loss": 0.7625, "step": 9060 }, { "epoch": 0.24315693430656934, "grad_norm": 0.2353515625, "learning_rate": 0.0011983012621901439, "loss": 0.8081, "step": 9061 }, { "epoch": 0.24318376985830828, "grad_norm": 0.228515625, "learning_rate": 0.001198299148359323, "loss": 0.7773, "step": 9062 }, { "epoch": 0.24321060541004724, "grad_norm": 0.220703125, "learning_rate": 0.00119829703321601, "loss": 0.7543, "step": 9063 }, { "epoch": 0.24323744096178618, "grad_norm": 0.2236328125, "learning_rate": 0.0011982949167602093, "loss": 0.7222, "step": 9064 }, { "epoch": 0.2432642765135251, "grad_norm": 0.2294921875, "learning_rate": 0.0011982927989919257, "loss": 0.8265, "step": 9065 }, { "epoch": 0.24329111206526405, "grad_norm": 0.22265625, "learning_rate": 0.0011982906799111635, "loss": 0.7734, "step": 9066 }, { "epoch": 0.243317947617003, "grad_norm": 0.2431640625, "learning_rate": 0.0011982885595179277, "loss": 0.8001, "step": 9067 }, { "epoch": 0.24334478316874195, "grad_norm": 0.2158203125, "learning_rate": 0.0011982864378122227, "loss": 0.6834, "step": 9068 }, { "epoch": 0.24337161872048088, "grad_norm": 0.2275390625, "learning_rate": 0.0011982843147940534, "loss": 0.8037, "step": 9069 }, { "epoch": 0.24339845427221984, "grad_norm": 0.2216796875, "learning_rate": 0.001198282190463424, "loss": 0.6943, "step": 9070 }, { "epoch": 0.24342528982395878, "grad_norm": 0.2265625, "learning_rate": 0.0011982800648203398, "loss": 0.7687, "step": 9071 }, { "epoch": 0.24345212537569771, "grad_norm": 0.2119140625, "learning_rate": 0.0011982779378648051, "loss": 0.6237, "step": 9072 }, { "epoch": 0.24347896092743668, "grad_norm": 0.244140625, "learning_rate": 0.0011982758095968246, "loss": 0.917, "step": 9073 }, { "epoch": 0.2435057964791756, "grad_norm": 0.2421875, "learning_rate": 0.0011982736800164031, "loss": 0.831, "step": 9074 }, { "epoch": 0.24353263203091455, "grad_norm": 0.2275390625, "learning_rate": 0.0011982715491235451, "loss": 0.748, "step": 9075 }, { "epoch": 0.2435594675826535, "grad_norm": 0.22265625, "learning_rate": 0.0011982694169182552, "loss": 0.728, "step": 9076 }, { "epoch": 0.24358630313439245, "grad_norm": 0.232421875, "learning_rate": 0.0011982672834005383, "loss": 0.7339, "step": 9077 }, { "epoch": 0.24361313868613138, "grad_norm": 0.2255859375, "learning_rate": 0.0011982651485703993, "loss": 0.7379, "step": 9078 }, { "epoch": 0.24363997423787034, "grad_norm": 0.234375, "learning_rate": 0.0011982630124278422, "loss": 0.7341, "step": 9079 }, { "epoch": 0.24366680978960928, "grad_norm": 0.228515625, "learning_rate": 0.0011982608749728722, "loss": 0.684, "step": 9080 }, { "epoch": 0.24369364534134821, "grad_norm": 0.2451171875, "learning_rate": 0.0011982587362054937, "loss": 0.8493, "step": 9081 }, { "epoch": 0.24372048089308715, "grad_norm": 0.2412109375, "learning_rate": 0.0011982565961257117, "loss": 0.8551, "step": 9082 }, { "epoch": 0.2437473164448261, "grad_norm": 0.2373046875, "learning_rate": 0.0011982544547335307, "loss": 0.788, "step": 9083 }, { "epoch": 0.24377415199656505, "grad_norm": 0.2265625, "learning_rate": 0.0011982523120289553, "loss": 0.746, "step": 9084 }, { "epoch": 0.24380098754830398, "grad_norm": 0.2392578125, "learning_rate": 0.0011982501680119907, "loss": 0.8455, "step": 9085 }, { "epoch": 0.24382782310004295, "grad_norm": 0.224609375, "learning_rate": 0.0011982480226826408, "loss": 0.7369, "step": 9086 }, { "epoch": 0.24385465865178188, "grad_norm": 0.216796875, "learning_rate": 0.001198245876040911, "loss": 0.7367, "step": 9087 }, { "epoch": 0.24388149420352082, "grad_norm": 0.2353515625, "learning_rate": 0.0011982437280868058, "loss": 0.7896, "step": 9088 }, { "epoch": 0.24390832975525978, "grad_norm": 0.2490234375, "learning_rate": 0.0011982415788203296, "loss": 0.8593, "step": 9089 }, { "epoch": 0.2439351653069987, "grad_norm": 0.2265625, "learning_rate": 0.0011982394282414877, "loss": 0.76, "step": 9090 }, { "epoch": 0.24396200085873765, "grad_norm": 0.2275390625, "learning_rate": 0.0011982372763502842, "loss": 0.6436, "step": 9091 }, { "epoch": 0.2439888364104766, "grad_norm": 0.2197265625, "learning_rate": 0.0011982351231467243, "loss": 0.7293, "step": 9092 }, { "epoch": 0.24401567196221555, "grad_norm": 0.2470703125, "learning_rate": 0.0011982329686308125, "loss": 0.8784, "step": 9093 }, { "epoch": 0.24404250751395448, "grad_norm": 0.220703125, "learning_rate": 0.0011982308128025536, "loss": 0.7606, "step": 9094 }, { "epoch": 0.24406934306569342, "grad_norm": 0.232421875, "learning_rate": 0.0011982286556619522, "loss": 0.7722, "step": 9095 }, { "epoch": 0.24409617861743238, "grad_norm": 0.22265625, "learning_rate": 0.0011982264972090133, "loss": 0.777, "step": 9096 }, { "epoch": 0.24412301416917132, "grad_norm": 0.2099609375, "learning_rate": 0.0011982243374437415, "loss": 0.6884, "step": 9097 }, { "epoch": 0.24414984972091025, "grad_norm": 0.2333984375, "learning_rate": 0.0011982221763661412, "loss": 0.8214, "step": 9098 }, { "epoch": 0.2441766852726492, "grad_norm": 0.2490234375, "learning_rate": 0.0011982200139762178, "loss": 0.8267, "step": 9099 }, { "epoch": 0.24420352082438815, "grad_norm": 0.25, "learning_rate": 0.0011982178502739753, "loss": 0.8856, "step": 9100 }, { "epoch": 0.24423035637612708, "grad_norm": 0.236328125, "learning_rate": 0.0011982156852594192, "loss": 0.8059, "step": 9101 }, { "epoch": 0.24425719192786605, "grad_norm": 0.232421875, "learning_rate": 0.0011982135189325537, "loss": 0.7722, "step": 9102 }, { "epoch": 0.24428402747960498, "grad_norm": 0.240234375, "learning_rate": 0.0011982113512933837, "loss": 0.8711, "step": 9103 }, { "epoch": 0.24431086303134392, "grad_norm": 0.2138671875, "learning_rate": 0.0011982091823419141, "loss": 0.7086, "step": 9104 }, { "epoch": 0.24433769858308288, "grad_norm": 0.212890625, "learning_rate": 0.0011982070120781494, "loss": 0.7278, "step": 9105 }, { "epoch": 0.24436453413482181, "grad_norm": 0.2197265625, "learning_rate": 0.0011982048405020948, "loss": 0.6492, "step": 9106 }, { "epoch": 0.24439136968656075, "grad_norm": 0.234375, "learning_rate": 0.0011982026676137544, "loss": 0.8188, "step": 9107 }, { "epoch": 0.2444182052382997, "grad_norm": 0.21484375, "learning_rate": 0.0011982004934131337, "loss": 0.7244, "step": 9108 }, { "epoch": 0.24444504079003865, "grad_norm": 0.2353515625, "learning_rate": 0.0011981983179002369, "loss": 0.8019, "step": 9109 }, { "epoch": 0.24447187634177758, "grad_norm": 0.2197265625, "learning_rate": 0.001198196141075069, "loss": 0.7677, "step": 9110 }, { "epoch": 0.24449871189351652, "grad_norm": 0.212890625, "learning_rate": 0.001198193962937635, "loss": 0.7433, "step": 9111 }, { "epoch": 0.24452554744525548, "grad_norm": 0.2353515625, "learning_rate": 0.001198191783487939, "loss": 0.8348, "step": 9112 }, { "epoch": 0.24455238299699442, "grad_norm": 0.2255859375, "learning_rate": 0.0011981896027259864, "loss": 0.7542, "step": 9113 }, { "epoch": 0.24457921854873335, "grad_norm": 0.2197265625, "learning_rate": 0.001198187420651782, "loss": 0.7676, "step": 9114 }, { "epoch": 0.24460605410047231, "grad_norm": 0.2177734375, "learning_rate": 0.0011981852372653303, "loss": 0.7327, "step": 9115 }, { "epoch": 0.24463288965221125, "grad_norm": 0.2294921875, "learning_rate": 0.001198183052566636, "loss": 0.7771, "step": 9116 }, { "epoch": 0.24465972520395018, "grad_norm": 0.2333984375, "learning_rate": 0.0011981808665557042, "loss": 0.7679, "step": 9117 }, { "epoch": 0.24468656075568915, "grad_norm": 0.22265625, "learning_rate": 0.0011981786792325394, "loss": 0.6948, "step": 9118 }, { "epoch": 0.24471339630742808, "grad_norm": 0.2236328125, "learning_rate": 0.001198176490597147, "loss": 0.6975, "step": 9119 }, { "epoch": 0.24474023185916702, "grad_norm": 0.2451171875, "learning_rate": 0.0011981743006495309, "loss": 0.8153, "step": 9120 }, { "epoch": 0.24476706741090598, "grad_norm": 0.2177734375, "learning_rate": 0.0011981721093896965, "loss": 0.6881, "step": 9121 }, { "epoch": 0.24479390296264492, "grad_norm": 0.2265625, "learning_rate": 0.0011981699168176485, "loss": 0.7308, "step": 9122 }, { "epoch": 0.24482073851438385, "grad_norm": 0.228515625, "learning_rate": 0.0011981677229333917, "loss": 0.7918, "step": 9123 }, { "epoch": 0.2448475740661228, "grad_norm": 0.2294921875, "learning_rate": 0.001198165527736931, "loss": 0.7147, "step": 9124 }, { "epoch": 0.24487440961786175, "grad_norm": 0.22265625, "learning_rate": 0.0011981633312282709, "loss": 0.7597, "step": 9125 }, { "epoch": 0.24490124516960068, "grad_norm": 0.21875, "learning_rate": 0.0011981611334074167, "loss": 0.6837, "step": 9126 }, { "epoch": 0.24492808072133962, "grad_norm": 0.216796875, "learning_rate": 0.0011981589342743727, "loss": 0.6939, "step": 9127 }, { "epoch": 0.24495491627307858, "grad_norm": 0.232421875, "learning_rate": 0.0011981567338291443, "loss": 0.8084, "step": 9128 }, { "epoch": 0.24498175182481752, "grad_norm": 0.2138671875, "learning_rate": 0.0011981545320717357, "loss": 0.7267, "step": 9129 }, { "epoch": 0.24500858737655645, "grad_norm": 0.2373046875, "learning_rate": 0.0011981523290021522, "loss": 0.7513, "step": 9130 }, { "epoch": 0.24503542292829542, "grad_norm": 0.2236328125, "learning_rate": 0.0011981501246203982, "loss": 0.7231, "step": 9131 }, { "epoch": 0.24506225848003435, "grad_norm": 0.234375, "learning_rate": 0.001198147918926479, "loss": 0.7968, "step": 9132 }, { "epoch": 0.24508909403177329, "grad_norm": 0.216796875, "learning_rate": 0.0011981457119203992, "loss": 0.7615, "step": 9133 }, { "epoch": 0.24511592958351225, "grad_norm": 0.2255859375, "learning_rate": 0.001198143503602164, "loss": 0.7597, "step": 9134 }, { "epoch": 0.24514276513525118, "grad_norm": 0.2255859375, "learning_rate": 0.0011981412939717775, "loss": 0.7907, "step": 9135 }, { "epoch": 0.24516960068699012, "grad_norm": 0.2138671875, "learning_rate": 0.0011981390830292451, "loss": 0.7451, "step": 9136 }, { "epoch": 0.24519643623872908, "grad_norm": 0.2314453125, "learning_rate": 0.0011981368707745716, "loss": 0.8454, "step": 9137 }, { "epoch": 0.24522327179046802, "grad_norm": 0.2275390625, "learning_rate": 0.001198134657207762, "loss": 0.7942, "step": 9138 }, { "epoch": 0.24525010734220695, "grad_norm": 0.224609375, "learning_rate": 0.0011981324423288205, "loss": 0.7147, "step": 9139 }, { "epoch": 0.2452769428939459, "grad_norm": 0.2412109375, "learning_rate": 0.0011981302261377528, "loss": 0.8737, "step": 9140 }, { "epoch": 0.24530377844568485, "grad_norm": 0.2412109375, "learning_rate": 0.001198128008634563, "loss": 0.8641, "step": 9141 }, { "epoch": 0.24533061399742379, "grad_norm": 0.2314453125, "learning_rate": 0.0011981257898192564, "loss": 0.838, "step": 9142 }, { "epoch": 0.24535744954916272, "grad_norm": 0.2578125, "learning_rate": 0.001198123569691838, "loss": 0.919, "step": 9143 }, { "epoch": 0.24538428510090168, "grad_norm": 0.20703125, "learning_rate": 0.001198121348252312, "loss": 0.757, "step": 9144 }, { "epoch": 0.24541112065264062, "grad_norm": 0.236328125, "learning_rate": 0.0011981191255006842, "loss": 0.8772, "step": 9145 }, { "epoch": 0.24543795620437955, "grad_norm": 0.224609375, "learning_rate": 0.0011981169014369589, "loss": 0.8163, "step": 9146 }, { "epoch": 0.24546479175611852, "grad_norm": 0.2275390625, "learning_rate": 0.001198114676061141, "loss": 0.7339, "step": 9147 }, { "epoch": 0.24549162730785745, "grad_norm": 0.2333984375, "learning_rate": 0.0011981124493732355, "loss": 0.7347, "step": 9148 }, { "epoch": 0.2455184628595964, "grad_norm": 0.2451171875, "learning_rate": 0.0011981102213732473, "loss": 0.8555, "step": 9149 }, { "epoch": 0.24554529841133535, "grad_norm": 0.2255859375, "learning_rate": 0.0011981079920611812, "loss": 0.7666, "step": 9150 }, { "epoch": 0.24557213396307428, "grad_norm": 0.216796875, "learning_rate": 0.001198105761437042, "loss": 0.7271, "step": 9151 }, { "epoch": 0.24559896951481322, "grad_norm": 0.201171875, "learning_rate": 0.001198103529500835, "loss": 0.6391, "step": 9152 }, { "epoch": 0.24562580506655216, "grad_norm": 0.234375, "learning_rate": 0.0011981012962525646, "loss": 0.8489, "step": 9153 }, { "epoch": 0.24565264061829112, "grad_norm": 0.2275390625, "learning_rate": 0.001198099061692236, "loss": 0.7872, "step": 9154 }, { "epoch": 0.24567947617003005, "grad_norm": 0.2392578125, "learning_rate": 0.001198096825819854, "loss": 0.8186, "step": 9155 }, { "epoch": 0.245706311721769, "grad_norm": 0.2431640625, "learning_rate": 0.0011980945886354236, "loss": 0.7881, "step": 9156 }, { "epoch": 0.24573314727350795, "grad_norm": 0.2275390625, "learning_rate": 0.0011980923501389495, "loss": 0.7853, "step": 9157 }, { "epoch": 0.2457599828252469, "grad_norm": 0.2353515625, "learning_rate": 0.001198090110330437, "loss": 0.787, "step": 9158 }, { "epoch": 0.24578681837698582, "grad_norm": 0.2314453125, "learning_rate": 0.0011980878692098905, "loss": 0.7432, "step": 9159 }, { "epoch": 0.24581365392872478, "grad_norm": 0.232421875, "learning_rate": 0.0011980856267773152, "loss": 0.785, "step": 9160 }, { "epoch": 0.24584048948046372, "grad_norm": 0.234375, "learning_rate": 0.001198083383032716, "loss": 0.8183, "step": 9161 }, { "epoch": 0.24586732503220265, "grad_norm": 0.2314453125, "learning_rate": 0.001198081137976098, "loss": 0.7913, "step": 9162 }, { "epoch": 0.24589416058394162, "grad_norm": 0.2392578125, "learning_rate": 0.0011980788916074658, "loss": 0.8026, "step": 9163 }, { "epoch": 0.24592099613568055, "grad_norm": 0.2255859375, "learning_rate": 0.0011980766439268246, "loss": 0.7509, "step": 9164 }, { "epoch": 0.2459478316874195, "grad_norm": 0.23828125, "learning_rate": 0.0011980743949341788, "loss": 0.7713, "step": 9165 }, { "epoch": 0.24597466723915842, "grad_norm": 0.2333984375, "learning_rate": 0.001198072144629534, "loss": 0.8057, "step": 9166 }, { "epoch": 0.2460015027908974, "grad_norm": 0.2294921875, "learning_rate": 0.001198069893012895, "loss": 0.7878, "step": 9167 }, { "epoch": 0.24602833834263632, "grad_norm": 0.2412109375, "learning_rate": 0.0011980676400842665, "loss": 0.7482, "step": 9168 }, { "epoch": 0.24605517389437526, "grad_norm": 0.2333984375, "learning_rate": 0.0011980653858436534, "loss": 0.8116, "step": 9169 }, { "epoch": 0.24608200944611422, "grad_norm": 0.248046875, "learning_rate": 0.001198063130291061, "loss": 0.8472, "step": 9170 }, { "epoch": 0.24610884499785315, "grad_norm": 0.248046875, "learning_rate": 0.001198060873426494, "loss": 0.8155, "step": 9171 }, { "epoch": 0.2461356805495921, "grad_norm": 0.2294921875, "learning_rate": 0.0011980586152499574, "loss": 0.7587, "step": 9172 }, { "epoch": 0.24616251610133105, "grad_norm": 0.2333984375, "learning_rate": 0.0011980563557614562, "loss": 0.7465, "step": 9173 }, { "epoch": 0.24618935165307, "grad_norm": 0.2314453125, "learning_rate": 0.0011980540949609953, "loss": 0.8299, "step": 9174 }, { "epoch": 0.24621618720480892, "grad_norm": 0.2392578125, "learning_rate": 0.0011980518328485795, "loss": 0.7996, "step": 9175 }, { "epoch": 0.24624302275654789, "grad_norm": 0.216796875, "learning_rate": 0.001198049569424214, "loss": 0.6862, "step": 9176 }, { "epoch": 0.24626985830828682, "grad_norm": 0.21875, "learning_rate": 0.0011980473046879036, "loss": 0.6788, "step": 9177 }, { "epoch": 0.24629669386002576, "grad_norm": 0.234375, "learning_rate": 0.0011980450386396536, "loss": 0.7697, "step": 9178 }, { "epoch": 0.24632352941176472, "grad_norm": 0.23828125, "learning_rate": 0.0011980427712794688, "loss": 0.8479, "step": 9179 }, { "epoch": 0.24635036496350365, "grad_norm": 0.2412109375, "learning_rate": 0.0011980405026073538, "loss": 0.8173, "step": 9180 }, { "epoch": 0.2463772005152426, "grad_norm": 0.2412109375, "learning_rate": 0.001198038232623314, "loss": 0.843, "step": 9181 }, { "epoch": 0.24640403606698152, "grad_norm": 0.232421875, "learning_rate": 0.0011980359613273542, "loss": 0.8006, "step": 9182 }, { "epoch": 0.2464308716187205, "grad_norm": 0.2041015625, "learning_rate": 0.0011980336887194795, "loss": 0.6919, "step": 9183 }, { "epoch": 0.24645770717045942, "grad_norm": 0.2333984375, "learning_rate": 0.0011980314147996948, "loss": 0.8545, "step": 9184 }, { "epoch": 0.24648454272219836, "grad_norm": 0.2275390625, "learning_rate": 0.0011980291395680054, "loss": 0.7545, "step": 9185 }, { "epoch": 0.24651137827393732, "grad_norm": 0.2080078125, "learning_rate": 0.0011980268630244156, "loss": 0.754, "step": 9186 }, { "epoch": 0.24653821382567626, "grad_norm": 0.2109375, "learning_rate": 0.0011980245851689311, "loss": 0.7343, "step": 9187 }, { "epoch": 0.2465650493774152, "grad_norm": 0.24609375, "learning_rate": 0.0011980223060015563, "loss": 0.8888, "step": 9188 }, { "epoch": 0.24659188492915415, "grad_norm": 0.2255859375, "learning_rate": 0.001198020025522297, "loss": 0.8244, "step": 9189 }, { "epoch": 0.2466187204808931, "grad_norm": 0.310546875, "learning_rate": 0.0011980177437311574, "loss": 0.8847, "step": 9190 }, { "epoch": 0.24664555603263202, "grad_norm": 0.322265625, "learning_rate": 0.0011980154606281426, "loss": 0.8693, "step": 9191 }, { "epoch": 0.246672391584371, "grad_norm": 0.26953125, "learning_rate": 0.0011980131762132581, "loss": 0.7788, "step": 9192 }, { "epoch": 0.24669922713610992, "grad_norm": 0.326171875, "learning_rate": 0.0011980108904865085, "loss": 0.8343, "step": 9193 }, { "epoch": 0.24672606268784886, "grad_norm": 0.369140625, "learning_rate": 0.001198008603447899, "loss": 0.841, "step": 9194 }, { "epoch": 0.2467528982395878, "grad_norm": 0.267578125, "learning_rate": 0.0011980063150974344, "loss": 0.8189, "step": 9195 }, { "epoch": 0.24677973379132676, "grad_norm": 0.23828125, "learning_rate": 0.00119800402543512, "loss": 0.6754, "step": 9196 }, { "epoch": 0.2468065693430657, "grad_norm": 0.27734375, "learning_rate": 0.0011980017344609608, "loss": 0.8805, "step": 9197 }, { "epoch": 0.24683340489480463, "grad_norm": 0.265625, "learning_rate": 0.0011979994421749617, "loss": 0.7954, "step": 9198 }, { "epoch": 0.2468602404465436, "grad_norm": 0.267578125, "learning_rate": 0.0011979971485771277, "loss": 0.7662, "step": 9199 }, { "epoch": 0.24688707599828252, "grad_norm": 0.232421875, "learning_rate": 0.001197994853667464, "loss": 0.7365, "step": 9200 }, { "epoch": 0.24691391155002146, "grad_norm": 0.2265625, "learning_rate": 0.0011979925574459754, "loss": 0.7642, "step": 9201 }, { "epoch": 0.24694074710176042, "grad_norm": 0.251953125, "learning_rate": 0.0011979902599126672, "loss": 0.8529, "step": 9202 }, { "epoch": 0.24696758265349936, "grad_norm": 0.263671875, "learning_rate": 0.001197987961067544, "loss": 0.838, "step": 9203 }, { "epoch": 0.2469944182052383, "grad_norm": 0.2578125, "learning_rate": 0.0011979856609106115, "loss": 0.7537, "step": 9204 }, { "epoch": 0.24702125375697725, "grad_norm": 0.2392578125, "learning_rate": 0.0011979833594418742, "loss": 0.8055, "step": 9205 }, { "epoch": 0.2470480893087162, "grad_norm": 0.2255859375, "learning_rate": 0.0011979810566613374, "loss": 0.7082, "step": 9206 }, { "epoch": 0.24707492486045513, "grad_norm": 0.244140625, "learning_rate": 0.001197978752569006, "loss": 0.8567, "step": 9207 }, { "epoch": 0.2471017604121941, "grad_norm": 0.2431640625, "learning_rate": 0.0011979764471648853, "loss": 0.773, "step": 9208 }, { "epoch": 0.24712859596393302, "grad_norm": 0.2080078125, "learning_rate": 0.0011979741404489802, "loss": 0.6978, "step": 9209 }, { "epoch": 0.24715543151567196, "grad_norm": 0.2412109375, "learning_rate": 0.0011979718324212955, "loss": 0.8531, "step": 9210 }, { "epoch": 0.2471822670674109, "grad_norm": 0.2353515625, "learning_rate": 0.0011979695230818366, "loss": 0.8295, "step": 9211 }, { "epoch": 0.24720910261914986, "grad_norm": 0.236328125, "learning_rate": 0.0011979672124306088, "loss": 0.8413, "step": 9212 }, { "epoch": 0.2472359381708888, "grad_norm": 0.2470703125, "learning_rate": 0.0011979649004676166, "loss": 0.8093, "step": 9213 }, { "epoch": 0.24726277372262773, "grad_norm": 0.2373046875, "learning_rate": 0.0011979625871928654, "loss": 0.7625, "step": 9214 }, { "epoch": 0.2472896092743667, "grad_norm": 0.251953125, "learning_rate": 0.00119796027260636, "loss": 0.81, "step": 9215 }, { "epoch": 0.24731644482610562, "grad_norm": 0.2275390625, "learning_rate": 0.0011979579567081061, "loss": 0.7664, "step": 9216 }, { "epoch": 0.24734328037784456, "grad_norm": 0.2255859375, "learning_rate": 0.001197955639498108, "loss": 0.7117, "step": 9217 }, { "epoch": 0.24737011592958352, "grad_norm": 0.2373046875, "learning_rate": 0.0011979533209763712, "loss": 0.8976, "step": 9218 }, { "epoch": 0.24739695148132246, "grad_norm": 0.24609375, "learning_rate": 0.001197951001142901, "loss": 0.8396, "step": 9219 }, { "epoch": 0.2474237870330614, "grad_norm": 0.271484375, "learning_rate": 0.001197948679997702, "loss": 0.9169, "step": 9220 }, { "epoch": 0.24745062258480036, "grad_norm": 0.2138671875, "learning_rate": 0.0011979463575407796, "loss": 0.7543, "step": 9221 }, { "epoch": 0.2474774581365393, "grad_norm": 0.2392578125, "learning_rate": 0.0011979440337721388, "loss": 0.8779, "step": 9222 }, { "epoch": 0.24750429368827823, "grad_norm": 0.2314453125, "learning_rate": 0.0011979417086917846, "loss": 0.7951, "step": 9223 }, { "epoch": 0.24753112924001716, "grad_norm": 0.23046875, "learning_rate": 0.0011979393822997223, "loss": 0.7839, "step": 9224 }, { "epoch": 0.24755796479175612, "grad_norm": 0.2255859375, "learning_rate": 0.001197937054595957, "loss": 0.7375, "step": 9225 }, { "epoch": 0.24758480034349506, "grad_norm": 0.251953125, "learning_rate": 0.0011979347255804936, "loss": 0.8455, "step": 9226 }, { "epoch": 0.247611635895234, "grad_norm": 0.236328125, "learning_rate": 0.0011979323952533373, "loss": 0.8687, "step": 9227 }, { "epoch": 0.24763847144697296, "grad_norm": 0.2158203125, "learning_rate": 0.0011979300636144933, "loss": 0.723, "step": 9228 }, { "epoch": 0.2476653069987119, "grad_norm": 0.22265625, "learning_rate": 0.0011979277306639665, "loss": 0.7493, "step": 9229 }, { "epoch": 0.24769214255045083, "grad_norm": 0.21484375, "learning_rate": 0.0011979253964017624, "loss": 0.723, "step": 9230 }, { "epoch": 0.2477189781021898, "grad_norm": 0.2353515625, "learning_rate": 0.0011979230608278857, "loss": 0.8249, "step": 9231 }, { "epoch": 0.24774581365392873, "grad_norm": 0.216796875, "learning_rate": 0.001197920723942342, "loss": 0.7679, "step": 9232 }, { "epoch": 0.24777264920566766, "grad_norm": 0.2294921875, "learning_rate": 0.0011979183857451358, "loss": 0.7966, "step": 9233 }, { "epoch": 0.24779948475740662, "grad_norm": 0.224609375, "learning_rate": 0.0011979160462362729, "loss": 0.782, "step": 9234 }, { "epoch": 0.24782632030914556, "grad_norm": 0.2216796875, "learning_rate": 0.0011979137054157578, "loss": 0.7343, "step": 9235 }, { "epoch": 0.2478531558608845, "grad_norm": 0.23046875, "learning_rate": 0.001197911363283596, "loss": 0.7882, "step": 9236 }, { "epoch": 0.24787999141262346, "grad_norm": 0.2255859375, "learning_rate": 0.0011979090198397926, "loss": 0.8171, "step": 9237 }, { "epoch": 0.2479068269643624, "grad_norm": 0.2216796875, "learning_rate": 0.0011979066750843529, "loss": 0.665, "step": 9238 }, { "epoch": 0.24793366251610133, "grad_norm": 0.236328125, "learning_rate": 0.0011979043290172818, "loss": 0.7899, "step": 9239 }, { "epoch": 0.24796049806784026, "grad_norm": 0.232421875, "learning_rate": 0.0011979019816385844, "loss": 0.7826, "step": 9240 }, { "epoch": 0.24798733361957923, "grad_norm": 0.2451171875, "learning_rate": 0.001197899632948266, "loss": 0.8293, "step": 9241 }, { "epoch": 0.24801416917131816, "grad_norm": 0.2412109375, "learning_rate": 0.0011978972829463316, "loss": 0.8454, "step": 9242 }, { "epoch": 0.2480410047230571, "grad_norm": 0.2451171875, "learning_rate": 0.0011978949316327867, "loss": 0.8431, "step": 9243 }, { "epoch": 0.24806784027479606, "grad_norm": 0.2353515625, "learning_rate": 0.001197892579007636, "loss": 0.8429, "step": 9244 }, { "epoch": 0.248094675826535, "grad_norm": 0.2353515625, "learning_rate": 0.001197890225070885, "loss": 0.8901, "step": 9245 }, { "epoch": 0.24812151137827393, "grad_norm": 0.22265625, "learning_rate": 0.0011978878698225388, "loss": 0.6873, "step": 9246 }, { "epoch": 0.2481483469300129, "grad_norm": 0.2578125, "learning_rate": 0.0011978855132626022, "loss": 0.8988, "step": 9247 }, { "epoch": 0.24817518248175183, "grad_norm": 0.220703125, "learning_rate": 0.0011978831553910811, "loss": 0.7628, "step": 9248 }, { "epoch": 0.24820201803349076, "grad_norm": 0.23046875, "learning_rate": 0.00119788079620798, "loss": 0.7825, "step": 9249 }, { "epoch": 0.24822885358522973, "grad_norm": 0.224609375, "learning_rate": 0.0011978784357133045, "loss": 0.7772, "step": 9250 }, { "epoch": 0.24825568913696866, "grad_norm": 0.2353515625, "learning_rate": 0.0011978760739070596, "loss": 0.8119, "step": 9251 }, { "epoch": 0.2482825246887076, "grad_norm": 0.23046875, "learning_rate": 0.0011978737107892503, "loss": 0.7254, "step": 9252 }, { "epoch": 0.24830936024044653, "grad_norm": 0.2412109375, "learning_rate": 0.001197871346359882, "loss": 0.8946, "step": 9253 }, { "epoch": 0.2483361957921855, "grad_norm": 0.2294921875, "learning_rate": 0.00119786898061896, "loss": 0.764, "step": 9254 }, { "epoch": 0.24836303134392443, "grad_norm": 0.236328125, "learning_rate": 0.0011978666135664894, "loss": 0.8386, "step": 9255 }, { "epoch": 0.24838986689566336, "grad_norm": 0.240234375, "learning_rate": 0.0011978642452024752, "loss": 0.8615, "step": 9256 }, { "epoch": 0.24841670244740233, "grad_norm": 0.228515625, "learning_rate": 0.0011978618755269228, "loss": 0.8693, "step": 9257 }, { "epoch": 0.24844353799914126, "grad_norm": 0.2353515625, "learning_rate": 0.0011978595045398373, "loss": 0.8239, "step": 9258 }, { "epoch": 0.2484703735508802, "grad_norm": 0.23828125, "learning_rate": 0.001197857132241224, "loss": 0.8157, "step": 9259 }, { "epoch": 0.24849720910261916, "grad_norm": 0.22265625, "learning_rate": 0.001197854758631088, "loss": 0.8084, "step": 9260 }, { "epoch": 0.2485240446543581, "grad_norm": 0.2451171875, "learning_rate": 0.0011978523837094346, "loss": 0.9458, "step": 9261 }, { "epoch": 0.24855088020609703, "grad_norm": 0.2138671875, "learning_rate": 0.0011978500074762688, "loss": 0.7283, "step": 9262 }, { "epoch": 0.248577715757836, "grad_norm": 0.2109375, "learning_rate": 0.0011978476299315962, "loss": 0.6089, "step": 9263 }, { "epoch": 0.24860455130957493, "grad_norm": 0.234375, "learning_rate": 0.0011978452510754219, "loss": 0.8095, "step": 9264 }, { "epoch": 0.24863138686131386, "grad_norm": 0.236328125, "learning_rate": 0.0011978428709077506, "loss": 0.8571, "step": 9265 }, { "epoch": 0.2486582224130528, "grad_norm": 0.2275390625, "learning_rate": 0.0011978404894285883, "loss": 0.8036, "step": 9266 }, { "epoch": 0.24868505796479176, "grad_norm": 0.21484375, "learning_rate": 0.0011978381066379395, "loss": 0.7378, "step": 9267 }, { "epoch": 0.2487118935165307, "grad_norm": 0.2138671875, "learning_rate": 0.00119783572253581, "loss": 0.721, "step": 9268 }, { "epoch": 0.24873872906826963, "grad_norm": 0.22265625, "learning_rate": 0.0011978333371222049, "loss": 0.8145, "step": 9269 }, { "epoch": 0.2487655646200086, "grad_norm": 0.236328125, "learning_rate": 0.001197830950397129, "loss": 0.8295, "step": 9270 }, { "epoch": 0.24879240017174753, "grad_norm": 0.2294921875, "learning_rate": 0.0011978285623605882, "loss": 0.7365, "step": 9271 }, { "epoch": 0.24881923572348646, "grad_norm": 0.2138671875, "learning_rate": 0.0011978261730125873, "loss": 0.6583, "step": 9272 }, { "epoch": 0.24884607127522543, "grad_norm": 0.2158203125, "learning_rate": 0.0011978237823531317, "loss": 0.7883, "step": 9273 }, { "epoch": 0.24887290682696436, "grad_norm": 0.2353515625, "learning_rate": 0.0011978213903822265, "loss": 0.8714, "step": 9274 }, { "epoch": 0.2488997423787033, "grad_norm": 0.2021484375, "learning_rate": 0.0011978189970998773, "loss": 0.699, "step": 9275 }, { "epoch": 0.24892657793044226, "grad_norm": 0.2333984375, "learning_rate": 0.0011978166025060886, "loss": 0.7783, "step": 9276 }, { "epoch": 0.2489534134821812, "grad_norm": 0.2197265625, "learning_rate": 0.0011978142066008668, "loss": 0.7458, "step": 9277 }, { "epoch": 0.24898024903392013, "grad_norm": 0.2216796875, "learning_rate": 0.001197811809384216, "loss": 0.7732, "step": 9278 }, { "epoch": 0.2490070845856591, "grad_norm": 0.228515625, "learning_rate": 0.0011978094108561423, "loss": 0.8101, "step": 9279 }, { "epoch": 0.24903392013739803, "grad_norm": 0.24609375, "learning_rate": 0.0011978070110166504, "loss": 0.8939, "step": 9280 }, { "epoch": 0.24906075568913696, "grad_norm": 0.2177734375, "learning_rate": 0.001197804609865746, "loss": 0.7572, "step": 9281 }, { "epoch": 0.2490875912408759, "grad_norm": 0.2333984375, "learning_rate": 0.001197802207403434, "loss": 0.84, "step": 9282 }, { "epoch": 0.24911442679261486, "grad_norm": 0.21875, "learning_rate": 0.0011977998036297199, "loss": 0.7311, "step": 9283 }, { "epoch": 0.2491412623443538, "grad_norm": 0.2294921875, "learning_rate": 0.0011977973985446088, "loss": 0.7801, "step": 9284 }, { "epoch": 0.24916809789609273, "grad_norm": 0.2216796875, "learning_rate": 0.0011977949921481061, "loss": 0.7408, "step": 9285 }, { "epoch": 0.2491949334478317, "grad_norm": 0.2294921875, "learning_rate": 0.0011977925844402172, "loss": 0.85, "step": 9286 }, { "epoch": 0.24922176899957063, "grad_norm": 0.24609375, "learning_rate": 0.001197790175420947, "loss": 0.8764, "step": 9287 }, { "epoch": 0.24924860455130957, "grad_norm": 0.2490234375, "learning_rate": 0.0011977877650903015, "loss": 0.8906, "step": 9288 }, { "epoch": 0.24927544010304853, "grad_norm": 0.23828125, "learning_rate": 0.0011977853534482852, "loss": 0.8377, "step": 9289 }, { "epoch": 0.24930227565478746, "grad_norm": 0.22265625, "learning_rate": 0.0011977829404949036, "loss": 0.7516, "step": 9290 }, { "epoch": 0.2493291112065264, "grad_norm": 0.2470703125, "learning_rate": 0.0011977805262301624, "loss": 0.8828, "step": 9291 }, { "epoch": 0.24935594675826536, "grad_norm": 0.2275390625, "learning_rate": 0.0011977781106540662, "loss": 0.8353, "step": 9292 }, { "epoch": 0.2493827823100043, "grad_norm": 0.2265625, "learning_rate": 0.001197775693766621, "loss": 0.7652, "step": 9293 }, { "epoch": 0.24940961786174323, "grad_norm": 0.248046875, "learning_rate": 0.0011977732755678316, "loss": 0.8439, "step": 9294 }, { "epoch": 0.24943645341348217, "grad_norm": 0.2265625, "learning_rate": 0.0011977708560577038, "loss": 0.738, "step": 9295 }, { "epoch": 0.24946328896522113, "grad_norm": 0.259765625, "learning_rate": 0.0011977684352362425, "loss": 0.8931, "step": 9296 }, { "epoch": 0.24949012451696007, "grad_norm": 0.228515625, "learning_rate": 0.0011977660131034528, "loss": 0.794, "step": 9297 }, { "epoch": 0.249516960068699, "grad_norm": 0.244140625, "learning_rate": 0.0011977635896593406, "loss": 0.8821, "step": 9298 }, { "epoch": 0.24954379562043796, "grad_norm": 0.2490234375, "learning_rate": 0.0011977611649039112, "loss": 0.8724, "step": 9299 }, { "epoch": 0.2495706311721769, "grad_norm": 0.23828125, "learning_rate": 0.0011977587388371691, "loss": 0.7725, "step": 9300 }, { "epoch": 0.24959746672391583, "grad_norm": 0.224609375, "learning_rate": 0.0011977563114591206, "loss": 0.7311, "step": 9301 }, { "epoch": 0.2496243022756548, "grad_norm": 0.2236328125, "learning_rate": 0.0011977538827697704, "loss": 0.8371, "step": 9302 }, { "epoch": 0.24965113782739373, "grad_norm": 0.23828125, "learning_rate": 0.0011977514527691242, "loss": 0.7847, "step": 9303 }, { "epoch": 0.24967797337913267, "grad_norm": 0.2275390625, "learning_rate": 0.001197749021457187, "loss": 0.7845, "step": 9304 }, { "epoch": 0.24970480893087163, "grad_norm": 0.224609375, "learning_rate": 0.0011977465888339646, "loss": 0.7632, "step": 9305 }, { "epoch": 0.24973164448261057, "grad_norm": 0.2333984375, "learning_rate": 0.001197744154899462, "loss": 0.8946, "step": 9306 }, { "epoch": 0.2497584800343495, "grad_norm": 0.216796875, "learning_rate": 0.0011977417196536843, "loss": 0.7716, "step": 9307 }, { "epoch": 0.24978531558608846, "grad_norm": 0.220703125, "learning_rate": 0.0011977392830966372, "loss": 0.7284, "step": 9308 }, { "epoch": 0.2498121511378274, "grad_norm": 0.23046875, "learning_rate": 0.001197736845228326, "loss": 0.736, "step": 9309 }, { "epoch": 0.24983898668956633, "grad_norm": 0.2041015625, "learning_rate": 0.0011977344060487562, "loss": 0.6593, "step": 9310 }, { "epoch": 0.24986582224130527, "grad_norm": 0.234375, "learning_rate": 0.0011977319655579327, "loss": 0.8348, "step": 9311 }, { "epoch": 0.24989265779304423, "grad_norm": 0.2255859375, "learning_rate": 0.0011977295237558612, "loss": 0.7954, "step": 9312 }, { "epoch": 0.24991949334478317, "grad_norm": 0.2314453125, "learning_rate": 0.001197727080642547, "loss": 0.8442, "step": 9313 }, { "epoch": 0.2499463288965221, "grad_norm": 0.23046875, "learning_rate": 0.0011977246362179954, "loss": 0.765, "step": 9314 }, { "epoch": 0.24997316444826106, "grad_norm": 0.2431640625, "learning_rate": 0.001197722190482212, "loss": 0.8284, "step": 9315 }, { "epoch": 0.25, "grad_norm": 0.224609375, "learning_rate": 0.001197719743435202, "loss": 0.7487, "step": 9316 }, { "epoch": 0.25002683555173894, "grad_norm": 0.23828125, "learning_rate": 0.0011977172950769705, "loss": 0.8747, "step": 9317 }, { "epoch": 0.25005367110347787, "grad_norm": 0.2490234375, "learning_rate": 0.0011977148454075232, "loss": 0.8715, "step": 9318 }, { "epoch": 0.2500805066552168, "grad_norm": 0.236328125, "learning_rate": 0.0011977123944268654, "loss": 0.8405, "step": 9319 }, { "epoch": 0.2501073422069558, "grad_norm": 0.2255859375, "learning_rate": 0.0011977099421350023, "loss": 0.777, "step": 9320 }, { "epoch": 0.25013417775869473, "grad_norm": 0.25390625, "learning_rate": 0.0011977074885319396, "loss": 0.9101, "step": 9321 }, { "epoch": 0.25016101331043367, "grad_norm": 0.208984375, "learning_rate": 0.0011977050336176825, "loss": 0.6864, "step": 9322 }, { "epoch": 0.2501878488621726, "grad_norm": 0.236328125, "learning_rate": 0.0011977025773922364, "loss": 0.7361, "step": 9323 }, { "epoch": 0.25021468441391154, "grad_norm": 0.2431640625, "learning_rate": 0.0011977001198556065, "loss": 0.848, "step": 9324 }, { "epoch": 0.25024151996565047, "grad_norm": 0.240234375, "learning_rate": 0.0011976976610077986, "loss": 0.8717, "step": 9325 }, { "epoch": 0.25026835551738946, "grad_norm": 0.2216796875, "learning_rate": 0.0011976952008488178, "loss": 0.7326, "step": 9326 }, { "epoch": 0.2502951910691284, "grad_norm": 0.236328125, "learning_rate": 0.0011976927393786698, "loss": 0.8683, "step": 9327 }, { "epoch": 0.25032202662086733, "grad_norm": 0.2578125, "learning_rate": 0.0011976902765973593, "loss": 0.8732, "step": 9328 }, { "epoch": 0.25034886217260627, "grad_norm": 0.23828125, "learning_rate": 0.0011976878125048926, "loss": 0.8046, "step": 9329 }, { "epoch": 0.2503756977243452, "grad_norm": 0.2333984375, "learning_rate": 0.0011976853471012744, "loss": 0.8224, "step": 9330 }, { "epoch": 0.25040253327608414, "grad_norm": 0.2353515625, "learning_rate": 0.0011976828803865105, "loss": 0.7898, "step": 9331 }, { "epoch": 0.2504293688278231, "grad_norm": 0.2431640625, "learning_rate": 0.0011976804123606064, "loss": 0.8374, "step": 9332 }, { "epoch": 0.25045620437956206, "grad_norm": 0.2255859375, "learning_rate": 0.0011976779430235669, "loss": 0.8106, "step": 9333 }, { "epoch": 0.250483039931301, "grad_norm": 0.2236328125, "learning_rate": 0.0011976754723753982, "loss": 0.7681, "step": 9334 }, { "epoch": 0.25050987548303993, "grad_norm": 0.2275390625, "learning_rate": 0.0011976730004161053, "loss": 0.8383, "step": 9335 }, { "epoch": 0.25053671103477887, "grad_norm": 0.2373046875, "learning_rate": 0.0011976705271456935, "loss": 0.8266, "step": 9336 }, { "epoch": 0.2505635465865178, "grad_norm": 0.2392578125, "learning_rate": 0.0011976680525641686, "loss": 0.7925, "step": 9337 }, { "epoch": 0.25059038213825674, "grad_norm": 0.240234375, "learning_rate": 0.0011976655766715357, "loss": 0.8974, "step": 9338 }, { "epoch": 0.25061721768999573, "grad_norm": 0.2158203125, "learning_rate": 0.0011976630994678004, "loss": 0.7021, "step": 9339 }, { "epoch": 0.25064405324173467, "grad_norm": 0.2177734375, "learning_rate": 0.001197660620952968, "loss": 0.7908, "step": 9340 }, { "epoch": 0.2506708887934736, "grad_norm": 0.2177734375, "learning_rate": 0.0011976581411270442, "loss": 0.689, "step": 9341 }, { "epoch": 0.25069772434521254, "grad_norm": 0.236328125, "learning_rate": 0.0011976556599900341, "loss": 0.738, "step": 9342 }, { "epoch": 0.25072455989695147, "grad_norm": 0.2041015625, "learning_rate": 0.0011976531775419433, "loss": 0.6888, "step": 9343 }, { "epoch": 0.2507513954486904, "grad_norm": 0.208984375, "learning_rate": 0.0011976506937827775, "loss": 0.7458, "step": 9344 }, { "epoch": 0.25077823100042934, "grad_norm": 0.2392578125, "learning_rate": 0.0011976482087125418, "loss": 0.8499, "step": 9345 }, { "epoch": 0.25080506655216833, "grad_norm": 0.2314453125, "learning_rate": 0.0011976457223312417, "loss": 0.8204, "step": 9346 }, { "epoch": 0.25083190210390727, "grad_norm": 0.21875, "learning_rate": 0.0011976432346388827, "loss": 0.7516, "step": 9347 }, { "epoch": 0.2508587376556462, "grad_norm": 0.224609375, "learning_rate": 0.0011976407456354703, "loss": 0.7698, "step": 9348 }, { "epoch": 0.25088557320738514, "grad_norm": 0.201171875, "learning_rate": 0.00119763825532101, "loss": 0.673, "step": 9349 }, { "epoch": 0.2509124087591241, "grad_norm": 0.2265625, "learning_rate": 0.001197635763695507, "loss": 0.7952, "step": 9350 }, { "epoch": 0.250939244310863, "grad_norm": 0.216796875, "learning_rate": 0.0011976332707589671, "loss": 0.7352, "step": 9351 }, { "epoch": 0.250966079862602, "grad_norm": 0.2451171875, "learning_rate": 0.0011976307765113959, "loss": 0.9073, "step": 9352 }, { "epoch": 0.25099291541434093, "grad_norm": 0.2177734375, "learning_rate": 0.0011976282809527982, "loss": 0.7573, "step": 9353 }, { "epoch": 0.25101975096607987, "grad_norm": 0.2314453125, "learning_rate": 0.00119762578408318, "loss": 0.79, "step": 9354 }, { "epoch": 0.2510465865178188, "grad_norm": 0.22265625, "learning_rate": 0.0011976232859025467, "loss": 0.7268, "step": 9355 }, { "epoch": 0.25107342206955774, "grad_norm": 0.2333984375, "learning_rate": 0.0011976207864109038, "loss": 0.8222, "step": 9356 }, { "epoch": 0.2511002576212967, "grad_norm": 0.2431640625, "learning_rate": 0.0011976182856082566, "loss": 0.8502, "step": 9357 }, { "epoch": 0.25112709317303566, "grad_norm": 0.2138671875, "learning_rate": 0.0011976157834946107, "loss": 0.6868, "step": 9358 }, { "epoch": 0.2511539287247746, "grad_norm": 0.2265625, "learning_rate": 0.0011976132800699715, "loss": 0.7487, "step": 9359 }, { "epoch": 0.25118076427651354, "grad_norm": 0.2255859375, "learning_rate": 0.001197610775334345, "loss": 0.7201, "step": 9360 }, { "epoch": 0.25120759982825247, "grad_norm": 0.2373046875, "learning_rate": 0.0011976082692877357, "loss": 0.835, "step": 9361 }, { "epoch": 0.2512344353799914, "grad_norm": 0.2119140625, "learning_rate": 0.00119760576193015, "loss": 0.7175, "step": 9362 }, { "epoch": 0.25126127093173034, "grad_norm": 0.21875, "learning_rate": 0.0011976032532615931, "loss": 0.7334, "step": 9363 }, { "epoch": 0.2512881064834693, "grad_norm": 0.212890625, "learning_rate": 0.0011976007432820705, "loss": 0.7392, "step": 9364 }, { "epoch": 0.25131494203520827, "grad_norm": 0.2314453125, "learning_rate": 0.0011975982319915874, "loss": 0.7644, "step": 9365 }, { "epoch": 0.2513417775869472, "grad_norm": 0.2578125, "learning_rate": 0.0011975957193901497, "loss": 0.975, "step": 9366 }, { "epoch": 0.25136861313868614, "grad_norm": 0.2119140625, "learning_rate": 0.0011975932054777631, "loss": 0.7655, "step": 9367 }, { "epoch": 0.25139544869042507, "grad_norm": 0.2333984375, "learning_rate": 0.0011975906902544327, "loss": 0.7722, "step": 9368 }, { "epoch": 0.251422284242164, "grad_norm": 0.20703125, "learning_rate": 0.001197588173720164, "loss": 0.6832, "step": 9369 }, { "epoch": 0.25144911979390294, "grad_norm": 0.23828125, "learning_rate": 0.0011975856558749628, "loss": 0.8316, "step": 9370 }, { "epoch": 0.25147595534564193, "grad_norm": 0.255859375, "learning_rate": 0.0011975831367188343, "loss": 0.9836, "step": 9371 }, { "epoch": 0.25150279089738087, "grad_norm": 0.2275390625, "learning_rate": 0.0011975806162517844, "loss": 0.8037, "step": 9372 }, { "epoch": 0.2515296264491198, "grad_norm": 0.2119140625, "learning_rate": 0.0011975780944738182, "loss": 0.6551, "step": 9373 }, { "epoch": 0.25155646200085874, "grad_norm": 0.21484375, "learning_rate": 0.0011975755713849415, "loss": 0.7062, "step": 9374 }, { "epoch": 0.2515832975525977, "grad_norm": 0.2265625, "learning_rate": 0.00119757304698516, "loss": 0.8095, "step": 9375 }, { "epoch": 0.2516101331043366, "grad_norm": 0.236328125, "learning_rate": 0.0011975705212744791, "loss": 0.7895, "step": 9376 }, { "epoch": 0.25163696865607554, "grad_norm": 0.2255859375, "learning_rate": 0.001197567994252904, "loss": 0.7439, "step": 9377 }, { "epoch": 0.25166380420781453, "grad_norm": 0.22265625, "learning_rate": 0.0011975654659204407, "loss": 0.7512, "step": 9378 }, { "epoch": 0.25169063975955347, "grad_norm": 0.236328125, "learning_rate": 0.0011975629362770948, "loss": 0.7667, "step": 9379 }, { "epoch": 0.2517174753112924, "grad_norm": 0.2353515625, "learning_rate": 0.0011975604053228713, "loss": 0.8166, "step": 9380 }, { "epoch": 0.25174431086303134, "grad_norm": 0.26171875, "learning_rate": 0.0011975578730577762, "loss": 0.8724, "step": 9381 }, { "epoch": 0.2517711464147703, "grad_norm": 0.2265625, "learning_rate": 0.001197555339481815, "loss": 0.8209, "step": 9382 }, { "epoch": 0.2517979819665092, "grad_norm": 0.2373046875, "learning_rate": 0.0011975528045949928, "loss": 0.8137, "step": 9383 }, { "epoch": 0.2518248175182482, "grad_norm": 0.23046875, "learning_rate": 0.001197550268397316, "loss": 0.8426, "step": 9384 }, { "epoch": 0.25185165306998714, "grad_norm": 0.23828125, "learning_rate": 0.0011975477308887894, "loss": 0.9043, "step": 9385 }, { "epoch": 0.25187848862172607, "grad_norm": 0.232421875, "learning_rate": 0.0011975451920694191, "loss": 0.7949, "step": 9386 }, { "epoch": 0.251905324173465, "grad_norm": 0.2392578125, "learning_rate": 0.0011975426519392103, "loss": 0.8007, "step": 9387 }, { "epoch": 0.25193215972520394, "grad_norm": 0.236328125, "learning_rate": 0.0011975401104981688, "loss": 0.8211, "step": 9388 }, { "epoch": 0.2519589952769429, "grad_norm": 0.23828125, "learning_rate": 0.0011975375677463, "loss": 0.8219, "step": 9389 }, { "epoch": 0.2519858308286818, "grad_norm": 0.2392578125, "learning_rate": 0.0011975350236836098, "loss": 0.8135, "step": 9390 }, { "epoch": 0.2520126663804208, "grad_norm": 0.2255859375, "learning_rate": 0.0011975324783101034, "loss": 0.707, "step": 9391 }, { "epoch": 0.25203950193215974, "grad_norm": 0.2265625, "learning_rate": 0.0011975299316257866, "loss": 0.8246, "step": 9392 }, { "epoch": 0.2520663374838987, "grad_norm": 0.2177734375, "learning_rate": 0.0011975273836306649, "loss": 0.6979, "step": 9393 }, { "epoch": 0.2520931730356376, "grad_norm": 0.2197265625, "learning_rate": 0.0011975248343247438, "loss": 0.695, "step": 9394 }, { "epoch": 0.25212000858737654, "grad_norm": 0.2236328125, "learning_rate": 0.0011975222837080292, "loss": 0.7093, "step": 9395 }, { "epoch": 0.2521468441391155, "grad_norm": 0.2294921875, "learning_rate": 0.0011975197317805263, "loss": 0.7443, "step": 9396 }, { "epoch": 0.25217367969085447, "grad_norm": 0.2353515625, "learning_rate": 0.001197517178542241, "loss": 0.813, "step": 9397 }, { "epoch": 0.2522005152425934, "grad_norm": 0.24609375, "learning_rate": 0.001197514623993179, "loss": 0.9168, "step": 9398 }, { "epoch": 0.25222735079433234, "grad_norm": 0.2373046875, "learning_rate": 0.0011975120681333455, "loss": 0.8832, "step": 9399 }, { "epoch": 0.2522541863460713, "grad_norm": 0.2275390625, "learning_rate": 0.001197509510962746, "loss": 0.8228, "step": 9400 }, { "epoch": 0.2522810218978102, "grad_norm": 0.2255859375, "learning_rate": 0.001197506952481387, "loss": 0.8139, "step": 9401 }, { "epoch": 0.25230785744954914, "grad_norm": 0.224609375, "learning_rate": 0.0011975043926892733, "loss": 0.7809, "step": 9402 }, { "epoch": 0.2523346930012881, "grad_norm": 0.232421875, "learning_rate": 0.0011975018315864107, "loss": 0.7599, "step": 9403 }, { "epoch": 0.25236152855302707, "grad_norm": 0.248046875, "learning_rate": 0.001197499269172805, "loss": 0.8891, "step": 9404 }, { "epoch": 0.252388364104766, "grad_norm": 0.2138671875, "learning_rate": 0.0011974967054484615, "loss": 0.6898, "step": 9405 }, { "epoch": 0.25241519965650494, "grad_norm": 0.2431640625, "learning_rate": 0.0011974941404133863, "loss": 0.8768, "step": 9406 }, { "epoch": 0.2524420352082439, "grad_norm": 0.2236328125, "learning_rate": 0.0011974915740675844, "loss": 0.7827, "step": 9407 }, { "epoch": 0.2524688707599828, "grad_norm": 0.2158203125, "learning_rate": 0.001197489006411062, "loss": 0.7072, "step": 9408 }, { "epoch": 0.25249570631172175, "grad_norm": 0.224609375, "learning_rate": 0.0011974864374438242, "loss": 0.7634, "step": 9409 }, { "epoch": 0.25252254186346074, "grad_norm": 0.240234375, "learning_rate": 0.0011974838671658774, "loss": 0.8162, "step": 9410 }, { "epoch": 0.25254937741519967, "grad_norm": 0.2197265625, "learning_rate": 0.0011974812955772266, "loss": 0.7151, "step": 9411 }, { "epoch": 0.2525762129669386, "grad_norm": 0.271484375, "learning_rate": 0.0011974787226778775, "loss": 0.8741, "step": 9412 }, { "epoch": 0.25260304851867754, "grad_norm": 0.2314453125, "learning_rate": 0.001197476148467836, "loss": 0.7692, "step": 9413 }, { "epoch": 0.2526298840704165, "grad_norm": 0.244140625, "learning_rate": 0.0011974735729471074, "loss": 0.8635, "step": 9414 }, { "epoch": 0.2526567196221554, "grad_norm": 0.2294921875, "learning_rate": 0.0011974709961156977, "loss": 0.7662, "step": 9415 }, { "epoch": 0.2526835551738944, "grad_norm": 0.2431640625, "learning_rate": 0.0011974684179736125, "loss": 0.8135, "step": 9416 }, { "epoch": 0.25271039072563334, "grad_norm": 0.234375, "learning_rate": 0.0011974658385208571, "loss": 0.7912, "step": 9417 }, { "epoch": 0.2527372262773723, "grad_norm": 0.22265625, "learning_rate": 0.0011974632577574377, "loss": 0.7401, "step": 9418 }, { "epoch": 0.2527640618291112, "grad_norm": 0.251953125, "learning_rate": 0.0011974606756833598, "loss": 0.7975, "step": 9419 }, { "epoch": 0.25279089738085014, "grad_norm": 0.22265625, "learning_rate": 0.0011974580922986286, "loss": 0.7086, "step": 9420 }, { "epoch": 0.2528177329325891, "grad_norm": 0.251953125, "learning_rate": 0.0011974555076032503, "loss": 0.889, "step": 9421 }, { "epoch": 0.252844568484328, "grad_norm": 0.2158203125, "learning_rate": 0.0011974529215972303, "loss": 0.7106, "step": 9422 }, { "epoch": 0.252871404036067, "grad_norm": 0.232421875, "learning_rate": 0.0011974503342805744, "loss": 0.7985, "step": 9423 }, { "epoch": 0.25289823958780594, "grad_norm": 0.212890625, "learning_rate": 0.0011974477456532882, "loss": 0.6917, "step": 9424 }, { "epoch": 0.2529250751395449, "grad_norm": 0.259765625, "learning_rate": 0.0011974451557153778, "loss": 0.9005, "step": 9425 }, { "epoch": 0.2529519106912838, "grad_norm": 0.2236328125, "learning_rate": 0.0011974425644668482, "loss": 0.778, "step": 9426 }, { "epoch": 0.25297874624302275, "grad_norm": 0.263671875, "learning_rate": 0.0011974399719077054, "loss": 0.8407, "step": 9427 }, { "epoch": 0.2530055817947617, "grad_norm": 0.2392578125, "learning_rate": 0.0011974373780379549, "loss": 0.8796, "step": 9428 }, { "epoch": 0.25303241734650067, "grad_norm": 0.2451171875, "learning_rate": 0.0011974347828576026, "loss": 0.8369, "step": 9429 }, { "epoch": 0.2530592528982396, "grad_norm": 0.234375, "learning_rate": 0.0011974321863666543, "loss": 0.8059, "step": 9430 }, { "epoch": 0.25308608844997854, "grad_norm": 0.236328125, "learning_rate": 0.0011974295885651156, "loss": 0.7856, "step": 9431 }, { "epoch": 0.2531129240017175, "grad_norm": 0.2177734375, "learning_rate": 0.001197426989452992, "loss": 0.7188, "step": 9432 }, { "epoch": 0.2531397595534564, "grad_norm": 0.228515625, "learning_rate": 0.0011974243890302894, "loss": 0.7319, "step": 9433 }, { "epoch": 0.25316659510519535, "grad_norm": 0.2431640625, "learning_rate": 0.0011974217872970133, "loss": 0.9291, "step": 9434 }, { "epoch": 0.2531934306569343, "grad_norm": 0.2216796875, "learning_rate": 0.00119741918425317, "loss": 0.7656, "step": 9435 }, { "epoch": 0.2532202662086733, "grad_norm": 0.224609375, "learning_rate": 0.0011974165798987642, "loss": 0.7591, "step": 9436 }, { "epoch": 0.2532471017604122, "grad_norm": 0.228515625, "learning_rate": 0.0011974139742338026, "loss": 0.7751, "step": 9437 }, { "epoch": 0.25327393731215114, "grad_norm": 0.2314453125, "learning_rate": 0.0011974113672582904, "loss": 0.794, "step": 9438 }, { "epoch": 0.2533007728638901, "grad_norm": 0.2353515625, "learning_rate": 0.0011974087589722334, "loss": 0.8509, "step": 9439 }, { "epoch": 0.253327608415629, "grad_norm": 0.2138671875, "learning_rate": 0.0011974061493756373, "loss": 0.7272, "step": 9440 }, { "epoch": 0.25335444396736795, "grad_norm": 0.234375, "learning_rate": 0.001197403538468508, "loss": 0.786, "step": 9441 }, { "epoch": 0.25338127951910694, "grad_norm": 0.21875, "learning_rate": 0.001197400926250851, "loss": 0.7259, "step": 9442 }, { "epoch": 0.2534081150708459, "grad_norm": 0.2197265625, "learning_rate": 0.001197398312722672, "loss": 0.7235, "step": 9443 }, { "epoch": 0.2534349506225848, "grad_norm": 0.228515625, "learning_rate": 0.001197395697883977, "loss": 0.7915, "step": 9444 }, { "epoch": 0.25346178617432374, "grad_norm": 0.2177734375, "learning_rate": 0.0011973930817347717, "loss": 0.7645, "step": 9445 }, { "epoch": 0.2534886217260627, "grad_norm": 0.234375, "learning_rate": 0.0011973904642750618, "loss": 0.8046, "step": 9446 }, { "epoch": 0.2535154572778016, "grad_norm": 0.2197265625, "learning_rate": 0.0011973878455048526, "loss": 0.7021, "step": 9447 }, { "epoch": 0.25354229282954055, "grad_norm": 0.232421875, "learning_rate": 0.0011973852254241505, "loss": 0.7769, "step": 9448 }, { "epoch": 0.25356912838127954, "grad_norm": 0.232421875, "learning_rate": 0.0011973826040329607, "loss": 0.7549, "step": 9449 }, { "epoch": 0.2535959639330185, "grad_norm": 0.2236328125, "learning_rate": 0.0011973799813312894, "loss": 0.7928, "step": 9450 }, { "epoch": 0.2536227994847574, "grad_norm": 0.2314453125, "learning_rate": 0.0011973773573191425, "loss": 0.8209, "step": 9451 }, { "epoch": 0.25364963503649635, "grad_norm": 0.2294921875, "learning_rate": 0.001197374731996525, "loss": 0.8212, "step": 9452 }, { "epoch": 0.2536764705882353, "grad_norm": 0.224609375, "learning_rate": 0.0011973721053634432, "loss": 0.7868, "step": 9453 }, { "epoch": 0.2537033061399742, "grad_norm": 0.232421875, "learning_rate": 0.0011973694774199027, "loss": 0.8128, "step": 9454 }, { "epoch": 0.2537301416917132, "grad_norm": 0.2236328125, "learning_rate": 0.0011973668481659095, "loss": 0.7605, "step": 9455 }, { "epoch": 0.25375697724345214, "grad_norm": 0.2314453125, "learning_rate": 0.001197364217601469, "loss": 0.7427, "step": 9456 }, { "epoch": 0.2537838127951911, "grad_norm": 0.236328125, "learning_rate": 0.0011973615857265871, "loss": 0.8325, "step": 9457 }, { "epoch": 0.25381064834693, "grad_norm": 0.2177734375, "learning_rate": 0.0011973589525412697, "loss": 0.7616, "step": 9458 }, { "epoch": 0.25383748389866895, "grad_norm": 0.23828125, "learning_rate": 0.0011973563180455225, "loss": 0.8527, "step": 9459 }, { "epoch": 0.2538643194504079, "grad_norm": 0.2373046875, "learning_rate": 0.0011973536822393513, "loss": 0.7461, "step": 9460 }, { "epoch": 0.2538911550021468, "grad_norm": 0.2373046875, "learning_rate": 0.001197351045122762, "loss": 0.8353, "step": 9461 }, { "epoch": 0.2539179905538858, "grad_norm": 0.2216796875, "learning_rate": 0.0011973484066957602, "loss": 0.7279, "step": 9462 }, { "epoch": 0.25394482610562474, "grad_norm": 0.2353515625, "learning_rate": 0.0011973457669583517, "loss": 0.7973, "step": 9463 }, { "epoch": 0.2539716616573637, "grad_norm": 0.2216796875, "learning_rate": 0.0011973431259105422, "loss": 0.7966, "step": 9464 }, { "epoch": 0.2539984972091026, "grad_norm": 0.2197265625, "learning_rate": 0.0011973404835523378, "loss": 0.7342, "step": 9465 }, { "epoch": 0.25402533276084155, "grad_norm": 0.2392578125, "learning_rate": 0.001197337839883744, "loss": 0.851, "step": 9466 }, { "epoch": 0.2540521683125805, "grad_norm": 0.2314453125, "learning_rate": 0.0011973351949047667, "loss": 0.7493, "step": 9467 }, { "epoch": 0.2540790038643195, "grad_norm": 0.228515625, "learning_rate": 0.0011973325486154117, "loss": 0.8371, "step": 9468 }, { "epoch": 0.2541058394160584, "grad_norm": 0.208984375, "learning_rate": 0.001197329901015685, "loss": 0.7131, "step": 9469 }, { "epoch": 0.25413267496779735, "grad_norm": 0.2333984375, "learning_rate": 0.0011973272521055922, "loss": 0.8522, "step": 9470 }, { "epoch": 0.2541595105195363, "grad_norm": 0.224609375, "learning_rate": 0.001197324601885139, "loss": 0.7951, "step": 9471 }, { "epoch": 0.2541863460712752, "grad_norm": 0.2197265625, "learning_rate": 0.0011973219503543316, "loss": 0.7495, "step": 9472 }, { "epoch": 0.25421318162301415, "grad_norm": 0.251953125, "learning_rate": 0.0011973192975131754, "loss": 0.7704, "step": 9473 }, { "epoch": 0.2542400171747531, "grad_norm": 0.2431640625, "learning_rate": 0.0011973166433616764, "loss": 0.9483, "step": 9474 }, { "epoch": 0.2542668527264921, "grad_norm": 0.2314453125, "learning_rate": 0.0011973139878998403, "loss": 0.8166, "step": 9475 }, { "epoch": 0.254293688278231, "grad_norm": 0.2294921875, "learning_rate": 0.0011973113311276732, "loss": 0.766, "step": 9476 }, { "epoch": 0.25432052382996995, "grad_norm": 0.2373046875, "learning_rate": 0.0011973086730451808, "loss": 0.8011, "step": 9477 }, { "epoch": 0.2543473593817089, "grad_norm": 0.2216796875, "learning_rate": 0.001197306013652369, "loss": 0.7094, "step": 9478 }, { "epoch": 0.2543741949334478, "grad_norm": 0.2255859375, "learning_rate": 0.0011973033529492432, "loss": 0.7148, "step": 9479 }, { "epoch": 0.25440103048518675, "grad_norm": 0.224609375, "learning_rate": 0.0011973006909358098, "loss": 0.7682, "step": 9480 }, { "epoch": 0.25442786603692574, "grad_norm": 0.2392578125, "learning_rate": 0.0011972980276120744, "loss": 0.8821, "step": 9481 }, { "epoch": 0.2544547015886647, "grad_norm": 0.208984375, "learning_rate": 0.001197295362978043, "loss": 0.7284, "step": 9482 }, { "epoch": 0.2544815371404036, "grad_norm": 0.2275390625, "learning_rate": 0.0011972926970337209, "loss": 0.7767, "step": 9483 }, { "epoch": 0.25450837269214255, "grad_norm": 0.2333984375, "learning_rate": 0.0011972900297791147, "loss": 0.7643, "step": 9484 }, { "epoch": 0.2545352082438815, "grad_norm": 0.23828125, "learning_rate": 0.0011972873612142298, "loss": 0.8082, "step": 9485 }, { "epoch": 0.2545620437956204, "grad_norm": 0.251953125, "learning_rate": 0.001197284691339072, "loss": 0.8699, "step": 9486 }, { "epoch": 0.2545888793473594, "grad_norm": 0.236328125, "learning_rate": 0.0011972820201536474, "loss": 0.7942, "step": 9487 }, { "epoch": 0.25461571489909834, "grad_norm": 0.23828125, "learning_rate": 0.001197279347657962, "loss": 0.8172, "step": 9488 }, { "epoch": 0.2546425504508373, "grad_norm": 0.2275390625, "learning_rate": 0.0011972766738520212, "loss": 0.7648, "step": 9489 }, { "epoch": 0.2546693860025762, "grad_norm": 0.2275390625, "learning_rate": 0.001197273998735831, "loss": 0.8221, "step": 9490 }, { "epoch": 0.25469622155431515, "grad_norm": 0.2255859375, "learning_rate": 0.0011972713223093977, "loss": 0.775, "step": 9491 }, { "epoch": 0.2547230571060541, "grad_norm": 0.2333984375, "learning_rate": 0.0011972686445727266, "loss": 0.7993, "step": 9492 }, { "epoch": 0.254749892657793, "grad_norm": 0.2373046875, "learning_rate": 0.0011972659655258239, "loss": 0.856, "step": 9493 }, { "epoch": 0.254776728209532, "grad_norm": 0.2412109375, "learning_rate": 0.0011972632851686954, "loss": 0.8098, "step": 9494 }, { "epoch": 0.25480356376127095, "grad_norm": 0.2109375, "learning_rate": 0.0011972606035013469, "loss": 0.7359, "step": 9495 }, { "epoch": 0.2548303993130099, "grad_norm": 0.2421875, "learning_rate": 0.0011972579205237843, "loss": 0.8811, "step": 9496 }, { "epoch": 0.2548572348647488, "grad_norm": 0.2392578125, "learning_rate": 0.0011972552362360135, "loss": 0.8319, "step": 9497 }, { "epoch": 0.25488407041648775, "grad_norm": 0.2373046875, "learning_rate": 0.0011972525506380406, "loss": 0.7406, "step": 9498 }, { "epoch": 0.2549109059682267, "grad_norm": 0.2451171875, "learning_rate": 0.0011972498637298713, "loss": 0.839, "step": 9499 }, { "epoch": 0.2549377415199657, "grad_norm": 0.236328125, "learning_rate": 0.0011972471755115116, "loss": 0.773, "step": 9500 }, { "epoch": 0.2549645770717046, "grad_norm": 0.2294921875, "learning_rate": 0.001197244485982967, "loss": 0.6922, "step": 9501 }, { "epoch": 0.25499141262344355, "grad_norm": 0.2236328125, "learning_rate": 0.001197241795144244, "loss": 0.6839, "step": 9502 }, { "epoch": 0.2550182481751825, "grad_norm": 0.2333984375, "learning_rate": 0.001197239102995348, "loss": 0.8246, "step": 9503 }, { "epoch": 0.2550450837269214, "grad_norm": 0.2275390625, "learning_rate": 0.0011972364095362853, "loss": 0.7524, "step": 9504 }, { "epoch": 0.25507191927866035, "grad_norm": 0.2265625, "learning_rate": 0.0011972337147670615, "loss": 0.7494, "step": 9505 }, { "epoch": 0.2550987548303993, "grad_norm": 0.23828125, "learning_rate": 0.0011972310186876827, "loss": 0.8548, "step": 9506 }, { "epoch": 0.2551255903821383, "grad_norm": 0.2490234375, "learning_rate": 0.0011972283212981548, "loss": 0.8511, "step": 9507 }, { "epoch": 0.2551524259338772, "grad_norm": 0.228515625, "learning_rate": 0.0011972256225984837, "loss": 0.827, "step": 9508 }, { "epoch": 0.25517926148561615, "grad_norm": 0.2197265625, "learning_rate": 0.0011972229225886751, "loss": 0.7403, "step": 9509 }, { "epoch": 0.2552060970373551, "grad_norm": 0.2373046875, "learning_rate": 0.0011972202212687352, "loss": 0.8581, "step": 9510 }, { "epoch": 0.255232932589094, "grad_norm": 0.248046875, "learning_rate": 0.00119721751863867, "loss": 0.8817, "step": 9511 }, { "epoch": 0.25525976814083295, "grad_norm": 0.224609375, "learning_rate": 0.001197214814698485, "loss": 0.6732, "step": 9512 }, { "epoch": 0.25528660369257195, "grad_norm": 0.2138671875, "learning_rate": 0.0011972121094481864, "loss": 0.6694, "step": 9513 }, { "epoch": 0.2553134392443109, "grad_norm": 0.2470703125, "learning_rate": 0.0011972094028877803, "loss": 0.7786, "step": 9514 }, { "epoch": 0.2553402747960498, "grad_norm": 0.22265625, "learning_rate": 0.0011972066950172726, "loss": 0.6992, "step": 9515 }, { "epoch": 0.25536711034778875, "grad_norm": 0.2119140625, "learning_rate": 0.001197203985836669, "loss": 0.6721, "step": 9516 }, { "epoch": 0.2553939458995277, "grad_norm": 0.2265625, "learning_rate": 0.0011972012753459754, "loss": 0.6952, "step": 9517 }, { "epoch": 0.2554207814512666, "grad_norm": 0.2373046875, "learning_rate": 0.001197198563545198, "loss": 0.797, "step": 9518 }, { "epoch": 0.25544761700300556, "grad_norm": 0.2421875, "learning_rate": 0.0011971958504343428, "loss": 0.8945, "step": 9519 }, { "epoch": 0.25547445255474455, "grad_norm": 0.2275390625, "learning_rate": 0.0011971931360134152, "loss": 0.8218, "step": 9520 }, { "epoch": 0.2555012881064835, "grad_norm": 0.2373046875, "learning_rate": 0.0011971904202824218, "loss": 0.8147, "step": 9521 }, { "epoch": 0.2555281236582224, "grad_norm": 0.23828125, "learning_rate": 0.0011971877032413683, "loss": 0.8893, "step": 9522 }, { "epoch": 0.25555495920996135, "grad_norm": 0.2216796875, "learning_rate": 0.0011971849848902607, "loss": 0.7537, "step": 9523 }, { "epoch": 0.2555817947617003, "grad_norm": 0.2294921875, "learning_rate": 0.001197182265229105, "loss": 0.7531, "step": 9524 }, { "epoch": 0.2556086303134392, "grad_norm": 0.2236328125, "learning_rate": 0.001197179544257907, "loss": 0.7873, "step": 9525 }, { "epoch": 0.2556354658651782, "grad_norm": 0.21875, "learning_rate": 0.0011971768219766728, "loss": 0.7824, "step": 9526 }, { "epoch": 0.25566230141691715, "grad_norm": 0.2041015625, "learning_rate": 0.0011971740983854084, "loss": 0.6872, "step": 9527 }, { "epoch": 0.2556891369686561, "grad_norm": 0.2333984375, "learning_rate": 0.0011971713734841198, "loss": 0.8289, "step": 9528 }, { "epoch": 0.255715972520395, "grad_norm": 0.251953125, "learning_rate": 0.0011971686472728126, "loss": 0.8422, "step": 9529 }, { "epoch": 0.25574280807213395, "grad_norm": 0.23046875, "learning_rate": 0.0011971659197514933, "loss": 0.8521, "step": 9530 }, { "epoch": 0.2557696436238729, "grad_norm": 0.2294921875, "learning_rate": 0.0011971631909201677, "loss": 0.7462, "step": 9531 }, { "epoch": 0.2557964791756118, "grad_norm": 0.22265625, "learning_rate": 0.0011971604607788418, "loss": 0.8034, "step": 9532 }, { "epoch": 0.2558233147273508, "grad_norm": 0.224609375, "learning_rate": 0.001197157729327521, "loss": 0.7512, "step": 9533 }, { "epoch": 0.25585015027908975, "grad_norm": 0.2353515625, "learning_rate": 0.0011971549965662123, "loss": 0.8505, "step": 9534 }, { "epoch": 0.2558769858308287, "grad_norm": 0.25, "learning_rate": 0.0011971522624949211, "loss": 0.8525, "step": 9535 }, { "epoch": 0.2559038213825676, "grad_norm": 0.2236328125, "learning_rate": 0.0011971495271136537, "loss": 0.7437, "step": 9536 }, { "epoch": 0.25593065693430656, "grad_norm": 0.248046875, "learning_rate": 0.001197146790422416, "loss": 0.8182, "step": 9537 }, { "epoch": 0.2559574924860455, "grad_norm": 0.2138671875, "learning_rate": 0.0011971440524212137, "loss": 0.7098, "step": 9538 }, { "epoch": 0.2559843280377845, "grad_norm": 0.236328125, "learning_rate": 0.0011971413131100527, "loss": 0.8123, "step": 9539 }, { "epoch": 0.2560111635895234, "grad_norm": 0.2314453125, "learning_rate": 0.0011971385724889399, "loss": 0.8161, "step": 9540 }, { "epoch": 0.25603799914126235, "grad_norm": 0.2333984375, "learning_rate": 0.0011971358305578805, "loss": 0.7849, "step": 9541 }, { "epoch": 0.2560648346930013, "grad_norm": 0.2392578125, "learning_rate": 0.001197133087316881, "loss": 0.7993, "step": 9542 }, { "epoch": 0.2560916702447402, "grad_norm": 0.2197265625, "learning_rate": 0.001197130342765947, "loss": 0.7506, "step": 9543 }, { "epoch": 0.25611850579647916, "grad_norm": 0.232421875, "learning_rate": 0.0011971275969050845, "loss": 0.8158, "step": 9544 }, { "epoch": 0.2561453413482181, "grad_norm": 0.2314453125, "learning_rate": 0.0011971248497343, "loss": 0.8123, "step": 9545 }, { "epoch": 0.2561721768999571, "grad_norm": 0.2431640625, "learning_rate": 0.0011971221012535992, "loss": 0.7938, "step": 9546 }, { "epoch": 0.256199012451696, "grad_norm": 0.2431640625, "learning_rate": 0.001197119351462988, "loss": 0.8691, "step": 9547 }, { "epoch": 0.25622584800343495, "grad_norm": 0.224609375, "learning_rate": 0.0011971166003624729, "loss": 0.7254, "step": 9548 }, { "epoch": 0.2562526835551739, "grad_norm": 0.2451171875, "learning_rate": 0.0011971138479520596, "loss": 0.802, "step": 9549 }, { "epoch": 0.2562795191069128, "grad_norm": 0.234375, "learning_rate": 0.001197111094231754, "loss": 0.8334, "step": 9550 }, { "epoch": 0.25630635465865176, "grad_norm": 0.2265625, "learning_rate": 0.0011971083392015624, "loss": 0.7248, "step": 9551 }, { "epoch": 0.25633319021039075, "grad_norm": 0.2099609375, "learning_rate": 0.001197105582861491, "loss": 0.7219, "step": 9552 }, { "epoch": 0.2563600257621297, "grad_norm": 0.2080078125, "learning_rate": 0.0011971028252115454, "loss": 0.6998, "step": 9553 }, { "epoch": 0.2563868613138686, "grad_norm": 0.2177734375, "learning_rate": 0.001197100066251732, "loss": 0.7881, "step": 9554 }, { "epoch": 0.25641369686560755, "grad_norm": 0.228515625, "learning_rate": 0.0011970973059820566, "loss": 0.7866, "step": 9555 }, { "epoch": 0.2564405324173465, "grad_norm": 0.224609375, "learning_rate": 0.0011970945444025253, "loss": 0.7916, "step": 9556 }, { "epoch": 0.2564673679690854, "grad_norm": 0.2255859375, "learning_rate": 0.0011970917815131445, "loss": 0.6795, "step": 9557 }, { "epoch": 0.2564942035208244, "grad_norm": 0.2451171875, "learning_rate": 0.0011970890173139198, "loss": 0.8144, "step": 9558 }, { "epoch": 0.25652103907256335, "grad_norm": 0.220703125, "learning_rate": 0.0011970862518048575, "loss": 0.7453, "step": 9559 }, { "epoch": 0.2565478746243023, "grad_norm": 0.232421875, "learning_rate": 0.0011970834849859638, "loss": 0.8503, "step": 9560 }, { "epoch": 0.2565747101760412, "grad_norm": 0.2255859375, "learning_rate": 0.0011970807168572445, "loss": 0.8423, "step": 9561 }, { "epoch": 0.25660154572778016, "grad_norm": 0.2353515625, "learning_rate": 0.0011970779474187057, "loss": 0.794, "step": 9562 }, { "epoch": 0.2566283812795191, "grad_norm": 0.232421875, "learning_rate": 0.0011970751766703536, "loss": 0.7562, "step": 9563 }, { "epoch": 0.256655216831258, "grad_norm": 0.2353515625, "learning_rate": 0.001197072404612194, "loss": 0.7534, "step": 9564 }, { "epoch": 0.256682052382997, "grad_norm": 0.232421875, "learning_rate": 0.0011970696312442334, "loss": 0.8001, "step": 9565 }, { "epoch": 0.25670888793473595, "grad_norm": 0.2392578125, "learning_rate": 0.0011970668565664778, "loss": 0.8406, "step": 9566 }, { "epoch": 0.2567357234864749, "grad_norm": 0.2265625, "learning_rate": 0.0011970640805789331, "loss": 0.7649, "step": 9567 }, { "epoch": 0.2567625590382138, "grad_norm": 0.255859375, "learning_rate": 0.0011970613032816055, "loss": 0.9041, "step": 9568 }, { "epoch": 0.25678939458995276, "grad_norm": 0.23828125, "learning_rate": 0.001197058524674501, "loss": 0.842, "step": 9569 }, { "epoch": 0.2568162301416917, "grad_norm": 0.208984375, "learning_rate": 0.0011970557447576256, "loss": 0.7216, "step": 9570 }, { "epoch": 0.2568430656934307, "grad_norm": 0.2138671875, "learning_rate": 0.0011970529635309858, "loss": 0.7443, "step": 9571 }, { "epoch": 0.2568699012451696, "grad_norm": 0.2353515625, "learning_rate": 0.0011970501809945874, "loss": 0.8082, "step": 9572 }, { "epoch": 0.25689673679690855, "grad_norm": 0.2314453125, "learning_rate": 0.0011970473971484364, "loss": 0.7857, "step": 9573 }, { "epoch": 0.2569235723486475, "grad_norm": 0.220703125, "learning_rate": 0.0011970446119925393, "loss": 0.7728, "step": 9574 }, { "epoch": 0.2569504079003864, "grad_norm": 0.23828125, "learning_rate": 0.0011970418255269019, "loss": 0.7863, "step": 9575 }, { "epoch": 0.25697724345212536, "grad_norm": 0.22265625, "learning_rate": 0.0011970390377515302, "loss": 0.759, "step": 9576 }, { "epoch": 0.2570040790038643, "grad_norm": 0.248046875, "learning_rate": 0.0011970362486664306, "loss": 0.8801, "step": 9577 }, { "epoch": 0.2570309145556033, "grad_norm": 0.1953125, "learning_rate": 0.0011970334582716092, "loss": 0.6156, "step": 9578 }, { "epoch": 0.2570577501073422, "grad_norm": 0.251953125, "learning_rate": 0.0011970306665670721, "loss": 0.9695, "step": 9579 }, { "epoch": 0.25708458565908116, "grad_norm": 0.25390625, "learning_rate": 0.0011970278735528254, "loss": 0.8417, "step": 9580 }, { "epoch": 0.2571114212108201, "grad_norm": 0.2255859375, "learning_rate": 0.001197025079228875, "loss": 0.7977, "step": 9581 }, { "epoch": 0.257138256762559, "grad_norm": 0.234375, "learning_rate": 0.0011970222835952272, "loss": 0.8874, "step": 9582 }, { "epoch": 0.25716509231429796, "grad_norm": 0.232421875, "learning_rate": 0.0011970194866518884, "loss": 0.8534, "step": 9583 }, { "epoch": 0.25719192786603695, "grad_norm": 0.216796875, "learning_rate": 0.0011970166883988641, "loss": 0.745, "step": 9584 }, { "epoch": 0.2572187634177759, "grad_norm": 0.212890625, "learning_rate": 0.0011970138888361612, "loss": 0.7251, "step": 9585 }, { "epoch": 0.2572455989695148, "grad_norm": 0.2294921875, "learning_rate": 0.0011970110879637851, "loss": 0.8081, "step": 9586 }, { "epoch": 0.25727243452125376, "grad_norm": 0.23046875, "learning_rate": 0.0011970082857817427, "loss": 0.782, "step": 9587 }, { "epoch": 0.2572992700729927, "grad_norm": 0.255859375, "learning_rate": 0.0011970054822900398, "loss": 0.9094, "step": 9588 }, { "epoch": 0.2573261056247316, "grad_norm": 0.2265625, "learning_rate": 0.001197002677488682, "loss": 0.8005, "step": 9589 }, { "epoch": 0.25735294117647056, "grad_norm": 0.228515625, "learning_rate": 0.0011969998713776764, "loss": 0.7573, "step": 9590 }, { "epoch": 0.25737977672820955, "grad_norm": 0.2392578125, "learning_rate": 0.001196997063957029, "loss": 0.822, "step": 9591 }, { "epoch": 0.2574066122799485, "grad_norm": 0.2099609375, "learning_rate": 0.001196994255226745, "loss": 0.6566, "step": 9592 }, { "epoch": 0.2574334478316874, "grad_norm": 0.2158203125, "learning_rate": 0.0011969914451868317, "loss": 0.7467, "step": 9593 }, { "epoch": 0.25746028338342636, "grad_norm": 0.2412109375, "learning_rate": 0.0011969886338372945, "loss": 0.7996, "step": 9594 }, { "epoch": 0.2574871189351653, "grad_norm": 0.2294921875, "learning_rate": 0.00119698582117814, "loss": 0.7661, "step": 9595 }, { "epoch": 0.25751395448690423, "grad_norm": 0.2109375, "learning_rate": 0.0011969830072093744, "loss": 0.7226, "step": 9596 }, { "epoch": 0.2575407900386432, "grad_norm": 0.2333984375, "learning_rate": 0.0011969801919310037, "loss": 0.8297, "step": 9597 }, { "epoch": 0.25756762559038215, "grad_norm": 0.21875, "learning_rate": 0.0011969773753430339, "loss": 0.7313, "step": 9598 }, { "epoch": 0.2575944611421211, "grad_norm": 0.2314453125, "learning_rate": 0.0011969745574454715, "loss": 0.7706, "step": 9599 }, { "epoch": 0.25762129669386, "grad_norm": 0.212890625, "learning_rate": 0.0011969717382383223, "loss": 0.7246, "step": 9600 }, { "epoch": 0.25764813224559896, "grad_norm": 0.2314453125, "learning_rate": 0.0011969689177215932, "loss": 0.8073, "step": 9601 }, { "epoch": 0.2576749677973379, "grad_norm": 0.2294921875, "learning_rate": 0.0011969660958952896, "loss": 0.7774, "step": 9602 }, { "epoch": 0.25770180334907683, "grad_norm": 0.2392578125, "learning_rate": 0.001196963272759418, "loss": 0.8998, "step": 9603 }, { "epoch": 0.2577286389008158, "grad_norm": 0.208984375, "learning_rate": 0.0011969604483139848, "loss": 0.6802, "step": 9604 }, { "epoch": 0.25775547445255476, "grad_norm": 0.23046875, "learning_rate": 0.001196957622558996, "loss": 0.7892, "step": 9605 }, { "epoch": 0.2577823100042937, "grad_norm": 0.2294921875, "learning_rate": 0.0011969547954944575, "loss": 0.7374, "step": 9606 }, { "epoch": 0.2578091455560326, "grad_norm": 0.251953125, "learning_rate": 0.001196951967120376, "loss": 0.9593, "step": 9607 }, { "epoch": 0.25783598110777156, "grad_norm": 0.220703125, "learning_rate": 0.0011969491374367576, "loss": 0.7103, "step": 9608 }, { "epoch": 0.2578628166595105, "grad_norm": 0.234375, "learning_rate": 0.0011969463064436083, "loss": 0.843, "step": 9609 }, { "epoch": 0.2578896522112495, "grad_norm": 0.2158203125, "learning_rate": 0.0011969434741409344, "loss": 0.7356, "step": 9610 }, { "epoch": 0.2579164877629884, "grad_norm": 0.25, "learning_rate": 0.0011969406405287423, "loss": 0.838, "step": 9611 }, { "epoch": 0.25794332331472736, "grad_norm": 0.244140625, "learning_rate": 0.001196937805607038, "loss": 0.8395, "step": 9612 }, { "epoch": 0.2579701588664663, "grad_norm": 0.236328125, "learning_rate": 0.0011969349693758276, "loss": 0.7927, "step": 9613 }, { "epoch": 0.2579969944182052, "grad_norm": 0.2294921875, "learning_rate": 0.0011969321318351179, "loss": 0.7461, "step": 9614 }, { "epoch": 0.25802382996994416, "grad_norm": 0.23046875, "learning_rate": 0.0011969292929849143, "loss": 0.8104, "step": 9615 }, { "epoch": 0.25805066552168315, "grad_norm": 0.236328125, "learning_rate": 0.0011969264528252238, "loss": 0.7793, "step": 9616 }, { "epoch": 0.2580775010734221, "grad_norm": 0.224609375, "learning_rate": 0.0011969236113560518, "loss": 0.7634, "step": 9617 }, { "epoch": 0.258104336625161, "grad_norm": 0.24609375, "learning_rate": 0.0011969207685774055, "loss": 0.9075, "step": 9618 }, { "epoch": 0.25813117217689996, "grad_norm": 0.224609375, "learning_rate": 0.0011969179244892905, "loss": 0.7021, "step": 9619 }, { "epoch": 0.2581580077286389, "grad_norm": 0.224609375, "learning_rate": 0.001196915079091713, "loss": 0.6828, "step": 9620 }, { "epoch": 0.25818484328037783, "grad_norm": 0.2431640625, "learning_rate": 0.0011969122323846797, "loss": 0.8905, "step": 9621 }, { "epoch": 0.25821167883211676, "grad_norm": 0.2333984375, "learning_rate": 0.0011969093843681965, "loss": 0.7877, "step": 9622 }, { "epoch": 0.25823851438385576, "grad_norm": 0.2353515625, "learning_rate": 0.0011969065350422696, "loss": 0.811, "step": 9623 }, { "epoch": 0.2582653499355947, "grad_norm": 0.2265625, "learning_rate": 0.0011969036844069053, "loss": 0.823, "step": 9624 }, { "epoch": 0.2582921854873336, "grad_norm": 0.22265625, "learning_rate": 0.0011969008324621102, "loss": 0.7413, "step": 9625 }, { "epoch": 0.25831902103907256, "grad_norm": 0.216796875, "learning_rate": 0.0011968979792078903, "loss": 0.7162, "step": 9626 }, { "epoch": 0.2583458565908115, "grad_norm": 0.224609375, "learning_rate": 0.0011968951246442516, "loss": 0.7943, "step": 9627 }, { "epoch": 0.25837269214255043, "grad_norm": 0.248046875, "learning_rate": 0.0011968922687712006, "loss": 0.9955, "step": 9628 }, { "epoch": 0.2583995276942894, "grad_norm": 0.2353515625, "learning_rate": 0.0011968894115887438, "loss": 0.8516, "step": 9629 }, { "epoch": 0.25842636324602836, "grad_norm": 0.2333984375, "learning_rate": 0.0011968865530968872, "loss": 0.8476, "step": 9630 }, { "epoch": 0.2584531987977673, "grad_norm": 0.2265625, "learning_rate": 0.001196883693295637, "loss": 0.8268, "step": 9631 }, { "epoch": 0.2584800343495062, "grad_norm": 0.2138671875, "learning_rate": 0.0011968808321849995, "loss": 0.6554, "step": 9632 }, { "epoch": 0.25850686990124516, "grad_norm": 0.2421875, "learning_rate": 0.0011968779697649813, "loss": 0.7534, "step": 9633 }, { "epoch": 0.2585337054529841, "grad_norm": 0.23046875, "learning_rate": 0.0011968751060355883, "loss": 0.8349, "step": 9634 }, { "epoch": 0.25856054100472303, "grad_norm": 0.2236328125, "learning_rate": 0.001196872240996827, "loss": 0.7471, "step": 9635 }, { "epoch": 0.258587376556462, "grad_norm": 0.2216796875, "learning_rate": 0.0011968693746487035, "loss": 0.773, "step": 9636 }, { "epoch": 0.25861421210820096, "grad_norm": 0.2294921875, "learning_rate": 0.0011968665069912242, "loss": 0.8362, "step": 9637 }, { "epoch": 0.2586410476599399, "grad_norm": 0.2431640625, "learning_rate": 0.0011968636380243955, "loss": 0.866, "step": 9638 }, { "epoch": 0.25866788321167883, "grad_norm": 0.2275390625, "learning_rate": 0.0011968607677482234, "loss": 0.778, "step": 9639 }, { "epoch": 0.25869471876341776, "grad_norm": 0.236328125, "learning_rate": 0.0011968578961627143, "loss": 0.7929, "step": 9640 }, { "epoch": 0.2587215543151567, "grad_norm": 0.2294921875, "learning_rate": 0.001196855023267875, "loss": 0.7565, "step": 9641 }, { "epoch": 0.2587483898668957, "grad_norm": 0.2138671875, "learning_rate": 0.001196852149063711, "loss": 0.8028, "step": 9642 }, { "epoch": 0.2587752254186346, "grad_norm": 0.2216796875, "learning_rate": 0.001196849273550229, "loss": 0.7892, "step": 9643 }, { "epoch": 0.25880206097037356, "grad_norm": 0.21875, "learning_rate": 0.0011968463967274355, "loss": 0.745, "step": 9644 }, { "epoch": 0.2588288965221125, "grad_norm": 0.2431640625, "learning_rate": 0.0011968435185953365, "loss": 0.768, "step": 9645 }, { "epoch": 0.25885573207385143, "grad_norm": 0.2314453125, "learning_rate": 0.0011968406391539383, "loss": 0.8889, "step": 9646 }, { "epoch": 0.25888256762559037, "grad_norm": 0.22265625, "learning_rate": 0.0011968377584032474, "loss": 0.8207, "step": 9647 }, { "epoch": 0.2589094031773293, "grad_norm": 0.232421875, "learning_rate": 0.0011968348763432702, "loss": 0.8222, "step": 9648 }, { "epoch": 0.2589362387290683, "grad_norm": 0.2412109375, "learning_rate": 0.0011968319929740129, "loss": 0.8059, "step": 9649 }, { "epoch": 0.2589630742808072, "grad_norm": 0.2236328125, "learning_rate": 0.0011968291082954815, "loss": 0.6926, "step": 9650 }, { "epoch": 0.25898990983254616, "grad_norm": 0.22265625, "learning_rate": 0.001196826222307683, "loss": 0.6741, "step": 9651 }, { "epoch": 0.2590167453842851, "grad_norm": 0.2236328125, "learning_rate": 0.001196823335010623, "loss": 0.6983, "step": 9652 }, { "epoch": 0.25904358093602403, "grad_norm": 0.228515625, "learning_rate": 0.0011968204464043084, "loss": 0.8102, "step": 9653 }, { "epoch": 0.25907041648776297, "grad_norm": 0.2333984375, "learning_rate": 0.0011968175564887453, "loss": 0.8015, "step": 9654 }, { "epoch": 0.25909725203950196, "grad_norm": 0.2314453125, "learning_rate": 0.0011968146652639401, "loss": 0.8134, "step": 9655 }, { "epoch": 0.2591240875912409, "grad_norm": 0.2314453125, "learning_rate": 0.0011968117727298988, "loss": 0.7637, "step": 9656 }, { "epoch": 0.2591509231429798, "grad_norm": 0.23046875, "learning_rate": 0.0011968088788866283, "loss": 0.7894, "step": 9657 }, { "epoch": 0.25917775869471876, "grad_norm": 0.2177734375, "learning_rate": 0.0011968059837341347, "loss": 0.7125, "step": 9658 }, { "epoch": 0.2592045942464577, "grad_norm": 0.2392578125, "learning_rate": 0.0011968030872724243, "loss": 0.8455, "step": 9659 }, { "epoch": 0.25923142979819663, "grad_norm": 0.22265625, "learning_rate": 0.0011968001895015037, "loss": 0.7875, "step": 9660 }, { "epoch": 0.25925826534993557, "grad_norm": 0.2294921875, "learning_rate": 0.0011967972904213789, "loss": 0.721, "step": 9661 }, { "epoch": 0.25928510090167456, "grad_norm": 0.220703125, "learning_rate": 0.0011967943900320565, "loss": 0.7129, "step": 9662 }, { "epoch": 0.2593119364534135, "grad_norm": 0.2294921875, "learning_rate": 0.0011967914883335427, "loss": 0.8296, "step": 9663 }, { "epoch": 0.25933877200515243, "grad_norm": 0.2421875, "learning_rate": 0.001196788585325844, "loss": 0.9557, "step": 9664 }, { "epoch": 0.25936560755689136, "grad_norm": 0.224609375, "learning_rate": 0.0011967856810089668, "loss": 0.7891, "step": 9665 }, { "epoch": 0.2593924431086303, "grad_norm": 0.2265625, "learning_rate": 0.0011967827753829171, "loss": 0.7947, "step": 9666 }, { "epoch": 0.25941927866036923, "grad_norm": 0.2490234375, "learning_rate": 0.001196779868447702, "loss": 0.8778, "step": 9667 }, { "epoch": 0.2594461142121082, "grad_norm": 0.224609375, "learning_rate": 0.001196776960203327, "loss": 0.7492, "step": 9668 }, { "epoch": 0.25947294976384716, "grad_norm": 0.23828125, "learning_rate": 0.0011967740506497993, "loss": 0.9066, "step": 9669 }, { "epoch": 0.2594997853155861, "grad_norm": 0.2109375, "learning_rate": 0.0011967711397871247, "loss": 0.7006, "step": 9670 }, { "epoch": 0.25952662086732503, "grad_norm": 0.2197265625, "learning_rate": 0.0011967682276153098, "loss": 0.7884, "step": 9671 }, { "epoch": 0.25955345641906397, "grad_norm": 0.2119140625, "learning_rate": 0.0011967653141343611, "loss": 0.7094, "step": 9672 }, { "epoch": 0.2595802919708029, "grad_norm": 0.234375, "learning_rate": 0.001196762399344285, "loss": 0.6911, "step": 9673 }, { "epoch": 0.25960712752254184, "grad_norm": 0.2197265625, "learning_rate": 0.0011967594832450873, "loss": 0.8017, "step": 9674 }, { "epoch": 0.2596339630742808, "grad_norm": 0.212890625, "learning_rate": 0.0011967565658367752, "loss": 0.704, "step": 9675 }, { "epoch": 0.25966079862601976, "grad_norm": 0.2216796875, "learning_rate": 0.0011967536471193547, "loss": 0.7372, "step": 9676 }, { "epoch": 0.2596876341777587, "grad_norm": 0.216796875, "learning_rate": 0.0011967507270928322, "loss": 0.7142, "step": 9677 }, { "epoch": 0.25971446972949763, "grad_norm": 0.2421875, "learning_rate": 0.0011967478057572142, "loss": 0.8575, "step": 9678 }, { "epoch": 0.25974130528123657, "grad_norm": 0.2236328125, "learning_rate": 0.0011967448831125072, "loss": 0.7486, "step": 9679 }, { "epoch": 0.2597681408329755, "grad_norm": 0.2314453125, "learning_rate": 0.0011967419591587172, "loss": 0.7981, "step": 9680 }, { "epoch": 0.2597949763847145, "grad_norm": 0.23046875, "learning_rate": 0.0011967390338958512, "loss": 0.785, "step": 9681 }, { "epoch": 0.25982181193645343, "grad_norm": 0.212890625, "learning_rate": 0.0011967361073239153, "loss": 0.7365, "step": 9682 }, { "epoch": 0.25984864748819236, "grad_norm": 0.2080078125, "learning_rate": 0.0011967331794429158, "loss": 0.7828, "step": 9683 }, { "epoch": 0.2598754830399313, "grad_norm": 0.2275390625, "learning_rate": 0.0011967302502528592, "loss": 0.8035, "step": 9684 }, { "epoch": 0.25990231859167023, "grad_norm": 0.224609375, "learning_rate": 0.0011967273197537522, "loss": 0.7716, "step": 9685 }, { "epoch": 0.25992915414340917, "grad_norm": 0.20703125, "learning_rate": 0.001196724387945601, "loss": 0.715, "step": 9686 }, { "epoch": 0.25995598969514816, "grad_norm": 0.2138671875, "learning_rate": 0.001196721454828412, "loss": 0.7267, "step": 9687 }, { "epoch": 0.2599828252468871, "grad_norm": 0.21484375, "learning_rate": 0.0011967185204021917, "loss": 0.7282, "step": 9688 }, { "epoch": 0.26000966079862603, "grad_norm": 0.2080078125, "learning_rate": 0.0011967155846669465, "loss": 0.6652, "step": 9689 }, { "epoch": 0.26003649635036497, "grad_norm": 0.228515625, "learning_rate": 0.0011967126476226829, "loss": 0.8233, "step": 9690 }, { "epoch": 0.2600633319021039, "grad_norm": 0.2236328125, "learning_rate": 0.0011967097092694073, "loss": 0.7312, "step": 9691 }, { "epoch": 0.26009016745384284, "grad_norm": 0.2392578125, "learning_rate": 0.0011967067696071261, "loss": 0.8357, "step": 9692 }, { "epoch": 0.26011700300558177, "grad_norm": 0.2197265625, "learning_rate": 0.0011967038286358458, "loss": 0.7216, "step": 9693 }, { "epoch": 0.26014383855732076, "grad_norm": 0.208984375, "learning_rate": 0.001196700886355573, "loss": 0.6577, "step": 9694 }, { "epoch": 0.2601706741090597, "grad_norm": 0.240234375, "learning_rate": 0.0011966979427663138, "loss": 0.8782, "step": 9695 }, { "epoch": 0.26019750966079863, "grad_norm": 0.2216796875, "learning_rate": 0.001196694997868075, "loss": 0.7925, "step": 9696 }, { "epoch": 0.26022434521253757, "grad_norm": 0.21484375, "learning_rate": 0.001196692051660863, "loss": 0.7188, "step": 9697 }, { "epoch": 0.2602511807642765, "grad_norm": 0.2333984375, "learning_rate": 0.001196689104144684, "loss": 0.8328, "step": 9698 }, { "epoch": 0.26027801631601544, "grad_norm": 0.23828125, "learning_rate": 0.0011966861553195445, "loss": 0.8636, "step": 9699 }, { "epoch": 0.2603048518677544, "grad_norm": 0.2333984375, "learning_rate": 0.0011966832051854513, "loss": 0.8293, "step": 9700 }, { "epoch": 0.26033168741949336, "grad_norm": 0.228515625, "learning_rate": 0.0011966802537424109, "loss": 0.8047, "step": 9701 }, { "epoch": 0.2603585229712323, "grad_norm": 0.220703125, "learning_rate": 0.0011966773009904294, "loss": 0.7158, "step": 9702 }, { "epoch": 0.26038535852297123, "grad_norm": 0.228515625, "learning_rate": 0.0011966743469295133, "loss": 0.7607, "step": 9703 }, { "epoch": 0.26041219407471017, "grad_norm": 0.224609375, "learning_rate": 0.0011966713915596695, "loss": 0.7233, "step": 9704 }, { "epoch": 0.2604390296264491, "grad_norm": 0.2392578125, "learning_rate": 0.001196668434880904, "loss": 0.8218, "step": 9705 }, { "epoch": 0.26046586517818804, "grad_norm": 0.2431640625, "learning_rate": 0.0011966654768932237, "loss": 0.8356, "step": 9706 }, { "epoch": 0.26049270072992703, "grad_norm": 0.2265625, "learning_rate": 0.0011966625175966349, "loss": 0.7496, "step": 9707 }, { "epoch": 0.26051953628166596, "grad_norm": 0.2470703125, "learning_rate": 0.0011966595569911438, "loss": 0.8276, "step": 9708 }, { "epoch": 0.2605463718334049, "grad_norm": 0.2275390625, "learning_rate": 0.0011966565950767573, "loss": 0.7316, "step": 9709 }, { "epoch": 0.26057320738514383, "grad_norm": 0.244140625, "learning_rate": 0.0011966536318534819, "loss": 0.8628, "step": 9710 }, { "epoch": 0.26060004293688277, "grad_norm": 0.2333984375, "learning_rate": 0.001196650667321324, "loss": 0.806, "step": 9711 }, { "epoch": 0.2606268784886217, "grad_norm": 0.2470703125, "learning_rate": 0.0011966477014802898, "loss": 0.8705, "step": 9712 }, { "epoch": 0.2606537140403607, "grad_norm": 0.341796875, "learning_rate": 0.0011966447343303862, "loss": 0.8772, "step": 9713 }, { "epoch": 0.26068054959209963, "grad_norm": 0.310546875, "learning_rate": 0.0011966417658716196, "loss": 0.7975, "step": 9714 }, { "epoch": 0.26070738514383857, "grad_norm": 0.263671875, "learning_rate": 0.0011966387961039964, "loss": 0.8392, "step": 9715 }, { "epoch": 0.2607342206955775, "grad_norm": 0.306640625, "learning_rate": 0.0011966358250275232, "loss": 0.7572, "step": 9716 }, { "epoch": 0.26076105624731644, "grad_norm": 0.353515625, "learning_rate": 0.0011966328526422068, "loss": 0.8847, "step": 9717 }, { "epoch": 0.26078789179905537, "grad_norm": 0.296875, "learning_rate": 0.0011966298789480533, "loss": 0.7829, "step": 9718 }, { "epoch": 0.2608147273507943, "grad_norm": 0.2421875, "learning_rate": 0.0011966269039450693, "loss": 0.8783, "step": 9719 }, { "epoch": 0.2608415629025333, "grad_norm": 0.2451171875, "learning_rate": 0.0011966239276332616, "loss": 0.8183, "step": 9720 }, { "epoch": 0.26086839845427223, "grad_norm": 0.265625, "learning_rate": 0.0011966209500126365, "loss": 0.8455, "step": 9721 }, { "epoch": 0.26089523400601117, "grad_norm": 0.2451171875, "learning_rate": 0.0011966179710832003, "loss": 0.8258, "step": 9722 }, { "epoch": 0.2609220695577501, "grad_norm": 0.26953125, "learning_rate": 0.00119661499084496, "loss": 0.8906, "step": 9723 }, { "epoch": 0.26094890510948904, "grad_norm": 0.259765625, "learning_rate": 0.001196612009297922, "loss": 0.8621, "step": 9724 }, { "epoch": 0.260975740661228, "grad_norm": 0.2255859375, "learning_rate": 0.0011966090264420926, "loss": 0.7819, "step": 9725 }, { "epoch": 0.26100257621296696, "grad_norm": 0.2333984375, "learning_rate": 0.0011966060422774786, "loss": 0.807, "step": 9726 }, { "epoch": 0.2610294117647059, "grad_norm": 0.2451171875, "learning_rate": 0.0011966030568040865, "loss": 0.8311, "step": 9727 }, { "epoch": 0.26105624731644483, "grad_norm": 0.248046875, "learning_rate": 0.0011966000700219229, "loss": 0.764, "step": 9728 }, { "epoch": 0.26108308286818377, "grad_norm": 0.255859375, "learning_rate": 0.001196597081930994, "loss": 0.8859, "step": 9729 }, { "epoch": 0.2611099184199227, "grad_norm": 0.23046875, "learning_rate": 0.0011965940925313069, "loss": 0.8064, "step": 9730 }, { "epoch": 0.26113675397166164, "grad_norm": 0.22265625, "learning_rate": 0.0011965911018228677, "loss": 0.7746, "step": 9731 }, { "epoch": 0.2611635895234006, "grad_norm": 0.2294921875, "learning_rate": 0.0011965881098056834, "loss": 0.8199, "step": 9732 }, { "epoch": 0.26119042507513957, "grad_norm": 0.236328125, "learning_rate": 0.00119658511647976, "loss": 0.879, "step": 9733 }, { "epoch": 0.2612172606268785, "grad_norm": 0.2421875, "learning_rate": 0.0011965821218451046, "loss": 0.846, "step": 9734 }, { "epoch": 0.26124409617861744, "grad_norm": 0.2255859375, "learning_rate": 0.0011965791259017232, "loss": 0.7804, "step": 9735 }, { "epoch": 0.26127093173035637, "grad_norm": 0.224609375, "learning_rate": 0.001196576128649623, "loss": 0.7709, "step": 9736 }, { "epoch": 0.2612977672820953, "grad_norm": 0.2412109375, "learning_rate": 0.0011965731300888104, "loss": 0.8742, "step": 9737 }, { "epoch": 0.26132460283383424, "grad_norm": 0.248046875, "learning_rate": 0.0011965701302192916, "loss": 0.8952, "step": 9738 }, { "epoch": 0.26135143838557323, "grad_norm": 0.2080078125, "learning_rate": 0.0011965671290410736, "loss": 0.6866, "step": 9739 }, { "epoch": 0.26137827393731217, "grad_norm": 0.2265625, "learning_rate": 0.0011965641265541628, "loss": 0.7574, "step": 9740 }, { "epoch": 0.2614051094890511, "grad_norm": 0.23046875, "learning_rate": 0.0011965611227585657, "loss": 0.8414, "step": 9741 }, { "epoch": 0.26143194504079004, "grad_norm": 0.220703125, "learning_rate": 0.0011965581176542892, "loss": 0.7701, "step": 9742 }, { "epoch": 0.26145878059252897, "grad_norm": 0.228515625, "learning_rate": 0.0011965551112413397, "loss": 0.8097, "step": 9743 }, { "epoch": 0.2614856161442679, "grad_norm": 0.2275390625, "learning_rate": 0.0011965521035197236, "loss": 0.8773, "step": 9744 }, { "epoch": 0.26151245169600684, "grad_norm": 0.2353515625, "learning_rate": 0.0011965490944894478, "loss": 0.8336, "step": 9745 }, { "epoch": 0.26153928724774583, "grad_norm": 0.21875, "learning_rate": 0.0011965460841505187, "loss": 0.7533, "step": 9746 }, { "epoch": 0.26156612279948477, "grad_norm": 0.2421875, "learning_rate": 0.001196543072502943, "loss": 0.8835, "step": 9747 }, { "epoch": 0.2615929583512237, "grad_norm": 0.234375, "learning_rate": 0.0011965400595467276, "loss": 0.8596, "step": 9748 }, { "epoch": 0.26161979390296264, "grad_norm": 0.2333984375, "learning_rate": 0.0011965370452818784, "loss": 0.7733, "step": 9749 }, { "epoch": 0.2616466294547016, "grad_norm": 0.2197265625, "learning_rate": 0.0011965340297084028, "loss": 0.7529, "step": 9750 }, { "epoch": 0.2616734650064405, "grad_norm": 0.2177734375, "learning_rate": 0.0011965310128263069, "loss": 0.6989, "step": 9751 }, { "epoch": 0.2617003005581795, "grad_norm": 0.228515625, "learning_rate": 0.0011965279946355974, "loss": 0.782, "step": 9752 }, { "epoch": 0.26172713610991843, "grad_norm": 0.21875, "learning_rate": 0.001196524975136281, "loss": 0.7288, "step": 9753 }, { "epoch": 0.26175397166165737, "grad_norm": 0.212890625, "learning_rate": 0.0011965219543283642, "loss": 0.7298, "step": 9754 }, { "epoch": 0.2617808072133963, "grad_norm": 0.25, "learning_rate": 0.001196518932211854, "loss": 0.8594, "step": 9755 }, { "epoch": 0.26180764276513524, "grad_norm": 0.2177734375, "learning_rate": 0.0011965159087867566, "loss": 0.7501, "step": 9756 }, { "epoch": 0.2618344783168742, "grad_norm": 0.2265625, "learning_rate": 0.0011965128840530788, "loss": 0.8123, "step": 9757 }, { "epoch": 0.26186131386861317, "grad_norm": 0.2373046875, "learning_rate": 0.0011965098580108272, "loss": 0.8601, "step": 9758 }, { "epoch": 0.2618881494203521, "grad_norm": 0.2333984375, "learning_rate": 0.0011965068306600086, "loss": 0.7794, "step": 9759 }, { "epoch": 0.26191498497209104, "grad_norm": 0.228515625, "learning_rate": 0.0011965038020006293, "loss": 0.8014, "step": 9760 }, { "epoch": 0.26194182052382997, "grad_norm": 0.205078125, "learning_rate": 0.0011965007720326963, "loss": 0.6825, "step": 9761 }, { "epoch": 0.2619686560755689, "grad_norm": 0.23046875, "learning_rate": 0.001196497740756216, "loss": 0.8216, "step": 9762 }, { "epoch": 0.26199549162730784, "grad_norm": 0.2333984375, "learning_rate": 0.0011964947081711955, "loss": 0.7881, "step": 9763 }, { "epoch": 0.2620223271790468, "grad_norm": 0.2373046875, "learning_rate": 0.0011964916742776407, "loss": 0.8187, "step": 9764 }, { "epoch": 0.26204916273078577, "grad_norm": 0.228515625, "learning_rate": 0.0011964886390755586, "loss": 0.8085, "step": 9765 }, { "epoch": 0.2620759982825247, "grad_norm": 0.234375, "learning_rate": 0.0011964856025649562, "loss": 0.8271, "step": 9766 }, { "epoch": 0.26210283383426364, "grad_norm": 0.2333984375, "learning_rate": 0.0011964825647458397, "loss": 0.8039, "step": 9767 }, { "epoch": 0.2621296693860026, "grad_norm": 0.2412109375, "learning_rate": 0.0011964795256182161, "loss": 0.8525, "step": 9768 }, { "epoch": 0.2621565049377415, "grad_norm": 0.21484375, "learning_rate": 0.001196476485182092, "loss": 0.7727, "step": 9769 }, { "epoch": 0.26218334048948044, "grad_norm": 0.205078125, "learning_rate": 0.0011964734434374736, "loss": 0.7218, "step": 9770 }, { "epoch": 0.26221017604121943, "grad_norm": 0.22265625, "learning_rate": 0.001196470400384368, "loss": 0.7329, "step": 9771 }, { "epoch": 0.26223701159295837, "grad_norm": 0.23828125, "learning_rate": 0.001196467356022782, "loss": 0.8798, "step": 9772 }, { "epoch": 0.2622638471446973, "grad_norm": 0.2158203125, "learning_rate": 0.0011964643103527222, "loss": 0.7866, "step": 9773 }, { "epoch": 0.26229068269643624, "grad_norm": 0.228515625, "learning_rate": 0.001196461263374195, "loss": 0.8012, "step": 9774 }, { "epoch": 0.2623175182481752, "grad_norm": 0.212890625, "learning_rate": 0.0011964582150872074, "loss": 0.7426, "step": 9775 }, { "epoch": 0.2623443537999141, "grad_norm": 0.2333984375, "learning_rate": 0.0011964551654917658, "loss": 0.8024, "step": 9776 }, { "epoch": 0.26237118935165304, "grad_norm": 0.216796875, "learning_rate": 0.0011964521145878771, "loss": 0.7602, "step": 9777 }, { "epoch": 0.26239802490339204, "grad_norm": 0.228515625, "learning_rate": 0.0011964490623755481, "loss": 0.8137, "step": 9778 }, { "epoch": 0.26242486045513097, "grad_norm": 0.2197265625, "learning_rate": 0.0011964460088547853, "loss": 0.6912, "step": 9779 }, { "epoch": 0.2624516960068699, "grad_norm": 0.220703125, "learning_rate": 0.001196442954025595, "loss": 0.8057, "step": 9780 }, { "epoch": 0.26247853155860884, "grad_norm": 0.205078125, "learning_rate": 0.001196439897887985, "loss": 0.6875, "step": 9781 }, { "epoch": 0.2625053671103478, "grad_norm": 0.2265625, "learning_rate": 0.0011964368404419608, "loss": 0.8044, "step": 9782 }, { "epoch": 0.2625322026620867, "grad_norm": 0.2392578125, "learning_rate": 0.00119643378168753, "loss": 0.8543, "step": 9783 }, { "epoch": 0.2625590382138257, "grad_norm": 0.234375, "learning_rate": 0.0011964307216246986, "loss": 0.8116, "step": 9784 }, { "epoch": 0.26258587376556464, "grad_norm": 0.232421875, "learning_rate": 0.0011964276602534742, "loss": 0.8017, "step": 9785 }, { "epoch": 0.26261270931730357, "grad_norm": 0.23046875, "learning_rate": 0.0011964245975738625, "loss": 0.8104, "step": 9786 }, { "epoch": 0.2626395448690425, "grad_norm": 0.2265625, "learning_rate": 0.0011964215335858708, "loss": 0.7467, "step": 9787 }, { "epoch": 0.26266638042078144, "grad_norm": 0.232421875, "learning_rate": 0.0011964184682895058, "loss": 0.769, "step": 9788 }, { "epoch": 0.2626932159725204, "grad_norm": 0.2197265625, "learning_rate": 0.0011964154016847741, "loss": 0.7336, "step": 9789 }, { "epoch": 0.2627200515242593, "grad_norm": 0.1953125, "learning_rate": 0.0011964123337716826, "loss": 0.6207, "step": 9790 }, { "epoch": 0.2627468870759983, "grad_norm": 0.2119140625, "learning_rate": 0.001196409264550238, "loss": 0.726, "step": 9791 }, { "epoch": 0.26277372262773724, "grad_norm": 0.20703125, "learning_rate": 0.0011964061940204467, "loss": 0.6803, "step": 9792 }, { "epoch": 0.2628005581794762, "grad_norm": 0.212890625, "learning_rate": 0.0011964031221823157, "loss": 0.7565, "step": 9793 }, { "epoch": 0.2628273937312151, "grad_norm": 0.2177734375, "learning_rate": 0.0011964000490358516, "loss": 0.7559, "step": 9794 }, { "epoch": 0.26285422928295404, "grad_norm": 0.2265625, "learning_rate": 0.0011963969745810616, "loss": 0.7779, "step": 9795 }, { "epoch": 0.262881064834693, "grad_norm": 0.24609375, "learning_rate": 0.001196393898817952, "loss": 0.8065, "step": 9796 }, { "epoch": 0.26290790038643197, "grad_norm": 0.21484375, "learning_rate": 0.0011963908217465294, "loss": 0.7213, "step": 9797 }, { "epoch": 0.2629347359381709, "grad_norm": 0.2255859375, "learning_rate": 0.001196387743366801, "loss": 0.7324, "step": 9798 }, { "epoch": 0.26296157148990984, "grad_norm": 0.23046875, "learning_rate": 0.0011963846636787734, "loss": 0.842, "step": 9799 }, { "epoch": 0.2629884070416488, "grad_norm": 0.2314453125, "learning_rate": 0.0011963815826824533, "loss": 0.7526, "step": 9800 }, { "epoch": 0.2630152425933877, "grad_norm": 0.2314453125, "learning_rate": 0.0011963785003778473, "loss": 0.6448, "step": 9801 }, { "epoch": 0.26304207814512665, "grad_norm": 0.2353515625, "learning_rate": 0.0011963754167649623, "loss": 0.8257, "step": 9802 }, { "epoch": 0.2630689136968656, "grad_norm": 0.22265625, "learning_rate": 0.0011963723318438053, "loss": 0.6932, "step": 9803 }, { "epoch": 0.26309574924860457, "grad_norm": 0.2353515625, "learning_rate": 0.0011963692456143826, "loss": 0.7991, "step": 9804 }, { "epoch": 0.2631225848003435, "grad_norm": 0.236328125, "learning_rate": 0.0011963661580767016, "loss": 0.7622, "step": 9805 }, { "epoch": 0.26314942035208244, "grad_norm": 0.20703125, "learning_rate": 0.0011963630692307685, "loss": 0.7182, "step": 9806 }, { "epoch": 0.2631762559038214, "grad_norm": 0.2255859375, "learning_rate": 0.0011963599790765901, "loss": 0.7953, "step": 9807 }, { "epoch": 0.2632030914555603, "grad_norm": 0.220703125, "learning_rate": 0.0011963568876141735, "loss": 0.7113, "step": 9808 }, { "epoch": 0.26322992700729925, "grad_norm": 0.2412109375, "learning_rate": 0.0011963537948435255, "loss": 0.7761, "step": 9809 }, { "epoch": 0.26325676255903824, "grad_norm": 0.2333984375, "learning_rate": 0.0011963507007646526, "loss": 0.8581, "step": 9810 }, { "epoch": 0.2632835981107772, "grad_norm": 0.224609375, "learning_rate": 0.0011963476053775618, "loss": 0.8249, "step": 9811 }, { "epoch": 0.2633104336625161, "grad_norm": 0.224609375, "learning_rate": 0.0011963445086822597, "loss": 0.7539, "step": 9812 }, { "epoch": 0.26333726921425504, "grad_norm": 0.2265625, "learning_rate": 0.0011963414106787532, "loss": 0.7522, "step": 9813 }, { "epoch": 0.263364104765994, "grad_norm": 0.2314453125, "learning_rate": 0.001196338311367049, "loss": 0.7819, "step": 9814 }, { "epoch": 0.2633909403177329, "grad_norm": 0.232421875, "learning_rate": 0.001196335210747154, "loss": 0.7622, "step": 9815 }, { "epoch": 0.2634177758694719, "grad_norm": 0.21875, "learning_rate": 0.001196332108819075, "loss": 0.6845, "step": 9816 }, { "epoch": 0.26344461142121084, "grad_norm": 0.2373046875, "learning_rate": 0.001196329005582819, "loss": 0.8231, "step": 9817 }, { "epoch": 0.2634714469729498, "grad_norm": 0.2333984375, "learning_rate": 0.0011963259010383926, "loss": 0.7962, "step": 9818 }, { "epoch": 0.2634982825246887, "grad_norm": 0.2392578125, "learning_rate": 0.0011963227951858025, "loss": 0.8382, "step": 9819 }, { "epoch": 0.26352511807642764, "grad_norm": 0.2421875, "learning_rate": 0.0011963196880250555, "loss": 0.8797, "step": 9820 }, { "epoch": 0.2635519536281666, "grad_norm": 0.2314453125, "learning_rate": 0.0011963165795561587, "loss": 0.8623, "step": 9821 }, { "epoch": 0.2635787891799055, "grad_norm": 0.22265625, "learning_rate": 0.0011963134697791189, "loss": 0.7797, "step": 9822 }, { "epoch": 0.2636056247316445, "grad_norm": 0.244140625, "learning_rate": 0.0011963103586939425, "loss": 0.7294, "step": 9823 }, { "epoch": 0.26363246028338344, "grad_norm": 0.2451171875, "learning_rate": 0.0011963072463006368, "loss": 0.8556, "step": 9824 }, { "epoch": 0.2636592958351224, "grad_norm": 0.23828125, "learning_rate": 0.0011963041325992083, "loss": 0.7983, "step": 9825 }, { "epoch": 0.2636861313868613, "grad_norm": 0.2373046875, "learning_rate": 0.001196301017589664, "loss": 0.8289, "step": 9826 }, { "epoch": 0.26371296693860025, "grad_norm": 0.2265625, "learning_rate": 0.0011962979012720108, "loss": 0.7672, "step": 9827 }, { "epoch": 0.2637398024903392, "grad_norm": 0.2314453125, "learning_rate": 0.0011962947836462554, "loss": 0.757, "step": 9828 }, { "epoch": 0.26376663804207817, "grad_norm": 0.24609375, "learning_rate": 0.0011962916647124046, "loss": 0.8283, "step": 9829 }, { "epoch": 0.2637934735938171, "grad_norm": 0.2216796875, "learning_rate": 0.0011962885444704652, "loss": 0.7108, "step": 9830 }, { "epoch": 0.26382030914555604, "grad_norm": 0.236328125, "learning_rate": 0.0011962854229204444, "loss": 0.8271, "step": 9831 }, { "epoch": 0.263847144697295, "grad_norm": 0.2265625, "learning_rate": 0.0011962823000623488, "loss": 0.6803, "step": 9832 }, { "epoch": 0.2638739802490339, "grad_norm": 0.2216796875, "learning_rate": 0.0011962791758961852, "loss": 0.7625, "step": 9833 }, { "epoch": 0.26390081580077285, "grad_norm": 0.236328125, "learning_rate": 0.0011962760504219605, "loss": 0.8365, "step": 9834 }, { "epoch": 0.2639276513525118, "grad_norm": 0.21875, "learning_rate": 0.0011962729236396816, "loss": 0.7446, "step": 9835 }, { "epoch": 0.2639544869042508, "grad_norm": 0.2431640625, "learning_rate": 0.0011962697955493554, "loss": 0.8331, "step": 9836 }, { "epoch": 0.2639813224559897, "grad_norm": 0.259765625, "learning_rate": 0.0011962666661509886, "loss": 0.8508, "step": 9837 }, { "epoch": 0.26400815800772864, "grad_norm": 0.234375, "learning_rate": 0.0011962635354445881, "loss": 0.8389, "step": 9838 }, { "epoch": 0.2640349935594676, "grad_norm": 0.2314453125, "learning_rate": 0.001196260403430161, "loss": 0.8236, "step": 9839 }, { "epoch": 0.2640618291112065, "grad_norm": 0.2265625, "learning_rate": 0.0011962572701077138, "loss": 0.8072, "step": 9840 }, { "epoch": 0.26408866466294545, "grad_norm": 0.228515625, "learning_rate": 0.0011962541354772539, "loss": 0.7961, "step": 9841 }, { "epoch": 0.26411550021468444, "grad_norm": 0.2392578125, "learning_rate": 0.0011962509995387875, "loss": 0.8655, "step": 9842 }, { "epoch": 0.2641423357664234, "grad_norm": 0.2333984375, "learning_rate": 0.001196247862292322, "loss": 0.77, "step": 9843 }, { "epoch": 0.2641691713181623, "grad_norm": 0.2314453125, "learning_rate": 0.001196244723737864, "loss": 0.8424, "step": 9844 }, { "epoch": 0.26419600686990125, "grad_norm": 0.232421875, "learning_rate": 0.0011962415838754207, "loss": 0.7772, "step": 9845 }, { "epoch": 0.2642228424216402, "grad_norm": 0.23046875, "learning_rate": 0.0011962384427049986, "loss": 0.7963, "step": 9846 }, { "epoch": 0.2642496779733791, "grad_norm": 0.23046875, "learning_rate": 0.0011962353002266048, "loss": 0.8835, "step": 9847 }, { "epoch": 0.26427651352511805, "grad_norm": 0.2177734375, "learning_rate": 0.0011962321564402462, "loss": 0.7241, "step": 9848 }, { "epoch": 0.26430334907685704, "grad_norm": 0.2080078125, "learning_rate": 0.0011962290113459294, "loss": 0.7052, "step": 9849 }, { "epoch": 0.264330184628596, "grad_norm": 0.224609375, "learning_rate": 0.0011962258649436618, "loss": 0.834, "step": 9850 }, { "epoch": 0.2643570201803349, "grad_norm": 0.22265625, "learning_rate": 0.0011962227172334502, "loss": 0.7135, "step": 9851 }, { "epoch": 0.26438385573207385, "grad_norm": 0.21484375, "learning_rate": 0.0011962195682153013, "loss": 0.7217, "step": 9852 }, { "epoch": 0.2644106912838128, "grad_norm": 0.2197265625, "learning_rate": 0.0011962164178892219, "loss": 0.756, "step": 9853 }, { "epoch": 0.2644375268355517, "grad_norm": 0.236328125, "learning_rate": 0.0011962132662552193, "loss": 0.7911, "step": 9854 }, { "epoch": 0.2644643623872907, "grad_norm": 0.224609375, "learning_rate": 0.0011962101133133, "loss": 0.7881, "step": 9855 }, { "epoch": 0.26449119793902964, "grad_norm": 0.2373046875, "learning_rate": 0.0011962069590634712, "loss": 0.7664, "step": 9856 }, { "epoch": 0.2645180334907686, "grad_norm": 0.236328125, "learning_rate": 0.0011962038035057397, "loss": 0.7527, "step": 9857 }, { "epoch": 0.2645448690425075, "grad_norm": 0.220703125, "learning_rate": 0.0011962006466401127, "loss": 0.8203, "step": 9858 }, { "epoch": 0.26457170459424645, "grad_norm": 0.2578125, "learning_rate": 0.0011961974884665966, "loss": 0.8107, "step": 9859 }, { "epoch": 0.2645985401459854, "grad_norm": 0.234375, "learning_rate": 0.001196194328985199, "loss": 0.7856, "step": 9860 }, { "epoch": 0.2646253756977243, "grad_norm": 0.2177734375, "learning_rate": 0.0011961911681959258, "loss": 0.7301, "step": 9861 }, { "epoch": 0.2646522112494633, "grad_norm": 0.2265625, "learning_rate": 0.001196188006098785, "loss": 0.7107, "step": 9862 }, { "epoch": 0.26467904680120224, "grad_norm": 0.2197265625, "learning_rate": 0.0011961848426937831, "loss": 0.783, "step": 9863 }, { "epoch": 0.2647058823529412, "grad_norm": 0.2353515625, "learning_rate": 0.001196181677980927, "loss": 0.811, "step": 9864 }, { "epoch": 0.2647327179046801, "grad_norm": 0.232421875, "learning_rate": 0.0011961785119602238, "loss": 0.8089, "step": 9865 }, { "epoch": 0.26475955345641905, "grad_norm": 0.234375, "learning_rate": 0.0011961753446316802, "loss": 0.7852, "step": 9866 }, { "epoch": 0.264786389008158, "grad_norm": 0.251953125, "learning_rate": 0.0011961721759953033, "loss": 0.9587, "step": 9867 }, { "epoch": 0.264813224559897, "grad_norm": 0.21875, "learning_rate": 0.0011961690060511001, "loss": 0.8105, "step": 9868 }, { "epoch": 0.2648400601116359, "grad_norm": 0.224609375, "learning_rate": 0.0011961658347990776, "loss": 0.7807, "step": 9869 }, { "epoch": 0.26486689566337485, "grad_norm": 0.232421875, "learning_rate": 0.0011961626622392424, "loss": 0.9011, "step": 9870 }, { "epoch": 0.2648937312151138, "grad_norm": 0.2392578125, "learning_rate": 0.001196159488371602, "loss": 0.8887, "step": 9871 }, { "epoch": 0.2649205667668527, "grad_norm": 0.234375, "learning_rate": 0.0011961563131961629, "loss": 0.8205, "step": 9872 }, { "epoch": 0.26494740231859165, "grad_norm": 0.201171875, "learning_rate": 0.0011961531367129324, "loss": 0.7013, "step": 9873 }, { "epoch": 0.2649742378703306, "grad_norm": 0.2265625, "learning_rate": 0.001196149958921917, "loss": 0.8021, "step": 9874 }, { "epoch": 0.2650010734220696, "grad_norm": 0.22265625, "learning_rate": 0.0011961467798231242, "loss": 0.7516, "step": 9875 }, { "epoch": 0.2650279089738085, "grad_norm": 0.2080078125, "learning_rate": 0.0011961435994165606, "loss": 0.724, "step": 9876 }, { "epoch": 0.26505474452554745, "grad_norm": 0.2373046875, "learning_rate": 0.0011961404177022335, "loss": 0.8105, "step": 9877 }, { "epoch": 0.2650815800772864, "grad_norm": 0.228515625, "learning_rate": 0.0011961372346801498, "loss": 0.7803, "step": 9878 }, { "epoch": 0.2651084156290253, "grad_norm": 0.244140625, "learning_rate": 0.001196134050350316, "loss": 0.8237, "step": 9879 }, { "epoch": 0.26513525118076425, "grad_norm": 0.212890625, "learning_rate": 0.0011961308647127399, "loss": 0.7143, "step": 9880 }, { "epoch": 0.26516208673250324, "grad_norm": 0.228515625, "learning_rate": 0.0011961276777674279, "loss": 0.7196, "step": 9881 }, { "epoch": 0.2651889222842422, "grad_norm": 0.2255859375, "learning_rate": 0.0011961244895143872, "loss": 0.7855, "step": 9882 }, { "epoch": 0.2652157578359811, "grad_norm": 0.248046875, "learning_rate": 0.0011961212999536245, "loss": 0.8455, "step": 9883 }, { "epoch": 0.26524259338772005, "grad_norm": 0.23046875, "learning_rate": 0.0011961181090851473, "loss": 0.8108, "step": 9884 }, { "epoch": 0.265269428939459, "grad_norm": 0.2412109375, "learning_rate": 0.0011961149169089625, "loss": 0.9095, "step": 9885 }, { "epoch": 0.2652962644911979, "grad_norm": 0.251953125, "learning_rate": 0.0011961117234250765, "loss": 0.8794, "step": 9886 }, { "epoch": 0.2653231000429369, "grad_norm": 0.2265625, "learning_rate": 0.001196108528633497, "loss": 0.7705, "step": 9887 }, { "epoch": 0.26534993559467585, "grad_norm": 0.240234375, "learning_rate": 0.0011961053325342309, "loss": 0.889, "step": 9888 }, { "epoch": 0.2653767711464148, "grad_norm": 0.232421875, "learning_rate": 0.0011961021351272851, "loss": 0.7606, "step": 9889 }, { "epoch": 0.2654036066981537, "grad_norm": 0.234375, "learning_rate": 0.0011960989364126662, "loss": 0.7863, "step": 9890 }, { "epoch": 0.26543044224989265, "grad_norm": 0.2373046875, "learning_rate": 0.001196095736390382, "loss": 0.865, "step": 9891 }, { "epoch": 0.2654572778016316, "grad_norm": 0.2294921875, "learning_rate": 0.0011960925350604388, "loss": 0.789, "step": 9892 }, { "epoch": 0.2654841133533705, "grad_norm": 0.2138671875, "learning_rate": 0.0011960893324228442, "loss": 0.8074, "step": 9893 }, { "epoch": 0.2655109489051095, "grad_norm": 0.232421875, "learning_rate": 0.001196086128477605, "loss": 0.8787, "step": 9894 }, { "epoch": 0.26553778445684845, "grad_norm": 0.2490234375, "learning_rate": 0.001196082923224728, "loss": 0.9248, "step": 9895 }, { "epoch": 0.2655646200085874, "grad_norm": 0.2255859375, "learning_rate": 0.0011960797166642207, "loss": 0.7683, "step": 9896 }, { "epoch": 0.2655914555603263, "grad_norm": 0.251953125, "learning_rate": 0.0011960765087960896, "loss": 0.7775, "step": 9897 }, { "epoch": 0.26561829111206525, "grad_norm": 0.2216796875, "learning_rate": 0.0011960732996203422, "loss": 0.7908, "step": 9898 }, { "epoch": 0.2656451266638042, "grad_norm": 0.21875, "learning_rate": 0.0011960700891369852, "loss": 0.7312, "step": 9899 }, { "epoch": 0.2656719622155432, "grad_norm": 0.2158203125, "learning_rate": 0.0011960668773460259, "loss": 0.7498, "step": 9900 }, { "epoch": 0.2656987977672821, "grad_norm": 0.23828125, "learning_rate": 0.0011960636642474713, "loss": 0.8478, "step": 9901 }, { "epoch": 0.26572563331902105, "grad_norm": 0.2177734375, "learning_rate": 0.0011960604498413283, "loss": 0.7186, "step": 9902 }, { "epoch": 0.26575246887076, "grad_norm": 0.220703125, "learning_rate": 0.001196057234127604, "loss": 0.7501, "step": 9903 }, { "epoch": 0.2657793044224989, "grad_norm": 0.2392578125, "learning_rate": 0.0011960540171063055, "loss": 0.8991, "step": 9904 }, { "epoch": 0.26580613997423785, "grad_norm": 0.22265625, "learning_rate": 0.0011960507987774397, "loss": 0.7627, "step": 9905 }, { "epoch": 0.2658329755259768, "grad_norm": 0.2353515625, "learning_rate": 0.001196047579141014, "loss": 0.8832, "step": 9906 }, { "epoch": 0.2658598110777158, "grad_norm": 0.20703125, "learning_rate": 0.0011960443581970352, "loss": 0.6581, "step": 9907 }, { "epoch": 0.2658866466294547, "grad_norm": 0.2421875, "learning_rate": 0.0011960411359455106, "loss": 0.9195, "step": 9908 }, { "epoch": 0.26591348218119365, "grad_norm": 0.2255859375, "learning_rate": 0.001196037912386447, "loss": 0.792, "step": 9909 }, { "epoch": 0.2659403177329326, "grad_norm": 0.23046875, "learning_rate": 0.0011960346875198517, "loss": 0.7849, "step": 9910 }, { "epoch": 0.2659671532846715, "grad_norm": 0.2373046875, "learning_rate": 0.0011960314613457313, "loss": 0.8718, "step": 9911 }, { "epoch": 0.26599398883641046, "grad_norm": 0.2265625, "learning_rate": 0.0011960282338640937, "loss": 0.786, "step": 9912 }, { "epoch": 0.26602082438814945, "grad_norm": 0.2177734375, "learning_rate": 0.001196025005074945, "loss": 0.8031, "step": 9913 }, { "epoch": 0.2660476599398884, "grad_norm": 0.2392578125, "learning_rate": 0.0011960217749782931, "loss": 0.835, "step": 9914 }, { "epoch": 0.2660744954916273, "grad_norm": 0.208984375, "learning_rate": 0.0011960185435741448, "loss": 0.7325, "step": 9915 }, { "epoch": 0.26610133104336625, "grad_norm": 0.224609375, "learning_rate": 0.001196015310862507, "loss": 0.7629, "step": 9916 }, { "epoch": 0.2661281665951052, "grad_norm": 0.236328125, "learning_rate": 0.0011960120768433871, "loss": 0.851, "step": 9917 }, { "epoch": 0.2661550021468441, "grad_norm": 0.224609375, "learning_rate": 0.0011960088415167922, "loss": 0.8449, "step": 9918 }, { "epoch": 0.26618183769858306, "grad_norm": 0.2294921875, "learning_rate": 0.001196005604882729, "loss": 0.7763, "step": 9919 }, { "epoch": 0.26620867325032205, "grad_norm": 0.2236328125, "learning_rate": 0.001196002366941205, "loss": 0.8222, "step": 9920 }, { "epoch": 0.266235508802061, "grad_norm": 0.2275390625, "learning_rate": 0.001195999127692227, "loss": 0.7748, "step": 9921 }, { "epoch": 0.2662623443537999, "grad_norm": 0.2255859375, "learning_rate": 0.0011959958871358021, "loss": 0.8111, "step": 9922 }, { "epoch": 0.26628917990553885, "grad_norm": 0.236328125, "learning_rate": 0.001195992645271938, "loss": 0.7947, "step": 9923 }, { "epoch": 0.2663160154572778, "grad_norm": 0.216796875, "learning_rate": 0.0011959894021006411, "loss": 0.7542, "step": 9924 }, { "epoch": 0.2663428510090167, "grad_norm": 0.2197265625, "learning_rate": 0.0011959861576219188, "loss": 0.8062, "step": 9925 }, { "epoch": 0.2663696865607557, "grad_norm": 0.2177734375, "learning_rate": 0.0011959829118357783, "loss": 0.6914, "step": 9926 }, { "epoch": 0.26639652211249465, "grad_norm": 0.251953125, "learning_rate": 0.0011959796647422268, "loss": 0.9043, "step": 9927 }, { "epoch": 0.2664233576642336, "grad_norm": 0.228515625, "learning_rate": 0.0011959764163412708, "loss": 0.7567, "step": 9928 }, { "epoch": 0.2664501932159725, "grad_norm": 0.2236328125, "learning_rate": 0.001195973166632918, "loss": 0.706, "step": 9929 }, { "epoch": 0.26647702876771145, "grad_norm": 0.2314453125, "learning_rate": 0.0011959699156171757, "loss": 0.8307, "step": 9930 }, { "epoch": 0.2665038643194504, "grad_norm": 0.240234375, "learning_rate": 0.0011959666632940507, "loss": 0.795, "step": 9931 }, { "epoch": 0.2665306998711893, "grad_norm": 0.232421875, "learning_rate": 0.00119596340966355, "loss": 0.8243, "step": 9932 }, { "epoch": 0.2665575354229283, "grad_norm": 0.21875, "learning_rate": 0.001195960154725681, "loss": 0.7443, "step": 9933 }, { "epoch": 0.26658437097466725, "grad_norm": 0.2353515625, "learning_rate": 0.0011959568984804508, "loss": 0.8284, "step": 9934 }, { "epoch": 0.2666112065264062, "grad_norm": 0.2412109375, "learning_rate": 0.0011959536409278664, "loss": 0.8536, "step": 9935 }, { "epoch": 0.2666380420781451, "grad_norm": 0.2236328125, "learning_rate": 0.0011959503820679351, "loss": 0.787, "step": 9936 }, { "epoch": 0.26666487762988406, "grad_norm": 0.2333984375, "learning_rate": 0.001195947121900664, "loss": 0.8167, "step": 9937 }, { "epoch": 0.266691713181623, "grad_norm": 0.2392578125, "learning_rate": 0.0011959438604260601, "loss": 0.815, "step": 9938 }, { "epoch": 0.266718548733362, "grad_norm": 0.2333984375, "learning_rate": 0.001195940597644131, "loss": 0.7961, "step": 9939 }, { "epoch": 0.2667453842851009, "grad_norm": 0.2373046875, "learning_rate": 0.0011959373335548831, "loss": 0.8474, "step": 9940 }, { "epoch": 0.26677221983683985, "grad_norm": 0.2216796875, "learning_rate": 0.0011959340681583245, "loss": 0.766, "step": 9941 }, { "epoch": 0.2667990553885788, "grad_norm": 0.2353515625, "learning_rate": 0.0011959308014544616, "loss": 0.8042, "step": 9942 }, { "epoch": 0.2668258909403177, "grad_norm": 0.244140625, "learning_rate": 0.001195927533443302, "loss": 0.8761, "step": 9943 }, { "epoch": 0.26685272649205666, "grad_norm": 0.232421875, "learning_rate": 0.0011959242641248525, "loss": 0.7997, "step": 9944 }, { "epoch": 0.2668795620437956, "grad_norm": 0.2197265625, "learning_rate": 0.0011959209934991207, "loss": 0.7864, "step": 9945 }, { "epoch": 0.2669063975955346, "grad_norm": 0.2275390625, "learning_rate": 0.0011959177215661134, "loss": 0.8172, "step": 9946 }, { "epoch": 0.2669332331472735, "grad_norm": 0.212890625, "learning_rate": 0.001195914448325838, "loss": 0.6914, "step": 9947 }, { "epoch": 0.26696006869901245, "grad_norm": 0.23828125, "learning_rate": 0.0011959111737783017, "loss": 0.8032, "step": 9948 }, { "epoch": 0.2669869042507514, "grad_norm": 0.2373046875, "learning_rate": 0.0011959078979235118, "loss": 0.8514, "step": 9949 }, { "epoch": 0.2670137398024903, "grad_norm": 0.220703125, "learning_rate": 0.0011959046207614748, "loss": 0.7857, "step": 9950 }, { "epoch": 0.26704057535422926, "grad_norm": 0.22265625, "learning_rate": 0.0011959013422921986, "loss": 0.7833, "step": 9951 }, { "epoch": 0.26706741090596825, "grad_norm": 0.2392578125, "learning_rate": 0.0011958980625156903, "loss": 0.9112, "step": 9952 }, { "epoch": 0.2670942464577072, "grad_norm": 0.228515625, "learning_rate": 0.001195894781431957, "loss": 0.7996, "step": 9953 }, { "epoch": 0.2671210820094461, "grad_norm": 0.240234375, "learning_rate": 0.0011958914990410057, "loss": 0.8635, "step": 9954 }, { "epoch": 0.26714791756118506, "grad_norm": 0.23046875, "learning_rate": 0.0011958882153428436, "loss": 0.8534, "step": 9955 }, { "epoch": 0.267174753112924, "grad_norm": 0.2216796875, "learning_rate": 0.0011958849303374784, "loss": 0.6796, "step": 9956 }, { "epoch": 0.2672015886646629, "grad_norm": 0.228515625, "learning_rate": 0.0011958816440249167, "loss": 0.8097, "step": 9957 }, { "epoch": 0.2672284242164019, "grad_norm": 0.2255859375, "learning_rate": 0.0011958783564051663, "loss": 0.8362, "step": 9958 }, { "epoch": 0.26725525976814085, "grad_norm": 0.216796875, "learning_rate": 0.0011958750674782338, "loss": 0.7891, "step": 9959 }, { "epoch": 0.2672820953198798, "grad_norm": 0.2373046875, "learning_rate": 0.0011958717772441267, "loss": 0.8779, "step": 9960 }, { "epoch": 0.2673089308716187, "grad_norm": 0.2158203125, "learning_rate": 0.0011958684857028524, "loss": 0.7063, "step": 9961 }, { "epoch": 0.26733576642335766, "grad_norm": 0.2216796875, "learning_rate": 0.001195865192854418, "loss": 0.7993, "step": 9962 }, { "epoch": 0.2673626019750966, "grad_norm": 0.201171875, "learning_rate": 0.0011958618986988307, "loss": 0.6355, "step": 9963 }, { "epoch": 0.2673894375268355, "grad_norm": 0.2353515625, "learning_rate": 0.0011958586032360973, "loss": 0.8023, "step": 9964 }, { "epoch": 0.2674162730785745, "grad_norm": 0.232421875, "learning_rate": 0.0011958553064662259, "loss": 0.8146, "step": 9965 }, { "epoch": 0.26744310863031345, "grad_norm": 0.21875, "learning_rate": 0.001195852008389223, "loss": 0.7503, "step": 9966 }, { "epoch": 0.2674699441820524, "grad_norm": 0.228515625, "learning_rate": 0.0011958487090050962, "loss": 0.8272, "step": 9967 }, { "epoch": 0.2674967797337913, "grad_norm": 0.23046875, "learning_rate": 0.0011958454083138526, "loss": 0.7592, "step": 9968 }, { "epoch": 0.26752361528553026, "grad_norm": 0.232421875, "learning_rate": 0.0011958421063154995, "loss": 0.8135, "step": 9969 }, { "epoch": 0.2675504508372692, "grad_norm": 0.22265625, "learning_rate": 0.0011958388030100442, "loss": 0.7802, "step": 9970 }, { "epoch": 0.2675772863890082, "grad_norm": 0.205078125, "learning_rate": 0.0011958354983974935, "loss": 0.6234, "step": 9971 }, { "epoch": 0.2676041219407471, "grad_norm": 0.2255859375, "learning_rate": 0.0011958321924778552, "loss": 0.7464, "step": 9972 }, { "epoch": 0.26763095749248605, "grad_norm": 0.224609375, "learning_rate": 0.0011958288852511366, "loss": 0.8027, "step": 9973 }, { "epoch": 0.267657793044225, "grad_norm": 0.212890625, "learning_rate": 0.0011958255767173446, "loss": 0.7023, "step": 9974 }, { "epoch": 0.2676846285959639, "grad_norm": 0.2119140625, "learning_rate": 0.0011958222668764864, "loss": 0.7418, "step": 9975 }, { "epoch": 0.26771146414770286, "grad_norm": 0.216796875, "learning_rate": 0.0011958189557285696, "loss": 0.7309, "step": 9976 }, { "epoch": 0.2677382996994418, "grad_norm": 0.22265625, "learning_rate": 0.0011958156432736013, "loss": 0.7308, "step": 9977 }, { "epoch": 0.2677651352511808, "grad_norm": 0.224609375, "learning_rate": 0.001195812329511589, "loss": 0.7663, "step": 9978 }, { "epoch": 0.2677919708029197, "grad_norm": 0.2158203125, "learning_rate": 0.0011958090144425391, "loss": 0.7396, "step": 9979 }, { "epoch": 0.26781880635465866, "grad_norm": 0.228515625, "learning_rate": 0.0011958056980664602, "loss": 0.7993, "step": 9980 }, { "epoch": 0.2678456419063976, "grad_norm": 0.2236328125, "learning_rate": 0.0011958023803833584, "loss": 0.8121, "step": 9981 }, { "epoch": 0.2678724774581365, "grad_norm": 0.2294921875, "learning_rate": 0.0011957990613932416, "loss": 0.7021, "step": 9982 }, { "epoch": 0.26789931300987546, "grad_norm": 0.220703125, "learning_rate": 0.0011957957410961171, "loss": 0.727, "step": 9983 }, { "epoch": 0.26792614856161445, "grad_norm": 0.236328125, "learning_rate": 0.001195792419491992, "loss": 0.8716, "step": 9984 }, { "epoch": 0.2679529841133534, "grad_norm": 0.220703125, "learning_rate": 0.0011957890965808736, "loss": 0.6951, "step": 9985 }, { "epoch": 0.2679798196650923, "grad_norm": 0.240234375, "learning_rate": 0.001195785772362769, "loss": 0.9078, "step": 9986 }, { "epoch": 0.26800665521683126, "grad_norm": 0.240234375, "learning_rate": 0.0011957824468376858, "loss": 0.848, "step": 9987 }, { "epoch": 0.2680334907685702, "grad_norm": 0.205078125, "learning_rate": 0.0011957791200056314, "loss": 0.6732, "step": 9988 }, { "epoch": 0.26806032632030913, "grad_norm": 0.2236328125, "learning_rate": 0.0011957757918666127, "loss": 0.8436, "step": 9989 }, { "epoch": 0.26808716187204806, "grad_norm": 0.216796875, "learning_rate": 0.001195772462420637, "loss": 0.7243, "step": 9990 }, { "epoch": 0.26811399742378705, "grad_norm": 0.234375, "learning_rate": 0.0011957691316677123, "loss": 0.8412, "step": 9991 }, { "epoch": 0.268140832975526, "grad_norm": 0.2138671875, "learning_rate": 0.001195765799607845, "loss": 0.7035, "step": 9992 }, { "epoch": 0.2681676685272649, "grad_norm": 0.23046875, "learning_rate": 0.001195762466241043, "loss": 0.813, "step": 9993 }, { "epoch": 0.26819450407900386, "grad_norm": 0.2255859375, "learning_rate": 0.0011957591315673133, "loss": 0.7782, "step": 9994 }, { "epoch": 0.2682213396307428, "grad_norm": 0.2392578125, "learning_rate": 0.0011957557955866633, "loss": 0.89, "step": 9995 }, { "epoch": 0.26824817518248173, "grad_norm": 0.2158203125, "learning_rate": 0.0011957524582991005, "loss": 0.7397, "step": 9996 }, { "epoch": 0.2682750107342207, "grad_norm": 0.234375, "learning_rate": 0.0011957491197046321, "loss": 0.7689, "step": 9997 }, { "epoch": 0.26830184628595966, "grad_norm": 0.2138671875, "learning_rate": 0.0011957457798032654, "loss": 0.7321, "step": 9998 }, { "epoch": 0.2683286818376986, "grad_norm": 0.2060546875, "learning_rate": 0.0011957424385950078, "loss": 0.7119, "step": 9999 }, { "epoch": 0.2683555173894375, "grad_norm": 0.2255859375, "learning_rate": 0.0011957390960798662, "loss": 0.7172, "step": 10000 }, { "epoch": 0.26838235294117646, "grad_norm": 0.2236328125, "learning_rate": 0.0011957357522578485, "loss": 0.7663, "step": 10001 }, { "epoch": 0.2684091884929154, "grad_norm": 0.2216796875, "learning_rate": 0.001195732407128962, "loss": 0.757, "step": 10002 }, { "epoch": 0.26843602404465433, "grad_norm": 0.21875, "learning_rate": 0.0011957290606932137, "loss": 0.6946, "step": 10003 }, { "epoch": 0.2684628595963933, "grad_norm": 0.2333984375, "learning_rate": 0.001195725712950611, "loss": 0.8236, "step": 10004 }, { "epoch": 0.26848969514813226, "grad_norm": 0.2373046875, "learning_rate": 0.0011957223639011616, "loss": 0.7751, "step": 10005 }, { "epoch": 0.2685165306998712, "grad_norm": 0.2431640625, "learning_rate": 0.0011957190135448725, "loss": 0.8748, "step": 10006 }, { "epoch": 0.2685433662516101, "grad_norm": 0.2109375, "learning_rate": 0.0011957156618817512, "loss": 0.7665, "step": 10007 }, { "epoch": 0.26857020180334906, "grad_norm": 0.2431640625, "learning_rate": 0.0011957123089118048, "loss": 0.8345, "step": 10008 }, { "epoch": 0.268597037355088, "grad_norm": 0.212890625, "learning_rate": 0.0011957089546350411, "loss": 0.7105, "step": 10009 }, { "epoch": 0.268623872906827, "grad_norm": 0.205078125, "learning_rate": 0.0011957055990514669, "loss": 0.7098, "step": 10010 }, { "epoch": 0.2686507084585659, "grad_norm": 0.2177734375, "learning_rate": 0.00119570224216109, "loss": 0.766, "step": 10011 }, { "epoch": 0.26867754401030486, "grad_norm": 0.2041015625, "learning_rate": 0.001195698883963918, "loss": 0.7161, "step": 10012 }, { "epoch": 0.2687043795620438, "grad_norm": 0.216796875, "learning_rate": 0.0011956955244599575, "loss": 0.6568, "step": 10013 }, { "epoch": 0.26873121511378273, "grad_norm": 0.224609375, "learning_rate": 0.0011956921636492163, "loss": 0.764, "step": 10014 }, { "epoch": 0.26875805066552166, "grad_norm": 0.2490234375, "learning_rate": 0.001195688801531702, "loss": 0.9017, "step": 10015 }, { "epoch": 0.26878488621726065, "grad_norm": 0.2490234375, "learning_rate": 0.0011956854381074216, "loss": 0.8588, "step": 10016 }, { "epoch": 0.2688117217689996, "grad_norm": 0.2216796875, "learning_rate": 0.0011956820733763824, "loss": 0.7164, "step": 10017 }, { "epoch": 0.2688385573207385, "grad_norm": 0.220703125, "learning_rate": 0.0011956787073385921, "loss": 0.7625, "step": 10018 }, { "epoch": 0.26886539287247746, "grad_norm": 0.251953125, "learning_rate": 0.001195675339994058, "loss": 0.8993, "step": 10019 }, { "epoch": 0.2688922284242164, "grad_norm": 0.2216796875, "learning_rate": 0.0011956719713427876, "loss": 0.7609, "step": 10020 }, { "epoch": 0.26891906397595533, "grad_norm": 0.21875, "learning_rate": 0.001195668601384788, "loss": 0.7424, "step": 10021 }, { "epoch": 0.26894589952769427, "grad_norm": 0.2265625, "learning_rate": 0.0011956652301200667, "loss": 0.8075, "step": 10022 }, { "epoch": 0.26897273507943326, "grad_norm": 0.2255859375, "learning_rate": 0.001195661857548631, "loss": 0.8376, "step": 10023 }, { "epoch": 0.2689995706311722, "grad_norm": 0.2216796875, "learning_rate": 0.001195658483670489, "loss": 0.8302, "step": 10024 }, { "epoch": 0.2690264061829111, "grad_norm": 0.2275390625, "learning_rate": 0.001195655108485647, "loss": 0.763, "step": 10025 }, { "epoch": 0.26905324173465006, "grad_norm": 0.2080078125, "learning_rate": 0.0011956517319941132, "loss": 0.6829, "step": 10026 }, { "epoch": 0.269080077286389, "grad_norm": 0.2314453125, "learning_rate": 0.0011956483541958943, "loss": 0.8265, "step": 10027 }, { "epoch": 0.26910691283812793, "grad_norm": 0.2265625, "learning_rate": 0.0011956449750909987, "loss": 0.839, "step": 10028 }, { "epoch": 0.2691337483898669, "grad_norm": 0.2197265625, "learning_rate": 0.001195641594679433, "loss": 0.7686, "step": 10029 }, { "epoch": 0.26916058394160586, "grad_norm": 0.2412109375, "learning_rate": 0.0011956382129612048, "loss": 0.9197, "step": 10030 }, { "epoch": 0.2691874194933448, "grad_norm": 0.2119140625, "learning_rate": 0.0011956348299363218, "loss": 0.7635, "step": 10031 }, { "epoch": 0.26921425504508373, "grad_norm": 0.2255859375, "learning_rate": 0.001195631445604791, "loss": 0.7672, "step": 10032 }, { "epoch": 0.26924109059682266, "grad_norm": 0.244140625, "learning_rate": 0.0011956280599666204, "loss": 0.8671, "step": 10033 }, { "epoch": 0.2692679261485616, "grad_norm": 0.2265625, "learning_rate": 0.0011956246730218168, "loss": 0.7921, "step": 10034 }, { "epoch": 0.26929476170030053, "grad_norm": 0.2216796875, "learning_rate": 0.0011956212847703878, "loss": 0.8015, "step": 10035 }, { "epoch": 0.2693215972520395, "grad_norm": 0.2236328125, "learning_rate": 0.0011956178952123413, "loss": 0.8054, "step": 10036 }, { "epoch": 0.26934843280377846, "grad_norm": 0.2294921875, "learning_rate": 0.0011956145043476843, "loss": 0.8922, "step": 10037 }, { "epoch": 0.2693752683555174, "grad_norm": 0.2216796875, "learning_rate": 0.001195611112176424, "loss": 0.8449, "step": 10038 }, { "epoch": 0.26940210390725633, "grad_norm": 0.2265625, "learning_rate": 0.0011956077186985683, "loss": 0.7899, "step": 10039 }, { "epoch": 0.26942893945899526, "grad_norm": 0.216796875, "learning_rate": 0.0011956043239141248, "loss": 0.712, "step": 10040 }, { "epoch": 0.2694557750107342, "grad_norm": 0.21875, "learning_rate": 0.0011956009278231003, "loss": 0.7411, "step": 10041 }, { "epoch": 0.2694826105624732, "grad_norm": 0.2119140625, "learning_rate": 0.0011955975304255026, "loss": 0.7152, "step": 10042 }, { "epoch": 0.2695094461142121, "grad_norm": 0.2314453125, "learning_rate": 0.0011955941317213393, "loss": 0.7802, "step": 10043 }, { "epoch": 0.26953628166595106, "grad_norm": 0.23046875, "learning_rate": 0.0011955907317106175, "loss": 0.8091, "step": 10044 }, { "epoch": 0.26956311721769, "grad_norm": 0.23046875, "learning_rate": 0.001195587330393345, "loss": 0.8759, "step": 10045 }, { "epoch": 0.26958995276942893, "grad_norm": 0.23046875, "learning_rate": 0.001195583927769529, "loss": 0.8244, "step": 10046 }, { "epoch": 0.26961678832116787, "grad_norm": 0.2431640625, "learning_rate": 0.0011955805238391775, "loss": 0.8481, "step": 10047 }, { "epoch": 0.2696436238729068, "grad_norm": 0.220703125, "learning_rate": 0.001195577118602297, "loss": 0.7005, "step": 10048 }, { "epoch": 0.2696704594246458, "grad_norm": 0.212890625, "learning_rate": 0.0011955737120588957, "loss": 0.7341, "step": 10049 }, { "epoch": 0.2696972949763847, "grad_norm": 0.2294921875, "learning_rate": 0.0011955703042089809, "loss": 0.7595, "step": 10050 }, { "epoch": 0.26972413052812366, "grad_norm": 0.216796875, "learning_rate": 0.0011955668950525602, "loss": 0.7374, "step": 10051 }, { "epoch": 0.2697509660798626, "grad_norm": 0.2216796875, "learning_rate": 0.001195563484589641, "loss": 0.7955, "step": 10052 }, { "epoch": 0.26977780163160153, "grad_norm": 0.224609375, "learning_rate": 0.0011955600728202306, "loss": 0.8125, "step": 10053 }, { "epoch": 0.26980463718334047, "grad_norm": 0.2275390625, "learning_rate": 0.0011955566597443366, "loss": 0.801, "step": 10054 }, { "epoch": 0.26983147273507946, "grad_norm": 0.2255859375, "learning_rate": 0.0011955532453619664, "loss": 0.7053, "step": 10055 }, { "epoch": 0.2698583082868184, "grad_norm": 0.2080078125, "learning_rate": 0.0011955498296731277, "loss": 0.6727, "step": 10056 }, { "epoch": 0.26988514383855733, "grad_norm": 0.2119140625, "learning_rate": 0.001195546412677828, "loss": 0.6587, "step": 10057 }, { "epoch": 0.26991197939029626, "grad_norm": 0.22265625, "learning_rate": 0.0011955429943760745, "loss": 0.7833, "step": 10058 }, { "epoch": 0.2699388149420352, "grad_norm": 0.2314453125, "learning_rate": 0.001195539574767875, "loss": 0.8523, "step": 10059 }, { "epoch": 0.26996565049377413, "grad_norm": 0.236328125, "learning_rate": 0.0011955361538532369, "loss": 0.8292, "step": 10060 }, { "epoch": 0.26999248604551307, "grad_norm": 0.2333984375, "learning_rate": 0.0011955327316321676, "loss": 0.8048, "step": 10061 }, { "epoch": 0.27001932159725206, "grad_norm": 0.20703125, "learning_rate": 0.0011955293081046749, "loss": 0.6769, "step": 10062 }, { "epoch": 0.270046157148991, "grad_norm": 0.232421875, "learning_rate": 0.0011955258832707658, "loss": 0.7653, "step": 10063 }, { "epoch": 0.27007299270072993, "grad_norm": 0.234375, "learning_rate": 0.0011955224571304484, "loss": 0.9098, "step": 10064 }, { "epoch": 0.27009982825246887, "grad_norm": 0.2373046875, "learning_rate": 0.0011955190296837298, "loss": 0.824, "step": 10065 }, { "epoch": 0.2701266638042078, "grad_norm": 0.25390625, "learning_rate": 0.0011955156009306174, "loss": 0.8444, "step": 10066 }, { "epoch": 0.27015349935594674, "grad_norm": 0.21484375, "learning_rate": 0.0011955121708711195, "loss": 0.6908, "step": 10067 }, { "epoch": 0.2701803349076857, "grad_norm": 0.216796875, "learning_rate": 0.0011955087395052427, "loss": 0.747, "step": 10068 }, { "epoch": 0.27020717045942466, "grad_norm": 0.2265625, "learning_rate": 0.0011955053068329951, "loss": 0.8042, "step": 10069 }, { "epoch": 0.2702340060111636, "grad_norm": 0.2265625, "learning_rate": 0.001195501872854384, "loss": 0.841, "step": 10070 }, { "epoch": 0.27026084156290253, "grad_norm": 0.236328125, "learning_rate": 0.001195498437569417, "loss": 0.7112, "step": 10071 }, { "epoch": 0.27028767711464147, "grad_norm": 0.216796875, "learning_rate": 0.0011954950009781017, "loss": 0.745, "step": 10072 }, { "epoch": 0.2703145126663804, "grad_norm": 0.2333984375, "learning_rate": 0.0011954915630804455, "loss": 0.8062, "step": 10073 }, { "epoch": 0.27034134821811934, "grad_norm": 0.2314453125, "learning_rate": 0.001195488123876456, "loss": 0.808, "step": 10074 }, { "epoch": 0.27036818376985833, "grad_norm": 0.2109375, "learning_rate": 0.0011954846833661407, "loss": 0.8005, "step": 10075 }, { "epoch": 0.27039501932159726, "grad_norm": 0.232421875, "learning_rate": 0.0011954812415495071, "loss": 0.83, "step": 10076 }, { "epoch": 0.2704218548733362, "grad_norm": 0.216796875, "learning_rate": 0.001195477798426563, "loss": 0.7659, "step": 10077 }, { "epoch": 0.27044869042507513, "grad_norm": 0.25, "learning_rate": 0.0011954743539973158, "loss": 0.8154, "step": 10078 }, { "epoch": 0.27047552597681407, "grad_norm": 0.2353515625, "learning_rate": 0.0011954709082617732, "loss": 0.7597, "step": 10079 }, { "epoch": 0.270502361528553, "grad_norm": 0.232421875, "learning_rate": 0.0011954674612199424, "loss": 0.7381, "step": 10080 }, { "epoch": 0.270529197080292, "grad_norm": 0.2314453125, "learning_rate": 0.0011954640128718315, "loss": 0.8641, "step": 10081 }, { "epoch": 0.27055603263203093, "grad_norm": 0.2158203125, "learning_rate": 0.0011954605632174473, "loss": 0.7414, "step": 10082 }, { "epoch": 0.27058286818376986, "grad_norm": 0.220703125, "learning_rate": 0.001195457112256798, "loss": 0.6572, "step": 10083 }, { "epoch": 0.2706097037355088, "grad_norm": 0.2197265625, "learning_rate": 0.001195453659989891, "loss": 0.6753, "step": 10084 }, { "epoch": 0.27063653928724773, "grad_norm": 0.2333984375, "learning_rate": 0.0011954502064167338, "loss": 0.7763, "step": 10085 }, { "epoch": 0.27066337483898667, "grad_norm": 0.2353515625, "learning_rate": 0.001195446751537334, "loss": 0.7032, "step": 10086 }, { "epoch": 0.27069021039072566, "grad_norm": 0.2294921875, "learning_rate": 0.0011954432953516993, "loss": 0.8333, "step": 10087 }, { "epoch": 0.2707170459424646, "grad_norm": 0.2294921875, "learning_rate": 0.0011954398378598372, "loss": 0.7913, "step": 10088 }, { "epoch": 0.27074388149420353, "grad_norm": 0.2265625, "learning_rate": 0.0011954363790617553, "loss": 0.8498, "step": 10089 }, { "epoch": 0.27077071704594247, "grad_norm": 0.2451171875, "learning_rate": 0.001195432918957461, "loss": 0.822, "step": 10090 }, { "epoch": 0.2707975525976814, "grad_norm": 0.220703125, "learning_rate": 0.0011954294575469624, "loss": 0.7004, "step": 10091 }, { "epoch": 0.27082438814942034, "grad_norm": 0.2197265625, "learning_rate": 0.0011954259948302665, "loss": 0.7224, "step": 10092 }, { "epoch": 0.27085122370115927, "grad_norm": 0.1923828125, "learning_rate": 0.0011954225308073814, "loss": 0.617, "step": 10093 }, { "epoch": 0.27087805925289826, "grad_norm": 0.2412109375, "learning_rate": 0.001195419065478314, "loss": 0.8914, "step": 10094 }, { "epoch": 0.2709048948046372, "grad_norm": 0.2353515625, "learning_rate": 0.0011954155988430726, "loss": 0.7849, "step": 10095 }, { "epoch": 0.27093173035637613, "grad_norm": 0.2255859375, "learning_rate": 0.0011954121309016644, "loss": 0.7334, "step": 10096 }, { "epoch": 0.27095856590811507, "grad_norm": 0.2216796875, "learning_rate": 0.0011954086616540972, "loss": 0.7542, "step": 10097 }, { "epoch": 0.270985401459854, "grad_norm": 0.2216796875, "learning_rate": 0.0011954051911003787, "loss": 0.7468, "step": 10098 }, { "epoch": 0.27101223701159294, "grad_norm": 0.21484375, "learning_rate": 0.0011954017192405163, "loss": 0.7526, "step": 10099 }, { "epoch": 0.27103907256333193, "grad_norm": 0.22265625, "learning_rate": 0.0011953982460745176, "loss": 0.7712, "step": 10100 }, { "epoch": 0.27106590811507086, "grad_norm": 0.2431640625, "learning_rate": 0.0011953947716023907, "loss": 0.8236, "step": 10101 }, { "epoch": 0.2710927436668098, "grad_norm": 0.2255859375, "learning_rate": 0.0011953912958241424, "loss": 0.8503, "step": 10102 }, { "epoch": 0.27111957921854873, "grad_norm": 0.228515625, "learning_rate": 0.001195387818739781, "loss": 0.7961, "step": 10103 }, { "epoch": 0.27114641477028767, "grad_norm": 0.2177734375, "learning_rate": 0.001195384340349314, "loss": 0.7936, "step": 10104 }, { "epoch": 0.2711732503220266, "grad_norm": 0.2255859375, "learning_rate": 0.0011953808606527485, "loss": 0.7821, "step": 10105 }, { "epoch": 0.27120008587376554, "grad_norm": 0.2314453125, "learning_rate": 0.001195377379650093, "loss": 0.8674, "step": 10106 }, { "epoch": 0.27122692142550453, "grad_norm": 0.2451171875, "learning_rate": 0.0011953738973413544, "loss": 0.8652, "step": 10107 }, { "epoch": 0.27125375697724347, "grad_norm": 0.2255859375, "learning_rate": 0.0011953704137265407, "loss": 0.6657, "step": 10108 }, { "epoch": 0.2712805925289824, "grad_norm": 0.216796875, "learning_rate": 0.0011953669288056596, "loss": 0.7286, "step": 10109 }, { "epoch": 0.27130742808072134, "grad_norm": 0.2177734375, "learning_rate": 0.0011953634425787185, "loss": 0.7409, "step": 10110 }, { "epoch": 0.27133426363246027, "grad_norm": 0.232421875, "learning_rate": 0.0011953599550457252, "loss": 0.7836, "step": 10111 }, { "epoch": 0.2713610991841992, "grad_norm": 0.2353515625, "learning_rate": 0.0011953564662066873, "loss": 0.7858, "step": 10112 }, { "epoch": 0.2713879347359382, "grad_norm": 0.2275390625, "learning_rate": 0.0011953529760616124, "loss": 0.7671, "step": 10113 }, { "epoch": 0.27141477028767713, "grad_norm": 0.212890625, "learning_rate": 0.0011953494846105083, "loss": 0.6644, "step": 10114 }, { "epoch": 0.27144160583941607, "grad_norm": 0.244140625, "learning_rate": 0.0011953459918533826, "loss": 0.802, "step": 10115 }, { "epoch": 0.271468441391155, "grad_norm": 0.2265625, "learning_rate": 0.001195342497790243, "loss": 0.8604, "step": 10116 }, { "epoch": 0.27149527694289394, "grad_norm": 0.22265625, "learning_rate": 0.0011953390024210969, "loss": 0.7419, "step": 10117 }, { "epoch": 0.2715221124946329, "grad_norm": 0.2373046875, "learning_rate": 0.0011953355057459523, "loss": 0.8175, "step": 10118 }, { "epoch": 0.2715489480463718, "grad_norm": 0.224609375, "learning_rate": 0.0011953320077648167, "loss": 0.7814, "step": 10119 }, { "epoch": 0.2715757835981108, "grad_norm": 0.2177734375, "learning_rate": 0.001195328508477698, "loss": 0.6687, "step": 10120 }, { "epoch": 0.27160261914984973, "grad_norm": 0.224609375, "learning_rate": 0.0011953250078846035, "loss": 0.7499, "step": 10121 }, { "epoch": 0.27162945470158867, "grad_norm": 0.2138671875, "learning_rate": 0.0011953215059855413, "loss": 0.6882, "step": 10122 }, { "epoch": 0.2716562902533276, "grad_norm": 0.2255859375, "learning_rate": 0.0011953180027805186, "loss": 0.8225, "step": 10123 }, { "epoch": 0.27168312580506654, "grad_norm": 0.2158203125, "learning_rate": 0.0011953144982695435, "loss": 0.7187, "step": 10124 }, { "epoch": 0.2717099613568055, "grad_norm": 0.2060546875, "learning_rate": 0.0011953109924526235, "loss": 0.7165, "step": 10125 }, { "epoch": 0.27173679690854446, "grad_norm": 0.23046875, "learning_rate": 0.0011953074853297664, "loss": 0.7655, "step": 10126 }, { "epoch": 0.2717636324602834, "grad_norm": 0.2236328125, "learning_rate": 0.0011953039769009798, "loss": 0.6299, "step": 10127 }, { "epoch": 0.27179046801202233, "grad_norm": 0.22265625, "learning_rate": 0.0011953004671662712, "loss": 0.7279, "step": 10128 }, { "epoch": 0.27181730356376127, "grad_norm": 0.2265625, "learning_rate": 0.0011952969561256487, "loss": 0.7823, "step": 10129 }, { "epoch": 0.2718441391155002, "grad_norm": 0.23046875, "learning_rate": 0.00119529344377912, "loss": 0.8469, "step": 10130 }, { "epoch": 0.27187097466723914, "grad_norm": 0.216796875, "learning_rate": 0.0011952899301266925, "loss": 0.6908, "step": 10131 }, { "epoch": 0.2718978102189781, "grad_norm": 0.2314453125, "learning_rate": 0.001195286415168374, "loss": 0.8477, "step": 10132 }, { "epoch": 0.27192464577071707, "grad_norm": 0.232421875, "learning_rate": 0.001195282898904172, "loss": 0.7764, "step": 10133 }, { "epoch": 0.271951481322456, "grad_norm": 0.21875, "learning_rate": 0.0011952793813340948, "loss": 0.7054, "step": 10134 }, { "epoch": 0.27197831687419494, "grad_norm": 0.220703125, "learning_rate": 0.0011952758624581496, "loss": 0.7903, "step": 10135 }, { "epoch": 0.27200515242593387, "grad_norm": 0.21875, "learning_rate": 0.0011952723422763442, "loss": 0.7705, "step": 10136 }, { "epoch": 0.2720319879776728, "grad_norm": 0.216796875, "learning_rate": 0.0011952688207886868, "loss": 0.7339, "step": 10137 }, { "epoch": 0.27205882352941174, "grad_norm": 0.232421875, "learning_rate": 0.0011952652979951841, "loss": 0.858, "step": 10138 }, { "epoch": 0.27208565908115073, "grad_norm": 0.2431640625, "learning_rate": 0.001195261773895845, "loss": 0.8852, "step": 10139 }, { "epoch": 0.27211249463288967, "grad_norm": 0.2216796875, "learning_rate": 0.0011952582484906766, "loss": 0.7224, "step": 10140 }, { "epoch": 0.2721393301846286, "grad_norm": 0.2265625, "learning_rate": 0.0011952547217796866, "loss": 0.8104, "step": 10141 }, { "epoch": 0.27216616573636754, "grad_norm": 0.23828125, "learning_rate": 0.001195251193762883, "loss": 0.8696, "step": 10142 }, { "epoch": 0.2721930012881065, "grad_norm": 0.2275390625, "learning_rate": 0.0011952476644402732, "loss": 0.8211, "step": 10143 }, { "epoch": 0.2722198368398454, "grad_norm": 0.2236328125, "learning_rate": 0.0011952441338118653, "loss": 0.7975, "step": 10144 }, { "epoch": 0.27224667239158434, "grad_norm": 0.22265625, "learning_rate": 0.0011952406018776668, "loss": 0.7759, "step": 10145 }, { "epoch": 0.27227350794332333, "grad_norm": 0.23046875, "learning_rate": 0.0011952370686376856, "loss": 0.8155, "step": 10146 }, { "epoch": 0.27230034349506227, "grad_norm": 0.22265625, "learning_rate": 0.0011952335340919295, "loss": 0.6559, "step": 10147 }, { "epoch": 0.2723271790468012, "grad_norm": 0.2275390625, "learning_rate": 0.001195229998240406, "loss": 0.7719, "step": 10148 }, { "epoch": 0.27235401459854014, "grad_norm": 0.2216796875, "learning_rate": 0.0011952264610831233, "loss": 0.7369, "step": 10149 }, { "epoch": 0.2723808501502791, "grad_norm": 0.2119140625, "learning_rate": 0.0011952229226200885, "loss": 0.6843, "step": 10150 }, { "epoch": 0.272407685702018, "grad_norm": 0.23828125, "learning_rate": 0.0011952193828513099, "loss": 0.8116, "step": 10151 }, { "epoch": 0.272434521253757, "grad_norm": 0.2275390625, "learning_rate": 0.001195215841776795, "loss": 0.7633, "step": 10152 }, { "epoch": 0.27246135680549594, "grad_norm": 0.208984375, "learning_rate": 0.0011952122993965518, "loss": 0.655, "step": 10153 }, { "epoch": 0.27248819235723487, "grad_norm": 0.236328125, "learning_rate": 0.0011952087557105875, "loss": 0.794, "step": 10154 }, { "epoch": 0.2725150279089738, "grad_norm": 0.21484375, "learning_rate": 0.0011952052107189108, "loss": 0.7488, "step": 10155 }, { "epoch": 0.27254186346071274, "grad_norm": 0.2353515625, "learning_rate": 0.001195201664421529, "loss": 0.7303, "step": 10156 }, { "epoch": 0.2725686990124517, "grad_norm": 0.2177734375, "learning_rate": 0.0011951981168184494, "loss": 0.7432, "step": 10157 }, { "epoch": 0.27259553456419067, "grad_norm": 0.2138671875, "learning_rate": 0.0011951945679096808, "loss": 0.7065, "step": 10158 }, { "epoch": 0.2726223701159296, "grad_norm": 0.232421875, "learning_rate": 0.00119519101769523, "loss": 0.8235, "step": 10159 }, { "epoch": 0.27264920566766854, "grad_norm": 0.22265625, "learning_rate": 0.0011951874661751054, "loss": 0.7811, "step": 10160 }, { "epoch": 0.2726760412194075, "grad_norm": 0.2177734375, "learning_rate": 0.0011951839133493144, "loss": 0.6873, "step": 10161 }, { "epoch": 0.2727028767711464, "grad_norm": 0.2353515625, "learning_rate": 0.0011951803592178654, "loss": 0.8258, "step": 10162 }, { "epoch": 0.27272971232288534, "grad_norm": 0.216796875, "learning_rate": 0.0011951768037807655, "loss": 0.7244, "step": 10163 }, { "epoch": 0.2727565478746243, "grad_norm": 0.2255859375, "learning_rate": 0.0011951732470380227, "loss": 0.728, "step": 10164 }, { "epoch": 0.27278338342636327, "grad_norm": 0.2431640625, "learning_rate": 0.0011951696889896451, "loss": 0.7981, "step": 10165 }, { "epoch": 0.2728102189781022, "grad_norm": 0.2109375, "learning_rate": 0.0011951661296356402, "loss": 0.6594, "step": 10166 }, { "epoch": 0.27283705452984114, "grad_norm": 0.2216796875, "learning_rate": 0.001195162568976016, "loss": 0.8131, "step": 10167 }, { "epoch": 0.2728638900815801, "grad_norm": 0.228515625, "learning_rate": 0.0011951590070107803, "loss": 0.6755, "step": 10168 }, { "epoch": 0.272890725633319, "grad_norm": 0.22265625, "learning_rate": 0.0011951554437399407, "loss": 0.7271, "step": 10169 }, { "epoch": 0.27291756118505794, "grad_norm": 0.2216796875, "learning_rate": 0.0011951518791635052, "loss": 0.7796, "step": 10170 }, { "epoch": 0.27294439673679693, "grad_norm": 0.2041015625, "learning_rate": 0.0011951483132814815, "loss": 0.6708, "step": 10171 }, { "epoch": 0.27297123228853587, "grad_norm": 0.2353515625, "learning_rate": 0.0011951447460938776, "loss": 0.901, "step": 10172 }, { "epoch": 0.2729980678402748, "grad_norm": 0.2314453125, "learning_rate": 0.0011951411776007011, "loss": 0.7779, "step": 10173 }, { "epoch": 0.27302490339201374, "grad_norm": 0.23046875, "learning_rate": 0.00119513760780196, "loss": 0.8105, "step": 10174 }, { "epoch": 0.2730517389437527, "grad_norm": 0.2041015625, "learning_rate": 0.0011951340366976623, "loss": 0.7326, "step": 10175 }, { "epoch": 0.2730785744954916, "grad_norm": 0.2314453125, "learning_rate": 0.0011951304642878154, "loss": 0.8288, "step": 10176 }, { "epoch": 0.27310541004723055, "grad_norm": 0.2216796875, "learning_rate": 0.0011951268905724271, "loss": 0.7381, "step": 10177 }, { "epoch": 0.27313224559896954, "grad_norm": 0.228515625, "learning_rate": 0.001195123315551506, "loss": 0.8442, "step": 10178 }, { "epoch": 0.27315908115070847, "grad_norm": 0.2255859375, "learning_rate": 0.0011951197392250591, "loss": 0.8758, "step": 10179 }, { "epoch": 0.2731859167024474, "grad_norm": 0.220703125, "learning_rate": 0.0011951161615930946, "loss": 0.7441, "step": 10180 }, { "epoch": 0.27321275225418634, "grad_norm": 0.2333984375, "learning_rate": 0.0011951125826556205, "loss": 0.757, "step": 10181 }, { "epoch": 0.2732395878059253, "grad_norm": 0.228515625, "learning_rate": 0.0011951090024126445, "loss": 0.7887, "step": 10182 }, { "epoch": 0.2732664233576642, "grad_norm": 0.2236328125, "learning_rate": 0.0011951054208641741, "loss": 0.8085, "step": 10183 }, { "epoch": 0.2732932589094032, "grad_norm": 0.205078125, "learning_rate": 0.0011951018380102177, "loss": 0.667, "step": 10184 }, { "epoch": 0.27332009446114214, "grad_norm": 0.20703125, "learning_rate": 0.0011950982538507832, "loss": 0.7321, "step": 10185 }, { "epoch": 0.2733469300128811, "grad_norm": 0.2392578125, "learning_rate": 0.0011950946683858777, "loss": 0.8064, "step": 10186 }, { "epoch": 0.27337376556462, "grad_norm": 0.2197265625, "learning_rate": 0.0011950910816155098, "loss": 0.6853, "step": 10187 }, { "epoch": 0.27340060111635894, "grad_norm": 0.2265625, "learning_rate": 0.0011950874935396873, "loss": 0.8029, "step": 10188 }, { "epoch": 0.2734274366680979, "grad_norm": 0.2314453125, "learning_rate": 0.0011950839041584178, "loss": 0.8215, "step": 10189 }, { "epoch": 0.2734542722198368, "grad_norm": 0.1953125, "learning_rate": 0.0011950803134717094, "loss": 0.6497, "step": 10190 }, { "epoch": 0.2734811077715758, "grad_norm": 0.232421875, "learning_rate": 0.0011950767214795697, "loss": 0.8347, "step": 10191 }, { "epoch": 0.27350794332331474, "grad_norm": 0.216796875, "learning_rate": 0.0011950731281820067, "loss": 0.7651, "step": 10192 }, { "epoch": 0.2735347788750537, "grad_norm": 0.2109375, "learning_rate": 0.0011950695335790284, "loss": 0.7104, "step": 10193 }, { "epoch": 0.2735616144267926, "grad_norm": 0.208984375, "learning_rate": 0.0011950659376706427, "loss": 0.7689, "step": 10194 }, { "epoch": 0.27358844997853154, "grad_norm": 0.2255859375, "learning_rate": 0.0011950623404568574, "loss": 0.744, "step": 10195 }, { "epoch": 0.2736152855302705, "grad_norm": 0.2197265625, "learning_rate": 0.0011950587419376805, "loss": 0.7466, "step": 10196 }, { "epoch": 0.27364212108200947, "grad_norm": 0.2294921875, "learning_rate": 0.0011950551421131194, "loss": 0.7865, "step": 10197 }, { "epoch": 0.2736689566337484, "grad_norm": 0.21875, "learning_rate": 0.0011950515409831827, "loss": 0.7523, "step": 10198 }, { "epoch": 0.27369579218548734, "grad_norm": 0.21875, "learning_rate": 0.001195047938547878, "loss": 0.7677, "step": 10199 }, { "epoch": 0.2737226277372263, "grad_norm": 0.2265625, "learning_rate": 0.001195044334807213, "loss": 0.7729, "step": 10200 }, { "epoch": 0.2737494632889652, "grad_norm": 0.23828125, "learning_rate": 0.001195040729761196, "loss": 0.8698, "step": 10201 }, { "epoch": 0.27377629884070415, "grad_norm": 0.236328125, "learning_rate": 0.0011950371234098344, "loss": 0.795, "step": 10202 }, { "epoch": 0.2738031343924431, "grad_norm": 0.2158203125, "learning_rate": 0.0011950335157531367, "loss": 0.7072, "step": 10203 }, { "epoch": 0.2738299699441821, "grad_norm": 0.216796875, "learning_rate": 0.0011950299067911105, "loss": 0.8118, "step": 10204 }, { "epoch": 0.273856805495921, "grad_norm": 0.2138671875, "learning_rate": 0.0011950262965237637, "loss": 0.768, "step": 10205 }, { "epoch": 0.27388364104765994, "grad_norm": 0.20703125, "learning_rate": 0.001195022684951104, "loss": 0.6732, "step": 10206 }, { "epoch": 0.2739104765993989, "grad_norm": 0.2197265625, "learning_rate": 0.0011950190720731398, "loss": 0.7683, "step": 10207 }, { "epoch": 0.2739373121511378, "grad_norm": 0.2255859375, "learning_rate": 0.001195015457889879, "loss": 0.7931, "step": 10208 }, { "epoch": 0.27396414770287675, "grad_norm": 0.2255859375, "learning_rate": 0.0011950118424013292, "loss": 0.7872, "step": 10209 }, { "epoch": 0.27399098325461574, "grad_norm": 0.234375, "learning_rate": 0.0011950082256074986, "loss": 0.793, "step": 10210 }, { "epoch": 0.2740178188063547, "grad_norm": 0.2138671875, "learning_rate": 0.0011950046075083947, "loss": 0.7542, "step": 10211 }, { "epoch": 0.2740446543580936, "grad_norm": 0.2314453125, "learning_rate": 0.001195000988104026, "loss": 0.8364, "step": 10212 }, { "epoch": 0.27407148990983254, "grad_norm": 0.22265625, "learning_rate": 0.0011949973673944, "loss": 0.8195, "step": 10213 }, { "epoch": 0.2740983254615715, "grad_norm": 0.21484375, "learning_rate": 0.0011949937453795251, "loss": 0.7202, "step": 10214 }, { "epoch": 0.2741251610133104, "grad_norm": 0.2236328125, "learning_rate": 0.0011949901220594085, "loss": 0.7917, "step": 10215 }, { "epoch": 0.2741519965650494, "grad_norm": 0.208984375, "learning_rate": 0.001194986497434059, "loss": 0.7084, "step": 10216 }, { "epoch": 0.27417883211678834, "grad_norm": 0.2333984375, "learning_rate": 0.0011949828715034842, "loss": 0.7813, "step": 10217 }, { "epoch": 0.2742056676685273, "grad_norm": 0.2275390625, "learning_rate": 0.0011949792442676919, "loss": 0.8104, "step": 10218 }, { "epoch": 0.2742325032202662, "grad_norm": 0.240234375, "learning_rate": 0.0011949756157266902, "loss": 0.8364, "step": 10219 }, { "epoch": 0.27425933877200515, "grad_norm": 0.20703125, "learning_rate": 0.001194971985880487, "loss": 0.7427, "step": 10220 }, { "epoch": 0.2742861743237441, "grad_norm": 0.23046875, "learning_rate": 0.0011949683547290903, "loss": 0.8216, "step": 10221 }, { "epoch": 0.274313009875483, "grad_norm": 0.21484375, "learning_rate": 0.001194964722272508, "loss": 0.7163, "step": 10222 }, { "epoch": 0.274339845427222, "grad_norm": 0.2392578125, "learning_rate": 0.0011949610885107485, "loss": 0.9955, "step": 10223 }, { "epoch": 0.27436668097896094, "grad_norm": 0.22265625, "learning_rate": 0.0011949574534438191, "loss": 0.7596, "step": 10224 }, { "epoch": 0.2743935165306999, "grad_norm": 0.2255859375, "learning_rate": 0.001194953817071728, "loss": 0.7777, "step": 10225 }, { "epoch": 0.2744203520824388, "grad_norm": 0.2119140625, "learning_rate": 0.0011949501793944836, "loss": 0.7203, "step": 10226 }, { "epoch": 0.27444718763417775, "grad_norm": 0.333984375, "learning_rate": 0.0011949465404120932, "loss": 0.9009, "step": 10227 }, { "epoch": 0.2744740231859167, "grad_norm": 0.318359375, "learning_rate": 0.0011949429001245654, "loss": 0.9796, "step": 10228 }, { "epoch": 0.2745008587376557, "grad_norm": 0.265625, "learning_rate": 0.0011949392585319078, "loss": 0.8288, "step": 10229 }, { "epoch": 0.2745276942893946, "grad_norm": 0.2578125, "learning_rate": 0.0011949356156341286, "loss": 0.7113, "step": 10230 }, { "epoch": 0.27455452984113354, "grad_norm": 0.328125, "learning_rate": 0.0011949319714312356, "loss": 0.8598, "step": 10231 }, { "epoch": 0.2745813653928725, "grad_norm": 0.314453125, "learning_rate": 0.0011949283259232368, "loss": 0.7753, "step": 10232 }, { "epoch": 0.2746082009446114, "grad_norm": 0.369140625, "learning_rate": 0.0011949246791101403, "loss": 0.7099, "step": 10233 }, { "epoch": 0.27463503649635035, "grad_norm": 0.30078125, "learning_rate": 0.0011949210309919543, "loss": 0.8309, "step": 10234 }, { "epoch": 0.2746618720480893, "grad_norm": 0.236328125, "learning_rate": 0.0011949173815686864, "loss": 0.761, "step": 10235 }, { "epoch": 0.2746887075998283, "grad_norm": 0.28125, "learning_rate": 0.0011949137308403448, "loss": 0.7773, "step": 10236 }, { "epoch": 0.2747155431515672, "grad_norm": 0.25390625, "learning_rate": 0.0011949100788069377, "loss": 0.7765, "step": 10237 }, { "epoch": 0.27474237870330614, "grad_norm": 0.26953125, "learning_rate": 0.0011949064254684727, "loss": 0.7044, "step": 10238 }, { "epoch": 0.2747692142550451, "grad_norm": 0.265625, "learning_rate": 0.001194902770824958, "loss": 0.8633, "step": 10239 }, { "epoch": 0.274796049806784, "grad_norm": 0.2265625, "learning_rate": 0.0011948991148764017, "loss": 0.7644, "step": 10240 }, { "epoch": 0.27482288535852295, "grad_norm": 0.2392578125, "learning_rate": 0.001194895457622812, "loss": 0.8211, "step": 10241 }, { "epoch": 0.27484972091026194, "grad_norm": 0.2490234375, "learning_rate": 0.0011948917990641963, "loss": 0.7919, "step": 10242 }, { "epoch": 0.2748765564620009, "grad_norm": 0.2265625, "learning_rate": 0.001194888139200563, "loss": 0.7609, "step": 10243 }, { "epoch": 0.2749033920137398, "grad_norm": 0.2294921875, "learning_rate": 0.0011948844780319205, "loss": 0.751, "step": 10244 }, { "epoch": 0.27493022756547875, "grad_norm": 0.2275390625, "learning_rate": 0.0011948808155582762, "loss": 0.6956, "step": 10245 }, { "epoch": 0.2749570631172177, "grad_norm": 0.2294921875, "learning_rate": 0.0011948771517796386, "loss": 0.794, "step": 10246 }, { "epoch": 0.2749838986689566, "grad_norm": 0.234375, "learning_rate": 0.0011948734866960152, "loss": 0.7484, "step": 10247 }, { "epoch": 0.27501073422069555, "grad_norm": 0.2294921875, "learning_rate": 0.0011948698203074148, "loss": 0.7445, "step": 10248 }, { "epoch": 0.27503756977243454, "grad_norm": 0.2373046875, "learning_rate": 0.001194866152613845, "loss": 0.832, "step": 10249 }, { "epoch": 0.2750644053241735, "grad_norm": 0.2431640625, "learning_rate": 0.0011948624836153135, "loss": 0.8689, "step": 10250 }, { "epoch": 0.2750912408759124, "grad_norm": 0.20703125, "learning_rate": 0.001194858813311829, "loss": 0.6416, "step": 10251 }, { "epoch": 0.27511807642765135, "grad_norm": 0.232421875, "learning_rate": 0.001194855141703399, "loss": 0.8424, "step": 10252 }, { "epoch": 0.2751449119793903, "grad_norm": 0.232421875, "learning_rate": 0.0011948514687900322, "loss": 0.7763, "step": 10253 }, { "epoch": 0.2751717475311292, "grad_norm": 0.2265625, "learning_rate": 0.0011948477945717363, "loss": 0.7566, "step": 10254 }, { "epoch": 0.2751985830828682, "grad_norm": 0.2490234375, "learning_rate": 0.001194844119048519, "loss": 0.8424, "step": 10255 }, { "epoch": 0.27522541863460714, "grad_norm": 0.2421875, "learning_rate": 0.001194840442220389, "loss": 0.7922, "step": 10256 }, { "epoch": 0.2752522541863461, "grad_norm": 0.255859375, "learning_rate": 0.0011948367640873538, "loss": 0.8438, "step": 10257 }, { "epoch": 0.275279089738085, "grad_norm": 0.2490234375, "learning_rate": 0.001194833084649422, "loss": 0.931, "step": 10258 }, { "epoch": 0.27530592528982395, "grad_norm": 0.2158203125, "learning_rate": 0.0011948294039066014, "loss": 0.7149, "step": 10259 }, { "epoch": 0.2753327608415629, "grad_norm": 0.22265625, "learning_rate": 0.0011948257218589, "loss": 0.7136, "step": 10260 }, { "epoch": 0.2753595963933018, "grad_norm": 0.2255859375, "learning_rate": 0.0011948220385063258, "loss": 0.7745, "step": 10261 }, { "epoch": 0.2753864319450408, "grad_norm": 0.2451171875, "learning_rate": 0.0011948183538488873, "loss": 0.8633, "step": 10262 }, { "epoch": 0.27541326749677975, "grad_norm": 0.2431640625, "learning_rate": 0.0011948146678865924, "loss": 0.8837, "step": 10263 }, { "epoch": 0.2754401030485187, "grad_norm": 0.2255859375, "learning_rate": 0.0011948109806194488, "loss": 0.7509, "step": 10264 }, { "epoch": 0.2754669386002576, "grad_norm": 0.21484375, "learning_rate": 0.0011948072920474652, "loss": 0.7176, "step": 10265 }, { "epoch": 0.27549377415199655, "grad_norm": 0.2333984375, "learning_rate": 0.0011948036021706492, "loss": 0.8292, "step": 10266 }, { "epoch": 0.2755206097037355, "grad_norm": 0.2216796875, "learning_rate": 0.0011947999109890091, "loss": 0.7987, "step": 10267 }, { "epoch": 0.2755474452554745, "grad_norm": 0.228515625, "learning_rate": 0.0011947962185025532, "loss": 0.7656, "step": 10268 }, { "epoch": 0.2755742808072134, "grad_norm": 0.2294921875, "learning_rate": 0.0011947925247112893, "loss": 0.7561, "step": 10269 }, { "epoch": 0.27560111635895235, "grad_norm": 0.2353515625, "learning_rate": 0.0011947888296152255, "loss": 0.8144, "step": 10270 }, { "epoch": 0.2756279519106913, "grad_norm": 0.22265625, "learning_rate": 0.00119478513321437, "loss": 0.7739, "step": 10271 }, { "epoch": 0.2756547874624302, "grad_norm": 0.2412109375, "learning_rate": 0.0011947814355087309, "loss": 0.8111, "step": 10272 }, { "epoch": 0.27568162301416915, "grad_norm": 0.224609375, "learning_rate": 0.0011947777364983165, "loss": 0.7574, "step": 10273 }, { "epoch": 0.2757084585659081, "grad_norm": 0.244140625, "learning_rate": 0.0011947740361831344, "loss": 0.9263, "step": 10274 }, { "epoch": 0.2757352941176471, "grad_norm": 0.2333984375, "learning_rate": 0.001194770334563193, "loss": 0.8143, "step": 10275 }, { "epoch": 0.275762129669386, "grad_norm": 0.2294921875, "learning_rate": 0.001194766631638501, "loss": 0.8364, "step": 10276 }, { "epoch": 0.27578896522112495, "grad_norm": 0.2294921875, "learning_rate": 0.0011947629274090656, "loss": 0.8608, "step": 10277 }, { "epoch": 0.2758158007728639, "grad_norm": 0.2314453125, "learning_rate": 0.0011947592218748955, "loss": 0.8183, "step": 10278 }, { "epoch": 0.2758426363246028, "grad_norm": 0.2060546875, "learning_rate": 0.0011947555150359985, "loss": 0.7359, "step": 10279 }, { "epoch": 0.27586947187634175, "grad_norm": 0.2177734375, "learning_rate": 0.001194751806892383, "loss": 0.7491, "step": 10280 }, { "epoch": 0.27589630742808074, "grad_norm": 0.2021484375, "learning_rate": 0.001194748097444057, "loss": 0.6973, "step": 10281 }, { "epoch": 0.2759231429798197, "grad_norm": 0.2138671875, "learning_rate": 0.0011947443866910286, "loss": 0.6904, "step": 10282 }, { "epoch": 0.2759499785315586, "grad_norm": 0.2099609375, "learning_rate": 0.0011947406746333057, "loss": 0.6721, "step": 10283 }, { "epoch": 0.27597681408329755, "grad_norm": 0.2197265625, "learning_rate": 0.0011947369612708974, "loss": 0.7654, "step": 10284 }, { "epoch": 0.2760036496350365, "grad_norm": 0.2294921875, "learning_rate": 0.0011947332466038106, "loss": 0.7954, "step": 10285 }, { "epoch": 0.2760304851867754, "grad_norm": 0.234375, "learning_rate": 0.0011947295306320542, "loss": 0.7998, "step": 10286 }, { "epoch": 0.2760573207385144, "grad_norm": 0.21484375, "learning_rate": 0.0011947258133556363, "loss": 0.6744, "step": 10287 }, { "epoch": 0.27608415629025335, "grad_norm": 0.2314453125, "learning_rate": 0.001194722094774565, "loss": 0.8417, "step": 10288 }, { "epoch": 0.2761109918419923, "grad_norm": 0.244140625, "learning_rate": 0.0011947183748888483, "loss": 0.8944, "step": 10289 }, { "epoch": 0.2761378273937312, "grad_norm": 0.2236328125, "learning_rate": 0.0011947146536984944, "loss": 0.798, "step": 10290 }, { "epoch": 0.27616466294547015, "grad_norm": 0.2236328125, "learning_rate": 0.0011947109312035115, "loss": 0.8514, "step": 10291 }, { "epoch": 0.2761914984972091, "grad_norm": 0.232421875, "learning_rate": 0.0011947072074039076, "loss": 0.9018, "step": 10292 }, { "epoch": 0.276218334048948, "grad_norm": 0.2197265625, "learning_rate": 0.0011947034822996914, "loss": 0.7252, "step": 10293 }, { "epoch": 0.276245169600687, "grad_norm": 0.2255859375, "learning_rate": 0.0011946997558908705, "loss": 0.7987, "step": 10294 }, { "epoch": 0.27627200515242595, "grad_norm": 0.236328125, "learning_rate": 0.0011946960281774535, "loss": 0.8957, "step": 10295 }, { "epoch": 0.2762988407041649, "grad_norm": 0.2021484375, "learning_rate": 0.001194692299159448, "loss": 0.6891, "step": 10296 }, { "epoch": 0.2763256762559038, "grad_norm": 0.2265625, "learning_rate": 0.001194688568836863, "loss": 0.8187, "step": 10297 }, { "epoch": 0.27635251180764275, "grad_norm": 0.2490234375, "learning_rate": 0.001194684837209706, "loss": 0.8531, "step": 10298 }, { "epoch": 0.2763793473593817, "grad_norm": 0.23828125, "learning_rate": 0.0011946811042779855, "loss": 0.7378, "step": 10299 }, { "epoch": 0.2764061829111207, "grad_norm": 0.2158203125, "learning_rate": 0.0011946773700417098, "loss": 0.684, "step": 10300 }, { "epoch": 0.2764330184628596, "grad_norm": 0.23046875, "learning_rate": 0.0011946736345008868, "loss": 0.8056, "step": 10301 }, { "epoch": 0.27645985401459855, "grad_norm": 0.2236328125, "learning_rate": 0.0011946698976555245, "loss": 0.7499, "step": 10302 }, { "epoch": 0.2764866895663375, "grad_norm": 0.212890625, "learning_rate": 0.0011946661595056318, "loss": 0.7742, "step": 10303 }, { "epoch": 0.2765135251180764, "grad_norm": 0.2255859375, "learning_rate": 0.0011946624200512162, "loss": 0.7664, "step": 10304 }, { "epoch": 0.27654036066981535, "grad_norm": 0.2255859375, "learning_rate": 0.0011946586792922862, "loss": 0.8084, "step": 10305 }, { "epoch": 0.2765671962215543, "grad_norm": 0.2080078125, "learning_rate": 0.0011946549372288504, "loss": 0.7219, "step": 10306 }, { "epoch": 0.2765940317732933, "grad_norm": 0.23046875, "learning_rate": 0.0011946511938609162, "loss": 0.8111, "step": 10307 }, { "epoch": 0.2766208673250322, "grad_norm": 0.2236328125, "learning_rate": 0.0011946474491884925, "loss": 0.8239, "step": 10308 }, { "epoch": 0.27664770287677115, "grad_norm": 0.2294921875, "learning_rate": 0.0011946437032115868, "loss": 0.8227, "step": 10309 }, { "epoch": 0.2766745384285101, "grad_norm": 0.2021484375, "learning_rate": 0.0011946399559302082, "loss": 0.6846, "step": 10310 }, { "epoch": 0.276701373980249, "grad_norm": 0.224609375, "learning_rate": 0.0011946362073443643, "loss": 0.762, "step": 10311 }, { "epoch": 0.27672820953198796, "grad_norm": 0.2451171875, "learning_rate": 0.0011946324574540635, "loss": 0.9199, "step": 10312 }, { "epoch": 0.27675504508372695, "grad_norm": 0.2294921875, "learning_rate": 0.0011946287062593141, "loss": 0.8127, "step": 10313 }, { "epoch": 0.2767818806354659, "grad_norm": 0.2294921875, "learning_rate": 0.0011946249537601243, "loss": 0.7591, "step": 10314 }, { "epoch": 0.2768087161872048, "grad_norm": 0.2138671875, "learning_rate": 0.0011946211999565022, "loss": 0.7274, "step": 10315 }, { "epoch": 0.27683555173894375, "grad_norm": 0.23046875, "learning_rate": 0.0011946174448484565, "loss": 0.8352, "step": 10316 }, { "epoch": 0.2768623872906827, "grad_norm": 0.2041015625, "learning_rate": 0.0011946136884359946, "loss": 0.7305, "step": 10317 }, { "epoch": 0.2768892228424216, "grad_norm": 0.2333984375, "learning_rate": 0.0011946099307191254, "loss": 0.8483, "step": 10318 }, { "epoch": 0.27691605839416056, "grad_norm": 0.2138671875, "learning_rate": 0.0011946061716978566, "loss": 0.6733, "step": 10319 }, { "epoch": 0.27694289394589955, "grad_norm": 0.228515625, "learning_rate": 0.0011946024113721973, "loss": 0.7755, "step": 10320 }, { "epoch": 0.2769697294976385, "grad_norm": 0.2255859375, "learning_rate": 0.0011945986497421549, "loss": 0.7687, "step": 10321 }, { "epoch": 0.2769965650493774, "grad_norm": 0.23046875, "learning_rate": 0.0011945948868077383, "loss": 0.774, "step": 10322 }, { "epoch": 0.27702340060111635, "grad_norm": 0.234375, "learning_rate": 0.0011945911225689553, "loss": 0.8257, "step": 10323 }, { "epoch": 0.2770502361528553, "grad_norm": 0.23828125, "learning_rate": 0.0011945873570258143, "loss": 0.8884, "step": 10324 }, { "epoch": 0.2770770717045942, "grad_norm": 0.2236328125, "learning_rate": 0.0011945835901783235, "loss": 0.824, "step": 10325 }, { "epoch": 0.2771039072563332, "grad_norm": 0.2138671875, "learning_rate": 0.0011945798220264914, "loss": 0.7057, "step": 10326 }, { "epoch": 0.27713074280807215, "grad_norm": 0.2109375, "learning_rate": 0.0011945760525703258, "loss": 0.7472, "step": 10327 }, { "epoch": 0.2771575783598111, "grad_norm": 0.2041015625, "learning_rate": 0.0011945722818098355, "loss": 0.6401, "step": 10328 }, { "epoch": 0.27718441391155, "grad_norm": 0.2255859375, "learning_rate": 0.0011945685097450285, "loss": 0.7972, "step": 10329 }, { "epoch": 0.27721124946328896, "grad_norm": 0.236328125, "learning_rate": 0.0011945647363759131, "loss": 0.8157, "step": 10330 }, { "epoch": 0.2772380850150279, "grad_norm": 0.2109375, "learning_rate": 0.0011945609617024977, "loss": 0.7046, "step": 10331 }, { "epoch": 0.2772649205667668, "grad_norm": 0.2265625, "learning_rate": 0.0011945571857247903, "loss": 0.8237, "step": 10332 }, { "epoch": 0.2772917561185058, "grad_norm": 0.2265625, "learning_rate": 0.0011945534084427994, "loss": 0.7913, "step": 10333 }, { "epoch": 0.27731859167024475, "grad_norm": 0.205078125, "learning_rate": 0.0011945496298565333, "loss": 0.6645, "step": 10334 }, { "epoch": 0.2773454272219837, "grad_norm": 0.228515625, "learning_rate": 0.0011945458499660003, "loss": 0.7799, "step": 10335 }, { "epoch": 0.2773722627737226, "grad_norm": 0.23046875, "learning_rate": 0.0011945420687712086, "loss": 0.8501, "step": 10336 }, { "epoch": 0.27739909832546156, "grad_norm": 0.248046875, "learning_rate": 0.0011945382862721663, "loss": 0.8723, "step": 10337 }, { "epoch": 0.2774259338772005, "grad_norm": 0.2275390625, "learning_rate": 0.001194534502468882, "loss": 0.7141, "step": 10338 }, { "epoch": 0.2774527694289395, "grad_norm": 0.2158203125, "learning_rate": 0.0011945307173613642, "loss": 0.7106, "step": 10339 }, { "epoch": 0.2774796049806784, "grad_norm": 0.2236328125, "learning_rate": 0.0011945269309496206, "loss": 0.7879, "step": 10340 }, { "epoch": 0.27750644053241735, "grad_norm": 0.216796875, "learning_rate": 0.00119452314323366, "loss": 0.7008, "step": 10341 }, { "epoch": 0.2775332760841563, "grad_norm": 0.2392578125, "learning_rate": 0.0011945193542134904, "loss": 0.89, "step": 10342 }, { "epoch": 0.2775601116358952, "grad_norm": 0.23828125, "learning_rate": 0.0011945155638891202, "loss": 0.8699, "step": 10343 }, { "epoch": 0.27758694718763416, "grad_norm": 0.2275390625, "learning_rate": 0.001194511772260558, "loss": 0.809, "step": 10344 }, { "epoch": 0.2776137827393731, "grad_norm": 0.2216796875, "learning_rate": 0.0011945079793278116, "loss": 0.7389, "step": 10345 }, { "epoch": 0.2776406182911121, "grad_norm": 0.2177734375, "learning_rate": 0.0011945041850908898, "loss": 0.7233, "step": 10346 }, { "epoch": 0.277667453842851, "grad_norm": 0.228515625, "learning_rate": 0.0011945003895498009, "loss": 0.7388, "step": 10347 }, { "epoch": 0.27769428939458995, "grad_norm": 0.208984375, "learning_rate": 0.0011944965927045525, "loss": 0.6149, "step": 10348 }, { "epoch": 0.2777211249463289, "grad_norm": 0.236328125, "learning_rate": 0.001194492794555154, "loss": 0.892, "step": 10349 }, { "epoch": 0.2777479604980678, "grad_norm": 0.2060546875, "learning_rate": 0.001194488995101613, "loss": 0.6893, "step": 10350 }, { "epoch": 0.27777479604980676, "grad_norm": 0.2216796875, "learning_rate": 0.001194485194343938, "loss": 0.7741, "step": 10351 }, { "epoch": 0.27780163160154575, "grad_norm": 0.2392578125, "learning_rate": 0.0011944813922821374, "loss": 0.8252, "step": 10352 }, { "epoch": 0.2778284671532847, "grad_norm": 0.234375, "learning_rate": 0.0011944775889162196, "loss": 0.8656, "step": 10353 }, { "epoch": 0.2778553027050236, "grad_norm": 0.2314453125, "learning_rate": 0.0011944737842461929, "loss": 0.85, "step": 10354 }, { "epoch": 0.27788213825676256, "grad_norm": 0.2275390625, "learning_rate": 0.0011944699782720654, "loss": 0.807, "step": 10355 }, { "epoch": 0.2779089738085015, "grad_norm": 0.2353515625, "learning_rate": 0.001194466170993846, "loss": 0.796, "step": 10356 }, { "epoch": 0.2779358093602404, "grad_norm": 0.2294921875, "learning_rate": 0.0011944623624115425, "loss": 0.8064, "step": 10357 }, { "epoch": 0.2779626449119794, "grad_norm": 0.232421875, "learning_rate": 0.0011944585525251632, "loss": 0.7652, "step": 10358 }, { "epoch": 0.27798948046371835, "grad_norm": 0.240234375, "learning_rate": 0.001194454741334717, "loss": 0.8433, "step": 10359 }, { "epoch": 0.2780163160154573, "grad_norm": 0.2109375, "learning_rate": 0.0011944509288402123, "loss": 0.7396, "step": 10360 }, { "epoch": 0.2780431515671962, "grad_norm": 0.2294921875, "learning_rate": 0.001194447115041657, "loss": 0.7991, "step": 10361 }, { "epoch": 0.27806998711893516, "grad_norm": 0.24609375, "learning_rate": 0.0011944432999390594, "loss": 0.7528, "step": 10362 }, { "epoch": 0.2780968226706741, "grad_norm": 0.224609375, "learning_rate": 0.0011944394835324281, "loss": 0.7126, "step": 10363 }, { "epoch": 0.27812365822241303, "grad_norm": 0.2294921875, "learning_rate": 0.0011944356658217716, "loss": 0.8352, "step": 10364 }, { "epoch": 0.278150493774152, "grad_norm": 0.2099609375, "learning_rate": 0.0011944318468070979, "loss": 0.7364, "step": 10365 }, { "epoch": 0.27817732932589095, "grad_norm": 0.2177734375, "learning_rate": 0.0011944280264884158, "loss": 0.7232, "step": 10366 }, { "epoch": 0.2782041648776299, "grad_norm": 0.2138671875, "learning_rate": 0.0011944242048657337, "loss": 0.6992, "step": 10367 }, { "epoch": 0.2782310004293688, "grad_norm": 0.2197265625, "learning_rate": 0.0011944203819390594, "loss": 0.7907, "step": 10368 }, { "epoch": 0.27825783598110776, "grad_norm": 0.208984375, "learning_rate": 0.0011944165577084018, "loss": 0.639, "step": 10369 }, { "epoch": 0.2782846715328467, "grad_norm": 0.2138671875, "learning_rate": 0.001194412732173769, "loss": 0.7361, "step": 10370 }, { "epoch": 0.2783115070845857, "grad_norm": 0.2255859375, "learning_rate": 0.0011944089053351698, "loss": 0.8365, "step": 10371 }, { "epoch": 0.2783383426363246, "grad_norm": 0.2177734375, "learning_rate": 0.0011944050771926123, "loss": 0.7766, "step": 10372 }, { "epoch": 0.27836517818806356, "grad_norm": 0.21484375, "learning_rate": 0.0011944012477461048, "loss": 0.7123, "step": 10373 }, { "epoch": 0.2783920137398025, "grad_norm": 0.224609375, "learning_rate": 0.001194397416995656, "loss": 0.8069, "step": 10374 }, { "epoch": 0.2784188492915414, "grad_norm": 0.2314453125, "learning_rate": 0.0011943935849412742, "loss": 0.8738, "step": 10375 }, { "epoch": 0.27844568484328036, "grad_norm": 0.2216796875, "learning_rate": 0.0011943897515829674, "loss": 0.7842, "step": 10376 }, { "epoch": 0.2784725203950193, "grad_norm": 0.2060546875, "learning_rate": 0.0011943859169207447, "loss": 0.7403, "step": 10377 }, { "epoch": 0.2784993559467583, "grad_norm": 0.22265625, "learning_rate": 0.001194382080954614, "loss": 0.7853, "step": 10378 }, { "epoch": 0.2785261914984972, "grad_norm": 0.228515625, "learning_rate": 0.0011943782436845839, "loss": 0.8468, "step": 10379 }, { "epoch": 0.27855302705023616, "grad_norm": 0.2138671875, "learning_rate": 0.001194374405110663, "loss": 0.775, "step": 10380 }, { "epoch": 0.2785798626019751, "grad_norm": 0.2158203125, "learning_rate": 0.0011943705652328592, "loss": 0.7705, "step": 10381 }, { "epoch": 0.278606698153714, "grad_norm": 0.2265625, "learning_rate": 0.0011943667240511815, "loss": 0.8365, "step": 10382 }, { "epoch": 0.27863353370545296, "grad_norm": 0.244140625, "learning_rate": 0.0011943628815656378, "loss": 0.7824, "step": 10383 }, { "epoch": 0.27866036925719195, "grad_norm": 0.23046875, "learning_rate": 0.001194359037776237, "loss": 0.7944, "step": 10384 }, { "epoch": 0.2786872048089309, "grad_norm": 0.228515625, "learning_rate": 0.0011943551926829874, "loss": 0.8207, "step": 10385 }, { "epoch": 0.2787140403606698, "grad_norm": 0.2294921875, "learning_rate": 0.0011943513462858973, "loss": 0.8523, "step": 10386 }, { "epoch": 0.27874087591240876, "grad_norm": 0.2197265625, "learning_rate": 0.0011943474985849753, "loss": 0.7926, "step": 10387 }, { "epoch": 0.2787677114641477, "grad_norm": 0.2314453125, "learning_rate": 0.0011943436495802293, "loss": 0.8488, "step": 10388 }, { "epoch": 0.27879454701588663, "grad_norm": 0.2236328125, "learning_rate": 0.0011943397992716686, "loss": 0.801, "step": 10389 }, { "epoch": 0.27882138256762556, "grad_norm": 0.2275390625, "learning_rate": 0.0011943359476593012, "loss": 0.8578, "step": 10390 }, { "epoch": 0.27884821811936455, "grad_norm": 0.2412109375, "learning_rate": 0.0011943320947431356, "loss": 0.8524, "step": 10391 }, { "epoch": 0.2788750536711035, "grad_norm": 0.2255859375, "learning_rate": 0.00119432824052318, "loss": 0.7569, "step": 10392 }, { "epoch": 0.2789018892228424, "grad_norm": 0.2236328125, "learning_rate": 0.0011943243849994434, "loss": 0.8413, "step": 10393 }, { "epoch": 0.27892872477458136, "grad_norm": 0.2294921875, "learning_rate": 0.001194320528171934, "loss": 0.9003, "step": 10394 }, { "epoch": 0.2789555603263203, "grad_norm": 0.2373046875, "learning_rate": 0.00119431667004066, "loss": 0.7774, "step": 10395 }, { "epoch": 0.27898239587805923, "grad_norm": 0.2216796875, "learning_rate": 0.00119431281060563, "loss": 0.7836, "step": 10396 }, { "epoch": 0.2790092314297982, "grad_norm": 0.2119140625, "learning_rate": 0.0011943089498668528, "loss": 0.7192, "step": 10397 }, { "epoch": 0.27903606698153716, "grad_norm": 0.224609375, "learning_rate": 0.0011943050878243365, "loss": 0.7625, "step": 10398 }, { "epoch": 0.2790629025332761, "grad_norm": 0.212890625, "learning_rate": 0.0011943012244780894, "loss": 0.7338, "step": 10399 }, { "epoch": 0.279089738085015, "grad_norm": 0.228515625, "learning_rate": 0.0011942973598281206, "loss": 0.7734, "step": 10400 }, { "epoch": 0.27911657363675396, "grad_norm": 0.2373046875, "learning_rate": 0.001194293493874438, "loss": 0.8984, "step": 10401 }, { "epoch": 0.2791434091884929, "grad_norm": 0.2060546875, "learning_rate": 0.0011942896266170506, "loss": 0.705, "step": 10402 }, { "epoch": 0.27917024474023183, "grad_norm": 0.2119140625, "learning_rate": 0.0011942857580559665, "loss": 0.6732, "step": 10403 }, { "epoch": 0.2791970802919708, "grad_norm": 0.208984375, "learning_rate": 0.0011942818881911944, "loss": 0.693, "step": 10404 }, { "epoch": 0.27922391584370976, "grad_norm": 0.2197265625, "learning_rate": 0.0011942780170227424, "loss": 0.7647, "step": 10405 }, { "epoch": 0.2792507513954487, "grad_norm": 0.2236328125, "learning_rate": 0.0011942741445506195, "loss": 0.7813, "step": 10406 }, { "epoch": 0.27927758694718763, "grad_norm": 0.228515625, "learning_rate": 0.0011942702707748339, "loss": 0.804, "step": 10407 }, { "epoch": 0.27930442249892656, "grad_norm": 0.2294921875, "learning_rate": 0.001194266395695394, "loss": 0.8927, "step": 10408 }, { "epoch": 0.2793312580506655, "grad_norm": 0.2255859375, "learning_rate": 0.0011942625193123088, "loss": 0.8115, "step": 10409 }, { "epoch": 0.2793580936024045, "grad_norm": 0.203125, "learning_rate": 0.0011942586416255861, "loss": 0.6759, "step": 10410 }, { "epoch": 0.2793849291541434, "grad_norm": 0.220703125, "learning_rate": 0.001194254762635235, "loss": 0.706, "step": 10411 }, { "epoch": 0.27941176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.0011942508823412636, "loss": 0.7395, "step": 10412 }, { "epoch": 0.2794386002576213, "grad_norm": 0.21875, "learning_rate": 0.0011942470007436808, "loss": 0.7865, "step": 10413 }, { "epoch": 0.27946543580936023, "grad_norm": 0.2265625, "learning_rate": 0.0011942431178424949, "loss": 0.8352, "step": 10414 }, { "epoch": 0.27949227136109916, "grad_norm": 0.2216796875, "learning_rate": 0.0011942392336377144, "loss": 0.7646, "step": 10415 }, { "epoch": 0.27951910691283816, "grad_norm": 0.2021484375, "learning_rate": 0.001194235348129348, "loss": 0.68, "step": 10416 }, { "epoch": 0.2795459424645771, "grad_norm": 0.208984375, "learning_rate": 0.0011942314613174036, "loss": 0.6964, "step": 10417 }, { "epoch": 0.279572778016316, "grad_norm": 0.2333984375, "learning_rate": 0.0011942275732018904, "loss": 0.8208, "step": 10418 }, { "epoch": 0.27959961356805496, "grad_norm": 0.236328125, "learning_rate": 0.001194223683782817, "loss": 0.9014, "step": 10419 }, { "epoch": 0.2796264491197939, "grad_norm": 0.21875, "learning_rate": 0.0011942197930601914, "loss": 0.785, "step": 10420 }, { "epoch": 0.27965328467153283, "grad_norm": 0.224609375, "learning_rate": 0.0011942159010340226, "loss": 0.8038, "step": 10421 }, { "epoch": 0.27968012022327177, "grad_norm": 0.22265625, "learning_rate": 0.0011942120077043186, "loss": 0.8242, "step": 10422 }, { "epoch": 0.27970695577501076, "grad_norm": 0.2119140625, "learning_rate": 0.0011942081130710885, "loss": 0.7112, "step": 10423 }, { "epoch": 0.2797337913267497, "grad_norm": 0.2216796875, "learning_rate": 0.0011942042171343406, "loss": 0.786, "step": 10424 }, { "epoch": 0.2797606268784886, "grad_norm": 0.2265625, "learning_rate": 0.0011942003198940835, "loss": 0.8064, "step": 10425 }, { "epoch": 0.27978746243022756, "grad_norm": 0.224609375, "learning_rate": 0.0011941964213503257, "loss": 0.8102, "step": 10426 }, { "epoch": 0.2798142979819665, "grad_norm": 0.2177734375, "learning_rate": 0.0011941925215030757, "loss": 0.7992, "step": 10427 }, { "epoch": 0.27984113353370543, "grad_norm": 0.2177734375, "learning_rate": 0.0011941886203523423, "loss": 0.7415, "step": 10428 }, { "epoch": 0.2798679690854444, "grad_norm": 0.228515625, "learning_rate": 0.0011941847178981336, "loss": 0.8157, "step": 10429 }, { "epoch": 0.27989480463718336, "grad_norm": 0.23046875, "learning_rate": 0.0011941808141404586, "loss": 0.7771, "step": 10430 }, { "epoch": 0.2799216401889223, "grad_norm": 0.23046875, "learning_rate": 0.0011941769090793258, "loss": 0.847, "step": 10431 }, { "epoch": 0.27994847574066123, "grad_norm": 0.216796875, "learning_rate": 0.0011941730027147436, "loss": 0.7707, "step": 10432 }, { "epoch": 0.27997531129240016, "grad_norm": 0.216796875, "learning_rate": 0.0011941690950467206, "loss": 0.7483, "step": 10433 }, { "epoch": 0.2800021468441391, "grad_norm": 0.208984375, "learning_rate": 0.0011941651860752653, "loss": 0.7324, "step": 10434 }, { "epoch": 0.28002898239587803, "grad_norm": 0.21875, "learning_rate": 0.0011941612758003864, "loss": 0.7445, "step": 10435 }, { "epoch": 0.280055817947617, "grad_norm": 0.2333984375, "learning_rate": 0.0011941573642220925, "loss": 0.8509, "step": 10436 }, { "epoch": 0.28008265349935596, "grad_norm": 0.216796875, "learning_rate": 0.0011941534513403925, "loss": 0.7587, "step": 10437 }, { "epoch": 0.2801094890510949, "grad_norm": 0.2314453125, "learning_rate": 0.0011941495371552942, "loss": 0.7709, "step": 10438 }, { "epoch": 0.28013632460283383, "grad_norm": 0.2294921875, "learning_rate": 0.001194145621666807, "loss": 0.7979, "step": 10439 }, { "epoch": 0.28016316015457277, "grad_norm": 0.2216796875, "learning_rate": 0.0011941417048749387, "loss": 0.7855, "step": 10440 }, { "epoch": 0.2801899957063117, "grad_norm": 0.2255859375, "learning_rate": 0.0011941377867796985, "loss": 0.8648, "step": 10441 }, { "epoch": 0.2802168312580507, "grad_norm": 0.2099609375, "learning_rate": 0.0011941338673810947, "loss": 0.7099, "step": 10442 }, { "epoch": 0.2802436668097896, "grad_norm": 0.2412109375, "learning_rate": 0.001194129946679136, "loss": 0.9191, "step": 10443 }, { "epoch": 0.28027050236152856, "grad_norm": 0.2236328125, "learning_rate": 0.0011941260246738311, "loss": 0.7924, "step": 10444 }, { "epoch": 0.2802973379132675, "grad_norm": 0.2294921875, "learning_rate": 0.0011941221013651883, "loss": 0.7718, "step": 10445 }, { "epoch": 0.28032417346500643, "grad_norm": 0.2294921875, "learning_rate": 0.0011941181767532165, "loss": 0.8084, "step": 10446 }, { "epoch": 0.28035100901674537, "grad_norm": 0.2275390625, "learning_rate": 0.0011941142508379245, "loss": 0.8199, "step": 10447 }, { "epoch": 0.2803778445684843, "grad_norm": 0.224609375, "learning_rate": 0.0011941103236193203, "loss": 0.7998, "step": 10448 }, { "epoch": 0.2804046801202233, "grad_norm": 0.2216796875, "learning_rate": 0.0011941063950974128, "loss": 0.7864, "step": 10449 }, { "epoch": 0.28043151567196223, "grad_norm": 0.208984375, "learning_rate": 0.001194102465272211, "loss": 0.7079, "step": 10450 }, { "epoch": 0.28045835122370116, "grad_norm": 0.2216796875, "learning_rate": 0.001194098534143723, "loss": 0.718, "step": 10451 }, { "epoch": 0.2804851867754401, "grad_norm": 0.2099609375, "learning_rate": 0.0011940946017119575, "loss": 0.662, "step": 10452 }, { "epoch": 0.28051202232717903, "grad_norm": 0.2294921875, "learning_rate": 0.0011940906679769232, "loss": 0.8391, "step": 10453 }, { "epoch": 0.28053885787891797, "grad_norm": 0.21875, "learning_rate": 0.0011940867329386289, "loss": 0.7994, "step": 10454 }, { "epoch": 0.28056569343065696, "grad_norm": 0.2392578125, "learning_rate": 0.001194082796597083, "loss": 0.8449, "step": 10455 }, { "epoch": 0.2805925289823959, "grad_norm": 0.2255859375, "learning_rate": 0.0011940788589522942, "loss": 0.791, "step": 10456 }, { "epoch": 0.28061936453413483, "grad_norm": 0.2236328125, "learning_rate": 0.0011940749200042714, "loss": 0.7069, "step": 10457 }, { "epoch": 0.28064620008587376, "grad_norm": 0.2314453125, "learning_rate": 0.0011940709797530226, "loss": 0.7763, "step": 10458 }, { "epoch": 0.2806730356376127, "grad_norm": 0.244140625, "learning_rate": 0.0011940670381985572, "loss": 0.9355, "step": 10459 }, { "epoch": 0.28069987118935164, "grad_norm": 0.22265625, "learning_rate": 0.0011940630953408834, "loss": 0.7508, "step": 10460 }, { "epoch": 0.28072670674109057, "grad_norm": 0.2333984375, "learning_rate": 0.0011940591511800098, "loss": 0.8982, "step": 10461 }, { "epoch": 0.28075354229282956, "grad_norm": 0.234375, "learning_rate": 0.0011940552057159454, "loss": 0.8348, "step": 10462 }, { "epoch": 0.2807803778445685, "grad_norm": 0.232421875, "learning_rate": 0.0011940512589486986, "loss": 0.7873, "step": 10463 }, { "epoch": 0.28080721339630743, "grad_norm": 0.2392578125, "learning_rate": 0.001194047310878278, "loss": 0.8673, "step": 10464 }, { "epoch": 0.28083404894804637, "grad_norm": 0.232421875, "learning_rate": 0.0011940433615046924, "loss": 0.7846, "step": 10465 }, { "epoch": 0.2808608844997853, "grad_norm": 0.216796875, "learning_rate": 0.0011940394108279507, "loss": 0.723, "step": 10466 }, { "epoch": 0.28088772005152424, "grad_norm": 0.228515625, "learning_rate": 0.001194035458848061, "loss": 0.7371, "step": 10467 }, { "epoch": 0.2809145556032632, "grad_norm": 0.22265625, "learning_rate": 0.0011940315055650322, "loss": 0.794, "step": 10468 }, { "epoch": 0.28094139115500216, "grad_norm": 0.228515625, "learning_rate": 0.0011940275509788733, "loss": 0.8102, "step": 10469 }, { "epoch": 0.2809682267067411, "grad_norm": 0.2236328125, "learning_rate": 0.0011940235950895926, "loss": 0.827, "step": 10470 }, { "epoch": 0.28099506225848003, "grad_norm": 0.228515625, "learning_rate": 0.001194019637897199, "loss": 0.7837, "step": 10471 }, { "epoch": 0.28102189781021897, "grad_norm": 0.2294921875, "learning_rate": 0.001194015679401701, "loss": 0.8342, "step": 10472 }, { "epoch": 0.2810487333619579, "grad_norm": 0.236328125, "learning_rate": 0.0011940117196031074, "loss": 0.8793, "step": 10473 }, { "epoch": 0.28107556891369684, "grad_norm": 0.234375, "learning_rate": 0.0011940077585014266, "loss": 0.8816, "step": 10474 }, { "epoch": 0.28110240446543583, "grad_norm": 0.2119140625, "learning_rate": 0.0011940037960966679, "loss": 0.6808, "step": 10475 }, { "epoch": 0.28112924001717476, "grad_norm": 0.2177734375, "learning_rate": 0.0011939998323888396, "loss": 0.8076, "step": 10476 }, { "epoch": 0.2811560755689137, "grad_norm": 0.224609375, "learning_rate": 0.00119399586737795, "loss": 0.7957, "step": 10477 }, { "epoch": 0.28118291112065263, "grad_norm": 0.22265625, "learning_rate": 0.0011939919010640087, "loss": 0.7744, "step": 10478 }, { "epoch": 0.28120974667239157, "grad_norm": 0.20703125, "learning_rate": 0.0011939879334470238, "loss": 0.7141, "step": 10479 }, { "epoch": 0.2812365822241305, "grad_norm": 0.22265625, "learning_rate": 0.001193983964527004, "loss": 0.7982, "step": 10480 }, { "epoch": 0.2812634177758695, "grad_norm": 0.2197265625, "learning_rate": 0.0011939799943039584, "loss": 0.8139, "step": 10481 }, { "epoch": 0.28129025332760843, "grad_norm": 0.232421875, "learning_rate": 0.0011939760227778953, "loss": 0.8255, "step": 10482 }, { "epoch": 0.28131708887934737, "grad_norm": 0.2158203125, "learning_rate": 0.0011939720499488232, "loss": 0.7449, "step": 10483 }, { "epoch": 0.2813439244310863, "grad_norm": 0.216796875, "learning_rate": 0.0011939680758167516, "loss": 0.7164, "step": 10484 }, { "epoch": 0.28137075998282524, "grad_norm": 0.21875, "learning_rate": 0.0011939641003816887, "loss": 0.7703, "step": 10485 }, { "epoch": 0.28139759553456417, "grad_norm": 0.220703125, "learning_rate": 0.0011939601236436433, "loss": 0.7529, "step": 10486 }, { "epoch": 0.28142443108630316, "grad_norm": 0.2275390625, "learning_rate": 0.001193956145602624, "loss": 0.7798, "step": 10487 }, { "epoch": 0.2814512666380421, "grad_norm": 0.232421875, "learning_rate": 0.00119395216625864, "loss": 0.8231, "step": 10488 }, { "epoch": 0.28147810218978103, "grad_norm": 0.2294921875, "learning_rate": 0.0011939481856116992, "loss": 0.8731, "step": 10489 }, { "epoch": 0.28150493774151997, "grad_norm": 0.2265625, "learning_rate": 0.0011939442036618111, "loss": 0.8038, "step": 10490 }, { "epoch": 0.2815317732932589, "grad_norm": 0.2060546875, "learning_rate": 0.0011939402204089844, "loss": 0.7074, "step": 10491 }, { "epoch": 0.28155860884499784, "grad_norm": 0.2001953125, "learning_rate": 0.0011939362358532272, "loss": 0.6609, "step": 10492 }, { "epoch": 0.2815854443967368, "grad_norm": 0.2236328125, "learning_rate": 0.0011939322499945487, "loss": 0.7213, "step": 10493 }, { "epoch": 0.28161227994847576, "grad_norm": 0.2138671875, "learning_rate": 0.0011939282628329578, "loss": 0.6887, "step": 10494 }, { "epoch": 0.2816391155002147, "grad_norm": 0.21875, "learning_rate": 0.001193924274368463, "loss": 0.7512, "step": 10495 }, { "epoch": 0.28166595105195363, "grad_norm": 0.2138671875, "learning_rate": 0.001193920284601073, "loss": 0.7715, "step": 10496 }, { "epoch": 0.28169278660369257, "grad_norm": 0.23046875, "learning_rate": 0.0011939162935307964, "loss": 0.8344, "step": 10497 }, { "epoch": 0.2817196221554315, "grad_norm": 0.234375, "learning_rate": 0.0011939123011576423, "loss": 0.7925, "step": 10498 }, { "epoch": 0.28174645770717044, "grad_norm": 0.2236328125, "learning_rate": 0.0011939083074816194, "loss": 0.7832, "step": 10499 }, { "epoch": 0.28177329325890943, "grad_norm": 0.228515625, "learning_rate": 0.0011939043125027363, "loss": 0.7844, "step": 10500 }, { "epoch": 0.28180012881064836, "grad_norm": 0.1982421875, "learning_rate": 0.0011939003162210022, "loss": 0.6781, "step": 10501 }, { "epoch": 0.2818269643623873, "grad_norm": 0.224609375, "learning_rate": 0.0011938963186364255, "loss": 0.6903, "step": 10502 }, { "epoch": 0.28185379991412624, "grad_norm": 0.21484375, "learning_rate": 0.0011938923197490146, "loss": 0.7557, "step": 10503 }, { "epoch": 0.28188063546586517, "grad_norm": 0.2236328125, "learning_rate": 0.001193888319558779, "loss": 0.7815, "step": 10504 }, { "epoch": 0.2819074710176041, "grad_norm": 0.228515625, "learning_rate": 0.001193884318065727, "loss": 0.7788, "step": 10505 }, { "epoch": 0.28193430656934304, "grad_norm": 0.2392578125, "learning_rate": 0.0011938803152698675, "loss": 0.8331, "step": 10506 }, { "epoch": 0.28196114212108203, "grad_norm": 0.234375, "learning_rate": 0.0011938763111712096, "loss": 0.8738, "step": 10507 }, { "epoch": 0.28198797767282097, "grad_norm": 0.234375, "learning_rate": 0.0011938723057697614, "loss": 0.8422, "step": 10508 }, { "epoch": 0.2820148132245599, "grad_norm": 0.2255859375, "learning_rate": 0.0011938682990655324, "loss": 0.7974, "step": 10509 }, { "epoch": 0.28204164877629884, "grad_norm": 0.2275390625, "learning_rate": 0.001193864291058531, "loss": 0.7681, "step": 10510 }, { "epoch": 0.28206848432803777, "grad_norm": 0.234375, "learning_rate": 0.001193860281748766, "loss": 0.9045, "step": 10511 }, { "epoch": 0.2820953198797767, "grad_norm": 0.228515625, "learning_rate": 0.0011938562711362464, "loss": 0.8587, "step": 10512 }, { "epoch": 0.2821221554315157, "grad_norm": 0.216796875, "learning_rate": 0.0011938522592209806, "loss": 0.8072, "step": 10513 }, { "epoch": 0.28214899098325463, "grad_norm": 0.22265625, "learning_rate": 0.0011938482460029777, "loss": 0.7533, "step": 10514 }, { "epoch": 0.28217582653499357, "grad_norm": 0.2255859375, "learning_rate": 0.0011938442314822465, "loss": 0.8059, "step": 10515 }, { "epoch": 0.2822026620867325, "grad_norm": 0.2099609375, "learning_rate": 0.001193840215658796, "loss": 0.7144, "step": 10516 }, { "epoch": 0.28222949763847144, "grad_norm": 0.20703125, "learning_rate": 0.0011938361985326345, "loss": 0.6792, "step": 10517 }, { "epoch": 0.2822563331902104, "grad_norm": 0.2158203125, "learning_rate": 0.001193832180103771, "loss": 0.7131, "step": 10518 }, { "epoch": 0.2822831687419493, "grad_norm": 0.2265625, "learning_rate": 0.0011938281603722147, "loss": 0.8084, "step": 10519 }, { "epoch": 0.2823100042936883, "grad_norm": 0.220703125, "learning_rate": 0.0011938241393379741, "loss": 0.7624, "step": 10520 }, { "epoch": 0.28233683984542723, "grad_norm": 0.2119140625, "learning_rate": 0.0011938201170010579, "loss": 0.7168, "step": 10521 }, { "epoch": 0.28236367539716617, "grad_norm": 0.23046875, "learning_rate": 0.0011938160933614753, "loss": 0.7701, "step": 10522 }, { "epoch": 0.2823905109489051, "grad_norm": 0.2451171875, "learning_rate": 0.0011938120684192345, "loss": 0.9325, "step": 10523 }, { "epoch": 0.28241734650064404, "grad_norm": 0.2158203125, "learning_rate": 0.001193808042174345, "loss": 0.7615, "step": 10524 }, { "epoch": 0.282444182052383, "grad_norm": 0.224609375, "learning_rate": 0.0011938040146268153, "loss": 0.8114, "step": 10525 }, { "epoch": 0.28247101760412197, "grad_norm": 0.2275390625, "learning_rate": 0.0011937999857766543, "loss": 0.8044, "step": 10526 }, { "epoch": 0.2824978531558609, "grad_norm": 0.2265625, "learning_rate": 0.001193795955623871, "loss": 0.8119, "step": 10527 }, { "epoch": 0.28252468870759984, "grad_norm": 0.23828125, "learning_rate": 0.0011937919241684736, "loss": 0.8458, "step": 10528 }, { "epoch": 0.28255152425933877, "grad_norm": 0.2392578125, "learning_rate": 0.0011937878914104717, "loss": 0.8214, "step": 10529 }, { "epoch": 0.2825783598110777, "grad_norm": 0.2314453125, "learning_rate": 0.0011937838573498737, "loss": 0.8162, "step": 10530 }, { "epoch": 0.28260519536281664, "grad_norm": 0.2392578125, "learning_rate": 0.001193779821986689, "loss": 0.8867, "step": 10531 }, { "epoch": 0.2826320309145556, "grad_norm": 0.23046875, "learning_rate": 0.001193775785320926, "loss": 0.8022, "step": 10532 }, { "epoch": 0.28265886646629457, "grad_norm": 0.22265625, "learning_rate": 0.0011937717473525932, "loss": 0.7576, "step": 10533 }, { "epoch": 0.2826857020180335, "grad_norm": 0.232421875, "learning_rate": 0.0011937677080817001, "loss": 0.753, "step": 10534 }, { "epoch": 0.28271253756977244, "grad_norm": 0.2265625, "learning_rate": 0.0011937636675082555, "loss": 0.8, "step": 10535 }, { "epoch": 0.2827393731215114, "grad_norm": 0.2060546875, "learning_rate": 0.001193759625632268, "loss": 0.697, "step": 10536 }, { "epoch": 0.2827662086732503, "grad_norm": 0.224609375, "learning_rate": 0.0011937555824537464, "loss": 0.8435, "step": 10537 }, { "epoch": 0.28279304422498924, "grad_norm": 0.2265625, "learning_rate": 0.0011937515379727, "loss": 0.8186, "step": 10538 }, { "epoch": 0.28281987977672823, "grad_norm": 0.2099609375, "learning_rate": 0.0011937474921891373, "loss": 0.6555, "step": 10539 }, { "epoch": 0.28284671532846717, "grad_norm": 0.2177734375, "learning_rate": 0.0011937434451030672, "loss": 0.7769, "step": 10540 }, { "epoch": 0.2828735508802061, "grad_norm": 0.2373046875, "learning_rate": 0.0011937393967144988, "loss": 0.8604, "step": 10541 }, { "epoch": 0.28290038643194504, "grad_norm": 0.2158203125, "learning_rate": 0.0011937353470234406, "loss": 0.769, "step": 10542 }, { "epoch": 0.282927221983684, "grad_norm": 0.2021484375, "learning_rate": 0.0011937312960299022, "loss": 0.6855, "step": 10543 }, { "epoch": 0.2829540575354229, "grad_norm": 0.212890625, "learning_rate": 0.0011937272437338915, "loss": 0.7087, "step": 10544 }, { "epoch": 0.28298089308716184, "grad_norm": 0.2216796875, "learning_rate": 0.001193723190135418, "loss": 0.7879, "step": 10545 }, { "epoch": 0.28300772863890084, "grad_norm": 0.251953125, "learning_rate": 0.0011937191352344907, "loss": 0.96, "step": 10546 }, { "epoch": 0.28303456419063977, "grad_norm": 0.201171875, "learning_rate": 0.0011937150790311184, "loss": 0.6214, "step": 10547 }, { "epoch": 0.2830613997423787, "grad_norm": 0.228515625, "learning_rate": 0.0011937110215253096, "loss": 0.8361, "step": 10548 }, { "epoch": 0.28308823529411764, "grad_norm": 0.2197265625, "learning_rate": 0.0011937069627170737, "loss": 0.7583, "step": 10549 }, { "epoch": 0.2831150708458566, "grad_norm": 0.248046875, "learning_rate": 0.0011937029026064191, "loss": 0.8969, "step": 10550 }, { "epoch": 0.2831419063975955, "grad_norm": 0.2236328125, "learning_rate": 0.0011936988411933552, "loss": 0.8, "step": 10551 }, { "epoch": 0.2831687419493345, "grad_norm": 0.2392578125, "learning_rate": 0.0011936947784778908, "loss": 0.8489, "step": 10552 }, { "epoch": 0.28319557750107344, "grad_norm": 0.22265625, "learning_rate": 0.0011936907144600346, "loss": 0.8327, "step": 10553 }, { "epoch": 0.28322241305281237, "grad_norm": 0.22265625, "learning_rate": 0.0011936866491397955, "loss": 0.7738, "step": 10554 }, { "epoch": 0.2832492486045513, "grad_norm": 0.228515625, "learning_rate": 0.0011936825825171829, "loss": 0.8176, "step": 10555 }, { "epoch": 0.28327608415629024, "grad_norm": 0.228515625, "learning_rate": 0.001193678514592205, "loss": 0.808, "step": 10556 }, { "epoch": 0.2833029197080292, "grad_norm": 0.2265625, "learning_rate": 0.0011936744453648715, "loss": 0.796, "step": 10557 }, { "epoch": 0.28332975525976817, "grad_norm": 0.2236328125, "learning_rate": 0.0011936703748351906, "loss": 0.789, "step": 10558 }, { "epoch": 0.2833565908115071, "grad_norm": 0.228515625, "learning_rate": 0.001193666303003172, "loss": 0.8199, "step": 10559 }, { "epoch": 0.28338342636324604, "grad_norm": 0.2275390625, "learning_rate": 0.0011936622298688237, "loss": 0.8, "step": 10560 }, { "epoch": 0.283410261914985, "grad_norm": 0.2099609375, "learning_rate": 0.0011936581554321554, "loss": 0.7169, "step": 10561 }, { "epoch": 0.2834370974667239, "grad_norm": 0.2421875, "learning_rate": 0.0011936540796931756, "loss": 0.9373, "step": 10562 }, { "epoch": 0.28346393301846284, "grad_norm": 0.2333984375, "learning_rate": 0.0011936500026518936, "loss": 0.8225, "step": 10563 }, { "epoch": 0.2834907685702018, "grad_norm": 0.21484375, "learning_rate": 0.001193645924308318, "loss": 0.7095, "step": 10564 }, { "epoch": 0.28351760412194077, "grad_norm": 0.216796875, "learning_rate": 0.0011936418446624579, "loss": 0.7246, "step": 10565 }, { "epoch": 0.2835444396736797, "grad_norm": 0.2412109375, "learning_rate": 0.001193637763714322, "loss": 0.8706, "step": 10566 }, { "epoch": 0.28357127522541864, "grad_norm": 0.232421875, "learning_rate": 0.00119363368146392, "loss": 0.7531, "step": 10567 }, { "epoch": 0.2835981107771576, "grad_norm": 0.2197265625, "learning_rate": 0.00119362959791126, "loss": 0.7428, "step": 10568 }, { "epoch": 0.2836249463288965, "grad_norm": 0.2236328125, "learning_rate": 0.0011936255130563513, "loss": 0.7157, "step": 10569 }, { "epoch": 0.28365178188063545, "grad_norm": 0.220703125, "learning_rate": 0.0011936214268992032, "loss": 0.731, "step": 10570 }, { "epoch": 0.28367861743237444, "grad_norm": 0.2236328125, "learning_rate": 0.001193617339439824, "loss": 0.7445, "step": 10571 }, { "epoch": 0.28370545298411337, "grad_norm": 0.2216796875, "learning_rate": 0.0011936132506782232, "loss": 0.8023, "step": 10572 }, { "epoch": 0.2837322885358523, "grad_norm": 0.2353515625, "learning_rate": 0.0011936091606144096, "loss": 0.8389, "step": 10573 }, { "epoch": 0.28375912408759124, "grad_norm": 0.224609375, "learning_rate": 0.001193605069248392, "loss": 0.7629, "step": 10574 }, { "epoch": 0.2837859596393302, "grad_norm": 0.2236328125, "learning_rate": 0.0011936009765801795, "loss": 0.8307, "step": 10575 }, { "epoch": 0.2838127951910691, "grad_norm": 0.2275390625, "learning_rate": 0.0011935968826097812, "loss": 0.8333, "step": 10576 }, { "epoch": 0.28383963074280805, "grad_norm": 0.234375, "learning_rate": 0.001193592787337206, "loss": 0.8173, "step": 10577 }, { "epoch": 0.28386646629454704, "grad_norm": 0.2265625, "learning_rate": 0.0011935886907624626, "loss": 0.8048, "step": 10578 }, { "epoch": 0.283893301846286, "grad_norm": 0.20703125, "learning_rate": 0.0011935845928855607, "loss": 0.7081, "step": 10579 }, { "epoch": 0.2839201373980249, "grad_norm": 0.2138671875, "learning_rate": 0.0011935804937065085, "loss": 0.7035, "step": 10580 }, { "epoch": 0.28394697294976384, "grad_norm": 0.2109375, "learning_rate": 0.0011935763932253154, "loss": 0.7306, "step": 10581 }, { "epoch": 0.2839738085015028, "grad_norm": 0.23046875, "learning_rate": 0.0011935722914419905, "loss": 0.7025, "step": 10582 }, { "epoch": 0.2840006440532417, "grad_norm": 0.212890625, "learning_rate": 0.0011935681883565423, "loss": 0.7575, "step": 10583 }, { "epoch": 0.2840274796049807, "grad_norm": 0.2177734375, "learning_rate": 0.0011935640839689805, "loss": 0.6916, "step": 10584 }, { "epoch": 0.28405431515671964, "grad_norm": 0.2099609375, "learning_rate": 0.0011935599782793133, "loss": 0.728, "step": 10585 }, { "epoch": 0.2840811507084586, "grad_norm": 0.236328125, "learning_rate": 0.0011935558712875504, "loss": 0.7745, "step": 10586 }, { "epoch": 0.2841079862601975, "grad_norm": 0.2333984375, "learning_rate": 0.0011935517629937007, "loss": 0.8234, "step": 10587 }, { "epoch": 0.28413482181193644, "grad_norm": 0.2197265625, "learning_rate": 0.0011935476533977726, "loss": 0.742, "step": 10588 }, { "epoch": 0.2841616573636754, "grad_norm": 0.2314453125, "learning_rate": 0.0011935435424997758, "loss": 0.7664, "step": 10589 }, { "epoch": 0.2841884929154143, "grad_norm": 0.2177734375, "learning_rate": 0.0011935394302997192, "loss": 0.6842, "step": 10590 }, { "epoch": 0.2842153284671533, "grad_norm": 0.2021484375, "learning_rate": 0.0011935353167976117, "loss": 0.6098, "step": 10591 }, { "epoch": 0.28424216401889224, "grad_norm": 0.21875, "learning_rate": 0.0011935312019934622, "loss": 0.7071, "step": 10592 }, { "epoch": 0.2842689995706312, "grad_norm": 0.2158203125, "learning_rate": 0.00119352708588728, "loss": 0.7074, "step": 10593 }, { "epoch": 0.2842958351223701, "grad_norm": 0.21875, "learning_rate": 0.0011935229684790739, "loss": 0.6893, "step": 10594 }, { "epoch": 0.28432267067410905, "grad_norm": 0.234375, "learning_rate": 0.0011935188497688531, "loss": 0.8225, "step": 10595 }, { "epoch": 0.284349506225848, "grad_norm": 0.2236328125, "learning_rate": 0.0011935147297566266, "loss": 0.7972, "step": 10596 }, { "epoch": 0.28437634177758697, "grad_norm": 0.220703125, "learning_rate": 0.0011935106084424033, "loss": 0.698, "step": 10597 }, { "epoch": 0.2844031773293259, "grad_norm": 0.2099609375, "learning_rate": 0.0011935064858261924, "loss": 0.7205, "step": 10598 }, { "epoch": 0.28443001288106484, "grad_norm": 0.2216796875, "learning_rate": 0.0011935023619080026, "loss": 0.7571, "step": 10599 }, { "epoch": 0.2844568484328038, "grad_norm": 0.224609375, "learning_rate": 0.0011934982366878437, "loss": 0.7859, "step": 10600 }, { "epoch": 0.2844836839845427, "grad_norm": 0.22265625, "learning_rate": 0.001193494110165724, "loss": 0.7559, "step": 10601 }, { "epoch": 0.28451051953628165, "grad_norm": 0.232421875, "learning_rate": 0.0011934899823416527, "loss": 0.7568, "step": 10602 }, { "epoch": 0.2845373550880206, "grad_norm": 0.21875, "learning_rate": 0.0011934858532156392, "loss": 0.7869, "step": 10603 }, { "epoch": 0.2845641906397596, "grad_norm": 0.2373046875, "learning_rate": 0.0011934817227876923, "loss": 0.8796, "step": 10604 }, { "epoch": 0.2845910261914985, "grad_norm": 0.2119140625, "learning_rate": 0.0011934775910578213, "loss": 0.7708, "step": 10605 }, { "epoch": 0.28461786174323744, "grad_norm": 0.220703125, "learning_rate": 0.0011934734580260347, "loss": 0.7788, "step": 10606 }, { "epoch": 0.2846446972949764, "grad_norm": 0.2255859375, "learning_rate": 0.0011934693236923422, "loss": 0.7589, "step": 10607 }, { "epoch": 0.2846715328467153, "grad_norm": 0.224609375, "learning_rate": 0.0011934651880567524, "loss": 0.7716, "step": 10608 }, { "epoch": 0.28469836839845425, "grad_norm": 0.2314453125, "learning_rate": 0.0011934610511192749, "loss": 0.8321, "step": 10609 }, { "epoch": 0.28472520395019324, "grad_norm": 0.22265625, "learning_rate": 0.0011934569128799182, "loss": 0.7554, "step": 10610 }, { "epoch": 0.2847520395019322, "grad_norm": 0.208984375, "learning_rate": 0.0011934527733386917, "loss": 0.6624, "step": 10611 }, { "epoch": 0.2847788750536711, "grad_norm": 0.2216796875, "learning_rate": 0.0011934486324956042, "loss": 0.7497, "step": 10612 }, { "epoch": 0.28480571060541005, "grad_norm": 0.2177734375, "learning_rate": 0.0011934444903506653, "loss": 0.7308, "step": 10613 }, { "epoch": 0.284832546157149, "grad_norm": 0.2421875, "learning_rate": 0.0011934403469038835, "loss": 0.8615, "step": 10614 }, { "epoch": 0.2848593817088879, "grad_norm": 0.2294921875, "learning_rate": 0.0011934362021552684, "loss": 0.762, "step": 10615 }, { "epoch": 0.2848862172606269, "grad_norm": 0.20703125, "learning_rate": 0.0011934320561048288, "loss": 0.6794, "step": 10616 }, { "epoch": 0.28491305281236584, "grad_norm": 0.23046875, "learning_rate": 0.0011934279087525738, "loss": 0.7715, "step": 10617 }, { "epoch": 0.2849398883641048, "grad_norm": 0.22265625, "learning_rate": 0.0011934237600985129, "loss": 0.7589, "step": 10618 }, { "epoch": 0.2849667239158437, "grad_norm": 0.2138671875, "learning_rate": 0.0011934196101426543, "loss": 0.7381, "step": 10619 }, { "epoch": 0.28499355946758265, "grad_norm": 0.216796875, "learning_rate": 0.001193415458885008, "loss": 0.7071, "step": 10620 }, { "epoch": 0.2850203950193216, "grad_norm": 0.2265625, "learning_rate": 0.0011934113063255829, "loss": 0.7354, "step": 10621 }, { "epoch": 0.2850472305710605, "grad_norm": 0.2275390625, "learning_rate": 0.0011934071524643877, "loss": 0.7613, "step": 10622 }, { "epoch": 0.2850740661227995, "grad_norm": 0.2236328125, "learning_rate": 0.0011934029973014319, "loss": 0.8089, "step": 10623 }, { "epoch": 0.28510090167453844, "grad_norm": 0.22265625, "learning_rate": 0.0011933988408367243, "loss": 0.7049, "step": 10624 }, { "epoch": 0.2851277372262774, "grad_norm": 0.2255859375, "learning_rate": 0.0011933946830702746, "loss": 0.8171, "step": 10625 }, { "epoch": 0.2851545727780163, "grad_norm": 0.216796875, "learning_rate": 0.0011933905240020914, "loss": 0.7182, "step": 10626 }, { "epoch": 0.28518140832975525, "grad_norm": 0.232421875, "learning_rate": 0.0011933863636321839, "loss": 0.7341, "step": 10627 }, { "epoch": 0.2852082438814942, "grad_norm": 0.228515625, "learning_rate": 0.0011933822019605611, "loss": 0.8127, "step": 10628 }, { "epoch": 0.2852350794332332, "grad_norm": 0.2099609375, "learning_rate": 0.0011933780389872326, "loss": 0.6737, "step": 10629 }, { "epoch": 0.2852619149849721, "grad_norm": 0.2177734375, "learning_rate": 0.0011933738747122071, "loss": 0.7029, "step": 10630 }, { "epoch": 0.28528875053671104, "grad_norm": 0.21875, "learning_rate": 0.0011933697091354941, "loss": 0.806, "step": 10631 }, { "epoch": 0.28531558608845, "grad_norm": 0.23046875, "learning_rate": 0.0011933655422571023, "loss": 0.8348, "step": 10632 }, { "epoch": 0.2853424216401889, "grad_norm": 0.21875, "learning_rate": 0.0011933613740770412, "loss": 0.7848, "step": 10633 }, { "epoch": 0.28536925719192785, "grad_norm": 0.2255859375, "learning_rate": 0.00119335720459532, "loss": 0.7606, "step": 10634 }, { "epoch": 0.2853960927436668, "grad_norm": 0.208984375, "learning_rate": 0.0011933530338119475, "loss": 0.7617, "step": 10635 }, { "epoch": 0.2854229282954058, "grad_norm": 0.22265625, "learning_rate": 0.001193348861726933, "loss": 0.7593, "step": 10636 }, { "epoch": 0.2854497638471447, "grad_norm": 0.205078125, "learning_rate": 0.0011933446883402856, "loss": 0.6828, "step": 10637 }, { "epoch": 0.28547659939888365, "grad_norm": 0.203125, "learning_rate": 0.0011933405136520146, "loss": 0.6496, "step": 10638 }, { "epoch": 0.2855034349506226, "grad_norm": 0.2119140625, "learning_rate": 0.0011933363376621289, "loss": 0.728, "step": 10639 }, { "epoch": 0.2855302705023615, "grad_norm": 0.2216796875, "learning_rate": 0.001193332160370638, "loss": 0.7745, "step": 10640 }, { "epoch": 0.28555710605410045, "grad_norm": 0.23828125, "learning_rate": 0.0011933279817775509, "loss": 0.8076, "step": 10641 }, { "epoch": 0.28558394160583944, "grad_norm": 0.22265625, "learning_rate": 0.0011933238018828765, "loss": 0.7529, "step": 10642 }, { "epoch": 0.2856107771575784, "grad_norm": 0.2314453125, "learning_rate": 0.0011933196206866247, "loss": 0.8252, "step": 10643 }, { "epoch": 0.2856376127093173, "grad_norm": 0.2490234375, "learning_rate": 0.001193315438188804, "loss": 0.8693, "step": 10644 }, { "epoch": 0.28566444826105625, "grad_norm": 0.220703125, "learning_rate": 0.0011933112543894239, "loss": 0.7791, "step": 10645 }, { "epoch": 0.2856912838127952, "grad_norm": 0.2099609375, "learning_rate": 0.0011933070692884931, "loss": 0.7336, "step": 10646 }, { "epoch": 0.2857181193645341, "grad_norm": 0.2265625, "learning_rate": 0.0011933028828860215, "loss": 0.8617, "step": 10647 }, { "epoch": 0.28574495491627305, "grad_norm": 0.25, "learning_rate": 0.0011932986951820178, "loss": 0.9016, "step": 10648 }, { "epoch": 0.28577179046801204, "grad_norm": 0.21875, "learning_rate": 0.0011932945061764913, "loss": 0.7865, "step": 10649 }, { "epoch": 0.285798626019751, "grad_norm": 0.248046875, "learning_rate": 0.001193290315869451, "loss": 0.8668, "step": 10650 }, { "epoch": 0.2858254615714899, "grad_norm": 0.208984375, "learning_rate": 0.0011932861242609067, "loss": 0.7192, "step": 10651 }, { "epoch": 0.28585229712322885, "grad_norm": 0.2333984375, "learning_rate": 0.0011932819313508671, "loss": 0.8329, "step": 10652 }, { "epoch": 0.2858791326749678, "grad_norm": 0.212890625, "learning_rate": 0.0011932777371393413, "loss": 0.7685, "step": 10653 }, { "epoch": 0.2859059682267067, "grad_norm": 0.2236328125, "learning_rate": 0.0011932735416263387, "loss": 0.7914, "step": 10654 }, { "epoch": 0.2859328037784457, "grad_norm": 0.2216796875, "learning_rate": 0.0011932693448118688, "loss": 0.7181, "step": 10655 }, { "epoch": 0.28595963933018465, "grad_norm": 0.2255859375, "learning_rate": 0.0011932651466959401, "loss": 0.7821, "step": 10656 }, { "epoch": 0.2859864748819236, "grad_norm": 0.220703125, "learning_rate": 0.0011932609472785625, "loss": 0.7252, "step": 10657 }, { "epoch": 0.2860133104336625, "grad_norm": 0.2265625, "learning_rate": 0.0011932567465597448, "loss": 0.8268, "step": 10658 }, { "epoch": 0.28604014598540145, "grad_norm": 0.2314453125, "learning_rate": 0.0011932525445394963, "loss": 0.7843, "step": 10659 }, { "epoch": 0.2860669815371404, "grad_norm": 0.21484375, "learning_rate": 0.0011932483412178262, "loss": 0.7823, "step": 10660 }, { "epoch": 0.2860938170888793, "grad_norm": 0.205078125, "learning_rate": 0.001193244136594744, "loss": 0.7044, "step": 10661 }, { "epoch": 0.2861206526406183, "grad_norm": 0.2216796875, "learning_rate": 0.0011932399306702585, "loss": 0.7749, "step": 10662 }, { "epoch": 0.28614748819235725, "grad_norm": 0.23828125, "learning_rate": 0.0011932357234443792, "loss": 0.8204, "step": 10663 }, { "epoch": 0.2861743237440962, "grad_norm": 0.2392578125, "learning_rate": 0.0011932315149171153, "loss": 0.7829, "step": 10664 }, { "epoch": 0.2862011592958351, "grad_norm": 0.23828125, "learning_rate": 0.001193227305088476, "loss": 0.8776, "step": 10665 }, { "epoch": 0.28622799484757405, "grad_norm": 0.21875, "learning_rate": 0.0011932230939584705, "loss": 0.7101, "step": 10666 }, { "epoch": 0.286254830399313, "grad_norm": 0.2158203125, "learning_rate": 0.001193218881527108, "loss": 0.7555, "step": 10667 }, { "epoch": 0.286281665951052, "grad_norm": 0.2158203125, "learning_rate": 0.0011932146677943979, "loss": 0.8082, "step": 10668 }, { "epoch": 0.2863085015027909, "grad_norm": 0.19921875, "learning_rate": 0.001193210452760349, "loss": 0.6491, "step": 10669 }, { "epoch": 0.28633533705452985, "grad_norm": 0.2265625, "learning_rate": 0.0011932062364249714, "loss": 0.8113, "step": 10670 }, { "epoch": 0.2863621726062688, "grad_norm": 0.2373046875, "learning_rate": 0.0011932020187882734, "loss": 0.8191, "step": 10671 }, { "epoch": 0.2863890081580077, "grad_norm": 0.2060546875, "learning_rate": 0.001193197799850265, "loss": 0.7064, "step": 10672 }, { "epoch": 0.28641584370974665, "grad_norm": 0.2314453125, "learning_rate": 0.001193193579610955, "loss": 0.7583, "step": 10673 }, { "epoch": 0.2864426792614856, "grad_norm": 0.22265625, "learning_rate": 0.001193189358070353, "loss": 0.7682, "step": 10674 }, { "epoch": 0.2864695148132246, "grad_norm": 0.224609375, "learning_rate": 0.001193185135228468, "loss": 0.7399, "step": 10675 }, { "epoch": 0.2864963503649635, "grad_norm": 0.232421875, "learning_rate": 0.0011931809110853093, "loss": 0.8185, "step": 10676 }, { "epoch": 0.28652318591670245, "grad_norm": 0.2333984375, "learning_rate": 0.001193176685640886, "loss": 0.845, "step": 10677 }, { "epoch": 0.2865500214684414, "grad_norm": 0.208984375, "learning_rate": 0.0011931724588952078, "loss": 0.7549, "step": 10678 }, { "epoch": 0.2865768570201803, "grad_norm": 0.2177734375, "learning_rate": 0.0011931682308482835, "loss": 0.7755, "step": 10679 }, { "epoch": 0.28660369257191926, "grad_norm": 0.2451171875, "learning_rate": 0.0011931640015001228, "loss": 0.9116, "step": 10680 }, { "epoch": 0.28663052812365825, "grad_norm": 0.2177734375, "learning_rate": 0.0011931597708507349, "loss": 0.7771, "step": 10681 }, { "epoch": 0.2866573636753972, "grad_norm": 0.2294921875, "learning_rate": 0.0011931555389001287, "loss": 0.7901, "step": 10682 }, { "epoch": 0.2866841992271361, "grad_norm": 0.208984375, "learning_rate": 0.001193151305648314, "loss": 0.651, "step": 10683 }, { "epoch": 0.28671103477887505, "grad_norm": 0.2177734375, "learning_rate": 0.0011931470710952997, "loss": 0.7806, "step": 10684 }, { "epoch": 0.286737870330614, "grad_norm": 0.220703125, "learning_rate": 0.0011931428352410952, "loss": 0.7467, "step": 10685 }, { "epoch": 0.2867647058823529, "grad_norm": 0.1962890625, "learning_rate": 0.00119313859808571, "loss": 0.674, "step": 10686 }, { "epoch": 0.2867915414340919, "grad_norm": 0.2177734375, "learning_rate": 0.001193134359629153, "loss": 0.7745, "step": 10687 }, { "epoch": 0.28681837698583085, "grad_norm": 0.208984375, "learning_rate": 0.0011931301198714339, "loss": 0.6732, "step": 10688 }, { "epoch": 0.2868452125375698, "grad_norm": 0.2412109375, "learning_rate": 0.0011931258788125617, "loss": 0.907, "step": 10689 }, { "epoch": 0.2868720480893087, "grad_norm": 0.216796875, "learning_rate": 0.001193121636452546, "loss": 0.7698, "step": 10690 }, { "epoch": 0.28689888364104765, "grad_norm": 0.216796875, "learning_rate": 0.0011931173927913958, "loss": 0.7411, "step": 10691 }, { "epoch": 0.2869257191927866, "grad_norm": 0.2138671875, "learning_rate": 0.0011931131478291205, "loss": 0.7299, "step": 10692 }, { "epoch": 0.2869525547445255, "grad_norm": 0.2041015625, "learning_rate": 0.0011931089015657296, "loss": 0.7037, "step": 10693 }, { "epoch": 0.2869793902962645, "grad_norm": 0.22265625, "learning_rate": 0.001193104654001232, "loss": 0.7984, "step": 10694 }, { "epoch": 0.28700622584800345, "grad_norm": 0.2099609375, "learning_rate": 0.0011931004051356376, "loss": 0.7004, "step": 10695 }, { "epoch": 0.2870330613997424, "grad_norm": 0.201171875, "learning_rate": 0.001193096154968955, "loss": 0.6677, "step": 10696 }, { "epoch": 0.2870598969514813, "grad_norm": 0.2197265625, "learning_rate": 0.0011930919035011943, "loss": 0.8084, "step": 10697 }, { "epoch": 0.28708673250322025, "grad_norm": 0.234375, "learning_rate": 0.0011930876507323643, "loss": 0.7807, "step": 10698 }, { "epoch": 0.2871135680549592, "grad_norm": 0.2216796875, "learning_rate": 0.0011930833966624745, "loss": 0.7854, "step": 10699 }, { "epoch": 0.2871404036066982, "grad_norm": 0.2255859375, "learning_rate": 0.0011930791412915341, "loss": 0.7945, "step": 10700 }, { "epoch": 0.2871672391584371, "grad_norm": 0.2099609375, "learning_rate": 0.0011930748846195529, "loss": 0.7776, "step": 10701 }, { "epoch": 0.28719407471017605, "grad_norm": 0.2265625, "learning_rate": 0.0011930706266465394, "loss": 0.7663, "step": 10702 }, { "epoch": 0.287220910261915, "grad_norm": 0.203125, "learning_rate": 0.001193066367372504, "loss": 0.6779, "step": 10703 }, { "epoch": 0.2872477458136539, "grad_norm": 0.2314453125, "learning_rate": 0.001193062106797455, "loss": 0.818, "step": 10704 }, { "epoch": 0.28727458136539286, "grad_norm": 0.2275390625, "learning_rate": 0.0011930578449214023, "loss": 0.7573, "step": 10705 }, { "epoch": 0.2873014169171318, "grad_norm": 0.228515625, "learning_rate": 0.0011930535817443555, "loss": 0.8275, "step": 10706 }, { "epoch": 0.2873282524688708, "grad_norm": 0.2392578125, "learning_rate": 0.0011930493172663233, "loss": 0.8357, "step": 10707 }, { "epoch": 0.2873550880206097, "grad_norm": 0.2236328125, "learning_rate": 0.0011930450514873153, "loss": 0.8159, "step": 10708 }, { "epoch": 0.28738192357234865, "grad_norm": 0.23046875, "learning_rate": 0.0011930407844073412, "loss": 0.7685, "step": 10709 }, { "epoch": 0.2874087591240876, "grad_norm": 0.2109375, "learning_rate": 0.00119303651602641, "loss": 0.7547, "step": 10710 }, { "epoch": 0.2874355946758265, "grad_norm": 0.220703125, "learning_rate": 0.001193032246344531, "loss": 0.705, "step": 10711 }, { "epoch": 0.28746243022756546, "grad_norm": 0.2333984375, "learning_rate": 0.001193027975361714, "loss": 0.8645, "step": 10712 }, { "epoch": 0.28748926577930445, "grad_norm": 0.228515625, "learning_rate": 0.001193023703077968, "loss": 0.7856, "step": 10713 }, { "epoch": 0.2875161013310434, "grad_norm": 0.21484375, "learning_rate": 0.0011930194294933023, "loss": 0.7467, "step": 10714 }, { "epoch": 0.2875429368827823, "grad_norm": 0.2255859375, "learning_rate": 0.0011930151546077265, "loss": 0.806, "step": 10715 }, { "epoch": 0.28756977243452125, "grad_norm": 0.2109375, "learning_rate": 0.00119301087842125, "loss": 0.7232, "step": 10716 }, { "epoch": 0.2875966079862602, "grad_norm": 0.2177734375, "learning_rate": 0.001193006600933882, "loss": 0.7637, "step": 10717 }, { "epoch": 0.2876234435379991, "grad_norm": 0.21484375, "learning_rate": 0.001193002322145632, "loss": 0.7337, "step": 10718 }, { "epoch": 0.28765027908973806, "grad_norm": 0.2236328125, "learning_rate": 0.0011929980420565096, "loss": 0.7745, "step": 10719 }, { "epoch": 0.28767711464147705, "grad_norm": 0.2177734375, "learning_rate": 0.001192993760666524, "loss": 0.7118, "step": 10720 }, { "epoch": 0.287703950193216, "grad_norm": 0.2275390625, "learning_rate": 0.0011929894779756842, "loss": 0.7741, "step": 10721 }, { "epoch": 0.2877307857449549, "grad_norm": 0.201171875, "learning_rate": 0.001192985193984, "loss": 0.6383, "step": 10722 }, { "epoch": 0.28775762129669386, "grad_norm": 0.2216796875, "learning_rate": 0.001192980908691481, "loss": 0.7812, "step": 10723 }, { "epoch": 0.2877844568484328, "grad_norm": 0.21875, "learning_rate": 0.001192976622098136, "loss": 0.6998, "step": 10724 }, { "epoch": 0.2878112924001717, "grad_norm": 0.2177734375, "learning_rate": 0.0011929723342039749, "loss": 0.7143, "step": 10725 }, { "epoch": 0.2878381279519107, "grad_norm": 0.232421875, "learning_rate": 0.001192968045009007, "loss": 0.8635, "step": 10726 }, { "epoch": 0.28786496350364965, "grad_norm": 0.2158203125, "learning_rate": 0.0011929637545132416, "loss": 0.7442, "step": 10727 }, { "epoch": 0.2878917990553886, "grad_norm": 0.2177734375, "learning_rate": 0.0011929594627166883, "loss": 0.711, "step": 10728 }, { "epoch": 0.2879186346071275, "grad_norm": 0.2392578125, "learning_rate": 0.0011929551696193562, "loss": 0.8525, "step": 10729 }, { "epoch": 0.28794547015886646, "grad_norm": 0.2177734375, "learning_rate": 0.001192950875221255, "loss": 0.7557, "step": 10730 }, { "epoch": 0.2879723057106054, "grad_norm": 0.216796875, "learning_rate": 0.0011929465795223941, "loss": 0.7091, "step": 10731 }, { "epoch": 0.2879991412623443, "grad_norm": 0.2353515625, "learning_rate": 0.0011929422825227827, "loss": 0.8868, "step": 10732 }, { "epoch": 0.2880259768140833, "grad_norm": 0.201171875, "learning_rate": 0.0011929379842224306, "loss": 0.6973, "step": 10733 }, { "epoch": 0.28805281236582225, "grad_norm": 0.201171875, "learning_rate": 0.0011929336846213468, "loss": 0.6735, "step": 10734 }, { "epoch": 0.2880796479175612, "grad_norm": 0.2060546875, "learning_rate": 0.001192929383719541, "loss": 0.687, "step": 10735 }, { "epoch": 0.2881064834693001, "grad_norm": 0.244140625, "learning_rate": 0.0011929250815170226, "loss": 0.9097, "step": 10736 }, { "epoch": 0.28813331902103906, "grad_norm": 0.232421875, "learning_rate": 0.001192920778013801, "loss": 0.8293, "step": 10737 }, { "epoch": 0.288160154572778, "grad_norm": 0.2119140625, "learning_rate": 0.0011929164732098856, "loss": 0.7719, "step": 10738 }, { "epoch": 0.288186990124517, "grad_norm": 0.22265625, "learning_rate": 0.001192912167105286, "loss": 0.8151, "step": 10739 }, { "epoch": 0.2882138256762559, "grad_norm": 0.19921875, "learning_rate": 0.0011929078597000115, "loss": 0.6274, "step": 10740 }, { "epoch": 0.28824066122799485, "grad_norm": 0.20703125, "learning_rate": 0.0011929035509940717, "loss": 0.6585, "step": 10741 }, { "epoch": 0.2882674967797338, "grad_norm": 0.232421875, "learning_rate": 0.0011928992409874757, "loss": 0.8411, "step": 10742 }, { "epoch": 0.2882943323314727, "grad_norm": 0.2578125, "learning_rate": 0.0011928949296802333, "loss": 0.8025, "step": 10743 }, { "epoch": 0.28832116788321166, "grad_norm": 0.322265625, "learning_rate": 0.001192890617072354, "loss": 0.8272, "step": 10744 }, { "epoch": 0.2883480034349506, "grad_norm": 0.310546875, "learning_rate": 0.0011928863031638469, "loss": 0.8162, "step": 10745 }, { "epoch": 0.2883748389866896, "grad_norm": 0.2490234375, "learning_rate": 0.0011928819879547217, "loss": 0.8039, "step": 10746 }, { "epoch": 0.2884016745384285, "grad_norm": 0.3984375, "learning_rate": 0.001192877671444988, "loss": 0.8216, "step": 10747 }, { "epoch": 0.28842851009016746, "grad_norm": 0.291015625, "learning_rate": 0.001192873353634655, "loss": 0.7151, "step": 10748 }, { "epoch": 0.2884553456419064, "grad_norm": 0.251953125, "learning_rate": 0.0011928690345237324, "loss": 0.7944, "step": 10749 }, { "epoch": 0.2884821811936453, "grad_norm": 0.2373046875, "learning_rate": 0.0011928647141122295, "loss": 0.7551, "step": 10750 }, { "epoch": 0.28850901674538426, "grad_norm": 0.216796875, "learning_rate": 0.0011928603924001558, "loss": 0.6776, "step": 10751 }, { "epoch": 0.28853585229712325, "grad_norm": 0.2373046875, "learning_rate": 0.0011928560693875209, "loss": 0.8351, "step": 10752 }, { "epoch": 0.2885626878488622, "grad_norm": 0.228515625, "learning_rate": 0.001192851745074334, "loss": 0.6673, "step": 10753 }, { "epoch": 0.2885895234006011, "grad_norm": 0.21484375, "learning_rate": 0.001192847419460605, "loss": 0.6324, "step": 10754 }, { "epoch": 0.28861635895234006, "grad_norm": 0.25, "learning_rate": 0.001192843092546343, "loss": 0.8694, "step": 10755 }, { "epoch": 0.288643194504079, "grad_norm": 0.2333984375, "learning_rate": 0.001192838764331558, "loss": 0.7815, "step": 10756 }, { "epoch": 0.2886700300558179, "grad_norm": 0.22265625, "learning_rate": 0.001192834434816259, "loss": 0.7584, "step": 10757 }, { "epoch": 0.2886968656075569, "grad_norm": 0.2109375, "learning_rate": 0.0011928301040004556, "loss": 0.7193, "step": 10758 }, { "epoch": 0.28872370115929585, "grad_norm": 0.224609375, "learning_rate": 0.0011928257718841575, "loss": 0.7366, "step": 10759 }, { "epoch": 0.2887505367110348, "grad_norm": 0.2265625, "learning_rate": 0.001192821438467374, "loss": 0.7156, "step": 10760 }, { "epoch": 0.2887773722627737, "grad_norm": 0.23828125, "learning_rate": 0.0011928171037501145, "loss": 0.7408, "step": 10761 }, { "epoch": 0.28880420781451266, "grad_norm": 0.2177734375, "learning_rate": 0.0011928127677323889, "loss": 0.7249, "step": 10762 }, { "epoch": 0.2888310433662516, "grad_norm": 0.2236328125, "learning_rate": 0.0011928084304142063, "loss": 0.7334, "step": 10763 }, { "epoch": 0.28885787891799053, "grad_norm": 0.1962890625, "learning_rate": 0.0011928040917955766, "loss": 0.6087, "step": 10764 }, { "epoch": 0.2888847144697295, "grad_norm": 0.220703125, "learning_rate": 0.001192799751876509, "loss": 0.7643, "step": 10765 }, { "epoch": 0.28891155002146846, "grad_norm": 0.25390625, "learning_rate": 0.0011927954106570131, "loss": 0.8259, "step": 10766 }, { "epoch": 0.2889383855732074, "grad_norm": 0.2373046875, "learning_rate": 0.001192791068137099, "loss": 0.7807, "step": 10767 }, { "epoch": 0.2889652211249463, "grad_norm": 0.2255859375, "learning_rate": 0.0011927867243167751, "loss": 0.7143, "step": 10768 }, { "epoch": 0.28899205667668526, "grad_norm": 0.2255859375, "learning_rate": 0.0011927823791960516, "loss": 0.7773, "step": 10769 }, { "epoch": 0.2890188922284242, "grad_norm": 0.2236328125, "learning_rate": 0.0011927780327749382, "loss": 0.8036, "step": 10770 }, { "epoch": 0.2890457277801632, "grad_norm": 0.2158203125, "learning_rate": 0.001192773685053444, "loss": 0.6517, "step": 10771 }, { "epoch": 0.2890725633319021, "grad_norm": 0.228515625, "learning_rate": 0.0011927693360315789, "loss": 0.805, "step": 10772 }, { "epoch": 0.28909939888364106, "grad_norm": 0.21875, "learning_rate": 0.001192764985709352, "loss": 0.7642, "step": 10773 }, { "epoch": 0.28912623443538, "grad_norm": 0.2158203125, "learning_rate": 0.0011927606340867733, "loss": 0.7404, "step": 10774 }, { "epoch": 0.2891530699871189, "grad_norm": 0.205078125, "learning_rate": 0.001192756281163852, "loss": 0.6463, "step": 10775 }, { "epoch": 0.28917990553885786, "grad_norm": 0.2158203125, "learning_rate": 0.0011927519269405982, "loss": 0.6929, "step": 10776 }, { "epoch": 0.2892067410905968, "grad_norm": 0.23828125, "learning_rate": 0.0011927475714170208, "loss": 0.8398, "step": 10777 }, { "epoch": 0.2892335766423358, "grad_norm": 0.2265625, "learning_rate": 0.0011927432145931295, "loss": 0.7684, "step": 10778 }, { "epoch": 0.2892604121940747, "grad_norm": 0.2275390625, "learning_rate": 0.0011927388564689342, "loss": 0.7171, "step": 10779 }, { "epoch": 0.28928724774581366, "grad_norm": 0.201171875, "learning_rate": 0.0011927344970444442, "loss": 0.6645, "step": 10780 }, { "epoch": 0.2893140832975526, "grad_norm": 0.232421875, "learning_rate": 0.0011927301363196688, "loss": 0.8777, "step": 10781 }, { "epoch": 0.28934091884929153, "grad_norm": 0.220703125, "learning_rate": 0.0011927257742946182, "loss": 0.676, "step": 10782 }, { "epoch": 0.28936775440103046, "grad_norm": 0.2177734375, "learning_rate": 0.0011927214109693017, "loss": 0.6337, "step": 10783 }, { "epoch": 0.28939458995276945, "grad_norm": 0.22265625, "learning_rate": 0.0011927170463437286, "loss": 0.6578, "step": 10784 }, { "epoch": 0.2894214255045084, "grad_norm": 0.22265625, "learning_rate": 0.0011927126804179087, "loss": 0.7578, "step": 10785 }, { "epoch": 0.2894482610562473, "grad_norm": 0.2099609375, "learning_rate": 0.0011927083131918514, "loss": 0.7552, "step": 10786 }, { "epoch": 0.28947509660798626, "grad_norm": 0.2099609375, "learning_rate": 0.0011927039446655667, "loss": 0.7058, "step": 10787 }, { "epoch": 0.2895019321597252, "grad_norm": 0.23046875, "learning_rate": 0.001192699574839064, "loss": 0.8081, "step": 10788 }, { "epoch": 0.28952876771146413, "grad_norm": 0.232421875, "learning_rate": 0.0011926952037123527, "loss": 0.6825, "step": 10789 }, { "epoch": 0.28955560326320307, "grad_norm": 0.2392578125, "learning_rate": 0.0011926908312854423, "loss": 0.8316, "step": 10790 }, { "epoch": 0.28958243881494206, "grad_norm": 0.2333984375, "learning_rate": 0.0011926864575583427, "loss": 0.7429, "step": 10791 }, { "epoch": 0.289609274366681, "grad_norm": 0.224609375, "learning_rate": 0.0011926820825310634, "loss": 0.7277, "step": 10792 }, { "epoch": 0.2896361099184199, "grad_norm": 0.20703125, "learning_rate": 0.001192677706203614, "loss": 0.6242, "step": 10793 }, { "epoch": 0.28966294547015886, "grad_norm": 0.2119140625, "learning_rate": 0.001192673328576004, "loss": 0.6785, "step": 10794 }, { "epoch": 0.2896897810218978, "grad_norm": 0.2314453125, "learning_rate": 0.0011926689496482433, "loss": 0.8282, "step": 10795 }, { "epoch": 0.28971661657363673, "grad_norm": 0.22265625, "learning_rate": 0.0011926645694203411, "loss": 0.7559, "step": 10796 }, { "epoch": 0.2897434521253757, "grad_norm": 0.2333984375, "learning_rate": 0.0011926601878923073, "loss": 0.7977, "step": 10797 }, { "epoch": 0.28977028767711466, "grad_norm": 0.2060546875, "learning_rate": 0.0011926558050641513, "loss": 0.738, "step": 10798 }, { "epoch": 0.2897971232288536, "grad_norm": 0.21484375, "learning_rate": 0.001192651420935883, "loss": 0.7055, "step": 10799 }, { "epoch": 0.2898239587805925, "grad_norm": 0.228515625, "learning_rate": 0.0011926470355075117, "loss": 0.8228, "step": 10800 }, { "epoch": 0.28985079433233146, "grad_norm": 0.2451171875, "learning_rate": 0.0011926426487790472, "loss": 0.7638, "step": 10801 }, { "epoch": 0.2898776298840704, "grad_norm": 0.21484375, "learning_rate": 0.001192638260750499, "loss": 0.6177, "step": 10802 }, { "epoch": 0.28990446543580933, "grad_norm": 0.2236328125, "learning_rate": 0.001192633871421877, "loss": 0.797, "step": 10803 }, { "epoch": 0.2899313009875483, "grad_norm": 0.2265625, "learning_rate": 0.0011926294807931906, "loss": 0.8245, "step": 10804 }, { "epoch": 0.28995813653928726, "grad_norm": 0.22265625, "learning_rate": 0.0011926250888644493, "loss": 0.775, "step": 10805 }, { "epoch": 0.2899849720910262, "grad_norm": 0.2314453125, "learning_rate": 0.001192620695635663, "loss": 0.8326, "step": 10806 }, { "epoch": 0.29001180764276513, "grad_norm": 0.2353515625, "learning_rate": 0.0011926163011068414, "loss": 0.8292, "step": 10807 }, { "epoch": 0.29003864319450406, "grad_norm": 0.2197265625, "learning_rate": 0.0011926119052779937, "loss": 0.6596, "step": 10808 }, { "epoch": 0.290065478746243, "grad_norm": 0.208984375, "learning_rate": 0.00119260750814913, "loss": 0.6782, "step": 10809 }, { "epoch": 0.290092314297982, "grad_norm": 0.2109375, "learning_rate": 0.00119260310972026, "loss": 0.7644, "step": 10810 }, { "epoch": 0.2901191498497209, "grad_norm": 0.2314453125, "learning_rate": 0.0011925987099913927, "loss": 0.8348, "step": 10811 }, { "epoch": 0.29014598540145986, "grad_norm": 0.220703125, "learning_rate": 0.0011925943089625384, "loss": 0.7331, "step": 10812 }, { "epoch": 0.2901728209531988, "grad_norm": 0.22265625, "learning_rate": 0.0011925899066337065, "loss": 0.6855, "step": 10813 }, { "epoch": 0.29019965650493773, "grad_norm": 0.1982421875, "learning_rate": 0.0011925855030049065, "loss": 0.6483, "step": 10814 }, { "epoch": 0.29022649205667667, "grad_norm": 0.2265625, "learning_rate": 0.0011925810980761484, "loss": 0.7904, "step": 10815 }, { "epoch": 0.29025332760841566, "grad_norm": 0.2041015625, "learning_rate": 0.0011925766918474418, "loss": 0.7532, "step": 10816 }, { "epoch": 0.2902801631601546, "grad_norm": 0.224609375, "learning_rate": 0.0011925722843187962, "loss": 0.7533, "step": 10817 }, { "epoch": 0.2903069987118935, "grad_norm": 0.205078125, "learning_rate": 0.0011925678754902214, "loss": 0.6866, "step": 10818 }, { "epoch": 0.29033383426363246, "grad_norm": 0.2080078125, "learning_rate": 0.001192563465361727, "loss": 0.6614, "step": 10819 }, { "epoch": 0.2903606698153714, "grad_norm": 0.2119140625, "learning_rate": 0.0011925590539333226, "loss": 0.7427, "step": 10820 }, { "epoch": 0.29038750536711033, "grad_norm": 0.21875, "learning_rate": 0.001192554641205018, "loss": 0.6853, "step": 10821 }, { "epoch": 0.29041434091884927, "grad_norm": 0.2080078125, "learning_rate": 0.0011925502271768228, "loss": 0.6808, "step": 10822 }, { "epoch": 0.29044117647058826, "grad_norm": 0.2177734375, "learning_rate": 0.0011925458118487468, "loss": 0.7598, "step": 10823 }, { "epoch": 0.2904680120223272, "grad_norm": 0.2119140625, "learning_rate": 0.0011925413952207998, "loss": 0.7009, "step": 10824 }, { "epoch": 0.29049484757406613, "grad_norm": 0.197265625, "learning_rate": 0.001192536977292991, "loss": 0.6649, "step": 10825 }, { "epoch": 0.29052168312580506, "grad_norm": 0.1953125, "learning_rate": 0.0011925325580653305, "loss": 0.693, "step": 10826 }, { "epoch": 0.290548518677544, "grad_norm": 0.2109375, "learning_rate": 0.0011925281375378281, "loss": 0.6292, "step": 10827 }, { "epoch": 0.29057535422928293, "grad_norm": 0.2412109375, "learning_rate": 0.001192523715710493, "loss": 0.8374, "step": 10828 }, { "epoch": 0.2906021897810219, "grad_norm": 0.21484375, "learning_rate": 0.0011925192925833356, "loss": 0.7285, "step": 10829 }, { "epoch": 0.29062902533276086, "grad_norm": 0.2021484375, "learning_rate": 0.0011925148681563647, "loss": 0.6923, "step": 10830 }, { "epoch": 0.2906558608844998, "grad_norm": 0.203125, "learning_rate": 0.0011925104424295907, "loss": 0.6911, "step": 10831 }, { "epoch": 0.29068269643623873, "grad_norm": 0.2138671875, "learning_rate": 0.0011925060154030231, "loss": 0.7323, "step": 10832 }, { "epoch": 0.29070953198797767, "grad_norm": 0.2138671875, "learning_rate": 0.0011925015870766717, "loss": 0.7468, "step": 10833 }, { "epoch": 0.2907363675397166, "grad_norm": 0.2275390625, "learning_rate": 0.001192497157450546, "loss": 0.8399, "step": 10834 }, { "epoch": 0.29076320309145554, "grad_norm": 0.1943359375, "learning_rate": 0.0011924927265246558, "loss": 0.667, "step": 10835 }, { "epoch": 0.2907900386431945, "grad_norm": 0.19921875, "learning_rate": 0.0011924882942990112, "loss": 0.6985, "step": 10836 }, { "epoch": 0.29081687419493346, "grad_norm": 0.212890625, "learning_rate": 0.0011924838607736213, "loss": 0.7358, "step": 10837 }, { "epoch": 0.2908437097466724, "grad_norm": 0.2021484375, "learning_rate": 0.0011924794259484963, "loss": 0.672, "step": 10838 }, { "epoch": 0.29087054529841133, "grad_norm": 0.1962890625, "learning_rate": 0.0011924749898236456, "loss": 0.6259, "step": 10839 }, { "epoch": 0.29089738085015027, "grad_norm": 0.2158203125, "learning_rate": 0.0011924705523990791, "loss": 0.8111, "step": 10840 }, { "epoch": 0.2909242164018892, "grad_norm": 0.208984375, "learning_rate": 0.0011924661136748064, "loss": 0.6531, "step": 10841 }, { "epoch": 0.2909510519536282, "grad_norm": 0.21484375, "learning_rate": 0.0011924616736508377, "loss": 0.7458, "step": 10842 }, { "epoch": 0.2909778875053671, "grad_norm": 0.224609375, "learning_rate": 0.0011924572323271823, "loss": 0.7604, "step": 10843 }, { "epoch": 0.29100472305710606, "grad_norm": 0.2255859375, "learning_rate": 0.0011924527897038497, "loss": 0.812, "step": 10844 }, { "epoch": 0.291031558608845, "grad_norm": 0.2275390625, "learning_rate": 0.0011924483457808504, "loss": 0.8155, "step": 10845 }, { "epoch": 0.29105839416058393, "grad_norm": 0.2109375, "learning_rate": 0.0011924439005581934, "loss": 0.6686, "step": 10846 }, { "epoch": 0.29108522971232287, "grad_norm": 0.2158203125, "learning_rate": 0.001192439454035889, "loss": 0.7895, "step": 10847 }, { "epoch": 0.2911120652640618, "grad_norm": 0.2294921875, "learning_rate": 0.0011924350062139468, "loss": 0.8286, "step": 10848 }, { "epoch": 0.2911389008158008, "grad_norm": 0.228515625, "learning_rate": 0.001192430557092376, "loss": 0.9118, "step": 10849 }, { "epoch": 0.29116573636753973, "grad_norm": 0.224609375, "learning_rate": 0.0011924261066711874, "loss": 0.7643, "step": 10850 }, { "epoch": 0.29119257191927866, "grad_norm": 0.2119140625, "learning_rate": 0.00119242165495039, "loss": 0.7121, "step": 10851 }, { "epoch": 0.2912194074710176, "grad_norm": 0.2265625, "learning_rate": 0.0011924172019299939, "loss": 0.8385, "step": 10852 }, { "epoch": 0.29124624302275653, "grad_norm": 0.20703125, "learning_rate": 0.0011924127476100085, "loss": 0.6999, "step": 10853 }, { "epoch": 0.29127307857449547, "grad_norm": 0.2060546875, "learning_rate": 0.001192408291990444, "loss": 0.716, "step": 10854 }, { "epoch": 0.29129991412623446, "grad_norm": 0.25, "learning_rate": 0.0011924038350713101, "loss": 0.7535, "step": 10855 }, { "epoch": 0.2913267496779734, "grad_norm": 0.236328125, "learning_rate": 0.0011923993768526164, "loss": 0.813, "step": 10856 }, { "epoch": 0.29135358522971233, "grad_norm": 0.216796875, "learning_rate": 0.0011923949173343726, "loss": 0.738, "step": 10857 }, { "epoch": 0.29138042078145127, "grad_norm": 0.22265625, "learning_rate": 0.0011923904565165886, "loss": 0.7464, "step": 10858 }, { "epoch": 0.2914072563331902, "grad_norm": 0.21875, "learning_rate": 0.0011923859943992745, "loss": 0.7581, "step": 10859 }, { "epoch": 0.29143409188492914, "grad_norm": 0.2177734375, "learning_rate": 0.0011923815309824396, "loss": 0.8074, "step": 10860 }, { "epoch": 0.29146092743666807, "grad_norm": 0.2216796875, "learning_rate": 0.001192377066266094, "loss": 0.6523, "step": 10861 }, { "epoch": 0.29148776298840706, "grad_norm": 0.22265625, "learning_rate": 0.0011923726002502474, "loss": 0.7785, "step": 10862 }, { "epoch": 0.291514598540146, "grad_norm": 0.1982421875, "learning_rate": 0.0011923681329349097, "loss": 0.6367, "step": 10863 }, { "epoch": 0.29154143409188493, "grad_norm": 0.2216796875, "learning_rate": 0.0011923636643200904, "loss": 0.7388, "step": 10864 }, { "epoch": 0.29156826964362387, "grad_norm": 0.228515625, "learning_rate": 0.0011923591944057994, "loss": 0.7753, "step": 10865 }, { "epoch": 0.2915951051953628, "grad_norm": 0.2412109375, "learning_rate": 0.001192354723192047, "loss": 0.7993, "step": 10866 }, { "epoch": 0.29162194074710174, "grad_norm": 0.2109375, "learning_rate": 0.0011923502506788423, "loss": 0.7387, "step": 10867 }, { "epoch": 0.29164877629884073, "grad_norm": 0.23828125, "learning_rate": 0.0011923457768661955, "loss": 0.8111, "step": 10868 }, { "epoch": 0.29167561185057966, "grad_norm": 0.2236328125, "learning_rate": 0.0011923413017541162, "loss": 0.8159, "step": 10869 }, { "epoch": 0.2917024474023186, "grad_norm": 0.2099609375, "learning_rate": 0.0011923368253426146, "loss": 0.7363, "step": 10870 }, { "epoch": 0.29172928295405753, "grad_norm": 0.2109375, "learning_rate": 0.0011923323476317, "loss": 0.712, "step": 10871 }, { "epoch": 0.29175611850579647, "grad_norm": 0.21484375, "learning_rate": 0.001192327868621383, "loss": 0.7226, "step": 10872 }, { "epoch": 0.2917829540575354, "grad_norm": 0.2109375, "learning_rate": 0.0011923233883116724, "loss": 0.7333, "step": 10873 }, { "epoch": 0.29180978960927434, "grad_norm": 0.21875, "learning_rate": 0.0011923189067025786, "loss": 0.7632, "step": 10874 }, { "epoch": 0.29183662516101333, "grad_norm": 0.2080078125, "learning_rate": 0.0011923144237941117, "loss": 0.747, "step": 10875 }, { "epoch": 0.29186346071275227, "grad_norm": 0.208984375, "learning_rate": 0.0011923099395862809, "loss": 0.6711, "step": 10876 }, { "epoch": 0.2918902962644912, "grad_norm": 0.234375, "learning_rate": 0.0011923054540790964, "loss": 0.8372, "step": 10877 }, { "epoch": 0.29191713181623014, "grad_norm": 0.212890625, "learning_rate": 0.0011923009672725681, "loss": 0.7559, "step": 10878 }, { "epoch": 0.29194396736796907, "grad_norm": 0.2255859375, "learning_rate": 0.001192296479166706, "loss": 0.7894, "step": 10879 }, { "epoch": 0.291970802919708, "grad_norm": 0.2021484375, "learning_rate": 0.001192291989761519, "loss": 0.6576, "step": 10880 }, { "epoch": 0.291997638471447, "grad_norm": 0.203125, "learning_rate": 0.0011922874990570181, "loss": 0.6871, "step": 10881 }, { "epoch": 0.29202447402318593, "grad_norm": 0.212890625, "learning_rate": 0.0011922830070532125, "loss": 0.7547, "step": 10882 }, { "epoch": 0.29205130957492487, "grad_norm": 0.2255859375, "learning_rate": 0.0011922785137501125, "loss": 0.774, "step": 10883 }, { "epoch": 0.2920781451266638, "grad_norm": 0.2109375, "learning_rate": 0.0011922740191477276, "loss": 0.7013, "step": 10884 }, { "epoch": 0.29210498067840274, "grad_norm": 0.23046875, "learning_rate": 0.0011922695232460675, "loss": 0.8654, "step": 10885 }, { "epoch": 0.29213181623014167, "grad_norm": 0.236328125, "learning_rate": 0.0011922650260451425, "loss": 0.8572, "step": 10886 }, { "epoch": 0.29215865178188066, "grad_norm": 0.2099609375, "learning_rate": 0.0011922605275449622, "loss": 0.7394, "step": 10887 }, { "epoch": 0.2921854873336196, "grad_norm": 0.216796875, "learning_rate": 0.0011922560277455368, "loss": 0.6653, "step": 10888 }, { "epoch": 0.29221232288535853, "grad_norm": 0.234375, "learning_rate": 0.0011922515266468757, "loss": 0.8728, "step": 10889 }, { "epoch": 0.29223915843709747, "grad_norm": 0.2021484375, "learning_rate": 0.001192247024248989, "loss": 0.678, "step": 10890 }, { "epoch": 0.2922659939888364, "grad_norm": 0.19140625, "learning_rate": 0.0011922425205518866, "loss": 0.6116, "step": 10891 }, { "epoch": 0.29229282954057534, "grad_norm": 0.216796875, "learning_rate": 0.0011922380155555783, "loss": 0.7347, "step": 10892 }, { "epoch": 0.2923196650923143, "grad_norm": 0.2001953125, "learning_rate": 0.0011922335092600742, "loss": 0.656, "step": 10893 }, { "epoch": 0.29234650064405326, "grad_norm": 0.2109375, "learning_rate": 0.001192229001665384, "loss": 0.7328, "step": 10894 }, { "epoch": 0.2923733361957922, "grad_norm": 0.2236328125, "learning_rate": 0.0011922244927715174, "loss": 0.7775, "step": 10895 }, { "epoch": 0.29240017174753113, "grad_norm": 0.2099609375, "learning_rate": 0.0011922199825784845, "loss": 0.6553, "step": 10896 }, { "epoch": 0.29242700729927007, "grad_norm": 0.2099609375, "learning_rate": 0.0011922154710862951, "loss": 0.7574, "step": 10897 }, { "epoch": 0.292453842851009, "grad_norm": 0.2109375, "learning_rate": 0.0011922109582949596, "loss": 0.6993, "step": 10898 }, { "epoch": 0.29248067840274794, "grad_norm": 0.21875, "learning_rate": 0.0011922064442044873, "loss": 0.7116, "step": 10899 }, { "epoch": 0.29250751395448693, "grad_norm": 0.2216796875, "learning_rate": 0.0011922019288148882, "loss": 0.793, "step": 10900 }, { "epoch": 0.29253434950622587, "grad_norm": 0.21875, "learning_rate": 0.0011921974121261725, "loss": 0.7712, "step": 10901 }, { "epoch": 0.2925611850579648, "grad_norm": 0.220703125, "learning_rate": 0.0011921928941383496, "loss": 0.7495, "step": 10902 }, { "epoch": 0.29258802060970374, "grad_norm": 0.21484375, "learning_rate": 0.00119218837485143, "loss": 0.6906, "step": 10903 }, { "epoch": 0.29261485616144267, "grad_norm": 0.23828125, "learning_rate": 0.0011921838542654232, "loss": 0.85, "step": 10904 }, { "epoch": 0.2926416917131816, "grad_norm": 0.212890625, "learning_rate": 0.0011921793323803392, "loss": 0.7722, "step": 10905 }, { "epoch": 0.29266852726492054, "grad_norm": 0.2060546875, "learning_rate": 0.001192174809196188, "loss": 0.7044, "step": 10906 }, { "epoch": 0.29269536281665953, "grad_norm": 0.234375, "learning_rate": 0.0011921702847129795, "loss": 0.7125, "step": 10907 }, { "epoch": 0.29272219836839847, "grad_norm": 0.216796875, "learning_rate": 0.0011921657589307236, "loss": 0.7088, "step": 10908 }, { "epoch": 0.2927490339201374, "grad_norm": 0.203125, "learning_rate": 0.0011921612318494305, "loss": 0.6789, "step": 10909 }, { "epoch": 0.29277586947187634, "grad_norm": 0.2080078125, "learning_rate": 0.0011921567034691096, "loss": 0.6955, "step": 10910 }, { "epoch": 0.2928027050236153, "grad_norm": 0.2294921875, "learning_rate": 0.001192152173789771, "loss": 0.8372, "step": 10911 }, { "epoch": 0.2928295405753542, "grad_norm": 0.234375, "learning_rate": 0.001192147642811425, "loss": 0.7769, "step": 10912 }, { "epoch": 0.2928563761270932, "grad_norm": 0.2138671875, "learning_rate": 0.0011921431105340811, "loss": 0.743, "step": 10913 }, { "epoch": 0.29288321167883213, "grad_norm": 0.2060546875, "learning_rate": 0.0011921385769577495, "loss": 0.6595, "step": 10914 }, { "epoch": 0.29291004723057107, "grad_norm": 0.2236328125, "learning_rate": 0.00119213404208244, "loss": 0.6936, "step": 10915 }, { "epoch": 0.29293688278231, "grad_norm": 0.2060546875, "learning_rate": 0.0011921295059081627, "loss": 0.6114, "step": 10916 }, { "epoch": 0.29296371833404894, "grad_norm": 0.2080078125, "learning_rate": 0.0011921249684349277, "loss": 0.7433, "step": 10917 }, { "epoch": 0.2929905538857879, "grad_norm": 0.2216796875, "learning_rate": 0.0011921204296627443, "loss": 0.8348, "step": 10918 }, { "epoch": 0.2930173894375268, "grad_norm": 0.22265625, "learning_rate": 0.0011921158895916231, "loss": 0.743, "step": 10919 }, { "epoch": 0.2930442249892658, "grad_norm": 0.2216796875, "learning_rate": 0.0011921113482215738, "loss": 0.7844, "step": 10920 }, { "epoch": 0.29307106054100474, "grad_norm": 0.2216796875, "learning_rate": 0.0011921068055526065, "loss": 0.7551, "step": 10921 }, { "epoch": 0.29309789609274367, "grad_norm": 0.205078125, "learning_rate": 0.001192102261584731, "loss": 0.7044, "step": 10922 }, { "epoch": 0.2931247316444826, "grad_norm": 0.2138671875, "learning_rate": 0.0011920977163179572, "loss": 0.7657, "step": 10923 }, { "epoch": 0.29315156719622154, "grad_norm": 0.2236328125, "learning_rate": 0.0011920931697522953, "loss": 0.7429, "step": 10924 }, { "epoch": 0.2931784027479605, "grad_norm": 0.2041015625, "learning_rate": 0.0011920886218877553, "loss": 0.7231, "step": 10925 }, { "epoch": 0.29320523829969947, "grad_norm": 0.228515625, "learning_rate": 0.001192084072724347, "loss": 0.8707, "step": 10926 }, { "epoch": 0.2932320738514384, "grad_norm": 0.22265625, "learning_rate": 0.0011920795222620803, "loss": 0.8038, "step": 10927 }, { "epoch": 0.29325890940317734, "grad_norm": 0.2197265625, "learning_rate": 0.0011920749705009656, "loss": 0.7937, "step": 10928 }, { "epoch": 0.29328574495491627, "grad_norm": 0.203125, "learning_rate": 0.0011920704174410122, "loss": 0.7246, "step": 10929 }, { "epoch": 0.2933125805066552, "grad_norm": 0.2216796875, "learning_rate": 0.001192065863082231, "loss": 0.7429, "step": 10930 }, { "epoch": 0.29333941605839414, "grad_norm": 0.2236328125, "learning_rate": 0.0011920613074246312, "loss": 0.8253, "step": 10931 }, { "epoch": 0.2933662516101331, "grad_norm": 0.2060546875, "learning_rate": 0.001192056750468223, "loss": 0.7263, "step": 10932 }, { "epoch": 0.29339308716187207, "grad_norm": 0.2197265625, "learning_rate": 0.0011920521922130166, "loss": 0.827, "step": 10933 }, { "epoch": 0.293419922713611, "grad_norm": 0.2177734375, "learning_rate": 0.0011920476326590219, "loss": 0.7482, "step": 10934 }, { "epoch": 0.29344675826534994, "grad_norm": 0.2080078125, "learning_rate": 0.0011920430718062487, "loss": 0.7189, "step": 10935 }, { "epoch": 0.2934735938170889, "grad_norm": 0.201171875, "learning_rate": 0.0011920385096547074, "loss": 0.6944, "step": 10936 }, { "epoch": 0.2935004293688278, "grad_norm": 0.1953125, "learning_rate": 0.0011920339462044077, "loss": 0.7036, "step": 10937 }, { "epoch": 0.29352726492056674, "grad_norm": 0.220703125, "learning_rate": 0.0011920293814553598, "loss": 0.8245, "step": 10938 }, { "epoch": 0.29355410047230573, "grad_norm": 0.216796875, "learning_rate": 0.0011920248154075736, "loss": 0.7893, "step": 10939 }, { "epoch": 0.29358093602404467, "grad_norm": 0.216796875, "learning_rate": 0.001192020248061059, "loss": 0.796, "step": 10940 }, { "epoch": 0.2936077715757836, "grad_norm": 0.2158203125, "learning_rate": 0.0011920156794158262, "loss": 0.7183, "step": 10941 }, { "epoch": 0.29363460712752254, "grad_norm": 0.2060546875, "learning_rate": 0.001192011109471885, "loss": 0.6884, "step": 10942 }, { "epoch": 0.2936614426792615, "grad_norm": 0.216796875, "learning_rate": 0.0011920065382292457, "loss": 0.8044, "step": 10943 }, { "epoch": 0.2936882782310004, "grad_norm": 0.232421875, "learning_rate": 0.0011920019656879183, "loss": 0.7797, "step": 10944 }, { "epoch": 0.29371511378273935, "grad_norm": 0.2265625, "learning_rate": 0.0011919973918479128, "loss": 0.8348, "step": 10945 }, { "epoch": 0.29374194933447834, "grad_norm": 0.220703125, "learning_rate": 0.0011919928167092393, "loss": 0.7342, "step": 10946 }, { "epoch": 0.29376878488621727, "grad_norm": 0.2080078125, "learning_rate": 0.0011919882402719076, "loss": 0.7048, "step": 10947 }, { "epoch": 0.2937956204379562, "grad_norm": 0.208984375, "learning_rate": 0.0011919836625359278, "loss": 0.7542, "step": 10948 }, { "epoch": 0.29382245598969514, "grad_norm": 0.216796875, "learning_rate": 0.00119197908350131, "loss": 0.727, "step": 10949 }, { "epoch": 0.2938492915414341, "grad_norm": 0.2265625, "learning_rate": 0.0011919745031680642, "loss": 0.8531, "step": 10950 }, { "epoch": 0.293876127093173, "grad_norm": 0.208984375, "learning_rate": 0.0011919699215362006, "loss": 0.7144, "step": 10951 }, { "epoch": 0.293902962644912, "grad_norm": 0.2158203125, "learning_rate": 0.0011919653386057293, "loss": 0.6759, "step": 10952 }, { "epoch": 0.29392979819665094, "grad_norm": 0.2021484375, "learning_rate": 0.0011919607543766598, "loss": 0.6434, "step": 10953 }, { "epoch": 0.2939566337483899, "grad_norm": 0.2099609375, "learning_rate": 0.001191956168849003, "loss": 0.7152, "step": 10954 }, { "epoch": 0.2939834693001288, "grad_norm": 0.193359375, "learning_rate": 0.0011919515820227682, "loss": 0.6143, "step": 10955 }, { "epoch": 0.29401030485186774, "grad_norm": 0.1982421875, "learning_rate": 0.001191946993897966, "loss": 0.624, "step": 10956 }, { "epoch": 0.2940371404036067, "grad_norm": 0.224609375, "learning_rate": 0.0011919424044746062, "loss": 0.7761, "step": 10957 }, { "epoch": 0.29406397595534567, "grad_norm": 0.2099609375, "learning_rate": 0.0011919378137526988, "loss": 0.6863, "step": 10958 }, { "epoch": 0.2940908115070846, "grad_norm": 0.208984375, "learning_rate": 0.0011919332217322541, "loss": 0.7192, "step": 10959 }, { "epoch": 0.29411764705882354, "grad_norm": 0.216796875, "learning_rate": 0.0011919286284132822, "loss": 0.7644, "step": 10960 }, { "epoch": 0.2941444826105625, "grad_norm": 0.2138671875, "learning_rate": 0.0011919240337957928, "loss": 0.7307, "step": 10961 }, { "epoch": 0.2941713181623014, "grad_norm": 0.2177734375, "learning_rate": 0.0011919194378797963, "loss": 0.7786, "step": 10962 }, { "epoch": 0.29419815371404034, "grad_norm": 0.2099609375, "learning_rate": 0.0011919148406653026, "loss": 0.6634, "step": 10963 }, { "epoch": 0.2942249892657793, "grad_norm": 0.20703125, "learning_rate": 0.001191910242152322, "loss": 0.7144, "step": 10964 }, { "epoch": 0.29425182481751827, "grad_norm": 0.208984375, "learning_rate": 0.0011919056423408646, "loss": 0.6949, "step": 10965 }, { "epoch": 0.2942786603692572, "grad_norm": 0.20703125, "learning_rate": 0.00119190104123094, "loss": 0.6863, "step": 10966 }, { "epoch": 0.29430549592099614, "grad_norm": 0.2109375, "learning_rate": 0.0011918964388225588, "loss": 0.7287, "step": 10967 }, { "epoch": 0.2943323314727351, "grad_norm": 0.220703125, "learning_rate": 0.0011918918351157312, "loss": 0.7375, "step": 10968 }, { "epoch": 0.294359167024474, "grad_norm": 0.2041015625, "learning_rate": 0.001191887230110467, "loss": 0.6833, "step": 10969 }, { "epoch": 0.29438600257621295, "grad_norm": 0.203125, "learning_rate": 0.0011918826238067762, "loss": 0.6804, "step": 10970 }, { "epoch": 0.29441283812795194, "grad_norm": 0.2236328125, "learning_rate": 0.0011918780162046688, "loss": 0.795, "step": 10971 }, { "epoch": 0.29443967367969087, "grad_norm": 0.2099609375, "learning_rate": 0.0011918734073041554, "loss": 0.7554, "step": 10972 }, { "epoch": 0.2944665092314298, "grad_norm": 0.21875, "learning_rate": 0.001191868797105246, "loss": 0.7783, "step": 10973 }, { "epoch": 0.29449334478316874, "grad_norm": 0.2197265625, "learning_rate": 0.0011918641856079504, "loss": 0.6896, "step": 10974 }, { "epoch": 0.2945201803349077, "grad_norm": 0.216796875, "learning_rate": 0.0011918595728122791, "loss": 0.8024, "step": 10975 }, { "epoch": 0.2945470158866466, "grad_norm": 0.216796875, "learning_rate": 0.001191854958718242, "loss": 0.7439, "step": 10976 }, { "epoch": 0.29457385143838555, "grad_norm": 0.205078125, "learning_rate": 0.0011918503433258492, "loss": 0.674, "step": 10977 }, { "epoch": 0.29460068699012454, "grad_norm": 0.2216796875, "learning_rate": 0.001191845726635111, "loss": 0.817, "step": 10978 }, { "epoch": 0.2946275225418635, "grad_norm": 0.2080078125, "learning_rate": 0.0011918411086460372, "loss": 0.7317, "step": 10979 }, { "epoch": 0.2946543580936024, "grad_norm": 0.21875, "learning_rate": 0.0011918364893586382, "loss": 0.7921, "step": 10980 }, { "epoch": 0.29468119364534134, "grad_norm": 0.22265625, "learning_rate": 0.001191831868772924, "loss": 0.7884, "step": 10981 }, { "epoch": 0.2947080291970803, "grad_norm": 0.248046875, "learning_rate": 0.0011918272468889048, "loss": 0.9407, "step": 10982 }, { "epoch": 0.2947348647488192, "grad_norm": 0.21875, "learning_rate": 0.0011918226237065909, "loss": 0.7423, "step": 10983 }, { "epoch": 0.2947617003005582, "grad_norm": 0.2099609375, "learning_rate": 0.001191817999225992, "loss": 0.6992, "step": 10984 }, { "epoch": 0.29478853585229714, "grad_norm": 0.2216796875, "learning_rate": 0.0011918133734471188, "loss": 0.7489, "step": 10985 }, { "epoch": 0.2948153714040361, "grad_norm": 0.20703125, "learning_rate": 0.0011918087463699812, "loss": 0.7055, "step": 10986 }, { "epoch": 0.294842206955775, "grad_norm": 0.2041015625, "learning_rate": 0.001191804117994589, "loss": 0.6548, "step": 10987 }, { "epoch": 0.29486904250751395, "grad_norm": 0.2119140625, "learning_rate": 0.0011917994883209528, "loss": 0.7493, "step": 10988 }, { "epoch": 0.2948958780592529, "grad_norm": 0.1962890625, "learning_rate": 0.0011917948573490827, "loss": 0.5762, "step": 10989 }, { "epoch": 0.2949227136109918, "grad_norm": 0.2294921875, "learning_rate": 0.001191790225078989, "loss": 0.7846, "step": 10990 }, { "epoch": 0.2949495491627308, "grad_norm": 0.2197265625, "learning_rate": 0.0011917855915106813, "loss": 0.7278, "step": 10991 }, { "epoch": 0.29497638471446974, "grad_norm": 0.197265625, "learning_rate": 0.0011917809566441703, "loss": 0.6748, "step": 10992 }, { "epoch": 0.2950032202662087, "grad_norm": 0.224609375, "learning_rate": 0.001191776320479466, "loss": 0.8213, "step": 10993 }, { "epoch": 0.2950300558179476, "grad_norm": 0.2001953125, "learning_rate": 0.0011917716830165785, "loss": 0.6809, "step": 10994 }, { "epoch": 0.29505689136968655, "grad_norm": 0.234375, "learning_rate": 0.001191767044255518, "loss": 0.8538, "step": 10995 }, { "epoch": 0.2950837269214255, "grad_norm": 0.208984375, "learning_rate": 0.0011917624041962946, "loss": 0.7128, "step": 10996 }, { "epoch": 0.2951105624731645, "grad_norm": 0.205078125, "learning_rate": 0.0011917577628389189, "loss": 0.7208, "step": 10997 }, { "epoch": 0.2951373980249034, "grad_norm": 0.2060546875, "learning_rate": 0.0011917531201834004, "loss": 0.6793, "step": 10998 }, { "epoch": 0.29516423357664234, "grad_norm": 0.234375, "learning_rate": 0.00119174847622975, "loss": 0.8098, "step": 10999 }, { "epoch": 0.2951910691283813, "grad_norm": 0.2080078125, "learning_rate": 0.0011917438309779773, "loss": 0.6767, "step": 11000 }, { "epoch": 0.2952179046801202, "grad_norm": 0.2138671875, "learning_rate": 0.0011917391844280928, "loss": 0.7605, "step": 11001 }, { "epoch": 0.29524474023185915, "grad_norm": 0.2060546875, "learning_rate": 0.0011917345365801066, "loss": 0.6398, "step": 11002 }, { "epoch": 0.2952715757835981, "grad_norm": 0.21875, "learning_rate": 0.001191729887434029, "loss": 0.746, "step": 11003 }, { "epoch": 0.2952984113353371, "grad_norm": 0.2177734375, "learning_rate": 0.00119172523698987, "loss": 0.7421, "step": 11004 }, { "epoch": 0.295325246887076, "grad_norm": 0.224609375, "learning_rate": 0.00119172058524764, "loss": 0.6624, "step": 11005 }, { "epoch": 0.29535208243881494, "grad_norm": 0.220703125, "learning_rate": 0.001191715932207349, "loss": 0.7724, "step": 11006 }, { "epoch": 0.2953789179905539, "grad_norm": 0.216796875, "learning_rate": 0.0011917112778690076, "loss": 0.787, "step": 11007 }, { "epoch": 0.2954057535422928, "grad_norm": 0.2060546875, "learning_rate": 0.0011917066222326255, "loss": 0.6759, "step": 11008 }, { "epoch": 0.29543258909403175, "grad_norm": 0.2275390625, "learning_rate": 0.0011917019652982132, "loss": 0.8033, "step": 11009 }, { "epoch": 0.29545942464577074, "grad_norm": 0.2158203125, "learning_rate": 0.001191697307065781, "loss": 0.6827, "step": 11010 }, { "epoch": 0.2954862601975097, "grad_norm": 0.21484375, "learning_rate": 0.0011916926475353388, "loss": 0.6511, "step": 11011 }, { "epoch": 0.2955130957492486, "grad_norm": 0.212890625, "learning_rate": 0.0011916879867068973, "loss": 0.6857, "step": 11012 }, { "epoch": 0.29553993130098755, "grad_norm": 0.2158203125, "learning_rate": 0.001191683324580466, "loss": 0.7601, "step": 11013 }, { "epoch": 0.2955667668527265, "grad_norm": 0.201171875, "learning_rate": 0.0011916786611560557, "loss": 0.6953, "step": 11014 }, { "epoch": 0.2955936024044654, "grad_norm": 0.2158203125, "learning_rate": 0.0011916739964336768, "loss": 0.706, "step": 11015 }, { "epoch": 0.2956204379562044, "grad_norm": 0.2158203125, "learning_rate": 0.0011916693304133389, "loss": 0.6769, "step": 11016 }, { "epoch": 0.29564727350794334, "grad_norm": 0.2265625, "learning_rate": 0.0011916646630950527, "loss": 0.7082, "step": 11017 }, { "epoch": 0.2956741090596823, "grad_norm": 0.224609375, "learning_rate": 0.0011916599944788281, "loss": 0.7467, "step": 11018 }, { "epoch": 0.2957009446114212, "grad_norm": 0.212890625, "learning_rate": 0.001191655324564676, "loss": 0.7437, "step": 11019 }, { "epoch": 0.29572778016316015, "grad_norm": 0.2041015625, "learning_rate": 0.0011916506533526058, "loss": 0.6658, "step": 11020 }, { "epoch": 0.2957546157148991, "grad_norm": 0.2138671875, "learning_rate": 0.0011916459808426281, "loss": 0.7099, "step": 11021 }, { "epoch": 0.295781451266638, "grad_norm": 0.23046875, "learning_rate": 0.0011916413070347533, "loss": 0.8104, "step": 11022 }, { "epoch": 0.295808286818377, "grad_norm": 0.2119140625, "learning_rate": 0.0011916366319289916, "loss": 0.7314, "step": 11023 }, { "epoch": 0.29583512237011594, "grad_norm": 0.216796875, "learning_rate": 0.0011916319555253531, "loss": 0.7586, "step": 11024 }, { "epoch": 0.2958619579218549, "grad_norm": 0.21484375, "learning_rate": 0.0011916272778238484, "loss": 0.7205, "step": 11025 }, { "epoch": 0.2958887934735938, "grad_norm": 0.2138671875, "learning_rate": 0.0011916225988244872, "loss": 0.7272, "step": 11026 }, { "epoch": 0.29591562902533275, "grad_norm": 0.2041015625, "learning_rate": 0.0011916179185272801, "loss": 0.7097, "step": 11027 }, { "epoch": 0.2959424645770717, "grad_norm": 0.2021484375, "learning_rate": 0.0011916132369322375, "loss": 0.6491, "step": 11028 }, { "epoch": 0.2959693001288107, "grad_norm": 0.2275390625, "learning_rate": 0.0011916085540393693, "loss": 0.6764, "step": 11029 }, { "epoch": 0.2959961356805496, "grad_norm": 0.2216796875, "learning_rate": 0.0011916038698486862, "loss": 0.7579, "step": 11030 }, { "epoch": 0.29602297123228855, "grad_norm": 0.201171875, "learning_rate": 0.0011915991843601981, "loss": 0.6749, "step": 11031 }, { "epoch": 0.2960498067840275, "grad_norm": 0.2119140625, "learning_rate": 0.0011915944975739157, "loss": 0.6734, "step": 11032 }, { "epoch": 0.2960766423357664, "grad_norm": 0.2216796875, "learning_rate": 0.0011915898094898487, "loss": 0.7601, "step": 11033 }, { "epoch": 0.29610347788750535, "grad_norm": 0.2109375, "learning_rate": 0.001191585120108008, "loss": 0.697, "step": 11034 }, { "epoch": 0.2961303134392443, "grad_norm": 0.208984375, "learning_rate": 0.0011915804294284035, "loss": 0.75, "step": 11035 }, { "epoch": 0.2961571489909833, "grad_norm": 0.2216796875, "learning_rate": 0.0011915757374510454, "loss": 0.7793, "step": 11036 }, { "epoch": 0.2961839845427222, "grad_norm": 0.2197265625, "learning_rate": 0.0011915710441759441, "loss": 0.8103, "step": 11037 }, { "epoch": 0.29621082009446115, "grad_norm": 0.212890625, "learning_rate": 0.0011915663496031102, "loss": 0.716, "step": 11038 }, { "epoch": 0.2962376556462001, "grad_norm": 0.2099609375, "learning_rate": 0.0011915616537325538, "loss": 0.7258, "step": 11039 }, { "epoch": 0.296264491197939, "grad_norm": 0.208984375, "learning_rate": 0.0011915569565642851, "loss": 0.7179, "step": 11040 }, { "epoch": 0.29629132674967795, "grad_norm": 0.2158203125, "learning_rate": 0.0011915522580983147, "loss": 0.7164, "step": 11041 }, { "epoch": 0.29631816230141694, "grad_norm": 0.2060546875, "learning_rate": 0.0011915475583346522, "loss": 0.7328, "step": 11042 }, { "epoch": 0.2963449978531559, "grad_norm": 0.2197265625, "learning_rate": 0.0011915428572733088, "loss": 0.7631, "step": 11043 }, { "epoch": 0.2963718334048948, "grad_norm": 0.21484375, "learning_rate": 0.0011915381549142943, "loss": 0.7504, "step": 11044 }, { "epoch": 0.29639866895663375, "grad_norm": 0.2177734375, "learning_rate": 0.001191533451257619, "loss": 0.6974, "step": 11045 }, { "epoch": 0.2964255045083727, "grad_norm": 0.228515625, "learning_rate": 0.0011915287463032935, "loss": 0.7721, "step": 11046 }, { "epoch": 0.2964523400601116, "grad_norm": 0.2158203125, "learning_rate": 0.001191524040051328, "loss": 0.7091, "step": 11047 }, { "epoch": 0.29647917561185055, "grad_norm": 0.2138671875, "learning_rate": 0.0011915193325017326, "loss": 0.6969, "step": 11048 }, { "epoch": 0.29650601116358954, "grad_norm": 0.2060546875, "learning_rate": 0.0011915146236545179, "loss": 0.6834, "step": 11049 }, { "epoch": 0.2965328467153285, "grad_norm": 0.2080078125, "learning_rate": 0.0011915099135096942, "loss": 0.7145, "step": 11050 }, { "epoch": 0.2965596822670674, "grad_norm": 0.2158203125, "learning_rate": 0.0011915052020672716, "loss": 0.7372, "step": 11051 }, { "epoch": 0.29658651781880635, "grad_norm": 0.2177734375, "learning_rate": 0.0011915004893272606, "loss": 0.6831, "step": 11052 }, { "epoch": 0.2966133533705453, "grad_norm": 0.2099609375, "learning_rate": 0.0011914957752896718, "loss": 0.6447, "step": 11053 }, { "epoch": 0.2966401889222842, "grad_norm": 0.2099609375, "learning_rate": 0.0011914910599545151, "loss": 0.6679, "step": 11054 }, { "epoch": 0.2966670244740232, "grad_norm": 0.2265625, "learning_rate": 0.0011914863433218012, "loss": 0.859, "step": 11055 }, { "epoch": 0.29669386002576215, "grad_norm": 0.1943359375, "learning_rate": 0.00119148162539154, "loss": 0.6316, "step": 11056 }, { "epoch": 0.2967206955775011, "grad_norm": 0.21875, "learning_rate": 0.0011914769061637425, "loss": 0.6964, "step": 11057 }, { "epoch": 0.29674753112924, "grad_norm": 0.2392578125, "learning_rate": 0.0011914721856384185, "loss": 0.8312, "step": 11058 }, { "epoch": 0.29677436668097895, "grad_norm": 0.21484375, "learning_rate": 0.0011914674638155785, "loss": 0.7413, "step": 11059 }, { "epoch": 0.2968012022327179, "grad_norm": 0.19921875, "learning_rate": 0.0011914627406952327, "loss": 0.634, "step": 11060 }, { "epoch": 0.2968280377844568, "grad_norm": 0.2119140625, "learning_rate": 0.001191458016277392, "loss": 0.6559, "step": 11061 }, { "epoch": 0.2968548733361958, "grad_norm": 0.2255859375, "learning_rate": 0.0011914532905620665, "loss": 0.7137, "step": 11062 }, { "epoch": 0.29688170888793475, "grad_norm": 0.2255859375, "learning_rate": 0.001191448563549266, "loss": 0.763, "step": 11063 }, { "epoch": 0.2969085444396737, "grad_norm": 0.220703125, "learning_rate": 0.0011914438352390018, "loss": 0.6868, "step": 11064 }, { "epoch": 0.2969353799914126, "grad_norm": 0.193359375, "learning_rate": 0.0011914391056312835, "loss": 0.61, "step": 11065 }, { "epoch": 0.29696221554315155, "grad_norm": 0.20703125, "learning_rate": 0.0011914343747261222, "loss": 0.6723, "step": 11066 }, { "epoch": 0.2969890510948905, "grad_norm": 0.2265625, "learning_rate": 0.0011914296425235276, "loss": 0.7984, "step": 11067 }, { "epoch": 0.2970158866466295, "grad_norm": 0.2138671875, "learning_rate": 0.0011914249090235105, "loss": 0.6941, "step": 11068 }, { "epoch": 0.2970427221983684, "grad_norm": 0.220703125, "learning_rate": 0.001191420174226081, "loss": 0.7744, "step": 11069 }, { "epoch": 0.29706955775010735, "grad_norm": 0.203125, "learning_rate": 0.0011914154381312497, "loss": 0.6783, "step": 11070 }, { "epoch": 0.2970963933018463, "grad_norm": 0.2158203125, "learning_rate": 0.0011914107007390269, "loss": 0.6793, "step": 11071 }, { "epoch": 0.2971232288535852, "grad_norm": 0.224609375, "learning_rate": 0.0011914059620494232, "loss": 0.7984, "step": 11072 }, { "epoch": 0.29715006440532415, "grad_norm": 0.23046875, "learning_rate": 0.0011914012220624486, "loss": 0.7521, "step": 11073 }, { "epoch": 0.2971768999570631, "grad_norm": 0.2060546875, "learning_rate": 0.0011913964807781139, "loss": 0.6443, "step": 11074 }, { "epoch": 0.2972037355088021, "grad_norm": 0.224609375, "learning_rate": 0.001191391738196429, "loss": 0.7598, "step": 11075 }, { "epoch": 0.297230571060541, "grad_norm": 0.212890625, "learning_rate": 0.001191386994317405, "loss": 0.6978, "step": 11076 }, { "epoch": 0.29725740661227995, "grad_norm": 0.2158203125, "learning_rate": 0.0011913822491410517, "loss": 0.7034, "step": 11077 }, { "epoch": 0.2972842421640189, "grad_norm": 0.197265625, "learning_rate": 0.0011913775026673797, "loss": 0.7036, "step": 11078 }, { "epoch": 0.2973110777157578, "grad_norm": 0.2197265625, "learning_rate": 0.0011913727548963995, "loss": 0.8177, "step": 11079 }, { "epoch": 0.29733791326749676, "grad_norm": 0.203125, "learning_rate": 0.0011913680058281216, "loss": 0.7149, "step": 11080 }, { "epoch": 0.29736474881923575, "grad_norm": 0.2236328125, "learning_rate": 0.001191363255462556, "loss": 0.6867, "step": 11081 }, { "epoch": 0.2973915843709747, "grad_norm": 0.2216796875, "learning_rate": 0.0011913585037997138, "loss": 0.769, "step": 11082 }, { "epoch": 0.2974184199227136, "grad_norm": 0.1904296875, "learning_rate": 0.0011913537508396047, "loss": 0.6064, "step": 11083 }, { "epoch": 0.29744525547445255, "grad_norm": 0.208984375, "learning_rate": 0.0011913489965822393, "loss": 0.7203, "step": 11084 }, { "epoch": 0.2974720910261915, "grad_norm": 0.212890625, "learning_rate": 0.0011913442410276284, "loss": 0.6566, "step": 11085 }, { "epoch": 0.2974989265779304, "grad_norm": 0.212890625, "learning_rate": 0.0011913394841757824, "loss": 0.7396, "step": 11086 }, { "epoch": 0.2975257621296694, "grad_norm": 0.220703125, "learning_rate": 0.0011913347260267112, "loss": 0.7984, "step": 11087 }, { "epoch": 0.29755259768140835, "grad_norm": 0.212890625, "learning_rate": 0.0011913299665804258, "loss": 0.6841, "step": 11088 }, { "epoch": 0.2975794332331473, "grad_norm": 0.20703125, "learning_rate": 0.0011913252058369365, "loss": 0.6999, "step": 11089 }, { "epoch": 0.2976062687848862, "grad_norm": 0.216796875, "learning_rate": 0.0011913204437962535, "loss": 0.7465, "step": 11090 }, { "epoch": 0.29763310433662515, "grad_norm": 0.2236328125, "learning_rate": 0.0011913156804583874, "loss": 0.8029, "step": 11091 }, { "epoch": 0.2976599398883641, "grad_norm": 0.2060546875, "learning_rate": 0.0011913109158233487, "loss": 0.6787, "step": 11092 }, { "epoch": 0.297686775440103, "grad_norm": 0.1884765625, "learning_rate": 0.001191306149891148, "loss": 0.6358, "step": 11093 }, { "epoch": 0.297713610991842, "grad_norm": 0.201171875, "learning_rate": 0.0011913013826617953, "loss": 0.6955, "step": 11094 }, { "epoch": 0.29774044654358095, "grad_norm": 0.2119140625, "learning_rate": 0.0011912966141353015, "loss": 0.7545, "step": 11095 }, { "epoch": 0.2977672820953199, "grad_norm": 0.2216796875, "learning_rate": 0.001191291844311677, "loss": 0.7097, "step": 11096 }, { "epoch": 0.2977941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.0011912870731909317, "loss": 0.7272, "step": 11097 }, { "epoch": 0.29782095319879776, "grad_norm": 0.2041015625, "learning_rate": 0.001191282300773077, "loss": 0.7208, "step": 11098 }, { "epoch": 0.2978477887505367, "grad_norm": 0.232421875, "learning_rate": 0.0011912775270581228, "loss": 0.771, "step": 11099 }, { "epoch": 0.2978746243022757, "grad_norm": 0.22265625, "learning_rate": 0.0011912727520460792, "loss": 0.6791, "step": 11100 }, { "epoch": 0.2979014598540146, "grad_norm": 0.22265625, "learning_rate": 0.0011912679757369576, "loss": 0.7269, "step": 11101 }, { "epoch": 0.29792829540575355, "grad_norm": 0.2177734375, "learning_rate": 0.0011912631981307678, "loss": 0.7894, "step": 11102 }, { "epoch": 0.2979551309574925, "grad_norm": 0.21875, "learning_rate": 0.0011912584192275207, "loss": 0.7269, "step": 11103 }, { "epoch": 0.2979819665092314, "grad_norm": 0.2236328125, "learning_rate": 0.0011912536390272265, "loss": 0.7619, "step": 11104 }, { "epoch": 0.29800880206097036, "grad_norm": 0.2138671875, "learning_rate": 0.0011912488575298958, "loss": 0.7072, "step": 11105 }, { "epoch": 0.2980356376127093, "grad_norm": 0.216796875, "learning_rate": 0.0011912440747355389, "loss": 0.754, "step": 11106 }, { "epoch": 0.2980624731644483, "grad_norm": 0.20703125, "learning_rate": 0.0011912392906441667, "loss": 0.7008, "step": 11107 }, { "epoch": 0.2980893087161872, "grad_norm": 0.2109375, "learning_rate": 0.0011912345052557892, "loss": 0.7351, "step": 11108 }, { "epoch": 0.29811614426792615, "grad_norm": 0.21875, "learning_rate": 0.0011912297185704174, "loss": 0.718, "step": 11109 }, { "epoch": 0.2981429798196651, "grad_norm": 0.1806640625, "learning_rate": 0.0011912249305880613, "loss": 0.6027, "step": 11110 }, { "epoch": 0.298169815371404, "grad_norm": 0.212890625, "learning_rate": 0.0011912201413087316, "loss": 0.8212, "step": 11111 }, { "epoch": 0.29819665092314296, "grad_norm": 0.1962890625, "learning_rate": 0.0011912153507324392, "loss": 0.6543, "step": 11112 }, { "epoch": 0.29822348647488195, "grad_norm": 0.1953125, "learning_rate": 0.0011912105588591938, "loss": 0.6707, "step": 11113 }, { "epoch": 0.2982503220266209, "grad_norm": 0.2138671875, "learning_rate": 0.0011912057656890066, "loss": 0.7277, "step": 11114 }, { "epoch": 0.2982771575783598, "grad_norm": 0.21875, "learning_rate": 0.001191200971221888, "loss": 0.7084, "step": 11115 }, { "epoch": 0.29830399313009875, "grad_norm": 0.2138671875, "learning_rate": 0.0011911961754578484, "loss": 0.7364, "step": 11116 }, { "epoch": 0.2983308286818377, "grad_norm": 0.197265625, "learning_rate": 0.0011911913783968979, "loss": 0.6431, "step": 11117 }, { "epoch": 0.2983576642335766, "grad_norm": 0.2216796875, "learning_rate": 0.001191186580039048, "loss": 0.7381, "step": 11118 }, { "epoch": 0.29838449978531556, "grad_norm": 0.224609375, "learning_rate": 0.0011911817803843083, "loss": 0.7791, "step": 11119 }, { "epoch": 0.29841133533705455, "grad_norm": 0.23046875, "learning_rate": 0.0011911769794326899, "loss": 0.7579, "step": 11120 }, { "epoch": 0.2984381708887935, "grad_norm": 0.212890625, "learning_rate": 0.001191172177184203, "loss": 0.7721, "step": 11121 }, { "epoch": 0.2984650064405324, "grad_norm": 0.212890625, "learning_rate": 0.0011911673736388583, "loss": 0.7162, "step": 11122 }, { "epoch": 0.29849184199227136, "grad_norm": 0.2177734375, "learning_rate": 0.0011911625687966664, "loss": 0.7486, "step": 11123 }, { "epoch": 0.2985186775440103, "grad_norm": 0.2138671875, "learning_rate": 0.0011911577626576378, "loss": 0.728, "step": 11124 }, { "epoch": 0.2985455130957492, "grad_norm": 0.2275390625, "learning_rate": 0.0011911529552217828, "loss": 0.7503, "step": 11125 }, { "epoch": 0.2985723486474882, "grad_norm": 0.2099609375, "learning_rate": 0.0011911481464891122, "loss": 0.6537, "step": 11126 }, { "epoch": 0.29859918419922715, "grad_norm": 0.224609375, "learning_rate": 0.0011911433364596366, "loss": 0.7218, "step": 11127 }, { "epoch": 0.2986260197509661, "grad_norm": 0.216796875, "learning_rate": 0.0011911385251333663, "loss": 0.7223, "step": 11128 }, { "epoch": 0.298652855302705, "grad_norm": 0.2119140625, "learning_rate": 0.0011911337125103119, "loss": 0.7135, "step": 11129 }, { "epoch": 0.29867969085444396, "grad_norm": 0.212890625, "learning_rate": 0.0011911288985904842, "loss": 0.7179, "step": 11130 }, { "epoch": 0.2987065264061829, "grad_norm": 0.2255859375, "learning_rate": 0.0011911240833738935, "loss": 0.7892, "step": 11131 }, { "epoch": 0.29873336195792183, "grad_norm": 0.21484375, "learning_rate": 0.0011911192668605506, "loss": 0.7424, "step": 11132 }, { "epoch": 0.2987601975096608, "grad_norm": 0.224609375, "learning_rate": 0.001191114449050466, "loss": 0.7027, "step": 11133 }, { "epoch": 0.29878703306139975, "grad_norm": 0.21484375, "learning_rate": 0.0011911096299436499, "loss": 0.7059, "step": 11134 }, { "epoch": 0.2988138686131387, "grad_norm": 0.2255859375, "learning_rate": 0.0011911048095401137, "loss": 0.8507, "step": 11135 }, { "epoch": 0.2988407041648776, "grad_norm": 0.22265625, "learning_rate": 0.001191099987839867, "loss": 0.769, "step": 11136 }, { "epoch": 0.29886753971661656, "grad_norm": 0.2138671875, "learning_rate": 0.0011910951648429209, "loss": 0.7597, "step": 11137 }, { "epoch": 0.2988943752683555, "grad_norm": 0.2216796875, "learning_rate": 0.001191090340549286, "loss": 0.7607, "step": 11138 }, { "epoch": 0.2989212108200945, "grad_norm": 0.224609375, "learning_rate": 0.0011910855149589727, "loss": 0.798, "step": 11139 }, { "epoch": 0.2989480463718334, "grad_norm": 0.224609375, "learning_rate": 0.0011910806880719917, "loss": 0.7185, "step": 11140 }, { "epoch": 0.29897488192357236, "grad_norm": 0.2314453125, "learning_rate": 0.0011910758598883539, "loss": 0.8317, "step": 11141 }, { "epoch": 0.2990017174753113, "grad_norm": 0.2138671875, "learning_rate": 0.001191071030408069, "loss": 0.7757, "step": 11142 }, { "epoch": 0.2990285530270502, "grad_norm": 0.234375, "learning_rate": 0.0011910661996311486, "loss": 0.778, "step": 11143 }, { "epoch": 0.29905538857878916, "grad_norm": 0.2255859375, "learning_rate": 0.0011910613675576026, "loss": 0.7411, "step": 11144 }, { "epoch": 0.2990822241305281, "grad_norm": 0.1982421875, "learning_rate": 0.001191056534187442, "loss": 0.6059, "step": 11145 }, { "epoch": 0.2991090596822671, "grad_norm": 0.2060546875, "learning_rate": 0.001191051699520677, "loss": 0.7018, "step": 11146 }, { "epoch": 0.299135895234006, "grad_norm": 0.2177734375, "learning_rate": 0.0011910468635573188, "loss": 0.7424, "step": 11147 }, { "epoch": 0.29916273078574496, "grad_norm": 0.212890625, "learning_rate": 0.0011910420262973775, "loss": 0.7299, "step": 11148 }, { "epoch": 0.2991895663374839, "grad_norm": 0.2314453125, "learning_rate": 0.001191037187740864, "loss": 0.8636, "step": 11149 }, { "epoch": 0.2992164018892228, "grad_norm": 0.2275390625, "learning_rate": 0.0011910323478877885, "loss": 0.7826, "step": 11150 }, { "epoch": 0.29924323744096176, "grad_norm": 0.2197265625, "learning_rate": 0.0011910275067381622, "loss": 0.7246, "step": 11151 }, { "epoch": 0.29927007299270075, "grad_norm": 0.19921875, "learning_rate": 0.0011910226642919955, "loss": 0.6616, "step": 11152 }, { "epoch": 0.2992969085444397, "grad_norm": 0.212890625, "learning_rate": 0.0011910178205492988, "loss": 0.7324, "step": 11153 }, { "epoch": 0.2993237440961786, "grad_norm": 0.216796875, "learning_rate": 0.0011910129755100828, "loss": 0.6722, "step": 11154 }, { "epoch": 0.29935057964791756, "grad_norm": 0.21875, "learning_rate": 0.0011910081291743582, "loss": 0.7045, "step": 11155 }, { "epoch": 0.2993774151996565, "grad_norm": 0.232421875, "learning_rate": 0.0011910032815421357, "loss": 0.8097, "step": 11156 }, { "epoch": 0.29940425075139543, "grad_norm": 0.203125, "learning_rate": 0.001190998432613426, "loss": 0.6816, "step": 11157 }, { "epoch": 0.2994310863031344, "grad_norm": 0.2158203125, "learning_rate": 0.0011909935823882396, "loss": 0.7496, "step": 11158 }, { "epoch": 0.29945792185487335, "grad_norm": 0.2138671875, "learning_rate": 0.001190988730866587, "loss": 0.683, "step": 11159 }, { "epoch": 0.2994847574066123, "grad_norm": 0.224609375, "learning_rate": 0.001190983878048479, "loss": 0.7648, "step": 11160 }, { "epoch": 0.2995115929583512, "grad_norm": 0.2294921875, "learning_rate": 0.0011909790239339265, "loss": 0.8072, "step": 11161 }, { "epoch": 0.29953842851009016, "grad_norm": 0.21484375, "learning_rate": 0.0011909741685229396, "loss": 0.7353, "step": 11162 }, { "epoch": 0.2995652640618291, "grad_norm": 0.2099609375, "learning_rate": 0.0011909693118155294, "loss": 0.6846, "step": 11163 }, { "epoch": 0.29959209961356803, "grad_norm": 0.212890625, "learning_rate": 0.0011909644538117064, "loss": 0.7067, "step": 11164 }, { "epoch": 0.299618935165307, "grad_norm": 0.2041015625, "learning_rate": 0.001190959594511481, "loss": 0.5938, "step": 11165 }, { "epoch": 0.29964577071704596, "grad_norm": 0.220703125, "learning_rate": 0.0011909547339148644, "loss": 0.6969, "step": 11166 }, { "epoch": 0.2996726062687849, "grad_norm": 0.1943359375, "learning_rate": 0.0011909498720218668, "loss": 0.5792, "step": 11167 }, { "epoch": 0.2996994418205238, "grad_norm": 0.201171875, "learning_rate": 0.0011909450088324993, "loss": 0.6702, "step": 11168 }, { "epoch": 0.29972627737226276, "grad_norm": 0.228515625, "learning_rate": 0.001190940144346772, "loss": 0.7974, "step": 11169 }, { "epoch": 0.2997531129240017, "grad_norm": 0.2216796875, "learning_rate": 0.0011909352785646962, "loss": 0.8226, "step": 11170 }, { "epoch": 0.2997799484757407, "grad_norm": 0.21875, "learning_rate": 0.001190930411486282, "loss": 0.6964, "step": 11171 }, { "epoch": 0.2998067840274796, "grad_norm": 0.2119140625, "learning_rate": 0.0011909255431115403, "loss": 0.6702, "step": 11172 }, { "epoch": 0.29983361957921856, "grad_norm": 0.21875, "learning_rate": 0.001190920673440482, "loss": 0.7721, "step": 11173 }, { "epoch": 0.2998604551309575, "grad_norm": 0.220703125, "learning_rate": 0.0011909158024731175, "loss": 0.7595, "step": 11174 }, { "epoch": 0.29988729068269643, "grad_norm": 0.2333984375, "learning_rate": 0.0011909109302094575, "loss": 0.8189, "step": 11175 }, { "epoch": 0.29991412623443536, "grad_norm": 0.2109375, "learning_rate": 0.001190906056649513, "loss": 0.7092, "step": 11176 }, { "epoch": 0.2999409617861743, "grad_norm": 0.216796875, "learning_rate": 0.0011909011817932942, "loss": 0.8144, "step": 11177 }, { "epoch": 0.2999677973379133, "grad_norm": 0.21875, "learning_rate": 0.0011908963056408122, "loss": 0.7896, "step": 11178 }, { "epoch": 0.2999946328896522, "grad_norm": 0.1953125, "learning_rate": 0.0011908914281920776, "loss": 0.6369, "step": 11179 }, { "epoch": 0.30002146844139116, "grad_norm": 0.2060546875, "learning_rate": 0.0011908865494471008, "loss": 0.7045, "step": 11180 }, { "epoch": 0.3000483039931301, "grad_norm": 0.2138671875, "learning_rate": 0.001190881669405893, "loss": 0.7681, "step": 11181 }, { "epoch": 0.30007513954486903, "grad_norm": 0.2041015625, "learning_rate": 0.0011908767880684647, "loss": 0.6982, "step": 11182 }, { "epoch": 0.30010197509660796, "grad_norm": 0.2197265625, "learning_rate": 0.0011908719054348263, "loss": 0.7679, "step": 11183 }, { "epoch": 0.30012881064834696, "grad_norm": 0.201171875, "learning_rate": 0.001190867021504989, "loss": 0.6736, "step": 11184 }, { "epoch": 0.3001556462000859, "grad_norm": 0.2158203125, "learning_rate": 0.0011908621362789632, "loss": 0.6947, "step": 11185 }, { "epoch": 0.3001824817518248, "grad_norm": 0.2197265625, "learning_rate": 0.0011908572497567596, "loss": 0.743, "step": 11186 }, { "epoch": 0.30020931730356376, "grad_norm": 0.201171875, "learning_rate": 0.0011908523619383893, "loss": 0.6378, "step": 11187 }, { "epoch": 0.3002361528553027, "grad_norm": 0.2060546875, "learning_rate": 0.0011908474728238624, "loss": 0.677, "step": 11188 }, { "epoch": 0.30026298840704163, "grad_norm": 0.2177734375, "learning_rate": 0.0011908425824131903, "loss": 0.7628, "step": 11189 }, { "epoch": 0.30028982395878057, "grad_norm": 0.203125, "learning_rate": 0.0011908376907063833, "loss": 0.6802, "step": 11190 }, { "epoch": 0.30031665951051956, "grad_norm": 0.2275390625, "learning_rate": 0.001190832797703452, "loss": 0.7206, "step": 11191 }, { "epoch": 0.3003434950622585, "grad_norm": 0.2080078125, "learning_rate": 0.0011908279034044077, "loss": 0.6757, "step": 11192 }, { "epoch": 0.3003703306139974, "grad_norm": 0.2177734375, "learning_rate": 0.0011908230078092608, "loss": 0.7106, "step": 11193 }, { "epoch": 0.30039716616573636, "grad_norm": 0.2119140625, "learning_rate": 0.001190818110918022, "loss": 0.7177, "step": 11194 }, { "epoch": 0.3004240017174753, "grad_norm": 0.21875, "learning_rate": 0.001190813212730702, "loss": 0.7444, "step": 11195 }, { "epoch": 0.30045083726921423, "grad_norm": 0.2080078125, "learning_rate": 0.0011908083132473114, "loss": 0.7233, "step": 11196 }, { "epoch": 0.3004776728209532, "grad_norm": 0.2294921875, "learning_rate": 0.0011908034124678616, "loss": 0.8119, "step": 11197 }, { "epoch": 0.30050450837269216, "grad_norm": 0.216796875, "learning_rate": 0.0011907985103923626, "loss": 0.7106, "step": 11198 }, { "epoch": 0.3005313439244311, "grad_norm": 0.21484375, "learning_rate": 0.0011907936070208256, "loss": 0.7021, "step": 11199 }, { "epoch": 0.30055817947617003, "grad_norm": 0.21484375, "learning_rate": 0.0011907887023532614, "loss": 0.7279, "step": 11200 }, { "epoch": 0.30058501502790896, "grad_norm": 0.2255859375, "learning_rate": 0.0011907837963896805, "loss": 0.735, "step": 11201 }, { "epoch": 0.3006118505796479, "grad_norm": 0.2099609375, "learning_rate": 0.0011907788891300935, "loss": 0.7161, "step": 11202 }, { "epoch": 0.30063868613138683, "grad_norm": 0.1982421875, "learning_rate": 0.0011907739805745118, "loss": 0.6474, "step": 11203 }, { "epoch": 0.3006655216831258, "grad_norm": 0.1943359375, "learning_rate": 0.0011907690707229457, "loss": 0.6689, "step": 11204 }, { "epoch": 0.30069235723486476, "grad_norm": 0.21484375, "learning_rate": 0.0011907641595754058, "loss": 0.6932, "step": 11205 }, { "epoch": 0.3007191927866037, "grad_norm": 0.201171875, "learning_rate": 0.0011907592471319035, "loss": 0.6322, "step": 11206 }, { "epoch": 0.30074602833834263, "grad_norm": 0.20703125, "learning_rate": 0.001190754333392449, "loss": 0.7508, "step": 11207 }, { "epoch": 0.30077286389008157, "grad_norm": 0.21484375, "learning_rate": 0.0011907494183570535, "loss": 0.7083, "step": 11208 }, { "epoch": 0.3007996994418205, "grad_norm": 0.201171875, "learning_rate": 0.0011907445020257275, "loss": 0.6483, "step": 11209 }, { "epoch": 0.3008265349935595, "grad_norm": 0.2255859375, "learning_rate": 0.0011907395843984818, "loss": 0.8028, "step": 11210 }, { "epoch": 0.3008533705452984, "grad_norm": 0.1962890625, "learning_rate": 0.0011907346654753271, "loss": 0.6342, "step": 11211 }, { "epoch": 0.30088020609703736, "grad_norm": 0.208984375, "learning_rate": 0.0011907297452562746, "loss": 0.7272, "step": 11212 }, { "epoch": 0.3009070416487763, "grad_norm": 0.2138671875, "learning_rate": 0.0011907248237413347, "loss": 0.7592, "step": 11213 }, { "epoch": 0.30093387720051523, "grad_norm": 0.2109375, "learning_rate": 0.0011907199009305184, "loss": 0.7361, "step": 11214 }, { "epoch": 0.30096071275225417, "grad_norm": 0.1953125, "learning_rate": 0.0011907149768238363, "loss": 0.7, "step": 11215 }, { "epoch": 0.30098754830399316, "grad_norm": 0.23046875, "learning_rate": 0.0011907100514212995, "loss": 0.8149, "step": 11216 }, { "epoch": 0.3010143838557321, "grad_norm": 0.201171875, "learning_rate": 0.0011907051247229186, "loss": 0.6916, "step": 11217 }, { "epoch": 0.30104121940747103, "grad_norm": 0.2216796875, "learning_rate": 0.0011907001967287043, "loss": 0.7703, "step": 11218 }, { "epoch": 0.30106805495920996, "grad_norm": 0.2001953125, "learning_rate": 0.0011906952674386676, "loss": 0.6803, "step": 11219 }, { "epoch": 0.3010948905109489, "grad_norm": 0.216796875, "learning_rate": 0.0011906903368528194, "loss": 0.6987, "step": 11220 }, { "epoch": 0.30112172606268783, "grad_norm": 0.21484375, "learning_rate": 0.0011906854049711702, "loss": 0.6686, "step": 11221 }, { "epoch": 0.30114856161442677, "grad_norm": 0.2216796875, "learning_rate": 0.0011906804717937312, "loss": 0.7744, "step": 11222 }, { "epoch": 0.30117539716616576, "grad_norm": 0.2109375, "learning_rate": 0.001190675537320513, "loss": 0.7007, "step": 11223 }, { "epoch": 0.3012022327179047, "grad_norm": 0.2109375, "learning_rate": 0.0011906706015515263, "loss": 0.6353, "step": 11224 }, { "epoch": 0.30122906826964363, "grad_norm": 0.2236328125, "learning_rate": 0.0011906656644867823, "loss": 0.8107, "step": 11225 }, { "epoch": 0.30125590382138256, "grad_norm": 0.1982421875, "learning_rate": 0.0011906607261262913, "loss": 0.6167, "step": 11226 }, { "epoch": 0.3012827393731215, "grad_norm": 0.220703125, "learning_rate": 0.0011906557864700647, "loss": 0.7897, "step": 11227 }, { "epoch": 0.30130957492486043, "grad_norm": 0.2119140625, "learning_rate": 0.0011906508455181131, "loss": 0.7167, "step": 11228 }, { "epoch": 0.3013364104765994, "grad_norm": 0.2021484375, "learning_rate": 0.001190645903270447, "loss": 0.6823, "step": 11229 }, { "epoch": 0.30136324602833836, "grad_norm": 0.2216796875, "learning_rate": 0.001190640959727078, "loss": 0.7879, "step": 11230 }, { "epoch": 0.3013900815800773, "grad_norm": 0.2109375, "learning_rate": 0.0011906360148880163, "loss": 0.7072, "step": 11231 }, { "epoch": 0.30141691713181623, "grad_norm": 0.224609375, "learning_rate": 0.001190631068753273, "loss": 0.8193, "step": 11232 }, { "epoch": 0.30144375268355517, "grad_norm": 0.2021484375, "learning_rate": 0.0011906261213228586, "loss": 0.6699, "step": 11233 }, { "epoch": 0.3014705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.0011906211725967845, "loss": 0.7272, "step": 11234 }, { "epoch": 0.30149742378703304, "grad_norm": 0.21484375, "learning_rate": 0.0011906162225750613, "loss": 0.7229, "step": 11235 }, { "epoch": 0.301524259338772, "grad_norm": 0.19921875, "learning_rate": 0.0011906112712576997, "loss": 0.6631, "step": 11236 }, { "epoch": 0.30155109489051096, "grad_norm": 0.234375, "learning_rate": 0.001190606318644711, "loss": 0.8407, "step": 11237 }, { "epoch": 0.3015779304422499, "grad_norm": 0.2099609375, "learning_rate": 0.0011906013647361058, "loss": 0.658, "step": 11238 }, { "epoch": 0.30160476599398883, "grad_norm": 0.2158203125, "learning_rate": 0.0011905964095318949, "loss": 0.7387, "step": 11239 }, { "epoch": 0.30163160154572777, "grad_norm": 0.2109375, "learning_rate": 0.001190591453032089, "loss": 0.7389, "step": 11240 }, { "epoch": 0.3016584370974667, "grad_norm": 0.2236328125, "learning_rate": 0.0011905864952366995, "loss": 0.7908, "step": 11241 }, { "epoch": 0.3016852726492057, "grad_norm": 0.2177734375, "learning_rate": 0.001190581536145737, "loss": 0.787, "step": 11242 }, { "epoch": 0.30171210820094463, "grad_norm": 0.2001953125, "learning_rate": 0.001190576575759212, "loss": 0.6634, "step": 11243 }, { "epoch": 0.30173894375268356, "grad_norm": 0.1982421875, "learning_rate": 0.0011905716140771362, "loss": 0.6859, "step": 11244 }, { "epoch": 0.3017657793044225, "grad_norm": 0.21484375, "learning_rate": 0.00119056665109952, "loss": 0.6919, "step": 11245 }, { "epoch": 0.30179261485616143, "grad_norm": 0.2119140625, "learning_rate": 0.001190561686826374, "loss": 0.7343, "step": 11246 }, { "epoch": 0.30181945040790037, "grad_norm": 0.201171875, "learning_rate": 0.0011905567212577093, "loss": 0.6627, "step": 11247 }, { "epoch": 0.3018462859596393, "grad_norm": 0.240234375, "learning_rate": 0.0011905517543935372, "loss": 0.7797, "step": 11248 }, { "epoch": 0.3018731215113783, "grad_norm": 0.236328125, "learning_rate": 0.0011905467862338682, "loss": 0.7768, "step": 11249 }, { "epoch": 0.30189995706311723, "grad_norm": 0.232421875, "learning_rate": 0.0011905418167787133, "loss": 0.8354, "step": 11250 }, { "epoch": 0.30192679261485617, "grad_norm": 0.1953125, "learning_rate": 0.0011905368460280834, "loss": 0.6229, "step": 11251 }, { "epoch": 0.3019536281665951, "grad_norm": 0.216796875, "learning_rate": 0.0011905318739819895, "loss": 0.7546, "step": 11252 }, { "epoch": 0.30198046371833404, "grad_norm": 0.20703125, "learning_rate": 0.0011905269006404423, "loss": 0.6749, "step": 11253 }, { "epoch": 0.30200729927007297, "grad_norm": 0.228515625, "learning_rate": 0.001190521926003453, "loss": 0.7157, "step": 11254 }, { "epoch": 0.30203413482181196, "grad_norm": 0.2138671875, "learning_rate": 0.001190516950071032, "loss": 0.685, "step": 11255 }, { "epoch": 0.3020609703735509, "grad_norm": 0.2119140625, "learning_rate": 0.0011905119728431908, "loss": 0.7118, "step": 11256 }, { "epoch": 0.30208780592528983, "grad_norm": 0.2119140625, "learning_rate": 0.0011905069943199398, "loss": 0.7492, "step": 11257 }, { "epoch": 0.30211464147702877, "grad_norm": 0.212890625, "learning_rate": 0.0011905020145012903, "loss": 0.7673, "step": 11258 }, { "epoch": 0.3021414770287677, "grad_norm": 0.21484375, "learning_rate": 0.0011904970333872532, "loss": 0.6504, "step": 11259 }, { "epoch": 0.30216831258050664, "grad_norm": 0.2177734375, "learning_rate": 0.0011904920509778392, "loss": 0.7349, "step": 11260 }, { "epoch": 0.3021951481322456, "grad_norm": 0.21484375, "learning_rate": 0.0011904870672730596, "loss": 0.6912, "step": 11261 }, { "epoch": 0.30222198368398456, "grad_norm": 0.236328125, "learning_rate": 0.0011904820822729249, "loss": 0.8666, "step": 11262 }, { "epoch": 0.3022488192357235, "grad_norm": 0.2109375, "learning_rate": 0.0011904770959774462, "loss": 0.7036, "step": 11263 }, { "epoch": 0.30227565478746243, "grad_norm": 0.2099609375, "learning_rate": 0.0011904721083866347, "loss": 0.6787, "step": 11264 }, { "epoch": 0.30230249033920137, "grad_norm": 0.2333984375, "learning_rate": 0.0011904671195005008, "loss": 0.7598, "step": 11265 }, { "epoch": 0.3023293258909403, "grad_norm": 0.216796875, "learning_rate": 0.001190462129319056, "loss": 0.6814, "step": 11266 }, { "epoch": 0.30235616144267924, "grad_norm": 0.216796875, "learning_rate": 0.001190457137842311, "loss": 0.7418, "step": 11267 }, { "epoch": 0.30238299699441823, "grad_norm": 0.212890625, "learning_rate": 0.0011904521450702768, "loss": 0.6754, "step": 11268 }, { "epoch": 0.30240983254615716, "grad_norm": 0.2099609375, "learning_rate": 0.0011904471510029641, "loss": 0.7166, "step": 11269 }, { "epoch": 0.3024366680978961, "grad_norm": 0.2197265625, "learning_rate": 0.0011904421556403842, "loss": 0.753, "step": 11270 }, { "epoch": 0.30246350364963503, "grad_norm": 0.2158203125, "learning_rate": 0.001190437158982548, "loss": 0.7842, "step": 11271 }, { "epoch": 0.30249033920137397, "grad_norm": 0.2158203125, "learning_rate": 0.0011904321610294662, "loss": 0.7389, "step": 11272 }, { "epoch": 0.3025171747531129, "grad_norm": 0.234375, "learning_rate": 0.00119042716178115, "loss": 0.8364, "step": 11273 }, { "epoch": 0.30254401030485184, "grad_norm": 0.2216796875, "learning_rate": 0.0011904221612376103, "loss": 0.7099, "step": 11274 }, { "epoch": 0.30257084585659083, "grad_norm": 0.2021484375, "learning_rate": 0.001190417159398858, "loss": 0.636, "step": 11275 }, { "epoch": 0.30259768140832977, "grad_norm": 0.21484375, "learning_rate": 0.0011904121562649045, "loss": 0.7094, "step": 11276 }, { "epoch": 0.3026245169600687, "grad_norm": 0.22265625, "learning_rate": 0.00119040715183576, "loss": 0.8473, "step": 11277 }, { "epoch": 0.30265135251180764, "grad_norm": 0.21484375, "learning_rate": 0.0011904021461114362, "loss": 0.7646, "step": 11278 }, { "epoch": 0.30267818806354657, "grad_norm": 0.2158203125, "learning_rate": 0.0011903971390919436, "loss": 0.7539, "step": 11279 }, { "epoch": 0.3027050236152855, "grad_norm": 0.2119140625, "learning_rate": 0.0011903921307772937, "loss": 0.6951, "step": 11280 }, { "epoch": 0.3027318591670245, "grad_norm": 0.205078125, "learning_rate": 0.0011903871211674968, "loss": 0.7372, "step": 11281 }, { "epoch": 0.30275869471876343, "grad_norm": 0.2158203125, "learning_rate": 0.0011903821102625643, "loss": 0.8019, "step": 11282 }, { "epoch": 0.30278553027050237, "grad_norm": 0.2138671875, "learning_rate": 0.0011903770980625074, "loss": 0.7196, "step": 11283 }, { "epoch": 0.3028123658222413, "grad_norm": 0.21875, "learning_rate": 0.0011903720845673365, "loss": 0.7338, "step": 11284 }, { "epoch": 0.30283920137398024, "grad_norm": 0.2294921875, "learning_rate": 0.0011903670697770632, "loss": 0.7803, "step": 11285 }, { "epoch": 0.3028660369257192, "grad_norm": 0.20703125, "learning_rate": 0.0011903620536916982, "loss": 0.6983, "step": 11286 }, { "epoch": 0.30289287247745816, "grad_norm": 0.205078125, "learning_rate": 0.0011903570363112524, "loss": 0.7315, "step": 11287 }, { "epoch": 0.3029197080291971, "grad_norm": 0.1953125, "learning_rate": 0.001190352017635737, "loss": 0.6597, "step": 11288 }, { "epoch": 0.30294654358093603, "grad_norm": 0.212890625, "learning_rate": 0.001190346997665163, "loss": 0.7578, "step": 11289 }, { "epoch": 0.30297337913267497, "grad_norm": 0.212890625, "learning_rate": 0.0011903419763995414, "loss": 0.783, "step": 11290 }, { "epoch": 0.3030002146844139, "grad_norm": 0.2216796875, "learning_rate": 0.001190336953838883, "loss": 0.7644, "step": 11291 }, { "epoch": 0.30302705023615284, "grad_norm": 0.20703125, "learning_rate": 0.0011903319299831992, "loss": 0.7055, "step": 11292 }, { "epoch": 0.3030538857878918, "grad_norm": 0.208984375, "learning_rate": 0.0011903269048325008, "loss": 0.7438, "step": 11293 }, { "epoch": 0.30308072133963077, "grad_norm": 0.20703125, "learning_rate": 0.0011903218783867989, "loss": 0.6712, "step": 11294 }, { "epoch": 0.3031075568913697, "grad_norm": 0.232421875, "learning_rate": 0.0011903168506461044, "loss": 0.7938, "step": 11295 }, { "epoch": 0.30313439244310864, "grad_norm": 0.283203125, "learning_rate": 0.0011903118216104282, "loss": 0.8426, "step": 11296 }, { "epoch": 0.30316122799484757, "grad_norm": 0.3125, "learning_rate": 0.0011903067912797818, "loss": 0.9576, "step": 11297 }, { "epoch": 0.3031880635465865, "grad_norm": 0.298828125, "learning_rate": 0.0011903017596541759, "loss": 0.8936, "step": 11298 }, { "epoch": 0.30321489909832544, "grad_norm": 0.263671875, "learning_rate": 0.0011902967267336216, "loss": 0.849, "step": 11299 }, { "epoch": 0.30324173465006443, "grad_norm": 0.40234375, "learning_rate": 0.0011902916925181299, "loss": 0.7944, "step": 11300 }, { "epoch": 0.30326857020180337, "grad_norm": 0.2890625, "learning_rate": 0.001190286657007712, "loss": 0.7135, "step": 11301 }, { "epoch": 0.3032954057535423, "grad_norm": 0.291015625, "learning_rate": 0.0011902816202023789, "loss": 0.966, "step": 11302 }, { "epoch": 0.30332224130528124, "grad_norm": 0.2490234375, "learning_rate": 0.0011902765821021413, "loss": 0.8192, "step": 11303 }, { "epoch": 0.3033490768570202, "grad_norm": 0.244140625, "learning_rate": 0.0011902715427070107, "loss": 0.7887, "step": 11304 }, { "epoch": 0.3033759124087591, "grad_norm": 0.25390625, "learning_rate": 0.0011902665020169982, "loss": 0.917, "step": 11305 }, { "epoch": 0.30340274796049804, "grad_norm": 0.244140625, "learning_rate": 0.0011902614600321144, "loss": 0.8357, "step": 11306 }, { "epoch": 0.30342958351223703, "grad_norm": 0.248046875, "learning_rate": 0.0011902564167523707, "loss": 0.8518, "step": 11307 }, { "epoch": 0.30345641906397597, "grad_norm": 0.2265625, "learning_rate": 0.001190251372177778, "loss": 0.7988, "step": 11308 }, { "epoch": 0.3034832546157149, "grad_norm": 0.234375, "learning_rate": 0.0011902463263083476, "loss": 0.8526, "step": 11309 }, { "epoch": 0.30351009016745384, "grad_norm": 0.2353515625, "learning_rate": 0.0011902412791440903, "loss": 0.7906, "step": 11310 }, { "epoch": 0.3035369257191928, "grad_norm": 0.259765625, "learning_rate": 0.0011902362306850172, "loss": 0.9613, "step": 11311 }, { "epoch": 0.3035637612709317, "grad_norm": 0.2421875, "learning_rate": 0.0011902311809311396, "loss": 0.8995, "step": 11312 }, { "epoch": 0.3035905968226707, "grad_norm": 0.21875, "learning_rate": 0.0011902261298824682, "loss": 0.7556, "step": 11313 }, { "epoch": 0.30361743237440963, "grad_norm": 0.2353515625, "learning_rate": 0.0011902210775390147, "loss": 0.8563, "step": 11314 }, { "epoch": 0.30364426792614857, "grad_norm": 0.2216796875, "learning_rate": 0.0011902160239007896, "loss": 0.8401, "step": 11315 }, { "epoch": 0.3036711034778875, "grad_norm": 0.2255859375, "learning_rate": 0.0011902109689678041, "loss": 0.7797, "step": 11316 }, { "epoch": 0.30369793902962644, "grad_norm": 0.2451171875, "learning_rate": 0.0011902059127400697, "loss": 0.9013, "step": 11317 }, { "epoch": 0.3037247745813654, "grad_norm": 0.2138671875, "learning_rate": 0.0011902008552175968, "loss": 0.7172, "step": 11318 }, { "epoch": 0.3037516101331043, "grad_norm": 0.208984375, "learning_rate": 0.001190195796400397, "loss": 0.713, "step": 11319 }, { "epoch": 0.3037784456848433, "grad_norm": 0.2265625, "learning_rate": 0.0011901907362884814, "loss": 0.785, "step": 11320 }, { "epoch": 0.30380528123658224, "grad_norm": 0.232421875, "learning_rate": 0.0011901856748818608, "loss": 0.836, "step": 11321 }, { "epoch": 0.30383211678832117, "grad_norm": 0.21875, "learning_rate": 0.0011901806121805466, "loss": 0.8503, "step": 11322 }, { "epoch": 0.3038589523400601, "grad_norm": 0.2353515625, "learning_rate": 0.0011901755481845495, "loss": 0.8698, "step": 11323 }, { "epoch": 0.30388578789179904, "grad_norm": 0.2197265625, "learning_rate": 0.001190170482893881, "loss": 0.7892, "step": 11324 }, { "epoch": 0.303912623443538, "grad_norm": 0.236328125, "learning_rate": 0.0011901654163085523, "loss": 0.839, "step": 11325 }, { "epoch": 0.30393945899527697, "grad_norm": 0.21875, "learning_rate": 0.0011901603484285742, "loss": 0.773, "step": 11326 }, { "epoch": 0.3039662945470159, "grad_norm": 0.2255859375, "learning_rate": 0.0011901552792539579, "loss": 0.7451, "step": 11327 }, { "epoch": 0.30399313009875484, "grad_norm": 0.2294921875, "learning_rate": 0.0011901502087847144, "loss": 0.8489, "step": 11328 }, { "epoch": 0.3040199656504938, "grad_norm": 0.244140625, "learning_rate": 0.001190145137020855, "loss": 0.9307, "step": 11329 }, { "epoch": 0.3040468012022327, "grad_norm": 0.22265625, "learning_rate": 0.0011901400639623908, "loss": 0.7641, "step": 11330 }, { "epoch": 0.30407363675397164, "grad_norm": 0.2265625, "learning_rate": 0.001190134989609333, "loss": 0.8127, "step": 11331 }, { "epoch": 0.3041004723057106, "grad_norm": 0.22265625, "learning_rate": 0.0011901299139616928, "loss": 0.7812, "step": 11332 }, { "epoch": 0.30412730785744957, "grad_norm": 0.2236328125, "learning_rate": 0.001190124837019481, "loss": 0.7805, "step": 11333 }, { "epoch": 0.3041541434091885, "grad_norm": 0.2421875, "learning_rate": 0.0011901197587827089, "loss": 0.7921, "step": 11334 }, { "epoch": 0.30418097896092744, "grad_norm": 0.2138671875, "learning_rate": 0.001190114679251388, "loss": 0.7249, "step": 11335 }, { "epoch": 0.3042078145126664, "grad_norm": 0.2236328125, "learning_rate": 0.0011901095984255285, "loss": 0.7936, "step": 11336 }, { "epoch": 0.3042346500644053, "grad_norm": 0.2265625, "learning_rate": 0.0011901045163051427, "loss": 0.7953, "step": 11337 }, { "epoch": 0.30426148561614424, "grad_norm": 0.2392578125, "learning_rate": 0.0011900994328902408, "loss": 0.809, "step": 11338 }, { "epoch": 0.30428832116788324, "grad_norm": 0.2470703125, "learning_rate": 0.0011900943481808345, "loss": 0.865, "step": 11339 }, { "epoch": 0.30431515671962217, "grad_norm": 0.2236328125, "learning_rate": 0.001190089262176935, "loss": 0.7911, "step": 11340 }, { "epoch": 0.3043419922713611, "grad_norm": 0.2470703125, "learning_rate": 0.0011900841748785532, "loss": 0.9813, "step": 11341 }, { "epoch": 0.30436882782310004, "grad_norm": 0.2373046875, "learning_rate": 0.0011900790862857003, "loss": 0.8609, "step": 11342 }, { "epoch": 0.304395663374839, "grad_norm": 0.19921875, "learning_rate": 0.0011900739963983872, "loss": 0.7126, "step": 11343 }, { "epoch": 0.3044224989265779, "grad_norm": 0.2314453125, "learning_rate": 0.0011900689052166257, "loss": 0.8591, "step": 11344 }, { "epoch": 0.30444933447831685, "grad_norm": 0.228515625, "learning_rate": 0.0011900638127404265, "loss": 0.8223, "step": 11345 }, { "epoch": 0.30447617003005584, "grad_norm": 0.2236328125, "learning_rate": 0.001190058718969801, "loss": 0.7689, "step": 11346 }, { "epoch": 0.3045030055817948, "grad_norm": 0.21484375, "learning_rate": 0.00119005362390476, "loss": 0.7414, "step": 11347 }, { "epoch": 0.3045298411335337, "grad_norm": 0.2275390625, "learning_rate": 0.0011900485275453151, "loss": 0.8438, "step": 11348 }, { "epoch": 0.30455667668527264, "grad_norm": 0.2138671875, "learning_rate": 0.0011900434298914775, "loss": 0.7708, "step": 11349 }, { "epoch": 0.3045835122370116, "grad_norm": 0.2265625, "learning_rate": 0.001190038330943258, "loss": 0.7221, "step": 11350 }, { "epoch": 0.3046103477887505, "grad_norm": 0.2265625, "learning_rate": 0.001190033230700668, "loss": 0.8428, "step": 11351 }, { "epoch": 0.3046371833404895, "grad_norm": 0.2421875, "learning_rate": 0.0011900281291637189, "loss": 0.9159, "step": 11352 }, { "epoch": 0.30466401889222844, "grad_norm": 0.23046875, "learning_rate": 0.0011900230263324214, "loss": 0.8522, "step": 11353 }, { "epoch": 0.3046908544439674, "grad_norm": 0.2275390625, "learning_rate": 0.001190017922206787, "loss": 0.8199, "step": 11354 }, { "epoch": 0.3047176899957063, "grad_norm": 0.2333984375, "learning_rate": 0.001190012816786827, "loss": 0.8448, "step": 11355 }, { "epoch": 0.30474452554744524, "grad_norm": 0.2080078125, "learning_rate": 0.0011900077100725523, "loss": 0.694, "step": 11356 }, { "epoch": 0.3047713610991842, "grad_norm": 0.244140625, "learning_rate": 0.0011900026020639745, "loss": 0.9446, "step": 11357 }, { "epoch": 0.30479819665092317, "grad_norm": 0.2333984375, "learning_rate": 0.0011899974927611044, "loss": 0.8246, "step": 11358 }, { "epoch": 0.3048250322026621, "grad_norm": 0.2451171875, "learning_rate": 0.0011899923821639534, "loss": 0.8985, "step": 11359 }, { "epoch": 0.30485186775440104, "grad_norm": 0.224609375, "learning_rate": 0.0011899872702725326, "loss": 0.8019, "step": 11360 }, { "epoch": 0.30487870330614, "grad_norm": 0.2333984375, "learning_rate": 0.0011899821570868535, "loss": 0.9132, "step": 11361 }, { "epoch": 0.3049055388578789, "grad_norm": 0.2294921875, "learning_rate": 0.0011899770426069271, "loss": 0.8584, "step": 11362 }, { "epoch": 0.30493237440961785, "grad_norm": 0.2236328125, "learning_rate": 0.0011899719268327646, "loss": 0.8521, "step": 11363 }, { "epoch": 0.3049592099613568, "grad_norm": 0.2099609375, "learning_rate": 0.0011899668097643773, "loss": 0.6932, "step": 11364 }, { "epoch": 0.30498604551309577, "grad_norm": 0.212890625, "learning_rate": 0.0011899616914017761, "loss": 0.7288, "step": 11365 }, { "epoch": 0.3050128810648347, "grad_norm": 0.21875, "learning_rate": 0.0011899565717449728, "loss": 0.8646, "step": 11366 }, { "epoch": 0.30503971661657364, "grad_norm": 0.2265625, "learning_rate": 0.0011899514507939784, "loss": 0.8306, "step": 11367 }, { "epoch": 0.3050665521683126, "grad_norm": 0.220703125, "learning_rate": 0.001189946328548804, "loss": 0.8031, "step": 11368 }, { "epoch": 0.3050933877200515, "grad_norm": 0.220703125, "learning_rate": 0.001189941205009461, "loss": 0.7355, "step": 11369 }, { "epoch": 0.30512022327179045, "grad_norm": 0.2294921875, "learning_rate": 0.0011899360801759602, "loss": 0.8667, "step": 11370 }, { "epoch": 0.30514705882352944, "grad_norm": 0.2294921875, "learning_rate": 0.0011899309540483137, "loss": 0.8075, "step": 11371 }, { "epoch": 0.3051738943752684, "grad_norm": 0.2294921875, "learning_rate": 0.001189925826626532, "loss": 0.8361, "step": 11372 }, { "epoch": 0.3052007299270073, "grad_norm": 0.21875, "learning_rate": 0.0011899206979106266, "loss": 0.7363, "step": 11373 }, { "epoch": 0.30522756547874624, "grad_norm": 0.232421875, "learning_rate": 0.0011899155679006086, "loss": 0.8558, "step": 11374 }, { "epoch": 0.3052544010304852, "grad_norm": 0.2373046875, "learning_rate": 0.0011899104365964898, "loss": 0.883, "step": 11375 }, { "epoch": 0.3052812365822241, "grad_norm": 0.228515625, "learning_rate": 0.0011899053039982808, "loss": 0.817, "step": 11376 }, { "epoch": 0.30530807213396305, "grad_norm": 0.228515625, "learning_rate": 0.0011899001701059931, "loss": 0.7775, "step": 11377 }, { "epoch": 0.30533490768570204, "grad_norm": 0.2158203125, "learning_rate": 0.001189895034919638, "loss": 0.7597, "step": 11378 }, { "epoch": 0.305361743237441, "grad_norm": 0.2158203125, "learning_rate": 0.0011898898984392266, "loss": 0.7478, "step": 11379 }, { "epoch": 0.3053885787891799, "grad_norm": 0.23046875, "learning_rate": 0.0011898847606647705, "loss": 0.7933, "step": 11380 }, { "epoch": 0.30541541434091884, "grad_norm": 0.2109375, "learning_rate": 0.001189879621596281, "loss": 0.7645, "step": 11381 }, { "epoch": 0.3054422498926578, "grad_norm": 0.2265625, "learning_rate": 0.0011898744812337688, "loss": 0.815, "step": 11382 }, { "epoch": 0.3054690854443967, "grad_norm": 0.2177734375, "learning_rate": 0.0011898693395772455, "loss": 0.7537, "step": 11383 }, { "epoch": 0.3054959209961357, "grad_norm": 0.2265625, "learning_rate": 0.0011898641966267226, "loss": 0.7508, "step": 11384 }, { "epoch": 0.30552275654787464, "grad_norm": 0.2255859375, "learning_rate": 0.0011898590523822111, "loss": 0.8756, "step": 11385 }, { "epoch": 0.3055495920996136, "grad_norm": 0.22265625, "learning_rate": 0.0011898539068437223, "loss": 0.8208, "step": 11386 }, { "epoch": 0.3055764276513525, "grad_norm": 0.228515625, "learning_rate": 0.0011898487600112678, "loss": 0.8531, "step": 11387 }, { "epoch": 0.30560326320309145, "grad_norm": 0.224609375, "learning_rate": 0.0011898436118848586, "loss": 0.8239, "step": 11388 }, { "epoch": 0.3056300987548304, "grad_norm": 0.2236328125, "learning_rate": 0.001189838462464506, "loss": 0.7473, "step": 11389 }, { "epoch": 0.3056569343065693, "grad_norm": 0.2392578125, "learning_rate": 0.0011898333117502213, "loss": 0.8341, "step": 11390 }, { "epoch": 0.3056837698583083, "grad_norm": 0.23828125, "learning_rate": 0.0011898281597420158, "loss": 0.9069, "step": 11391 }, { "epoch": 0.30571060541004724, "grad_norm": 0.205078125, "learning_rate": 0.001189823006439901, "loss": 0.7753, "step": 11392 }, { "epoch": 0.3057374409617862, "grad_norm": 0.2265625, "learning_rate": 0.0011898178518438879, "loss": 0.8339, "step": 11393 }, { "epoch": 0.3057642765135251, "grad_norm": 0.2265625, "learning_rate": 0.0011898126959539882, "loss": 0.7773, "step": 11394 }, { "epoch": 0.30579111206526405, "grad_norm": 0.2255859375, "learning_rate": 0.0011898075387702127, "loss": 0.8006, "step": 11395 }, { "epoch": 0.305817947617003, "grad_norm": 0.2451171875, "learning_rate": 0.001189802380292573, "loss": 1.0309, "step": 11396 }, { "epoch": 0.305844783168742, "grad_norm": 0.216796875, "learning_rate": 0.0011897972205210806, "loss": 0.7755, "step": 11397 }, { "epoch": 0.3058716187204809, "grad_norm": 0.2275390625, "learning_rate": 0.0011897920594557466, "loss": 0.8362, "step": 11398 }, { "epoch": 0.30589845427221984, "grad_norm": 0.236328125, "learning_rate": 0.0011897868970965823, "loss": 0.8562, "step": 11399 }, { "epoch": 0.3059252898239588, "grad_norm": 0.2265625, "learning_rate": 0.0011897817334435988, "loss": 0.8628, "step": 11400 }, { "epoch": 0.3059521253756977, "grad_norm": 0.2109375, "learning_rate": 0.001189776568496808, "loss": 0.7734, "step": 11401 }, { "epoch": 0.30597896092743665, "grad_norm": 0.23828125, "learning_rate": 0.0011897714022562208, "loss": 0.8828, "step": 11402 }, { "epoch": 0.3060057964791756, "grad_norm": 0.2373046875, "learning_rate": 0.0011897662347218486, "loss": 0.8162, "step": 11403 }, { "epoch": 0.3060326320309146, "grad_norm": 0.2294921875, "learning_rate": 0.001189761065893703, "loss": 0.8365, "step": 11404 }, { "epoch": 0.3060594675826535, "grad_norm": 0.2255859375, "learning_rate": 0.0011897558957717949, "loss": 0.8497, "step": 11405 }, { "epoch": 0.30608630313439245, "grad_norm": 0.220703125, "learning_rate": 0.001189750724356136, "loss": 0.8041, "step": 11406 }, { "epoch": 0.3061131386861314, "grad_norm": 0.2373046875, "learning_rate": 0.0011897455516467377, "loss": 0.9201, "step": 11407 }, { "epoch": 0.3061399742378703, "grad_norm": 0.2109375, "learning_rate": 0.001189740377643611, "loss": 0.7284, "step": 11408 }, { "epoch": 0.30616680978960925, "grad_norm": 0.22265625, "learning_rate": 0.0011897352023467673, "loss": 0.9089, "step": 11409 }, { "epoch": 0.30619364534134824, "grad_norm": 0.2197265625, "learning_rate": 0.001189730025756218, "loss": 0.7562, "step": 11410 }, { "epoch": 0.3062204808930872, "grad_norm": 0.2333984375, "learning_rate": 0.0011897248478719747, "loss": 0.845, "step": 11411 }, { "epoch": 0.3062473164448261, "grad_norm": 0.228515625, "learning_rate": 0.0011897196686940488, "loss": 0.8806, "step": 11412 }, { "epoch": 0.30627415199656505, "grad_norm": 0.2236328125, "learning_rate": 0.001189714488222451, "loss": 0.8313, "step": 11413 }, { "epoch": 0.306300987548304, "grad_norm": 0.220703125, "learning_rate": 0.0011897093064571936, "loss": 0.7558, "step": 11414 }, { "epoch": 0.3063278231000429, "grad_norm": 0.21484375, "learning_rate": 0.001189704123398287, "loss": 0.7613, "step": 11415 }, { "epoch": 0.3063546586517819, "grad_norm": 0.2333984375, "learning_rate": 0.0011896989390457432, "loss": 0.8919, "step": 11416 }, { "epoch": 0.30638149420352084, "grad_norm": 0.21484375, "learning_rate": 0.0011896937533995735, "loss": 0.7916, "step": 11417 }, { "epoch": 0.3064083297552598, "grad_norm": 0.2265625, "learning_rate": 0.0011896885664597892, "loss": 0.792, "step": 11418 }, { "epoch": 0.3064351653069987, "grad_norm": 0.2333984375, "learning_rate": 0.0011896833782264016, "loss": 0.8515, "step": 11419 }, { "epoch": 0.30646200085873765, "grad_norm": 0.2294921875, "learning_rate": 0.0011896781886994222, "loss": 0.7858, "step": 11420 }, { "epoch": 0.3064888364104766, "grad_norm": 0.205078125, "learning_rate": 0.0011896729978788623, "loss": 0.6927, "step": 11421 }, { "epoch": 0.3065156719622155, "grad_norm": 0.2080078125, "learning_rate": 0.0011896678057647333, "loss": 0.7261, "step": 11422 }, { "epoch": 0.3065425075139545, "grad_norm": 0.2138671875, "learning_rate": 0.0011896626123570468, "loss": 0.749, "step": 11423 }, { "epoch": 0.30656934306569344, "grad_norm": 0.2470703125, "learning_rate": 0.0011896574176558138, "loss": 0.988, "step": 11424 }, { "epoch": 0.3065961786174324, "grad_norm": 0.2275390625, "learning_rate": 0.001189652221661046, "loss": 0.7634, "step": 11425 }, { "epoch": 0.3066230141691713, "grad_norm": 0.2177734375, "learning_rate": 0.0011896470243727547, "loss": 0.7871, "step": 11426 }, { "epoch": 0.30664984972091025, "grad_norm": 0.2197265625, "learning_rate": 0.0011896418257909514, "loss": 0.7817, "step": 11427 }, { "epoch": 0.3066766852726492, "grad_norm": 0.244140625, "learning_rate": 0.0011896366259156472, "loss": 0.9554, "step": 11428 }, { "epoch": 0.3067035208243882, "grad_norm": 0.2255859375, "learning_rate": 0.0011896314247468538, "loss": 0.8187, "step": 11429 }, { "epoch": 0.3067303563761271, "grad_norm": 0.20703125, "learning_rate": 0.0011896262222845827, "loss": 0.7142, "step": 11430 }, { "epoch": 0.30675719192786605, "grad_norm": 0.205078125, "learning_rate": 0.0011896210185288448, "loss": 0.7322, "step": 11431 }, { "epoch": 0.306784027479605, "grad_norm": 0.234375, "learning_rate": 0.0011896158134796521, "loss": 0.8595, "step": 11432 }, { "epoch": 0.3068108630313439, "grad_norm": 0.2431640625, "learning_rate": 0.0011896106071370158, "loss": 0.9239, "step": 11433 }, { "epoch": 0.30683769858308285, "grad_norm": 0.2333984375, "learning_rate": 0.0011896053995009471, "loss": 0.8112, "step": 11434 }, { "epoch": 0.3068645341348218, "grad_norm": 0.205078125, "learning_rate": 0.0011896001905714578, "loss": 0.727, "step": 11435 }, { "epoch": 0.3068913696865608, "grad_norm": 0.21875, "learning_rate": 0.001189594980348559, "loss": 0.7751, "step": 11436 }, { "epoch": 0.3069182052382997, "grad_norm": 0.232421875, "learning_rate": 0.0011895897688322624, "loss": 0.8234, "step": 11437 }, { "epoch": 0.30694504079003865, "grad_norm": 0.224609375, "learning_rate": 0.0011895845560225794, "loss": 0.7904, "step": 11438 }, { "epoch": 0.3069718763417776, "grad_norm": 0.244140625, "learning_rate": 0.001189579341919521, "loss": 0.8549, "step": 11439 }, { "epoch": 0.3069987118935165, "grad_norm": 0.2099609375, "learning_rate": 0.0011895741265230992, "loss": 0.7376, "step": 11440 }, { "epoch": 0.30702554744525545, "grad_norm": 0.22265625, "learning_rate": 0.0011895689098333252, "loss": 0.7571, "step": 11441 }, { "epoch": 0.30705238299699444, "grad_norm": 0.189453125, "learning_rate": 0.0011895636918502106, "loss": 0.6089, "step": 11442 }, { "epoch": 0.3070792185487334, "grad_norm": 0.2080078125, "learning_rate": 0.0011895584725737664, "loss": 0.7098, "step": 11443 }, { "epoch": 0.3071060541004723, "grad_norm": 0.2236328125, "learning_rate": 0.0011895532520040046, "loss": 0.8118, "step": 11444 }, { "epoch": 0.30713288965221125, "grad_norm": 0.216796875, "learning_rate": 0.0011895480301409363, "loss": 0.7837, "step": 11445 }, { "epoch": 0.3071597252039502, "grad_norm": 0.2138671875, "learning_rate": 0.0011895428069845732, "loss": 0.7237, "step": 11446 }, { "epoch": 0.3071865607556891, "grad_norm": 0.2216796875, "learning_rate": 0.0011895375825349266, "loss": 0.8087, "step": 11447 }, { "epoch": 0.30721339630742805, "grad_norm": 0.21875, "learning_rate": 0.001189532356792008, "loss": 0.7617, "step": 11448 }, { "epoch": 0.30724023185916705, "grad_norm": 0.2265625, "learning_rate": 0.0011895271297558286, "loss": 0.7758, "step": 11449 }, { "epoch": 0.307267067410906, "grad_norm": 0.2216796875, "learning_rate": 0.0011895219014264003, "loss": 0.7922, "step": 11450 }, { "epoch": 0.3072939029626449, "grad_norm": 0.2197265625, "learning_rate": 0.0011895166718037344, "loss": 0.7896, "step": 11451 }, { "epoch": 0.30732073851438385, "grad_norm": 0.2412109375, "learning_rate": 0.0011895114408878424, "loss": 0.8919, "step": 11452 }, { "epoch": 0.3073475740661228, "grad_norm": 0.2080078125, "learning_rate": 0.0011895062086787359, "loss": 0.7394, "step": 11453 }, { "epoch": 0.3073744096178617, "grad_norm": 0.2197265625, "learning_rate": 0.001189500975176426, "loss": 0.7615, "step": 11454 }, { "epoch": 0.3074012451696007, "grad_norm": 0.2236328125, "learning_rate": 0.0011894957403809242, "loss": 0.8124, "step": 11455 }, { "epoch": 0.30742808072133965, "grad_norm": 0.224609375, "learning_rate": 0.0011894905042922424, "loss": 0.7652, "step": 11456 }, { "epoch": 0.3074549162730786, "grad_norm": 0.21875, "learning_rate": 0.001189485266910392, "loss": 0.777, "step": 11457 }, { "epoch": 0.3074817518248175, "grad_norm": 0.2236328125, "learning_rate": 0.0011894800282353842, "loss": 0.7817, "step": 11458 }, { "epoch": 0.30750858737655645, "grad_norm": 0.22265625, "learning_rate": 0.0011894747882672306, "loss": 0.7984, "step": 11459 }, { "epoch": 0.3075354229282954, "grad_norm": 0.2294921875, "learning_rate": 0.0011894695470059428, "loss": 0.8177, "step": 11460 }, { "epoch": 0.3075622584800343, "grad_norm": 0.2314453125, "learning_rate": 0.0011894643044515322, "loss": 0.8293, "step": 11461 }, { "epoch": 0.3075890940317733, "grad_norm": 0.2255859375, "learning_rate": 0.0011894590606040102, "loss": 0.9161, "step": 11462 }, { "epoch": 0.30761592958351225, "grad_norm": 0.2314453125, "learning_rate": 0.0011894538154633887, "loss": 0.8154, "step": 11463 }, { "epoch": 0.3076427651352512, "grad_norm": 0.2216796875, "learning_rate": 0.001189448569029679, "loss": 0.764, "step": 11464 }, { "epoch": 0.3076696006869901, "grad_norm": 0.22265625, "learning_rate": 0.0011894433213028922, "loss": 0.8251, "step": 11465 }, { "epoch": 0.30769643623872905, "grad_norm": 0.2216796875, "learning_rate": 0.0011894380722830406, "loss": 0.822, "step": 11466 }, { "epoch": 0.307723271790468, "grad_norm": 0.228515625, "learning_rate": 0.0011894328219701348, "loss": 0.8577, "step": 11467 }, { "epoch": 0.307750107342207, "grad_norm": 0.21875, "learning_rate": 0.0011894275703641871, "loss": 0.8047, "step": 11468 }, { "epoch": 0.3077769428939459, "grad_norm": 0.224609375, "learning_rate": 0.0011894223174652087, "loss": 0.8728, "step": 11469 }, { "epoch": 0.30780377844568485, "grad_norm": 0.2392578125, "learning_rate": 0.0011894170632732112, "loss": 0.954, "step": 11470 }, { "epoch": 0.3078306139974238, "grad_norm": 0.201171875, "learning_rate": 0.001189411807788206, "loss": 0.6593, "step": 11471 }, { "epoch": 0.3078574495491627, "grad_norm": 0.228515625, "learning_rate": 0.0011894065510102048, "loss": 0.841, "step": 11472 }, { "epoch": 0.30788428510090166, "grad_norm": 0.2138671875, "learning_rate": 0.001189401292939219, "loss": 0.7385, "step": 11473 }, { "epoch": 0.3079111206526406, "grad_norm": 0.2314453125, "learning_rate": 0.0011893960335752599, "loss": 0.9342, "step": 11474 }, { "epoch": 0.3079379562043796, "grad_norm": 0.2294921875, "learning_rate": 0.0011893907729183396, "loss": 0.8316, "step": 11475 }, { "epoch": 0.3079647917561185, "grad_norm": 0.224609375, "learning_rate": 0.0011893855109684692, "loss": 0.8681, "step": 11476 }, { "epoch": 0.30799162730785745, "grad_norm": 0.2119140625, "learning_rate": 0.0011893802477256606, "loss": 0.7487, "step": 11477 }, { "epoch": 0.3080184628595964, "grad_norm": 0.220703125, "learning_rate": 0.0011893749831899249, "loss": 0.8309, "step": 11478 }, { "epoch": 0.3080452984113353, "grad_norm": 0.2158203125, "learning_rate": 0.0011893697173612741, "loss": 0.7695, "step": 11479 }, { "epoch": 0.30807213396307426, "grad_norm": 0.2373046875, "learning_rate": 0.0011893644502397193, "loss": 0.8656, "step": 11480 }, { "epoch": 0.30809896951481325, "grad_norm": 0.21875, "learning_rate": 0.0011893591818252724, "loss": 0.7504, "step": 11481 }, { "epoch": 0.3081258050665522, "grad_norm": 0.236328125, "learning_rate": 0.0011893539121179447, "loss": 0.9038, "step": 11482 }, { "epoch": 0.3081526406182911, "grad_norm": 0.2197265625, "learning_rate": 0.001189348641117748, "loss": 0.782, "step": 11483 }, { "epoch": 0.30817947617003005, "grad_norm": 0.2236328125, "learning_rate": 0.001189343368824694, "loss": 0.8093, "step": 11484 }, { "epoch": 0.308206311721769, "grad_norm": 0.2177734375, "learning_rate": 0.0011893380952387937, "loss": 0.8295, "step": 11485 }, { "epoch": 0.3082331472735079, "grad_norm": 0.2490234375, "learning_rate": 0.0011893328203600592, "loss": 0.9707, "step": 11486 }, { "epoch": 0.3082599828252469, "grad_norm": 0.21484375, "learning_rate": 0.001189327544188502, "loss": 0.7633, "step": 11487 }, { "epoch": 0.30828681837698585, "grad_norm": 0.2109375, "learning_rate": 0.0011893222667241333, "loss": 0.7792, "step": 11488 }, { "epoch": 0.3083136539287248, "grad_norm": 0.23046875, "learning_rate": 0.0011893169879669651, "loss": 0.8942, "step": 11489 }, { "epoch": 0.3083404894804637, "grad_norm": 0.2109375, "learning_rate": 0.0011893117079170086, "loss": 0.7349, "step": 11490 }, { "epoch": 0.30836732503220265, "grad_norm": 0.21875, "learning_rate": 0.001189306426574276, "loss": 0.8383, "step": 11491 }, { "epoch": 0.3083941605839416, "grad_norm": 0.2236328125, "learning_rate": 0.001189301143938778, "loss": 0.757, "step": 11492 }, { "epoch": 0.3084209961356805, "grad_norm": 0.2216796875, "learning_rate": 0.0011892958600105267, "loss": 0.7684, "step": 11493 }, { "epoch": 0.3084478316874195, "grad_norm": 0.2197265625, "learning_rate": 0.0011892905747895339, "loss": 0.8008, "step": 11494 }, { "epoch": 0.30847466723915845, "grad_norm": 0.212890625, "learning_rate": 0.0011892852882758108, "loss": 0.686, "step": 11495 }, { "epoch": 0.3085015027908974, "grad_norm": 0.2119140625, "learning_rate": 0.0011892800004693694, "loss": 0.7475, "step": 11496 }, { "epoch": 0.3085283383426363, "grad_norm": 0.2080078125, "learning_rate": 0.0011892747113702207, "loss": 0.7515, "step": 11497 }, { "epoch": 0.30855517389437526, "grad_norm": 0.2138671875, "learning_rate": 0.0011892694209783767, "loss": 0.8133, "step": 11498 }, { "epoch": 0.3085820094461142, "grad_norm": 0.2255859375, "learning_rate": 0.001189264129293849, "loss": 0.8039, "step": 11499 }, { "epoch": 0.3086088449978532, "grad_norm": 0.2470703125, "learning_rate": 0.0011892588363166493, "loss": 0.896, "step": 11500 }, { "epoch": 0.3086356805495921, "grad_norm": 0.2255859375, "learning_rate": 0.001189253542046789, "loss": 0.7754, "step": 11501 }, { "epoch": 0.30866251610133105, "grad_norm": 0.2216796875, "learning_rate": 0.00118924824648428, "loss": 0.832, "step": 11502 }, { "epoch": 0.30868935165307, "grad_norm": 0.205078125, "learning_rate": 0.0011892429496291334, "loss": 0.7177, "step": 11503 }, { "epoch": 0.3087161872048089, "grad_norm": 0.2255859375, "learning_rate": 0.0011892376514813612, "loss": 0.8164, "step": 11504 }, { "epoch": 0.30874302275654786, "grad_norm": 0.2265625, "learning_rate": 0.001189232352040975, "loss": 0.8416, "step": 11505 }, { "epoch": 0.3087698583082868, "grad_norm": 0.208984375, "learning_rate": 0.0011892270513079865, "loss": 0.7054, "step": 11506 }, { "epoch": 0.3087966938600258, "grad_norm": 0.2314453125, "learning_rate": 0.001189221749282407, "loss": 0.8172, "step": 11507 }, { "epoch": 0.3088235294117647, "grad_norm": 0.22265625, "learning_rate": 0.0011892164459642483, "loss": 0.7776, "step": 11508 }, { "epoch": 0.30885036496350365, "grad_norm": 0.228515625, "learning_rate": 0.0011892111413535221, "loss": 0.8354, "step": 11509 }, { "epoch": 0.3088772005152426, "grad_norm": 0.2216796875, "learning_rate": 0.00118920583545024, "loss": 0.8219, "step": 11510 }, { "epoch": 0.3089040360669815, "grad_norm": 0.224609375, "learning_rate": 0.0011892005282544138, "loss": 0.8256, "step": 11511 }, { "epoch": 0.30893087161872046, "grad_norm": 0.21484375, "learning_rate": 0.0011891952197660548, "loss": 0.7276, "step": 11512 }, { "epoch": 0.30895770717045945, "grad_norm": 0.2236328125, "learning_rate": 0.001189189909985175, "loss": 0.8254, "step": 11513 }, { "epoch": 0.3089845427221984, "grad_norm": 0.2392578125, "learning_rate": 0.0011891845989117859, "loss": 0.8733, "step": 11514 }, { "epoch": 0.3090113782739373, "grad_norm": 0.22265625, "learning_rate": 0.001189179286545899, "loss": 0.7569, "step": 11515 }, { "epoch": 0.30903821382567626, "grad_norm": 0.2353515625, "learning_rate": 0.0011891739728875261, "loss": 0.8168, "step": 11516 }, { "epoch": 0.3090650493774152, "grad_norm": 0.2158203125, "learning_rate": 0.0011891686579366789, "loss": 0.774, "step": 11517 }, { "epoch": 0.3090918849291541, "grad_norm": 0.2236328125, "learning_rate": 0.001189163341693369, "loss": 0.7911, "step": 11518 }, { "epoch": 0.30911872048089306, "grad_norm": 0.203125, "learning_rate": 0.0011891580241576078, "loss": 0.6955, "step": 11519 }, { "epoch": 0.30914555603263205, "grad_norm": 0.2177734375, "learning_rate": 0.0011891527053294077, "loss": 0.7495, "step": 11520 }, { "epoch": 0.309172391584371, "grad_norm": 0.232421875, "learning_rate": 0.0011891473852087796, "loss": 0.8746, "step": 11521 }, { "epoch": 0.3091992271361099, "grad_norm": 0.2099609375, "learning_rate": 0.0011891420637957356, "loss": 0.7297, "step": 11522 }, { "epoch": 0.30922606268784886, "grad_norm": 0.2314453125, "learning_rate": 0.001189136741090287, "loss": 0.8732, "step": 11523 }, { "epoch": 0.3092528982395878, "grad_norm": 0.212890625, "learning_rate": 0.0011891314170924458, "loss": 0.7776, "step": 11524 }, { "epoch": 0.3092797337913267, "grad_norm": 0.2236328125, "learning_rate": 0.0011891260918022238, "loss": 0.7831, "step": 11525 }, { "epoch": 0.3093065693430657, "grad_norm": 0.2177734375, "learning_rate": 0.0011891207652196321, "loss": 0.763, "step": 11526 }, { "epoch": 0.30933340489480465, "grad_norm": 0.2470703125, "learning_rate": 0.0011891154373446832, "loss": 0.9619, "step": 11527 }, { "epoch": 0.3093602404465436, "grad_norm": 0.228515625, "learning_rate": 0.001189110108177388, "loss": 0.8171, "step": 11528 }, { "epoch": 0.3093870759982825, "grad_norm": 0.23046875, "learning_rate": 0.001189104777717759, "loss": 0.8304, "step": 11529 }, { "epoch": 0.30941391155002146, "grad_norm": 0.2333984375, "learning_rate": 0.001189099445965807, "loss": 0.7818, "step": 11530 }, { "epoch": 0.3094407471017604, "grad_norm": 0.2177734375, "learning_rate": 0.0011890941129215442, "loss": 0.8082, "step": 11531 }, { "epoch": 0.30946758265349933, "grad_norm": 0.21875, "learning_rate": 0.0011890887785849823, "loss": 0.7816, "step": 11532 }, { "epoch": 0.3094944182052383, "grad_norm": 0.2333984375, "learning_rate": 0.001189083442956133, "loss": 0.8933, "step": 11533 }, { "epoch": 0.30952125375697725, "grad_norm": 0.234375, "learning_rate": 0.0011890781060350077, "loss": 0.882, "step": 11534 }, { "epoch": 0.3095480893087162, "grad_norm": 0.197265625, "learning_rate": 0.0011890727678216186, "loss": 0.6568, "step": 11535 }, { "epoch": 0.3095749248604551, "grad_norm": 0.2353515625, "learning_rate": 0.0011890674283159771, "loss": 0.7757, "step": 11536 }, { "epoch": 0.30960176041219406, "grad_norm": 0.228515625, "learning_rate": 0.001189062087518095, "loss": 0.7646, "step": 11537 }, { "epoch": 0.309628595963933, "grad_norm": 0.22265625, "learning_rate": 0.0011890567454279838, "loss": 0.8546, "step": 11538 }, { "epoch": 0.309655431515672, "grad_norm": 0.2314453125, "learning_rate": 0.0011890514020456554, "loss": 0.8676, "step": 11539 }, { "epoch": 0.3096822670674109, "grad_norm": 0.224609375, "learning_rate": 0.0011890460573711218, "loss": 0.8319, "step": 11540 }, { "epoch": 0.30970910261914986, "grad_norm": 0.2158203125, "learning_rate": 0.0011890407114043943, "loss": 0.7433, "step": 11541 }, { "epoch": 0.3097359381708888, "grad_norm": 0.2109375, "learning_rate": 0.0011890353641454848, "loss": 0.7095, "step": 11542 }, { "epoch": 0.3097627737226277, "grad_norm": 0.220703125, "learning_rate": 0.0011890300155944049, "loss": 0.8153, "step": 11543 }, { "epoch": 0.30978960927436666, "grad_norm": 0.2333984375, "learning_rate": 0.0011890246657511668, "loss": 0.854, "step": 11544 }, { "epoch": 0.3098164448261056, "grad_norm": 0.2451171875, "learning_rate": 0.0011890193146157815, "loss": 0.8632, "step": 11545 }, { "epoch": 0.3098432803778446, "grad_norm": 0.21484375, "learning_rate": 0.001189013962188261, "loss": 0.7232, "step": 11546 }, { "epoch": 0.3098701159295835, "grad_norm": 0.232421875, "learning_rate": 0.0011890086084686174, "loss": 0.8542, "step": 11547 }, { "epoch": 0.30989695148132246, "grad_norm": 0.205078125, "learning_rate": 0.0011890032534568622, "loss": 0.6621, "step": 11548 }, { "epoch": 0.3099237870330614, "grad_norm": 0.2294921875, "learning_rate": 0.0011889978971530073, "loss": 0.8001, "step": 11549 }, { "epoch": 0.30995062258480033, "grad_norm": 0.228515625, "learning_rate": 0.001188992539557064, "loss": 0.8292, "step": 11550 }, { "epoch": 0.30997745813653926, "grad_norm": 0.251953125, "learning_rate": 0.0011889871806690445, "loss": 0.9736, "step": 11551 }, { "epoch": 0.31000429368827825, "grad_norm": 0.2236328125, "learning_rate": 0.0011889818204889604, "loss": 0.7473, "step": 11552 }, { "epoch": 0.3100311292400172, "grad_norm": 0.232421875, "learning_rate": 0.0011889764590168235, "loss": 0.8084, "step": 11553 }, { "epoch": 0.3100579647917561, "grad_norm": 0.2275390625, "learning_rate": 0.0011889710962526452, "loss": 0.8217, "step": 11554 }, { "epoch": 0.31008480034349506, "grad_norm": 0.2216796875, "learning_rate": 0.0011889657321964382, "loss": 0.7811, "step": 11555 }, { "epoch": 0.310111635895234, "grad_norm": 0.2314453125, "learning_rate": 0.0011889603668482132, "loss": 0.8328, "step": 11556 }, { "epoch": 0.31013847144697293, "grad_norm": 0.2236328125, "learning_rate": 0.0011889550002079825, "loss": 0.8254, "step": 11557 }, { "epoch": 0.3101653069987119, "grad_norm": 0.21875, "learning_rate": 0.001188949632275758, "loss": 0.8291, "step": 11558 }, { "epoch": 0.31019214255045086, "grad_norm": 0.2216796875, "learning_rate": 0.001188944263051551, "loss": 0.7752, "step": 11559 }, { "epoch": 0.3102189781021898, "grad_norm": 0.248046875, "learning_rate": 0.0011889388925353738, "loss": 0.8703, "step": 11560 }, { "epoch": 0.3102458136539287, "grad_norm": 0.240234375, "learning_rate": 0.001188933520727238, "loss": 0.9731, "step": 11561 }, { "epoch": 0.31027264920566766, "grad_norm": 0.244140625, "learning_rate": 0.001188928147627155, "loss": 0.9074, "step": 11562 }, { "epoch": 0.3102994847574066, "grad_norm": 0.2333984375, "learning_rate": 0.001188922773235137, "loss": 0.8373, "step": 11563 }, { "epoch": 0.31032632030914553, "grad_norm": 0.2275390625, "learning_rate": 0.0011889173975511958, "loss": 0.7622, "step": 11564 }, { "epoch": 0.3103531558608845, "grad_norm": 0.2373046875, "learning_rate": 0.001188912020575343, "loss": 0.8866, "step": 11565 }, { "epoch": 0.31037999141262346, "grad_norm": 0.220703125, "learning_rate": 0.0011889066423075906, "loss": 0.7699, "step": 11566 }, { "epoch": 0.3104068269643624, "grad_norm": 0.212890625, "learning_rate": 0.0011889012627479502, "loss": 0.7622, "step": 11567 }, { "epoch": 0.3104336625161013, "grad_norm": 0.220703125, "learning_rate": 0.0011888958818964338, "loss": 0.8046, "step": 11568 }, { "epoch": 0.31046049806784026, "grad_norm": 0.2314453125, "learning_rate": 0.001188890499753053, "loss": 0.8214, "step": 11569 }, { "epoch": 0.3104873336195792, "grad_norm": 0.25, "learning_rate": 0.0011888851163178195, "loss": 0.9392, "step": 11570 }, { "epoch": 0.3105141691713182, "grad_norm": 0.2333984375, "learning_rate": 0.0011888797315907458, "loss": 0.8297, "step": 11571 }, { "epoch": 0.3105410047230571, "grad_norm": 0.212890625, "learning_rate": 0.001188874345571843, "loss": 0.7685, "step": 11572 }, { "epoch": 0.31056784027479606, "grad_norm": 0.224609375, "learning_rate": 0.001188868958261123, "loss": 0.7836, "step": 11573 }, { "epoch": 0.310594675826535, "grad_norm": 0.2373046875, "learning_rate": 0.0011888635696585977, "loss": 0.9202, "step": 11574 }, { "epoch": 0.31062151137827393, "grad_norm": 0.2255859375, "learning_rate": 0.001188858179764279, "loss": 0.7696, "step": 11575 }, { "epoch": 0.31064834693001286, "grad_norm": 0.23828125, "learning_rate": 0.001188852788578179, "loss": 0.9147, "step": 11576 }, { "epoch": 0.3106751824817518, "grad_norm": 0.205078125, "learning_rate": 0.0011888473961003088, "loss": 0.7058, "step": 11577 }, { "epoch": 0.3107020180334908, "grad_norm": 0.2197265625, "learning_rate": 0.001188842002330681, "loss": 0.8072, "step": 11578 }, { "epoch": 0.3107288535852297, "grad_norm": 0.2197265625, "learning_rate": 0.0011888366072693069, "loss": 0.8226, "step": 11579 }, { "epoch": 0.31075568913696866, "grad_norm": 0.2353515625, "learning_rate": 0.0011888312109161986, "loss": 0.8441, "step": 11580 }, { "epoch": 0.3107825246887076, "grad_norm": 0.2236328125, "learning_rate": 0.001188825813271368, "loss": 0.8005, "step": 11581 }, { "epoch": 0.31080936024044653, "grad_norm": 0.216796875, "learning_rate": 0.0011888204143348266, "loss": 0.853, "step": 11582 }, { "epoch": 0.31083619579218547, "grad_norm": 0.2158203125, "learning_rate": 0.0011888150141065865, "loss": 0.7768, "step": 11583 }, { "epoch": 0.31086303134392446, "grad_norm": 0.2197265625, "learning_rate": 0.0011888096125866594, "loss": 0.7801, "step": 11584 }, { "epoch": 0.3108898668956634, "grad_norm": 0.2255859375, "learning_rate": 0.0011888042097750575, "loss": 0.8204, "step": 11585 }, { "epoch": 0.3109167024474023, "grad_norm": 0.224609375, "learning_rate": 0.001188798805671792, "loss": 0.8704, "step": 11586 }, { "epoch": 0.31094353799914126, "grad_norm": 0.2294921875, "learning_rate": 0.0011887934002768755, "loss": 0.886, "step": 11587 }, { "epoch": 0.3109703735508802, "grad_norm": 0.220703125, "learning_rate": 0.0011887879935903191, "loss": 0.7728, "step": 11588 }, { "epoch": 0.31099720910261913, "grad_norm": 0.22265625, "learning_rate": 0.0011887825856121356, "loss": 0.7451, "step": 11589 }, { "epoch": 0.31102404465435807, "grad_norm": 0.240234375, "learning_rate": 0.001188777176342336, "loss": 0.8848, "step": 11590 }, { "epoch": 0.31105088020609706, "grad_norm": 0.224609375, "learning_rate": 0.0011887717657809325, "loss": 0.7947, "step": 11591 }, { "epoch": 0.311077715757836, "grad_norm": 0.22265625, "learning_rate": 0.001188766353927937, "loss": 0.8155, "step": 11592 }, { "epoch": 0.31110455130957493, "grad_norm": 0.2197265625, "learning_rate": 0.0011887609407833614, "loss": 0.7682, "step": 11593 }, { "epoch": 0.31113138686131386, "grad_norm": 0.2275390625, "learning_rate": 0.0011887555263472174, "loss": 0.7779, "step": 11594 }, { "epoch": 0.3111582224130528, "grad_norm": 0.224609375, "learning_rate": 0.0011887501106195172, "loss": 0.8111, "step": 11595 }, { "epoch": 0.31118505796479173, "grad_norm": 0.2314453125, "learning_rate": 0.0011887446936002723, "loss": 0.8554, "step": 11596 }, { "epoch": 0.3112118935165307, "grad_norm": 0.2265625, "learning_rate": 0.0011887392752894948, "loss": 0.8305, "step": 11597 }, { "epoch": 0.31123872906826966, "grad_norm": 0.2275390625, "learning_rate": 0.0011887338556871966, "loss": 0.8132, "step": 11598 }, { "epoch": 0.3112655646200086, "grad_norm": 0.2431640625, "learning_rate": 0.0011887284347933895, "loss": 0.927, "step": 11599 }, { "epoch": 0.31129240017174753, "grad_norm": 0.216796875, "learning_rate": 0.0011887230126080852, "loss": 0.7873, "step": 11600 }, { "epoch": 0.31131923572348646, "grad_norm": 0.2294921875, "learning_rate": 0.001188717589131296, "loss": 0.8804, "step": 11601 }, { "epoch": 0.3113460712752254, "grad_norm": 0.22265625, "learning_rate": 0.0011887121643630336, "loss": 0.8344, "step": 11602 }, { "epoch": 0.31137290682696434, "grad_norm": 0.244140625, "learning_rate": 0.0011887067383033099, "loss": 0.8808, "step": 11603 }, { "epoch": 0.3113997423787033, "grad_norm": 0.2177734375, "learning_rate": 0.001188701310952137, "loss": 0.8445, "step": 11604 }, { "epoch": 0.31142657793044226, "grad_norm": 0.212890625, "learning_rate": 0.0011886958823095263, "loss": 0.7326, "step": 11605 }, { "epoch": 0.3114534134821812, "grad_norm": 0.216796875, "learning_rate": 0.0011886904523754902, "loss": 0.7729, "step": 11606 }, { "epoch": 0.31148024903392013, "grad_norm": 0.2158203125, "learning_rate": 0.0011886850211500404, "loss": 0.8298, "step": 11607 }, { "epoch": 0.31150708458565907, "grad_norm": 0.2197265625, "learning_rate": 0.0011886795886331888, "loss": 0.7941, "step": 11608 }, { "epoch": 0.311533920137398, "grad_norm": 0.2275390625, "learning_rate": 0.0011886741548249476, "loss": 0.8551, "step": 11609 }, { "epoch": 0.311560755689137, "grad_norm": 0.236328125, "learning_rate": 0.0011886687197253283, "loss": 0.7899, "step": 11610 }, { "epoch": 0.3115875912408759, "grad_norm": 0.2109375, "learning_rate": 0.0011886632833343431, "loss": 0.7483, "step": 11611 }, { "epoch": 0.31161442679261486, "grad_norm": 0.220703125, "learning_rate": 0.0011886578456520038, "loss": 0.7611, "step": 11612 }, { "epoch": 0.3116412623443538, "grad_norm": 0.2412109375, "learning_rate": 0.0011886524066783224, "loss": 0.8526, "step": 11613 }, { "epoch": 0.31166809789609273, "grad_norm": 0.2265625, "learning_rate": 0.001188646966413311, "loss": 0.7894, "step": 11614 }, { "epoch": 0.31169493344783167, "grad_norm": 0.228515625, "learning_rate": 0.001188641524856981, "loss": 0.7995, "step": 11615 }, { "epoch": 0.31172176899957066, "grad_norm": 0.2158203125, "learning_rate": 0.0011886360820093447, "loss": 0.7113, "step": 11616 }, { "epoch": 0.3117486045513096, "grad_norm": 0.2392578125, "learning_rate": 0.001188630637870414, "loss": 0.891, "step": 11617 }, { "epoch": 0.31177544010304853, "grad_norm": 0.220703125, "learning_rate": 0.0011886251924402012, "loss": 0.7363, "step": 11618 }, { "epoch": 0.31180227565478746, "grad_norm": 0.228515625, "learning_rate": 0.0011886197457187176, "loss": 0.7574, "step": 11619 }, { "epoch": 0.3118291112065264, "grad_norm": 0.234375, "learning_rate": 0.0011886142977059755, "loss": 0.8049, "step": 11620 }, { "epoch": 0.31185594675826533, "grad_norm": 0.2158203125, "learning_rate": 0.001188608848401987, "loss": 0.7076, "step": 11621 }, { "epoch": 0.31188278231000427, "grad_norm": 0.208984375, "learning_rate": 0.0011886033978067635, "loss": 0.6608, "step": 11622 }, { "epoch": 0.31190961786174326, "grad_norm": 0.20703125, "learning_rate": 0.0011885979459203178, "loss": 0.7958, "step": 11623 }, { "epoch": 0.3119364534134822, "grad_norm": 0.23046875, "learning_rate": 0.001188592492742661, "loss": 0.824, "step": 11624 }, { "epoch": 0.31196328896522113, "grad_norm": 0.2333984375, "learning_rate": 0.0011885870382738054, "loss": 0.7702, "step": 11625 }, { "epoch": 0.31199012451696007, "grad_norm": 0.2099609375, "learning_rate": 0.001188581582513763, "loss": 0.7729, "step": 11626 }, { "epoch": 0.312016960068699, "grad_norm": 0.216796875, "learning_rate": 0.0011885761254625462, "loss": 0.7132, "step": 11627 }, { "epoch": 0.31204379562043794, "grad_norm": 0.2353515625, "learning_rate": 0.0011885706671201663, "loss": 0.8195, "step": 11628 }, { "epoch": 0.3120706311721769, "grad_norm": 0.2470703125, "learning_rate": 0.0011885652074866352, "loss": 0.906, "step": 11629 }, { "epoch": 0.31209746672391586, "grad_norm": 0.2138671875, "learning_rate": 0.0011885597465619655, "loss": 0.7076, "step": 11630 }, { "epoch": 0.3121243022756548, "grad_norm": 0.220703125, "learning_rate": 0.0011885542843461689, "loss": 0.7888, "step": 11631 }, { "epoch": 0.31215113782739373, "grad_norm": 0.22265625, "learning_rate": 0.0011885488208392572, "loss": 0.7445, "step": 11632 }, { "epoch": 0.31217797337913267, "grad_norm": 0.234375, "learning_rate": 0.0011885433560412427, "loss": 0.8907, "step": 11633 }, { "epoch": 0.3122048089308716, "grad_norm": 0.224609375, "learning_rate": 0.001188537889952137, "loss": 0.8666, "step": 11634 }, { "epoch": 0.31223164448261054, "grad_norm": 0.236328125, "learning_rate": 0.0011885324225719524, "loss": 0.8595, "step": 11635 }, { "epoch": 0.31225848003434953, "grad_norm": 0.21484375, "learning_rate": 0.0011885269539007011, "loss": 0.8225, "step": 11636 }, { "epoch": 0.31228531558608846, "grad_norm": 0.2177734375, "learning_rate": 0.0011885214839383947, "loss": 0.7937, "step": 11637 }, { "epoch": 0.3123121511378274, "grad_norm": 0.23046875, "learning_rate": 0.001188516012685045, "loss": 0.8018, "step": 11638 }, { "epoch": 0.31233898668956633, "grad_norm": 0.2021484375, "learning_rate": 0.0011885105401406647, "loss": 0.7083, "step": 11639 }, { "epoch": 0.31236582224130527, "grad_norm": 0.2177734375, "learning_rate": 0.0011885050663052652, "loss": 0.7937, "step": 11640 }, { "epoch": 0.3123926577930442, "grad_norm": 0.2177734375, "learning_rate": 0.0011884995911788587, "loss": 0.8655, "step": 11641 }, { "epoch": 0.3124194933447832, "grad_norm": 0.236328125, "learning_rate": 0.0011884941147614573, "loss": 0.873, "step": 11642 }, { "epoch": 0.31244632889652213, "grad_norm": 0.236328125, "learning_rate": 0.0011884886370530728, "loss": 0.9259, "step": 11643 }, { "epoch": 0.31247316444826106, "grad_norm": 0.2353515625, "learning_rate": 0.0011884831580537176, "loss": 0.8411, "step": 11644 }, { "epoch": 0.3125, "grad_norm": 0.21484375, "learning_rate": 0.0011884776777634034, "loss": 0.7234, "step": 11645 }, { "epoch": 0.31252683555173894, "grad_norm": 0.2197265625, "learning_rate": 0.0011884721961821422, "loss": 0.8058, "step": 11646 }, { "epoch": 0.31255367110347787, "grad_norm": 0.2197265625, "learning_rate": 0.0011884667133099463, "loss": 0.8289, "step": 11647 }, { "epoch": 0.3125805066552168, "grad_norm": 0.224609375, "learning_rate": 0.0011884612291468274, "loss": 0.85, "step": 11648 }, { "epoch": 0.3126073422069558, "grad_norm": 0.2236328125, "learning_rate": 0.001188455743692798, "loss": 0.8678, "step": 11649 }, { "epoch": 0.31263417775869473, "grad_norm": 0.2197265625, "learning_rate": 0.0011884502569478696, "loss": 0.7941, "step": 11650 }, { "epoch": 0.31266101331043367, "grad_norm": 0.224609375, "learning_rate": 0.0011884447689120543, "loss": 0.8259, "step": 11651 }, { "epoch": 0.3126878488621726, "grad_norm": 0.2353515625, "learning_rate": 0.0011884392795853647, "loss": 0.8769, "step": 11652 }, { "epoch": 0.31271468441391154, "grad_norm": 0.2236328125, "learning_rate": 0.0011884337889678121, "loss": 0.8345, "step": 11653 }, { "epoch": 0.31274151996565047, "grad_norm": 0.2177734375, "learning_rate": 0.001188428297059409, "loss": 0.7956, "step": 11654 }, { "epoch": 0.31276835551738946, "grad_norm": 0.201171875, "learning_rate": 0.0011884228038601674, "loss": 0.7271, "step": 11655 }, { "epoch": 0.3127951910691284, "grad_norm": 0.2451171875, "learning_rate": 0.0011884173093700992, "loss": 0.907, "step": 11656 }, { "epoch": 0.31282202662086733, "grad_norm": 0.205078125, "learning_rate": 0.0011884118135892165, "loss": 0.6639, "step": 11657 }, { "epoch": 0.31284886217260627, "grad_norm": 0.2158203125, "learning_rate": 0.0011884063165175315, "loss": 0.7863, "step": 11658 }, { "epoch": 0.3128756977243452, "grad_norm": 0.21875, "learning_rate": 0.0011884008181550561, "loss": 0.7803, "step": 11659 }, { "epoch": 0.31290253327608414, "grad_norm": 0.2021484375, "learning_rate": 0.0011883953185018025, "loss": 0.723, "step": 11660 }, { "epoch": 0.3129293688278231, "grad_norm": 0.220703125, "learning_rate": 0.0011883898175577825, "loss": 0.8636, "step": 11661 }, { "epoch": 0.31295620437956206, "grad_norm": 0.2255859375, "learning_rate": 0.0011883843153230086, "loss": 0.8357, "step": 11662 }, { "epoch": 0.312983039931301, "grad_norm": 0.2255859375, "learning_rate": 0.0011883788117974924, "loss": 0.8385, "step": 11663 }, { "epoch": 0.31300987548303993, "grad_norm": 0.224609375, "learning_rate": 0.0011883733069812461, "loss": 0.7952, "step": 11664 }, { "epoch": 0.31303671103477887, "grad_norm": 0.220703125, "learning_rate": 0.0011883678008742821, "loss": 0.8342, "step": 11665 }, { "epoch": 0.3130635465865178, "grad_norm": 0.21484375, "learning_rate": 0.0011883622934766123, "loss": 0.7776, "step": 11666 }, { "epoch": 0.31309038213825674, "grad_norm": 0.2431640625, "learning_rate": 0.0011883567847882487, "loss": 0.9128, "step": 11667 }, { "epoch": 0.31311721768999573, "grad_norm": 0.220703125, "learning_rate": 0.001188351274809203, "loss": 0.7122, "step": 11668 }, { "epoch": 0.31314405324173467, "grad_norm": 0.2275390625, "learning_rate": 0.001188345763539488, "loss": 0.9109, "step": 11669 }, { "epoch": 0.3131708887934736, "grad_norm": 0.23046875, "learning_rate": 0.0011883402509791155, "loss": 0.868, "step": 11670 }, { "epoch": 0.31319772434521254, "grad_norm": 0.23828125, "learning_rate": 0.0011883347371280978, "loss": 0.8307, "step": 11671 }, { "epoch": 0.31322455989695147, "grad_norm": 0.224609375, "learning_rate": 0.0011883292219864465, "loss": 0.7424, "step": 11672 }, { "epoch": 0.3132513954486904, "grad_norm": 0.2451171875, "learning_rate": 0.001188323705554174, "loss": 0.9669, "step": 11673 }, { "epoch": 0.31327823100042934, "grad_norm": 0.2294921875, "learning_rate": 0.0011883181878312925, "loss": 0.8477, "step": 11674 }, { "epoch": 0.31330506655216833, "grad_norm": 0.2373046875, "learning_rate": 0.0011883126688178138, "loss": 0.8622, "step": 11675 }, { "epoch": 0.31333190210390727, "grad_norm": 0.240234375, "learning_rate": 0.0011883071485137503, "loss": 0.821, "step": 11676 }, { "epoch": 0.3133587376556462, "grad_norm": 0.236328125, "learning_rate": 0.001188301626919114, "loss": 0.8098, "step": 11677 }, { "epoch": 0.31338557320738514, "grad_norm": 0.20703125, "learning_rate": 0.0011882961040339168, "loss": 0.681, "step": 11678 }, { "epoch": 0.3134124087591241, "grad_norm": 0.224609375, "learning_rate": 0.0011882905798581713, "loss": 0.8942, "step": 11679 }, { "epoch": 0.313439244310863, "grad_norm": 0.228515625, "learning_rate": 0.0011882850543918895, "loss": 0.7963, "step": 11680 }, { "epoch": 0.313466079862602, "grad_norm": 0.2197265625, "learning_rate": 0.001188279527635083, "loss": 0.7823, "step": 11681 }, { "epoch": 0.31349291541434093, "grad_norm": 0.212890625, "learning_rate": 0.0011882739995877644, "loss": 0.7712, "step": 11682 }, { "epoch": 0.31351975096607987, "grad_norm": 0.197265625, "learning_rate": 0.0011882684702499457, "loss": 0.7025, "step": 11683 }, { "epoch": 0.3135465865178188, "grad_norm": 0.2275390625, "learning_rate": 0.0011882629396216392, "loss": 0.9223, "step": 11684 }, { "epoch": 0.31357342206955774, "grad_norm": 0.21484375, "learning_rate": 0.0011882574077028566, "loss": 0.7811, "step": 11685 }, { "epoch": 0.3136002576212967, "grad_norm": 0.216796875, "learning_rate": 0.0011882518744936105, "loss": 0.7163, "step": 11686 }, { "epoch": 0.31362709317303566, "grad_norm": 0.216796875, "learning_rate": 0.001188246339993913, "loss": 0.805, "step": 11687 }, { "epoch": 0.3136539287247746, "grad_norm": 0.2255859375, "learning_rate": 0.0011882408042037758, "loss": 0.7899, "step": 11688 }, { "epoch": 0.31368076427651354, "grad_norm": 0.240234375, "learning_rate": 0.0011882352671232115, "loss": 0.9288, "step": 11689 }, { "epoch": 0.31370759982825247, "grad_norm": 0.2138671875, "learning_rate": 0.001188229728752232, "loss": 0.7764, "step": 11690 }, { "epoch": 0.3137344353799914, "grad_norm": 0.2119140625, "learning_rate": 0.0011882241890908496, "loss": 0.7704, "step": 11691 }, { "epoch": 0.31376127093173034, "grad_norm": 0.2265625, "learning_rate": 0.0011882186481390765, "loss": 0.8831, "step": 11692 }, { "epoch": 0.3137881064834693, "grad_norm": 0.2294921875, "learning_rate": 0.0011882131058969245, "loss": 0.874, "step": 11693 }, { "epoch": 0.31381494203520827, "grad_norm": 0.2255859375, "learning_rate": 0.001188207562364406, "loss": 0.7945, "step": 11694 }, { "epoch": 0.3138417775869472, "grad_norm": 0.232421875, "learning_rate": 0.0011882020175415335, "loss": 0.8824, "step": 11695 }, { "epoch": 0.31386861313868614, "grad_norm": 0.216796875, "learning_rate": 0.0011881964714283186, "loss": 0.7593, "step": 11696 }, { "epoch": 0.31389544869042507, "grad_norm": 0.23046875, "learning_rate": 0.0011881909240247736, "loss": 0.8671, "step": 11697 }, { "epoch": 0.313922284242164, "grad_norm": 0.2216796875, "learning_rate": 0.0011881853753309107, "loss": 0.7775, "step": 11698 }, { "epoch": 0.31394911979390294, "grad_norm": 0.2197265625, "learning_rate": 0.0011881798253467425, "loss": 0.7816, "step": 11699 }, { "epoch": 0.31397595534564193, "grad_norm": 0.2080078125, "learning_rate": 0.0011881742740722807, "loss": 0.6646, "step": 11700 }, { "epoch": 0.31400279089738087, "grad_norm": 0.228515625, "learning_rate": 0.0011881687215075374, "loss": 0.8933, "step": 11701 }, { "epoch": 0.3140296264491198, "grad_norm": 0.2197265625, "learning_rate": 0.001188163167652525, "loss": 0.8341, "step": 11702 }, { "epoch": 0.31405646200085874, "grad_norm": 0.2236328125, "learning_rate": 0.0011881576125072557, "loss": 0.7922, "step": 11703 }, { "epoch": 0.3140832975525977, "grad_norm": 0.2255859375, "learning_rate": 0.0011881520560717417, "loss": 0.798, "step": 11704 }, { "epoch": 0.3141101331043366, "grad_norm": 0.220703125, "learning_rate": 0.0011881464983459948, "loss": 0.7877, "step": 11705 }, { "epoch": 0.31413696865607554, "grad_norm": 0.2109375, "learning_rate": 0.001188140939330028, "loss": 0.7369, "step": 11706 }, { "epoch": 0.31416380420781453, "grad_norm": 0.2197265625, "learning_rate": 0.0011881353790238528, "loss": 0.7887, "step": 11707 }, { "epoch": 0.31419063975955347, "grad_norm": 0.2119140625, "learning_rate": 0.0011881298174274814, "loss": 0.6914, "step": 11708 }, { "epoch": 0.3142174753112924, "grad_norm": 0.22265625, "learning_rate": 0.0011881242545409266, "loss": 0.7846, "step": 11709 }, { "epoch": 0.31424431086303134, "grad_norm": 0.23046875, "learning_rate": 0.0011881186903642, "loss": 0.8393, "step": 11710 }, { "epoch": 0.3142711464147703, "grad_norm": 0.2119140625, "learning_rate": 0.001188113124897314, "loss": 0.7713, "step": 11711 }, { "epoch": 0.3142979819665092, "grad_norm": 0.22265625, "learning_rate": 0.0011881075581402808, "loss": 0.8006, "step": 11712 }, { "epoch": 0.3143248175182482, "grad_norm": 0.240234375, "learning_rate": 0.0011881019900931127, "loss": 0.9176, "step": 11713 }, { "epoch": 0.31435165306998714, "grad_norm": 0.23828125, "learning_rate": 0.001188096420755822, "loss": 0.8831, "step": 11714 }, { "epoch": 0.31437848862172607, "grad_norm": 0.228515625, "learning_rate": 0.0011880908501284207, "loss": 0.8517, "step": 11715 }, { "epoch": 0.314405324173465, "grad_norm": 0.2451171875, "learning_rate": 0.001188085278210921, "loss": 0.9324, "step": 11716 }, { "epoch": 0.31443215972520394, "grad_norm": 0.2236328125, "learning_rate": 0.001188079705003335, "loss": 0.8632, "step": 11717 }, { "epoch": 0.3144589952769429, "grad_norm": 0.1982421875, "learning_rate": 0.0011880741305056756, "loss": 0.7006, "step": 11718 }, { "epoch": 0.3144858308286818, "grad_norm": 0.232421875, "learning_rate": 0.0011880685547179545, "loss": 0.9005, "step": 11719 }, { "epoch": 0.3145126663804208, "grad_norm": 0.2392578125, "learning_rate": 0.0011880629776401838, "loss": 0.7656, "step": 11720 }, { "epoch": 0.31453950193215974, "grad_norm": 0.22265625, "learning_rate": 0.0011880573992723762, "loss": 0.8618, "step": 11721 }, { "epoch": 0.3145663374838987, "grad_norm": 0.2265625, "learning_rate": 0.0011880518196145432, "loss": 0.7758, "step": 11722 }, { "epoch": 0.3145931730356376, "grad_norm": 0.2177734375, "learning_rate": 0.0011880462386666978, "loss": 0.7313, "step": 11723 }, { "epoch": 0.31462000858737654, "grad_norm": 0.21875, "learning_rate": 0.001188040656428852, "loss": 0.8046, "step": 11724 }, { "epoch": 0.3146468441391155, "grad_norm": 0.224609375, "learning_rate": 0.001188035072901018, "loss": 0.8485, "step": 11725 }, { "epoch": 0.31467367969085447, "grad_norm": 0.234375, "learning_rate": 0.0011880294880832081, "loss": 0.8156, "step": 11726 }, { "epoch": 0.3147005152425934, "grad_norm": 0.2255859375, "learning_rate": 0.0011880239019754345, "loss": 0.7711, "step": 11727 }, { "epoch": 0.31472735079433234, "grad_norm": 0.2392578125, "learning_rate": 0.0011880183145777093, "loss": 0.8172, "step": 11728 }, { "epoch": 0.3147541863460713, "grad_norm": 0.2373046875, "learning_rate": 0.0011880127258900451, "loss": 0.856, "step": 11729 }, { "epoch": 0.3147810218978102, "grad_norm": 0.2265625, "learning_rate": 0.0011880071359124539, "loss": 0.7504, "step": 11730 }, { "epoch": 0.31480785744954914, "grad_norm": 0.2197265625, "learning_rate": 0.0011880015446449478, "loss": 0.7298, "step": 11731 }, { "epoch": 0.3148346930012881, "grad_norm": 0.228515625, "learning_rate": 0.0011879959520875395, "loss": 0.8315, "step": 11732 }, { "epoch": 0.31486152855302707, "grad_norm": 0.224609375, "learning_rate": 0.0011879903582402414, "loss": 0.7713, "step": 11733 }, { "epoch": 0.314888364104766, "grad_norm": 0.22265625, "learning_rate": 0.001187984763103065, "loss": 0.7911, "step": 11734 }, { "epoch": 0.31491519965650494, "grad_norm": 0.20703125, "learning_rate": 0.001187979166676023, "loss": 0.7332, "step": 11735 }, { "epoch": 0.3149420352082439, "grad_norm": 0.2138671875, "learning_rate": 0.001187973568959128, "loss": 0.832, "step": 11736 }, { "epoch": 0.3149688707599828, "grad_norm": 0.2099609375, "learning_rate": 0.001187967969952392, "loss": 0.751, "step": 11737 }, { "epoch": 0.31499570631172175, "grad_norm": 0.2255859375, "learning_rate": 0.0011879623696558269, "loss": 0.8576, "step": 11738 }, { "epoch": 0.31502254186346074, "grad_norm": 0.2099609375, "learning_rate": 0.0011879567680694455, "loss": 0.7241, "step": 11739 }, { "epoch": 0.31504937741519967, "grad_norm": 0.216796875, "learning_rate": 0.00118795116519326, "loss": 0.6979, "step": 11740 }, { "epoch": 0.3150762129669386, "grad_norm": 0.2275390625, "learning_rate": 0.0011879455610272825, "loss": 0.7909, "step": 11741 }, { "epoch": 0.31510304851867754, "grad_norm": 0.228515625, "learning_rate": 0.0011879399555715256, "loss": 0.8541, "step": 11742 }, { "epoch": 0.3151298840704165, "grad_norm": 0.24609375, "learning_rate": 0.0011879343488260012, "loss": 0.8564, "step": 11743 }, { "epoch": 0.3151567196221554, "grad_norm": 0.2275390625, "learning_rate": 0.0011879287407907217, "loss": 0.8052, "step": 11744 }, { "epoch": 0.3151835551738944, "grad_norm": 0.21484375, "learning_rate": 0.0011879231314656999, "loss": 0.7814, "step": 11745 }, { "epoch": 0.31521039072563334, "grad_norm": 0.216796875, "learning_rate": 0.0011879175208509474, "loss": 0.7856, "step": 11746 }, { "epoch": 0.3152372262773723, "grad_norm": 0.2109375, "learning_rate": 0.001187911908946477, "loss": 0.7359, "step": 11747 }, { "epoch": 0.3152640618291112, "grad_norm": 0.224609375, "learning_rate": 0.0011879062957523008, "loss": 0.7243, "step": 11748 }, { "epoch": 0.31529089738085014, "grad_norm": 0.2275390625, "learning_rate": 0.0011879006812684314, "loss": 0.8152, "step": 11749 }, { "epoch": 0.3153177329325891, "grad_norm": 0.2314453125, "learning_rate": 0.0011878950654948804, "loss": 0.8333, "step": 11750 }, { "epoch": 0.315344568484328, "grad_norm": 0.244140625, "learning_rate": 0.0011878894484316609, "loss": 0.8605, "step": 11751 }, { "epoch": 0.315371404036067, "grad_norm": 0.224609375, "learning_rate": 0.0011878838300787848, "loss": 0.8342, "step": 11752 }, { "epoch": 0.31539823958780594, "grad_norm": 0.224609375, "learning_rate": 0.0011878782104362646, "loss": 0.7097, "step": 11753 }, { "epoch": 0.3154250751395449, "grad_norm": 0.2109375, "learning_rate": 0.0011878725895041126, "loss": 0.7262, "step": 11754 }, { "epoch": 0.3154519106912838, "grad_norm": 0.2236328125, "learning_rate": 0.001187866967282341, "loss": 0.8338, "step": 11755 }, { "epoch": 0.31547874624302275, "grad_norm": 0.23828125, "learning_rate": 0.0011878613437709621, "loss": 0.8356, "step": 11756 }, { "epoch": 0.3155055817947617, "grad_norm": 0.2177734375, "learning_rate": 0.0011878557189699884, "loss": 0.7489, "step": 11757 }, { "epoch": 0.31553241734650067, "grad_norm": 0.2119140625, "learning_rate": 0.0011878500928794323, "loss": 0.7548, "step": 11758 }, { "epoch": 0.3155592528982396, "grad_norm": 0.2119140625, "learning_rate": 0.0011878444654993062, "loss": 0.7774, "step": 11759 }, { "epoch": 0.31558608844997854, "grad_norm": 0.1962890625, "learning_rate": 0.0011878388368296222, "loss": 0.6804, "step": 11760 }, { "epoch": 0.3156129240017175, "grad_norm": 0.2138671875, "learning_rate": 0.0011878332068703925, "loss": 0.7728, "step": 11761 }, { "epoch": 0.3156397595534564, "grad_norm": 0.2255859375, "learning_rate": 0.00118782757562163, "loss": 0.8461, "step": 11762 }, { "epoch": 0.31566659510519535, "grad_norm": 0.21875, "learning_rate": 0.0011878219430833466, "loss": 0.7916, "step": 11763 }, { "epoch": 0.3156934306569343, "grad_norm": 0.2373046875, "learning_rate": 0.0011878163092555547, "loss": 0.7638, "step": 11764 }, { "epoch": 0.3157202662086733, "grad_norm": 0.2890625, "learning_rate": 0.001187810674138267, "loss": 0.9126, "step": 11765 }, { "epoch": 0.3157471017604122, "grad_norm": 0.29296875, "learning_rate": 0.0011878050377314953, "loss": 0.9795, "step": 11766 }, { "epoch": 0.31577393731215114, "grad_norm": 0.2373046875, "learning_rate": 0.0011877994000352526, "loss": 0.8302, "step": 11767 }, { "epoch": 0.3158007728638901, "grad_norm": 0.259765625, "learning_rate": 0.0011877937610495508, "loss": 0.8625, "step": 11768 }, { "epoch": 0.315827608415629, "grad_norm": 0.28125, "learning_rate": 0.0011877881207744024, "loss": 0.8349, "step": 11769 }, { "epoch": 0.31585444396736795, "grad_norm": 0.287109375, "learning_rate": 0.00118778247920982, "loss": 0.9067, "step": 11770 }, { "epoch": 0.31588127951910694, "grad_norm": 0.2578125, "learning_rate": 0.0011877768363558153, "loss": 0.8669, "step": 11771 }, { "epoch": 0.3159081150708459, "grad_norm": 0.228515625, "learning_rate": 0.0011877711922124016, "loss": 0.7088, "step": 11772 }, { "epoch": 0.3159349506225848, "grad_norm": 0.2392578125, "learning_rate": 0.0011877655467795907, "loss": 0.9076, "step": 11773 }, { "epoch": 0.31596178617432374, "grad_norm": 0.2294921875, "learning_rate": 0.001187759900057395, "loss": 0.8443, "step": 11774 }, { "epoch": 0.3159886217260627, "grad_norm": 0.2412109375, "learning_rate": 0.001187754252045827, "loss": 0.8058, "step": 11775 }, { "epoch": 0.3160154572778016, "grad_norm": 0.2578125, "learning_rate": 0.001187748602744899, "loss": 1.0294, "step": 11776 }, { "epoch": 0.31604229282954055, "grad_norm": 0.2197265625, "learning_rate": 0.001187742952154624, "loss": 0.7182, "step": 11777 }, { "epoch": 0.31606912838127954, "grad_norm": 0.234375, "learning_rate": 0.0011877373002750133, "loss": 0.7808, "step": 11778 }, { "epoch": 0.3160959639330185, "grad_norm": 0.2431640625, "learning_rate": 0.0011877316471060803, "loss": 0.9045, "step": 11779 }, { "epoch": 0.3161227994847574, "grad_norm": 0.212890625, "learning_rate": 0.0011877259926478367, "loss": 0.7985, "step": 11780 }, { "epoch": 0.31614963503649635, "grad_norm": 0.2353515625, "learning_rate": 0.0011877203369002954, "loss": 0.8858, "step": 11781 }, { "epoch": 0.3161764705882353, "grad_norm": 0.2265625, "learning_rate": 0.0011877146798634683, "loss": 0.8633, "step": 11782 }, { "epoch": 0.3162033061399742, "grad_norm": 0.2294921875, "learning_rate": 0.0011877090215373684, "loss": 0.8532, "step": 11783 }, { "epoch": 0.3162301416917132, "grad_norm": 0.2216796875, "learning_rate": 0.0011877033619220076, "loss": 0.7822, "step": 11784 }, { "epoch": 0.31625697724345214, "grad_norm": 0.2314453125, "learning_rate": 0.0011876977010173988, "loss": 0.8811, "step": 11785 }, { "epoch": 0.3162838127951911, "grad_norm": 0.224609375, "learning_rate": 0.0011876920388235537, "loss": 0.8314, "step": 11786 }, { "epoch": 0.31631064834693, "grad_norm": 0.2197265625, "learning_rate": 0.0011876863753404856, "loss": 0.7775, "step": 11787 }, { "epoch": 0.31633748389866895, "grad_norm": 0.2138671875, "learning_rate": 0.0011876807105682063, "loss": 0.7637, "step": 11788 }, { "epoch": 0.3163643194504079, "grad_norm": 0.2236328125, "learning_rate": 0.0011876750445067284, "loss": 0.8274, "step": 11789 }, { "epoch": 0.3163911550021468, "grad_norm": 0.2353515625, "learning_rate": 0.0011876693771560643, "loss": 0.9319, "step": 11790 }, { "epoch": 0.3164179905538858, "grad_norm": 0.216796875, "learning_rate": 0.0011876637085162266, "loss": 0.774, "step": 11791 }, { "epoch": 0.31644482610562474, "grad_norm": 0.2265625, "learning_rate": 0.0011876580385872277, "loss": 0.8283, "step": 11792 }, { "epoch": 0.3164716616573637, "grad_norm": 0.212890625, "learning_rate": 0.0011876523673690798, "loss": 0.7606, "step": 11793 }, { "epoch": 0.3164984972091026, "grad_norm": 0.2333984375, "learning_rate": 0.0011876466948617954, "loss": 0.8737, "step": 11794 }, { "epoch": 0.31652533276084155, "grad_norm": 0.2294921875, "learning_rate": 0.0011876410210653872, "loss": 0.9185, "step": 11795 }, { "epoch": 0.3165521683125805, "grad_norm": 0.23046875, "learning_rate": 0.0011876353459798674, "loss": 0.8795, "step": 11796 }, { "epoch": 0.3165790038643195, "grad_norm": 0.2265625, "learning_rate": 0.0011876296696052486, "loss": 0.8813, "step": 11797 }, { "epoch": 0.3166058394160584, "grad_norm": 0.228515625, "learning_rate": 0.0011876239919415433, "loss": 0.8853, "step": 11798 }, { "epoch": 0.31663267496779735, "grad_norm": 0.2294921875, "learning_rate": 0.0011876183129887637, "loss": 0.827, "step": 11799 }, { "epoch": 0.3166595105195363, "grad_norm": 0.216796875, "learning_rate": 0.0011876126327469223, "loss": 0.7234, "step": 11800 }, { "epoch": 0.3166863460712752, "grad_norm": 0.232421875, "learning_rate": 0.0011876069512160318, "loss": 0.9005, "step": 11801 }, { "epoch": 0.31671318162301415, "grad_norm": 0.208984375, "learning_rate": 0.0011876012683961045, "loss": 0.7614, "step": 11802 }, { "epoch": 0.3167400171747531, "grad_norm": 0.216796875, "learning_rate": 0.001187595584287153, "loss": 0.7574, "step": 11803 }, { "epoch": 0.3167668527264921, "grad_norm": 0.23828125, "learning_rate": 0.0011875898988891894, "loss": 0.9106, "step": 11804 }, { "epoch": 0.316793688278231, "grad_norm": 0.2275390625, "learning_rate": 0.001187584212202227, "loss": 0.8503, "step": 11805 }, { "epoch": 0.31682052382996995, "grad_norm": 0.2451171875, "learning_rate": 0.001187578524226277, "loss": 0.9433, "step": 11806 }, { "epoch": 0.3168473593817089, "grad_norm": 0.236328125, "learning_rate": 0.001187572834961353, "loss": 0.8954, "step": 11807 }, { "epoch": 0.3168741949334478, "grad_norm": 0.244140625, "learning_rate": 0.001187567144407467, "loss": 0.8307, "step": 11808 }, { "epoch": 0.31690103048518675, "grad_norm": 0.2412109375, "learning_rate": 0.0011875614525646313, "loss": 0.8964, "step": 11809 }, { "epoch": 0.31692786603692574, "grad_norm": 0.232421875, "learning_rate": 0.0011875557594328591, "loss": 0.8791, "step": 11810 }, { "epoch": 0.3169547015886647, "grad_norm": 0.2265625, "learning_rate": 0.001187550065012162, "loss": 0.8471, "step": 11811 }, { "epoch": 0.3169815371404036, "grad_norm": 0.22265625, "learning_rate": 0.0011875443693025532, "loss": 0.772, "step": 11812 }, { "epoch": 0.31700837269214255, "grad_norm": 0.2255859375, "learning_rate": 0.0011875386723040448, "loss": 0.8391, "step": 11813 }, { "epoch": 0.3170352082438815, "grad_norm": 0.224609375, "learning_rate": 0.0011875329740166493, "loss": 0.8553, "step": 11814 }, { "epoch": 0.3170620437956204, "grad_norm": 0.232421875, "learning_rate": 0.0011875272744403796, "loss": 0.8172, "step": 11815 }, { "epoch": 0.3170888793473594, "grad_norm": 0.2197265625, "learning_rate": 0.0011875215735752476, "loss": 0.7676, "step": 11816 }, { "epoch": 0.31711571489909834, "grad_norm": 0.2421875, "learning_rate": 0.0011875158714212662, "loss": 0.9403, "step": 11817 }, { "epoch": 0.3171425504508373, "grad_norm": 0.23046875, "learning_rate": 0.001187510167978448, "loss": 0.8577, "step": 11818 }, { "epoch": 0.3171693860025762, "grad_norm": 0.2314453125, "learning_rate": 0.001187504463246805, "loss": 0.889, "step": 11819 }, { "epoch": 0.31719622155431515, "grad_norm": 0.2314453125, "learning_rate": 0.0011874987572263506, "loss": 0.8402, "step": 11820 }, { "epoch": 0.3172230571060541, "grad_norm": 0.220703125, "learning_rate": 0.0011874930499170962, "loss": 0.726, "step": 11821 }, { "epoch": 0.317249892657793, "grad_norm": 0.2294921875, "learning_rate": 0.0011874873413190554, "loss": 0.8351, "step": 11822 }, { "epoch": 0.317276728209532, "grad_norm": 0.240234375, "learning_rate": 0.0011874816314322398, "loss": 0.9137, "step": 11823 }, { "epoch": 0.31730356376127095, "grad_norm": 0.2373046875, "learning_rate": 0.0011874759202566624, "loss": 0.9806, "step": 11824 }, { "epoch": 0.3173303993130099, "grad_norm": 0.216796875, "learning_rate": 0.0011874702077923358, "loss": 0.8451, "step": 11825 }, { "epoch": 0.3173572348647488, "grad_norm": 0.21875, "learning_rate": 0.0011874644940392724, "loss": 0.8149, "step": 11826 }, { "epoch": 0.31738407041648775, "grad_norm": 0.2060546875, "learning_rate": 0.0011874587789974848, "loss": 0.6774, "step": 11827 }, { "epoch": 0.3174109059682267, "grad_norm": 0.25, "learning_rate": 0.0011874530626669853, "loss": 0.9693, "step": 11828 }, { "epoch": 0.3174377415199657, "grad_norm": 0.234375, "learning_rate": 0.0011874473450477867, "loss": 0.8494, "step": 11829 }, { "epoch": 0.3174645770717046, "grad_norm": 0.21875, "learning_rate": 0.0011874416261399014, "loss": 0.7224, "step": 11830 }, { "epoch": 0.31749141262344355, "grad_norm": 0.2451171875, "learning_rate": 0.001187435905943342, "loss": 0.9371, "step": 11831 }, { "epoch": 0.3175182481751825, "grad_norm": 0.220703125, "learning_rate": 0.0011874301844581213, "loss": 0.8524, "step": 11832 }, { "epoch": 0.3175450837269214, "grad_norm": 0.216796875, "learning_rate": 0.0011874244616842512, "loss": 0.7379, "step": 11833 }, { "epoch": 0.31757191927866035, "grad_norm": 0.228515625, "learning_rate": 0.001187418737621745, "loss": 0.8465, "step": 11834 }, { "epoch": 0.3175987548303993, "grad_norm": 0.2333984375, "learning_rate": 0.0011874130122706147, "loss": 0.8824, "step": 11835 }, { "epoch": 0.3176255903821383, "grad_norm": 0.2138671875, "learning_rate": 0.001187407285630873, "loss": 0.7709, "step": 11836 }, { "epoch": 0.3176524259338772, "grad_norm": 0.20703125, "learning_rate": 0.0011874015577025326, "loss": 0.773, "step": 11837 }, { "epoch": 0.31767926148561615, "grad_norm": 0.216796875, "learning_rate": 0.001187395828485606, "loss": 0.7359, "step": 11838 }, { "epoch": 0.3177060970373551, "grad_norm": 0.2216796875, "learning_rate": 0.001187390097980106, "loss": 0.8559, "step": 11839 }, { "epoch": 0.317732932589094, "grad_norm": 0.224609375, "learning_rate": 0.0011873843661860447, "loss": 0.8412, "step": 11840 }, { "epoch": 0.31775976814083295, "grad_norm": 0.23046875, "learning_rate": 0.001187378633103435, "loss": 0.8475, "step": 11841 }, { "epoch": 0.31778660369257195, "grad_norm": 0.2080078125, "learning_rate": 0.0011873728987322892, "loss": 0.7791, "step": 11842 }, { "epoch": 0.3178134392443109, "grad_norm": 0.2314453125, "learning_rate": 0.0011873671630726204, "loss": 0.8762, "step": 11843 }, { "epoch": 0.3178402747960498, "grad_norm": 0.2421875, "learning_rate": 0.0011873614261244407, "loss": 0.8433, "step": 11844 }, { "epoch": 0.31786711034778875, "grad_norm": 0.2255859375, "learning_rate": 0.0011873556878877628, "loss": 0.8218, "step": 11845 }, { "epoch": 0.3178939458995277, "grad_norm": 0.2294921875, "learning_rate": 0.001187349948362599, "loss": 0.8544, "step": 11846 }, { "epoch": 0.3179207814512666, "grad_norm": 0.2138671875, "learning_rate": 0.0011873442075489627, "loss": 0.8052, "step": 11847 }, { "epoch": 0.31794761700300556, "grad_norm": 0.2490234375, "learning_rate": 0.001187338465446866, "loss": 1.0012, "step": 11848 }, { "epoch": 0.31797445255474455, "grad_norm": 0.236328125, "learning_rate": 0.0011873327220563213, "loss": 0.9289, "step": 11849 }, { "epoch": 0.3180012881064835, "grad_norm": 0.23828125, "learning_rate": 0.0011873269773773414, "loss": 0.9007, "step": 11850 }, { "epoch": 0.3180281236582224, "grad_norm": 0.220703125, "learning_rate": 0.001187321231409939, "loss": 0.7994, "step": 11851 }, { "epoch": 0.31805495920996135, "grad_norm": 0.224609375, "learning_rate": 0.0011873154841541265, "loss": 0.8093, "step": 11852 }, { "epoch": 0.3180817947617003, "grad_norm": 0.2294921875, "learning_rate": 0.0011873097356099168, "loss": 0.8304, "step": 11853 }, { "epoch": 0.3181086303134392, "grad_norm": 0.236328125, "learning_rate": 0.0011873039857773222, "loss": 0.9191, "step": 11854 }, { "epoch": 0.3181354658651782, "grad_norm": 0.25390625, "learning_rate": 0.0011872982346563556, "loss": 1.0215, "step": 11855 }, { "epoch": 0.31816230141691715, "grad_norm": 0.2158203125, "learning_rate": 0.0011872924822470294, "loss": 0.7979, "step": 11856 }, { "epoch": 0.3181891369686561, "grad_norm": 0.208984375, "learning_rate": 0.0011872867285493561, "loss": 0.748, "step": 11857 }, { "epoch": 0.318215972520395, "grad_norm": 0.2197265625, "learning_rate": 0.0011872809735633486, "loss": 0.7835, "step": 11858 }, { "epoch": 0.31824280807213395, "grad_norm": 0.228515625, "learning_rate": 0.0011872752172890195, "loss": 0.8151, "step": 11859 }, { "epoch": 0.3182696436238729, "grad_norm": 0.205078125, "learning_rate": 0.0011872694597263812, "loss": 0.7232, "step": 11860 }, { "epoch": 0.3182964791756118, "grad_norm": 0.197265625, "learning_rate": 0.0011872637008754465, "loss": 0.6698, "step": 11861 }, { "epoch": 0.3183233147273508, "grad_norm": 0.2392578125, "learning_rate": 0.001187257940736228, "loss": 0.9429, "step": 11862 }, { "epoch": 0.31835015027908975, "grad_norm": 0.23046875, "learning_rate": 0.0011872521793087386, "loss": 0.8434, "step": 11863 }, { "epoch": 0.3183769858308287, "grad_norm": 0.2197265625, "learning_rate": 0.0011872464165929904, "loss": 0.8224, "step": 11864 }, { "epoch": 0.3184038213825676, "grad_norm": 0.2080078125, "learning_rate": 0.0011872406525889965, "loss": 0.7297, "step": 11865 }, { "epoch": 0.31843065693430656, "grad_norm": 0.20703125, "learning_rate": 0.0011872348872967694, "loss": 0.7548, "step": 11866 }, { "epoch": 0.3184574924860455, "grad_norm": 0.2216796875, "learning_rate": 0.0011872291207163216, "loss": 0.8141, "step": 11867 }, { "epoch": 0.3184843280377845, "grad_norm": 0.2275390625, "learning_rate": 0.001187223352847666, "loss": 0.8642, "step": 11868 }, { "epoch": 0.3185111635895234, "grad_norm": 0.2421875, "learning_rate": 0.001187217583690815, "loss": 0.9134, "step": 11869 }, { "epoch": 0.31853799914126235, "grad_norm": 0.21875, "learning_rate": 0.0011872118132457813, "loss": 0.834, "step": 11870 }, { "epoch": 0.3185648346930013, "grad_norm": 0.2080078125, "learning_rate": 0.001187206041512578, "loss": 0.7856, "step": 11871 }, { "epoch": 0.3185916702447402, "grad_norm": 0.2197265625, "learning_rate": 0.0011872002684912171, "loss": 0.7216, "step": 11872 }, { "epoch": 0.31861850579647916, "grad_norm": 0.2138671875, "learning_rate": 0.0011871944941817118, "loss": 0.7663, "step": 11873 }, { "epoch": 0.3186453413482181, "grad_norm": 0.23046875, "learning_rate": 0.0011871887185840742, "loss": 0.9016, "step": 11874 }, { "epoch": 0.3186721768999571, "grad_norm": 0.2236328125, "learning_rate": 0.0011871829416983176, "loss": 0.8848, "step": 11875 }, { "epoch": 0.318699012451696, "grad_norm": 0.23828125, "learning_rate": 0.0011871771635244542, "loss": 0.9095, "step": 11876 }, { "epoch": 0.31872584800343495, "grad_norm": 0.2265625, "learning_rate": 0.001187171384062497, "loss": 0.849, "step": 11877 }, { "epoch": 0.3187526835551739, "grad_norm": 0.224609375, "learning_rate": 0.0011871656033124584, "loss": 0.7469, "step": 11878 }, { "epoch": 0.3187795191069128, "grad_norm": 0.2109375, "learning_rate": 0.0011871598212743514, "loss": 0.7476, "step": 11879 }, { "epoch": 0.31880635465865176, "grad_norm": 0.2177734375, "learning_rate": 0.0011871540379481882, "loss": 0.7934, "step": 11880 }, { "epoch": 0.31883319021039075, "grad_norm": 0.224609375, "learning_rate": 0.001187148253333982, "loss": 0.817, "step": 11881 }, { "epoch": 0.3188600257621297, "grad_norm": 0.228515625, "learning_rate": 0.0011871424674317452, "loss": 0.8688, "step": 11882 }, { "epoch": 0.3188868613138686, "grad_norm": 0.244140625, "learning_rate": 0.0011871366802414906, "loss": 0.9019, "step": 11883 }, { "epoch": 0.31891369686560755, "grad_norm": 0.234375, "learning_rate": 0.001187130891763231, "loss": 0.8443, "step": 11884 }, { "epoch": 0.3189405324173465, "grad_norm": 0.2255859375, "learning_rate": 0.0011871251019969788, "loss": 0.8085, "step": 11885 }, { "epoch": 0.3189673679690854, "grad_norm": 0.2109375, "learning_rate": 0.001187119310942747, "loss": 0.7156, "step": 11886 }, { "epoch": 0.3189942035208244, "grad_norm": 0.2236328125, "learning_rate": 0.001187113518600548, "loss": 0.8688, "step": 11887 }, { "epoch": 0.31902103907256335, "grad_norm": 0.23046875, "learning_rate": 0.0011871077249703944, "loss": 0.9083, "step": 11888 }, { "epoch": 0.3190478746243023, "grad_norm": 0.2431640625, "learning_rate": 0.0011871019300522998, "loss": 0.9182, "step": 11889 }, { "epoch": 0.3190747101760412, "grad_norm": 0.228515625, "learning_rate": 0.0011870961338462758, "loss": 0.8579, "step": 11890 }, { "epoch": 0.31910154572778016, "grad_norm": 0.2216796875, "learning_rate": 0.001187090336352336, "loss": 0.7948, "step": 11891 }, { "epoch": 0.3191283812795191, "grad_norm": 0.212890625, "learning_rate": 0.0011870845375704924, "loss": 0.7958, "step": 11892 }, { "epoch": 0.319155216831258, "grad_norm": 0.21484375, "learning_rate": 0.001187078737500758, "loss": 0.8005, "step": 11893 }, { "epoch": 0.319182052382997, "grad_norm": 0.23828125, "learning_rate": 0.0011870729361431458, "loss": 0.8371, "step": 11894 }, { "epoch": 0.31920888793473595, "grad_norm": 0.2158203125, "learning_rate": 0.0011870671334976682, "loss": 0.8084, "step": 11895 }, { "epoch": 0.3192357234864749, "grad_norm": 0.23046875, "learning_rate": 0.0011870613295643382, "loss": 0.8526, "step": 11896 }, { "epoch": 0.3192625590382138, "grad_norm": 0.2265625, "learning_rate": 0.001187055524343168, "loss": 0.7654, "step": 11897 }, { "epoch": 0.31928939458995276, "grad_norm": 0.21484375, "learning_rate": 0.0011870497178341711, "loss": 0.7781, "step": 11898 }, { "epoch": 0.3193162301416917, "grad_norm": 0.2236328125, "learning_rate": 0.0011870439100373597, "loss": 0.8137, "step": 11899 }, { "epoch": 0.3193430656934307, "grad_norm": 0.228515625, "learning_rate": 0.0011870381009527464, "loss": 0.8855, "step": 11900 }, { "epoch": 0.3193699012451696, "grad_norm": 0.22265625, "learning_rate": 0.0011870322905803445, "loss": 0.8, "step": 11901 }, { "epoch": 0.31939673679690855, "grad_norm": 0.24609375, "learning_rate": 0.0011870264789201664, "loss": 0.8906, "step": 11902 }, { "epoch": 0.3194235723486475, "grad_norm": 0.2392578125, "learning_rate": 0.0011870206659722249, "loss": 0.94, "step": 11903 }, { "epoch": 0.3194504079003864, "grad_norm": 0.220703125, "learning_rate": 0.0011870148517365328, "loss": 0.8336, "step": 11904 }, { "epoch": 0.31947724345212536, "grad_norm": 0.2353515625, "learning_rate": 0.0011870090362131028, "loss": 0.7939, "step": 11905 }, { "epoch": 0.3195040790038643, "grad_norm": 0.232421875, "learning_rate": 0.0011870032194019476, "loss": 0.8537, "step": 11906 }, { "epoch": 0.3195309145556033, "grad_norm": 0.2099609375, "learning_rate": 0.0011869974013030801, "loss": 0.7293, "step": 11907 }, { "epoch": 0.3195577501073422, "grad_norm": 0.2255859375, "learning_rate": 0.0011869915819165131, "loss": 0.8698, "step": 11908 }, { "epoch": 0.31958458565908116, "grad_norm": 0.2333984375, "learning_rate": 0.0011869857612422591, "loss": 0.8493, "step": 11909 }, { "epoch": 0.3196114212108201, "grad_norm": 0.21875, "learning_rate": 0.0011869799392803311, "loss": 0.7736, "step": 11910 }, { "epoch": 0.319638256762559, "grad_norm": 0.2275390625, "learning_rate": 0.0011869741160307418, "loss": 0.812, "step": 11911 }, { "epoch": 0.31966509231429796, "grad_norm": 0.2216796875, "learning_rate": 0.001186968291493504, "loss": 0.7953, "step": 11912 }, { "epoch": 0.31969192786603695, "grad_norm": 0.216796875, "learning_rate": 0.0011869624656686306, "loss": 0.7877, "step": 11913 }, { "epoch": 0.3197187634177759, "grad_norm": 0.220703125, "learning_rate": 0.001186956638556134, "loss": 0.7475, "step": 11914 }, { "epoch": 0.3197455989695148, "grad_norm": 0.2275390625, "learning_rate": 0.0011869508101560273, "loss": 0.813, "step": 11915 }, { "epoch": 0.31977243452125376, "grad_norm": 0.22265625, "learning_rate": 0.0011869449804683232, "loss": 0.87, "step": 11916 }, { "epoch": 0.3197992700729927, "grad_norm": 0.228515625, "learning_rate": 0.0011869391494930344, "loss": 0.8684, "step": 11917 }, { "epoch": 0.3198261056247316, "grad_norm": 0.2236328125, "learning_rate": 0.001186933317230174, "loss": 0.7872, "step": 11918 }, { "epoch": 0.31985294117647056, "grad_norm": 0.2373046875, "learning_rate": 0.0011869274836797546, "loss": 0.8918, "step": 11919 }, { "epoch": 0.31987977672820955, "grad_norm": 0.2216796875, "learning_rate": 0.0011869216488417887, "loss": 0.7574, "step": 11920 }, { "epoch": 0.3199066122799485, "grad_norm": 0.2265625, "learning_rate": 0.0011869158127162896, "loss": 0.7492, "step": 11921 }, { "epoch": 0.3199334478316874, "grad_norm": 0.2236328125, "learning_rate": 0.0011869099753032697, "loss": 0.8002, "step": 11922 }, { "epoch": 0.31996028338342636, "grad_norm": 0.23828125, "learning_rate": 0.001186904136602742, "loss": 0.8959, "step": 11923 }, { "epoch": 0.3199871189351653, "grad_norm": 0.232421875, "learning_rate": 0.0011868982966147196, "loss": 0.7993, "step": 11924 }, { "epoch": 0.32001395448690423, "grad_norm": 0.21875, "learning_rate": 0.0011868924553392146, "loss": 0.7519, "step": 11925 }, { "epoch": 0.3200407900386432, "grad_norm": 0.23046875, "learning_rate": 0.0011868866127762404, "loss": 0.7937, "step": 11926 }, { "epoch": 0.3200407900386432, "eval_loss": 3.1069695949554443, "eval_runtime": 582.3467, "eval_samples_per_second": 82.319, "eval_steps_per_second": 20.581, "step": 11926 }, { "epoch": 0.32006762559038215, "grad_norm": 0.2392578125, "learning_rate": 0.0011868807689258097, "loss": 0.9479, "step": 11927 }, { "epoch": 0.3200944611421211, "grad_norm": 0.2431640625, "learning_rate": 0.0011868749237879351, "loss": 0.9184, "step": 11928 }, { "epoch": 0.32012129669386, "grad_norm": 0.224609375, "learning_rate": 0.0011868690773626299, "loss": 0.8075, "step": 11929 }, { "epoch": 0.32014813224559896, "grad_norm": 0.2275390625, "learning_rate": 0.0011868632296499063, "loss": 0.8408, "step": 11930 }, { "epoch": 0.3201749677973379, "grad_norm": 0.2265625, "learning_rate": 0.0011868573806497775, "loss": 0.9112, "step": 11931 }, { "epoch": 0.32020180334907683, "grad_norm": 0.220703125, "learning_rate": 0.0011868515303622563, "loss": 0.7585, "step": 11932 }, { "epoch": 0.3202286389008158, "grad_norm": 0.248046875, "learning_rate": 0.0011868456787873555, "loss": 0.947, "step": 11933 }, { "epoch": 0.32025547445255476, "grad_norm": 0.2451171875, "learning_rate": 0.001186839825925088, "loss": 1.008, "step": 11934 }, { "epoch": 0.3202823100042937, "grad_norm": 0.21875, "learning_rate": 0.0011868339717754663, "loss": 0.7377, "step": 11935 }, { "epoch": 0.3203091455560326, "grad_norm": 0.2255859375, "learning_rate": 0.0011868281163385037, "loss": 0.8329, "step": 11936 }, { "epoch": 0.32033598110777156, "grad_norm": 0.2333984375, "learning_rate": 0.0011868222596142129, "loss": 0.8566, "step": 11937 }, { "epoch": 0.3203628166595105, "grad_norm": 0.236328125, "learning_rate": 0.0011868164016026067, "loss": 0.886, "step": 11938 }, { "epoch": 0.3203896522112495, "grad_norm": 0.2216796875, "learning_rate": 0.001186810542303698, "loss": 0.8492, "step": 11939 }, { "epoch": 0.3204164877629884, "grad_norm": 0.2255859375, "learning_rate": 0.0011868046817174994, "loss": 0.8062, "step": 11940 }, { "epoch": 0.32044332331472736, "grad_norm": 0.2314453125, "learning_rate": 0.0011867988198440242, "loss": 0.8692, "step": 11941 }, { "epoch": 0.3204701588664663, "grad_norm": 0.216796875, "learning_rate": 0.0011867929566832848, "loss": 0.7865, "step": 11942 }, { "epoch": 0.3204969944182052, "grad_norm": 0.2314453125, "learning_rate": 0.0011867870922352944, "loss": 0.8117, "step": 11943 }, { "epoch": 0.32052382996994416, "grad_norm": 0.23046875, "learning_rate": 0.001186781226500066, "loss": 0.872, "step": 11944 }, { "epoch": 0.32055066552168315, "grad_norm": 0.2119140625, "learning_rate": 0.001186775359477612, "loss": 0.7717, "step": 11945 }, { "epoch": 0.3205775010734221, "grad_norm": 0.2216796875, "learning_rate": 0.0011867694911679454, "loss": 0.8561, "step": 11946 }, { "epoch": 0.320604336625161, "grad_norm": 0.208984375, "learning_rate": 0.001186763621571079, "loss": 0.8017, "step": 11947 }, { "epoch": 0.32063117217689996, "grad_norm": 0.216796875, "learning_rate": 0.0011867577506870263, "loss": 0.7849, "step": 11948 }, { "epoch": 0.3206580077286389, "grad_norm": 0.2197265625, "learning_rate": 0.0011867518785157996, "loss": 0.7656, "step": 11949 }, { "epoch": 0.32068484328037783, "grad_norm": 0.2109375, "learning_rate": 0.0011867460050574118, "loss": 0.7893, "step": 11950 }, { "epoch": 0.32071167883211676, "grad_norm": 0.263671875, "learning_rate": 0.0011867401303118758, "loss": 0.8531, "step": 11951 }, { "epoch": 0.32073851438385576, "grad_norm": 0.2294921875, "learning_rate": 0.0011867342542792047, "loss": 0.8767, "step": 11952 }, { "epoch": 0.3207653499355947, "grad_norm": 0.2294921875, "learning_rate": 0.0011867283769594111, "loss": 0.8524, "step": 11953 }, { "epoch": 0.3207921854873336, "grad_norm": 0.216796875, "learning_rate": 0.0011867224983525084, "loss": 0.7387, "step": 11954 }, { "epoch": 0.32081902103907256, "grad_norm": 0.236328125, "learning_rate": 0.001186716618458509, "loss": 0.8417, "step": 11955 }, { "epoch": 0.3208458565908115, "grad_norm": 0.216796875, "learning_rate": 0.0011867107372774259, "loss": 0.7644, "step": 11956 }, { "epoch": 0.32087269214255043, "grad_norm": 0.216796875, "learning_rate": 0.001186704854809272, "loss": 0.7894, "step": 11957 }, { "epoch": 0.3208995276942894, "grad_norm": 0.216796875, "learning_rate": 0.0011866989710540604, "loss": 0.7919, "step": 11958 }, { "epoch": 0.32092636324602836, "grad_norm": 0.21484375, "learning_rate": 0.0011866930860118037, "loss": 0.7755, "step": 11959 }, { "epoch": 0.3209531987977673, "grad_norm": 0.228515625, "learning_rate": 0.0011866871996825152, "loss": 0.8057, "step": 11960 }, { "epoch": 0.3209800343495062, "grad_norm": 0.216796875, "learning_rate": 0.0011866813120662075, "loss": 0.7907, "step": 11961 }, { "epoch": 0.32100686990124516, "grad_norm": 0.2236328125, "learning_rate": 0.0011866754231628935, "loss": 0.7711, "step": 11962 }, { "epoch": 0.3210337054529841, "grad_norm": 0.2197265625, "learning_rate": 0.0011866695329725864, "loss": 0.7661, "step": 11963 }, { "epoch": 0.32106054100472303, "grad_norm": 0.232421875, "learning_rate": 0.0011866636414952988, "loss": 0.8874, "step": 11964 }, { "epoch": 0.321087376556462, "grad_norm": 0.2138671875, "learning_rate": 0.0011866577487310438, "loss": 0.7438, "step": 11965 }, { "epoch": 0.32111421210820096, "grad_norm": 0.2294921875, "learning_rate": 0.0011866518546798344, "loss": 0.8251, "step": 11966 }, { "epoch": 0.3211410476599399, "grad_norm": 0.2294921875, "learning_rate": 0.0011866459593416832, "loss": 0.8621, "step": 11967 }, { "epoch": 0.32116788321167883, "grad_norm": 0.2197265625, "learning_rate": 0.0011866400627166035, "loss": 0.8363, "step": 11968 }, { "epoch": 0.32119471876341776, "grad_norm": 0.2353515625, "learning_rate": 0.001186634164804608, "loss": 0.8794, "step": 11969 }, { "epoch": 0.3212215543151567, "grad_norm": 0.224609375, "learning_rate": 0.0011866282656057099, "loss": 0.8506, "step": 11970 }, { "epoch": 0.3212483898668957, "grad_norm": 0.2294921875, "learning_rate": 0.001186622365119922, "loss": 0.8497, "step": 11971 }, { "epoch": 0.3212752254186346, "grad_norm": 0.2353515625, "learning_rate": 0.0011866164633472572, "loss": 0.8744, "step": 11972 }, { "epoch": 0.32130206097037356, "grad_norm": 0.2236328125, "learning_rate": 0.0011866105602877284, "loss": 0.8455, "step": 11973 }, { "epoch": 0.3213288965221125, "grad_norm": 0.22265625, "learning_rate": 0.0011866046559413485, "loss": 0.8772, "step": 11974 }, { "epoch": 0.32135573207385143, "grad_norm": 0.21875, "learning_rate": 0.0011865987503081306, "loss": 0.774, "step": 11975 }, { "epoch": 0.32138256762559037, "grad_norm": 0.234375, "learning_rate": 0.0011865928433880877, "loss": 0.9026, "step": 11976 }, { "epoch": 0.3214094031773293, "grad_norm": 0.234375, "learning_rate": 0.0011865869351812327, "loss": 0.8328, "step": 11977 }, { "epoch": 0.3214362387290683, "grad_norm": 0.212890625, "learning_rate": 0.0011865810256875785, "loss": 0.7647, "step": 11978 }, { "epoch": 0.3214630742808072, "grad_norm": 0.2431640625, "learning_rate": 0.001186575114907138, "loss": 0.8964, "step": 11979 }, { "epoch": 0.32148990983254616, "grad_norm": 0.2255859375, "learning_rate": 0.0011865692028399245, "loss": 0.7954, "step": 11980 }, { "epoch": 0.3215167453842851, "grad_norm": 0.228515625, "learning_rate": 0.0011865632894859507, "loss": 0.9077, "step": 11981 }, { "epoch": 0.32154358093602403, "grad_norm": 0.2275390625, "learning_rate": 0.0011865573748452295, "loss": 0.813, "step": 11982 }, { "epoch": 0.32157041648776297, "grad_norm": 0.2158203125, "learning_rate": 0.0011865514589177739, "loss": 0.7836, "step": 11983 }, { "epoch": 0.32159725203950196, "grad_norm": 0.2177734375, "learning_rate": 0.0011865455417035971, "loss": 0.7067, "step": 11984 }, { "epoch": 0.3216240875912409, "grad_norm": 0.220703125, "learning_rate": 0.0011865396232027118, "loss": 0.8387, "step": 11985 }, { "epoch": 0.3216509231429798, "grad_norm": 0.22265625, "learning_rate": 0.0011865337034151315, "loss": 0.7979, "step": 11986 }, { "epoch": 0.32167775869471876, "grad_norm": 0.2333984375, "learning_rate": 0.0011865277823408685, "loss": 0.9236, "step": 11987 }, { "epoch": 0.3217045942464577, "grad_norm": 0.2421875, "learning_rate": 0.001186521859979936, "loss": 0.9692, "step": 11988 }, { "epoch": 0.32173142979819663, "grad_norm": 0.2236328125, "learning_rate": 0.0011865159363323473, "loss": 0.8635, "step": 11989 }, { "epoch": 0.32175826534993557, "grad_norm": 0.2421875, "learning_rate": 0.0011865100113981153, "loss": 0.9537, "step": 11990 }, { "epoch": 0.32178510090167456, "grad_norm": 0.205078125, "learning_rate": 0.0011865040851772527, "loss": 0.7232, "step": 11991 }, { "epoch": 0.3218119364534135, "grad_norm": 0.216796875, "learning_rate": 0.0011864981576697729, "loss": 0.7514, "step": 11992 }, { "epoch": 0.32183877200515243, "grad_norm": 0.228515625, "learning_rate": 0.0011864922288756883, "loss": 0.891, "step": 11993 }, { "epoch": 0.32186560755689136, "grad_norm": 0.240234375, "learning_rate": 0.0011864862987950127, "loss": 0.8989, "step": 11994 }, { "epoch": 0.3218924431086303, "grad_norm": 0.216796875, "learning_rate": 0.0011864803674277583, "loss": 0.7679, "step": 11995 }, { "epoch": 0.32191927866036923, "grad_norm": 0.22265625, "learning_rate": 0.001186474434773939, "loss": 0.7732, "step": 11996 }, { "epoch": 0.3219461142121082, "grad_norm": 0.22265625, "learning_rate": 0.001186468500833567, "loss": 0.8991, "step": 11997 }, { "epoch": 0.32197294976384716, "grad_norm": 0.224609375, "learning_rate": 0.0011864625656066558, "loss": 0.7924, "step": 11998 }, { "epoch": 0.3219997853155861, "grad_norm": 0.220703125, "learning_rate": 0.0011864566290932183, "loss": 0.8317, "step": 11999 }, { "epoch": 0.32202662086732503, "grad_norm": 0.2314453125, "learning_rate": 0.0011864506912932673, "loss": 0.8858, "step": 12000 }, { "epoch": 0.32205345641906397, "grad_norm": 0.23828125, "learning_rate": 0.0011864447522068162, "loss": 0.8663, "step": 12001 }, { "epoch": 0.3220802919708029, "grad_norm": 0.216796875, "learning_rate": 0.0011864388118338778, "loss": 0.7786, "step": 12002 }, { "epoch": 0.32210712752254184, "grad_norm": 0.2177734375, "learning_rate": 0.001186432870174465, "loss": 0.7944, "step": 12003 }, { "epoch": 0.3221339630742808, "grad_norm": 0.216796875, "learning_rate": 0.0011864269272285912, "loss": 0.7596, "step": 12004 }, { "epoch": 0.32216079862601976, "grad_norm": 0.2197265625, "learning_rate": 0.0011864209829962693, "loss": 0.8084, "step": 12005 }, { "epoch": 0.3221876341777587, "grad_norm": 0.23828125, "learning_rate": 0.001186415037477512, "loss": 0.8912, "step": 12006 }, { "epoch": 0.32221446972949763, "grad_norm": 0.2080078125, "learning_rate": 0.0011864090906723332, "loss": 0.7216, "step": 12007 }, { "epoch": 0.32224130528123657, "grad_norm": 0.234375, "learning_rate": 0.0011864031425807449, "loss": 0.9319, "step": 12008 }, { "epoch": 0.3222681408329755, "grad_norm": 0.2177734375, "learning_rate": 0.0011863971932027607, "loss": 0.7908, "step": 12009 }, { "epoch": 0.3222949763847145, "grad_norm": 0.208984375, "learning_rate": 0.0011863912425383938, "loss": 0.7187, "step": 12010 }, { "epoch": 0.32232181193645343, "grad_norm": 0.2255859375, "learning_rate": 0.0011863852905876567, "loss": 0.7548, "step": 12011 }, { "epoch": 0.32234864748819236, "grad_norm": 0.234375, "learning_rate": 0.001186379337350563, "loss": 0.8514, "step": 12012 }, { "epoch": 0.3223754830399313, "grad_norm": 0.216796875, "learning_rate": 0.0011863733828271256, "loss": 0.8093, "step": 12013 }, { "epoch": 0.32240231859167023, "grad_norm": 0.2060546875, "learning_rate": 0.0011863674270173573, "loss": 0.7091, "step": 12014 }, { "epoch": 0.32242915414340917, "grad_norm": 0.2333984375, "learning_rate": 0.0011863614699212715, "loss": 0.8339, "step": 12015 }, { "epoch": 0.32245598969514816, "grad_norm": 0.244140625, "learning_rate": 0.0011863555115388812, "loss": 0.923, "step": 12016 }, { "epoch": 0.3224828252468871, "grad_norm": 0.2177734375, "learning_rate": 0.0011863495518701992, "loss": 0.8334, "step": 12017 }, { "epoch": 0.32250966079862603, "grad_norm": 0.2314453125, "learning_rate": 0.001186343590915239, "loss": 0.8957, "step": 12018 }, { "epoch": 0.32253649635036497, "grad_norm": 0.224609375, "learning_rate": 0.0011863376286740134, "loss": 0.882, "step": 12019 }, { "epoch": 0.3225633319021039, "grad_norm": 0.2216796875, "learning_rate": 0.0011863316651465358, "loss": 0.8183, "step": 12020 }, { "epoch": 0.32259016745384284, "grad_norm": 0.2255859375, "learning_rate": 0.0011863257003328187, "loss": 0.7312, "step": 12021 }, { "epoch": 0.32261700300558177, "grad_norm": 0.21484375, "learning_rate": 0.0011863197342328756, "loss": 0.7659, "step": 12022 }, { "epoch": 0.32264383855732076, "grad_norm": 0.2236328125, "learning_rate": 0.0011863137668467194, "loss": 0.831, "step": 12023 }, { "epoch": 0.3226706741090597, "grad_norm": 0.2236328125, "learning_rate": 0.0011863077981743634, "loss": 0.8002, "step": 12024 }, { "epoch": 0.32269750966079863, "grad_norm": 0.21484375, "learning_rate": 0.0011863018282158206, "loss": 0.7313, "step": 12025 }, { "epoch": 0.32272434521253757, "grad_norm": 0.224609375, "learning_rate": 0.001186295856971104, "loss": 0.7657, "step": 12026 }, { "epoch": 0.3227511807642765, "grad_norm": 0.2177734375, "learning_rate": 0.0011862898844402269, "loss": 0.8143, "step": 12027 }, { "epoch": 0.32277801631601544, "grad_norm": 0.2275390625, "learning_rate": 0.001186283910623202, "loss": 0.7804, "step": 12028 }, { "epoch": 0.3228048518677544, "grad_norm": 0.220703125, "learning_rate": 0.001186277935520043, "loss": 0.7506, "step": 12029 }, { "epoch": 0.32283168741949336, "grad_norm": 0.2138671875, "learning_rate": 0.0011862719591307627, "loss": 0.7521, "step": 12030 }, { "epoch": 0.3228585229712323, "grad_norm": 0.212890625, "learning_rate": 0.0011862659814553742, "loss": 0.7736, "step": 12031 }, { "epoch": 0.32288535852297123, "grad_norm": 0.2216796875, "learning_rate": 0.0011862600024938905, "loss": 0.7509, "step": 12032 }, { "epoch": 0.32291219407471017, "grad_norm": 0.2177734375, "learning_rate": 0.0011862540222463249, "loss": 0.8124, "step": 12033 }, { "epoch": 0.3229390296264491, "grad_norm": 0.2431640625, "learning_rate": 0.0011862480407126904, "loss": 0.8997, "step": 12034 }, { "epoch": 0.32296586517818804, "grad_norm": 0.22265625, "learning_rate": 0.0011862420578930002, "loss": 0.8509, "step": 12035 }, { "epoch": 0.32299270072992703, "grad_norm": 0.2265625, "learning_rate": 0.0011862360737872673, "loss": 0.8248, "step": 12036 }, { "epoch": 0.32301953628166596, "grad_norm": 0.23046875, "learning_rate": 0.001186230088395505, "loss": 0.8874, "step": 12037 }, { "epoch": 0.3230463718334049, "grad_norm": 0.236328125, "learning_rate": 0.0011862241017177265, "loss": 0.8088, "step": 12038 }, { "epoch": 0.32307320738514383, "grad_norm": 0.205078125, "learning_rate": 0.0011862181137539446, "loss": 0.6707, "step": 12039 }, { "epoch": 0.32310004293688277, "grad_norm": 0.23828125, "learning_rate": 0.0011862121245041727, "loss": 0.9448, "step": 12040 }, { "epoch": 0.3231268784886217, "grad_norm": 0.2255859375, "learning_rate": 0.0011862061339684238, "loss": 0.8015, "step": 12041 }, { "epoch": 0.3231537140403607, "grad_norm": 0.2177734375, "learning_rate": 0.0011862001421467112, "loss": 0.8484, "step": 12042 }, { "epoch": 0.32318054959209963, "grad_norm": 0.22265625, "learning_rate": 0.0011861941490390478, "loss": 0.8072, "step": 12043 }, { "epoch": 0.32320738514383857, "grad_norm": 0.2197265625, "learning_rate": 0.0011861881546454472, "loss": 0.7993, "step": 12044 }, { "epoch": 0.3232342206955775, "grad_norm": 0.2392578125, "learning_rate": 0.001186182158965922, "loss": 0.7989, "step": 12045 }, { "epoch": 0.32326105624731644, "grad_norm": 0.2236328125, "learning_rate": 0.0011861761620004857, "loss": 0.8241, "step": 12046 }, { "epoch": 0.32328789179905537, "grad_norm": 0.2197265625, "learning_rate": 0.0011861701637491512, "loss": 0.7932, "step": 12047 }, { "epoch": 0.3233147273507943, "grad_norm": 0.23046875, "learning_rate": 0.001186164164211932, "loss": 0.8205, "step": 12048 }, { "epoch": 0.3233415629025333, "grad_norm": 0.2294921875, "learning_rate": 0.001186158163388841, "loss": 0.8624, "step": 12049 }, { "epoch": 0.32336839845427223, "grad_norm": 0.2275390625, "learning_rate": 0.0011861521612798913, "loss": 0.8222, "step": 12050 }, { "epoch": 0.32339523400601117, "grad_norm": 0.22265625, "learning_rate": 0.0011861461578850964, "loss": 0.8128, "step": 12051 }, { "epoch": 0.3234220695577501, "grad_norm": 0.23828125, "learning_rate": 0.0011861401532044693, "loss": 0.9037, "step": 12052 }, { "epoch": 0.32344890510948904, "grad_norm": 0.228515625, "learning_rate": 0.001186134147238023, "loss": 0.8705, "step": 12053 }, { "epoch": 0.323475740661228, "grad_norm": 0.2265625, "learning_rate": 0.001186128139985771, "loss": 0.8637, "step": 12054 }, { "epoch": 0.32350257621296696, "grad_norm": 0.224609375, "learning_rate": 0.0011861221314477261, "loss": 0.7709, "step": 12055 }, { "epoch": 0.3235294117647059, "grad_norm": 0.228515625, "learning_rate": 0.001186116121623902, "loss": 0.8236, "step": 12056 }, { "epoch": 0.32355624731644483, "grad_norm": 0.2216796875, "learning_rate": 0.0011861101105143112, "loss": 0.8135, "step": 12057 }, { "epoch": 0.32358308286818377, "grad_norm": 0.2412109375, "learning_rate": 0.0011861040981189675, "loss": 0.9735, "step": 12058 }, { "epoch": 0.3236099184199227, "grad_norm": 0.23046875, "learning_rate": 0.0011860980844378838, "loss": 0.9219, "step": 12059 }, { "epoch": 0.32363675397166164, "grad_norm": 0.2236328125, "learning_rate": 0.0011860920694710731, "loss": 0.8229, "step": 12060 }, { "epoch": 0.3236635895234006, "grad_norm": 0.228515625, "learning_rate": 0.0011860860532185492, "loss": 0.8521, "step": 12061 }, { "epoch": 0.32369042507513957, "grad_norm": 0.236328125, "learning_rate": 0.0011860800356803247, "loss": 0.94, "step": 12062 }, { "epoch": 0.3237172606268785, "grad_norm": 0.2236328125, "learning_rate": 0.001186074016856413, "loss": 0.8342, "step": 12063 }, { "epoch": 0.32374409617861744, "grad_norm": 0.2177734375, "learning_rate": 0.0011860679967468275, "loss": 0.7669, "step": 12064 }, { "epoch": 0.32377093173035637, "grad_norm": 0.2216796875, "learning_rate": 0.0011860619753515812, "loss": 0.7978, "step": 12065 }, { "epoch": 0.3237977672820953, "grad_norm": 0.2255859375, "learning_rate": 0.0011860559526706872, "loss": 0.8232, "step": 12066 }, { "epoch": 0.32382460283383424, "grad_norm": 0.2412109375, "learning_rate": 0.001186049928704159, "loss": 0.922, "step": 12067 }, { "epoch": 0.32385143838557323, "grad_norm": 0.1962890625, "learning_rate": 0.0011860439034520095, "loss": 0.6575, "step": 12068 }, { "epoch": 0.32387827393731217, "grad_norm": 0.2119140625, "learning_rate": 0.0011860378769142524, "loss": 0.8099, "step": 12069 }, { "epoch": 0.3239051094890511, "grad_norm": 0.216796875, "learning_rate": 0.0011860318490909004, "loss": 0.7691, "step": 12070 }, { "epoch": 0.32393194504079004, "grad_norm": 0.21484375, "learning_rate": 0.001186025819981967, "loss": 0.7593, "step": 12071 }, { "epoch": 0.32395878059252897, "grad_norm": 0.2373046875, "learning_rate": 0.0011860197895874655, "loss": 0.9792, "step": 12072 }, { "epoch": 0.3239856161442679, "grad_norm": 0.2119140625, "learning_rate": 0.0011860137579074086, "loss": 0.8, "step": 12073 }, { "epoch": 0.32401245169600684, "grad_norm": 0.2275390625, "learning_rate": 0.0011860077249418101, "loss": 0.8398, "step": 12074 }, { "epoch": 0.32403928724774583, "grad_norm": 0.2080078125, "learning_rate": 0.0011860016906906831, "loss": 0.7338, "step": 12075 }, { "epoch": 0.32406612279948477, "grad_norm": 0.2333984375, "learning_rate": 0.0011859956551540408, "loss": 0.8662, "step": 12076 }, { "epoch": 0.3240929583512237, "grad_norm": 0.236328125, "learning_rate": 0.0011859896183318965, "loss": 0.8561, "step": 12077 }, { "epoch": 0.32411979390296264, "grad_norm": 0.220703125, "learning_rate": 0.0011859835802242632, "loss": 0.7737, "step": 12078 }, { "epoch": 0.3241466294547016, "grad_norm": 0.2216796875, "learning_rate": 0.0011859775408311546, "loss": 0.7897, "step": 12079 }, { "epoch": 0.3241734650064405, "grad_norm": 0.2412109375, "learning_rate": 0.0011859715001525835, "loss": 0.8636, "step": 12080 }, { "epoch": 0.3242003005581795, "grad_norm": 0.234375, "learning_rate": 0.0011859654581885635, "loss": 0.8794, "step": 12081 }, { "epoch": 0.32422713610991843, "grad_norm": 0.2421875, "learning_rate": 0.0011859594149391073, "loss": 0.9825, "step": 12082 }, { "epoch": 0.32425397166165737, "grad_norm": 0.2236328125, "learning_rate": 0.0011859533704042288, "loss": 0.787, "step": 12083 }, { "epoch": 0.3242808072133963, "grad_norm": 0.220703125, "learning_rate": 0.001185947324583941, "loss": 0.7616, "step": 12084 }, { "epoch": 0.32430764276513524, "grad_norm": 0.25, "learning_rate": 0.001185941277478257, "loss": 1.0195, "step": 12085 }, { "epoch": 0.3243344783168742, "grad_norm": 0.2236328125, "learning_rate": 0.0011859352290871903, "loss": 0.7898, "step": 12086 }, { "epoch": 0.32436131386861317, "grad_norm": 0.2177734375, "learning_rate": 0.0011859291794107541, "loss": 0.8271, "step": 12087 }, { "epoch": 0.3243881494203521, "grad_norm": 0.2412109375, "learning_rate": 0.0011859231284489617, "loss": 0.8451, "step": 12088 }, { "epoch": 0.32441498497209104, "grad_norm": 0.2255859375, "learning_rate": 0.0011859170762018264, "loss": 0.8004, "step": 12089 }, { "epoch": 0.32444182052382997, "grad_norm": 0.251953125, "learning_rate": 0.0011859110226693612, "loss": 0.8631, "step": 12090 }, { "epoch": 0.3244686560755689, "grad_norm": 0.21875, "learning_rate": 0.00118590496785158, "loss": 0.7526, "step": 12091 }, { "epoch": 0.32449549162730784, "grad_norm": 0.21875, "learning_rate": 0.0011858989117484954, "loss": 0.746, "step": 12092 }, { "epoch": 0.3245223271790468, "grad_norm": 0.21875, "learning_rate": 0.0011858928543601207, "loss": 0.7786, "step": 12093 }, { "epoch": 0.32454916273078577, "grad_norm": 0.232421875, "learning_rate": 0.0011858867956864698, "loss": 0.8778, "step": 12094 }, { "epoch": 0.3245759982825247, "grad_norm": 0.2275390625, "learning_rate": 0.0011858807357275555, "loss": 0.7991, "step": 12095 }, { "epoch": 0.32460283383426364, "grad_norm": 0.234375, "learning_rate": 0.0011858746744833914, "loss": 0.8722, "step": 12096 }, { "epoch": 0.3246296693860026, "grad_norm": 0.2314453125, "learning_rate": 0.0011858686119539904, "loss": 0.7993, "step": 12097 }, { "epoch": 0.3246565049377415, "grad_norm": 0.234375, "learning_rate": 0.0011858625481393661, "loss": 0.8847, "step": 12098 }, { "epoch": 0.32468334048948044, "grad_norm": 0.2158203125, "learning_rate": 0.0011858564830395319, "loss": 0.7496, "step": 12099 }, { "epoch": 0.32471017604121943, "grad_norm": 0.240234375, "learning_rate": 0.001185850416654501, "loss": 0.8988, "step": 12100 }, { "epoch": 0.32473701159295837, "grad_norm": 0.2314453125, "learning_rate": 0.0011858443489842865, "loss": 0.8544, "step": 12101 }, { "epoch": 0.3247638471446973, "grad_norm": 0.220703125, "learning_rate": 0.0011858382800289016, "loss": 0.7259, "step": 12102 }, { "epoch": 0.32479068269643624, "grad_norm": 0.21875, "learning_rate": 0.00118583220978836, "loss": 0.7999, "step": 12103 }, { "epoch": 0.3248175182481752, "grad_norm": 0.2333984375, "learning_rate": 0.0011858261382626752, "loss": 0.8532, "step": 12104 }, { "epoch": 0.3248443537999141, "grad_norm": 0.234375, "learning_rate": 0.0011858200654518598, "loss": 0.8589, "step": 12105 }, { "epoch": 0.32487118935165304, "grad_norm": 0.2158203125, "learning_rate": 0.0011858139913559279, "loss": 0.8002, "step": 12106 }, { "epoch": 0.32489802490339204, "grad_norm": 0.224609375, "learning_rate": 0.0011858079159748922, "loss": 0.8255, "step": 12107 }, { "epoch": 0.32492486045513097, "grad_norm": 0.2265625, "learning_rate": 0.0011858018393087664, "loss": 0.8358, "step": 12108 }, { "epoch": 0.3249516960068699, "grad_norm": 0.2451171875, "learning_rate": 0.0011857957613575637, "loss": 0.8962, "step": 12109 }, { "epoch": 0.32497853155860884, "grad_norm": 0.228515625, "learning_rate": 0.0011857896821212972, "loss": 0.7984, "step": 12110 }, { "epoch": 0.3250053671103478, "grad_norm": 0.23046875, "learning_rate": 0.0011857836015999807, "loss": 0.8442, "step": 12111 }, { "epoch": 0.3250322026620867, "grad_norm": 0.23046875, "learning_rate": 0.0011857775197936275, "loss": 0.8082, "step": 12112 }, { "epoch": 0.3250590382138257, "grad_norm": 0.2216796875, "learning_rate": 0.0011857714367022505, "loss": 0.7665, "step": 12113 }, { "epoch": 0.32508587376556464, "grad_norm": 0.2431640625, "learning_rate": 0.0011857653523258632, "loss": 0.8827, "step": 12114 }, { "epoch": 0.32511270931730357, "grad_norm": 0.2216796875, "learning_rate": 0.0011857592666644793, "loss": 0.7628, "step": 12115 }, { "epoch": 0.3251395448690425, "grad_norm": 0.216796875, "learning_rate": 0.0011857531797181118, "loss": 0.7858, "step": 12116 }, { "epoch": 0.32516638042078144, "grad_norm": 0.21484375, "learning_rate": 0.0011857470914867741, "loss": 0.8112, "step": 12117 }, { "epoch": 0.3251932159725204, "grad_norm": 0.2275390625, "learning_rate": 0.0011857410019704797, "loss": 0.8705, "step": 12118 }, { "epoch": 0.3252200515242593, "grad_norm": 0.2451171875, "learning_rate": 0.001185734911169242, "loss": 0.9489, "step": 12119 }, { "epoch": 0.3252468870759983, "grad_norm": 0.2314453125, "learning_rate": 0.001185728819083074, "loss": 0.7691, "step": 12120 }, { "epoch": 0.32527372262773724, "grad_norm": 0.2431640625, "learning_rate": 0.0011857227257119894, "loss": 0.8767, "step": 12121 }, { "epoch": 0.3253005581794762, "grad_norm": 0.2255859375, "learning_rate": 0.0011857166310560014, "loss": 0.7483, "step": 12122 }, { "epoch": 0.3253273937312151, "grad_norm": 0.20703125, "learning_rate": 0.0011857105351151234, "loss": 0.7686, "step": 12123 }, { "epoch": 0.32535422928295404, "grad_norm": 0.2412109375, "learning_rate": 0.001185704437889369, "loss": 0.8669, "step": 12124 }, { "epoch": 0.325381064834693, "grad_norm": 0.2353515625, "learning_rate": 0.0011856983393787512, "loss": 0.7562, "step": 12125 }, { "epoch": 0.32540790038643197, "grad_norm": 0.2216796875, "learning_rate": 0.0011856922395832835, "loss": 0.7776, "step": 12126 }, { "epoch": 0.3254347359381709, "grad_norm": 0.2373046875, "learning_rate": 0.0011856861385029795, "loss": 0.9012, "step": 12127 }, { "epoch": 0.32546157148990984, "grad_norm": 0.2236328125, "learning_rate": 0.0011856800361378524, "loss": 0.7435, "step": 12128 }, { "epoch": 0.3254884070416488, "grad_norm": 0.2353515625, "learning_rate": 0.0011856739324879156, "loss": 0.8613, "step": 12129 }, { "epoch": 0.3255152425933877, "grad_norm": 0.236328125, "learning_rate": 0.0011856678275531825, "loss": 0.8577, "step": 12130 }, { "epoch": 0.32554207814512665, "grad_norm": 0.2216796875, "learning_rate": 0.0011856617213336666, "loss": 0.7567, "step": 12131 }, { "epoch": 0.3255689136968656, "grad_norm": 0.2236328125, "learning_rate": 0.001185655613829381, "loss": 0.7997, "step": 12132 }, { "epoch": 0.32559574924860457, "grad_norm": 0.2353515625, "learning_rate": 0.0011856495050403392, "loss": 0.817, "step": 12133 }, { "epoch": 0.3256225848003435, "grad_norm": 0.2216796875, "learning_rate": 0.001185643394966555, "loss": 0.8143, "step": 12134 }, { "epoch": 0.32564942035208244, "grad_norm": 0.2373046875, "learning_rate": 0.0011856372836080414, "loss": 0.9469, "step": 12135 }, { "epoch": 0.3256762559038214, "grad_norm": 0.2294921875, "learning_rate": 0.0011856311709648117, "loss": 0.8343, "step": 12136 }, { "epoch": 0.3257030914555603, "grad_norm": 0.2255859375, "learning_rate": 0.0011856250570368796, "loss": 0.809, "step": 12137 }, { "epoch": 0.32572992700729925, "grad_norm": 0.224609375, "learning_rate": 0.0011856189418242584, "loss": 0.8086, "step": 12138 }, { "epoch": 0.32575676255903824, "grad_norm": 0.21875, "learning_rate": 0.0011856128253269617, "loss": 0.7354, "step": 12139 }, { "epoch": 0.3257835981107772, "grad_norm": 0.2216796875, "learning_rate": 0.0011856067075450025, "loss": 0.7717, "step": 12140 }, { "epoch": 0.3258104336625161, "grad_norm": 0.2294921875, "learning_rate": 0.0011856005884783947, "loss": 0.8875, "step": 12141 }, { "epoch": 0.32583726921425504, "grad_norm": 0.224609375, "learning_rate": 0.0011855944681271513, "loss": 0.7888, "step": 12142 }, { "epoch": 0.325864104765994, "grad_norm": 0.212890625, "learning_rate": 0.0011855883464912862, "loss": 0.7808, "step": 12143 }, { "epoch": 0.3258909403177329, "grad_norm": 0.236328125, "learning_rate": 0.0011855822235708124, "loss": 0.8515, "step": 12144 }, { "epoch": 0.3259177758694719, "grad_norm": 0.2470703125, "learning_rate": 0.0011855760993657433, "loss": 0.9106, "step": 12145 }, { "epoch": 0.32594461142121084, "grad_norm": 0.212890625, "learning_rate": 0.0011855699738760926, "loss": 0.8226, "step": 12146 }, { "epoch": 0.3259714469729498, "grad_norm": 0.228515625, "learning_rate": 0.0011855638471018738, "loss": 0.8589, "step": 12147 }, { "epoch": 0.3259982825246887, "grad_norm": 0.232421875, "learning_rate": 0.0011855577190431, "loss": 0.8987, "step": 12148 }, { "epoch": 0.32602511807642764, "grad_norm": 0.1943359375, "learning_rate": 0.001185551589699785, "loss": 0.6709, "step": 12149 }, { "epoch": 0.3260519536281666, "grad_norm": 0.21484375, "learning_rate": 0.0011855454590719422, "loss": 0.7646, "step": 12150 }, { "epoch": 0.3260787891799055, "grad_norm": 0.216796875, "learning_rate": 0.0011855393271595846, "loss": 0.7716, "step": 12151 }, { "epoch": 0.3261056247316445, "grad_norm": 0.2275390625, "learning_rate": 0.0011855331939627262, "loss": 0.8335, "step": 12152 }, { "epoch": 0.32613246028338344, "grad_norm": 0.216796875, "learning_rate": 0.0011855270594813803, "loss": 0.7861, "step": 12153 }, { "epoch": 0.3261592958351224, "grad_norm": 0.2255859375, "learning_rate": 0.0011855209237155602, "loss": 0.843, "step": 12154 }, { "epoch": 0.3261861313868613, "grad_norm": 0.2060546875, "learning_rate": 0.0011855147866652795, "loss": 0.7654, "step": 12155 }, { "epoch": 0.32621296693860025, "grad_norm": 0.236328125, "learning_rate": 0.0011855086483305514, "loss": 0.8727, "step": 12156 }, { "epoch": 0.3262398024903392, "grad_norm": 0.2236328125, "learning_rate": 0.0011855025087113898, "loss": 0.8179, "step": 12157 }, { "epoch": 0.32626663804207817, "grad_norm": 0.2197265625, "learning_rate": 0.001185496367807808, "loss": 0.826, "step": 12158 }, { "epoch": 0.3262934735938171, "grad_norm": 0.23046875, "learning_rate": 0.0011854902256198192, "loss": 0.8299, "step": 12159 }, { "epoch": 0.32632030914555604, "grad_norm": 0.2275390625, "learning_rate": 0.0011854840821474374, "loss": 0.8935, "step": 12160 }, { "epoch": 0.326347144697295, "grad_norm": 0.2080078125, "learning_rate": 0.0011854779373906756, "loss": 0.7486, "step": 12161 }, { "epoch": 0.3263739802490339, "grad_norm": 0.2333984375, "learning_rate": 0.0011854717913495475, "loss": 0.9438, "step": 12162 }, { "epoch": 0.32640081580077285, "grad_norm": 0.234375, "learning_rate": 0.0011854656440240664, "loss": 0.9437, "step": 12163 }, { "epoch": 0.3264276513525118, "grad_norm": 0.234375, "learning_rate": 0.0011854594954142461, "loss": 0.8178, "step": 12164 }, { "epoch": 0.3264544869042508, "grad_norm": 0.208984375, "learning_rate": 0.0011854533455200999, "loss": 0.754, "step": 12165 }, { "epoch": 0.3264813224559897, "grad_norm": 0.220703125, "learning_rate": 0.0011854471943416412, "loss": 0.8056, "step": 12166 }, { "epoch": 0.32650815800772864, "grad_norm": 0.2275390625, "learning_rate": 0.0011854410418788836, "loss": 0.8468, "step": 12167 }, { "epoch": 0.3265349935594676, "grad_norm": 0.224609375, "learning_rate": 0.0011854348881318406, "loss": 0.7599, "step": 12168 }, { "epoch": 0.3265618291112065, "grad_norm": 0.2314453125, "learning_rate": 0.0011854287331005257, "loss": 0.8119, "step": 12169 }, { "epoch": 0.32658866466294545, "grad_norm": 0.2353515625, "learning_rate": 0.0011854225767849525, "loss": 0.874, "step": 12170 }, { "epoch": 0.32661550021468444, "grad_norm": 0.2353515625, "learning_rate": 0.0011854164191851342, "loss": 0.8712, "step": 12171 }, { "epoch": 0.3266423357664234, "grad_norm": 0.240234375, "learning_rate": 0.0011854102603010847, "loss": 0.8832, "step": 12172 }, { "epoch": 0.3266691713181623, "grad_norm": 0.2158203125, "learning_rate": 0.0011854041001328171, "loss": 0.7486, "step": 12173 }, { "epoch": 0.32669600686990125, "grad_norm": 0.2294921875, "learning_rate": 0.0011853979386803454, "loss": 0.8789, "step": 12174 }, { "epoch": 0.3267228424216402, "grad_norm": 0.2373046875, "learning_rate": 0.0011853917759436828, "loss": 0.963, "step": 12175 }, { "epoch": 0.3267496779733791, "grad_norm": 0.2314453125, "learning_rate": 0.0011853856119228426, "loss": 0.8907, "step": 12176 }, { "epoch": 0.32677651352511805, "grad_norm": 0.2333984375, "learning_rate": 0.001185379446617839, "loss": 0.8958, "step": 12177 }, { "epoch": 0.32680334907685704, "grad_norm": 0.2392578125, "learning_rate": 0.001185373280028685, "loss": 0.8454, "step": 12178 }, { "epoch": 0.326830184628596, "grad_norm": 0.220703125, "learning_rate": 0.0011853671121553938, "loss": 0.8168, "step": 12179 }, { "epoch": 0.3268570201803349, "grad_norm": 0.232421875, "learning_rate": 0.00118536094299798, "loss": 0.8729, "step": 12180 }, { "epoch": 0.32688385573207385, "grad_norm": 0.2197265625, "learning_rate": 0.001185354772556456, "loss": 0.8272, "step": 12181 }, { "epoch": 0.3269106912838128, "grad_norm": 0.2255859375, "learning_rate": 0.0011853486008308362, "loss": 0.8607, "step": 12182 }, { "epoch": 0.3269375268355517, "grad_norm": 0.216796875, "learning_rate": 0.0011853424278211336, "loss": 0.8508, "step": 12183 }, { "epoch": 0.3269643623872907, "grad_norm": 0.244140625, "learning_rate": 0.001185336253527362, "loss": 0.8834, "step": 12184 }, { "epoch": 0.32699119793902964, "grad_norm": 0.224609375, "learning_rate": 0.001185330077949535, "loss": 0.7915, "step": 12185 }, { "epoch": 0.3270180334907686, "grad_norm": 0.2265625, "learning_rate": 0.0011853239010876657, "loss": 0.7963, "step": 12186 }, { "epoch": 0.3270448690425075, "grad_norm": 0.21484375, "learning_rate": 0.001185317722941768, "loss": 0.7577, "step": 12187 }, { "epoch": 0.32707170459424645, "grad_norm": 0.2216796875, "learning_rate": 0.0011853115435118556, "loss": 0.7691, "step": 12188 }, { "epoch": 0.3270985401459854, "grad_norm": 0.2373046875, "learning_rate": 0.0011853053627979419, "loss": 0.9015, "step": 12189 }, { "epoch": 0.3271253756977243, "grad_norm": 0.2314453125, "learning_rate": 0.0011852991808000404, "loss": 0.8389, "step": 12190 }, { "epoch": 0.3271522112494633, "grad_norm": 0.228515625, "learning_rate": 0.0011852929975181645, "loss": 0.8987, "step": 12191 }, { "epoch": 0.32717904680120224, "grad_norm": 0.2255859375, "learning_rate": 0.0011852868129523283, "loss": 0.8109, "step": 12192 }, { "epoch": 0.3272058823529412, "grad_norm": 0.2333984375, "learning_rate": 0.0011852806271025447, "loss": 0.912, "step": 12193 }, { "epoch": 0.3272327179046801, "grad_norm": 0.2255859375, "learning_rate": 0.0011852744399688277, "loss": 0.7672, "step": 12194 }, { "epoch": 0.32725955345641905, "grad_norm": 0.2333984375, "learning_rate": 0.0011852682515511909, "loss": 0.9189, "step": 12195 }, { "epoch": 0.327286389008158, "grad_norm": 0.224609375, "learning_rate": 0.0011852620618496477, "loss": 0.8951, "step": 12196 }, { "epoch": 0.327313224559897, "grad_norm": 0.224609375, "learning_rate": 0.0011852558708642115, "loss": 0.851, "step": 12197 }, { "epoch": 0.3273400601116359, "grad_norm": 0.220703125, "learning_rate": 0.0011852496785948964, "loss": 0.8266, "step": 12198 }, { "epoch": 0.32736689566337485, "grad_norm": 0.2158203125, "learning_rate": 0.0011852434850417154, "loss": 0.787, "step": 12199 }, { "epoch": 0.3273937312151138, "grad_norm": 0.21875, "learning_rate": 0.0011852372902046826, "loss": 0.802, "step": 12200 }, { "epoch": 0.3274205667668527, "grad_norm": 0.216796875, "learning_rate": 0.0011852310940838114, "loss": 0.7699, "step": 12201 }, { "epoch": 0.32744740231859165, "grad_norm": 0.2236328125, "learning_rate": 0.0011852248966791152, "loss": 0.818, "step": 12202 }, { "epoch": 0.3274742378703306, "grad_norm": 0.1982421875, "learning_rate": 0.0011852186979906078, "loss": 0.7011, "step": 12203 }, { "epoch": 0.3275010734220696, "grad_norm": 0.2421875, "learning_rate": 0.0011852124980183028, "loss": 0.8079, "step": 12204 }, { "epoch": 0.3275279089738085, "grad_norm": 0.2451171875, "learning_rate": 0.0011852062967622136, "loss": 0.8416, "step": 12205 }, { "epoch": 0.32755474452554745, "grad_norm": 0.2109375, "learning_rate": 0.001185200094222354, "loss": 0.7172, "step": 12206 }, { "epoch": 0.3275815800772864, "grad_norm": 0.224609375, "learning_rate": 0.0011851938903987378, "loss": 0.7888, "step": 12207 }, { "epoch": 0.3276084156290253, "grad_norm": 0.2353515625, "learning_rate": 0.001185187685291378, "loss": 0.8213, "step": 12208 }, { "epoch": 0.32763525118076425, "grad_norm": 0.236328125, "learning_rate": 0.001185181478900289, "loss": 0.8654, "step": 12209 }, { "epoch": 0.32766208673250324, "grad_norm": 0.2255859375, "learning_rate": 0.0011851752712254836, "loss": 0.7695, "step": 12210 }, { "epoch": 0.3276889222842422, "grad_norm": 0.251953125, "learning_rate": 0.0011851690622669759, "loss": 0.9548, "step": 12211 }, { "epoch": 0.3277157578359811, "grad_norm": 0.3125, "learning_rate": 0.0011851628520247797, "loss": 1.0601, "step": 12212 }, { "epoch": 0.32774259338772005, "grad_norm": 0.3125, "learning_rate": 0.0011851566404989082, "loss": 0.9652, "step": 12213 }, { "epoch": 0.327769428939459, "grad_norm": 0.283203125, "learning_rate": 0.0011851504276893753, "loss": 1.011, "step": 12214 }, { "epoch": 0.3277962644911979, "grad_norm": 0.26953125, "learning_rate": 0.0011851442135961943, "loss": 0.9689, "step": 12215 }, { "epoch": 0.3278231000429369, "grad_norm": 0.400390625, "learning_rate": 0.001185137998219379, "loss": 0.963, "step": 12216 }, { "epoch": 0.32784993559467585, "grad_norm": 0.298828125, "learning_rate": 0.0011851317815589433, "loss": 0.8499, "step": 12217 }, { "epoch": 0.3278767711464148, "grad_norm": 0.2578125, "learning_rate": 0.0011851255636149006, "loss": 0.9219, "step": 12218 }, { "epoch": 0.3279036066981537, "grad_norm": 0.2451171875, "learning_rate": 0.0011851193443872645, "loss": 0.8401, "step": 12219 }, { "epoch": 0.32793044224989265, "grad_norm": 0.25390625, "learning_rate": 0.0011851131238760488, "loss": 0.9564, "step": 12220 }, { "epoch": 0.3279572778016316, "grad_norm": 0.267578125, "learning_rate": 0.001185106902081267, "loss": 0.9634, "step": 12221 }, { "epoch": 0.3279841133533705, "grad_norm": 0.25, "learning_rate": 0.0011851006790029326, "loss": 0.8992, "step": 12222 }, { "epoch": 0.3280109489051095, "grad_norm": 0.27734375, "learning_rate": 0.0011850944546410594, "loss": 0.9231, "step": 12223 }, { "epoch": 0.32803778445684845, "grad_norm": 0.2431640625, "learning_rate": 0.0011850882289956616, "loss": 0.8644, "step": 12224 }, { "epoch": 0.3280646200085874, "grad_norm": 0.240234375, "learning_rate": 0.001185082002066752, "loss": 0.9508, "step": 12225 }, { "epoch": 0.3280914555603263, "grad_norm": 0.251953125, "learning_rate": 0.0011850757738543446, "loss": 0.8964, "step": 12226 }, { "epoch": 0.32811829111206525, "grad_norm": 0.2470703125, "learning_rate": 0.0011850695443584532, "loss": 0.9475, "step": 12227 }, { "epoch": 0.3281451266638042, "grad_norm": 0.2412109375, "learning_rate": 0.0011850633135790914, "loss": 0.9344, "step": 12228 }, { "epoch": 0.3281719622155432, "grad_norm": 0.2421875, "learning_rate": 0.0011850570815162727, "loss": 0.8993, "step": 12229 }, { "epoch": 0.3281987977672821, "grad_norm": 0.2255859375, "learning_rate": 0.001185050848170011, "loss": 0.8767, "step": 12230 }, { "epoch": 0.32822563331902105, "grad_norm": 0.232421875, "learning_rate": 0.0011850446135403198, "loss": 0.7687, "step": 12231 }, { "epoch": 0.32825246887076, "grad_norm": 0.23828125, "learning_rate": 0.0011850383776272127, "loss": 0.9007, "step": 12232 }, { "epoch": 0.3282793044224989, "grad_norm": 0.23828125, "learning_rate": 0.0011850321404307037, "loss": 0.9336, "step": 12233 }, { "epoch": 0.32830613997423785, "grad_norm": 0.2255859375, "learning_rate": 0.0011850259019508063, "loss": 0.7808, "step": 12234 }, { "epoch": 0.3283329755259768, "grad_norm": 0.240234375, "learning_rate": 0.001185019662187534, "loss": 0.9404, "step": 12235 }, { "epoch": 0.3283598110777158, "grad_norm": 0.2294921875, "learning_rate": 0.001185013421140901, "loss": 0.8847, "step": 12236 }, { "epoch": 0.3283866466294547, "grad_norm": 0.236328125, "learning_rate": 0.0011850071788109204, "loss": 0.9362, "step": 12237 }, { "epoch": 0.32841348218119365, "grad_norm": 0.267578125, "learning_rate": 0.0011850009351976063, "loss": 1.0468, "step": 12238 }, { "epoch": 0.3284403177329326, "grad_norm": 0.2412109375, "learning_rate": 0.0011849946903009722, "loss": 0.9424, "step": 12239 }, { "epoch": 0.3284671532846715, "grad_norm": 0.232421875, "learning_rate": 0.0011849884441210317, "loss": 0.9219, "step": 12240 }, { "epoch": 0.32849398883641046, "grad_norm": 0.2392578125, "learning_rate": 0.001184982196657799, "loss": 0.9307, "step": 12241 }, { "epoch": 0.32852082438814945, "grad_norm": 0.2294921875, "learning_rate": 0.0011849759479112872, "loss": 0.9368, "step": 12242 }, { "epoch": 0.3285476599398884, "grad_norm": 0.2421875, "learning_rate": 0.0011849696978815103, "loss": 0.9437, "step": 12243 }, { "epoch": 0.3285744954916273, "grad_norm": 0.236328125, "learning_rate": 0.001184963446568482, "loss": 0.8382, "step": 12244 }, { "epoch": 0.32860133104336625, "grad_norm": 0.2275390625, "learning_rate": 0.0011849571939722163, "loss": 0.9029, "step": 12245 }, { "epoch": 0.3286281665951052, "grad_norm": 0.2255859375, "learning_rate": 0.0011849509400927263, "loss": 0.9039, "step": 12246 }, { "epoch": 0.3286550021468441, "grad_norm": 0.2373046875, "learning_rate": 0.001184944684930026, "loss": 0.8575, "step": 12247 }, { "epoch": 0.32868183769858306, "grad_norm": 0.248046875, "learning_rate": 0.0011849384284841292, "loss": 0.9338, "step": 12248 }, { "epoch": 0.32870867325032205, "grad_norm": 0.236328125, "learning_rate": 0.0011849321707550495, "loss": 0.9179, "step": 12249 }, { "epoch": 0.328735508802061, "grad_norm": 0.2412109375, "learning_rate": 0.0011849259117428011, "loss": 0.9291, "step": 12250 }, { "epoch": 0.3287623443537999, "grad_norm": 0.224609375, "learning_rate": 0.0011849196514473969, "loss": 0.8499, "step": 12251 }, { "epoch": 0.32878917990553885, "grad_norm": 0.2412109375, "learning_rate": 0.0011849133898688513, "loss": 0.9636, "step": 12252 }, { "epoch": 0.3288160154572778, "grad_norm": 0.23046875, "learning_rate": 0.0011849071270071778, "loss": 0.8335, "step": 12253 }, { "epoch": 0.3288428510090167, "grad_norm": 0.2158203125, "learning_rate": 0.00118490086286239, "loss": 0.8245, "step": 12254 }, { "epoch": 0.3288696865607557, "grad_norm": 0.232421875, "learning_rate": 0.001184894597434502, "loss": 0.9152, "step": 12255 }, { "epoch": 0.32889652211249465, "grad_norm": 0.232421875, "learning_rate": 0.0011848883307235272, "loss": 0.878, "step": 12256 }, { "epoch": 0.3289233576642336, "grad_norm": 0.26171875, "learning_rate": 0.0011848820627294795, "loss": 1.043, "step": 12257 }, { "epoch": 0.3289501932159725, "grad_norm": 0.234375, "learning_rate": 0.0011848757934523725, "loss": 0.9541, "step": 12258 }, { "epoch": 0.32897702876771145, "grad_norm": 0.255859375, "learning_rate": 0.0011848695228922203, "loss": 1.1277, "step": 12259 }, { "epoch": 0.3290038643194504, "grad_norm": 0.2158203125, "learning_rate": 0.0011848632510490361, "loss": 0.8208, "step": 12260 }, { "epoch": 0.3290306998711893, "grad_norm": 0.2353515625, "learning_rate": 0.0011848569779228345, "loss": 0.9269, "step": 12261 }, { "epoch": 0.3290575354229283, "grad_norm": 0.2333984375, "learning_rate": 0.0011848507035136285, "loss": 0.919, "step": 12262 }, { "epoch": 0.32908437097466725, "grad_norm": 0.2392578125, "learning_rate": 0.0011848444278214321, "loss": 0.9204, "step": 12263 }, { "epoch": 0.3291112065264062, "grad_norm": 0.228515625, "learning_rate": 0.001184838150846259, "loss": 0.8641, "step": 12264 }, { "epoch": 0.3291380420781451, "grad_norm": 0.2451171875, "learning_rate": 0.0011848318725881232, "loss": 0.9547, "step": 12265 }, { "epoch": 0.32916487762988406, "grad_norm": 0.228515625, "learning_rate": 0.0011848255930470384, "loss": 0.9005, "step": 12266 }, { "epoch": 0.329191713181623, "grad_norm": 0.205078125, "learning_rate": 0.001184819312223018, "loss": 0.7463, "step": 12267 }, { "epoch": 0.329218548733362, "grad_norm": 0.24609375, "learning_rate": 0.0011848130301160766, "loss": 0.9255, "step": 12268 }, { "epoch": 0.3292453842851009, "grad_norm": 0.2314453125, "learning_rate": 0.001184806746726227, "loss": 0.8641, "step": 12269 }, { "epoch": 0.32927221983683985, "grad_norm": 0.2294921875, "learning_rate": 0.0011848004620534835, "loss": 0.9209, "step": 12270 }, { "epoch": 0.3292990553885788, "grad_norm": 0.2373046875, "learning_rate": 0.00118479417609786, "loss": 0.9085, "step": 12271 }, { "epoch": 0.3293258909403177, "grad_norm": 0.2431640625, "learning_rate": 0.00118478788885937, "loss": 0.9924, "step": 12272 }, { "epoch": 0.32935272649205666, "grad_norm": 0.2314453125, "learning_rate": 0.0011847816003380275, "loss": 0.8886, "step": 12273 }, { "epoch": 0.3293795620437956, "grad_norm": 0.234375, "learning_rate": 0.0011847753105338464, "loss": 0.9143, "step": 12274 }, { "epoch": 0.3294063975955346, "grad_norm": 0.244140625, "learning_rate": 0.00118476901944684, "loss": 0.9164, "step": 12275 }, { "epoch": 0.3294332331472735, "grad_norm": 0.2373046875, "learning_rate": 0.0011847627270770225, "loss": 0.9883, "step": 12276 }, { "epoch": 0.32946006869901245, "grad_norm": 0.251953125, "learning_rate": 0.0011847564334244076, "loss": 1.0557, "step": 12277 }, { "epoch": 0.3294869042507514, "grad_norm": 0.24609375, "learning_rate": 0.0011847501384890091, "loss": 0.9473, "step": 12278 }, { "epoch": 0.3295137398024903, "grad_norm": 0.2421875, "learning_rate": 0.001184743842270841, "loss": 1.0031, "step": 12279 }, { "epoch": 0.32954057535422926, "grad_norm": 0.2333984375, "learning_rate": 0.0011847375447699168, "loss": 0.8959, "step": 12280 }, { "epoch": 0.32956741090596825, "grad_norm": 0.2265625, "learning_rate": 0.0011847312459862505, "loss": 0.8471, "step": 12281 }, { "epoch": 0.3295942464577072, "grad_norm": 0.23828125, "learning_rate": 0.001184724945919856, "loss": 0.9145, "step": 12282 }, { "epoch": 0.3296210820094461, "grad_norm": 0.236328125, "learning_rate": 0.0011847186445707468, "loss": 0.9431, "step": 12283 }, { "epoch": 0.32964791756118506, "grad_norm": 0.2275390625, "learning_rate": 0.0011847123419389371, "loss": 0.8587, "step": 12284 }, { "epoch": 0.329674753112924, "grad_norm": 0.2333984375, "learning_rate": 0.0011847060380244404, "loss": 0.8335, "step": 12285 }, { "epoch": 0.3297015886646629, "grad_norm": 0.21875, "learning_rate": 0.0011846997328272706, "loss": 0.7521, "step": 12286 }, { "epoch": 0.3297284242164019, "grad_norm": 0.220703125, "learning_rate": 0.0011846934263474418, "loss": 0.8814, "step": 12287 }, { "epoch": 0.32975525976814085, "grad_norm": 0.2373046875, "learning_rate": 0.0011846871185849676, "loss": 0.9251, "step": 12288 }, { "epoch": 0.3297820953198798, "grad_norm": 0.2275390625, "learning_rate": 0.0011846808095398618, "loss": 0.8615, "step": 12289 }, { "epoch": 0.3298089308716187, "grad_norm": 0.24609375, "learning_rate": 0.0011846744992121384, "loss": 0.9283, "step": 12290 }, { "epoch": 0.32983576642335766, "grad_norm": 0.26171875, "learning_rate": 0.001184668187601811, "loss": 1.0223, "step": 12291 }, { "epoch": 0.3298626019750966, "grad_norm": 0.259765625, "learning_rate": 0.001184661874708894, "loss": 0.9624, "step": 12292 }, { "epoch": 0.3298894375268355, "grad_norm": 0.2421875, "learning_rate": 0.0011846555605334005, "loss": 0.9018, "step": 12293 }, { "epoch": 0.3299162730785745, "grad_norm": 0.234375, "learning_rate": 0.0011846492450753446, "loss": 0.9051, "step": 12294 }, { "epoch": 0.32994310863031345, "grad_norm": 0.23828125, "learning_rate": 0.0011846429283347405, "loss": 0.9304, "step": 12295 }, { "epoch": 0.3299699441820524, "grad_norm": 0.255859375, "learning_rate": 0.0011846366103116018, "loss": 0.9701, "step": 12296 }, { "epoch": 0.3299967797337913, "grad_norm": 0.236328125, "learning_rate": 0.0011846302910059425, "loss": 0.8806, "step": 12297 }, { "epoch": 0.33002361528553026, "grad_norm": 0.2412109375, "learning_rate": 0.0011846239704177762, "loss": 0.9508, "step": 12298 }, { "epoch": 0.3300504508372692, "grad_norm": 0.23046875, "learning_rate": 0.0011846176485471168, "loss": 0.8691, "step": 12299 }, { "epoch": 0.3300772863890082, "grad_norm": 0.244140625, "learning_rate": 0.0011846113253939786, "loss": 1.0215, "step": 12300 }, { "epoch": 0.3301041219407471, "grad_norm": 0.2431640625, "learning_rate": 0.001184605000958375, "loss": 0.8886, "step": 12301 }, { "epoch": 0.33013095749248605, "grad_norm": 0.2333984375, "learning_rate": 0.0011845986752403199, "loss": 0.9484, "step": 12302 }, { "epoch": 0.330157793044225, "grad_norm": 0.23046875, "learning_rate": 0.0011845923482398273, "loss": 0.8915, "step": 12303 }, { "epoch": 0.3301846285959639, "grad_norm": 0.236328125, "learning_rate": 0.001184586019956911, "loss": 0.8905, "step": 12304 }, { "epoch": 0.33021146414770286, "grad_norm": 0.251953125, "learning_rate": 0.0011845796903915853, "loss": 0.9075, "step": 12305 }, { "epoch": 0.3302382996994418, "grad_norm": 0.234375, "learning_rate": 0.0011845733595438634, "loss": 0.9995, "step": 12306 }, { "epoch": 0.3302651352511808, "grad_norm": 0.2177734375, "learning_rate": 0.0011845670274137597, "loss": 0.791, "step": 12307 }, { "epoch": 0.3302919708029197, "grad_norm": 0.2294921875, "learning_rate": 0.001184560694001288, "loss": 0.8409, "step": 12308 }, { "epoch": 0.33031880635465866, "grad_norm": 0.2353515625, "learning_rate": 0.001184554359306462, "loss": 0.8962, "step": 12309 }, { "epoch": 0.3303456419063976, "grad_norm": 0.2197265625, "learning_rate": 0.0011845480233292956, "loss": 0.8016, "step": 12310 }, { "epoch": 0.3303724774581365, "grad_norm": 0.2216796875, "learning_rate": 0.001184541686069803, "loss": 0.8032, "step": 12311 }, { "epoch": 0.33039931300987546, "grad_norm": 0.2373046875, "learning_rate": 0.0011845353475279977, "loss": 0.8976, "step": 12312 }, { "epoch": 0.33042614856161445, "grad_norm": 0.24609375, "learning_rate": 0.001184529007703894, "loss": 0.9994, "step": 12313 }, { "epoch": 0.3304529841133534, "grad_norm": 0.25, "learning_rate": 0.0011845226665975058, "loss": 0.9695, "step": 12314 }, { "epoch": 0.3304798196650923, "grad_norm": 0.2197265625, "learning_rate": 0.0011845163242088464, "loss": 0.7776, "step": 12315 }, { "epoch": 0.33050665521683126, "grad_norm": 0.22265625, "learning_rate": 0.0011845099805379306, "loss": 0.8077, "step": 12316 }, { "epoch": 0.3305334907685702, "grad_norm": 0.2333984375, "learning_rate": 0.0011845036355847716, "loss": 0.9522, "step": 12317 }, { "epoch": 0.33056032632030913, "grad_norm": 0.2314453125, "learning_rate": 0.0011844972893493837, "loss": 0.9284, "step": 12318 }, { "epoch": 0.33058716187204806, "grad_norm": 0.2431640625, "learning_rate": 0.0011844909418317806, "loss": 0.9734, "step": 12319 }, { "epoch": 0.33061399742378705, "grad_norm": 0.224609375, "learning_rate": 0.0011844845930319764, "loss": 0.8583, "step": 12320 }, { "epoch": 0.330640832975526, "grad_norm": 0.2353515625, "learning_rate": 0.0011844782429499849, "loss": 0.8936, "step": 12321 }, { "epoch": 0.3306676685272649, "grad_norm": 0.23828125, "learning_rate": 0.0011844718915858202, "loss": 0.9073, "step": 12322 }, { "epoch": 0.33069450407900386, "grad_norm": 0.248046875, "learning_rate": 0.0011844655389394961, "loss": 1.0214, "step": 12323 }, { "epoch": 0.3307213396307428, "grad_norm": 0.21484375, "learning_rate": 0.0011844591850110266, "loss": 0.8291, "step": 12324 }, { "epoch": 0.33074817518248173, "grad_norm": 0.240234375, "learning_rate": 0.0011844528298004255, "loss": 0.9336, "step": 12325 }, { "epoch": 0.3307750107342207, "grad_norm": 0.234375, "learning_rate": 0.0011844464733077069, "loss": 0.9008, "step": 12326 }, { "epoch": 0.33080184628595966, "grad_norm": 0.259765625, "learning_rate": 0.0011844401155328846, "loss": 1.1155, "step": 12327 }, { "epoch": 0.3308286818376986, "grad_norm": 0.22265625, "learning_rate": 0.0011844337564759727, "loss": 0.8765, "step": 12328 }, { "epoch": 0.3308555173894375, "grad_norm": 0.236328125, "learning_rate": 0.0011844273961369852, "loss": 0.8756, "step": 12329 }, { "epoch": 0.33088235294117646, "grad_norm": 0.2421875, "learning_rate": 0.0011844210345159356, "loss": 0.9888, "step": 12330 }, { "epoch": 0.3309091884929154, "grad_norm": 0.25, "learning_rate": 0.0011844146716128384, "loss": 0.9337, "step": 12331 }, { "epoch": 0.33093602404465433, "grad_norm": 0.2392578125, "learning_rate": 0.0011844083074277073, "loss": 0.9558, "step": 12332 }, { "epoch": 0.3309628595963933, "grad_norm": 0.2080078125, "learning_rate": 0.0011844019419605563, "loss": 0.7447, "step": 12333 }, { "epoch": 0.33098969514813226, "grad_norm": 0.22265625, "learning_rate": 0.0011843955752113995, "loss": 0.8634, "step": 12334 }, { "epoch": 0.3310165306998712, "grad_norm": 0.2412109375, "learning_rate": 0.0011843892071802507, "loss": 0.9476, "step": 12335 }, { "epoch": 0.3310433662516101, "grad_norm": 0.248046875, "learning_rate": 0.0011843828378671239, "loss": 1.0061, "step": 12336 }, { "epoch": 0.33107020180334906, "grad_norm": 0.234375, "learning_rate": 0.001184376467272033, "loss": 0.9045, "step": 12337 }, { "epoch": 0.331097037355088, "grad_norm": 0.251953125, "learning_rate": 0.0011843700953949922, "loss": 0.9587, "step": 12338 }, { "epoch": 0.331123872906827, "grad_norm": 0.2431640625, "learning_rate": 0.0011843637222360153, "loss": 0.9606, "step": 12339 }, { "epoch": 0.3311507084585659, "grad_norm": 0.232421875, "learning_rate": 0.001184357347795116, "loss": 0.9318, "step": 12340 }, { "epoch": 0.33117754401030486, "grad_norm": 0.228515625, "learning_rate": 0.001184350972072309, "loss": 0.8911, "step": 12341 }, { "epoch": 0.3312043795620438, "grad_norm": 0.228515625, "learning_rate": 0.0011843445950676075, "loss": 0.8395, "step": 12342 }, { "epoch": 0.33123121511378273, "grad_norm": 0.234375, "learning_rate": 0.0011843382167810263, "loss": 0.9606, "step": 12343 }, { "epoch": 0.33125805066552166, "grad_norm": 0.2294921875, "learning_rate": 0.0011843318372125788, "loss": 0.8935, "step": 12344 }, { "epoch": 0.33128488621726065, "grad_norm": 0.2197265625, "learning_rate": 0.001184325456362279, "loss": 0.8032, "step": 12345 }, { "epoch": 0.3313117217689996, "grad_norm": 0.2255859375, "learning_rate": 0.0011843190742301414, "loss": 0.835, "step": 12346 }, { "epoch": 0.3313385573207385, "grad_norm": 0.236328125, "learning_rate": 0.0011843126908161795, "loss": 0.8879, "step": 12347 }, { "epoch": 0.33136539287247746, "grad_norm": 0.236328125, "learning_rate": 0.0011843063061204074, "loss": 0.8887, "step": 12348 }, { "epoch": 0.3313922284242164, "grad_norm": 0.240234375, "learning_rate": 0.0011842999201428393, "loss": 0.9258, "step": 12349 }, { "epoch": 0.33141906397595533, "grad_norm": 0.23828125, "learning_rate": 0.001184293532883489, "loss": 0.9014, "step": 12350 }, { "epoch": 0.33144589952769427, "grad_norm": 0.234375, "learning_rate": 0.0011842871443423705, "loss": 0.8604, "step": 12351 }, { "epoch": 0.33147273507943326, "grad_norm": 0.2412109375, "learning_rate": 0.0011842807545194979, "loss": 0.8402, "step": 12352 }, { "epoch": 0.3314995706311722, "grad_norm": 0.2353515625, "learning_rate": 0.0011842743634148853, "loss": 0.8979, "step": 12353 }, { "epoch": 0.3315264061829111, "grad_norm": 0.236328125, "learning_rate": 0.0011842679710285468, "loss": 0.8471, "step": 12354 }, { "epoch": 0.33155324173465006, "grad_norm": 0.23828125, "learning_rate": 0.001184261577360496, "loss": 0.8755, "step": 12355 }, { "epoch": 0.331580077286389, "grad_norm": 0.23828125, "learning_rate": 0.0011842551824107473, "loss": 0.9696, "step": 12356 }, { "epoch": 0.33160691283812793, "grad_norm": 0.232421875, "learning_rate": 0.0011842487861793145, "loss": 0.8412, "step": 12357 }, { "epoch": 0.3316337483898669, "grad_norm": 0.2412109375, "learning_rate": 0.0011842423886662121, "loss": 1.0313, "step": 12358 }, { "epoch": 0.33166058394160586, "grad_norm": 0.2470703125, "learning_rate": 0.0011842359898714535, "loss": 0.9862, "step": 12359 }, { "epoch": 0.3316874194933448, "grad_norm": 0.2451171875, "learning_rate": 0.001184229589795053, "loss": 1.0022, "step": 12360 }, { "epoch": 0.33171425504508373, "grad_norm": 0.2275390625, "learning_rate": 0.0011842231884370248, "loss": 0.8574, "step": 12361 }, { "epoch": 0.33174109059682266, "grad_norm": 0.2490234375, "learning_rate": 0.0011842167857973828, "loss": 1.0228, "step": 12362 }, { "epoch": 0.3317679261485616, "grad_norm": 0.2431640625, "learning_rate": 0.001184210381876141, "loss": 0.9132, "step": 12363 }, { "epoch": 0.33179476170030053, "grad_norm": 0.2294921875, "learning_rate": 0.0011842039766733134, "loss": 0.8683, "step": 12364 }, { "epoch": 0.3318215972520395, "grad_norm": 0.22265625, "learning_rate": 0.0011841975701889142, "loss": 0.7898, "step": 12365 }, { "epoch": 0.33184843280377846, "grad_norm": 0.2177734375, "learning_rate": 0.0011841911624229577, "loss": 0.8609, "step": 12366 }, { "epoch": 0.3318752683555174, "grad_norm": 0.22265625, "learning_rate": 0.001184184753375457, "loss": 0.8023, "step": 12367 }, { "epoch": 0.33190210390725633, "grad_norm": 0.25390625, "learning_rate": 0.0011841783430464273, "loss": 1.0021, "step": 12368 }, { "epoch": 0.33192893945899526, "grad_norm": 0.23046875, "learning_rate": 0.0011841719314358823, "loss": 0.8569, "step": 12369 }, { "epoch": 0.3319557750107342, "grad_norm": 0.25390625, "learning_rate": 0.0011841655185438357, "loss": 1.0014, "step": 12370 }, { "epoch": 0.3319826105624732, "grad_norm": 0.2353515625, "learning_rate": 0.0011841591043703018, "loss": 0.9365, "step": 12371 }, { "epoch": 0.3320094461142121, "grad_norm": 0.2294921875, "learning_rate": 0.001184152688915295, "loss": 0.8514, "step": 12372 }, { "epoch": 0.33203628166595106, "grad_norm": 0.24609375, "learning_rate": 0.0011841462721788288, "loss": 1.0125, "step": 12373 }, { "epoch": 0.33206311721769, "grad_norm": 0.2392578125, "learning_rate": 0.0011841398541609174, "loss": 0.9032, "step": 12374 }, { "epoch": 0.33208995276942893, "grad_norm": 0.21875, "learning_rate": 0.0011841334348615753, "loss": 0.7913, "step": 12375 }, { "epoch": 0.33211678832116787, "grad_norm": 0.2578125, "learning_rate": 0.0011841270142808162, "loss": 0.9973, "step": 12376 }, { "epoch": 0.3321436238729068, "grad_norm": 0.251953125, "learning_rate": 0.0011841205924186542, "loss": 1.0062, "step": 12377 }, { "epoch": 0.3321704594246458, "grad_norm": 0.2431640625, "learning_rate": 0.0011841141692751035, "loss": 0.985, "step": 12378 }, { "epoch": 0.3321972949763847, "grad_norm": 0.240234375, "learning_rate": 0.0011841077448501783, "loss": 0.9311, "step": 12379 }, { "epoch": 0.33222413052812366, "grad_norm": 0.212890625, "learning_rate": 0.0011841013191438924, "loss": 0.7857, "step": 12380 }, { "epoch": 0.3322509660798626, "grad_norm": 0.2294921875, "learning_rate": 0.0011840948921562602, "loss": 0.8404, "step": 12381 }, { "epoch": 0.33227780163160153, "grad_norm": 0.2275390625, "learning_rate": 0.0011840884638872956, "loss": 0.8789, "step": 12382 }, { "epoch": 0.33230463718334047, "grad_norm": 0.2333984375, "learning_rate": 0.0011840820343370125, "loss": 0.8325, "step": 12383 }, { "epoch": 0.33233147273507946, "grad_norm": 0.2275390625, "learning_rate": 0.0011840756035054255, "loss": 0.8397, "step": 12384 }, { "epoch": 0.3323583082868184, "grad_norm": 0.2333984375, "learning_rate": 0.0011840691713925484, "loss": 0.8938, "step": 12385 }, { "epoch": 0.33238514383855733, "grad_norm": 0.2294921875, "learning_rate": 0.0011840627379983955, "loss": 0.8916, "step": 12386 }, { "epoch": 0.33241197939029626, "grad_norm": 0.234375, "learning_rate": 0.0011840563033229808, "loss": 0.8933, "step": 12387 }, { "epoch": 0.3324388149420352, "grad_norm": 0.2392578125, "learning_rate": 0.0011840498673663183, "loss": 0.91, "step": 12388 }, { "epoch": 0.33246565049377413, "grad_norm": 0.23828125, "learning_rate": 0.001184043430128422, "loss": 0.9263, "step": 12389 }, { "epoch": 0.33249248604551307, "grad_norm": 0.24609375, "learning_rate": 0.0011840369916093065, "loss": 0.9407, "step": 12390 }, { "epoch": 0.33251932159725206, "grad_norm": 0.244140625, "learning_rate": 0.0011840305518089856, "loss": 0.9462, "step": 12391 }, { "epoch": 0.332546157148991, "grad_norm": 0.23046875, "learning_rate": 0.0011840241107274735, "loss": 0.8636, "step": 12392 }, { "epoch": 0.33257299270072993, "grad_norm": 0.236328125, "learning_rate": 0.0011840176683647844, "loss": 0.9278, "step": 12393 }, { "epoch": 0.33259982825246887, "grad_norm": 0.2216796875, "learning_rate": 0.0011840112247209322, "loss": 0.8213, "step": 12394 }, { "epoch": 0.3326266638042078, "grad_norm": 0.248046875, "learning_rate": 0.0011840047797959314, "loss": 0.9796, "step": 12395 }, { "epoch": 0.33265349935594674, "grad_norm": 0.2314453125, "learning_rate": 0.0011839983335897959, "loss": 0.9084, "step": 12396 }, { "epoch": 0.3326803349076857, "grad_norm": 0.2373046875, "learning_rate": 0.0011839918861025398, "loss": 1.0106, "step": 12397 }, { "epoch": 0.33270717045942466, "grad_norm": 0.2158203125, "learning_rate": 0.0011839854373341771, "loss": 0.7728, "step": 12398 }, { "epoch": 0.3327340060111636, "grad_norm": 0.2373046875, "learning_rate": 0.0011839789872847223, "loss": 0.9397, "step": 12399 }, { "epoch": 0.33276084156290253, "grad_norm": 0.2333984375, "learning_rate": 0.0011839725359541896, "loss": 0.8898, "step": 12400 }, { "epoch": 0.33278767711464147, "grad_norm": 0.232421875, "learning_rate": 0.0011839660833425929, "loss": 0.9119, "step": 12401 }, { "epoch": 0.3328145126663804, "grad_norm": 0.2197265625, "learning_rate": 0.0011839596294499464, "loss": 0.8687, "step": 12402 }, { "epoch": 0.33284134821811934, "grad_norm": 0.21875, "learning_rate": 0.0011839531742762642, "loss": 0.8237, "step": 12403 }, { "epoch": 0.33286818376985833, "grad_norm": 0.2373046875, "learning_rate": 0.0011839467178215606, "loss": 0.9446, "step": 12404 }, { "epoch": 0.33289501932159726, "grad_norm": 0.234375, "learning_rate": 0.0011839402600858496, "loss": 0.9621, "step": 12405 }, { "epoch": 0.3329218548733362, "grad_norm": 0.234375, "learning_rate": 0.0011839338010691456, "loss": 0.8668, "step": 12406 }, { "epoch": 0.33294869042507513, "grad_norm": 0.236328125, "learning_rate": 0.0011839273407714625, "loss": 0.912, "step": 12407 }, { "epoch": 0.33297552597681407, "grad_norm": 0.24609375, "learning_rate": 0.0011839208791928147, "loss": 0.9518, "step": 12408 }, { "epoch": 0.333002361528553, "grad_norm": 0.232421875, "learning_rate": 0.0011839144163332163, "loss": 0.9113, "step": 12409 }, { "epoch": 0.333029197080292, "grad_norm": 0.2490234375, "learning_rate": 0.0011839079521926814, "loss": 0.9248, "step": 12410 }, { "epoch": 0.33305603263203093, "grad_norm": 0.20703125, "learning_rate": 0.0011839014867712242, "loss": 0.7426, "step": 12411 }, { "epoch": 0.33308286818376986, "grad_norm": 0.251953125, "learning_rate": 0.0011838950200688592, "loss": 0.9498, "step": 12412 }, { "epoch": 0.3331097037355088, "grad_norm": 0.23828125, "learning_rate": 0.0011838885520856, "loss": 0.8596, "step": 12413 }, { "epoch": 0.33313653928724773, "grad_norm": 0.2431640625, "learning_rate": 0.0011838820828214613, "loss": 0.9688, "step": 12414 }, { "epoch": 0.33316337483898667, "grad_norm": 0.236328125, "learning_rate": 0.001183875612276457, "loss": 0.9339, "step": 12415 }, { "epoch": 0.33319021039072566, "grad_norm": 0.25, "learning_rate": 0.0011838691404506014, "loss": 0.9891, "step": 12416 }, { "epoch": 0.3332170459424646, "grad_norm": 0.2314453125, "learning_rate": 0.0011838626673439086, "loss": 0.9301, "step": 12417 }, { "epoch": 0.33324388149420353, "grad_norm": 0.2333984375, "learning_rate": 0.001183856192956393, "loss": 0.8432, "step": 12418 }, { "epoch": 0.33327071704594247, "grad_norm": 0.2255859375, "learning_rate": 0.0011838497172880684, "loss": 0.8047, "step": 12419 }, { "epoch": 0.3332975525976814, "grad_norm": 0.2265625, "learning_rate": 0.0011838432403389497, "loss": 0.9019, "step": 12420 }, { "epoch": 0.33332438814942034, "grad_norm": 0.2490234375, "learning_rate": 0.0011838367621090505, "loss": 0.9901, "step": 12421 }, { "epoch": 0.33335122370115927, "grad_norm": 0.2353515625, "learning_rate": 0.0011838302825983853, "loss": 0.8855, "step": 12422 }, { "epoch": 0.33337805925289826, "grad_norm": 0.2216796875, "learning_rate": 0.0011838238018069681, "loss": 0.8646, "step": 12423 }, { "epoch": 0.3334048948046372, "grad_norm": 0.234375, "learning_rate": 0.0011838173197348133, "loss": 0.9788, "step": 12424 }, { "epoch": 0.33343173035637613, "grad_norm": 0.23046875, "learning_rate": 0.001183810836381935, "loss": 0.881, "step": 12425 }, { "epoch": 0.33345856590811507, "grad_norm": 0.23828125, "learning_rate": 0.0011838043517483476, "loss": 0.9354, "step": 12426 }, { "epoch": 0.333485401459854, "grad_norm": 0.2373046875, "learning_rate": 0.0011837978658340651, "loss": 0.9492, "step": 12427 }, { "epoch": 0.33351223701159294, "grad_norm": 0.2294921875, "learning_rate": 0.0011837913786391019, "loss": 0.8782, "step": 12428 }, { "epoch": 0.33353907256333193, "grad_norm": 0.220703125, "learning_rate": 0.0011837848901634722, "loss": 0.756, "step": 12429 }, { "epoch": 0.33356590811507086, "grad_norm": 0.236328125, "learning_rate": 0.0011837784004071902, "loss": 0.8543, "step": 12430 }, { "epoch": 0.3335927436668098, "grad_norm": 0.2294921875, "learning_rate": 0.00118377190937027, "loss": 0.817, "step": 12431 }, { "epoch": 0.33361957921854873, "grad_norm": 0.23828125, "learning_rate": 0.0011837654170527262, "loss": 0.9223, "step": 12432 }, { "epoch": 0.33364641477028767, "grad_norm": 0.228515625, "learning_rate": 0.0011837589234545726, "loss": 0.8934, "step": 12433 }, { "epoch": 0.3336732503220266, "grad_norm": 0.220703125, "learning_rate": 0.0011837524285758239, "loss": 0.8269, "step": 12434 }, { "epoch": 0.33370008587376554, "grad_norm": 0.2236328125, "learning_rate": 0.0011837459324164938, "loss": 0.8672, "step": 12435 }, { "epoch": 0.33372692142550453, "grad_norm": 0.244140625, "learning_rate": 0.0011837394349765971, "loss": 0.9321, "step": 12436 }, { "epoch": 0.33375375697724347, "grad_norm": 0.240234375, "learning_rate": 0.0011837329362561478, "loss": 0.9686, "step": 12437 }, { "epoch": 0.3337805925289824, "grad_norm": 0.228515625, "learning_rate": 0.0011837264362551602, "loss": 0.8957, "step": 12438 }, { "epoch": 0.33380742808072134, "grad_norm": 0.24609375, "learning_rate": 0.0011837199349736484, "loss": 0.9333, "step": 12439 }, { "epoch": 0.33383426363246027, "grad_norm": 0.244140625, "learning_rate": 0.001183713432411627, "loss": 0.9527, "step": 12440 }, { "epoch": 0.3338610991841992, "grad_norm": 0.2373046875, "learning_rate": 0.00118370692856911, "loss": 0.9022, "step": 12441 }, { "epoch": 0.3338879347359382, "grad_norm": 0.23046875, "learning_rate": 0.0011837004234461115, "loss": 0.9348, "step": 12442 }, { "epoch": 0.33391477028767713, "grad_norm": 0.2275390625, "learning_rate": 0.0011836939170426462, "loss": 0.9144, "step": 12443 }, { "epoch": 0.33394160583941607, "grad_norm": 0.2158203125, "learning_rate": 0.0011836874093587282, "loss": 0.7747, "step": 12444 }, { "epoch": 0.333968441391155, "grad_norm": 0.2392578125, "learning_rate": 0.0011836809003943716, "loss": 1.004, "step": 12445 }, { "epoch": 0.33399527694289394, "grad_norm": 0.23828125, "learning_rate": 0.0011836743901495908, "loss": 0.9414, "step": 12446 }, { "epoch": 0.3340221124946329, "grad_norm": 0.21875, "learning_rate": 0.0011836678786244002, "loss": 0.8055, "step": 12447 }, { "epoch": 0.3340489480463718, "grad_norm": 0.228515625, "learning_rate": 0.0011836613658188142, "loss": 0.909, "step": 12448 }, { "epoch": 0.3340757835981108, "grad_norm": 0.2265625, "learning_rate": 0.0011836548517328465, "loss": 0.8509, "step": 12449 }, { "epoch": 0.33410261914984973, "grad_norm": 0.234375, "learning_rate": 0.0011836483363665122, "loss": 0.928, "step": 12450 }, { "epoch": 0.33412945470158867, "grad_norm": 0.23046875, "learning_rate": 0.0011836418197198248, "loss": 0.8925, "step": 12451 }, { "epoch": 0.3341562902533276, "grad_norm": 0.220703125, "learning_rate": 0.001183635301792799, "loss": 0.7787, "step": 12452 }, { "epoch": 0.33418312580506654, "grad_norm": 0.2431640625, "learning_rate": 0.0011836287825854493, "loss": 0.9794, "step": 12453 }, { "epoch": 0.3342099613568055, "grad_norm": 0.2294921875, "learning_rate": 0.0011836222620977895, "loss": 0.8562, "step": 12454 }, { "epoch": 0.33423679690854446, "grad_norm": 0.2353515625, "learning_rate": 0.0011836157403298344, "loss": 0.9628, "step": 12455 }, { "epoch": 0.3342636324602834, "grad_norm": 0.2294921875, "learning_rate": 0.0011836092172815978, "loss": 0.8165, "step": 12456 }, { "epoch": 0.33429046801202233, "grad_norm": 0.2451171875, "learning_rate": 0.0011836026929530944, "loss": 0.9412, "step": 12457 }, { "epoch": 0.33431730356376127, "grad_norm": 0.2451171875, "learning_rate": 0.0011835961673443383, "loss": 0.9342, "step": 12458 }, { "epoch": 0.3343441391155002, "grad_norm": 0.2314453125, "learning_rate": 0.001183589640455344, "loss": 0.8791, "step": 12459 }, { "epoch": 0.33437097466723914, "grad_norm": 0.25390625, "learning_rate": 0.0011835831122861257, "loss": 1.0264, "step": 12460 }, { "epoch": 0.3343978102189781, "grad_norm": 0.228515625, "learning_rate": 0.0011835765828366977, "loss": 0.8539, "step": 12461 }, { "epoch": 0.33442464577071707, "grad_norm": 0.234375, "learning_rate": 0.0011835700521070746, "loss": 0.936, "step": 12462 }, { "epoch": 0.334451481322456, "grad_norm": 0.25, "learning_rate": 0.0011835635200972703, "loss": 1.0249, "step": 12463 }, { "epoch": 0.33447831687419494, "grad_norm": 0.2490234375, "learning_rate": 0.0011835569868072994, "loss": 0.9116, "step": 12464 }, { "epoch": 0.33450515242593387, "grad_norm": 0.23046875, "learning_rate": 0.001183550452237176, "loss": 0.9029, "step": 12465 }, { "epoch": 0.3345319879776728, "grad_norm": 0.2373046875, "learning_rate": 0.0011835439163869147, "loss": 0.8936, "step": 12466 }, { "epoch": 0.33455882352941174, "grad_norm": 0.236328125, "learning_rate": 0.0011835373792565297, "loss": 0.882, "step": 12467 }, { "epoch": 0.33458565908115073, "grad_norm": 0.23046875, "learning_rate": 0.0011835308408460353, "loss": 0.7946, "step": 12468 }, { "epoch": 0.33461249463288967, "grad_norm": 0.2421875, "learning_rate": 0.001183524301155446, "loss": 0.9371, "step": 12469 }, { "epoch": 0.3346393301846286, "grad_norm": 0.25390625, "learning_rate": 0.001183517760184776, "loss": 0.957, "step": 12470 }, { "epoch": 0.33466616573636754, "grad_norm": 0.244140625, "learning_rate": 0.00118351121793404, "loss": 0.954, "step": 12471 }, { "epoch": 0.3346930012881065, "grad_norm": 0.2490234375, "learning_rate": 0.0011835046744032517, "loss": 0.9893, "step": 12472 }, { "epoch": 0.3347198368398454, "grad_norm": 0.2470703125, "learning_rate": 0.001183498129592426, "loss": 0.9386, "step": 12473 }, { "epoch": 0.33474667239158434, "grad_norm": 0.244140625, "learning_rate": 0.001183491583501577, "loss": 0.9013, "step": 12474 }, { "epoch": 0.33477350794332333, "grad_norm": 0.248046875, "learning_rate": 0.001183485036130719, "loss": 0.9119, "step": 12475 }, { "epoch": 0.33480034349506227, "grad_norm": 0.244140625, "learning_rate": 0.0011834784874798668, "loss": 0.8668, "step": 12476 }, { "epoch": 0.3348271790468012, "grad_norm": 0.2490234375, "learning_rate": 0.0011834719375490342, "loss": 1.0354, "step": 12477 }, { "epoch": 0.33485401459854014, "grad_norm": 0.2373046875, "learning_rate": 0.001183465386338236, "loss": 0.9222, "step": 12478 }, { "epoch": 0.3348808501502791, "grad_norm": 0.2470703125, "learning_rate": 0.0011834588338474861, "loss": 0.9438, "step": 12479 }, { "epoch": 0.334907685702018, "grad_norm": 0.2314453125, "learning_rate": 0.0011834522800767996, "loss": 0.8962, "step": 12480 }, { "epoch": 0.334934521253757, "grad_norm": 0.251953125, "learning_rate": 0.0011834457250261903, "loss": 0.9807, "step": 12481 }, { "epoch": 0.33496135680549594, "grad_norm": 0.236328125, "learning_rate": 0.0011834391686956726, "loss": 0.8636, "step": 12482 }, { "epoch": 0.33498819235723487, "grad_norm": 0.24609375, "learning_rate": 0.001183432611085261, "loss": 0.9734, "step": 12483 }, { "epoch": 0.3350150279089738, "grad_norm": 0.2294921875, "learning_rate": 0.0011834260521949701, "loss": 0.88, "step": 12484 }, { "epoch": 0.33504186346071274, "grad_norm": 0.2236328125, "learning_rate": 0.0011834194920248141, "loss": 0.8731, "step": 12485 }, { "epoch": 0.3350686990124517, "grad_norm": 0.23828125, "learning_rate": 0.0011834129305748073, "loss": 0.9065, "step": 12486 }, { "epoch": 0.33509553456419067, "grad_norm": 0.2412109375, "learning_rate": 0.001183406367844964, "loss": 0.9615, "step": 12487 }, { "epoch": 0.3351223701159296, "grad_norm": 0.240234375, "learning_rate": 0.001183399803835299, "loss": 0.9266, "step": 12488 }, { "epoch": 0.33514920566766854, "grad_norm": 0.2314453125, "learning_rate": 0.0011833932385458261, "loss": 0.8665, "step": 12489 }, { "epoch": 0.3351760412194075, "grad_norm": 0.2333984375, "learning_rate": 0.0011833866719765606, "loss": 0.8609, "step": 12490 }, { "epoch": 0.3352028767711464, "grad_norm": 0.2265625, "learning_rate": 0.0011833801041275162, "loss": 0.7654, "step": 12491 }, { "epoch": 0.33522971232288534, "grad_norm": 0.2265625, "learning_rate": 0.0011833735349987074, "loss": 0.8625, "step": 12492 }, { "epoch": 0.3352565478746243, "grad_norm": 0.2392578125, "learning_rate": 0.0011833669645901488, "loss": 0.9166, "step": 12493 }, { "epoch": 0.33528338342636327, "grad_norm": 0.2353515625, "learning_rate": 0.0011833603929018543, "loss": 0.9158, "step": 12494 }, { "epoch": 0.3353102189781022, "grad_norm": 0.23046875, "learning_rate": 0.0011833538199338392, "loss": 0.8545, "step": 12495 }, { "epoch": 0.33533705452984114, "grad_norm": 0.232421875, "learning_rate": 0.0011833472456861172, "loss": 0.9188, "step": 12496 }, { "epoch": 0.3353638900815801, "grad_norm": 0.2294921875, "learning_rate": 0.0011833406701587032, "loss": 0.8528, "step": 12497 }, { "epoch": 0.335390725633319, "grad_norm": 0.232421875, "learning_rate": 0.0011833340933516112, "loss": 0.8771, "step": 12498 }, { "epoch": 0.33541756118505794, "grad_norm": 0.232421875, "learning_rate": 0.0011833275152648558, "loss": 0.8803, "step": 12499 }, { "epoch": 0.33544439673679693, "grad_norm": 0.2353515625, "learning_rate": 0.0011833209358984515, "loss": 0.9479, "step": 12500 }, { "epoch": 0.33547123228853587, "grad_norm": 0.248046875, "learning_rate": 0.0011833143552524128, "loss": 0.9408, "step": 12501 }, { "epoch": 0.3354980678402748, "grad_norm": 0.228515625, "learning_rate": 0.0011833077733267537, "loss": 0.906, "step": 12502 }, { "epoch": 0.33552490339201374, "grad_norm": 0.232421875, "learning_rate": 0.0011833011901214894, "loss": 0.9501, "step": 12503 }, { "epoch": 0.3355517389437527, "grad_norm": 0.228515625, "learning_rate": 0.0011832946056366336, "loss": 0.8895, "step": 12504 }, { "epoch": 0.3355785744954916, "grad_norm": 0.234375, "learning_rate": 0.0011832880198722011, "loss": 0.8983, "step": 12505 }, { "epoch": 0.33560541004723055, "grad_norm": 0.2421875, "learning_rate": 0.0011832814328282064, "loss": 0.9609, "step": 12506 }, { "epoch": 0.33563224559896954, "grad_norm": 0.259765625, "learning_rate": 0.0011832748445046637, "loss": 1.0054, "step": 12507 }, { "epoch": 0.33565908115070847, "grad_norm": 0.2421875, "learning_rate": 0.0011832682549015876, "loss": 0.9374, "step": 12508 }, { "epoch": 0.3356859167024474, "grad_norm": 0.232421875, "learning_rate": 0.0011832616640189925, "loss": 0.8826, "step": 12509 }, { "epoch": 0.33571275225418634, "grad_norm": 0.21875, "learning_rate": 0.001183255071856893, "loss": 0.8021, "step": 12510 }, { "epoch": 0.3357395878059253, "grad_norm": 0.25, "learning_rate": 0.0011832484784153035, "loss": 0.9856, "step": 12511 }, { "epoch": 0.3357664233576642, "grad_norm": 0.224609375, "learning_rate": 0.0011832418836942383, "loss": 0.8876, "step": 12512 }, { "epoch": 0.3357932589094032, "grad_norm": 0.2265625, "learning_rate": 0.001183235287693712, "loss": 0.8893, "step": 12513 }, { "epoch": 0.33582009446114214, "grad_norm": 0.23828125, "learning_rate": 0.0011832286904137393, "loss": 0.8681, "step": 12514 }, { "epoch": 0.3358469300128811, "grad_norm": 0.236328125, "learning_rate": 0.0011832220918543343, "loss": 0.8824, "step": 12515 }, { "epoch": 0.33587376556462, "grad_norm": 0.2294921875, "learning_rate": 0.0011832154920155117, "loss": 0.8275, "step": 12516 }, { "epoch": 0.33590060111635894, "grad_norm": 0.2236328125, "learning_rate": 0.0011832088908972857, "loss": 0.8377, "step": 12517 }, { "epoch": 0.3359274366680979, "grad_norm": 0.2197265625, "learning_rate": 0.0011832022884996712, "loss": 0.8094, "step": 12518 }, { "epoch": 0.3359542722198368, "grad_norm": 0.2236328125, "learning_rate": 0.0011831956848226822, "loss": 0.8735, "step": 12519 }, { "epoch": 0.3359811077715758, "grad_norm": 0.2470703125, "learning_rate": 0.0011831890798663337, "loss": 1.0389, "step": 12520 }, { "epoch": 0.33600794332331474, "grad_norm": 0.2421875, "learning_rate": 0.0011831824736306398, "loss": 0.9559, "step": 12521 }, { "epoch": 0.3360347788750537, "grad_norm": 0.21875, "learning_rate": 0.0011831758661156153, "loss": 0.8347, "step": 12522 }, { "epoch": 0.3360616144267926, "grad_norm": 0.22265625, "learning_rate": 0.0011831692573212743, "loss": 0.8879, "step": 12523 }, { "epoch": 0.33608844997853154, "grad_norm": 0.2197265625, "learning_rate": 0.0011831626472476315, "loss": 0.8097, "step": 12524 }, { "epoch": 0.3361152855302705, "grad_norm": 0.2265625, "learning_rate": 0.0011831560358947016, "loss": 0.8558, "step": 12525 }, { "epoch": 0.33614212108200947, "grad_norm": 0.25, "learning_rate": 0.001183149423262499, "loss": 1.0171, "step": 12526 }, { "epoch": 0.3361689566337484, "grad_norm": 0.220703125, "learning_rate": 0.0011831428093510379, "loss": 0.8589, "step": 12527 }, { "epoch": 0.33619579218548734, "grad_norm": 0.2294921875, "learning_rate": 0.001183136194160333, "loss": 0.8834, "step": 12528 }, { "epoch": 0.3362226277372263, "grad_norm": 0.244140625, "learning_rate": 0.001183129577690399, "loss": 0.8856, "step": 12529 }, { "epoch": 0.3362494632889652, "grad_norm": 0.25, "learning_rate": 0.0011831229599412502, "loss": 0.9773, "step": 12530 }, { "epoch": 0.33627629884070415, "grad_norm": 0.2333984375, "learning_rate": 0.0011831163409129013, "loss": 0.87, "step": 12531 }, { "epoch": 0.3363031343924431, "grad_norm": 0.2373046875, "learning_rate": 0.0011831097206053665, "loss": 0.9371, "step": 12532 }, { "epoch": 0.3363299699441821, "grad_norm": 0.2265625, "learning_rate": 0.0011831030990186607, "loss": 0.8614, "step": 12533 }, { "epoch": 0.336356805495921, "grad_norm": 0.24609375, "learning_rate": 0.0011830964761527982, "loss": 0.966, "step": 12534 }, { "epoch": 0.33638364104765994, "grad_norm": 0.2099609375, "learning_rate": 0.0011830898520077935, "loss": 0.7503, "step": 12535 }, { "epoch": 0.3364104765993989, "grad_norm": 0.251953125, "learning_rate": 0.0011830832265836615, "loss": 0.9532, "step": 12536 }, { "epoch": 0.3364373121511378, "grad_norm": 0.2333984375, "learning_rate": 0.001183076599880416, "loss": 0.931, "step": 12537 }, { "epoch": 0.33646414770287675, "grad_norm": 0.240234375, "learning_rate": 0.0011830699718980724, "loss": 0.9425, "step": 12538 }, { "epoch": 0.33649098325461574, "grad_norm": 0.2392578125, "learning_rate": 0.0011830633426366446, "loss": 0.9412, "step": 12539 }, { "epoch": 0.3365178188063547, "grad_norm": 0.21484375, "learning_rate": 0.0011830567120961473, "loss": 0.8288, "step": 12540 }, { "epoch": 0.3365446543580936, "grad_norm": 0.2470703125, "learning_rate": 0.0011830500802765954, "loss": 1.0395, "step": 12541 }, { "epoch": 0.33657148990983254, "grad_norm": 0.216796875, "learning_rate": 0.001183043447178003, "loss": 0.7876, "step": 12542 }, { "epoch": 0.3365983254615715, "grad_norm": 0.25, "learning_rate": 0.0011830368128003847, "loss": 1.0534, "step": 12543 }, { "epoch": 0.3366251610133104, "grad_norm": 0.2353515625, "learning_rate": 0.0011830301771437552, "loss": 0.9236, "step": 12544 }, { "epoch": 0.3366519965650494, "grad_norm": 0.2470703125, "learning_rate": 0.0011830235402081292, "loss": 0.9737, "step": 12545 }, { "epoch": 0.33667883211678834, "grad_norm": 0.228515625, "learning_rate": 0.001183016901993521, "loss": 0.9249, "step": 12546 }, { "epoch": 0.3367056676685273, "grad_norm": 0.2421875, "learning_rate": 0.0011830102624999451, "loss": 0.9659, "step": 12547 }, { "epoch": 0.3367325032202662, "grad_norm": 0.2265625, "learning_rate": 0.0011830036217274163, "loss": 0.8181, "step": 12548 }, { "epoch": 0.33675933877200515, "grad_norm": 0.2236328125, "learning_rate": 0.001182996979675949, "loss": 0.8453, "step": 12549 }, { "epoch": 0.3367861743237441, "grad_norm": 0.23046875, "learning_rate": 0.001182990336345558, "loss": 0.8557, "step": 12550 }, { "epoch": 0.336813009875483, "grad_norm": 0.23828125, "learning_rate": 0.0011829836917362578, "loss": 0.8938, "step": 12551 }, { "epoch": 0.336839845427222, "grad_norm": 0.2294921875, "learning_rate": 0.0011829770458480627, "loss": 0.906, "step": 12552 }, { "epoch": 0.33686668097896094, "grad_norm": 0.2197265625, "learning_rate": 0.0011829703986809875, "loss": 0.831, "step": 12553 }, { "epoch": 0.3368935165306999, "grad_norm": 0.2333984375, "learning_rate": 0.0011829637502350468, "loss": 0.8879, "step": 12554 }, { "epoch": 0.3369203520824388, "grad_norm": 0.2314453125, "learning_rate": 0.0011829571005102552, "loss": 0.9044, "step": 12555 }, { "epoch": 0.33694718763417775, "grad_norm": 0.224609375, "learning_rate": 0.0011829504495066271, "loss": 0.8777, "step": 12556 }, { "epoch": 0.3369740231859167, "grad_norm": 0.2451171875, "learning_rate": 0.0011829437972241772, "loss": 0.9537, "step": 12557 }, { "epoch": 0.3370008587376557, "grad_norm": 0.234375, "learning_rate": 0.0011829371436629203, "loss": 0.8568, "step": 12558 }, { "epoch": 0.3370276942893946, "grad_norm": 0.25390625, "learning_rate": 0.0011829304888228706, "loss": 1.0331, "step": 12559 }, { "epoch": 0.33705452984113354, "grad_norm": 0.23046875, "learning_rate": 0.001182923832704043, "loss": 0.8855, "step": 12560 }, { "epoch": 0.3370813653928725, "grad_norm": 0.228515625, "learning_rate": 0.001182917175306452, "loss": 0.8125, "step": 12561 }, { "epoch": 0.3371082009446114, "grad_norm": 0.2275390625, "learning_rate": 0.0011829105166301123, "loss": 0.9202, "step": 12562 }, { "epoch": 0.33713503649635035, "grad_norm": 0.24609375, "learning_rate": 0.0011829038566750383, "loss": 0.9105, "step": 12563 }, { "epoch": 0.3371618720480893, "grad_norm": 0.22265625, "learning_rate": 0.001182897195441245, "loss": 0.8334, "step": 12564 }, { "epoch": 0.3371887075998283, "grad_norm": 0.232421875, "learning_rate": 0.0011828905329287465, "loss": 0.9029, "step": 12565 }, { "epoch": 0.3372155431515672, "grad_norm": 0.224609375, "learning_rate": 0.0011828838691375575, "loss": 0.8867, "step": 12566 }, { "epoch": 0.33724237870330614, "grad_norm": 0.2451171875, "learning_rate": 0.001182877204067693, "loss": 0.9512, "step": 12567 }, { "epoch": 0.3372692142550451, "grad_norm": 0.2392578125, "learning_rate": 0.0011828705377191675, "loss": 0.907, "step": 12568 }, { "epoch": 0.337296049806784, "grad_norm": 0.23828125, "learning_rate": 0.0011828638700919953, "loss": 0.9305, "step": 12569 }, { "epoch": 0.33732288535852295, "grad_norm": 0.2421875, "learning_rate": 0.0011828572011861912, "loss": 0.921, "step": 12570 }, { "epoch": 0.33734972091026194, "grad_norm": 0.23046875, "learning_rate": 0.0011828505310017702, "loss": 0.9304, "step": 12571 }, { "epoch": 0.3373765564620009, "grad_norm": 0.236328125, "learning_rate": 0.0011828438595387464, "loss": 0.8905, "step": 12572 }, { "epoch": 0.3374033920137398, "grad_norm": 0.2236328125, "learning_rate": 0.0011828371867971346, "loss": 0.8962, "step": 12573 }, { "epoch": 0.33743022756547875, "grad_norm": 0.2353515625, "learning_rate": 0.0011828305127769495, "loss": 0.9321, "step": 12574 }, { "epoch": 0.3374570631172177, "grad_norm": 0.216796875, "learning_rate": 0.0011828238374782059, "loss": 0.7702, "step": 12575 }, { "epoch": 0.3374838986689566, "grad_norm": 0.234375, "learning_rate": 0.0011828171609009183, "loss": 0.8796, "step": 12576 }, { "epoch": 0.33751073422069555, "grad_norm": 0.2451171875, "learning_rate": 0.0011828104830451012, "loss": 0.9523, "step": 12577 }, { "epoch": 0.33753756977243454, "grad_norm": 0.2578125, "learning_rate": 0.0011828038039107692, "loss": 1.086, "step": 12578 }, { "epoch": 0.3375644053241735, "grad_norm": 0.234375, "learning_rate": 0.0011827971234979373, "loss": 0.8744, "step": 12579 }, { "epoch": 0.3375912408759124, "grad_norm": 0.2314453125, "learning_rate": 0.00118279044180662, "loss": 0.8432, "step": 12580 }, { "epoch": 0.33761807642765135, "grad_norm": 0.2294921875, "learning_rate": 0.0011827837588368317, "loss": 0.8281, "step": 12581 }, { "epoch": 0.3376449119793903, "grad_norm": 0.248046875, "learning_rate": 0.0011827770745885876, "loss": 0.9421, "step": 12582 }, { "epoch": 0.3376717475311292, "grad_norm": 0.2265625, "learning_rate": 0.001182770389061902, "loss": 0.8731, "step": 12583 }, { "epoch": 0.3376985830828682, "grad_norm": 0.24609375, "learning_rate": 0.0011827637022567893, "loss": 0.9135, "step": 12584 }, { "epoch": 0.33772541863460714, "grad_norm": 0.22265625, "learning_rate": 0.0011827570141732648, "loss": 0.8203, "step": 12585 }, { "epoch": 0.3377522541863461, "grad_norm": 0.24609375, "learning_rate": 0.0011827503248113427, "loss": 1.0121, "step": 12586 }, { "epoch": 0.337779089738085, "grad_norm": 0.2392578125, "learning_rate": 0.001182743634171038, "loss": 0.9394, "step": 12587 }, { "epoch": 0.33780592528982395, "grad_norm": 0.240234375, "learning_rate": 0.001182736942252365, "loss": 0.9698, "step": 12588 }, { "epoch": 0.3378327608415629, "grad_norm": 0.244140625, "learning_rate": 0.0011827302490553387, "loss": 0.9959, "step": 12589 }, { "epoch": 0.3378595963933018, "grad_norm": 0.2275390625, "learning_rate": 0.0011827235545799738, "loss": 0.8868, "step": 12590 }, { "epoch": 0.3378864319450408, "grad_norm": 0.232421875, "learning_rate": 0.0011827168588262847, "loss": 0.8461, "step": 12591 }, { "epoch": 0.33791326749677975, "grad_norm": 0.240234375, "learning_rate": 0.0011827101617942863, "loss": 0.8866, "step": 12592 }, { "epoch": 0.3379401030485187, "grad_norm": 0.251953125, "learning_rate": 0.0011827034634839934, "loss": 0.9593, "step": 12593 }, { "epoch": 0.3379669386002576, "grad_norm": 0.23828125, "learning_rate": 0.0011826967638954202, "loss": 0.978, "step": 12594 }, { "epoch": 0.33799377415199655, "grad_norm": 0.2470703125, "learning_rate": 0.001182690063028582, "loss": 0.9565, "step": 12595 }, { "epoch": 0.3380206097037355, "grad_norm": 0.22265625, "learning_rate": 0.0011826833608834931, "loss": 0.8025, "step": 12596 }, { "epoch": 0.3380474452554745, "grad_norm": 0.2109375, "learning_rate": 0.0011826766574601685, "loss": 0.8009, "step": 12597 }, { "epoch": 0.3380742808072134, "grad_norm": 0.236328125, "learning_rate": 0.0011826699527586226, "loss": 0.8514, "step": 12598 }, { "epoch": 0.33810111635895235, "grad_norm": 0.208984375, "learning_rate": 0.0011826632467788702, "loss": 0.7893, "step": 12599 }, { "epoch": 0.3381279519106913, "grad_norm": 0.251953125, "learning_rate": 0.001182656539520926, "loss": 1.1072, "step": 12600 }, { "epoch": 0.3381547874624302, "grad_norm": 0.228515625, "learning_rate": 0.001182649830984805, "loss": 0.8778, "step": 12601 }, { "epoch": 0.33818162301416915, "grad_norm": 0.2177734375, "learning_rate": 0.0011826431211705217, "loss": 0.8617, "step": 12602 }, { "epoch": 0.3382084585659081, "grad_norm": 0.25, "learning_rate": 0.0011826364100780906, "loss": 0.9805, "step": 12603 }, { "epoch": 0.3382352941176471, "grad_norm": 0.228515625, "learning_rate": 0.0011826296977075267, "loss": 0.8865, "step": 12604 }, { "epoch": 0.338262129669386, "grad_norm": 0.2431640625, "learning_rate": 0.0011826229840588448, "loss": 0.961, "step": 12605 }, { "epoch": 0.33828896522112495, "grad_norm": 0.3125, "learning_rate": 0.0011826162691320594, "loss": 1.047, "step": 12606 }, { "epoch": 0.3383158007728639, "grad_norm": 0.30859375, "learning_rate": 0.0011826095529271852, "loss": 1.1169, "step": 12607 }, { "epoch": 0.3383426363246028, "grad_norm": 0.275390625, "learning_rate": 0.001182602835444237, "loss": 0.9638, "step": 12608 }, { "epoch": 0.33836947187634175, "grad_norm": 0.263671875, "learning_rate": 0.0011825961166832299, "loss": 0.9969, "step": 12609 }, { "epoch": 0.33839630742808074, "grad_norm": 0.2890625, "learning_rate": 0.001182589396644178, "loss": 1.1593, "step": 12610 }, { "epoch": 0.3384231429798197, "grad_norm": 0.31640625, "learning_rate": 0.0011825826753270965, "loss": 1.1621, "step": 12611 }, { "epoch": 0.3384499785315586, "grad_norm": 0.283203125, "learning_rate": 0.0011825759527320002, "loss": 0.9976, "step": 12612 }, { "epoch": 0.33847681408329755, "grad_norm": 0.296875, "learning_rate": 0.0011825692288589033, "loss": 1.1616, "step": 12613 }, { "epoch": 0.3385036496350365, "grad_norm": 0.244140625, "learning_rate": 0.001182562503707821, "loss": 0.8758, "step": 12614 }, { "epoch": 0.3385304851867754, "grad_norm": 0.25390625, "learning_rate": 0.001182555777278768, "loss": 1.0791, "step": 12615 }, { "epoch": 0.3385573207385144, "grad_norm": 0.265625, "learning_rate": 0.0011825490495717592, "loss": 1.0711, "step": 12616 }, { "epoch": 0.33858415629025335, "grad_norm": 0.263671875, "learning_rate": 0.001182542320586809, "loss": 1.0388, "step": 12617 }, { "epoch": 0.3386109918419923, "grad_norm": 0.267578125, "learning_rate": 0.0011825355903239323, "loss": 1.0109, "step": 12618 }, { "epoch": 0.3386378273937312, "grad_norm": 0.244140625, "learning_rate": 0.001182528858783144, "loss": 0.9235, "step": 12619 }, { "epoch": 0.33866466294547015, "grad_norm": 0.25390625, "learning_rate": 0.0011825221259644588, "loss": 1.0716, "step": 12620 }, { "epoch": 0.3386914984972091, "grad_norm": 0.265625, "learning_rate": 0.0011825153918678913, "loss": 1.0431, "step": 12621 }, { "epoch": 0.338718334048948, "grad_norm": 0.255859375, "learning_rate": 0.0011825086564934565, "loss": 1.0753, "step": 12622 }, { "epoch": 0.338745169600687, "grad_norm": 0.2333984375, "learning_rate": 0.001182501919841169, "loss": 0.9219, "step": 12623 }, { "epoch": 0.33877200515242595, "grad_norm": 0.240234375, "learning_rate": 0.0011824951819110437, "loss": 1.0092, "step": 12624 }, { "epoch": 0.3387988407041649, "grad_norm": 0.24609375, "learning_rate": 0.0011824884427030953, "loss": 0.9576, "step": 12625 }, { "epoch": 0.3388256762559038, "grad_norm": 0.2451171875, "learning_rate": 0.0011824817022173387, "loss": 0.9784, "step": 12626 }, { "epoch": 0.33885251180764275, "grad_norm": 0.251953125, "learning_rate": 0.0011824749604537885, "loss": 1.0507, "step": 12627 }, { "epoch": 0.3388793473593817, "grad_norm": 0.271484375, "learning_rate": 0.0011824682174124597, "loss": 1.1566, "step": 12628 }, { "epoch": 0.3389061829111207, "grad_norm": 0.255859375, "learning_rate": 0.001182461473093367, "loss": 1.0179, "step": 12629 }, { "epoch": 0.3389330184628596, "grad_norm": 0.265625, "learning_rate": 0.0011824547274965251, "loss": 1.1172, "step": 12630 }, { "epoch": 0.33895985401459855, "grad_norm": 0.265625, "learning_rate": 0.0011824479806219492, "loss": 1.1328, "step": 12631 }, { "epoch": 0.3389866895663375, "grad_norm": 0.2470703125, "learning_rate": 0.0011824412324696536, "loss": 0.9897, "step": 12632 }, { "epoch": 0.3390135251180764, "grad_norm": 0.25390625, "learning_rate": 0.001182434483039653, "loss": 1.0913, "step": 12633 }, { "epoch": 0.33904036066981535, "grad_norm": 0.251953125, "learning_rate": 0.0011824277323319629, "loss": 1.0225, "step": 12634 }, { "epoch": 0.3390671962215543, "grad_norm": 0.2490234375, "learning_rate": 0.0011824209803465975, "loss": 1.0287, "step": 12635 }, { "epoch": 0.3390940317732933, "grad_norm": 0.2734375, "learning_rate": 0.001182414227083572, "loss": 1.131, "step": 12636 }, { "epoch": 0.3391208673250322, "grad_norm": 0.24609375, "learning_rate": 0.0011824074725429008, "loss": 1.0191, "step": 12637 }, { "epoch": 0.33914770287677115, "grad_norm": 0.2451171875, "learning_rate": 0.001182400716724599, "loss": 1.015, "step": 12638 }, { "epoch": 0.3391745384285101, "grad_norm": 0.240234375, "learning_rate": 0.0011823939596286815, "loss": 0.9823, "step": 12639 }, { "epoch": 0.339201373980249, "grad_norm": 0.251953125, "learning_rate": 0.001182387201255163, "loss": 1.0082, "step": 12640 }, { "epoch": 0.33922820953198796, "grad_norm": 0.234375, "learning_rate": 0.0011823804416040582, "loss": 0.9442, "step": 12641 }, { "epoch": 0.33925504508372695, "grad_norm": 0.263671875, "learning_rate": 0.0011823736806753822, "loss": 1.1204, "step": 12642 }, { "epoch": 0.3392818806354659, "grad_norm": 0.255859375, "learning_rate": 0.0011823669184691495, "loss": 1.035, "step": 12643 }, { "epoch": 0.3393087161872048, "grad_norm": 0.2470703125, "learning_rate": 0.0011823601549853751, "loss": 1.0599, "step": 12644 }, { "epoch": 0.33933555173894375, "grad_norm": 0.236328125, "learning_rate": 0.0011823533902240741, "loss": 0.9582, "step": 12645 }, { "epoch": 0.3393623872906827, "grad_norm": 0.251953125, "learning_rate": 0.001182346624185261, "loss": 0.9907, "step": 12646 }, { "epoch": 0.3393892228424216, "grad_norm": 0.2470703125, "learning_rate": 0.0011823398568689505, "loss": 1.0179, "step": 12647 }, { "epoch": 0.33941605839416056, "grad_norm": 0.263671875, "learning_rate": 0.0011823330882751578, "loss": 1.1564, "step": 12648 }, { "epoch": 0.33944289394589955, "grad_norm": 0.232421875, "learning_rate": 0.001182326318403898, "loss": 0.8963, "step": 12649 }, { "epoch": 0.3394697294976385, "grad_norm": 0.2392578125, "learning_rate": 0.001182319547255185, "loss": 0.9913, "step": 12650 }, { "epoch": 0.3394965650493774, "grad_norm": 0.240234375, "learning_rate": 0.0011823127748290345, "loss": 0.9897, "step": 12651 }, { "epoch": 0.33952340060111635, "grad_norm": 0.2392578125, "learning_rate": 0.0011823060011254612, "loss": 0.9524, "step": 12652 }, { "epoch": 0.3395502361528553, "grad_norm": 0.2392578125, "learning_rate": 0.0011822992261444798, "loss": 1.0034, "step": 12653 }, { "epoch": 0.3395770717045942, "grad_norm": 0.283203125, "learning_rate": 0.001182292449886105, "loss": 1.1747, "step": 12654 }, { "epoch": 0.3396039072563332, "grad_norm": 0.248046875, "learning_rate": 0.001182285672350352, "loss": 0.9797, "step": 12655 }, { "epoch": 0.33963074280807215, "grad_norm": 0.2578125, "learning_rate": 0.0011822788935372357, "loss": 1.1242, "step": 12656 }, { "epoch": 0.3396575783598111, "grad_norm": 0.24609375, "learning_rate": 0.001182272113446771, "loss": 1.044, "step": 12657 }, { "epoch": 0.33968441391155, "grad_norm": 0.259765625, "learning_rate": 0.001182265332078972, "loss": 1.1146, "step": 12658 }, { "epoch": 0.33971124946328896, "grad_norm": 0.23046875, "learning_rate": 0.0011822585494338546, "loss": 0.8996, "step": 12659 }, { "epoch": 0.3397380850150279, "grad_norm": 0.2470703125, "learning_rate": 0.0011822517655114332, "loss": 1.0112, "step": 12660 }, { "epoch": 0.3397649205667668, "grad_norm": 0.265625, "learning_rate": 0.0011822449803117225, "loss": 1.093, "step": 12661 }, { "epoch": 0.3397917561185058, "grad_norm": 0.2353515625, "learning_rate": 0.0011822381938347377, "loss": 0.9411, "step": 12662 }, { "epoch": 0.33981859167024475, "grad_norm": 0.24609375, "learning_rate": 0.0011822314060804936, "loss": 1.001, "step": 12663 }, { "epoch": 0.3398454272219837, "grad_norm": 0.2392578125, "learning_rate": 0.0011822246170490052, "loss": 0.9661, "step": 12664 }, { "epoch": 0.3398722627737226, "grad_norm": 0.2412109375, "learning_rate": 0.001182217826740287, "loss": 1.0008, "step": 12665 }, { "epoch": 0.33989909832546156, "grad_norm": 0.2578125, "learning_rate": 0.0011822110351543545, "loss": 1.016, "step": 12666 }, { "epoch": 0.3399259338772005, "grad_norm": 0.26171875, "learning_rate": 0.0011822042422912223, "loss": 1.1189, "step": 12667 }, { "epoch": 0.3399527694289395, "grad_norm": 0.24609375, "learning_rate": 0.0011821974481509051, "loss": 1.0275, "step": 12668 }, { "epoch": 0.3399796049806784, "grad_norm": 0.255859375, "learning_rate": 0.001182190652733418, "loss": 1.0481, "step": 12669 }, { "epoch": 0.34000644053241735, "grad_norm": 0.259765625, "learning_rate": 0.001182183856038776, "loss": 1.1171, "step": 12670 }, { "epoch": 0.3400332760841563, "grad_norm": 0.23828125, "learning_rate": 0.0011821770580669939, "loss": 0.9662, "step": 12671 }, { "epoch": 0.3400601116358952, "grad_norm": 0.2431640625, "learning_rate": 0.0011821702588180865, "loss": 1.0257, "step": 12672 }, { "epoch": 0.34008694718763416, "grad_norm": 0.248046875, "learning_rate": 0.0011821634582920688, "loss": 1.0471, "step": 12673 }, { "epoch": 0.3401137827393731, "grad_norm": 0.2236328125, "learning_rate": 0.0011821566564889558, "loss": 0.8959, "step": 12674 }, { "epoch": 0.3401406182911121, "grad_norm": 0.25390625, "learning_rate": 0.0011821498534087625, "loss": 1.0818, "step": 12675 }, { "epoch": 0.340167453842851, "grad_norm": 0.25390625, "learning_rate": 0.0011821430490515036, "loss": 1.038, "step": 12676 }, { "epoch": 0.34019428939458995, "grad_norm": 0.255859375, "learning_rate": 0.001182136243417194, "loss": 1.0355, "step": 12677 }, { "epoch": 0.3402211249463289, "grad_norm": 0.2431640625, "learning_rate": 0.001182129436505849, "loss": 1.0042, "step": 12678 }, { "epoch": 0.3402479604980678, "grad_norm": 0.255859375, "learning_rate": 0.001182122628317483, "loss": 1.0674, "step": 12679 }, { "epoch": 0.34027479604980676, "grad_norm": 0.24609375, "learning_rate": 0.0011821158188521112, "loss": 1.0263, "step": 12680 }, { "epoch": 0.34030163160154575, "grad_norm": 0.2314453125, "learning_rate": 0.001182109008109749, "loss": 0.9722, "step": 12681 }, { "epoch": 0.3403284671532847, "grad_norm": 0.26953125, "learning_rate": 0.0011821021960904104, "loss": 1.1342, "step": 12682 }, { "epoch": 0.3403553027050236, "grad_norm": 0.259765625, "learning_rate": 0.001182095382794111, "loss": 1.0946, "step": 12683 }, { "epoch": 0.34038213825676256, "grad_norm": 0.244140625, "learning_rate": 0.0011820885682208655, "loss": 1.018, "step": 12684 }, { "epoch": 0.3404089738085015, "grad_norm": 0.2578125, "learning_rate": 0.0011820817523706894, "loss": 1.0548, "step": 12685 }, { "epoch": 0.3404358093602404, "grad_norm": 0.24609375, "learning_rate": 0.0011820749352435967, "loss": 1.0217, "step": 12686 }, { "epoch": 0.3404626449119794, "grad_norm": 0.244140625, "learning_rate": 0.001182068116839603, "loss": 1.0565, "step": 12687 }, { "epoch": 0.34048948046371835, "grad_norm": 0.26171875, "learning_rate": 0.001182061297158723, "loss": 1.0751, "step": 12688 }, { "epoch": 0.3405163160154573, "grad_norm": 0.255859375, "learning_rate": 0.0011820544762009718, "loss": 1.0986, "step": 12689 }, { "epoch": 0.3405431515671962, "grad_norm": 0.248046875, "learning_rate": 0.0011820476539663644, "loss": 1.0258, "step": 12690 }, { "epoch": 0.34056998711893516, "grad_norm": 0.234375, "learning_rate": 0.0011820408304549159, "loss": 0.9804, "step": 12691 }, { "epoch": 0.3405968226706741, "grad_norm": 0.267578125, "learning_rate": 0.0011820340056666408, "loss": 1.0901, "step": 12692 }, { "epoch": 0.34062365822241303, "grad_norm": 0.251953125, "learning_rate": 0.0011820271796015543, "loss": 1.0319, "step": 12693 }, { "epoch": 0.340650493774152, "grad_norm": 0.2421875, "learning_rate": 0.0011820203522596713, "loss": 1.005, "step": 12694 }, { "epoch": 0.34067732932589095, "grad_norm": 0.26171875, "learning_rate": 0.001182013523641007, "loss": 1.0352, "step": 12695 }, { "epoch": 0.3407041648776299, "grad_norm": 0.2490234375, "learning_rate": 0.0011820066937455762, "loss": 1.0091, "step": 12696 }, { "epoch": 0.3407310004293688, "grad_norm": 0.2578125, "learning_rate": 0.0011819998625733942, "loss": 1.0928, "step": 12697 }, { "epoch": 0.34075783598110776, "grad_norm": 0.263671875, "learning_rate": 0.0011819930301244755, "loss": 1.1311, "step": 12698 }, { "epoch": 0.3407846715328467, "grad_norm": 0.232421875, "learning_rate": 0.0011819861963988355, "loss": 0.9409, "step": 12699 }, { "epoch": 0.3408115070845857, "grad_norm": 0.244140625, "learning_rate": 0.0011819793613964888, "loss": 1.0107, "step": 12700 }, { "epoch": 0.3408383426363246, "grad_norm": 0.2265625, "learning_rate": 0.0011819725251174507, "loss": 0.908, "step": 12701 }, { "epoch": 0.34086517818806356, "grad_norm": 0.251953125, "learning_rate": 0.001181965687561736, "loss": 0.998, "step": 12702 }, { "epoch": 0.3408920137398025, "grad_norm": 0.251953125, "learning_rate": 0.00118195884872936, "loss": 0.9654, "step": 12703 }, { "epoch": 0.3409188492915414, "grad_norm": 0.236328125, "learning_rate": 0.0011819520086203373, "loss": 1.0121, "step": 12704 }, { "epoch": 0.34094568484328036, "grad_norm": 0.2470703125, "learning_rate": 0.0011819451672346831, "loss": 1.0189, "step": 12705 }, { "epoch": 0.3409725203950193, "grad_norm": 0.25, "learning_rate": 0.0011819383245724126, "loss": 1.0368, "step": 12706 }, { "epoch": 0.3409993559467583, "grad_norm": 0.2353515625, "learning_rate": 0.0011819314806335403, "loss": 1.0042, "step": 12707 }, { "epoch": 0.3410261914984972, "grad_norm": 0.236328125, "learning_rate": 0.0011819246354180819, "loss": 0.9403, "step": 12708 }, { "epoch": 0.34105302705023616, "grad_norm": 0.2392578125, "learning_rate": 0.0011819177889260519, "loss": 0.9731, "step": 12709 }, { "epoch": 0.3410798626019751, "grad_norm": 0.2373046875, "learning_rate": 0.0011819109411574655, "loss": 1.0089, "step": 12710 }, { "epoch": 0.341106698153714, "grad_norm": 0.2578125, "learning_rate": 0.0011819040921123376, "loss": 1.0474, "step": 12711 }, { "epoch": 0.34113353370545296, "grad_norm": 0.23828125, "learning_rate": 0.0011818972417906835, "loss": 1.0048, "step": 12712 }, { "epoch": 0.34116036925719195, "grad_norm": 0.2578125, "learning_rate": 0.001181890390192518, "loss": 1.0727, "step": 12713 }, { "epoch": 0.3411872048089309, "grad_norm": 0.2373046875, "learning_rate": 0.001181883537317856, "loss": 0.9148, "step": 12714 }, { "epoch": 0.3412140403606698, "grad_norm": 0.25, "learning_rate": 0.001181876683166713, "loss": 1.0425, "step": 12715 }, { "epoch": 0.34124087591240876, "grad_norm": 0.2392578125, "learning_rate": 0.0011818698277391035, "loss": 0.9573, "step": 12716 }, { "epoch": 0.3412677114641477, "grad_norm": 0.2431640625, "learning_rate": 0.0011818629710350428, "loss": 0.9694, "step": 12717 }, { "epoch": 0.34129454701588663, "grad_norm": 0.248046875, "learning_rate": 0.0011818561130545462, "loss": 1.0382, "step": 12718 }, { "epoch": 0.34132138256762556, "grad_norm": 0.251953125, "learning_rate": 0.001181849253797628, "loss": 1.0158, "step": 12719 }, { "epoch": 0.34134821811936455, "grad_norm": 0.24609375, "learning_rate": 0.0011818423932643042, "loss": 0.98, "step": 12720 }, { "epoch": 0.3413750536711035, "grad_norm": 0.267578125, "learning_rate": 0.001181835531454589, "loss": 1.1726, "step": 12721 }, { "epoch": 0.3414018892228424, "grad_norm": 0.2353515625, "learning_rate": 0.0011818286683684981, "loss": 0.9114, "step": 12722 }, { "epoch": 0.34142872477458136, "grad_norm": 0.2392578125, "learning_rate": 0.0011818218040060462, "loss": 0.9733, "step": 12723 }, { "epoch": 0.3414555603263203, "grad_norm": 0.24609375, "learning_rate": 0.0011818149383672483, "loss": 1.0364, "step": 12724 }, { "epoch": 0.34148239587805923, "grad_norm": 0.2412109375, "learning_rate": 0.0011818080714521197, "loss": 0.9107, "step": 12725 }, { "epoch": 0.3415092314297982, "grad_norm": 0.248046875, "learning_rate": 0.0011818012032606753, "loss": 1.0556, "step": 12726 }, { "epoch": 0.34153606698153716, "grad_norm": 0.2431640625, "learning_rate": 0.0011817943337929303, "loss": 1.0252, "step": 12727 }, { "epoch": 0.3415629025332761, "grad_norm": 0.2490234375, "learning_rate": 0.0011817874630489, "loss": 1.0516, "step": 12728 }, { "epoch": 0.341589738085015, "grad_norm": 0.2431640625, "learning_rate": 0.0011817805910285988, "loss": 0.9846, "step": 12729 }, { "epoch": 0.34161657363675396, "grad_norm": 0.248046875, "learning_rate": 0.001181773717732042, "loss": 0.999, "step": 12730 }, { "epoch": 0.3416434091884929, "grad_norm": 0.2421875, "learning_rate": 0.0011817668431592452, "loss": 1.0342, "step": 12731 }, { "epoch": 0.34167024474023183, "grad_norm": 0.2412109375, "learning_rate": 0.001181759967310223, "loss": 1.0259, "step": 12732 }, { "epoch": 0.3416970802919708, "grad_norm": 0.251953125, "learning_rate": 0.0011817530901849903, "loss": 1.0558, "step": 12733 }, { "epoch": 0.34172391584370976, "grad_norm": 0.248046875, "learning_rate": 0.001181746211783563, "loss": 1.0792, "step": 12734 }, { "epoch": 0.3417507513954487, "grad_norm": 0.244140625, "learning_rate": 0.0011817393321059552, "loss": 1.013, "step": 12735 }, { "epoch": 0.34177758694718763, "grad_norm": 0.248046875, "learning_rate": 0.0011817324511521827, "loss": 1.0013, "step": 12736 }, { "epoch": 0.34180442249892656, "grad_norm": 0.240234375, "learning_rate": 0.0011817255689222602, "loss": 0.9785, "step": 12737 }, { "epoch": 0.3418312580506655, "grad_norm": 0.251953125, "learning_rate": 0.001181718685416203, "loss": 1.0921, "step": 12738 }, { "epoch": 0.3418580936024045, "grad_norm": 0.2314453125, "learning_rate": 0.001181711800634026, "loss": 0.939, "step": 12739 }, { "epoch": 0.3418849291541434, "grad_norm": 0.2451171875, "learning_rate": 0.0011817049145757448, "loss": 0.9927, "step": 12740 }, { "epoch": 0.34191176470588236, "grad_norm": 0.25, "learning_rate": 0.0011816980272413738, "loss": 1.013, "step": 12741 }, { "epoch": 0.3419386002576213, "grad_norm": 0.2470703125, "learning_rate": 0.0011816911386309287, "loss": 1.0228, "step": 12742 }, { "epoch": 0.34196543580936023, "grad_norm": 0.2412109375, "learning_rate": 0.0011816842487444243, "loss": 0.9777, "step": 12743 }, { "epoch": 0.34199227136109916, "grad_norm": 0.24609375, "learning_rate": 0.001181677357581876, "loss": 0.9692, "step": 12744 }, { "epoch": 0.34201910691283816, "grad_norm": 0.2490234375, "learning_rate": 0.0011816704651432983, "loss": 1.0409, "step": 12745 }, { "epoch": 0.3420459424645771, "grad_norm": 0.2333984375, "learning_rate": 0.001181663571428707, "loss": 0.9183, "step": 12746 }, { "epoch": 0.342072778016316, "grad_norm": 0.240234375, "learning_rate": 0.0011816566764381168, "loss": 0.9575, "step": 12747 }, { "epoch": 0.34209961356805496, "grad_norm": 0.25, "learning_rate": 0.0011816497801715431, "loss": 1.051, "step": 12748 }, { "epoch": 0.3421264491197939, "grad_norm": 0.2490234375, "learning_rate": 0.001181642882629001, "loss": 0.9732, "step": 12749 }, { "epoch": 0.34215328467153283, "grad_norm": 0.2294921875, "learning_rate": 0.0011816359838105054, "loss": 0.9429, "step": 12750 }, { "epoch": 0.34218012022327177, "grad_norm": 0.2421875, "learning_rate": 0.0011816290837160716, "loss": 0.9875, "step": 12751 }, { "epoch": 0.34220695577501076, "grad_norm": 0.2412109375, "learning_rate": 0.0011816221823457147, "loss": 0.9331, "step": 12752 }, { "epoch": 0.3422337913267497, "grad_norm": 0.251953125, "learning_rate": 0.0011816152796994498, "loss": 1.0498, "step": 12753 }, { "epoch": 0.3422606268784886, "grad_norm": 0.240234375, "learning_rate": 0.001181608375777292, "loss": 0.9606, "step": 12754 }, { "epoch": 0.34228746243022756, "grad_norm": 0.248046875, "learning_rate": 0.0011816014705792568, "loss": 1.0138, "step": 12755 }, { "epoch": 0.3423142979819665, "grad_norm": 0.255859375, "learning_rate": 0.001181594564105359, "loss": 1.0737, "step": 12756 }, { "epoch": 0.34234113353370543, "grad_norm": 0.2392578125, "learning_rate": 0.0011815876563556137, "loss": 0.9663, "step": 12757 }, { "epoch": 0.3423679690854444, "grad_norm": 0.255859375, "learning_rate": 0.0011815807473300364, "loss": 1.0932, "step": 12758 }, { "epoch": 0.34239480463718336, "grad_norm": 0.25390625, "learning_rate": 0.001181573837028642, "loss": 0.9927, "step": 12759 }, { "epoch": 0.3424216401889223, "grad_norm": 0.2392578125, "learning_rate": 0.0011815669254514455, "loss": 1.0211, "step": 12760 }, { "epoch": 0.34244847574066123, "grad_norm": 0.244140625, "learning_rate": 0.0011815600125984625, "loss": 1.0179, "step": 12761 }, { "epoch": 0.34247531129240016, "grad_norm": 0.2470703125, "learning_rate": 0.0011815530984697078, "loss": 0.9924, "step": 12762 }, { "epoch": 0.3425021468441391, "grad_norm": 0.251953125, "learning_rate": 0.0011815461830651968, "loss": 1.0042, "step": 12763 }, { "epoch": 0.34252898239587803, "grad_norm": 0.2431640625, "learning_rate": 0.0011815392663849444, "loss": 1.0181, "step": 12764 }, { "epoch": 0.342555817947617, "grad_norm": 0.2353515625, "learning_rate": 0.0011815323484289661, "loss": 0.8978, "step": 12765 }, { "epoch": 0.34258265349935596, "grad_norm": 0.2490234375, "learning_rate": 0.0011815254291972768, "loss": 1.0627, "step": 12766 }, { "epoch": 0.3426094890510949, "grad_norm": 0.23828125, "learning_rate": 0.001181518508689892, "loss": 1.0408, "step": 12767 }, { "epoch": 0.34263632460283383, "grad_norm": 0.244140625, "learning_rate": 0.0011815115869068264, "loss": 1.0654, "step": 12768 }, { "epoch": 0.34266316015457277, "grad_norm": 0.244140625, "learning_rate": 0.0011815046638480955, "loss": 0.9823, "step": 12769 }, { "epoch": 0.3426899957063117, "grad_norm": 0.234375, "learning_rate": 0.0011814977395137145, "loss": 0.937, "step": 12770 }, { "epoch": 0.3427168312580507, "grad_norm": 0.24609375, "learning_rate": 0.0011814908139036987, "loss": 1.0074, "step": 12771 }, { "epoch": 0.3427436668097896, "grad_norm": 0.2412109375, "learning_rate": 0.001181483887018063, "loss": 0.9252, "step": 12772 }, { "epoch": 0.34277050236152856, "grad_norm": 0.25390625, "learning_rate": 0.0011814769588568226, "loss": 1.0761, "step": 12773 }, { "epoch": 0.3427973379132675, "grad_norm": 0.244140625, "learning_rate": 0.001181470029419993, "loss": 0.9911, "step": 12774 }, { "epoch": 0.34282417346500643, "grad_norm": 0.2578125, "learning_rate": 0.0011814630987075892, "loss": 1.0695, "step": 12775 }, { "epoch": 0.34285100901674537, "grad_norm": 0.2470703125, "learning_rate": 0.0011814561667196264, "loss": 0.9969, "step": 12776 }, { "epoch": 0.3428778445684843, "grad_norm": 0.2392578125, "learning_rate": 0.0011814492334561197, "loss": 0.9756, "step": 12777 }, { "epoch": 0.3429046801202233, "grad_norm": 0.2333984375, "learning_rate": 0.0011814422989170849, "loss": 0.9199, "step": 12778 }, { "epoch": 0.34293151567196223, "grad_norm": 0.248046875, "learning_rate": 0.0011814353631025363, "loss": 1.1245, "step": 12779 }, { "epoch": 0.34295835122370116, "grad_norm": 0.2373046875, "learning_rate": 0.0011814284260124896, "loss": 0.9929, "step": 12780 }, { "epoch": 0.3429851867754401, "grad_norm": 0.2451171875, "learning_rate": 0.0011814214876469601, "loss": 1.0328, "step": 12781 }, { "epoch": 0.34301202232717903, "grad_norm": 0.25390625, "learning_rate": 0.001181414548005963, "loss": 1.0805, "step": 12782 }, { "epoch": 0.34303885787891797, "grad_norm": 0.2421875, "learning_rate": 0.0011814076070895134, "loss": 0.9808, "step": 12783 }, { "epoch": 0.34306569343065696, "grad_norm": 0.2578125, "learning_rate": 0.0011814006648976265, "loss": 1.1077, "step": 12784 }, { "epoch": 0.3430925289823959, "grad_norm": 0.259765625, "learning_rate": 0.0011813937214303176, "loss": 1.1055, "step": 12785 }, { "epoch": 0.34311936453413483, "grad_norm": 0.2451171875, "learning_rate": 0.001181386776687602, "loss": 0.9407, "step": 12786 }, { "epoch": 0.34314620008587376, "grad_norm": 0.255859375, "learning_rate": 0.0011813798306694948, "loss": 1.026, "step": 12787 }, { "epoch": 0.3431730356376127, "grad_norm": 0.25390625, "learning_rate": 0.0011813728833760112, "loss": 1.056, "step": 12788 }, { "epoch": 0.34319987118935164, "grad_norm": 0.240234375, "learning_rate": 0.0011813659348071668, "loss": 1.0235, "step": 12789 }, { "epoch": 0.34322670674109057, "grad_norm": 0.2373046875, "learning_rate": 0.0011813589849629764, "loss": 0.9632, "step": 12790 }, { "epoch": 0.34325354229282956, "grad_norm": 0.2373046875, "learning_rate": 0.0011813520338434555, "loss": 0.9648, "step": 12791 }, { "epoch": 0.3432803778445685, "grad_norm": 0.2333984375, "learning_rate": 0.0011813450814486192, "loss": 0.9, "step": 12792 }, { "epoch": 0.34330721339630743, "grad_norm": 0.236328125, "learning_rate": 0.0011813381277784828, "loss": 0.9699, "step": 12793 }, { "epoch": 0.34333404894804637, "grad_norm": 0.25390625, "learning_rate": 0.0011813311728330615, "loss": 1.0722, "step": 12794 }, { "epoch": 0.3433608844997853, "grad_norm": 0.244140625, "learning_rate": 0.001181324216612371, "loss": 0.9735, "step": 12795 }, { "epoch": 0.34338772005152424, "grad_norm": 0.25390625, "learning_rate": 0.0011813172591164259, "loss": 1.0691, "step": 12796 }, { "epoch": 0.3434145556032632, "grad_norm": 0.244140625, "learning_rate": 0.0011813103003452418, "loss": 0.9777, "step": 12797 }, { "epoch": 0.34344139115500216, "grad_norm": 0.2490234375, "learning_rate": 0.0011813033402988337, "loss": 1.0311, "step": 12798 }, { "epoch": 0.3434682267067411, "grad_norm": 0.2412109375, "learning_rate": 0.0011812963789772175, "loss": 0.9183, "step": 12799 }, { "epoch": 0.34349506225848003, "grad_norm": 0.24609375, "learning_rate": 0.0011812894163804077, "loss": 1.0487, "step": 12800 }, { "epoch": 0.34352189781021897, "grad_norm": 0.2314453125, "learning_rate": 0.0011812824525084204, "loss": 0.8697, "step": 12801 }, { "epoch": 0.3435487333619579, "grad_norm": 0.24609375, "learning_rate": 0.00118127548736127, "loss": 1.0076, "step": 12802 }, { "epoch": 0.34357556891369684, "grad_norm": 0.251953125, "learning_rate": 0.0011812685209389724, "loss": 1.0136, "step": 12803 }, { "epoch": 0.34360240446543583, "grad_norm": 0.244140625, "learning_rate": 0.0011812615532415425, "loss": 0.9772, "step": 12804 }, { "epoch": 0.34362924001717476, "grad_norm": 0.25390625, "learning_rate": 0.001181254584268996, "loss": 1.0388, "step": 12805 }, { "epoch": 0.3436560755689137, "grad_norm": 0.2294921875, "learning_rate": 0.001181247614021348, "loss": 0.8993, "step": 12806 }, { "epoch": 0.34368291112065263, "grad_norm": 0.2451171875, "learning_rate": 0.0011812406424986134, "loss": 1.0103, "step": 12807 }, { "epoch": 0.34370974667239157, "grad_norm": 0.259765625, "learning_rate": 0.0011812336697008081, "loss": 1.1218, "step": 12808 }, { "epoch": 0.3437365822241305, "grad_norm": 0.244140625, "learning_rate": 0.001181226695627947, "loss": 0.9695, "step": 12809 }, { "epoch": 0.3437634177758695, "grad_norm": 0.25, "learning_rate": 0.0011812197202800455, "loss": 0.9991, "step": 12810 }, { "epoch": 0.34379025332760843, "grad_norm": 0.26171875, "learning_rate": 0.001181212743657119, "loss": 1.1067, "step": 12811 }, { "epoch": 0.34381708887934737, "grad_norm": 0.24609375, "learning_rate": 0.0011812057657591827, "loss": 1.0015, "step": 12812 }, { "epoch": 0.3438439244310863, "grad_norm": 0.2353515625, "learning_rate": 0.001181198786586252, "loss": 0.8929, "step": 12813 }, { "epoch": 0.34387075998282524, "grad_norm": 0.24609375, "learning_rate": 0.0011811918061383421, "loss": 1.0119, "step": 12814 }, { "epoch": 0.34389759553456417, "grad_norm": 0.259765625, "learning_rate": 0.0011811848244154685, "loss": 1.0665, "step": 12815 }, { "epoch": 0.34392443108630316, "grad_norm": 0.236328125, "learning_rate": 0.0011811778414176463, "loss": 0.9756, "step": 12816 }, { "epoch": 0.3439512666380421, "grad_norm": 0.244140625, "learning_rate": 0.0011811708571448907, "loss": 1.0175, "step": 12817 }, { "epoch": 0.34397810218978103, "grad_norm": 0.25390625, "learning_rate": 0.0011811638715972176, "loss": 1.091, "step": 12818 }, { "epoch": 0.34400493774151997, "grad_norm": 0.240234375, "learning_rate": 0.0011811568847746416, "loss": 0.9776, "step": 12819 }, { "epoch": 0.3440317732932589, "grad_norm": 0.2392578125, "learning_rate": 0.0011811498966771785, "loss": 0.9716, "step": 12820 }, { "epoch": 0.34405860884499784, "grad_norm": 0.259765625, "learning_rate": 0.0011811429073048435, "loss": 1.0384, "step": 12821 }, { "epoch": 0.3440854443967368, "grad_norm": 0.25, "learning_rate": 0.001181135916657652, "loss": 1.0036, "step": 12822 }, { "epoch": 0.34411227994847576, "grad_norm": 0.244140625, "learning_rate": 0.0011811289247356193, "loss": 1.0072, "step": 12823 }, { "epoch": 0.3441391155002147, "grad_norm": 0.2578125, "learning_rate": 0.0011811219315387607, "loss": 1.0571, "step": 12824 }, { "epoch": 0.34416595105195363, "grad_norm": 0.2578125, "learning_rate": 0.0011811149370670915, "loss": 1.0576, "step": 12825 }, { "epoch": 0.34419278660369257, "grad_norm": 0.2490234375, "learning_rate": 0.001181107941320627, "loss": 1.0319, "step": 12826 }, { "epoch": 0.3442196221554315, "grad_norm": 0.236328125, "learning_rate": 0.0011811009442993826, "loss": 0.9408, "step": 12827 }, { "epoch": 0.34424645770717044, "grad_norm": 0.2412109375, "learning_rate": 0.001181093946003374, "loss": 1.0387, "step": 12828 }, { "epoch": 0.34427329325890943, "grad_norm": 0.2353515625, "learning_rate": 0.0011810869464326161, "loss": 0.898, "step": 12829 }, { "epoch": 0.34430012881064836, "grad_norm": 0.244140625, "learning_rate": 0.0011810799455871243, "loss": 0.9663, "step": 12830 }, { "epoch": 0.3443269643623873, "grad_norm": 0.2421875, "learning_rate": 0.0011810729434669142, "loss": 1.003, "step": 12831 }, { "epoch": 0.34435379991412624, "grad_norm": 0.25390625, "learning_rate": 0.001181065940072001, "loss": 1.0425, "step": 12832 }, { "epoch": 0.34438063546586517, "grad_norm": 0.228515625, "learning_rate": 0.0011810589354023999, "loss": 0.9596, "step": 12833 }, { "epoch": 0.3444074710176041, "grad_norm": 0.25, "learning_rate": 0.0011810519294581266, "loss": 1.0767, "step": 12834 }, { "epoch": 0.34443430656934304, "grad_norm": 0.2353515625, "learning_rate": 0.0011810449222391963, "loss": 0.9324, "step": 12835 }, { "epoch": 0.34446114212108203, "grad_norm": 0.2470703125, "learning_rate": 0.001181037913745624, "loss": 1.0667, "step": 12836 }, { "epoch": 0.34448797767282097, "grad_norm": 0.240234375, "learning_rate": 0.0011810309039774261, "loss": 0.9767, "step": 12837 }, { "epoch": 0.3445148132245599, "grad_norm": 0.2314453125, "learning_rate": 0.001181023892934617, "loss": 0.9645, "step": 12838 }, { "epoch": 0.34454164877629884, "grad_norm": 0.23046875, "learning_rate": 0.0011810168806172124, "loss": 0.868, "step": 12839 }, { "epoch": 0.34456848432803777, "grad_norm": 0.2412109375, "learning_rate": 0.0011810098670252279, "loss": 1.038, "step": 12840 }, { "epoch": 0.3445953198797767, "grad_norm": 0.220703125, "learning_rate": 0.0011810028521586785, "loss": 0.8401, "step": 12841 }, { "epoch": 0.3446221554315157, "grad_norm": 0.2421875, "learning_rate": 0.0011809958360175798, "loss": 0.9619, "step": 12842 }, { "epoch": 0.34464899098325463, "grad_norm": 0.2451171875, "learning_rate": 0.0011809888186019473, "loss": 0.9934, "step": 12843 }, { "epoch": 0.34467582653499357, "grad_norm": 0.251953125, "learning_rate": 0.001180981799911796, "loss": 0.9616, "step": 12844 }, { "epoch": 0.3447026620867325, "grad_norm": 0.25390625, "learning_rate": 0.001180974779947142, "loss": 1.0993, "step": 12845 }, { "epoch": 0.34472949763847144, "grad_norm": 0.2431640625, "learning_rate": 0.0011809677587079999, "loss": 0.9431, "step": 12846 }, { "epoch": 0.3447563331902104, "grad_norm": 0.25390625, "learning_rate": 0.0011809607361943856, "loss": 1.0137, "step": 12847 }, { "epoch": 0.3447831687419493, "grad_norm": 0.251953125, "learning_rate": 0.0011809537124063143, "loss": 1.075, "step": 12848 }, { "epoch": 0.3448100042936883, "grad_norm": 0.228515625, "learning_rate": 0.0011809466873438014, "loss": 0.8976, "step": 12849 }, { "epoch": 0.34483683984542723, "grad_norm": 0.23828125, "learning_rate": 0.0011809396610068625, "loss": 1.028, "step": 12850 }, { "epoch": 0.34486367539716617, "grad_norm": 0.255859375, "learning_rate": 0.001180932633395513, "loss": 1.0053, "step": 12851 }, { "epoch": 0.3448905109489051, "grad_norm": 0.2421875, "learning_rate": 0.001180925604509768, "loss": 1.0353, "step": 12852 }, { "epoch": 0.34491734650064404, "grad_norm": 0.2314453125, "learning_rate": 0.0011809185743496431, "loss": 0.9236, "step": 12853 }, { "epoch": 0.344944182052383, "grad_norm": 0.2421875, "learning_rate": 0.001180911542915154, "loss": 1.0605, "step": 12854 }, { "epoch": 0.34497101760412197, "grad_norm": 0.244140625, "learning_rate": 0.0011809045102063158, "loss": 1.0191, "step": 12855 }, { "epoch": 0.3449978531558609, "grad_norm": 0.24609375, "learning_rate": 0.001180897476223144, "loss": 0.9896, "step": 12856 }, { "epoch": 0.34502468870759984, "grad_norm": 0.23828125, "learning_rate": 0.0011808904409656542, "loss": 0.9994, "step": 12857 }, { "epoch": 0.34505152425933877, "grad_norm": 0.25390625, "learning_rate": 0.0011808834044338612, "loss": 1.0871, "step": 12858 }, { "epoch": 0.3450783598110777, "grad_norm": 0.23828125, "learning_rate": 0.0011808763666277813, "loss": 1.0289, "step": 12859 }, { "epoch": 0.34510519536281664, "grad_norm": 0.2431640625, "learning_rate": 0.0011808693275474295, "loss": 1.0351, "step": 12860 }, { "epoch": 0.3451320309145556, "grad_norm": 0.2451171875, "learning_rate": 0.0011808622871928211, "loss": 0.9382, "step": 12861 }, { "epoch": 0.34515886646629457, "grad_norm": 0.23828125, "learning_rate": 0.001180855245563972, "loss": 0.9601, "step": 12862 }, { "epoch": 0.3451857020180335, "grad_norm": 0.263671875, "learning_rate": 0.0011808482026608971, "loss": 1.1193, "step": 12863 }, { "epoch": 0.34521253756977244, "grad_norm": 0.244140625, "learning_rate": 0.0011808411584836125, "loss": 0.9412, "step": 12864 }, { "epoch": 0.3452393731215114, "grad_norm": 0.2490234375, "learning_rate": 0.0011808341130321329, "loss": 1.0263, "step": 12865 }, { "epoch": 0.3452662086732503, "grad_norm": 0.251953125, "learning_rate": 0.0011808270663064744, "loss": 1.093, "step": 12866 }, { "epoch": 0.34529304422498924, "grad_norm": 0.251953125, "learning_rate": 0.001180820018306652, "loss": 1.0798, "step": 12867 }, { "epoch": 0.34531987977672823, "grad_norm": 0.2470703125, "learning_rate": 0.0011808129690326816, "loss": 1.0647, "step": 12868 }, { "epoch": 0.34534671532846717, "grad_norm": 0.2451171875, "learning_rate": 0.001180805918484578, "loss": 1.0028, "step": 12869 }, { "epoch": 0.3453735508802061, "grad_norm": 0.255859375, "learning_rate": 0.0011807988666623574, "loss": 1.0123, "step": 12870 }, { "epoch": 0.34540038643194504, "grad_norm": 0.23828125, "learning_rate": 0.0011807918135660351, "loss": 0.9035, "step": 12871 }, { "epoch": 0.345427221983684, "grad_norm": 0.240234375, "learning_rate": 0.0011807847591956261, "loss": 0.9687, "step": 12872 }, { "epoch": 0.3454540575354229, "grad_norm": 0.24609375, "learning_rate": 0.0011807777035511466, "loss": 1.0776, "step": 12873 }, { "epoch": 0.34548089308716184, "grad_norm": 0.2333984375, "learning_rate": 0.0011807706466326114, "loss": 0.9559, "step": 12874 }, { "epoch": 0.34550772863890084, "grad_norm": 0.2470703125, "learning_rate": 0.001180763588440036, "loss": 1.0719, "step": 12875 }, { "epoch": 0.34553456419063977, "grad_norm": 0.25, "learning_rate": 0.0011807565289734367, "loss": 1.0618, "step": 12876 }, { "epoch": 0.3455613997423787, "grad_norm": 0.232421875, "learning_rate": 0.0011807494682328282, "loss": 0.9573, "step": 12877 }, { "epoch": 0.34558823529411764, "grad_norm": 0.2451171875, "learning_rate": 0.0011807424062182262, "loss": 1.0702, "step": 12878 }, { "epoch": 0.3456150708458566, "grad_norm": 0.24609375, "learning_rate": 0.0011807353429296463, "loss": 1.056, "step": 12879 }, { "epoch": 0.3456419063975955, "grad_norm": 0.2294921875, "learning_rate": 0.001180728278367104, "loss": 0.8909, "step": 12880 }, { "epoch": 0.3456687419493345, "grad_norm": 0.2578125, "learning_rate": 0.0011807212125306144, "loss": 1.0559, "step": 12881 }, { "epoch": 0.34569557750107344, "grad_norm": 0.2333984375, "learning_rate": 0.0011807141454201937, "loss": 0.9125, "step": 12882 }, { "epoch": 0.34572241305281237, "grad_norm": 0.2578125, "learning_rate": 0.0011807070770358567, "loss": 1.1082, "step": 12883 }, { "epoch": 0.3457492486045513, "grad_norm": 0.255859375, "learning_rate": 0.0011807000073776194, "loss": 1.0217, "step": 12884 }, { "epoch": 0.34577608415629024, "grad_norm": 0.2470703125, "learning_rate": 0.001180692936445497, "loss": 0.9696, "step": 12885 }, { "epoch": 0.3458029197080292, "grad_norm": 0.2421875, "learning_rate": 0.0011806858642395052, "loss": 0.9576, "step": 12886 }, { "epoch": 0.34582975525976817, "grad_norm": 0.25390625, "learning_rate": 0.0011806787907596595, "loss": 1.0776, "step": 12887 }, { "epoch": 0.3458565908115071, "grad_norm": 0.2314453125, "learning_rate": 0.0011806717160059753, "loss": 0.9667, "step": 12888 }, { "epoch": 0.34588342636324604, "grad_norm": 0.232421875, "learning_rate": 0.0011806646399784681, "loss": 0.9245, "step": 12889 }, { "epoch": 0.345910261914985, "grad_norm": 0.2392578125, "learning_rate": 0.0011806575626771537, "loss": 0.9013, "step": 12890 }, { "epoch": 0.3459370974667239, "grad_norm": 0.24609375, "learning_rate": 0.0011806504841020474, "loss": 0.985, "step": 12891 }, { "epoch": 0.34596393301846284, "grad_norm": 0.255859375, "learning_rate": 0.0011806434042531647, "loss": 1.0088, "step": 12892 }, { "epoch": 0.3459907685702018, "grad_norm": 0.2412109375, "learning_rate": 0.0011806363231305211, "loss": 1.0048, "step": 12893 }, { "epoch": 0.34601760412194077, "grad_norm": 0.25390625, "learning_rate": 0.0011806292407341324, "loss": 1.0508, "step": 12894 }, { "epoch": 0.3460444396736797, "grad_norm": 0.25390625, "learning_rate": 0.001180622157064014, "loss": 1.1058, "step": 12895 }, { "epoch": 0.34607127522541864, "grad_norm": 0.2470703125, "learning_rate": 0.0011806150721201812, "loss": 1.05, "step": 12896 }, { "epoch": 0.3460981107771576, "grad_norm": 0.2412109375, "learning_rate": 0.00118060798590265, "loss": 1.0309, "step": 12897 }, { "epoch": 0.3461249463288965, "grad_norm": 0.2490234375, "learning_rate": 0.0011806008984114355, "loss": 0.9951, "step": 12898 }, { "epoch": 0.34615178188063545, "grad_norm": 0.2421875, "learning_rate": 0.0011805938096465538, "loss": 0.9619, "step": 12899 }, { "epoch": 0.34617861743237444, "grad_norm": 0.259765625, "learning_rate": 0.0011805867196080196, "loss": 1.0471, "step": 12900 }, { "epoch": 0.34620545298411337, "grad_norm": 0.265625, "learning_rate": 0.0011805796282958493, "loss": 1.0435, "step": 12901 }, { "epoch": 0.3462322885358523, "grad_norm": 0.24609375, "learning_rate": 0.0011805725357100579, "loss": 1.0238, "step": 12902 }, { "epoch": 0.34625912408759124, "grad_norm": 0.251953125, "learning_rate": 0.0011805654418506613, "loss": 1.0481, "step": 12903 }, { "epoch": 0.3462859596393302, "grad_norm": 0.2373046875, "learning_rate": 0.0011805583467176747, "loss": 0.9744, "step": 12904 }, { "epoch": 0.3463127951910691, "grad_norm": 0.244140625, "learning_rate": 0.0011805512503111142, "loss": 0.9796, "step": 12905 }, { "epoch": 0.34633963074280805, "grad_norm": 0.248046875, "learning_rate": 0.0011805441526309948, "loss": 1.02, "step": 12906 }, { "epoch": 0.34636646629454704, "grad_norm": 0.2373046875, "learning_rate": 0.0011805370536773325, "loss": 0.9647, "step": 12907 }, { "epoch": 0.346393301846286, "grad_norm": 0.255859375, "learning_rate": 0.0011805299534501427, "loss": 1.1307, "step": 12908 }, { "epoch": 0.3464201373980249, "grad_norm": 0.2578125, "learning_rate": 0.001180522851949441, "loss": 1.1134, "step": 12909 }, { "epoch": 0.34644697294976384, "grad_norm": 0.228515625, "learning_rate": 0.0011805157491752427, "loss": 0.9494, "step": 12910 }, { "epoch": 0.3464738085015028, "grad_norm": 0.23828125, "learning_rate": 0.0011805086451275638, "loss": 0.9386, "step": 12911 }, { "epoch": 0.3465006440532417, "grad_norm": 0.2451171875, "learning_rate": 0.0011805015398064196, "loss": 1.0011, "step": 12912 }, { "epoch": 0.3465274796049807, "grad_norm": 0.2412109375, "learning_rate": 0.0011804944332118263, "loss": 0.9543, "step": 12913 }, { "epoch": 0.34655431515671964, "grad_norm": 0.2431640625, "learning_rate": 0.0011804873253437986, "loss": 0.9954, "step": 12914 }, { "epoch": 0.3465811507084586, "grad_norm": 0.2470703125, "learning_rate": 0.0011804802162023525, "loss": 1.018, "step": 12915 }, { "epoch": 0.3466079862601975, "grad_norm": 0.25390625, "learning_rate": 0.0011804731057875036, "loss": 1.1401, "step": 12916 }, { "epoch": 0.34663482181193644, "grad_norm": 0.2451171875, "learning_rate": 0.0011804659940992674, "loss": 1.0221, "step": 12917 }, { "epoch": 0.3466616573636754, "grad_norm": 0.248046875, "learning_rate": 0.0011804588811376597, "loss": 1.0805, "step": 12918 }, { "epoch": 0.3466884929154143, "grad_norm": 0.25, "learning_rate": 0.001180451766902696, "loss": 1.0226, "step": 12919 }, { "epoch": 0.3467153284671533, "grad_norm": 0.25390625, "learning_rate": 0.0011804446513943919, "loss": 0.9972, "step": 12920 }, { "epoch": 0.34674216401889224, "grad_norm": 0.265625, "learning_rate": 0.0011804375346127628, "loss": 1.0837, "step": 12921 }, { "epoch": 0.3467689995706312, "grad_norm": 0.2353515625, "learning_rate": 0.0011804304165578248, "loss": 0.8994, "step": 12922 }, { "epoch": 0.3467958351223701, "grad_norm": 0.251953125, "learning_rate": 0.001180423297229593, "loss": 1.0221, "step": 12923 }, { "epoch": 0.34682267067410905, "grad_norm": 0.248046875, "learning_rate": 0.0011804161766280836, "loss": 0.9634, "step": 12924 }, { "epoch": 0.346849506225848, "grad_norm": 0.23828125, "learning_rate": 0.0011804090547533116, "loss": 0.9708, "step": 12925 }, { "epoch": 0.34687634177758697, "grad_norm": 0.25, "learning_rate": 0.0011804019316052929, "loss": 0.9963, "step": 12926 }, { "epoch": 0.3469031773293259, "grad_norm": 0.259765625, "learning_rate": 0.0011803948071840433, "loss": 1.0239, "step": 12927 }, { "epoch": 0.34693001288106484, "grad_norm": 0.2314453125, "learning_rate": 0.0011803876814895778, "loss": 0.9185, "step": 12928 }, { "epoch": 0.3469568484328038, "grad_norm": 0.251953125, "learning_rate": 0.001180380554521913, "loss": 1.0767, "step": 12929 }, { "epoch": 0.3469836839845427, "grad_norm": 0.2255859375, "learning_rate": 0.0011803734262810636, "loss": 0.8715, "step": 12930 }, { "epoch": 0.34701051953628165, "grad_norm": 0.2294921875, "learning_rate": 0.001180366296767046, "loss": 0.9272, "step": 12931 }, { "epoch": 0.3470373550880206, "grad_norm": 0.251953125, "learning_rate": 0.0011803591659798752, "loss": 1.0511, "step": 12932 }, { "epoch": 0.3470641906397596, "grad_norm": 0.2412109375, "learning_rate": 0.0011803520339195672, "loss": 0.9826, "step": 12933 }, { "epoch": 0.3470910261914985, "grad_norm": 0.2431640625, "learning_rate": 0.0011803449005861378, "loss": 0.9866, "step": 12934 }, { "epoch": 0.34711786174323744, "grad_norm": 0.25390625, "learning_rate": 0.0011803377659796023, "loss": 1.0831, "step": 12935 }, { "epoch": 0.3471446972949764, "grad_norm": 0.2470703125, "learning_rate": 0.0011803306300999764, "loss": 1.0175, "step": 12936 }, { "epoch": 0.3471715328467153, "grad_norm": 0.2353515625, "learning_rate": 0.001180323492947276, "loss": 0.9551, "step": 12937 }, { "epoch": 0.34719836839845425, "grad_norm": 0.248046875, "learning_rate": 0.0011803163545215164, "loss": 0.984, "step": 12938 }, { "epoch": 0.34722520395019324, "grad_norm": 0.2490234375, "learning_rate": 0.0011803092148227135, "loss": 1.0156, "step": 12939 }, { "epoch": 0.3472520395019322, "grad_norm": 0.2431640625, "learning_rate": 0.001180302073850883, "loss": 1.0354, "step": 12940 }, { "epoch": 0.3472788750536711, "grad_norm": 0.2470703125, "learning_rate": 0.0011802949316060403, "loss": 1.0471, "step": 12941 }, { "epoch": 0.34730571060541005, "grad_norm": 0.232421875, "learning_rate": 0.0011802877880882014, "loss": 0.877, "step": 12942 }, { "epoch": 0.347332546157149, "grad_norm": 0.26171875, "learning_rate": 0.0011802806432973819, "loss": 1.0556, "step": 12943 }, { "epoch": 0.3473593817088879, "grad_norm": 0.2451171875, "learning_rate": 0.0011802734972335973, "loss": 1.0317, "step": 12944 }, { "epoch": 0.3473862172606269, "grad_norm": 0.2451171875, "learning_rate": 0.0011802663498968631, "loss": 1.0074, "step": 12945 }, { "epoch": 0.34741305281236584, "grad_norm": 0.259765625, "learning_rate": 0.0011802592012871956, "loss": 1.0917, "step": 12946 }, { "epoch": 0.3474398883641048, "grad_norm": 0.240234375, "learning_rate": 0.0011802520514046099, "loss": 0.993, "step": 12947 }, { "epoch": 0.3474667239158437, "grad_norm": 0.234375, "learning_rate": 0.0011802449002491222, "loss": 0.9479, "step": 12948 }, { "epoch": 0.34749355946758265, "grad_norm": 0.228515625, "learning_rate": 0.0011802377478207476, "loss": 0.9051, "step": 12949 }, { "epoch": 0.3475203950193216, "grad_norm": 0.265625, "learning_rate": 0.0011802305941195023, "loss": 1.0955, "step": 12950 }, { "epoch": 0.3475472305710605, "grad_norm": 0.314453125, "learning_rate": 0.0011802234391454017, "loss": 1.2543, "step": 12951 }, { "epoch": 0.3475740661227995, "grad_norm": 0.30078125, "learning_rate": 0.0011802162828984616, "loss": 1.1932, "step": 12952 }, { "epoch": 0.34760090167453844, "grad_norm": 0.287109375, "learning_rate": 0.0011802091253786976, "loss": 1.0962, "step": 12953 }, { "epoch": 0.3476277372262774, "grad_norm": 0.26953125, "learning_rate": 0.0011802019665861255, "loss": 1.1008, "step": 12954 }, { "epoch": 0.3476545727780163, "grad_norm": 0.28515625, "learning_rate": 0.001180194806520761, "loss": 1.1364, "step": 12955 }, { "epoch": 0.34768140832975525, "grad_norm": 0.294921875, "learning_rate": 0.0011801876451826199, "loss": 1.0914, "step": 12956 }, { "epoch": 0.3477082438814942, "grad_norm": 0.294921875, "learning_rate": 0.0011801804825717177, "loss": 1.0956, "step": 12957 }, { "epoch": 0.3477350794332332, "grad_norm": 0.294921875, "learning_rate": 0.0011801733186880701, "loss": 1.1187, "step": 12958 }, { "epoch": 0.3477619149849721, "grad_norm": 0.2578125, "learning_rate": 0.0011801661535316929, "loss": 1.131, "step": 12959 }, { "epoch": 0.34778875053671104, "grad_norm": 0.255859375, "learning_rate": 0.001180158987102602, "loss": 1.0162, "step": 12960 }, { "epoch": 0.34781558608845, "grad_norm": 0.2734375, "learning_rate": 0.001180151819400813, "loss": 1.2345, "step": 12961 }, { "epoch": 0.3478424216401889, "grad_norm": 0.259765625, "learning_rate": 0.0011801446504263416, "loss": 1.0719, "step": 12962 }, { "epoch": 0.34786925719192785, "grad_norm": 0.28125, "learning_rate": 0.0011801374801792036, "loss": 1.236, "step": 12963 }, { "epoch": 0.3478960927436668, "grad_norm": 0.263671875, "learning_rate": 0.0011801303086594144, "loss": 1.0828, "step": 12964 }, { "epoch": 0.3479229282954058, "grad_norm": 0.26953125, "learning_rate": 0.0011801231358669901, "loss": 1.0345, "step": 12965 }, { "epoch": 0.3479497638471447, "grad_norm": 0.283203125, "learning_rate": 0.0011801159618019463, "loss": 1.21, "step": 12966 }, { "epoch": 0.34797659939888365, "grad_norm": 0.259765625, "learning_rate": 0.0011801087864642987, "loss": 1.1494, "step": 12967 }, { "epoch": 0.3480034349506226, "grad_norm": 0.25390625, "learning_rate": 0.001180101609854063, "loss": 1.1162, "step": 12968 }, { "epoch": 0.3480302705023615, "grad_norm": 0.26171875, "learning_rate": 0.0011800944319712554, "loss": 1.0975, "step": 12969 }, { "epoch": 0.34805710605410045, "grad_norm": 0.259765625, "learning_rate": 0.001180087252815891, "loss": 1.133, "step": 12970 }, { "epoch": 0.34808394160583944, "grad_norm": 0.2734375, "learning_rate": 0.001180080072387986, "loss": 1.2508, "step": 12971 }, { "epoch": 0.3481107771575784, "grad_norm": 0.2578125, "learning_rate": 0.001180072890687556, "loss": 1.1829, "step": 12972 }, { "epoch": 0.3481376127093173, "grad_norm": 0.2578125, "learning_rate": 0.0011800657077146164, "loss": 1.0313, "step": 12973 }, { "epoch": 0.34816444826105625, "grad_norm": 0.244140625, "learning_rate": 0.0011800585234691835, "loss": 1.071, "step": 12974 }, { "epoch": 0.3481912838127952, "grad_norm": 0.259765625, "learning_rate": 0.001180051337951273, "loss": 1.1253, "step": 12975 }, { "epoch": 0.3482181193645341, "grad_norm": 0.26171875, "learning_rate": 0.0011800441511609003, "loss": 1.1028, "step": 12976 }, { "epoch": 0.34824495491627305, "grad_norm": 0.279296875, "learning_rate": 0.0011800369630980815, "loss": 1.2005, "step": 12977 }, { "epoch": 0.34827179046801204, "grad_norm": 0.251953125, "learning_rate": 0.0011800297737628322, "loss": 1.0727, "step": 12978 }, { "epoch": 0.348298626019751, "grad_norm": 0.2734375, "learning_rate": 0.0011800225831551682, "loss": 1.21, "step": 12979 }, { "epoch": 0.3483254615714899, "grad_norm": 0.27734375, "learning_rate": 0.0011800153912751054, "loss": 1.2235, "step": 12980 }, { "epoch": 0.34835229712322885, "grad_norm": 0.267578125, "learning_rate": 0.0011800081981226595, "loss": 1.2345, "step": 12981 }, { "epoch": 0.3483791326749678, "grad_norm": 0.25, "learning_rate": 0.001180001003697846, "loss": 1.0551, "step": 12982 }, { "epoch": 0.3484059682267067, "grad_norm": 0.26953125, "learning_rate": 0.0011799938080006814, "loss": 1.0457, "step": 12983 }, { "epoch": 0.3484328037784457, "grad_norm": 0.26953125, "learning_rate": 0.0011799866110311806, "loss": 1.1856, "step": 12984 }, { "epoch": 0.34845963933018465, "grad_norm": 0.26953125, "learning_rate": 0.00117997941278936, "loss": 1.2097, "step": 12985 }, { "epoch": 0.3484864748819236, "grad_norm": 0.26953125, "learning_rate": 0.0011799722132752352, "loss": 1.103, "step": 12986 }, { "epoch": 0.3485133104336625, "grad_norm": 0.2421875, "learning_rate": 0.0011799650124888219, "loss": 1.0698, "step": 12987 }, { "epoch": 0.34854014598540145, "grad_norm": 0.251953125, "learning_rate": 0.0011799578104301361, "loss": 1.1427, "step": 12988 }, { "epoch": 0.3485669815371404, "grad_norm": 0.2373046875, "learning_rate": 0.0011799506070991933, "loss": 0.9612, "step": 12989 }, { "epoch": 0.3485938170888793, "grad_norm": 0.2490234375, "learning_rate": 0.0011799434024960095, "loss": 1.0627, "step": 12990 }, { "epoch": 0.3486206526406183, "grad_norm": 0.255859375, "learning_rate": 0.0011799361966206006, "loss": 1.124, "step": 12991 }, { "epoch": 0.34864748819235725, "grad_norm": 0.25390625, "learning_rate": 0.0011799289894729825, "loss": 1.1089, "step": 12992 }, { "epoch": 0.3486743237440962, "grad_norm": 0.26953125, "learning_rate": 0.0011799217810531705, "loss": 1.2281, "step": 12993 }, { "epoch": 0.3487011592958351, "grad_norm": 0.271484375, "learning_rate": 0.001179914571361181, "loss": 1.1772, "step": 12994 }, { "epoch": 0.34872799484757405, "grad_norm": 0.263671875, "learning_rate": 0.0011799073603970293, "loss": 1.1046, "step": 12995 }, { "epoch": 0.348754830399313, "grad_norm": 0.251953125, "learning_rate": 0.0011799001481607317, "loss": 1.0971, "step": 12996 }, { "epoch": 0.348781665951052, "grad_norm": 0.26953125, "learning_rate": 0.0011798929346523036, "loss": 1.1875, "step": 12997 }, { "epoch": 0.3488085015027909, "grad_norm": 0.24609375, "learning_rate": 0.001179885719871761, "loss": 1.0215, "step": 12998 }, { "epoch": 0.34883533705452985, "grad_norm": 0.255859375, "learning_rate": 0.0011798785038191198, "loss": 1.0146, "step": 12999 }, { "epoch": 0.3488621726062688, "grad_norm": 0.279296875, "learning_rate": 0.0011798712864943958, "loss": 1.2448, "step": 13000 }, { "epoch": 0.3488890081580077, "grad_norm": 0.25390625, "learning_rate": 0.0011798640678976047, "loss": 1.132, "step": 13001 }, { "epoch": 0.34891584370974665, "grad_norm": 0.255859375, "learning_rate": 0.0011798568480287624, "loss": 1.1031, "step": 13002 }, { "epoch": 0.3489426792614856, "grad_norm": 0.2470703125, "learning_rate": 0.0011798496268878849, "loss": 1.0171, "step": 13003 }, { "epoch": 0.3489695148132246, "grad_norm": 0.255859375, "learning_rate": 0.001179842404474988, "loss": 1.0593, "step": 13004 }, { "epoch": 0.3489963503649635, "grad_norm": 0.283203125, "learning_rate": 0.0011798351807900873, "loss": 1.2698, "step": 13005 }, { "epoch": 0.34902318591670245, "grad_norm": 0.2578125, "learning_rate": 0.0011798279558331987, "loss": 1.1797, "step": 13006 }, { "epoch": 0.3490500214684414, "grad_norm": 0.2412109375, "learning_rate": 0.0011798207296043384, "loss": 0.9847, "step": 13007 }, { "epoch": 0.3490768570201803, "grad_norm": 0.271484375, "learning_rate": 0.001179813502103522, "loss": 1.2154, "step": 13008 }, { "epoch": 0.34910369257191926, "grad_norm": 0.265625, "learning_rate": 0.0011798062733307652, "loss": 1.163, "step": 13009 }, { "epoch": 0.34913052812365825, "grad_norm": 0.25390625, "learning_rate": 0.001179799043286084, "loss": 1.0511, "step": 13010 }, { "epoch": 0.3491573636753972, "grad_norm": 0.255859375, "learning_rate": 0.0011797918119694944, "loss": 1.0889, "step": 13011 }, { "epoch": 0.3491841992271361, "grad_norm": 0.2470703125, "learning_rate": 0.001179784579381012, "loss": 1.1024, "step": 13012 }, { "epoch": 0.34921103477887505, "grad_norm": 0.265625, "learning_rate": 0.001179777345520653, "loss": 1.1274, "step": 13013 }, { "epoch": 0.349237870330614, "grad_norm": 0.267578125, "learning_rate": 0.001179770110388433, "loss": 1.1931, "step": 13014 }, { "epoch": 0.3492647058823529, "grad_norm": 0.2392578125, "learning_rate": 0.0011797628739843677, "loss": 1.0156, "step": 13015 }, { "epoch": 0.3492915414340919, "grad_norm": 0.263671875, "learning_rate": 0.0011797556363084738, "loss": 1.1329, "step": 13016 }, { "epoch": 0.34931837698583085, "grad_norm": 0.2578125, "learning_rate": 0.001179748397360766, "loss": 1.097, "step": 13017 }, { "epoch": 0.3493452125375698, "grad_norm": 0.255859375, "learning_rate": 0.0011797411571412612, "loss": 1.093, "step": 13018 }, { "epoch": 0.3493720480893087, "grad_norm": 0.265625, "learning_rate": 0.0011797339156499748, "loss": 1.1854, "step": 13019 }, { "epoch": 0.34939888364104765, "grad_norm": 0.251953125, "learning_rate": 0.0011797266728869225, "loss": 1.1402, "step": 13020 }, { "epoch": 0.3494257191927866, "grad_norm": 0.259765625, "learning_rate": 0.0011797194288521206, "loss": 1.1356, "step": 13021 }, { "epoch": 0.3494525547445255, "grad_norm": 0.259765625, "learning_rate": 0.0011797121835455848, "loss": 1.129, "step": 13022 }, { "epoch": 0.3494793902962645, "grad_norm": 0.2470703125, "learning_rate": 0.0011797049369673311, "loss": 1.0356, "step": 13023 }, { "epoch": 0.34950622584800345, "grad_norm": 0.248046875, "learning_rate": 0.001179697689117375, "loss": 1.1162, "step": 13024 }, { "epoch": 0.3495330613997424, "grad_norm": 0.251953125, "learning_rate": 0.0011796904399957332, "loss": 1.0397, "step": 13025 }, { "epoch": 0.3495598969514813, "grad_norm": 0.240234375, "learning_rate": 0.001179683189602421, "loss": 1.0211, "step": 13026 }, { "epoch": 0.34958673250322025, "grad_norm": 0.2451171875, "learning_rate": 0.0011796759379374543, "loss": 1.0288, "step": 13027 }, { "epoch": 0.3496135680549592, "grad_norm": 0.25390625, "learning_rate": 0.001179668685000849, "loss": 1.177, "step": 13028 }, { "epoch": 0.3496404036066982, "grad_norm": 0.2421875, "learning_rate": 0.0011796614307926212, "loss": 1.0652, "step": 13029 }, { "epoch": 0.3496672391584371, "grad_norm": 0.251953125, "learning_rate": 0.001179654175312787, "loss": 1.1544, "step": 13030 }, { "epoch": 0.34969407471017605, "grad_norm": 0.23828125, "learning_rate": 0.0011796469185613619, "loss": 1.0616, "step": 13031 }, { "epoch": 0.349720910261915, "grad_norm": 0.2470703125, "learning_rate": 0.001179639660538362, "loss": 1.0519, "step": 13032 }, { "epoch": 0.3497477458136539, "grad_norm": 0.265625, "learning_rate": 0.0011796324012438033, "loss": 1.1716, "step": 13033 }, { "epoch": 0.34977458136539286, "grad_norm": 0.2578125, "learning_rate": 0.0011796251406777016, "loss": 1.0632, "step": 13034 }, { "epoch": 0.3498014169171318, "grad_norm": 0.259765625, "learning_rate": 0.0011796178788400728, "loss": 1.1649, "step": 13035 }, { "epoch": 0.3498282524688708, "grad_norm": 0.2578125, "learning_rate": 0.001179610615730933, "loss": 1.0494, "step": 13036 }, { "epoch": 0.3498550880206097, "grad_norm": 0.248046875, "learning_rate": 0.001179603351350298, "loss": 1.0677, "step": 13037 }, { "epoch": 0.34988192357234865, "grad_norm": 0.265625, "learning_rate": 0.0011795960856981838, "loss": 1.1933, "step": 13038 }, { "epoch": 0.3499087591240876, "grad_norm": 0.26171875, "learning_rate": 0.0011795888187746061, "loss": 1.164, "step": 13039 }, { "epoch": 0.3499355946758265, "grad_norm": 0.2490234375, "learning_rate": 0.0011795815505795814, "loss": 1.1166, "step": 13040 }, { "epoch": 0.34996243022756546, "grad_norm": 0.271484375, "learning_rate": 0.0011795742811131253, "loss": 1.2157, "step": 13041 }, { "epoch": 0.34998926577930445, "grad_norm": 0.2392578125, "learning_rate": 0.0011795670103752535, "loss": 0.9944, "step": 13042 }, { "epoch": 0.3500161013310434, "grad_norm": 0.26171875, "learning_rate": 0.001179559738365982, "loss": 1.1843, "step": 13043 }, { "epoch": 0.3500429368827823, "grad_norm": 0.25390625, "learning_rate": 0.0011795524650853272, "loss": 1.0809, "step": 13044 }, { "epoch": 0.35006977243452125, "grad_norm": 0.259765625, "learning_rate": 0.001179545190533305, "loss": 1.086, "step": 13045 }, { "epoch": 0.3500966079862602, "grad_norm": 0.2421875, "learning_rate": 0.0011795379147099309, "loss": 1.0276, "step": 13046 }, { "epoch": 0.3501234435379991, "grad_norm": 0.25390625, "learning_rate": 0.001179530637615221, "loss": 1.091, "step": 13047 }, { "epoch": 0.35015027908973806, "grad_norm": 0.25390625, "learning_rate": 0.0011795233592491916, "loss": 1.0739, "step": 13048 }, { "epoch": 0.35017711464147705, "grad_norm": 0.25390625, "learning_rate": 0.0011795160796118584, "loss": 1.0803, "step": 13049 }, { "epoch": 0.350203950193216, "grad_norm": 0.271484375, "learning_rate": 0.0011795087987032373, "loss": 1.1231, "step": 13050 }, { "epoch": 0.3502307857449549, "grad_norm": 0.25390625, "learning_rate": 0.0011795015165233446, "loss": 1.1103, "step": 13051 }, { "epoch": 0.35025762129669386, "grad_norm": 0.259765625, "learning_rate": 0.001179494233072196, "loss": 1.1421, "step": 13052 }, { "epoch": 0.3502844568484328, "grad_norm": 0.259765625, "learning_rate": 0.0011794869483498075, "loss": 1.0822, "step": 13053 }, { "epoch": 0.3503112924001717, "grad_norm": 0.265625, "learning_rate": 0.001179479662356195, "loss": 1.2343, "step": 13054 }, { "epoch": 0.3503381279519107, "grad_norm": 0.2578125, "learning_rate": 0.001179472375091375, "loss": 1.1325, "step": 13055 }, { "epoch": 0.35036496350364965, "grad_norm": 0.255859375, "learning_rate": 0.0011794650865553627, "loss": 1.0525, "step": 13056 }, { "epoch": 0.3503917990553886, "grad_norm": 0.244140625, "learning_rate": 0.0011794577967481747, "loss": 1.014, "step": 13057 }, { "epoch": 0.3504186346071275, "grad_norm": 0.267578125, "learning_rate": 0.0011794505056698266, "loss": 1.1289, "step": 13058 }, { "epoch": 0.35044547015886646, "grad_norm": 0.255859375, "learning_rate": 0.0011794432133203346, "loss": 1.168, "step": 13059 }, { "epoch": 0.3504723057106054, "grad_norm": 0.2578125, "learning_rate": 0.0011794359196997148, "loss": 1.1399, "step": 13060 }, { "epoch": 0.3504991412623443, "grad_norm": 0.251953125, "learning_rate": 0.0011794286248079829, "loss": 1.0743, "step": 13061 }, { "epoch": 0.3505259768140833, "grad_norm": 0.255859375, "learning_rate": 0.001179421328645155, "loss": 0.9917, "step": 13062 }, { "epoch": 0.35055281236582225, "grad_norm": 0.2490234375, "learning_rate": 0.0011794140312112475, "loss": 1.0702, "step": 13063 }, { "epoch": 0.3505796479175612, "grad_norm": 0.2353515625, "learning_rate": 0.001179406732506276, "loss": 0.9876, "step": 13064 }, { "epoch": 0.3506064834693001, "grad_norm": 0.259765625, "learning_rate": 0.0011793994325302564, "loss": 1.1091, "step": 13065 }, { "epoch": 0.35063331902103906, "grad_norm": 0.25390625, "learning_rate": 0.001179392131283205, "loss": 1.1143, "step": 13066 }, { "epoch": 0.350660154572778, "grad_norm": 0.267578125, "learning_rate": 0.0011793848287651378, "loss": 1.1572, "step": 13067 }, { "epoch": 0.350686990124517, "grad_norm": 0.26171875, "learning_rate": 0.0011793775249760705, "loss": 1.1255, "step": 13068 }, { "epoch": 0.3507138256762559, "grad_norm": 0.251953125, "learning_rate": 0.0011793702199160195, "loss": 1.0609, "step": 13069 }, { "epoch": 0.35074066122799485, "grad_norm": 0.251953125, "learning_rate": 0.0011793629135850008, "loss": 1.0196, "step": 13070 }, { "epoch": 0.3507674967797338, "grad_norm": 0.25, "learning_rate": 0.0011793556059830302, "loss": 1.0029, "step": 13071 }, { "epoch": 0.3507943323314727, "grad_norm": 0.2470703125, "learning_rate": 0.001179348297110124, "loss": 1.0551, "step": 13072 }, { "epoch": 0.35082116788321166, "grad_norm": 0.255859375, "learning_rate": 0.0011793409869662978, "loss": 1.1228, "step": 13073 }, { "epoch": 0.3508480034349506, "grad_norm": 0.2578125, "learning_rate": 0.0011793336755515683, "loss": 1.1337, "step": 13074 }, { "epoch": 0.3508748389866896, "grad_norm": 0.248046875, "learning_rate": 0.001179326362865951, "loss": 1.0808, "step": 13075 }, { "epoch": 0.3509016745384285, "grad_norm": 0.263671875, "learning_rate": 0.001179319048909462, "loss": 1.1934, "step": 13076 }, { "epoch": 0.35092851009016746, "grad_norm": 0.24609375, "learning_rate": 0.0011793117336821176, "loss": 1.0958, "step": 13077 }, { "epoch": 0.3509553456419064, "grad_norm": 0.259765625, "learning_rate": 0.0011793044171839334, "loss": 1.117, "step": 13078 }, { "epoch": 0.3509821811936453, "grad_norm": 0.23828125, "learning_rate": 0.001179297099414926, "loss": 1.0686, "step": 13079 }, { "epoch": 0.35100901674538426, "grad_norm": 0.259765625, "learning_rate": 0.0011792897803751111, "loss": 1.127, "step": 13080 }, { "epoch": 0.35103585229712325, "grad_norm": 0.251953125, "learning_rate": 0.001179282460064505, "loss": 1.0218, "step": 13081 }, { "epoch": 0.3510626878488622, "grad_norm": 0.265625, "learning_rate": 0.0011792751384831234, "loss": 1.1825, "step": 13082 }, { "epoch": 0.3510895234006011, "grad_norm": 0.263671875, "learning_rate": 0.0011792678156309828, "loss": 1.194, "step": 13083 }, { "epoch": 0.35111635895234006, "grad_norm": 0.267578125, "learning_rate": 0.0011792604915080989, "loss": 1.1822, "step": 13084 }, { "epoch": 0.351143194504079, "grad_norm": 0.267578125, "learning_rate": 0.0011792531661144877, "loss": 1.1843, "step": 13085 }, { "epoch": 0.3511700300558179, "grad_norm": 0.2578125, "learning_rate": 0.0011792458394501658, "loss": 1.1226, "step": 13086 }, { "epoch": 0.3511968656075569, "grad_norm": 0.259765625, "learning_rate": 0.0011792385115151488, "loss": 1.0724, "step": 13087 }, { "epoch": 0.35122370115929585, "grad_norm": 0.25, "learning_rate": 0.001179231182309453, "loss": 1.07, "step": 13088 }, { "epoch": 0.3512505367110348, "grad_norm": 0.2470703125, "learning_rate": 0.0011792238518330944, "loss": 1.072, "step": 13089 }, { "epoch": 0.3512773722627737, "grad_norm": 0.236328125, "learning_rate": 0.0011792165200860889, "loss": 1.0214, "step": 13090 }, { "epoch": 0.35130420781451266, "grad_norm": 0.263671875, "learning_rate": 0.001179209187068453, "loss": 1.134, "step": 13091 }, { "epoch": 0.3513310433662516, "grad_norm": 0.265625, "learning_rate": 0.0011792018527802023, "loss": 1.1157, "step": 13092 }, { "epoch": 0.35135787891799053, "grad_norm": 0.2421875, "learning_rate": 0.0011791945172213531, "loss": 1.0369, "step": 13093 }, { "epoch": 0.3513847144697295, "grad_norm": 0.263671875, "learning_rate": 0.0011791871803919218, "loss": 1.1654, "step": 13094 }, { "epoch": 0.35141155002146846, "grad_norm": 0.2451171875, "learning_rate": 0.0011791798422919242, "loss": 1.0261, "step": 13095 }, { "epoch": 0.3514383855732074, "grad_norm": 0.255859375, "learning_rate": 0.0011791725029213765, "loss": 1.1497, "step": 13096 }, { "epoch": 0.3514652211249463, "grad_norm": 0.2421875, "learning_rate": 0.0011791651622802945, "loss": 0.9831, "step": 13097 }, { "epoch": 0.35149205667668526, "grad_norm": 0.255859375, "learning_rate": 0.0011791578203686945, "loss": 1.0758, "step": 13098 }, { "epoch": 0.3515188922284242, "grad_norm": 0.26953125, "learning_rate": 0.0011791504771865927, "loss": 1.2223, "step": 13099 }, { "epoch": 0.3515457277801632, "grad_norm": 0.234375, "learning_rate": 0.0011791431327340051, "loss": 0.9309, "step": 13100 }, { "epoch": 0.3515725633319021, "grad_norm": 0.2412109375, "learning_rate": 0.0011791357870109482, "loss": 1.0092, "step": 13101 }, { "epoch": 0.35159939888364106, "grad_norm": 0.251953125, "learning_rate": 0.0011791284400174372, "loss": 1.102, "step": 13102 }, { "epoch": 0.35162623443538, "grad_norm": 0.2392578125, "learning_rate": 0.0011791210917534893, "loss": 0.9907, "step": 13103 }, { "epoch": 0.3516530699871189, "grad_norm": 0.259765625, "learning_rate": 0.00117911374221912, "loss": 1.1179, "step": 13104 }, { "epoch": 0.35167990553885786, "grad_norm": 0.2431640625, "learning_rate": 0.0011791063914143452, "loss": 1.0579, "step": 13105 }, { "epoch": 0.3517067410905968, "grad_norm": 0.263671875, "learning_rate": 0.0011790990393391815, "loss": 1.1612, "step": 13106 }, { "epoch": 0.3517335766423358, "grad_norm": 0.2333984375, "learning_rate": 0.001179091685993645, "loss": 1.0048, "step": 13107 }, { "epoch": 0.3517604121940747, "grad_norm": 0.240234375, "learning_rate": 0.0011790843313777518, "loss": 0.9403, "step": 13108 }, { "epoch": 0.35178724774581366, "grad_norm": 0.251953125, "learning_rate": 0.0011790769754915177, "loss": 1.0615, "step": 13109 }, { "epoch": 0.3518140832975526, "grad_norm": 0.259765625, "learning_rate": 0.0011790696183349592, "loss": 1.1421, "step": 13110 }, { "epoch": 0.35184091884929153, "grad_norm": 0.255859375, "learning_rate": 0.0011790622599080923, "loss": 1.025, "step": 13111 }, { "epoch": 0.35186775440103046, "grad_norm": 0.263671875, "learning_rate": 0.0011790549002109333, "loss": 1.1823, "step": 13112 }, { "epoch": 0.35189458995276945, "grad_norm": 0.251953125, "learning_rate": 0.001179047539243498, "loss": 1.0552, "step": 13113 }, { "epoch": 0.3519214255045084, "grad_norm": 0.2578125, "learning_rate": 0.001179040177005803, "loss": 1.1485, "step": 13114 }, { "epoch": 0.3519482610562473, "grad_norm": 0.244140625, "learning_rate": 0.0011790328134978637, "loss": 1.052, "step": 13115 }, { "epoch": 0.35197509660798626, "grad_norm": 0.2451171875, "learning_rate": 0.0011790254487196972, "loss": 1.0312, "step": 13116 }, { "epoch": 0.3520019321597252, "grad_norm": 0.251953125, "learning_rate": 0.0011790180826713192, "loss": 1.0602, "step": 13117 }, { "epoch": 0.35202876771146413, "grad_norm": 0.25390625, "learning_rate": 0.0011790107153527457, "loss": 1.1388, "step": 13118 }, { "epoch": 0.35205560326320307, "grad_norm": 0.255859375, "learning_rate": 0.0011790033467639933, "loss": 1.0724, "step": 13119 }, { "epoch": 0.35208243881494206, "grad_norm": 0.25, "learning_rate": 0.0011789959769050776, "loss": 1.0768, "step": 13120 }, { "epoch": 0.352109274366681, "grad_norm": 0.244140625, "learning_rate": 0.0011789886057760152, "loss": 1.048, "step": 13121 }, { "epoch": 0.3521361099184199, "grad_norm": 0.267578125, "learning_rate": 0.0011789812333768222, "loss": 1.1686, "step": 13122 }, { "epoch": 0.35216294547015886, "grad_norm": 0.265625, "learning_rate": 0.0011789738597075146, "loss": 1.177, "step": 13123 }, { "epoch": 0.3521897810218978, "grad_norm": 0.265625, "learning_rate": 0.001178966484768109, "loss": 1.1523, "step": 13124 }, { "epoch": 0.35221661657363673, "grad_norm": 0.232421875, "learning_rate": 0.001178959108558621, "loss": 0.9439, "step": 13125 }, { "epoch": 0.3522434521253757, "grad_norm": 0.259765625, "learning_rate": 0.001178951731079067, "loss": 1.1547, "step": 13126 }, { "epoch": 0.35227028767711466, "grad_norm": 0.251953125, "learning_rate": 0.0011789443523294633, "loss": 1.1096, "step": 13127 }, { "epoch": 0.3522971232288536, "grad_norm": 0.25390625, "learning_rate": 0.001178936972309826, "loss": 1.0427, "step": 13128 }, { "epoch": 0.3523239587805925, "grad_norm": 0.267578125, "learning_rate": 0.0011789295910201712, "loss": 1.1486, "step": 13129 }, { "epoch": 0.35235079433233146, "grad_norm": 0.2412109375, "learning_rate": 0.0011789222084605153, "loss": 0.9414, "step": 13130 }, { "epoch": 0.3523776298840704, "grad_norm": 0.255859375, "learning_rate": 0.0011789148246308745, "loss": 1.0699, "step": 13131 }, { "epoch": 0.35240446543580933, "grad_norm": 0.24609375, "learning_rate": 0.0011789074395312648, "loss": 1.048, "step": 13132 }, { "epoch": 0.3524313009875483, "grad_norm": 0.2373046875, "learning_rate": 0.0011789000531617026, "loss": 0.992, "step": 13133 }, { "epoch": 0.35245813653928726, "grad_norm": 0.259765625, "learning_rate": 0.001178892665522204, "loss": 1.1968, "step": 13134 }, { "epoch": 0.3524849720910262, "grad_norm": 0.2373046875, "learning_rate": 0.001178885276612785, "loss": 1.0078, "step": 13135 }, { "epoch": 0.35251180764276513, "grad_norm": 0.255859375, "learning_rate": 0.0011788778864334623, "loss": 1.0747, "step": 13136 }, { "epoch": 0.35253864319450406, "grad_norm": 0.2431640625, "learning_rate": 0.0011788704949842516, "loss": 1.0115, "step": 13137 }, { "epoch": 0.352565478746243, "grad_norm": 0.2578125, "learning_rate": 0.0011788631022651693, "loss": 1.062, "step": 13138 }, { "epoch": 0.352592314297982, "grad_norm": 0.275390625, "learning_rate": 0.0011788557082762316, "loss": 1.2255, "step": 13139 }, { "epoch": 0.3526191498497209, "grad_norm": 0.26953125, "learning_rate": 0.001178848313017455, "loss": 1.1168, "step": 13140 }, { "epoch": 0.35264598540145986, "grad_norm": 0.255859375, "learning_rate": 0.0011788409164888555, "loss": 1.119, "step": 13141 }, { "epoch": 0.3526728209531988, "grad_norm": 0.25, "learning_rate": 0.0011788335186904493, "loss": 1.0824, "step": 13142 }, { "epoch": 0.35269965650493773, "grad_norm": 0.263671875, "learning_rate": 0.0011788261196222526, "loss": 1.1563, "step": 13143 }, { "epoch": 0.35272649205667667, "grad_norm": 0.26953125, "learning_rate": 0.0011788187192842818, "loss": 1.2082, "step": 13144 }, { "epoch": 0.35275332760841566, "grad_norm": 0.251953125, "learning_rate": 0.001178811317676553, "loss": 1.0233, "step": 13145 }, { "epoch": 0.3527801631601546, "grad_norm": 0.26171875, "learning_rate": 0.0011788039147990822, "loss": 1.134, "step": 13146 }, { "epoch": 0.3528069987118935, "grad_norm": 0.265625, "learning_rate": 0.0011787965106518862, "loss": 1.1448, "step": 13147 }, { "epoch": 0.35283383426363246, "grad_norm": 0.275390625, "learning_rate": 0.0011787891052349806, "loss": 1.1734, "step": 13148 }, { "epoch": 0.3528606698153714, "grad_norm": 0.25390625, "learning_rate": 0.0011787816985483824, "loss": 1.073, "step": 13149 }, { "epoch": 0.35288750536711033, "grad_norm": 0.25390625, "learning_rate": 0.001178774290592107, "loss": 1.125, "step": 13150 }, { "epoch": 0.35291434091884927, "grad_norm": 0.2578125, "learning_rate": 0.0011787668813661714, "loss": 1.0486, "step": 13151 }, { "epoch": 0.35294117647058826, "grad_norm": 0.2470703125, "learning_rate": 0.0011787594708705916, "loss": 1.0591, "step": 13152 }, { "epoch": 0.3529680120223272, "grad_norm": 0.283203125, "learning_rate": 0.0011787520591053835, "loss": 1.2209, "step": 13153 }, { "epoch": 0.35299484757406613, "grad_norm": 0.259765625, "learning_rate": 0.001178744646070564, "loss": 1.0957, "step": 13154 }, { "epoch": 0.35302168312580506, "grad_norm": 0.267578125, "learning_rate": 0.0011787372317661488, "loss": 1.1384, "step": 13155 }, { "epoch": 0.353048518677544, "grad_norm": 0.26171875, "learning_rate": 0.0011787298161921543, "loss": 1.1297, "step": 13156 }, { "epoch": 0.35307535422928293, "grad_norm": 0.263671875, "learning_rate": 0.001178722399348597, "loss": 1.1034, "step": 13157 }, { "epoch": 0.3531021897810219, "grad_norm": 0.2578125, "learning_rate": 0.001178714981235493, "loss": 1.1112, "step": 13158 }, { "epoch": 0.35312902533276086, "grad_norm": 0.26953125, "learning_rate": 0.0011787075618528585, "loss": 1.0138, "step": 13159 }, { "epoch": 0.3531558608844998, "grad_norm": 0.2470703125, "learning_rate": 0.00117870014120071, "loss": 1.121, "step": 13160 }, { "epoch": 0.35318269643623873, "grad_norm": 0.26171875, "learning_rate": 0.0011786927192790635, "loss": 1.0802, "step": 13161 }, { "epoch": 0.35320953198797767, "grad_norm": 0.2421875, "learning_rate": 0.0011786852960879355, "loss": 0.9878, "step": 13162 }, { "epoch": 0.3532363675397166, "grad_norm": 0.263671875, "learning_rate": 0.0011786778716273423, "loss": 1.1672, "step": 13163 }, { "epoch": 0.35326320309145554, "grad_norm": 0.2412109375, "learning_rate": 0.0011786704458973, "loss": 1.0162, "step": 13164 }, { "epoch": 0.3532900386431945, "grad_norm": 0.248046875, "learning_rate": 0.001178663018897825, "loss": 1.0439, "step": 13165 }, { "epoch": 0.35331687419493346, "grad_norm": 0.2470703125, "learning_rate": 0.0011786555906289336, "loss": 1.0814, "step": 13166 }, { "epoch": 0.3533437097466724, "grad_norm": 0.248046875, "learning_rate": 0.0011786481610906421, "loss": 0.9961, "step": 13167 }, { "epoch": 0.35337054529841133, "grad_norm": 0.265625, "learning_rate": 0.0011786407302829666, "loss": 1.1227, "step": 13168 }, { "epoch": 0.35339738085015027, "grad_norm": 0.25, "learning_rate": 0.0011786332982059238, "loss": 1.0732, "step": 13169 }, { "epoch": 0.3534242164018892, "grad_norm": 0.2470703125, "learning_rate": 0.0011786258648595297, "loss": 0.9822, "step": 13170 }, { "epoch": 0.3534510519536282, "grad_norm": 0.248046875, "learning_rate": 0.0011786184302438006, "loss": 1.0439, "step": 13171 }, { "epoch": 0.3534778875053671, "grad_norm": 0.2578125, "learning_rate": 0.001178610994358753, "loss": 1.1178, "step": 13172 }, { "epoch": 0.35350472305710606, "grad_norm": 0.255859375, "learning_rate": 0.0011786035572044031, "loss": 1.0966, "step": 13173 }, { "epoch": 0.353531558608845, "grad_norm": 0.259765625, "learning_rate": 0.0011785961187807672, "loss": 1.158, "step": 13174 }, { "epoch": 0.35355839416058393, "grad_norm": 0.24609375, "learning_rate": 0.0011785886790878616, "loss": 1.0302, "step": 13175 }, { "epoch": 0.35358522971232287, "grad_norm": 0.263671875, "learning_rate": 0.0011785812381257027, "loss": 1.1737, "step": 13176 }, { "epoch": 0.3536120652640618, "grad_norm": 0.27734375, "learning_rate": 0.0011785737958943069, "loss": 1.2267, "step": 13177 }, { "epoch": 0.3536389008158008, "grad_norm": 0.2578125, "learning_rate": 0.0011785663523936902, "loss": 1.115, "step": 13178 }, { "epoch": 0.35366573636753973, "grad_norm": 0.2431640625, "learning_rate": 0.0011785589076238692, "loss": 1.0496, "step": 13179 }, { "epoch": 0.35369257191927866, "grad_norm": 0.251953125, "learning_rate": 0.0011785514615848603, "loss": 1.1357, "step": 13180 }, { "epoch": 0.3537194074710176, "grad_norm": 0.2451171875, "learning_rate": 0.0011785440142766796, "loss": 1.0567, "step": 13181 }, { "epoch": 0.35374624302275653, "grad_norm": 0.251953125, "learning_rate": 0.0011785365656993437, "loss": 1.1337, "step": 13182 }, { "epoch": 0.35377307857449547, "grad_norm": 0.255859375, "learning_rate": 0.0011785291158528687, "loss": 1.1346, "step": 13183 }, { "epoch": 0.35379991412623446, "grad_norm": 0.2314453125, "learning_rate": 0.0011785216647372707, "loss": 0.9883, "step": 13184 }, { "epoch": 0.3538267496779734, "grad_norm": 0.26171875, "learning_rate": 0.0011785142123525668, "loss": 1.1427, "step": 13185 }, { "epoch": 0.35385358522971233, "grad_norm": 0.2431640625, "learning_rate": 0.0011785067586987728, "loss": 1.0361, "step": 13186 }, { "epoch": 0.35388042078145127, "grad_norm": 0.259765625, "learning_rate": 0.001178499303775905, "loss": 1.1526, "step": 13187 }, { "epoch": 0.3539072563331902, "grad_norm": 0.251953125, "learning_rate": 0.00117849184758398, "loss": 1.0647, "step": 13188 }, { "epoch": 0.35393409188492914, "grad_norm": 0.26171875, "learning_rate": 0.0011784843901230142, "loss": 1.1015, "step": 13189 }, { "epoch": 0.35396092743666807, "grad_norm": 0.244140625, "learning_rate": 0.0011784769313930238, "loss": 1.0142, "step": 13190 }, { "epoch": 0.35398776298840706, "grad_norm": 0.23828125, "learning_rate": 0.0011784694713940253, "loss": 1.0134, "step": 13191 }, { "epoch": 0.354014598540146, "grad_norm": 0.255859375, "learning_rate": 0.001178462010126035, "loss": 1.0856, "step": 13192 }, { "epoch": 0.35404143409188493, "grad_norm": 0.2578125, "learning_rate": 0.001178454547589069, "loss": 1.0912, "step": 13193 }, { "epoch": 0.35406826964362387, "grad_norm": 0.2578125, "learning_rate": 0.0011784470837831438, "loss": 1.1129, "step": 13194 }, { "epoch": 0.3540951051953628, "grad_norm": 0.255859375, "learning_rate": 0.0011784396187082761, "loss": 1.0894, "step": 13195 }, { "epoch": 0.35412194074710174, "grad_norm": 0.26171875, "learning_rate": 0.0011784321523644822, "loss": 1.1502, "step": 13196 }, { "epoch": 0.35414877629884073, "grad_norm": 0.2578125, "learning_rate": 0.001178424684751778, "loss": 1.1157, "step": 13197 }, { "epoch": 0.35417561185057966, "grad_norm": 0.263671875, "learning_rate": 0.0011784172158701804, "loss": 1.1201, "step": 13198 }, { "epoch": 0.3542024474023186, "grad_norm": 0.26953125, "learning_rate": 0.0011784097457197059, "loss": 1.2286, "step": 13199 }, { "epoch": 0.35422928295405753, "grad_norm": 0.265625, "learning_rate": 0.00117840227430037, "loss": 1.183, "step": 13200 }, { "epoch": 0.35425611850579647, "grad_norm": 0.2421875, "learning_rate": 0.00117839480161219, "loss": 1.0719, "step": 13201 }, { "epoch": 0.3542829540575354, "grad_norm": 0.2578125, "learning_rate": 0.001178387327655182, "loss": 1.0923, "step": 13202 }, { "epoch": 0.35430978960927434, "grad_norm": 0.259765625, "learning_rate": 0.0011783798524293621, "loss": 1.1143, "step": 13203 }, { "epoch": 0.35433662516101333, "grad_norm": 0.2470703125, "learning_rate": 0.0011783723759347473, "loss": 1.058, "step": 13204 }, { "epoch": 0.35436346071275227, "grad_norm": 0.267578125, "learning_rate": 0.0011783648981713534, "loss": 1.201, "step": 13205 }, { "epoch": 0.3543902962644912, "grad_norm": 0.25390625, "learning_rate": 0.0011783574191391974, "loss": 1.1263, "step": 13206 }, { "epoch": 0.35441713181623014, "grad_norm": 0.2470703125, "learning_rate": 0.001178349938838295, "loss": 1.0279, "step": 13207 }, { "epoch": 0.35444396736796907, "grad_norm": 0.2470703125, "learning_rate": 0.0011783424572686632, "loss": 1.0251, "step": 13208 }, { "epoch": 0.354470802919708, "grad_norm": 0.255859375, "learning_rate": 0.0011783349744303182, "loss": 1.1869, "step": 13209 }, { "epoch": 0.354497638471447, "grad_norm": 0.2490234375, "learning_rate": 0.0011783274903232764, "loss": 0.9955, "step": 13210 }, { "epoch": 0.35452447402318593, "grad_norm": 0.26953125, "learning_rate": 0.0011783200049475541, "loss": 1.1667, "step": 13211 }, { "epoch": 0.35455130957492487, "grad_norm": 0.2333984375, "learning_rate": 0.0011783125183031681, "loss": 0.9869, "step": 13212 }, { "epoch": 0.3545781451266638, "grad_norm": 0.26171875, "learning_rate": 0.0011783050303901344, "loss": 1.1773, "step": 13213 }, { "epoch": 0.35460498067840274, "grad_norm": 0.255859375, "learning_rate": 0.0011782975412084696, "loss": 1.0749, "step": 13214 }, { "epoch": 0.35463181623014167, "grad_norm": 0.263671875, "learning_rate": 0.0011782900507581902, "loss": 1.1422, "step": 13215 }, { "epoch": 0.35465865178188066, "grad_norm": 0.25, "learning_rate": 0.0011782825590393126, "loss": 1.0169, "step": 13216 }, { "epoch": 0.3546854873336196, "grad_norm": 0.24609375, "learning_rate": 0.0011782750660518531, "loss": 1.072, "step": 13217 }, { "epoch": 0.35471232288535853, "grad_norm": 0.2451171875, "learning_rate": 0.0011782675717958281, "loss": 1.0489, "step": 13218 }, { "epoch": 0.35473915843709747, "grad_norm": 0.2578125, "learning_rate": 0.0011782600762712545, "loss": 1.1364, "step": 13219 }, { "epoch": 0.3547659939888364, "grad_norm": 0.275390625, "learning_rate": 0.0011782525794781483, "loss": 1.1929, "step": 13220 }, { "epoch": 0.35479282954057534, "grad_norm": 0.2451171875, "learning_rate": 0.001178245081416526, "loss": 1.092, "step": 13221 }, { "epoch": 0.3548196650923143, "grad_norm": 0.248046875, "learning_rate": 0.0011782375820864042, "loss": 1.1387, "step": 13222 }, { "epoch": 0.35484650064405326, "grad_norm": 0.25, "learning_rate": 0.0011782300814877993, "loss": 1.0882, "step": 13223 }, { "epoch": 0.3548733361957922, "grad_norm": 0.267578125, "learning_rate": 0.0011782225796207275, "loss": 1.1482, "step": 13224 }, { "epoch": 0.35490017174753113, "grad_norm": 0.259765625, "learning_rate": 0.0011782150764852056, "loss": 1.2089, "step": 13225 }, { "epoch": 0.35492700729927007, "grad_norm": 0.25, "learning_rate": 0.00117820757208125, "loss": 1.0724, "step": 13226 }, { "epoch": 0.354953842851009, "grad_norm": 0.255859375, "learning_rate": 0.001178200066408877, "loss": 1.103, "step": 13227 }, { "epoch": 0.35498067840274794, "grad_norm": 0.25390625, "learning_rate": 0.0011781925594681031, "loss": 1.1912, "step": 13228 }, { "epoch": 0.35500751395448693, "grad_norm": 0.2470703125, "learning_rate": 0.001178185051258945, "loss": 1.0329, "step": 13229 }, { "epoch": 0.35503434950622587, "grad_norm": 0.287109375, "learning_rate": 0.0011781775417814189, "loss": 1.1669, "step": 13230 }, { "epoch": 0.3550611850579648, "grad_norm": 0.234375, "learning_rate": 0.0011781700310355416, "loss": 0.9978, "step": 13231 }, { "epoch": 0.35508802060970374, "grad_norm": 0.251953125, "learning_rate": 0.0011781625190213291, "loss": 1.0504, "step": 13232 }, { "epoch": 0.35511485616144267, "grad_norm": 0.26171875, "learning_rate": 0.0011781550057387983, "loss": 1.1563, "step": 13233 }, { "epoch": 0.3551416917131816, "grad_norm": 0.26171875, "learning_rate": 0.0011781474911879651, "loss": 1.1861, "step": 13234 }, { "epoch": 0.35516852726492054, "grad_norm": 0.24609375, "learning_rate": 0.001178139975368847, "loss": 1.0307, "step": 13235 }, { "epoch": 0.35519536281665953, "grad_norm": 0.271484375, "learning_rate": 0.0011781324582814594, "loss": 1.1917, "step": 13236 }, { "epoch": 0.35522219836839847, "grad_norm": 0.25, "learning_rate": 0.0011781249399258195, "loss": 1.0947, "step": 13237 }, { "epoch": 0.3552490339201374, "grad_norm": 0.259765625, "learning_rate": 0.0011781174203019436, "loss": 1.1444, "step": 13238 }, { "epoch": 0.35527586947187634, "grad_norm": 0.263671875, "learning_rate": 0.0011781098994098481, "loss": 1.1659, "step": 13239 }, { "epoch": 0.3553027050236153, "grad_norm": 0.255859375, "learning_rate": 0.0011781023772495496, "loss": 1.1795, "step": 13240 }, { "epoch": 0.3553295405753542, "grad_norm": 0.25390625, "learning_rate": 0.0011780948538210646, "loss": 1.095, "step": 13241 }, { "epoch": 0.3553563761270932, "grad_norm": 0.25390625, "learning_rate": 0.0011780873291244093, "loss": 1.0953, "step": 13242 }, { "epoch": 0.35538321167883213, "grad_norm": 0.248046875, "learning_rate": 0.0011780798031596007, "loss": 1.0192, "step": 13243 }, { "epoch": 0.35541004723057107, "grad_norm": 0.263671875, "learning_rate": 0.001178072275926655, "loss": 1.095, "step": 13244 }, { "epoch": 0.35543688278231, "grad_norm": 0.263671875, "learning_rate": 0.001178064747425589, "loss": 1.2217, "step": 13245 }, { "epoch": 0.35546371833404894, "grad_norm": 0.267578125, "learning_rate": 0.0011780572176564189, "loss": 1.1251, "step": 13246 }, { "epoch": 0.3554905538857879, "grad_norm": 0.248046875, "learning_rate": 0.0011780496866191613, "loss": 1.1076, "step": 13247 }, { "epoch": 0.3555173894375268, "grad_norm": 0.259765625, "learning_rate": 0.0011780421543138328, "loss": 1.1196, "step": 13248 }, { "epoch": 0.3555442249892658, "grad_norm": 0.2451171875, "learning_rate": 0.0011780346207404498, "loss": 1.053, "step": 13249 }, { "epoch": 0.35557106054100474, "grad_norm": 0.259765625, "learning_rate": 0.0011780270858990289, "loss": 1.1598, "step": 13250 }, { "epoch": 0.35559789609274367, "grad_norm": 0.2490234375, "learning_rate": 0.0011780195497895867, "loss": 1.0504, "step": 13251 }, { "epoch": 0.3556247316444826, "grad_norm": 0.263671875, "learning_rate": 0.0011780120124121397, "loss": 1.1129, "step": 13252 }, { "epoch": 0.35565156719622154, "grad_norm": 0.2412109375, "learning_rate": 0.0011780044737667041, "loss": 1.0283, "step": 13253 }, { "epoch": 0.3556784027479605, "grad_norm": 0.240234375, "learning_rate": 0.001177996933853297, "loss": 0.9869, "step": 13254 }, { "epoch": 0.35570523829969947, "grad_norm": 0.2294921875, "learning_rate": 0.0011779893926719346, "loss": 0.8977, "step": 13255 }, { "epoch": 0.3557320738514384, "grad_norm": 0.255859375, "learning_rate": 0.0011779818502226337, "loss": 1.0575, "step": 13256 }, { "epoch": 0.35575890940317734, "grad_norm": 0.251953125, "learning_rate": 0.0011779743065054105, "loss": 1.0952, "step": 13257 }, { "epoch": 0.35578574495491627, "grad_norm": 0.2392578125, "learning_rate": 0.0011779667615202816, "loss": 0.9905, "step": 13258 }, { "epoch": 0.3558125805066552, "grad_norm": 0.2451171875, "learning_rate": 0.0011779592152672638, "loss": 1.0743, "step": 13259 }, { "epoch": 0.35583941605839414, "grad_norm": 0.291015625, "learning_rate": 0.0011779516677463736, "loss": 1.036, "step": 13260 }, { "epoch": 0.3558662516101331, "grad_norm": 0.298828125, "learning_rate": 0.0011779441189576274, "loss": 0.9899, "step": 13261 }, { "epoch": 0.35589308716187207, "grad_norm": 0.27734375, "learning_rate": 0.0011779365689010418, "loss": 1.0762, "step": 13262 }, { "epoch": 0.355919922713611, "grad_norm": 0.259765625, "learning_rate": 0.0011779290175766334, "loss": 1.0198, "step": 13263 }, { "epoch": 0.35594675826534994, "grad_norm": 0.2412109375, "learning_rate": 0.001177921464984419, "loss": 0.9239, "step": 13264 }, { "epoch": 0.3559735938170889, "grad_norm": 0.267578125, "learning_rate": 0.0011779139111244147, "loss": 1.0919, "step": 13265 }, { "epoch": 0.3560004293688278, "grad_norm": 0.251953125, "learning_rate": 0.0011779063559966374, "loss": 0.9684, "step": 13266 }, { "epoch": 0.35602726492056674, "grad_norm": 0.271484375, "learning_rate": 0.0011778987996011034, "loss": 1.0391, "step": 13267 }, { "epoch": 0.35605410047230573, "grad_norm": 0.279296875, "learning_rate": 0.0011778912419378297, "loss": 1.0974, "step": 13268 }, { "epoch": 0.35608093602404467, "grad_norm": 0.27734375, "learning_rate": 0.0011778836830068323, "loss": 1.1087, "step": 13269 }, { "epoch": 0.3561077715757836, "grad_norm": 0.25390625, "learning_rate": 0.0011778761228081285, "loss": 1.0015, "step": 13270 }, { "epoch": 0.35613460712752254, "grad_norm": 0.2421875, "learning_rate": 0.0011778685613417343, "loss": 1.0081, "step": 13271 }, { "epoch": 0.3561614426792615, "grad_norm": 0.2197265625, "learning_rate": 0.0011778609986076666, "loss": 0.8329, "step": 13272 }, { "epoch": 0.3561882782310004, "grad_norm": 0.240234375, "learning_rate": 0.0011778534346059415, "loss": 0.9866, "step": 13273 }, { "epoch": 0.35621511378273935, "grad_norm": 0.255859375, "learning_rate": 0.0011778458693365765, "loss": 1.0017, "step": 13274 }, { "epoch": 0.35624194933447834, "grad_norm": 0.259765625, "learning_rate": 0.0011778383027995871, "loss": 1.0436, "step": 13275 }, { "epoch": 0.35626878488621727, "grad_norm": 0.2421875, "learning_rate": 0.0011778307349949908, "loss": 0.9542, "step": 13276 }, { "epoch": 0.3562956204379562, "grad_norm": 0.25, "learning_rate": 0.001177823165922804, "loss": 1.0118, "step": 13277 }, { "epoch": 0.35632245598969514, "grad_norm": 0.2451171875, "learning_rate": 0.001177815595583043, "loss": 0.9455, "step": 13278 }, { "epoch": 0.3563492915414341, "grad_norm": 0.2451171875, "learning_rate": 0.0011778080239757246, "loss": 1.0223, "step": 13279 }, { "epoch": 0.356376127093173, "grad_norm": 0.23828125, "learning_rate": 0.0011778004511008653, "loss": 0.9328, "step": 13280 }, { "epoch": 0.356402962644912, "grad_norm": 0.25390625, "learning_rate": 0.001177792876958482, "loss": 1.0446, "step": 13281 }, { "epoch": 0.35642979819665094, "grad_norm": 0.259765625, "learning_rate": 0.0011777853015485912, "loss": 1.0755, "step": 13282 }, { "epoch": 0.3564566337483899, "grad_norm": 0.26171875, "learning_rate": 0.0011777777248712091, "loss": 1.0256, "step": 13283 }, { "epoch": 0.3564834693001288, "grad_norm": 0.2373046875, "learning_rate": 0.0011777701469263529, "loss": 0.9459, "step": 13284 }, { "epoch": 0.35651030485186774, "grad_norm": 0.263671875, "learning_rate": 0.0011777625677140387, "loss": 1.1663, "step": 13285 }, { "epoch": 0.3565371404036067, "grad_norm": 0.2373046875, "learning_rate": 0.0011777549872342837, "loss": 1.0015, "step": 13286 }, { "epoch": 0.35656397595534567, "grad_norm": 0.251953125, "learning_rate": 0.0011777474054871042, "loss": 1.0274, "step": 13287 }, { "epoch": 0.3565908115070846, "grad_norm": 0.24609375, "learning_rate": 0.0011777398224725167, "loss": 1.0465, "step": 13288 }, { "epoch": 0.35661764705882354, "grad_norm": 0.25390625, "learning_rate": 0.0011777322381905382, "loss": 1.0779, "step": 13289 }, { "epoch": 0.3566444826105625, "grad_norm": 0.2470703125, "learning_rate": 0.001177724652641185, "loss": 0.9591, "step": 13290 }, { "epoch": 0.3566713181623014, "grad_norm": 0.2255859375, "learning_rate": 0.001177717065824474, "loss": 0.9175, "step": 13291 }, { "epoch": 0.35669815371404034, "grad_norm": 0.2451171875, "learning_rate": 0.0011777094777404215, "loss": 0.9815, "step": 13292 }, { "epoch": 0.3567249892657793, "grad_norm": 0.240234375, "learning_rate": 0.0011777018883890446, "loss": 0.9831, "step": 13293 }, { "epoch": 0.35675182481751827, "grad_norm": 0.2470703125, "learning_rate": 0.0011776942977703595, "loss": 0.9357, "step": 13294 }, { "epoch": 0.3567786603692572, "grad_norm": 0.2353515625, "learning_rate": 0.0011776867058843833, "loss": 0.9618, "step": 13295 }, { "epoch": 0.35680549592099614, "grad_norm": 0.224609375, "learning_rate": 0.0011776791127311324, "loss": 0.861, "step": 13296 }, { "epoch": 0.3568323314727351, "grad_norm": 0.248046875, "learning_rate": 0.0011776715183106233, "loss": 1.0195, "step": 13297 }, { "epoch": 0.356859167024474, "grad_norm": 0.259765625, "learning_rate": 0.001177663922622873, "loss": 1.0591, "step": 13298 }, { "epoch": 0.35688600257621295, "grad_norm": 0.265625, "learning_rate": 0.001177656325667898, "loss": 1.1806, "step": 13299 }, { "epoch": 0.35691283812795194, "grad_norm": 0.23828125, "learning_rate": 0.0011776487274457149, "loss": 0.9933, "step": 13300 }, { "epoch": 0.35693967367969087, "grad_norm": 0.25390625, "learning_rate": 0.0011776411279563405, "loss": 1.03, "step": 13301 }, { "epoch": 0.3569665092314298, "grad_norm": 0.232421875, "learning_rate": 0.0011776335271997914, "loss": 0.9435, "step": 13302 }, { "epoch": 0.35699334478316874, "grad_norm": 0.25, "learning_rate": 0.0011776259251760843, "loss": 1.1668, "step": 13303 }, { "epoch": 0.3570201803349077, "grad_norm": 0.236328125, "learning_rate": 0.001177618321885236, "loss": 0.9467, "step": 13304 }, { "epoch": 0.3570470158866466, "grad_norm": 0.2373046875, "learning_rate": 0.0011776107173272626, "loss": 0.9446, "step": 13305 }, { "epoch": 0.35707385143838555, "grad_norm": 0.251953125, "learning_rate": 0.0011776031115021816, "loss": 1.1104, "step": 13306 }, { "epoch": 0.35710068699012454, "grad_norm": 0.2412109375, "learning_rate": 0.0011775955044100092, "loss": 1.0278, "step": 13307 }, { "epoch": 0.3571275225418635, "grad_norm": 0.2421875, "learning_rate": 0.0011775878960507621, "loss": 0.9782, "step": 13308 }, { "epoch": 0.3571543580936024, "grad_norm": 0.2421875, "learning_rate": 0.0011775802864244574, "loss": 1.072, "step": 13309 }, { "epoch": 0.35718119364534134, "grad_norm": 0.25, "learning_rate": 0.001177572675531111, "loss": 1.0441, "step": 13310 }, { "epoch": 0.3572080291970803, "grad_norm": 0.232421875, "learning_rate": 0.0011775650633707403, "loss": 0.9726, "step": 13311 }, { "epoch": 0.3572348647488192, "grad_norm": 0.232421875, "learning_rate": 0.001177557449943362, "loss": 0.9265, "step": 13312 }, { "epoch": 0.3572617003005582, "grad_norm": 0.2412109375, "learning_rate": 0.0011775498352489923, "loss": 1.0358, "step": 13313 }, { "epoch": 0.35728853585229714, "grad_norm": 0.248046875, "learning_rate": 0.001177542219287648, "loss": 1.0828, "step": 13314 }, { "epoch": 0.3573153714040361, "grad_norm": 0.2421875, "learning_rate": 0.0011775346020593464, "loss": 1.0215, "step": 13315 }, { "epoch": 0.357342206955775, "grad_norm": 0.251953125, "learning_rate": 0.0011775269835641038, "loss": 1.0114, "step": 13316 }, { "epoch": 0.35736904250751395, "grad_norm": 0.244140625, "learning_rate": 0.0011775193638019366, "loss": 1.0102, "step": 13317 }, { "epoch": 0.3573958780592529, "grad_norm": 0.251953125, "learning_rate": 0.001177511742772862, "loss": 1.0734, "step": 13318 }, { "epoch": 0.3574227136109918, "grad_norm": 0.236328125, "learning_rate": 0.0011775041204768967, "loss": 0.9946, "step": 13319 }, { "epoch": 0.3574495491627308, "grad_norm": 0.25390625, "learning_rate": 0.0011774964969140569, "loss": 1.0793, "step": 13320 }, { "epoch": 0.35747638471446974, "grad_norm": 0.25390625, "learning_rate": 0.00117748887208436, "loss": 1.1581, "step": 13321 }, { "epoch": 0.3575032202662087, "grad_norm": 0.26171875, "learning_rate": 0.0011774812459878223, "loss": 1.1294, "step": 13322 }, { "epoch": 0.3575300558179476, "grad_norm": 0.251953125, "learning_rate": 0.0011774736186244605, "loss": 1.0833, "step": 13323 }, { "epoch": 0.35755689136968655, "grad_norm": 0.2392578125, "learning_rate": 0.0011774659899942916, "loss": 1.0594, "step": 13324 }, { "epoch": 0.3575837269214255, "grad_norm": 0.2421875, "learning_rate": 0.0011774583600973323, "loss": 1.024, "step": 13325 }, { "epoch": 0.3576105624731645, "grad_norm": 0.244140625, "learning_rate": 0.001177450728933599, "loss": 0.9629, "step": 13326 }, { "epoch": 0.3576373980249034, "grad_norm": 0.255859375, "learning_rate": 0.001177443096503109, "loss": 1.1487, "step": 13327 }, { "epoch": 0.35766423357664234, "grad_norm": 0.27734375, "learning_rate": 0.0011774354628058786, "loss": 1.1745, "step": 13328 }, { "epoch": 0.3576910691283813, "grad_norm": 0.23828125, "learning_rate": 0.0011774278278419246, "loss": 1.0163, "step": 13329 }, { "epoch": 0.3577179046801202, "grad_norm": 0.2314453125, "learning_rate": 0.0011774201916112637, "loss": 0.9788, "step": 13330 }, { "epoch": 0.35774474023185915, "grad_norm": 0.248046875, "learning_rate": 0.0011774125541139133, "loss": 1.0084, "step": 13331 }, { "epoch": 0.3577715757835981, "grad_norm": 0.22265625, "learning_rate": 0.0011774049153498892, "loss": 0.8978, "step": 13332 }, { "epoch": 0.3577984113353371, "grad_norm": 0.2412109375, "learning_rate": 0.0011773972753192086, "loss": 1.0385, "step": 13333 }, { "epoch": 0.357825246887076, "grad_norm": 0.2353515625, "learning_rate": 0.0011773896340218884, "loss": 0.9939, "step": 13334 }, { "epoch": 0.35785208243881494, "grad_norm": 0.267578125, "learning_rate": 0.0011773819914579452, "loss": 1.2186, "step": 13335 }, { "epoch": 0.3578789179905539, "grad_norm": 0.2275390625, "learning_rate": 0.0011773743476273957, "loss": 0.9072, "step": 13336 }, { "epoch": 0.3579057535422928, "grad_norm": 0.2578125, "learning_rate": 0.0011773667025302566, "loss": 1.0089, "step": 13337 }, { "epoch": 0.35793258909403175, "grad_norm": 0.251953125, "learning_rate": 0.0011773590561665452, "loss": 1.0949, "step": 13338 }, { "epoch": 0.35795942464577074, "grad_norm": 0.259765625, "learning_rate": 0.0011773514085362776, "loss": 1.0648, "step": 13339 }, { "epoch": 0.3579862601975097, "grad_norm": 0.24609375, "learning_rate": 0.001177343759639471, "loss": 1.0665, "step": 13340 }, { "epoch": 0.3580130957492486, "grad_norm": 0.248046875, "learning_rate": 0.001177336109476142, "loss": 1.0841, "step": 13341 }, { "epoch": 0.35803993130098755, "grad_norm": 0.2373046875, "learning_rate": 0.0011773284580463075, "loss": 0.9899, "step": 13342 }, { "epoch": 0.3580667668527265, "grad_norm": 0.2314453125, "learning_rate": 0.0011773208053499841, "loss": 0.9944, "step": 13343 }, { "epoch": 0.3580936024044654, "grad_norm": 0.240234375, "learning_rate": 0.0011773131513871887, "loss": 1.0012, "step": 13344 }, { "epoch": 0.3581204379562044, "grad_norm": 0.228515625, "learning_rate": 0.0011773054961579383, "loss": 0.9219, "step": 13345 }, { "epoch": 0.35814727350794334, "grad_norm": 0.240234375, "learning_rate": 0.0011772978396622493, "loss": 1.0296, "step": 13346 }, { "epoch": 0.3581741090596823, "grad_norm": 0.244140625, "learning_rate": 0.0011772901819001387, "loss": 0.8979, "step": 13347 }, { "epoch": 0.3582009446114212, "grad_norm": 0.2451171875, "learning_rate": 0.0011772825228716234, "loss": 1.0629, "step": 13348 }, { "epoch": 0.35822778016316015, "grad_norm": 0.2314453125, "learning_rate": 0.00117727486257672, "loss": 1.012, "step": 13349 }, { "epoch": 0.3582546157148991, "grad_norm": 0.2490234375, "learning_rate": 0.0011772672010154452, "loss": 1.0402, "step": 13350 }, { "epoch": 0.358281451266638, "grad_norm": 0.259765625, "learning_rate": 0.001177259538187816, "loss": 1.1116, "step": 13351 }, { "epoch": 0.358308286818377, "grad_norm": 0.2333984375, "learning_rate": 0.0011772518740938496, "loss": 0.9289, "step": 13352 }, { "epoch": 0.35833512237011594, "grad_norm": 0.228515625, "learning_rate": 0.0011772442087335622, "loss": 0.9301, "step": 13353 }, { "epoch": 0.3583619579218549, "grad_norm": 0.2373046875, "learning_rate": 0.0011772365421069708, "loss": 1.0834, "step": 13354 }, { "epoch": 0.3583887934735938, "grad_norm": 0.248046875, "learning_rate": 0.0011772288742140922, "loss": 1.05, "step": 13355 }, { "epoch": 0.35841562902533275, "grad_norm": 0.23828125, "learning_rate": 0.0011772212050549435, "loss": 0.9485, "step": 13356 }, { "epoch": 0.3584424645770717, "grad_norm": 0.2470703125, "learning_rate": 0.001177213534629541, "loss": 0.9825, "step": 13357 }, { "epoch": 0.3584693001288107, "grad_norm": 0.2470703125, "learning_rate": 0.001177205862937902, "loss": 1.0514, "step": 13358 }, { "epoch": 0.3584961356805496, "grad_norm": 0.234375, "learning_rate": 0.0011771981899800434, "loss": 0.9448, "step": 13359 }, { "epoch": 0.35852297123228855, "grad_norm": 0.2412109375, "learning_rate": 0.0011771905157559814, "loss": 1.0639, "step": 13360 }, { "epoch": 0.3585498067840275, "grad_norm": 0.244140625, "learning_rate": 0.0011771828402657332, "loss": 1.0305, "step": 13361 }, { "epoch": 0.3585766423357664, "grad_norm": 0.255859375, "learning_rate": 0.001177175163509316, "loss": 1.0805, "step": 13362 }, { "epoch": 0.35860347788750535, "grad_norm": 0.251953125, "learning_rate": 0.0011771674854867459, "loss": 1.015, "step": 13363 }, { "epoch": 0.3586303134392443, "grad_norm": 0.232421875, "learning_rate": 0.0011771598061980405, "loss": 0.9624, "step": 13364 }, { "epoch": 0.3586571489909833, "grad_norm": 0.2314453125, "learning_rate": 0.001177152125643216, "loss": 0.9244, "step": 13365 }, { "epoch": 0.3586839845427222, "grad_norm": 0.2353515625, "learning_rate": 0.0011771444438222897, "loss": 1.0032, "step": 13366 }, { "epoch": 0.35871082009446115, "grad_norm": 0.25, "learning_rate": 0.0011771367607352784, "loss": 1.0539, "step": 13367 }, { "epoch": 0.3587376556462001, "grad_norm": 0.2578125, "learning_rate": 0.0011771290763821986, "loss": 1.2041, "step": 13368 }, { "epoch": 0.358764491197939, "grad_norm": 0.2451171875, "learning_rate": 0.0011771213907630675, "loss": 1.0127, "step": 13369 }, { "epoch": 0.35879132674967795, "grad_norm": 0.244140625, "learning_rate": 0.0011771137038779017, "loss": 1.0333, "step": 13370 }, { "epoch": 0.35881816230141694, "grad_norm": 0.23828125, "learning_rate": 0.0011771060157267186, "loss": 0.9411, "step": 13371 }, { "epoch": 0.3588449978531559, "grad_norm": 0.255859375, "learning_rate": 0.0011770983263095344, "loss": 1.0582, "step": 13372 }, { "epoch": 0.3588718334048948, "grad_norm": 0.2470703125, "learning_rate": 0.0011770906356263662, "loss": 1.0583, "step": 13373 }, { "epoch": 0.35889866895663375, "grad_norm": 0.25390625, "learning_rate": 0.001177082943677231, "loss": 1.1399, "step": 13374 }, { "epoch": 0.3589255045083727, "grad_norm": 0.259765625, "learning_rate": 0.001177075250462146, "loss": 1.2145, "step": 13375 }, { "epoch": 0.3589523400601116, "grad_norm": 0.232421875, "learning_rate": 0.001177067555981127, "loss": 0.9845, "step": 13376 }, { "epoch": 0.35897917561185055, "grad_norm": 0.2373046875, "learning_rate": 0.001177059860234192, "loss": 0.9809, "step": 13377 }, { "epoch": 0.35900601116358954, "grad_norm": 0.255859375, "learning_rate": 0.0011770521632213573, "loss": 1.105, "step": 13378 }, { "epoch": 0.3590328467153285, "grad_norm": 0.24609375, "learning_rate": 0.0011770444649426399, "loss": 1.0188, "step": 13379 }, { "epoch": 0.3590596822670674, "grad_norm": 0.2294921875, "learning_rate": 0.001177036765398057, "loss": 0.9112, "step": 13380 }, { "epoch": 0.35908651781880635, "grad_norm": 0.234375, "learning_rate": 0.001177029064587625, "loss": 0.9866, "step": 13381 }, { "epoch": 0.3591133533705453, "grad_norm": 0.216796875, "learning_rate": 0.0011770213625113609, "loss": 0.8723, "step": 13382 }, { "epoch": 0.3591401889222842, "grad_norm": 0.26171875, "learning_rate": 0.0011770136591692816, "loss": 1.0324, "step": 13383 }, { "epoch": 0.3591670244740232, "grad_norm": 0.240234375, "learning_rate": 0.0011770059545614042, "loss": 1.0131, "step": 13384 }, { "epoch": 0.35919386002576215, "grad_norm": 0.236328125, "learning_rate": 0.0011769982486877456, "loss": 1.0205, "step": 13385 }, { "epoch": 0.3592206955775011, "grad_norm": 0.228515625, "learning_rate": 0.0011769905415483226, "loss": 0.9411, "step": 13386 }, { "epoch": 0.35924753112924, "grad_norm": 0.2421875, "learning_rate": 0.001176982833143152, "loss": 1.0635, "step": 13387 }, { "epoch": 0.35927436668097895, "grad_norm": 0.248046875, "learning_rate": 0.001176975123472251, "loss": 1.0911, "step": 13388 }, { "epoch": 0.3593012022327179, "grad_norm": 0.248046875, "learning_rate": 0.0011769674125356362, "loss": 1.0056, "step": 13389 }, { "epoch": 0.3593280377844568, "grad_norm": 0.2490234375, "learning_rate": 0.0011769597003333245, "loss": 1.0611, "step": 13390 }, { "epoch": 0.3593548733361958, "grad_norm": 0.2294921875, "learning_rate": 0.0011769519868653332, "loss": 0.9353, "step": 13391 }, { "epoch": 0.35938170888793475, "grad_norm": 0.2373046875, "learning_rate": 0.0011769442721316788, "loss": 1.0464, "step": 13392 }, { "epoch": 0.3594085444396737, "grad_norm": 0.24609375, "learning_rate": 0.0011769365561323784, "loss": 1.0506, "step": 13393 }, { "epoch": 0.3594353799914126, "grad_norm": 0.244140625, "learning_rate": 0.0011769288388674493, "loss": 1.0087, "step": 13394 }, { "epoch": 0.35946221554315155, "grad_norm": 0.24609375, "learning_rate": 0.0011769211203369076, "loss": 1.0388, "step": 13395 }, { "epoch": 0.3594890510948905, "grad_norm": 0.2578125, "learning_rate": 0.0011769134005407708, "loss": 1.0593, "step": 13396 }, { "epoch": 0.3595158866466295, "grad_norm": 0.24609375, "learning_rate": 0.001176905679479056, "loss": 1.0952, "step": 13397 }, { "epoch": 0.3595427221983684, "grad_norm": 0.2294921875, "learning_rate": 0.0011768979571517796, "loss": 0.9852, "step": 13398 }, { "epoch": 0.35956955775010735, "grad_norm": 0.25390625, "learning_rate": 0.001176890233558959, "loss": 1.2017, "step": 13399 }, { "epoch": 0.3595963933018463, "grad_norm": 0.2431640625, "learning_rate": 0.0011768825087006107, "loss": 1.0257, "step": 13400 }, { "epoch": 0.3596232288535852, "grad_norm": 0.2421875, "learning_rate": 0.001176874782576752, "loss": 1.0408, "step": 13401 }, { "epoch": 0.35965006440532415, "grad_norm": 0.2333984375, "learning_rate": 0.0011768670551873998, "loss": 0.9398, "step": 13402 }, { "epoch": 0.3596768999570631, "grad_norm": 0.248046875, "learning_rate": 0.0011768593265325709, "loss": 1.0372, "step": 13403 }, { "epoch": 0.3597037355088021, "grad_norm": 0.2421875, "learning_rate": 0.0011768515966122826, "loss": 0.9944, "step": 13404 }, { "epoch": 0.359730571060541, "grad_norm": 0.255859375, "learning_rate": 0.0011768438654265515, "loss": 1.1726, "step": 13405 }, { "epoch": 0.35975740661227995, "grad_norm": 0.224609375, "learning_rate": 0.0011768361329753945, "loss": 0.9931, "step": 13406 }, { "epoch": 0.3597842421640189, "grad_norm": 0.2412109375, "learning_rate": 0.0011768283992588287, "loss": 1.0499, "step": 13407 }, { "epoch": 0.3598110777157578, "grad_norm": 0.2275390625, "learning_rate": 0.0011768206642768712, "loss": 0.995, "step": 13408 }, { "epoch": 0.35983791326749676, "grad_norm": 0.2490234375, "learning_rate": 0.001176812928029539, "loss": 1.066, "step": 13409 }, { "epoch": 0.35986474881923575, "grad_norm": 0.216796875, "learning_rate": 0.001176805190516849, "loss": 0.8433, "step": 13410 }, { "epoch": 0.3598915843709747, "grad_norm": 0.255859375, "learning_rate": 0.0011767974517388176, "loss": 1.142, "step": 13411 }, { "epoch": 0.3599184199227136, "grad_norm": 0.232421875, "learning_rate": 0.001176789711695463, "loss": 0.9944, "step": 13412 }, { "epoch": 0.35994525547445255, "grad_norm": 0.2490234375, "learning_rate": 0.0011767819703868009, "loss": 1.0251, "step": 13413 }, { "epoch": 0.3599720910261915, "grad_norm": 0.240234375, "learning_rate": 0.001176774227812849, "loss": 0.9757, "step": 13414 }, { "epoch": 0.3599989265779304, "grad_norm": 0.228515625, "learning_rate": 0.0011767664839736243, "loss": 0.9373, "step": 13415 }, { "epoch": 0.3600257621296694, "grad_norm": 0.236328125, "learning_rate": 0.0011767587388691436, "loss": 1.0273, "step": 13416 }, { "epoch": 0.36005259768140835, "grad_norm": 0.2373046875, "learning_rate": 0.0011767509924994236, "loss": 0.9291, "step": 13417 }, { "epoch": 0.3600794332331473, "grad_norm": 0.2373046875, "learning_rate": 0.0011767432448644818, "loss": 0.9448, "step": 13418 }, { "epoch": 0.3601062687848862, "grad_norm": 0.2275390625, "learning_rate": 0.001176735495964335, "loss": 0.8717, "step": 13419 }, { "epoch": 0.36013310433662515, "grad_norm": 0.240234375, "learning_rate": 0.0011767277457990005, "loss": 0.955, "step": 13420 }, { "epoch": 0.3601599398883641, "grad_norm": 0.2373046875, "learning_rate": 0.0011767199943684946, "loss": 1.0342, "step": 13421 }, { "epoch": 0.360186775440103, "grad_norm": 0.2421875, "learning_rate": 0.0011767122416728349, "loss": 1.0409, "step": 13422 }, { "epoch": 0.360213610991842, "grad_norm": 0.244140625, "learning_rate": 0.001176704487712038, "loss": 1.0343, "step": 13423 }, { "epoch": 0.36024044654358095, "grad_norm": 0.251953125, "learning_rate": 0.0011766967324861213, "loss": 1.1292, "step": 13424 }, { "epoch": 0.3602672820953199, "grad_norm": 0.25390625, "learning_rate": 0.0011766889759951017, "loss": 1.0811, "step": 13425 }, { "epoch": 0.3602941176470588, "grad_norm": 0.2294921875, "learning_rate": 0.0011766812182389958, "loss": 0.9119, "step": 13426 }, { "epoch": 0.36032095319879776, "grad_norm": 0.2373046875, "learning_rate": 0.0011766734592178214, "loss": 1.0111, "step": 13427 }, { "epoch": 0.3603477887505367, "grad_norm": 0.248046875, "learning_rate": 0.0011766656989315948, "loss": 1.0718, "step": 13428 }, { "epoch": 0.3603746243022757, "grad_norm": 0.2392578125, "learning_rate": 0.0011766579373803335, "loss": 1.0595, "step": 13429 }, { "epoch": 0.3604014598540146, "grad_norm": 0.2314453125, "learning_rate": 0.0011766501745640542, "loss": 0.9578, "step": 13430 }, { "epoch": 0.36042829540575355, "grad_norm": 0.255859375, "learning_rate": 0.0011766424104827742, "loss": 1.1794, "step": 13431 }, { "epoch": 0.3604551309574925, "grad_norm": 0.2412109375, "learning_rate": 0.0011766346451365104, "loss": 1.0648, "step": 13432 }, { "epoch": 0.3604819665092314, "grad_norm": 0.2421875, "learning_rate": 0.0011766268785252797, "loss": 0.9909, "step": 13433 }, { "epoch": 0.36050880206097036, "grad_norm": 0.234375, "learning_rate": 0.0011766191106490993, "loss": 0.9253, "step": 13434 }, { "epoch": 0.3605356376127093, "grad_norm": 0.25, "learning_rate": 0.0011766113415079865, "loss": 1.0978, "step": 13435 }, { "epoch": 0.3605624731644483, "grad_norm": 0.2431640625, "learning_rate": 0.0011766035711019576, "loss": 1.0466, "step": 13436 }, { "epoch": 0.3605893087161872, "grad_norm": 0.23828125, "learning_rate": 0.0011765957994310304, "loss": 1.0325, "step": 13437 }, { "epoch": 0.36061614426792615, "grad_norm": 0.232421875, "learning_rate": 0.0011765880264952217, "loss": 0.9439, "step": 13438 }, { "epoch": 0.3606429798196651, "grad_norm": 0.234375, "learning_rate": 0.0011765802522945483, "loss": 0.9753, "step": 13439 }, { "epoch": 0.360669815371404, "grad_norm": 0.25, "learning_rate": 0.0011765724768290277, "loss": 1.0965, "step": 13440 }, { "epoch": 0.36069665092314296, "grad_norm": 0.2392578125, "learning_rate": 0.0011765647000986765, "loss": 0.9936, "step": 13441 }, { "epoch": 0.36072348647488195, "grad_norm": 0.25390625, "learning_rate": 0.001176556922103512, "loss": 1.0796, "step": 13442 }, { "epoch": 0.3607503220266209, "grad_norm": 0.244140625, "learning_rate": 0.0011765491428435513, "loss": 1.0942, "step": 13443 }, { "epoch": 0.3607771575783598, "grad_norm": 0.263671875, "learning_rate": 0.0011765413623188114, "loss": 1.1533, "step": 13444 }, { "epoch": 0.36080399313009875, "grad_norm": 0.259765625, "learning_rate": 0.001176533580529309, "loss": 1.128, "step": 13445 }, { "epoch": 0.3608308286818377, "grad_norm": 0.24609375, "learning_rate": 0.0011765257974750622, "loss": 1.0345, "step": 13446 }, { "epoch": 0.3608576642335766, "grad_norm": 0.2578125, "learning_rate": 0.001176518013156087, "loss": 1.1493, "step": 13447 }, { "epoch": 0.36088449978531556, "grad_norm": 0.255859375, "learning_rate": 0.001176510227572401, "loss": 1.0451, "step": 13448 }, { "epoch": 0.36091133533705455, "grad_norm": 0.2490234375, "learning_rate": 0.0011765024407240211, "loss": 1.0518, "step": 13449 }, { "epoch": 0.3609381708887935, "grad_norm": 0.376953125, "learning_rate": 0.0011764946526109646, "loss": 0.9992, "step": 13450 }, { "epoch": 0.3609650064405324, "grad_norm": 0.23828125, "learning_rate": 0.0011764868632332486, "loss": 1.0261, "step": 13451 }, { "epoch": 0.36099184199227136, "grad_norm": 0.2431640625, "learning_rate": 0.0011764790725908897, "loss": 1.014, "step": 13452 }, { "epoch": 0.3610186775440103, "grad_norm": 0.251953125, "learning_rate": 0.0011764712806839053, "loss": 1.0603, "step": 13453 }, { "epoch": 0.3610455130957492, "grad_norm": 0.2431640625, "learning_rate": 0.0011764634875123128, "loss": 1.0654, "step": 13454 }, { "epoch": 0.3610723486474882, "grad_norm": 0.2314453125, "learning_rate": 0.0011764556930761287, "loss": 0.969, "step": 13455 }, { "epoch": 0.36109918419922715, "grad_norm": 0.2314453125, "learning_rate": 0.0011764478973753706, "loss": 0.9673, "step": 13456 }, { "epoch": 0.3611260197509661, "grad_norm": 0.2490234375, "learning_rate": 0.0011764401004100552, "loss": 0.9976, "step": 13457 }, { "epoch": 0.361152855302705, "grad_norm": 0.2412109375, "learning_rate": 0.0011764323021802002, "loss": 1.0272, "step": 13458 }, { "epoch": 0.36117969085444396, "grad_norm": 0.2490234375, "learning_rate": 0.001176424502685822, "loss": 1.1111, "step": 13459 }, { "epoch": 0.3612065264061829, "grad_norm": 0.236328125, "learning_rate": 0.001176416701926938, "loss": 0.9677, "step": 13460 }, { "epoch": 0.36123336195792183, "grad_norm": 0.248046875, "learning_rate": 0.0011764088999035654, "loss": 0.9807, "step": 13461 }, { "epoch": 0.3612601975096608, "grad_norm": 0.25390625, "learning_rate": 0.0011764010966157214, "loss": 1.0839, "step": 13462 }, { "epoch": 0.36128703306139975, "grad_norm": 0.2578125, "learning_rate": 0.001176393292063423, "loss": 1.137, "step": 13463 }, { "epoch": 0.3613138686131387, "grad_norm": 0.23046875, "learning_rate": 0.001176385486246687, "loss": 0.9374, "step": 13464 }, { "epoch": 0.3613407041648776, "grad_norm": 0.259765625, "learning_rate": 0.001176377679165531, "loss": 1.0774, "step": 13465 }, { "epoch": 0.36136753971661656, "grad_norm": 0.2734375, "learning_rate": 0.001176369870819972, "loss": 1.1126, "step": 13466 }, { "epoch": 0.3613943752683555, "grad_norm": 0.2470703125, "learning_rate": 0.0011763620612100269, "loss": 1.0627, "step": 13467 }, { "epoch": 0.3614212108200945, "grad_norm": 0.2431640625, "learning_rate": 0.001176354250335713, "loss": 1.0849, "step": 13468 }, { "epoch": 0.3614480463718334, "grad_norm": 0.2275390625, "learning_rate": 0.0011763464381970476, "loss": 0.9246, "step": 13469 }, { "epoch": 0.36147488192357236, "grad_norm": 0.234375, "learning_rate": 0.0011763386247940477, "loss": 1.0063, "step": 13470 }, { "epoch": 0.3615017174753113, "grad_norm": 0.2373046875, "learning_rate": 0.0011763308101267304, "loss": 0.97, "step": 13471 }, { "epoch": 0.3615285530270502, "grad_norm": 0.236328125, "learning_rate": 0.0011763229941951128, "loss": 1.0228, "step": 13472 }, { "epoch": 0.36155538857878916, "grad_norm": 0.244140625, "learning_rate": 0.001176315176999212, "loss": 1.0685, "step": 13473 }, { "epoch": 0.3615822241305281, "grad_norm": 0.240234375, "learning_rate": 0.0011763073585390456, "loss": 0.9962, "step": 13474 }, { "epoch": 0.3616090596822671, "grad_norm": 0.2373046875, "learning_rate": 0.00117629953881463, "loss": 0.9801, "step": 13475 }, { "epoch": 0.361635895234006, "grad_norm": 0.244140625, "learning_rate": 0.001176291717825983, "loss": 1.0678, "step": 13476 }, { "epoch": 0.36166273078574496, "grad_norm": 0.2275390625, "learning_rate": 0.0011762838955731215, "loss": 0.9284, "step": 13477 }, { "epoch": 0.3616895663374839, "grad_norm": 0.234375, "learning_rate": 0.0011762760720560625, "loss": 0.9813, "step": 13478 }, { "epoch": 0.3617164018892228, "grad_norm": 0.244140625, "learning_rate": 0.0011762682472748235, "loss": 1.0096, "step": 13479 }, { "epoch": 0.36174323744096176, "grad_norm": 0.22265625, "learning_rate": 0.0011762604212294215, "loss": 0.8874, "step": 13480 }, { "epoch": 0.36177007299270075, "grad_norm": 0.2578125, "learning_rate": 0.0011762525939198738, "loss": 1.0392, "step": 13481 }, { "epoch": 0.3617969085444397, "grad_norm": 0.232421875, "learning_rate": 0.0011762447653461971, "loss": 0.9384, "step": 13482 }, { "epoch": 0.3618237440961786, "grad_norm": 0.240234375, "learning_rate": 0.0011762369355084092, "loss": 1.0537, "step": 13483 }, { "epoch": 0.36185057964791756, "grad_norm": 0.234375, "learning_rate": 0.001176229104406527, "loss": 0.9882, "step": 13484 }, { "epoch": 0.3618774151996565, "grad_norm": 0.23046875, "learning_rate": 0.0011762212720405675, "loss": 0.9186, "step": 13485 }, { "epoch": 0.36190425075139543, "grad_norm": 0.2314453125, "learning_rate": 0.0011762134384105483, "loss": 0.9537, "step": 13486 }, { "epoch": 0.3619310863031344, "grad_norm": 0.232421875, "learning_rate": 0.0011762056035164862, "loss": 0.9357, "step": 13487 }, { "epoch": 0.36195792185487335, "grad_norm": 0.2255859375, "learning_rate": 0.0011761977673583983, "loss": 0.8973, "step": 13488 }, { "epoch": 0.3619847574066123, "grad_norm": 0.255859375, "learning_rate": 0.0011761899299363022, "loss": 1.0397, "step": 13489 }, { "epoch": 0.3620115929583512, "grad_norm": 0.2373046875, "learning_rate": 0.001176182091250215, "loss": 0.9409, "step": 13490 }, { "epoch": 0.36203842851009016, "grad_norm": 0.244140625, "learning_rate": 0.0011761742513001537, "loss": 0.9404, "step": 13491 }, { "epoch": 0.3620652640618291, "grad_norm": 0.2412109375, "learning_rate": 0.0011761664100861358, "loss": 0.9403, "step": 13492 }, { "epoch": 0.36209209961356803, "grad_norm": 0.265625, "learning_rate": 0.0011761585676081782, "loss": 0.9372, "step": 13493 }, { "epoch": 0.362118935165307, "grad_norm": 0.255859375, "learning_rate": 0.0011761507238662982, "loss": 1.0221, "step": 13494 }, { "epoch": 0.36214577071704596, "grad_norm": 0.26171875, "learning_rate": 0.0011761428788605128, "loss": 1.1584, "step": 13495 }, { "epoch": 0.3621726062687849, "grad_norm": 0.2421875, "learning_rate": 0.0011761350325908398, "loss": 1.0515, "step": 13496 }, { "epoch": 0.3621994418205238, "grad_norm": 0.25390625, "learning_rate": 0.0011761271850572958, "loss": 1.01, "step": 13497 }, { "epoch": 0.36222627737226276, "grad_norm": 0.255859375, "learning_rate": 0.0011761193362598984, "loss": 1.0976, "step": 13498 }, { "epoch": 0.3622531129240017, "grad_norm": 0.255859375, "learning_rate": 0.0011761114861986645, "loss": 1.0986, "step": 13499 }, { "epoch": 0.3622799484757407, "grad_norm": 0.2578125, "learning_rate": 0.0011761036348736115, "loss": 1.0891, "step": 13500 }, { "epoch": 0.3623067840274796, "grad_norm": 0.23828125, "learning_rate": 0.0011760957822847568, "loss": 0.9937, "step": 13501 }, { "epoch": 0.36233361957921856, "grad_norm": 0.25, "learning_rate": 0.0011760879284321174, "loss": 1.031, "step": 13502 }, { "epoch": 0.3623604551309575, "grad_norm": 0.236328125, "learning_rate": 0.0011760800733157104, "loss": 0.89, "step": 13503 }, { "epoch": 0.36238729068269643, "grad_norm": 0.23046875, "learning_rate": 0.0011760722169355533, "loss": 0.992, "step": 13504 }, { "epoch": 0.36241412623443536, "grad_norm": 0.2412109375, "learning_rate": 0.0011760643592916634, "loss": 1.0053, "step": 13505 }, { "epoch": 0.3624409617861743, "grad_norm": 0.2470703125, "learning_rate": 0.0011760565003840577, "loss": 1.0146, "step": 13506 }, { "epoch": 0.3624677973379133, "grad_norm": 0.2333984375, "learning_rate": 0.0011760486402127533, "loss": 0.9225, "step": 13507 }, { "epoch": 0.3624946328896522, "grad_norm": 0.2255859375, "learning_rate": 0.0011760407787777677, "loss": 0.8901, "step": 13508 }, { "epoch": 0.36252146844139116, "grad_norm": 0.2314453125, "learning_rate": 0.0011760329160791184, "loss": 0.9076, "step": 13509 }, { "epoch": 0.3625483039931301, "grad_norm": 0.25, "learning_rate": 0.0011760250521168222, "loss": 0.9847, "step": 13510 }, { "epoch": 0.36257513954486903, "grad_norm": 0.228515625, "learning_rate": 0.0011760171868908965, "loss": 0.9259, "step": 13511 }, { "epoch": 0.36260197509660796, "grad_norm": 0.232421875, "learning_rate": 0.0011760093204013584, "loss": 0.9628, "step": 13512 }, { "epoch": 0.36262881064834696, "grad_norm": 0.2373046875, "learning_rate": 0.0011760014526482256, "loss": 0.9931, "step": 13513 }, { "epoch": 0.3626556462000859, "grad_norm": 0.25, "learning_rate": 0.0011759935836315148, "loss": 1.0714, "step": 13514 }, { "epoch": 0.3626824817518248, "grad_norm": 0.236328125, "learning_rate": 0.0011759857133512436, "loss": 0.9871, "step": 13515 }, { "epoch": 0.36270931730356376, "grad_norm": 0.25, "learning_rate": 0.0011759778418074295, "loss": 1.0749, "step": 13516 }, { "epoch": 0.3627361528553027, "grad_norm": 0.26171875, "learning_rate": 0.001175969969000089, "loss": 1.1324, "step": 13517 }, { "epoch": 0.36276298840704163, "grad_norm": 0.2412109375, "learning_rate": 0.00117596209492924, "loss": 0.9924, "step": 13518 }, { "epoch": 0.36278982395878057, "grad_norm": 0.2421875, "learning_rate": 0.0011759542195948999, "loss": 0.9545, "step": 13519 }, { "epoch": 0.36281665951051956, "grad_norm": 0.244140625, "learning_rate": 0.0011759463429970854, "loss": 1.0321, "step": 13520 }, { "epoch": 0.3628434950622585, "grad_norm": 0.2490234375, "learning_rate": 0.001175938465135814, "loss": 1.0115, "step": 13521 }, { "epoch": 0.3628703306139974, "grad_norm": 0.248046875, "learning_rate": 0.0011759305860111033, "loss": 1.0044, "step": 13522 }, { "epoch": 0.36289716616573636, "grad_norm": 0.232421875, "learning_rate": 0.0011759227056229702, "loss": 0.9665, "step": 13523 }, { "epoch": 0.3629240017174753, "grad_norm": 0.23828125, "learning_rate": 0.0011759148239714322, "loss": 0.9975, "step": 13524 }, { "epoch": 0.36295083726921423, "grad_norm": 0.263671875, "learning_rate": 0.0011759069410565066, "loss": 1.065, "step": 13525 }, { "epoch": 0.3629776728209532, "grad_norm": 0.251953125, "learning_rate": 0.0011758990568782105, "loss": 1.092, "step": 13526 }, { "epoch": 0.36300450837269216, "grad_norm": 0.2578125, "learning_rate": 0.0011758911714365612, "loss": 1.2389, "step": 13527 }, { "epoch": 0.3630313439244311, "grad_norm": 0.2451171875, "learning_rate": 0.0011758832847315762, "loss": 1.0434, "step": 13528 }, { "epoch": 0.36305817947617003, "grad_norm": 0.259765625, "learning_rate": 0.0011758753967632727, "loss": 1.1639, "step": 13529 }, { "epoch": 0.36308501502790896, "grad_norm": 0.25, "learning_rate": 0.0011758675075316679, "loss": 0.9755, "step": 13530 }, { "epoch": 0.3631118505796479, "grad_norm": 0.2314453125, "learning_rate": 0.0011758596170367793, "loss": 0.9465, "step": 13531 }, { "epoch": 0.36313868613138683, "grad_norm": 0.259765625, "learning_rate": 0.0011758517252786242, "loss": 1.0763, "step": 13532 }, { "epoch": 0.3631655216831258, "grad_norm": 0.22265625, "learning_rate": 0.0011758438322572197, "loss": 0.8881, "step": 13533 }, { "epoch": 0.36319235723486476, "grad_norm": 0.2431640625, "learning_rate": 0.0011758359379725834, "loss": 1.0154, "step": 13534 }, { "epoch": 0.3632191927866037, "grad_norm": 0.2431640625, "learning_rate": 0.0011758280424247324, "loss": 1.0695, "step": 13535 }, { "epoch": 0.36324602833834263, "grad_norm": 0.2255859375, "learning_rate": 0.001175820145613684, "loss": 0.8679, "step": 13536 }, { "epoch": 0.36327286389008157, "grad_norm": 0.259765625, "learning_rate": 0.0011758122475394556, "loss": 1.1779, "step": 13537 }, { "epoch": 0.3632996994418205, "grad_norm": 0.2490234375, "learning_rate": 0.0011758043482020645, "loss": 1.0464, "step": 13538 }, { "epoch": 0.3633265349935595, "grad_norm": 0.2470703125, "learning_rate": 0.0011757964476015283, "loss": 0.9803, "step": 13539 }, { "epoch": 0.3633533705452984, "grad_norm": 0.251953125, "learning_rate": 0.0011757885457378638, "loss": 1.0966, "step": 13540 }, { "epoch": 0.36338020609703736, "grad_norm": 0.2265625, "learning_rate": 0.0011757806426110889, "loss": 0.9327, "step": 13541 }, { "epoch": 0.3634070416487763, "grad_norm": 0.2421875, "learning_rate": 0.0011757727382212205, "loss": 1.0908, "step": 13542 }, { "epoch": 0.36343387720051523, "grad_norm": 0.2412109375, "learning_rate": 0.0011757648325682762, "loss": 1.0472, "step": 13543 }, { "epoch": 0.36346071275225417, "grad_norm": 0.251953125, "learning_rate": 0.0011757569256522732, "loss": 1.0174, "step": 13544 }, { "epoch": 0.36348754830399316, "grad_norm": 0.2294921875, "learning_rate": 0.001175749017473229, "loss": 0.9524, "step": 13545 }, { "epoch": 0.3635143838557321, "grad_norm": 0.2373046875, "learning_rate": 0.0011757411080311607, "loss": 0.9593, "step": 13546 }, { "epoch": 0.36354121940747103, "grad_norm": 0.2431640625, "learning_rate": 0.0011757331973260858, "loss": 1.0371, "step": 13547 }, { "epoch": 0.36356805495920996, "grad_norm": 0.25390625, "learning_rate": 0.0011757252853580216, "loss": 1.1084, "step": 13548 }, { "epoch": 0.3635948905109489, "grad_norm": 0.2421875, "learning_rate": 0.0011757173721269859, "loss": 1.0239, "step": 13549 }, { "epoch": 0.36362172606268783, "grad_norm": 0.23828125, "learning_rate": 0.0011757094576329951, "loss": 0.9919, "step": 13550 }, { "epoch": 0.36364856161442677, "grad_norm": 0.2353515625, "learning_rate": 0.0011757015418760674, "loss": 0.9573, "step": 13551 }, { "epoch": 0.36367539716616576, "grad_norm": 0.23828125, "learning_rate": 0.00117569362485622, "loss": 0.9804, "step": 13552 }, { "epoch": 0.3637022327179047, "grad_norm": 0.23046875, "learning_rate": 0.00117568570657347, "loss": 0.8959, "step": 13553 }, { "epoch": 0.36372906826964363, "grad_norm": 0.2421875, "learning_rate": 0.001175677787027835, "loss": 0.9454, "step": 13554 }, { "epoch": 0.36375590382138256, "grad_norm": 0.259765625, "learning_rate": 0.0011756698662193322, "loss": 1.1881, "step": 13555 }, { "epoch": 0.3637827393731215, "grad_norm": 0.25, "learning_rate": 0.0011756619441479791, "loss": 1.06, "step": 13556 }, { "epoch": 0.36380957492486043, "grad_norm": 0.2255859375, "learning_rate": 0.0011756540208137931, "loss": 0.9488, "step": 13557 }, { "epoch": 0.3638364104765994, "grad_norm": 0.2578125, "learning_rate": 0.0011756460962167915, "loss": 1.1095, "step": 13558 }, { "epoch": 0.36386324602833836, "grad_norm": 0.208984375, "learning_rate": 0.0011756381703569919, "loss": 0.7794, "step": 13559 }, { "epoch": 0.3638900815800773, "grad_norm": 0.2373046875, "learning_rate": 0.0011756302432344113, "loss": 0.9792, "step": 13560 }, { "epoch": 0.36391691713181623, "grad_norm": 0.25390625, "learning_rate": 0.0011756223148490675, "loss": 1.0534, "step": 13561 }, { "epoch": 0.36394375268355517, "grad_norm": 0.2431640625, "learning_rate": 0.0011756143852009776, "loss": 0.9895, "step": 13562 }, { "epoch": 0.3639705882352941, "grad_norm": 0.2412109375, "learning_rate": 0.001175606454290159, "loss": 1.0328, "step": 13563 }, { "epoch": 0.36399742378703304, "grad_norm": 0.2470703125, "learning_rate": 0.0011755985221166291, "loss": 1.0334, "step": 13564 }, { "epoch": 0.364024259338772, "grad_norm": 0.25, "learning_rate": 0.0011755905886804056, "loss": 1.0294, "step": 13565 }, { "epoch": 0.36405109489051096, "grad_norm": 0.244140625, "learning_rate": 0.0011755826539815056, "loss": 1.0352, "step": 13566 }, { "epoch": 0.3640779304422499, "grad_norm": 0.259765625, "learning_rate": 0.0011755747180199466, "loss": 1.1829, "step": 13567 }, { "epoch": 0.36410476599398883, "grad_norm": 0.2490234375, "learning_rate": 0.001175566780795746, "loss": 1.0611, "step": 13568 }, { "epoch": 0.36413160154572777, "grad_norm": 0.23046875, "learning_rate": 0.0011755588423089211, "loss": 0.9413, "step": 13569 }, { "epoch": 0.3641584370974667, "grad_norm": 0.228515625, "learning_rate": 0.0011755509025594896, "loss": 0.9849, "step": 13570 }, { "epoch": 0.3641852726492057, "grad_norm": 0.25390625, "learning_rate": 0.0011755429615474687, "loss": 1.0424, "step": 13571 }, { "epoch": 0.36421210820094463, "grad_norm": 0.2333984375, "learning_rate": 0.001175535019272876, "loss": 0.9972, "step": 13572 }, { "epoch": 0.36423894375268356, "grad_norm": 0.2431640625, "learning_rate": 0.0011755270757357287, "loss": 1.0242, "step": 13573 }, { "epoch": 0.3642657793044225, "grad_norm": 0.240234375, "learning_rate": 0.0011755191309360442, "loss": 0.9691, "step": 13574 }, { "epoch": 0.36429261485616143, "grad_norm": 0.25390625, "learning_rate": 0.0011755111848738402, "loss": 1.1016, "step": 13575 }, { "epoch": 0.36431945040790037, "grad_norm": 0.2431640625, "learning_rate": 0.0011755032375491338, "loss": 1.003, "step": 13576 }, { "epoch": 0.3643462859596393, "grad_norm": 0.236328125, "learning_rate": 0.0011754952889619428, "loss": 0.9845, "step": 13577 }, { "epoch": 0.3643731215113783, "grad_norm": 0.232421875, "learning_rate": 0.0011754873391122845, "loss": 0.9525, "step": 13578 }, { "epoch": 0.36439995706311723, "grad_norm": 0.2373046875, "learning_rate": 0.0011754793880001761, "loss": 1.0114, "step": 13579 }, { "epoch": 0.36442679261485617, "grad_norm": 0.22265625, "learning_rate": 0.0011754714356256354, "loss": 0.9284, "step": 13580 }, { "epoch": 0.3644536281665951, "grad_norm": 0.2373046875, "learning_rate": 0.0011754634819886795, "loss": 1.046, "step": 13581 }, { "epoch": 0.36448046371833404, "grad_norm": 0.2333984375, "learning_rate": 0.0011754555270893261, "loss": 1.019, "step": 13582 }, { "epoch": 0.36450729927007297, "grad_norm": 0.2412109375, "learning_rate": 0.0011754475709275926, "loss": 1.0473, "step": 13583 }, { "epoch": 0.36453413482181196, "grad_norm": 0.240234375, "learning_rate": 0.0011754396135034964, "loss": 0.998, "step": 13584 }, { "epoch": 0.3645609703735509, "grad_norm": 0.26953125, "learning_rate": 0.001175431654817055, "loss": 1.1037, "step": 13585 }, { "epoch": 0.36458780592528983, "grad_norm": 0.24609375, "learning_rate": 0.0011754236948682858, "loss": 1.04, "step": 13586 }, { "epoch": 0.36461464147702877, "grad_norm": 0.2451171875, "learning_rate": 0.0011754157336572064, "loss": 1.0427, "step": 13587 }, { "epoch": 0.3646414770287677, "grad_norm": 0.453125, "learning_rate": 0.001175407771183834, "loss": 1.5819, "step": 13588 }, { "epoch": 0.36466831258050664, "grad_norm": 0.34765625, "learning_rate": 0.0011753998074481864, "loss": 1.5992, "step": 13589 }, { "epoch": 0.3646951481322456, "grad_norm": 0.349609375, "learning_rate": 0.0011753918424502808, "loss": 1.7244, "step": 13590 }, { "epoch": 0.36472198368398456, "grad_norm": 0.345703125, "learning_rate": 0.0011753838761901349, "loss": 1.6178, "step": 13591 }, { "epoch": 0.3647488192357235, "grad_norm": 0.33203125, "learning_rate": 0.001175375908667766, "loss": 1.6365, "step": 13592 }, { "epoch": 0.36477565478746243, "grad_norm": 0.328125, "learning_rate": 0.0011753679398831918, "loss": 1.513, "step": 13593 }, { "epoch": 0.36480249033920137, "grad_norm": 0.37109375, "learning_rate": 0.0011753599698364292, "loss": 1.5617, "step": 13594 }, { "epoch": 0.3648293258909403, "grad_norm": 0.326171875, "learning_rate": 0.0011753519985274964, "loss": 1.6292, "step": 13595 }, { "epoch": 0.36485616144267924, "grad_norm": 0.32421875, "learning_rate": 0.0011753440259564105, "loss": 1.7788, "step": 13596 }, { "epoch": 0.36488299699441823, "grad_norm": 0.3125, "learning_rate": 0.001175336052123189, "loss": 1.4714, "step": 13597 }, { "epoch": 0.36490983254615716, "grad_norm": 0.306640625, "learning_rate": 0.0011753280770278496, "loss": 1.4762, "step": 13598 }, { "epoch": 0.3649366680978961, "grad_norm": 0.296875, "learning_rate": 0.0011753201006704096, "loss": 1.4138, "step": 13599 }, { "epoch": 0.36496350364963503, "grad_norm": 0.302734375, "learning_rate": 0.0011753121230508867, "loss": 1.6051, "step": 13600 }, { "epoch": 0.36499033920137397, "grad_norm": 0.302734375, "learning_rate": 0.0011753041441692983, "loss": 1.608, "step": 13601 }, { "epoch": 0.3650171747531129, "grad_norm": 0.3046875, "learning_rate": 0.0011752961640256617, "loss": 1.5837, "step": 13602 }, { "epoch": 0.36504401030485184, "grad_norm": 0.3046875, "learning_rate": 0.0011752881826199945, "loss": 1.642, "step": 13603 }, { "epoch": 0.36507084585659083, "grad_norm": 0.296875, "learning_rate": 0.0011752801999523144, "loss": 1.5764, "step": 13604 }, { "epoch": 0.36509768140832977, "grad_norm": 0.2890625, "learning_rate": 0.0011752722160226386, "loss": 1.4379, "step": 13605 }, { "epoch": 0.3651245169600687, "grad_norm": 0.294921875, "learning_rate": 0.001175264230830985, "loss": 1.5338, "step": 13606 }, { "epoch": 0.36515135251180764, "grad_norm": 0.28515625, "learning_rate": 0.001175256244377371, "loss": 1.4972, "step": 13607 }, { "epoch": 0.36517818806354657, "grad_norm": 0.306640625, "learning_rate": 0.001175248256661814, "loss": 1.6855, "step": 13608 }, { "epoch": 0.3652050236152855, "grad_norm": 0.296875, "learning_rate": 0.0011752402676843314, "loss": 1.5935, "step": 13609 }, { "epoch": 0.3652318591670245, "grad_norm": 0.31640625, "learning_rate": 0.0011752322774449412, "loss": 1.6559, "step": 13610 }, { "epoch": 0.36525869471876343, "grad_norm": 0.29296875, "learning_rate": 0.0011752242859436601, "loss": 1.4681, "step": 13611 }, { "epoch": 0.36528553027050237, "grad_norm": 0.318359375, "learning_rate": 0.0011752162931805065, "loss": 1.7779, "step": 13612 }, { "epoch": 0.3653123658222413, "grad_norm": 0.296875, "learning_rate": 0.0011752082991554975, "loss": 1.5822, "step": 13613 }, { "epoch": 0.36533920137398024, "grad_norm": 0.3203125, "learning_rate": 0.0011752003038686509, "loss": 1.6003, "step": 13614 }, { "epoch": 0.3653660369257192, "grad_norm": 0.283203125, "learning_rate": 0.0011751923073199837, "loss": 1.3762, "step": 13615 }, { "epoch": 0.36539287247745816, "grad_norm": 0.302734375, "learning_rate": 0.001175184309509514, "loss": 1.566, "step": 13616 }, { "epoch": 0.3654197080291971, "grad_norm": 0.41796875, "learning_rate": 0.0011751763104372594, "loss": 1.691, "step": 13617 }, { "epoch": 0.36544654358093603, "grad_norm": 0.302734375, "learning_rate": 0.001175168310103237, "loss": 1.5687, "step": 13618 }, { "epoch": 0.36547337913267497, "grad_norm": 0.306640625, "learning_rate": 0.0011751603085074643, "loss": 1.5002, "step": 13619 }, { "epoch": 0.3655002146844139, "grad_norm": 0.30078125, "learning_rate": 0.0011751523056499594, "loss": 1.5034, "step": 13620 }, { "epoch": 0.36552705023615284, "grad_norm": 0.283203125, "learning_rate": 0.0011751443015307395, "loss": 1.4554, "step": 13621 }, { "epoch": 0.3655538857878918, "grad_norm": 0.30859375, "learning_rate": 0.0011751362961498221, "loss": 1.5428, "step": 13622 }, { "epoch": 0.36558072133963077, "grad_norm": 0.314453125, "learning_rate": 0.001175128289507225, "loss": 1.6338, "step": 13623 }, { "epoch": 0.3656075568913697, "grad_norm": 0.279296875, "learning_rate": 0.0011751202816029655, "loss": 1.4712, "step": 13624 }, { "epoch": 0.36563439244310864, "grad_norm": 0.298828125, "learning_rate": 0.0011751122724370615, "loss": 1.6271, "step": 13625 }, { "epoch": 0.36566122799484757, "grad_norm": 0.279296875, "learning_rate": 0.0011751042620095301, "loss": 1.4501, "step": 13626 }, { "epoch": 0.3656880635465865, "grad_norm": 0.28515625, "learning_rate": 0.0011750962503203897, "loss": 1.4899, "step": 13627 }, { "epoch": 0.36571489909832544, "grad_norm": 0.30078125, "learning_rate": 0.0011750882373696568, "loss": 1.4381, "step": 13628 }, { "epoch": 0.36574173465006443, "grad_norm": 0.271484375, "learning_rate": 0.0011750802231573496, "loss": 1.2867, "step": 13629 }, { "epoch": 0.36576857020180337, "grad_norm": 0.283203125, "learning_rate": 0.0011750722076834855, "loss": 1.3506, "step": 13630 }, { "epoch": 0.3657954057535423, "grad_norm": 0.318359375, "learning_rate": 0.0011750641909480822, "loss": 1.5259, "step": 13631 }, { "epoch": 0.36582224130528124, "grad_norm": 0.287109375, "learning_rate": 0.0011750561729511576, "loss": 1.4451, "step": 13632 }, { "epoch": 0.3658490768570202, "grad_norm": 0.294921875, "learning_rate": 0.0011750481536927286, "loss": 1.4462, "step": 13633 }, { "epoch": 0.3658759124087591, "grad_norm": 0.283203125, "learning_rate": 0.001175040133172813, "loss": 1.4352, "step": 13634 }, { "epoch": 0.36590274796049804, "grad_norm": 0.294921875, "learning_rate": 0.001175032111391429, "loss": 1.4413, "step": 13635 }, { "epoch": 0.36592958351223703, "grad_norm": 0.28515625, "learning_rate": 0.0011750240883485934, "loss": 1.4156, "step": 13636 }, { "epoch": 0.36595641906397597, "grad_norm": 0.29296875, "learning_rate": 0.0011750160640443242, "loss": 1.4173, "step": 13637 }, { "epoch": 0.3659832546157149, "grad_norm": 0.2890625, "learning_rate": 0.0011750080384786389, "loss": 1.4535, "step": 13638 }, { "epoch": 0.36601009016745384, "grad_norm": 0.2890625, "learning_rate": 0.0011750000116515552, "loss": 1.486, "step": 13639 }, { "epoch": 0.3660369257191928, "grad_norm": 0.294921875, "learning_rate": 0.0011749919835630906, "loss": 1.5622, "step": 13640 }, { "epoch": 0.3660637612709317, "grad_norm": 0.296875, "learning_rate": 0.0011749839542132628, "loss": 1.4875, "step": 13641 }, { "epoch": 0.3660905968226707, "grad_norm": 0.283203125, "learning_rate": 0.001174975923602089, "loss": 1.3851, "step": 13642 }, { "epoch": 0.36611743237440963, "grad_norm": 0.27734375, "learning_rate": 0.0011749678917295876, "loss": 1.4374, "step": 13643 }, { "epoch": 0.36614426792614857, "grad_norm": 0.291015625, "learning_rate": 0.0011749598585957758, "loss": 1.4552, "step": 13644 }, { "epoch": 0.3661711034778875, "grad_norm": 0.2890625, "learning_rate": 0.001174951824200671, "loss": 1.5047, "step": 13645 }, { "epoch": 0.36619793902962644, "grad_norm": 0.30078125, "learning_rate": 0.0011749437885442912, "loss": 1.4959, "step": 13646 }, { "epoch": 0.3662247745813654, "grad_norm": 0.29296875, "learning_rate": 0.001174935751626654, "loss": 1.4885, "step": 13647 }, { "epoch": 0.3662516101331043, "grad_norm": 0.298828125, "learning_rate": 0.0011749277134477769, "loss": 1.5367, "step": 13648 }, { "epoch": 0.3662784456848433, "grad_norm": 0.28125, "learning_rate": 0.0011749196740076773, "loss": 1.4583, "step": 13649 }, { "epoch": 0.36630528123658224, "grad_norm": 0.271484375, "learning_rate": 0.0011749116333063731, "loss": 1.2847, "step": 13650 }, { "epoch": 0.36633211678832117, "grad_norm": 0.2890625, "learning_rate": 0.0011749035913438818, "loss": 1.5777, "step": 13651 }, { "epoch": 0.3663589523400601, "grad_norm": 0.306640625, "learning_rate": 0.0011748955481202215, "loss": 1.5895, "step": 13652 }, { "epoch": 0.36638578789179904, "grad_norm": 0.29296875, "learning_rate": 0.0011748875036354095, "loss": 1.4856, "step": 13653 }, { "epoch": 0.366412623443538, "grad_norm": 0.271484375, "learning_rate": 0.0011748794578894633, "loss": 1.3549, "step": 13654 }, { "epoch": 0.36643945899527697, "grad_norm": 0.283203125, "learning_rate": 0.0011748714108824007, "loss": 1.3686, "step": 13655 }, { "epoch": 0.3664662945470159, "grad_norm": 0.29296875, "learning_rate": 0.0011748633626142394, "loss": 1.476, "step": 13656 }, { "epoch": 0.36649313009875484, "grad_norm": 0.30078125, "learning_rate": 0.0011748553130849968, "loss": 1.5235, "step": 13657 }, { "epoch": 0.3665199656504938, "grad_norm": 0.287109375, "learning_rate": 0.0011748472622946909, "loss": 1.3737, "step": 13658 }, { "epoch": 0.3665468012022327, "grad_norm": 0.302734375, "learning_rate": 0.0011748392102433395, "loss": 1.5117, "step": 13659 }, { "epoch": 0.36657363675397164, "grad_norm": 0.291015625, "learning_rate": 0.0011748311569309599, "loss": 1.5147, "step": 13660 }, { "epoch": 0.3666004723057106, "grad_norm": 0.30078125, "learning_rate": 0.0011748231023575694, "loss": 1.5508, "step": 13661 }, { "epoch": 0.36662730785744957, "grad_norm": 0.2890625, "learning_rate": 0.0011748150465231866, "loss": 1.437, "step": 13662 }, { "epoch": 0.3666541434091885, "grad_norm": 0.294921875, "learning_rate": 0.0011748069894278285, "loss": 1.4791, "step": 13663 }, { "epoch": 0.36668097896092744, "grad_norm": 0.30078125, "learning_rate": 0.001174798931071513, "loss": 1.4488, "step": 13664 }, { "epoch": 0.3667078145126664, "grad_norm": 0.294921875, "learning_rate": 0.001174790871454258, "loss": 1.5037, "step": 13665 }, { "epoch": 0.3667346500644053, "grad_norm": 0.283203125, "learning_rate": 0.0011747828105760805, "loss": 1.3884, "step": 13666 }, { "epoch": 0.36676148561614424, "grad_norm": 0.294921875, "learning_rate": 0.001174774748436999, "loss": 1.4709, "step": 13667 }, { "epoch": 0.36678832116788324, "grad_norm": 0.283203125, "learning_rate": 0.0011747666850370306, "loss": 1.3612, "step": 13668 }, { "epoch": 0.36681515671962217, "grad_norm": 0.302734375, "learning_rate": 0.0011747586203761933, "loss": 1.4809, "step": 13669 }, { "epoch": 0.3668419922713611, "grad_norm": 0.302734375, "learning_rate": 0.0011747505544545045, "loss": 1.5785, "step": 13670 }, { "epoch": 0.36686882782310004, "grad_norm": 0.2734375, "learning_rate": 0.0011747424872719822, "loss": 1.3185, "step": 13671 }, { "epoch": 0.366895663374839, "grad_norm": 0.28515625, "learning_rate": 0.001174734418828644, "loss": 1.4653, "step": 13672 }, { "epoch": 0.3669224989265779, "grad_norm": 0.283203125, "learning_rate": 0.0011747263491245076, "loss": 1.418, "step": 13673 }, { "epoch": 0.36694933447831685, "grad_norm": 0.279296875, "learning_rate": 0.0011747182781595906, "loss": 1.4157, "step": 13674 }, { "epoch": 0.36697617003005584, "grad_norm": 0.298828125, "learning_rate": 0.0011747102059339108, "loss": 1.5039, "step": 13675 }, { "epoch": 0.3670030055817948, "grad_norm": 0.283203125, "learning_rate": 0.001174702132447486, "loss": 1.4233, "step": 13676 }, { "epoch": 0.3670298411335337, "grad_norm": 0.28515625, "learning_rate": 0.0011746940577003337, "loss": 1.394, "step": 13677 }, { "epoch": 0.36705667668527264, "grad_norm": 0.291015625, "learning_rate": 0.0011746859816924716, "loss": 1.4863, "step": 13678 }, { "epoch": 0.3670835122370116, "grad_norm": 0.298828125, "learning_rate": 0.0011746779044239178, "loss": 1.4236, "step": 13679 }, { "epoch": 0.3671103477887505, "grad_norm": 0.2890625, "learning_rate": 0.0011746698258946896, "loss": 1.4421, "step": 13680 }, { "epoch": 0.3671371833404895, "grad_norm": 0.287109375, "learning_rate": 0.0011746617461048047, "loss": 1.3696, "step": 13681 }, { "epoch": 0.36716401889222844, "grad_norm": 0.27734375, "learning_rate": 0.0011746536650542813, "loss": 1.289, "step": 13682 }, { "epoch": 0.3671908544439674, "grad_norm": 0.28515625, "learning_rate": 0.0011746455827431365, "loss": 1.387, "step": 13683 }, { "epoch": 0.3672176899957063, "grad_norm": 0.296875, "learning_rate": 0.0011746374991713885, "loss": 1.5029, "step": 13684 }, { "epoch": 0.36724452554744524, "grad_norm": 0.283203125, "learning_rate": 0.0011746294143390552, "loss": 1.3431, "step": 13685 }, { "epoch": 0.3672713610991842, "grad_norm": 0.2890625, "learning_rate": 0.0011746213282461536, "loss": 1.3481, "step": 13686 }, { "epoch": 0.36729819665092317, "grad_norm": 0.287109375, "learning_rate": 0.001174613240892702, "loss": 1.4169, "step": 13687 }, { "epoch": 0.3673250322026621, "grad_norm": 0.3125, "learning_rate": 0.001174605152278718, "loss": 1.568, "step": 13688 }, { "epoch": 0.36735186775440104, "grad_norm": 0.2890625, "learning_rate": 0.0011745970624042195, "loss": 1.4733, "step": 13689 }, { "epoch": 0.36737870330614, "grad_norm": 0.29296875, "learning_rate": 0.001174588971269224, "loss": 1.3673, "step": 13690 }, { "epoch": 0.3674055388578789, "grad_norm": 0.302734375, "learning_rate": 0.001174580878873749, "loss": 1.4192, "step": 13691 }, { "epoch": 0.36743237440961785, "grad_norm": 0.306640625, "learning_rate": 0.0011745727852178131, "loss": 1.5842, "step": 13692 }, { "epoch": 0.3674592099613568, "grad_norm": 0.29296875, "learning_rate": 0.0011745646903014333, "loss": 1.475, "step": 13693 }, { "epoch": 0.36748604551309577, "grad_norm": 0.298828125, "learning_rate": 0.0011745565941246277, "loss": 1.3933, "step": 13694 }, { "epoch": 0.3675128810648347, "grad_norm": 0.283203125, "learning_rate": 0.001174548496687414, "loss": 1.3365, "step": 13695 }, { "epoch": 0.36753971661657364, "grad_norm": 0.30859375, "learning_rate": 0.00117454039798981, "loss": 1.5318, "step": 13696 }, { "epoch": 0.3675665521683126, "grad_norm": 0.279296875, "learning_rate": 0.001174532298031833, "loss": 1.3672, "step": 13697 }, { "epoch": 0.3675933877200515, "grad_norm": 0.296875, "learning_rate": 0.0011745241968135016, "loss": 1.4574, "step": 13698 }, { "epoch": 0.36762022327179045, "grad_norm": 0.28515625, "learning_rate": 0.001174516094334833, "loss": 1.3394, "step": 13699 }, { "epoch": 0.36764705882352944, "grad_norm": 0.29296875, "learning_rate": 0.001174507990595845, "loss": 1.3547, "step": 13700 }, { "epoch": 0.3676738943752684, "grad_norm": 0.3046875, "learning_rate": 0.0011744998855965556, "loss": 1.5078, "step": 13701 }, { "epoch": 0.3677007299270073, "grad_norm": 0.291015625, "learning_rate": 0.0011744917793369825, "loss": 1.4148, "step": 13702 }, { "epoch": 0.36772756547874624, "grad_norm": 0.302734375, "learning_rate": 0.0011744836718171434, "loss": 1.4197, "step": 13703 }, { "epoch": 0.3677544010304852, "grad_norm": 0.27734375, "learning_rate": 0.0011744755630370561, "loss": 1.314, "step": 13704 }, { "epoch": 0.3677812365822241, "grad_norm": 0.294921875, "learning_rate": 0.0011744674529967384, "loss": 1.4218, "step": 13705 }, { "epoch": 0.36780807213396305, "grad_norm": 0.29296875, "learning_rate": 0.001174459341696208, "loss": 1.4629, "step": 13706 }, { "epoch": 0.36783490768570204, "grad_norm": 0.294921875, "learning_rate": 0.0011744512291354833, "loss": 1.393, "step": 13707 }, { "epoch": 0.367861743237441, "grad_norm": 0.302734375, "learning_rate": 0.0011744431153145813, "loss": 1.4568, "step": 13708 }, { "epoch": 0.3678885787891799, "grad_norm": 0.2890625, "learning_rate": 0.00117443500023352, "loss": 1.3414, "step": 13709 }, { "epoch": 0.36791541434091884, "grad_norm": 0.2890625, "learning_rate": 0.0011744268838923175, "loss": 1.3927, "step": 13710 }, { "epoch": 0.3679422498926578, "grad_norm": 0.296875, "learning_rate": 0.0011744187662909914, "loss": 1.4159, "step": 13711 }, { "epoch": 0.3679690854443967, "grad_norm": 0.294921875, "learning_rate": 0.0011744106474295595, "loss": 1.3461, "step": 13712 }, { "epoch": 0.3679959209961357, "grad_norm": 0.275390625, "learning_rate": 0.0011744025273080394, "loss": 1.2983, "step": 13713 }, { "epoch": 0.36802275654787464, "grad_norm": 0.2890625, "learning_rate": 0.0011743944059264496, "loss": 1.4004, "step": 13714 }, { "epoch": 0.3680495920996136, "grad_norm": 0.298828125, "learning_rate": 0.001174386283284807, "loss": 1.4514, "step": 13715 }, { "epoch": 0.3680764276513525, "grad_norm": 0.283203125, "learning_rate": 0.00117437815938313, "loss": 1.3663, "step": 13716 }, { "epoch": 0.36810326320309145, "grad_norm": 0.306640625, "learning_rate": 0.0011743700342214365, "loss": 1.486, "step": 13717 }, { "epoch": 0.3681300987548304, "grad_norm": 0.287109375, "learning_rate": 0.001174361907799744, "loss": 1.4355, "step": 13718 }, { "epoch": 0.3681569343065693, "grad_norm": 0.291015625, "learning_rate": 0.0011743537801180706, "loss": 1.4384, "step": 13719 }, { "epoch": 0.3681837698583083, "grad_norm": 0.291015625, "learning_rate": 0.0011743456511764338, "loss": 1.4253, "step": 13720 }, { "epoch": 0.36821060541004724, "grad_norm": 0.283203125, "learning_rate": 0.0011743375209748517, "loss": 1.3639, "step": 13721 }, { "epoch": 0.3682374409617862, "grad_norm": 0.283203125, "learning_rate": 0.0011743293895133422, "loss": 1.3765, "step": 13722 }, { "epoch": 0.3682642765135251, "grad_norm": 0.306640625, "learning_rate": 0.0011743212567919227, "loss": 1.5695, "step": 13723 }, { "epoch": 0.36829111206526405, "grad_norm": 0.3828125, "learning_rate": 0.0011743131228106115, "loss": 1.5615, "step": 13724 }, { "epoch": 0.368317947617003, "grad_norm": 0.283203125, "learning_rate": 0.0011743049875694261, "loss": 1.334, "step": 13725 }, { "epoch": 0.368344783168742, "grad_norm": 0.302734375, "learning_rate": 0.0011742968510683848, "loss": 1.4716, "step": 13726 }, { "epoch": 0.3683716187204809, "grad_norm": 0.296875, "learning_rate": 0.001174288713307505, "loss": 1.4582, "step": 13727 }, { "epoch": 0.36839845427221984, "grad_norm": 0.2890625, "learning_rate": 0.0011742805742868045, "loss": 1.3441, "step": 13728 }, { "epoch": 0.3684252898239588, "grad_norm": 0.28125, "learning_rate": 0.0011742724340063017, "loss": 1.3699, "step": 13729 }, { "epoch": 0.3684521253756977, "grad_norm": 0.279296875, "learning_rate": 0.001174264292466014, "loss": 1.3004, "step": 13730 }, { "epoch": 0.36847896092743665, "grad_norm": 0.318359375, "learning_rate": 0.0011742561496659596, "loss": 1.4994, "step": 13731 }, { "epoch": 0.3685057964791756, "grad_norm": 0.302734375, "learning_rate": 0.0011742480056061559, "loss": 1.5371, "step": 13732 }, { "epoch": 0.3685326320309146, "grad_norm": 0.29296875, "learning_rate": 0.0011742398602866211, "loss": 1.3322, "step": 13733 }, { "epoch": 0.3685594675826535, "grad_norm": 0.28125, "learning_rate": 0.0011742317137073731, "loss": 1.3408, "step": 13734 }, { "epoch": 0.36858630313439245, "grad_norm": 0.306640625, "learning_rate": 0.0011742235658684294, "loss": 1.4435, "step": 13735 }, { "epoch": 0.3686131386861314, "grad_norm": 0.294921875, "learning_rate": 0.0011742154167698085, "loss": 1.3983, "step": 13736 }, { "epoch": 0.3686399742378703, "grad_norm": 0.279296875, "learning_rate": 0.0011742072664115276, "loss": 1.271, "step": 13737 }, { "epoch": 0.36866680978960925, "grad_norm": 0.291015625, "learning_rate": 0.0011741991147936049, "loss": 1.4157, "step": 13738 }, { "epoch": 0.36869364534134824, "grad_norm": 0.291015625, "learning_rate": 0.0011741909619160586, "loss": 1.4403, "step": 13739 }, { "epoch": 0.3687204808930872, "grad_norm": 0.314453125, "learning_rate": 0.001174182807778906, "loss": 1.5269, "step": 13740 }, { "epoch": 0.3687473164448261, "grad_norm": 0.294921875, "learning_rate": 0.0011741746523821653, "loss": 1.3999, "step": 13741 }, { "epoch": 0.36877415199656505, "grad_norm": 0.2890625, "learning_rate": 0.0011741664957258544, "loss": 1.3096, "step": 13742 }, { "epoch": 0.368800987548304, "grad_norm": 0.30859375, "learning_rate": 0.001174158337809991, "loss": 1.6015, "step": 13743 }, { "epoch": 0.3688278231000429, "grad_norm": 0.302734375, "learning_rate": 0.0011741501786345931, "loss": 1.4602, "step": 13744 }, { "epoch": 0.3688546586517819, "grad_norm": 0.296875, "learning_rate": 0.0011741420181996787, "loss": 1.4196, "step": 13745 }, { "epoch": 0.36888149420352084, "grad_norm": 0.298828125, "learning_rate": 0.0011741338565052657, "loss": 1.3691, "step": 13746 }, { "epoch": 0.3689083297552598, "grad_norm": 0.314453125, "learning_rate": 0.0011741256935513718, "loss": 1.5509, "step": 13747 }, { "epoch": 0.3689351653069987, "grad_norm": 0.291015625, "learning_rate": 0.0011741175293380152, "loss": 1.3357, "step": 13748 }, { "epoch": 0.36896200085873765, "grad_norm": 0.29296875, "learning_rate": 0.0011741093638652136, "loss": 1.4315, "step": 13749 }, { "epoch": 0.3689888364104766, "grad_norm": 0.291015625, "learning_rate": 0.0011741011971329847, "loss": 1.3592, "step": 13750 }, { "epoch": 0.3690156719622155, "grad_norm": 0.3046875, "learning_rate": 0.001174093029141347, "loss": 1.4213, "step": 13751 }, { "epoch": 0.3690425075139545, "grad_norm": 0.291015625, "learning_rate": 0.001174084859890318, "loss": 1.3562, "step": 13752 }, { "epoch": 0.36906934306569344, "grad_norm": 0.28515625, "learning_rate": 0.0011740766893799156, "loss": 1.353, "step": 13753 }, { "epoch": 0.3690961786174324, "grad_norm": 0.298828125, "learning_rate": 0.001174068517610158, "loss": 1.4606, "step": 13754 }, { "epoch": 0.3691230141691713, "grad_norm": 0.3046875, "learning_rate": 0.001174060344581063, "loss": 1.4242, "step": 13755 }, { "epoch": 0.36914984972091025, "grad_norm": 0.310546875, "learning_rate": 0.0011740521702926483, "loss": 1.4905, "step": 13756 }, { "epoch": 0.3691766852726492, "grad_norm": 0.294921875, "learning_rate": 0.0011740439947449323, "loss": 1.4236, "step": 13757 }, { "epoch": 0.3692035208243882, "grad_norm": 0.30078125, "learning_rate": 0.0011740358179379324, "loss": 1.4355, "step": 13758 }, { "epoch": 0.3692303563761271, "grad_norm": 0.2890625, "learning_rate": 0.001174027639871667, "loss": 1.3218, "step": 13759 }, { "epoch": 0.36925719192786605, "grad_norm": 0.30078125, "learning_rate": 0.0011740194605461536, "loss": 1.4275, "step": 13760 }, { "epoch": 0.369284027479605, "grad_norm": 0.29296875, "learning_rate": 0.0011740112799614106, "loss": 1.3188, "step": 13761 }, { "epoch": 0.3693108630313439, "grad_norm": 0.296875, "learning_rate": 0.0011740030981174554, "loss": 1.4234, "step": 13762 }, { "epoch": 0.36933769858308285, "grad_norm": 0.298828125, "learning_rate": 0.0011739949150143068, "loss": 1.4522, "step": 13763 }, { "epoch": 0.3693645341348218, "grad_norm": 0.291015625, "learning_rate": 0.0011739867306519818, "loss": 1.4231, "step": 13764 }, { "epoch": 0.3693913696865608, "grad_norm": 0.271484375, "learning_rate": 0.001173978545030499, "loss": 1.3102, "step": 13765 }, { "epoch": 0.3694182052382997, "grad_norm": 0.296875, "learning_rate": 0.001173970358149876, "loss": 1.3151, "step": 13766 }, { "epoch": 0.36944504079003865, "grad_norm": 0.298828125, "learning_rate": 0.001173962170010131, "loss": 1.4292, "step": 13767 }, { "epoch": 0.3694718763417776, "grad_norm": 0.2890625, "learning_rate": 0.0011739539806112818, "loss": 1.3938, "step": 13768 }, { "epoch": 0.3694987118935165, "grad_norm": 0.283203125, "learning_rate": 0.0011739457899533465, "loss": 1.3544, "step": 13769 }, { "epoch": 0.36952554744525545, "grad_norm": 0.302734375, "learning_rate": 0.001173937598036343, "loss": 1.4104, "step": 13770 }, { "epoch": 0.36955238299699444, "grad_norm": 0.294921875, "learning_rate": 0.0011739294048602892, "loss": 1.4315, "step": 13771 }, { "epoch": 0.3695792185487334, "grad_norm": 0.291015625, "learning_rate": 0.0011739212104252032, "loss": 1.4185, "step": 13772 }, { "epoch": 0.3696060541004723, "grad_norm": 0.291015625, "learning_rate": 0.0011739130147311026, "loss": 1.368, "step": 13773 }, { "epoch": 0.36963288965221125, "grad_norm": 0.287109375, "learning_rate": 0.001173904817778006, "loss": 1.3493, "step": 13774 }, { "epoch": 0.3696597252039502, "grad_norm": 0.294921875, "learning_rate": 0.0011738966195659311, "loss": 1.3546, "step": 13775 }, { "epoch": 0.3696865607556891, "grad_norm": 0.28515625, "learning_rate": 0.0011738884200948958, "loss": 1.3109, "step": 13776 }, { "epoch": 0.36971339630742805, "grad_norm": 0.287109375, "learning_rate": 0.0011738802193649181, "loss": 1.1327, "step": 13777 }, { "epoch": 0.36974023185916705, "grad_norm": 0.318359375, "learning_rate": 0.001173872017376016, "loss": 1.3693, "step": 13778 }, { "epoch": 0.369767067410906, "grad_norm": 0.3046875, "learning_rate": 0.0011738638141282078, "loss": 1.4361, "step": 13779 }, { "epoch": 0.3697939029626449, "grad_norm": 0.29296875, "learning_rate": 0.001173855609621511, "loss": 1.3139, "step": 13780 }, { "epoch": 0.36982073851438385, "grad_norm": 0.30078125, "learning_rate": 0.001173847403855944, "loss": 1.4098, "step": 13781 }, { "epoch": 0.3698475740661228, "grad_norm": 0.30859375, "learning_rate": 0.0011738391968315245, "loss": 1.4804, "step": 13782 }, { "epoch": 0.3698744096178617, "grad_norm": 0.29296875, "learning_rate": 0.0011738309885482704, "loss": 1.3767, "step": 13783 }, { "epoch": 0.3699012451696007, "grad_norm": 0.287109375, "learning_rate": 0.0011738227790062002, "loss": 1.4075, "step": 13784 }, { "epoch": 0.36992808072133965, "grad_norm": 0.29296875, "learning_rate": 0.0011738145682053315, "loss": 1.368, "step": 13785 }, { "epoch": 0.3699549162730786, "grad_norm": 0.275390625, "learning_rate": 0.0011738063561456827, "loss": 1.293, "step": 13786 }, { "epoch": 0.3699817518248175, "grad_norm": 0.294921875, "learning_rate": 0.0011737981428272715, "loss": 1.4193, "step": 13787 }, { "epoch": 0.37000858737655645, "grad_norm": 0.306640625, "learning_rate": 0.0011737899282501158, "loss": 1.5343, "step": 13788 }, { "epoch": 0.3700354229282954, "grad_norm": 0.28515625, "learning_rate": 0.0011737817124142338, "loss": 1.3244, "step": 13789 }, { "epoch": 0.3700622584800343, "grad_norm": 0.28515625, "learning_rate": 0.0011737734953196437, "loss": 1.2663, "step": 13790 }, { "epoch": 0.3700890940317733, "grad_norm": 0.279296875, "learning_rate": 0.0011737652769663633, "loss": 1.294, "step": 13791 }, { "epoch": 0.37011592958351225, "grad_norm": 0.296875, "learning_rate": 0.0011737570573544108, "loss": 1.3865, "step": 13792 }, { "epoch": 0.3701427651352512, "grad_norm": 0.30078125, "learning_rate": 0.001173748836483804, "loss": 1.3963, "step": 13793 }, { "epoch": 0.3701696006869901, "grad_norm": 0.3046875, "learning_rate": 0.0011737406143545609, "loss": 1.4338, "step": 13794 }, { "epoch": 0.37019643623872905, "grad_norm": 0.3125, "learning_rate": 0.0011737323909667, "loss": 1.455, "step": 13795 }, { "epoch": 0.370223271790468, "grad_norm": 0.283203125, "learning_rate": 0.0011737241663202387, "loss": 1.3004, "step": 13796 }, { "epoch": 0.370250107342207, "grad_norm": 0.2890625, "learning_rate": 0.0011737159404151957, "loss": 1.3878, "step": 13797 }, { "epoch": 0.3702769428939459, "grad_norm": 0.3046875, "learning_rate": 0.0011737077132515884, "loss": 1.4554, "step": 13798 }, { "epoch": 0.37030377844568485, "grad_norm": 0.296875, "learning_rate": 0.0011736994848294353, "loss": 1.4074, "step": 13799 }, { "epoch": 0.3703306139974238, "grad_norm": 0.283203125, "learning_rate": 0.0011736912551487545, "loss": 1.3063, "step": 13800 }, { "epoch": 0.3703574495491627, "grad_norm": 0.302734375, "learning_rate": 0.0011736830242095636, "loss": 1.4357, "step": 13801 }, { "epoch": 0.37038428510090166, "grad_norm": 0.279296875, "learning_rate": 0.0011736747920118812, "loss": 1.279, "step": 13802 }, { "epoch": 0.3704111206526406, "grad_norm": 0.2890625, "learning_rate": 0.001173666558555725, "loss": 1.3868, "step": 13803 }, { "epoch": 0.3704379562043796, "grad_norm": 0.294921875, "learning_rate": 0.001173658323841113, "loss": 1.4368, "step": 13804 }, { "epoch": 0.3704647917561185, "grad_norm": 0.2734375, "learning_rate": 0.0011736500878680633, "loss": 1.2158, "step": 13805 }, { "epoch": 0.37049162730785745, "grad_norm": 0.294921875, "learning_rate": 0.0011736418506365944, "loss": 1.3298, "step": 13806 }, { "epoch": 0.3705184628595964, "grad_norm": 0.29296875, "learning_rate": 0.001173633612146724, "loss": 1.306, "step": 13807 }, { "epoch": 0.3705452984113353, "grad_norm": 0.298828125, "learning_rate": 0.0011736253723984699, "loss": 1.3882, "step": 13808 }, { "epoch": 0.37057213396307426, "grad_norm": 0.298828125, "learning_rate": 0.0011736171313918508, "loss": 1.3996, "step": 13809 }, { "epoch": 0.37059896951481325, "grad_norm": 0.28125, "learning_rate": 0.0011736088891268843, "loss": 1.2504, "step": 13810 }, { "epoch": 0.3706258050665522, "grad_norm": 0.298828125, "learning_rate": 0.001173600645603589, "loss": 1.3665, "step": 13811 }, { "epoch": 0.3706526406182911, "grad_norm": 0.294921875, "learning_rate": 0.0011735924008219822, "loss": 1.2829, "step": 13812 }, { "epoch": 0.37067947617003005, "grad_norm": 0.2890625, "learning_rate": 0.0011735841547820826, "loss": 1.2909, "step": 13813 }, { "epoch": 0.370706311721769, "grad_norm": 0.29296875, "learning_rate": 0.0011735759074839082, "loss": 1.372, "step": 13814 }, { "epoch": 0.3707331472735079, "grad_norm": 0.298828125, "learning_rate": 0.001173567658927477, "loss": 1.4215, "step": 13815 }, { "epoch": 0.3707599828252469, "grad_norm": 0.306640625, "learning_rate": 0.001173559409112807, "loss": 1.5496, "step": 13816 }, { "epoch": 0.37078681837698585, "grad_norm": 0.310546875, "learning_rate": 0.0011735511580399163, "loss": 1.3976, "step": 13817 }, { "epoch": 0.3708136539287248, "grad_norm": 0.306640625, "learning_rate": 0.0011735429057088233, "loss": 1.4878, "step": 13818 }, { "epoch": 0.3708404894804637, "grad_norm": 0.31640625, "learning_rate": 0.001173534652119546, "loss": 1.4907, "step": 13819 }, { "epoch": 0.37086732503220265, "grad_norm": 0.2890625, "learning_rate": 0.001173526397272102, "loss": 1.3066, "step": 13820 }, { "epoch": 0.3708941605839416, "grad_norm": 0.306640625, "learning_rate": 0.0011735181411665103, "loss": 1.4432, "step": 13821 }, { "epoch": 0.3709209961356805, "grad_norm": 0.302734375, "learning_rate": 0.0011735098838027882, "loss": 1.3783, "step": 13822 }, { "epoch": 0.3709478316874195, "grad_norm": 0.298828125, "learning_rate": 0.0011735016251809543, "loss": 1.3968, "step": 13823 }, { "epoch": 0.37097466723915845, "grad_norm": 0.287109375, "learning_rate": 0.0011734933653010267, "loss": 1.1944, "step": 13824 }, { "epoch": 0.3710015027908974, "grad_norm": 0.298828125, "learning_rate": 0.001173485104163023, "loss": 1.3687, "step": 13825 }, { "epoch": 0.3710283383426363, "grad_norm": 0.294921875, "learning_rate": 0.0011734768417669618, "loss": 1.3448, "step": 13826 }, { "epoch": 0.37105517389437526, "grad_norm": 0.283203125, "learning_rate": 0.0011734685781128614, "loss": 1.3353, "step": 13827 }, { "epoch": 0.3710820094461142, "grad_norm": 0.287109375, "learning_rate": 0.0011734603132007393, "loss": 1.2741, "step": 13828 }, { "epoch": 0.3711088449978532, "grad_norm": 0.306640625, "learning_rate": 0.0011734520470306144, "loss": 1.5134, "step": 13829 }, { "epoch": 0.3711356805495921, "grad_norm": 0.283203125, "learning_rate": 0.001173443779602504, "loss": 1.2823, "step": 13830 }, { "epoch": 0.37116251610133105, "grad_norm": 0.298828125, "learning_rate": 0.0011734355109164268, "loss": 1.3878, "step": 13831 }, { "epoch": 0.37118935165307, "grad_norm": 0.306640625, "learning_rate": 0.001173427240972401, "loss": 1.4151, "step": 13832 }, { "epoch": 0.3712161872048089, "grad_norm": 0.29296875, "learning_rate": 0.0011734189697704441, "loss": 1.319, "step": 13833 }, { "epoch": 0.37124302275654786, "grad_norm": 0.30078125, "learning_rate": 0.0011734106973105751, "loss": 1.422, "step": 13834 }, { "epoch": 0.3712698583082868, "grad_norm": 0.291015625, "learning_rate": 0.0011734024235928117, "loss": 1.3124, "step": 13835 }, { "epoch": 0.3712966938600258, "grad_norm": 0.29296875, "learning_rate": 0.0011733941486171719, "loss": 1.251, "step": 13836 }, { "epoch": 0.3713235294117647, "grad_norm": 0.30859375, "learning_rate": 0.001173385872383674, "loss": 1.4222, "step": 13837 }, { "epoch": 0.37135036496350365, "grad_norm": 0.2890625, "learning_rate": 0.0011733775948923363, "loss": 1.3588, "step": 13838 }, { "epoch": 0.3713772005152426, "grad_norm": 0.294921875, "learning_rate": 0.001173369316143177, "loss": 1.3425, "step": 13839 }, { "epoch": 0.3714040360669815, "grad_norm": 0.283203125, "learning_rate": 0.001173361036136214, "loss": 1.3073, "step": 13840 }, { "epoch": 0.37143087161872046, "grad_norm": 0.296875, "learning_rate": 0.0011733527548714654, "loss": 1.3512, "step": 13841 }, { "epoch": 0.37145770717045945, "grad_norm": 0.283203125, "learning_rate": 0.0011733444723489497, "loss": 1.2979, "step": 13842 }, { "epoch": 0.3714845427221984, "grad_norm": 0.314453125, "learning_rate": 0.0011733361885686848, "loss": 1.4967, "step": 13843 }, { "epoch": 0.3715113782739373, "grad_norm": 0.296875, "learning_rate": 0.0011733279035306891, "loss": 1.3056, "step": 13844 }, { "epoch": 0.37153821382567626, "grad_norm": 0.287109375, "learning_rate": 0.0011733196172349807, "loss": 1.2742, "step": 13845 }, { "epoch": 0.3715650493774152, "grad_norm": 0.30078125, "learning_rate": 0.0011733113296815774, "loss": 1.3524, "step": 13846 }, { "epoch": 0.3715918849291541, "grad_norm": 0.294921875, "learning_rate": 0.001173303040870498, "loss": 1.3761, "step": 13847 }, { "epoch": 0.37161872048089306, "grad_norm": 0.279296875, "learning_rate": 0.0011732947508017605, "loss": 1.2982, "step": 13848 }, { "epoch": 0.37164555603263205, "grad_norm": 0.31640625, "learning_rate": 0.0011732864594753828, "loss": 1.4391, "step": 13849 }, { "epoch": 0.371672391584371, "grad_norm": 0.30078125, "learning_rate": 0.0011732781668913833, "loss": 1.4006, "step": 13850 }, { "epoch": 0.3716992271361099, "grad_norm": 0.294921875, "learning_rate": 0.0011732698730497803, "loss": 1.279, "step": 13851 }, { "epoch": 0.37172606268784886, "grad_norm": 0.279296875, "learning_rate": 0.0011732615779505916, "loss": 1.2842, "step": 13852 }, { "epoch": 0.3717528982395878, "grad_norm": 0.287109375, "learning_rate": 0.0011732532815938357, "loss": 1.3174, "step": 13853 }, { "epoch": 0.3717797337913267, "grad_norm": 0.302734375, "learning_rate": 0.0011732449839795308, "loss": 1.3991, "step": 13854 }, { "epoch": 0.3718065693430657, "grad_norm": 0.2890625, "learning_rate": 0.0011732366851076952, "loss": 1.3783, "step": 13855 }, { "epoch": 0.37183340489480465, "grad_norm": 0.3046875, "learning_rate": 0.0011732283849783468, "loss": 1.4395, "step": 13856 }, { "epoch": 0.3718602404465436, "grad_norm": 0.3046875, "learning_rate": 0.001173220083591504, "loss": 1.3969, "step": 13857 }, { "epoch": 0.3718870759982825, "grad_norm": 0.29296875, "learning_rate": 0.001173211780947185, "loss": 1.3537, "step": 13858 }, { "epoch": 0.37191391155002146, "grad_norm": 0.318359375, "learning_rate": 0.001173203477045408, "loss": 1.4108, "step": 13859 }, { "epoch": 0.3719407471017604, "grad_norm": 0.314453125, "learning_rate": 0.0011731951718861912, "loss": 1.4423, "step": 13860 }, { "epoch": 0.37196758265349933, "grad_norm": 0.302734375, "learning_rate": 0.0011731868654695527, "loss": 1.3934, "step": 13861 }, { "epoch": 0.3719944182052383, "grad_norm": 0.310546875, "learning_rate": 0.0011731785577955112, "loss": 1.4425, "step": 13862 }, { "epoch": 0.37202125375697725, "grad_norm": 0.3046875, "learning_rate": 0.0011731702488640844, "loss": 1.359, "step": 13863 }, { "epoch": 0.3720480893087162, "grad_norm": 0.298828125, "learning_rate": 0.0011731619386752905, "loss": 1.3382, "step": 13864 }, { "epoch": 0.3720749248604551, "grad_norm": 0.29296875, "learning_rate": 0.001173153627229148, "loss": 1.367, "step": 13865 }, { "epoch": 0.37210176041219406, "grad_norm": 0.302734375, "learning_rate": 0.0011731453145256753, "loss": 1.3662, "step": 13866 }, { "epoch": 0.372128595963933, "grad_norm": 0.298828125, "learning_rate": 0.0011731370005648902, "loss": 1.3698, "step": 13867 }, { "epoch": 0.372155431515672, "grad_norm": 0.302734375, "learning_rate": 0.0011731286853468113, "loss": 1.4112, "step": 13868 }, { "epoch": 0.3721822670674109, "grad_norm": 0.298828125, "learning_rate": 0.0011731203688714565, "loss": 1.3146, "step": 13869 }, { "epoch": 0.37220910261914986, "grad_norm": 0.296875, "learning_rate": 0.0011731120511388443, "loss": 1.3508, "step": 13870 }, { "epoch": 0.3722359381708888, "grad_norm": 0.2890625, "learning_rate": 0.001173103732148993, "loss": 1.3045, "step": 13871 }, { "epoch": 0.3722627737226277, "grad_norm": 0.287109375, "learning_rate": 0.0011730954119019204, "loss": 1.2549, "step": 13872 }, { "epoch": 0.37228960927436666, "grad_norm": 0.298828125, "learning_rate": 0.0011730870903976453, "loss": 1.379, "step": 13873 }, { "epoch": 0.3723164448261056, "grad_norm": 0.2890625, "learning_rate": 0.0011730787676361857, "loss": 1.265, "step": 13874 }, { "epoch": 0.3723432803778446, "grad_norm": 0.29296875, "learning_rate": 0.0011730704436175598, "loss": 1.3567, "step": 13875 }, { "epoch": 0.3723701159295835, "grad_norm": 0.283203125, "learning_rate": 0.0011730621183417861, "loss": 1.2281, "step": 13876 }, { "epoch": 0.37239695148132246, "grad_norm": 0.2890625, "learning_rate": 0.0011730537918088826, "loss": 1.2741, "step": 13877 }, { "epoch": 0.3724237870330614, "grad_norm": 0.3046875, "learning_rate": 0.0011730454640188678, "loss": 1.4017, "step": 13878 }, { "epoch": 0.37245062258480033, "grad_norm": 0.29296875, "learning_rate": 0.0011730371349717597, "loss": 1.2648, "step": 13879 }, { "epoch": 0.37247745813653926, "grad_norm": 0.30859375, "learning_rate": 0.001173028804667577, "loss": 1.421, "step": 13880 }, { "epoch": 0.37250429368827825, "grad_norm": 0.306640625, "learning_rate": 0.0011730204731063374, "loss": 1.3594, "step": 13881 }, { "epoch": 0.3725311292400172, "grad_norm": 0.30078125, "learning_rate": 0.0011730121402880594, "loss": 1.3873, "step": 13882 }, { "epoch": 0.3725579647917561, "grad_norm": 0.275390625, "learning_rate": 0.0011730038062127616, "loss": 1.2416, "step": 13883 }, { "epoch": 0.37258480034349506, "grad_norm": 0.310546875, "learning_rate": 0.0011729954708804618, "loss": 1.5211, "step": 13884 }, { "epoch": 0.372611635895234, "grad_norm": 0.294921875, "learning_rate": 0.0011729871342911788, "loss": 1.3424, "step": 13885 }, { "epoch": 0.37263847144697293, "grad_norm": 0.287109375, "learning_rate": 0.0011729787964449303, "loss": 1.2549, "step": 13886 }, { "epoch": 0.3726653069987119, "grad_norm": 0.298828125, "learning_rate": 0.0011729704573417352, "loss": 1.4012, "step": 13887 }, { "epoch": 0.37269214255045086, "grad_norm": 0.29296875, "learning_rate": 0.001172962116981611, "loss": 1.3408, "step": 13888 }, { "epoch": 0.3727189781021898, "grad_norm": 0.298828125, "learning_rate": 0.001172953775364577, "loss": 1.3247, "step": 13889 }, { "epoch": 0.3727458136539287, "grad_norm": 0.306640625, "learning_rate": 0.0011729454324906508, "loss": 1.3784, "step": 13890 }, { "epoch": 0.37277264920566766, "grad_norm": 0.296875, "learning_rate": 0.0011729370883598509, "loss": 1.362, "step": 13891 }, { "epoch": 0.3727994847574066, "grad_norm": 0.3046875, "learning_rate": 0.0011729287429721954, "loss": 1.3722, "step": 13892 }, { "epoch": 0.37282632030914553, "grad_norm": 0.291015625, "learning_rate": 0.001172920396327703, "loss": 1.3676, "step": 13893 }, { "epoch": 0.3728531558608845, "grad_norm": 0.279296875, "learning_rate": 0.001172912048426392, "loss": 1.2718, "step": 13894 }, { "epoch": 0.37287999141262346, "grad_norm": 0.30859375, "learning_rate": 0.00117290369926828, "loss": 1.3203, "step": 13895 }, { "epoch": 0.3729068269643624, "grad_norm": 0.296875, "learning_rate": 0.001172895348853386, "loss": 1.3177, "step": 13896 }, { "epoch": 0.3729336625161013, "grad_norm": 0.30078125, "learning_rate": 0.0011728869971817284, "loss": 1.3836, "step": 13897 }, { "epoch": 0.37296049806784026, "grad_norm": 0.3046875, "learning_rate": 0.0011728786442533252, "loss": 1.33, "step": 13898 }, { "epoch": 0.3729873336195792, "grad_norm": 0.306640625, "learning_rate": 0.0011728702900681946, "loss": 1.3946, "step": 13899 }, { "epoch": 0.3730141691713182, "grad_norm": 0.298828125, "learning_rate": 0.0011728619346263553, "loss": 1.4185, "step": 13900 }, { "epoch": 0.3730410047230571, "grad_norm": 0.302734375, "learning_rate": 0.0011728535779278256, "loss": 1.3278, "step": 13901 }, { "epoch": 0.37306784027479606, "grad_norm": 0.302734375, "learning_rate": 0.0011728452199726233, "loss": 1.4799, "step": 13902 }, { "epoch": 0.373094675826535, "grad_norm": 0.306640625, "learning_rate": 0.0011728368607607674, "loss": 1.4255, "step": 13903 }, { "epoch": 0.37312151137827393, "grad_norm": 0.31640625, "learning_rate": 0.0011728285002922758, "loss": 1.4776, "step": 13904 }, { "epoch": 0.37314834693001286, "grad_norm": 0.302734375, "learning_rate": 0.0011728201385671673, "loss": 1.4451, "step": 13905 }, { "epoch": 0.3731751824817518, "grad_norm": 0.27734375, "learning_rate": 0.0011728117755854595, "loss": 1.2587, "step": 13906 }, { "epoch": 0.3732020180334908, "grad_norm": 0.296875, "learning_rate": 0.0011728034113471714, "loss": 1.2885, "step": 13907 }, { "epoch": 0.3732288535852297, "grad_norm": 0.298828125, "learning_rate": 0.0011727950458523212, "loss": 1.2742, "step": 13908 }, { "epoch": 0.37325568913696866, "grad_norm": 0.302734375, "learning_rate": 0.001172786679100927, "loss": 1.351, "step": 13909 }, { "epoch": 0.3732825246887076, "grad_norm": 0.291015625, "learning_rate": 0.0011727783110930077, "loss": 1.2814, "step": 13910 }, { "epoch": 0.37330936024044653, "grad_norm": 0.302734375, "learning_rate": 0.001172769941828581, "loss": 1.3921, "step": 13911 }, { "epoch": 0.37333619579218547, "grad_norm": 0.298828125, "learning_rate": 0.0011727615713076655, "loss": 1.3597, "step": 13912 }, { "epoch": 0.37336303134392446, "grad_norm": 0.294921875, "learning_rate": 0.0011727531995302798, "loss": 1.3616, "step": 13913 }, { "epoch": 0.3733898668956634, "grad_norm": 0.296875, "learning_rate": 0.0011727448264964421, "loss": 1.3512, "step": 13914 }, { "epoch": 0.3734167024474023, "grad_norm": 0.283203125, "learning_rate": 0.0011727364522061707, "loss": 1.2164, "step": 13915 }, { "epoch": 0.37344353799914126, "grad_norm": 0.3125, "learning_rate": 0.001172728076659484, "loss": 1.3842, "step": 13916 }, { "epoch": 0.3734703735508802, "grad_norm": 0.298828125, "learning_rate": 0.0011727196998564005, "loss": 1.3463, "step": 13917 }, { "epoch": 0.37349720910261913, "grad_norm": 0.28515625, "learning_rate": 0.0011727113217969382, "loss": 1.2211, "step": 13918 }, { "epoch": 0.37352404465435807, "grad_norm": 0.29296875, "learning_rate": 0.001172702942481116, "loss": 1.2615, "step": 13919 }, { "epoch": 0.37355088020609706, "grad_norm": 0.294921875, "learning_rate": 0.001172694561908952, "loss": 1.2946, "step": 13920 }, { "epoch": 0.373577715757836, "grad_norm": 0.279296875, "learning_rate": 0.0011726861800804647, "loss": 1.1213, "step": 13921 }, { "epoch": 0.37360455130957493, "grad_norm": 0.296875, "learning_rate": 0.0011726777969956722, "loss": 1.2887, "step": 13922 }, { "epoch": 0.37363138686131386, "grad_norm": 0.291015625, "learning_rate": 0.001172669412654593, "loss": 1.2954, "step": 13923 }, { "epoch": 0.3736582224130528, "grad_norm": 0.306640625, "learning_rate": 0.001172661027057246, "loss": 1.4134, "step": 13924 }, { "epoch": 0.37368505796479173, "grad_norm": 0.298828125, "learning_rate": 0.0011726526402036491, "loss": 1.3451, "step": 13925 }, { "epoch": 0.3737118935165307, "grad_norm": 0.29296875, "learning_rate": 0.0011726442520938205, "loss": 1.3367, "step": 13926 }, { "epoch": 0.37373872906826966, "grad_norm": 0.29296875, "learning_rate": 0.0011726358627277792, "loss": 1.307, "step": 13927 }, { "epoch": 0.3737655646200086, "grad_norm": 0.287109375, "learning_rate": 0.0011726274721055431, "loss": 1.3604, "step": 13928 }, { "epoch": 0.37379240017174753, "grad_norm": 0.296875, "learning_rate": 0.0011726190802271307, "loss": 1.3755, "step": 13929 }, { "epoch": 0.37381923572348646, "grad_norm": 0.291015625, "learning_rate": 0.0011726106870925607, "loss": 1.2374, "step": 13930 }, { "epoch": 0.3738460712752254, "grad_norm": 0.30859375, "learning_rate": 0.0011726022927018513, "loss": 1.3185, "step": 13931 }, { "epoch": 0.37387290682696434, "grad_norm": 0.30078125, "learning_rate": 0.001172593897055021, "loss": 1.3571, "step": 13932 }, { "epoch": 0.3738997423787033, "grad_norm": 0.2890625, "learning_rate": 0.001172585500152088, "loss": 1.214, "step": 13933 }, { "epoch": 0.37392657793044226, "grad_norm": 0.318359375, "learning_rate": 0.0011725771019930708, "loss": 1.3801, "step": 13934 }, { "epoch": 0.3739534134821812, "grad_norm": 0.2890625, "learning_rate": 0.0011725687025779881, "loss": 1.264, "step": 13935 }, { "epoch": 0.37398024903392013, "grad_norm": 0.2734375, "learning_rate": 0.001172560301906858, "loss": 1.1607, "step": 13936 }, { "epoch": 0.37400708458565907, "grad_norm": 0.294921875, "learning_rate": 0.001172551899979699, "loss": 1.3024, "step": 13937 }, { "epoch": 0.374033920137398, "grad_norm": 0.3125, "learning_rate": 0.0011725434967965298, "loss": 1.3712, "step": 13938 }, { "epoch": 0.374060755689137, "grad_norm": 0.306640625, "learning_rate": 0.0011725350923573684, "loss": 1.4008, "step": 13939 }, { "epoch": 0.3740875912408759, "grad_norm": 0.298828125, "learning_rate": 0.0011725266866622335, "loss": 1.2817, "step": 13940 }, { "epoch": 0.37411442679261486, "grad_norm": 0.322265625, "learning_rate": 0.0011725182797111436, "loss": 1.4049, "step": 13941 }, { "epoch": 0.3741412623443538, "grad_norm": 0.30078125, "learning_rate": 0.0011725098715041169, "loss": 1.4027, "step": 13942 }, { "epoch": 0.37416809789609273, "grad_norm": 0.328125, "learning_rate": 0.001172501462041172, "loss": 1.5349, "step": 13943 }, { "epoch": 0.37419493344783167, "grad_norm": 0.306640625, "learning_rate": 0.0011724930513223274, "loss": 1.3712, "step": 13944 }, { "epoch": 0.37422176899957066, "grad_norm": 0.28515625, "learning_rate": 0.0011724846393476012, "loss": 1.2856, "step": 13945 }, { "epoch": 0.3742486045513096, "grad_norm": 0.44921875, "learning_rate": 0.0011724762261170125, "loss": 1.751, "step": 13946 }, { "epoch": 0.37427544010304853, "grad_norm": 0.345703125, "learning_rate": 0.001172467811630579, "loss": 1.6403, "step": 13947 }, { "epoch": 0.37430227565478746, "grad_norm": 0.318359375, "learning_rate": 0.00117245939588832, "loss": 1.5493, "step": 13948 }, { "epoch": 0.3743291112065264, "grad_norm": 0.376953125, "learning_rate": 0.0011724509788902532, "loss": 1.9122, "step": 13949 }, { "epoch": 0.37435594675826533, "grad_norm": 0.31640625, "learning_rate": 0.0011724425606363974, "loss": 1.7122, "step": 13950 }, { "epoch": 0.37438278231000427, "grad_norm": 0.32421875, "learning_rate": 0.0011724341411267713, "loss": 1.6804, "step": 13951 }, { "epoch": 0.37440961786174326, "grad_norm": 0.30859375, "learning_rate": 0.0011724257203613927, "loss": 1.6175, "step": 13952 }, { "epoch": 0.3744364534134822, "grad_norm": 0.28515625, "learning_rate": 0.0011724172983402807, "loss": 1.4672, "step": 13953 }, { "epoch": 0.37446328896522113, "grad_norm": 0.31640625, "learning_rate": 0.0011724088750634536, "loss": 1.589, "step": 13954 }, { "epoch": 0.37449012451696007, "grad_norm": 0.306640625, "learning_rate": 0.0011724004505309297, "loss": 1.5371, "step": 13955 }, { "epoch": 0.374516960068699, "grad_norm": 0.326171875, "learning_rate": 0.0011723920247427276, "loss": 1.7942, "step": 13956 }, { "epoch": 0.37454379562043794, "grad_norm": 0.318359375, "learning_rate": 0.0011723835976988658, "loss": 1.5706, "step": 13957 }, { "epoch": 0.3745706311721769, "grad_norm": 0.30859375, "learning_rate": 0.001172375169399363, "loss": 1.5466, "step": 13958 }, { "epoch": 0.37459746672391586, "grad_norm": 0.298828125, "learning_rate": 0.0011723667398442373, "loss": 1.5551, "step": 13959 }, { "epoch": 0.3746243022756548, "grad_norm": 0.30078125, "learning_rate": 0.0011723583090335074, "loss": 1.5689, "step": 13960 }, { "epoch": 0.37465113782739373, "grad_norm": 0.3125, "learning_rate": 0.0011723498769671918, "loss": 1.6615, "step": 13961 }, { "epoch": 0.37467797337913267, "grad_norm": 0.30859375, "learning_rate": 0.0011723414436453088, "loss": 1.4648, "step": 13962 }, { "epoch": 0.3747048089308716, "grad_norm": 0.3046875, "learning_rate": 0.0011723330090678773, "loss": 1.5696, "step": 13963 }, { "epoch": 0.37473164448261054, "grad_norm": 0.29296875, "learning_rate": 0.0011723245732349152, "loss": 1.4705, "step": 13964 }, { "epoch": 0.37475848003434953, "grad_norm": 0.3046875, "learning_rate": 0.0011723161361464418, "loss": 1.716, "step": 13965 }, { "epoch": 0.37478531558608846, "grad_norm": 0.310546875, "learning_rate": 0.001172307697802475, "loss": 1.639, "step": 13966 }, { "epoch": 0.3748121511378274, "grad_norm": 0.306640625, "learning_rate": 0.0011722992582030333, "loss": 1.5341, "step": 13967 }, { "epoch": 0.37483898668956633, "grad_norm": 0.3125, "learning_rate": 0.0011722908173481357, "loss": 1.6593, "step": 13968 }, { "epoch": 0.37486582224130527, "grad_norm": 0.302734375, "learning_rate": 0.0011722823752378004, "loss": 1.5602, "step": 13969 }, { "epoch": 0.3748926577930442, "grad_norm": 0.30078125, "learning_rate": 0.0011722739318720456, "loss": 1.5225, "step": 13970 }, { "epoch": 0.3749194933447832, "grad_norm": 0.298828125, "learning_rate": 0.0011722654872508903, "loss": 1.5037, "step": 13971 }, { "epoch": 0.37494632889652213, "grad_norm": 0.3046875, "learning_rate": 0.001172257041374353, "loss": 1.5203, "step": 13972 }, { "epoch": 0.37497316444826106, "grad_norm": 0.296875, "learning_rate": 0.0011722485942424522, "loss": 1.5159, "step": 13973 }, { "epoch": 0.375, "grad_norm": 0.310546875, "learning_rate": 0.0011722401458552061, "loss": 1.5605, "step": 13974 }, { "epoch": 0.37502683555173894, "grad_norm": 0.302734375, "learning_rate": 0.0011722316962126337, "loss": 1.602, "step": 13975 }, { "epoch": 0.37505367110347787, "grad_norm": 0.31640625, "learning_rate": 0.0011722232453147531, "loss": 1.6557, "step": 13976 }, { "epoch": 0.3750805066552168, "grad_norm": 0.306640625, "learning_rate": 0.0011722147931615833, "loss": 1.6773, "step": 13977 }, { "epoch": 0.3751073422069558, "grad_norm": 0.279296875, "learning_rate": 0.0011722063397531424, "loss": 1.4181, "step": 13978 }, { "epoch": 0.37513417775869473, "grad_norm": 0.3046875, "learning_rate": 0.001172197885089449, "loss": 1.5704, "step": 13979 }, { "epoch": 0.37516101331043367, "grad_norm": 0.296875, "learning_rate": 0.0011721894291705222, "loss": 1.617, "step": 13980 }, { "epoch": 0.3751878488621726, "grad_norm": 0.30859375, "learning_rate": 0.0011721809719963797, "loss": 1.6115, "step": 13981 }, { "epoch": 0.37521468441391154, "grad_norm": 0.30859375, "learning_rate": 0.0011721725135670406, "loss": 1.6483, "step": 13982 }, { "epoch": 0.37524151996565047, "grad_norm": 0.298828125, "learning_rate": 0.0011721640538825235, "loss": 1.5978, "step": 13983 }, { "epoch": 0.37526835551738946, "grad_norm": 0.30859375, "learning_rate": 0.0011721555929428468, "loss": 1.617, "step": 13984 }, { "epoch": 0.3752951910691284, "grad_norm": 0.3046875, "learning_rate": 0.001172147130748029, "loss": 1.5527, "step": 13985 }, { "epoch": 0.37532202662086733, "grad_norm": 0.291015625, "learning_rate": 0.0011721386672980886, "loss": 1.4882, "step": 13986 }, { "epoch": 0.37534886217260627, "grad_norm": 0.287109375, "learning_rate": 0.0011721302025930445, "loss": 1.5447, "step": 13987 }, { "epoch": 0.3753756977243452, "grad_norm": 0.310546875, "learning_rate": 0.0011721217366329147, "loss": 1.6195, "step": 13988 }, { "epoch": 0.37540253327608414, "grad_norm": 0.30859375, "learning_rate": 0.0011721132694177186, "loss": 1.5568, "step": 13989 }, { "epoch": 0.3754293688278231, "grad_norm": 0.310546875, "learning_rate": 0.0011721048009474741, "loss": 1.578, "step": 13990 }, { "epoch": 0.37545620437956206, "grad_norm": 0.296875, "learning_rate": 0.0011720963312222, "loss": 1.5284, "step": 13991 }, { "epoch": 0.375483039931301, "grad_norm": 0.291015625, "learning_rate": 0.0011720878602419146, "loss": 1.4361, "step": 13992 }, { "epoch": 0.37550987548303993, "grad_norm": 0.298828125, "learning_rate": 0.0011720793880066372, "loss": 1.5628, "step": 13993 }, { "epoch": 0.37553671103477887, "grad_norm": 0.314453125, "learning_rate": 0.0011720709145163857, "loss": 1.6148, "step": 13994 }, { "epoch": 0.3755635465865178, "grad_norm": 0.298828125, "learning_rate": 0.0011720624397711789, "loss": 1.5634, "step": 13995 }, { "epoch": 0.37559038213825674, "grad_norm": 0.302734375, "learning_rate": 0.0011720539637710354, "loss": 1.5484, "step": 13996 }, { "epoch": 0.37561721768999573, "grad_norm": 0.296875, "learning_rate": 0.0011720454865159738, "loss": 1.4836, "step": 13997 }, { "epoch": 0.37564405324173467, "grad_norm": 0.3125, "learning_rate": 0.0011720370080060129, "loss": 1.6598, "step": 13998 }, { "epoch": 0.3756708887934736, "grad_norm": 0.298828125, "learning_rate": 0.0011720285282411707, "loss": 1.6072, "step": 13999 }, { "epoch": 0.37569772434521254, "grad_norm": 0.30078125, "learning_rate": 0.0011720200472214667, "loss": 1.5347, "step": 14000 }, { "epoch": 0.37572455989695147, "grad_norm": 0.31640625, "learning_rate": 0.0011720115649469186, "loss": 1.6224, "step": 14001 }, { "epoch": 0.3757513954486904, "grad_norm": 0.298828125, "learning_rate": 0.0011720030814175455, "loss": 1.535, "step": 14002 }, { "epoch": 0.37577823100042934, "grad_norm": 0.296875, "learning_rate": 0.001171994596633366, "loss": 1.5624, "step": 14003 }, { "epoch": 0.37580506655216833, "grad_norm": 0.3046875, "learning_rate": 0.0011719861105943987, "loss": 1.589, "step": 14004 }, { "epoch": 0.37583190210390727, "grad_norm": 0.306640625, "learning_rate": 0.001171977623300662, "loss": 1.6139, "step": 14005 }, { "epoch": 0.3758587376556462, "grad_norm": 0.298828125, "learning_rate": 0.0011719691347521748, "loss": 1.5122, "step": 14006 }, { "epoch": 0.37588557320738514, "grad_norm": 0.30078125, "learning_rate": 0.0011719606449489557, "loss": 1.547, "step": 14007 }, { "epoch": 0.3759124087591241, "grad_norm": 0.31640625, "learning_rate": 0.001171952153891023, "loss": 1.6221, "step": 14008 }, { "epoch": 0.375939244310863, "grad_norm": 0.302734375, "learning_rate": 0.0011719436615783955, "loss": 1.5787, "step": 14009 }, { "epoch": 0.375966079862602, "grad_norm": 0.291015625, "learning_rate": 0.001171935168011092, "loss": 1.5337, "step": 14010 }, { "epoch": 0.37599291541434093, "grad_norm": 0.31640625, "learning_rate": 0.0011719266731891312, "loss": 1.618, "step": 14011 }, { "epoch": 0.37601975096607987, "grad_norm": 0.3125, "learning_rate": 0.0011719181771125311, "loss": 1.6591, "step": 14012 }, { "epoch": 0.3760465865178188, "grad_norm": 0.30078125, "learning_rate": 0.0011719096797813112, "loss": 1.5959, "step": 14013 }, { "epoch": 0.37607342206955774, "grad_norm": 0.28515625, "learning_rate": 0.0011719011811954895, "loss": 1.48, "step": 14014 }, { "epoch": 0.3761002576212967, "grad_norm": 0.30078125, "learning_rate": 0.0011718926813550849, "loss": 1.5694, "step": 14015 }, { "epoch": 0.37612709317303566, "grad_norm": 0.296875, "learning_rate": 0.001171884180260116, "loss": 1.4532, "step": 14016 }, { "epoch": 0.3761539287247746, "grad_norm": 0.294921875, "learning_rate": 0.0011718756779106015, "loss": 1.5039, "step": 14017 }, { "epoch": 0.37618076427651354, "grad_norm": 0.291015625, "learning_rate": 0.00117186717430656, "loss": 1.5133, "step": 14018 }, { "epoch": 0.37620759982825247, "grad_norm": 0.291015625, "learning_rate": 0.00117185866944801, "loss": 1.4335, "step": 14019 }, { "epoch": 0.3762344353799914, "grad_norm": 0.2890625, "learning_rate": 0.0011718501633349704, "loss": 1.4444, "step": 14020 }, { "epoch": 0.37626127093173034, "grad_norm": 0.30859375, "learning_rate": 0.0011718416559674601, "loss": 1.6639, "step": 14021 }, { "epoch": 0.3762881064834693, "grad_norm": 0.28515625, "learning_rate": 0.001171833147345497, "loss": 1.4172, "step": 14022 }, { "epoch": 0.37631494203520827, "grad_norm": 0.298828125, "learning_rate": 0.0011718246374691006, "loss": 1.5242, "step": 14023 }, { "epoch": 0.3763417775869472, "grad_norm": 0.3046875, "learning_rate": 0.0011718161263382888, "loss": 1.5218, "step": 14024 }, { "epoch": 0.37636861313868614, "grad_norm": 0.310546875, "learning_rate": 0.0011718076139530808, "loss": 1.5694, "step": 14025 }, { "epoch": 0.37639544869042507, "grad_norm": 0.30078125, "learning_rate": 0.0011717991003134952, "loss": 1.4649, "step": 14026 }, { "epoch": 0.376422284242164, "grad_norm": 0.291015625, "learning_rate": 0.0011717905854195508, "loss": 1.4443, "step": 14027 }, { "epoch": 0.37644911979390294, "grad_norm": 0.3125, "learning_rate": 0.0011717820692712657, "loss": 1.6121, "step": 14028 }, { "epoch": 0.37647595534564193, "grad_norm": 0.326171875, "learning_rate": 0.0011717735518686591, "loss": 1.7101, "step": 14029 }, { "epoch": 0.37650279089738087, "grad_norm": 0.3125, "learning_rate": 0.0011717650332117495, "loss": 1.6025, "step": 14030 }, { "epoch": 0.3765296264491198, "grad_norm": 0.3046875, "learning_rate": 0.0011717565133005556, "loss": 1.6159, "step": 14031 }, { "epoch": 0.37655646200085874, "grad_norm": 0.298828125, "learning_rate": 0.0011717479921350964, "loss": 1.4822, "step": 14032 }, { "epoch": 0.3765832975525977, "grad_norm": 0.30078125, "learning_rate": 0.00117173946971539, "loss": 1.5553, "step": 14033 }, { "epoch": 0.3766101331043366, "grad_norm": 0.310546875, "learning_rate": 0.0011717309460414556, "loss": 1.56, "step": 14034 }, { "epoch": 0.37663696865607554, "grad_norm": 0.294921875, "learning_rate": 0.0011717224211133116, "loss": 1.4213, "step": 14035 }, { "epoch": 0.37666380420781453, "grad_norm": 0.314453125, "learning_rate": 0.001171713894930977, "loss": 1.611, "step": 14036 }, { "epoch": 0.37669063975955347, "grad_norm": 0.306640625, "learning_rate": 0.00117170536749447, "loss": 1.5641, "step": 14037 }, { "epoch": 0.3767174753112924, "grad_norm": 0.30078125, "learning_rate": 0.00117169683880381, "loss": 1.5449, "step": 14038 }, { "epoch": 0.37674431086303134, "grad_norm": 0.29296875, "learning_rate": 0.0011716883088590151, "loss": 1.4792, "step": 14039 }, { "epoch": 0.3767711464147703, "grad_norm": 0.2890625, "learning_rate": 0.0011716797776601042, "loss": 1.4418, "step": 14040 }, { "epoch": 0.3767979819665092, "grad_norm": 0.3046875, "learning_rate": 0.0011716712452070962, "loss": 1.4888, "step": 14041 }, { "epoch": 0.3768248175182482, "grad_norm": 0.30859375, "learning_rate": 0.0011716627115000095, "loss": 1.489, "step": 14042 }, { "epoch": 0.37685165306998714, "grad_norm": 0.291015625, "learning_rate": 0.001171654176538863, "loss": 1.4009, "step": 14043 }, { "epoch": 0.37687848862172607, "grad_norm": 0.30078125, "learning_rate": 0.0011716456403236756, "loss": 1.4177, "step": 14044 }, { "epoch": 0.376905324173465, "grad_norm": 0.3046875, "learning_rate": 0.0011716371028544657, "loss": 1.5651, "step": 14045 }, { "epoch": 0.37693215972520394, "grad_norm": 0.298828125, "learning_rate": 0.0011716285641312522, "loss": 1.4583, "step": 14046 }, { "epoch": 0.3769589952769429, "grad_norm": 0.3125, "learning_rate": 0.001171620024154054, "loss": 1.5536, "step": 14047 }, { "epoch": 0.3769858308286818, "grad_norm": 0.294921875, "learning_rate": 0.0011716114829228895, "loss": 1.4422, "step": 14048 }, { "epoch": 0.3770126663804208, "grad_norm": 0.302734375, "learning_rate": 0.0011716029404377775, "loss": 1.4232, "step": 14049 }, { "epoch": 0.37703950193215974, "grad_norm": 0.296875, "learning_rate": 0.0011715943966987368, "loss": 1.4769, "step": 14050 }, { "epoch": 0.3770663374838987, "grad_norm": 0.314453125, "learning_rate": 0.0011715858517057864, "loss": 1.5514, "step": 14051 }, { "epoch": 0.3770931730356376, "grad_norm": 0.3046875, "learning_rate": 0.0011715773054589444, "loss": 1.5244, "step": 14052 }, { "epoch": 0.37712000858737654, "grad_norm": 0.279296875, "learning_rate": 0.0011715687579582303, "loss": 1.3434, "step": 14053 }, { "epoch": 0.3771468441391155, "grad_norm": 0.33984375, "learning_rate": 0.0011715602092036623, "loss": 1.6283, "step": 14054 }, { "epoch": 0.37717367969085447, "grad_norm": 0.30078125, "learning_rate": 0.0011715516591952596, "loss": 1.4465, "step": 14055 }, { "epoch": 0.3772005152425934, "grad_norm": 0.306640625, "learning_rate": 0.0011715431079330405, "loss": 1.4851, "step": 14056 }, { "epoch": 0.37722735079433234, "grad_norm": 0.291015625, "learning_rate": 0.0011715345554170241, "loss": 1.4782, "step": 14057 }, { "epoch": 0.3772541863460713, "grad_norm": 0.3046875, "learning_rate": 0.001171526001647229, "loss": 1.5082, "step": 14058 }, { "epoch": 0.3772810218978102, "grad_norm": 0.28515625, "learning_rate": 0.0011715174466236738, "loss": 1.3272, "step": 14059 }, { "epoch": 0.37730785744954914, "grad_norm": 0.294921875, "learning_rate": 0.0011715088903463777, "loss": 1.4047, "step": 14060 }, { "epoch": 0.3773346930012881, "grad_norm": 0.30859375, "learning_rate": 0.0011715003328153593, "loss": 1.5846, "step": 14061 }, { "epoch": 0.37736152855302707, "grad_norm": 0.3203125, "learning_rate": 0.0011714917740306369, "loss": 1.6222, "step": 14062 }, { "epoch": 0.377388364104766, "grad_norm": 0.29296875, "learning_rate": 0.00117148321399223, "loss": 1.4503, "step": 14063 }, { "epoch": 0.37741519965650494, "grad_norm": 0.296875, "learning_rate": 0.0011714746527001572, "loss": 1.465, "step": 14064 }, { "epoch": 0.3774420352082439, "grad_norm": 0.30859375, "learning_rate": 0.001171466090154437, "loss": 1.5426, "step": 14065 }, { "epoch": 0.3774688707599828, "grad_norm": 0.310546875, "learning_rate": 0.001171457526355088, "loss": 1.5197, "step": 14066 }, { "epoch": 0.37749570631172175, "grad_norm": 0.3046875, "learning_rate": 0.0011714489613021298, "loss": 1.5165, "step": 14067 }, { "epoch": 0.37752254186346074, "grad_norm": 0.302734375, "learning_rate": 0.0011714403949955805, "loss": 1.5041, "step": 14068 }, { "epoch": 0.37754937741519967, "grad_norm": 0.306640625, "learning_rate": 0.001171431827435459, "loss": 1.4637, "step": 14069 }, { "epoch": 0.3775762129669386, "grad_norm": 0.287109375, "learning_rate": 0.0011714232586217844, "loss": 1.3684, "step": 14070 }, { "epoch": 0.37760304851867754, "grad_norm": 0.30078125, "learning_rate": 0.001171414688554575, "loss": 1.4972, "step": 14071 }, { "epoch": 0.3776298840704165, "grad_norm": 0.30078125, "learning_rate": 0.0011714061172338503, "loss": 1.5032, "step": 14072 }, { "epoch": 0.3776567196221554, "grad_norm": 0.306640625, "learning_rate": 0.0011713975446596284, "loss": 1.4729, "step": 14073 }, { "epoch": 0.3776835551738944, "grad_norm": 0.306640625, "learning_rate": 0.0011713889708319283, "loss": 1.5027, "step": 14074 }, { "epoch": 0.37771039072563334, "grad_norm": 0.3046875, "learning_rate": 0.0011713803957507693, "loss": 1.4896, "step": 14075 }, { "epoch": 0.3777372262773723, "grad_norm": 0.30078125, "learning_rate": 0.0011713718194161693, "loss": 1.4249, "step": 14076 }, { "epoch": 0.3777640618291112, "grad_norm": 0.294921875, "learning_rate": 0.001171363241828148, "loss": 1.3827, "step": 14077 }, { "epoch": 0.37779089738085014, "grad_norm": 0.30078125, "learning_rate": 0.0011713546629867238, "loss": 1.4707, "step": 14078 }, { "epoch": 0.3778177329325891, "grad_norm": 0.353515625, "learning_rate": 0.0011713460828919153, "loss": 1.5038, "step": 14079 }, { "epoch": 0.377844568484328, "grad_norm": 0.28515625, "learning_rate": 0.0011713375015437418, "loss": 1.3648, "step": 14080 }, { "epoch": 0.377871404036067, "grad_norm": 0.306640625, "learning_rate": 0.001171328918942222, "loss": 1.4962, "step": 14081 }, { "epoch": 0.37789823958780594, "grad_norm": 0.306640625, "learning_rate": 0.0011713203350873745, "loss": 1.5028, "step": 14082 }, { "epoch": 0.3779250751395449, "grad_norm": 0.322265625, "learning_rate": 0.0011713117499792185, "loss": 1.6158, "step": 14083 }, { "epoch": 0.3779519106912838, "grad_norm": 0.314453125, "learning_rate": 0.0011713031636177724, "loss": 1.5127, "step": 14084 }, { "epoch": 0.37797874624302275, "grad_norm": 0.3046875, "learning_rate": 0.0011712945760030552, "loss": 1.4557, "step": 14085 }, { "epoch": 0.3780055817947617, "grad_norm": 0.3203125, "learning_rate": 0.0011712859871350857, "loss": 1.473, "step": 14086 }, { "epoch": 0.37803241734650067, "grad_norm": 0.306640625, "learning_rate": 0.001171277397013883, "loss": 1.5274, "step": 14087 }, { "epoch": 0.3780592528982396, "grad_norm": 0.30078125, "learning_rate": 0.0011712688056394656, "loss": 1.4238, "step": 14088 }, { "epoch": 0.37808608844997854, "grad_norm": 0.318359375, "learning_rate": 0.0011712602130118527, "loss": 1.6208, "step": 14089 }, { "epoch": 0.3781129240017175, "grad_norm": 0.302734375, "learning_rate": 0.0011712516191310627, "loss": 1.476, "step": 14090 }, { "epoch": 0.3781397595534564, "grad_norm": 0.3203125, "learning_rate": 0.001171243023997115, "loss": 1.5634, "step": 14091 }, { "epoch": 0.37816659510519535, "grad_norm": 0.30078125, "learning_rate": 0.001171234427610028, "loss": 1.407, "step": 14092 }, { "epoch": 0.3781934306569343, "grad_norm": 0.302734375, "learning_rate": 0.0011712258299698209, "loss": 1.4193, "step": 14093 }, { "epoch": 0.3782202662086733, "grad_norm": 0.291015625, "learning_rate": 0.001171217231076512, "loss": 1.4409, "step": 14094 }, { "epoch": 0.3782471017604122, "grad_norm": 0.30078125, "learning_rate": 0.001171208630930121, "loss": 1.4075, "step": 14095 }, { "epoch": 0.37827393731215114, "grad_norm": 0.302734375, "learning_rate": 0.0011712000295306662, "loss": 1.4729, "step": 14096 }, { "epoch": 0.3783007728638901, "grad_norm": 0.306640625, "learning_rate": 0.0011711914268781665, "loss": 1.5136, "step": 14097 }, { "epoch": 0.378327608415629, "grad_norm": 0.271484375, "learning_rate": 0.0011711828229726408, "loss": 1.2892, "step": 14098 }, { "epoch": 0.37835444396736795, "grad_norm": 0.291015625, "learning_rate": 0.001171174217814108, "loss": 1.413, "step": 14099 }, { "epoch": 0.37838127951910694, "grad_norm": 0.31640625, "learning_rate": 0.0011711656114025872, "loss": 1.5388, "step": 14100 }, { "epoch": 0.3784081150708459, "grad_norm": 0.29296875, "learning_rate": 0.001171157003738097, "loss": 1.4542, "step": 14101 }, { "epoch": 0.3784349506225848, "grad_norm": 0.302734375, "learning_rate": 0.0011711483948206565, "loss": 1.4541, "step": 14102 }, { "epoch": 0.37846178617432374, "grad_norm": 0.314453125, "learning_rate": 0.0011711397846502843, "loss": 1.5081, "step": 14103 }, { "epoch": 0.3784886217260627, "grad_norm": 0.287109375, "learning_rate": 0.0011711311732269995, "loss": 1.3613, "step": 14104 }, { "epoch": 0.3785154572778016, "grad_norm": 0.30078125, "learning_rate": 0.001171122560550821, "loss": 1.42, "step": 14105 }, { "epoch": 0.37854229282954055, "grad_norm": 0.3046875, "learning_rate": 0.0011711139466217674, "loss": 1.4196, "step": 14106 }, { "epoch": 0.37856912838127954, "grad_norm": 0.2890625, "learning_rate": 0.001171105331439858, "loss": 1.351, "step": 14107 }, { "epoch": 0.3785959639330185, "grad_norm": 0.30859375, "learning_rate": 0.0011710967150051113, "loss": 1.5053, "step": 14108 }, { "epoch": 0.3786227994847574, "grad_norm": 0.287109375, "learning_rate": 0.0011710880973175468, "loss": 1.4433, "step": 14109 }, { "epoch": 0.37864963503649635, "grad_norm": 0.302734375, "learning_rate": 0.0011710794783771827, "loss": 1.5058, "step": 14110 }, { "epoch": 0.3786764705882353, "grad_norm": 0.30078125, "learning_rate": 0.0011710708581840386, "loss": 1.5752, "step": 14111 }, { "epoch": 0.3787033061399742, "grad_norm": 0.30859375, "learning_rate": 0.0011710622367381326, "loss": 1.5554, "step": 14112 }, { "epoch": 0.3787301416917132, "grad_norm": 0.294921875, "learning_rate": 0.0011710536140394844, "loss": 1.4032, "step": 14113 }, { "epoch": 0.37875697724345214, "grad_norm": 0.29296875, "learning_rate": 0.0011710449900881124, "loss": 1.425, "step": 14114 }, { "epoch": 0.3787838127951911, "grad_norm": 0.306640625, "learning_rate": 0.0011710363648840358, "loss": 1.5454, "step": 14115 }, { "epoch": 0.37881064834693, "grad_norm": 0.298828125, "learning_rate": 0.0011710277384272734, "loss": 1.4426, "step": 14116 }, { "epoch": 0.37883748389866895, "grad_norm": 0.30078125, "learning_rate": 0.001171019110717844, "loss": 1.4419, "step": 14117 }, { "epoch": 0.3788643194504079, "grad_norm": 0.30078125, "learning_rate": 0.001171010481755767, "loss": 1.5218, "step": 14118 }, { "epoch": 0.3788911550021468, "grad_norm": 0.3203125, "learning_rate": 0.0011710018515410606, "loss": 1.6008, "step": 14119 }, { "epoch": 0.3789179905538858, "grad_norm": 0.3203125, "learning_rate": 0.0011709932200737443, "loss": 1.4926, "step": 14120 }, { "epoch": 0.37894482610562474, "grad_norm": 0.298828125, "learning_rate": 0.001170984587353837, "loss": 1.4445, "step": 14121 }, { "epoch": 0.3789716616573637, "grad_norm": 0.30859375, "learning_rate": 0.0011709759533813575, "loss": 1.5436, "step": 14122 }, { "epoch": 0.3789984972091026, "grad_norm": 0.294921875, "learning_rate": 0.0011709673181563246, "loss": 1.3806, "step": 14123 }, { "epoch": 0.37902533276084155, "grad_norm": 0.298828125, "learning_rate": 0.0011709586816787574, "loss": 1.4895, "step": 14124 }, { "epoch": 0.3790521683125805, "grad_norm": 0.30078125, "learning_rate": 0.0011709500439486746, "loss": 1.468, "step": 14125 }, { "epoch": 0.3790790038643195, "grad_norm": 0.287109375, "learning_rate": 0.0011709414049660958, "loss": 1.3403, "step": 14126 }, { "epoch": 0.3791058394160584, "grad_norm": 0.291015625, "learning_rate": 0.0011709327647310393, "loss": 1.3222, "step": 14127 }, { "epoch": 0.37913267496779735, "grad_norm": 0.302734375, "learning_rate": 0.0011709241232435244, "loss": 1.4075, "step": 14128 }, { "epoch": 0.3791595105195363, "grad_norm": 0.294921875, "learning_rate": 0.0011709154805035697, "loss": 1.364, "step": 14129 }, { "epoch": 0.3791863460712752, "grad_norm": 0.298828125, "learning_rate": 0.0011709068365111947, "loss": 1.4104, "step": 14130 }, { "epoch": 0.37921318162301415, "grad_norm": 0.302734375, "learning_rate": 0.001170898191266418, "loss": 1.4737, "step": 14131 }, { "epoch": 0.3792400171747531, "grad_norm": 0.310546875, "learning_rate": 0.0011708895447692584, "loss": 1.5185, "step": 14132 }, { "epoch": 0.3792668527264921, "grad_norm": 0.310546875, "learning_rate": 0.0011708808970197354, "loss": 1.5451, "step": 14133 }, { "epoch": 0.379293688278231, "grad_norm": 0.296875, "learning_rate": 0.0011708722480178674, "loss": 1.4278, "step": 14134 }, { "epoch": 0.37932052382996995, "grad_norm": 0.296875, "learning_rate": 0.001170863597763674, "loss": 1.3896, "step": 14135 }, { "epoch": 0.3793473593817089, "grad_norm": 0.298828125, "learning_rate": 0.0011708549462571733, "loss": 1.4528, "step": 14136 }, { "epoch": 0.3793741949334478, "grad_norm": 0.30859375, "learning_rate": 0.0011708462934983852, "loss": 1.4924, "step": 14137 }, { "epoch": 0.37940103048518675, "grad_norm": 0.27734375, "learning_rate": 0.0011708376394873282, "loss": 1.2448, "step": 14138 }, { "epoch": 0.37942786603692574, "grad_norm": 0.306640625, "learning_rate": 0.0011708289842240214, "loss": 1.4098, "step": 14139 }, { "epoch": 0.3794547015886647, "grad_norm": 0.302734375, "learning_rate": 0.0011708203277084835, "loss": 1.4436, "step": 14140 }, { "epoch": 0.3794815371404036, "grad_norm": 0.2890625, "learning_rate": 0.001170811669940734, "loss": 1.3396, "step": 14141 }, { "epoch": 0.37950837269214255, "grad_norm": 0.3046875, "learning_rate": 0.0011708030109207916, "loss": 1.4875, "step": 14142 }, { "epoch": 0.3795352082438815, "grad_norm": 0.30078125, "learning_rate": 0.0011707943506486752, "loss": 1.4326, "step": 14143 }, { "epoch": 0.3795620437956204, "grad_norm": 0.287109375, "learning_rate": 0.0011707856891244042, "loss": 1.3714, "step": 14144 }, { "epoch": 0.3795888793473594, "grad_norm": 0.3046875, "learning_rate": 0.001170777026347997, "loss": 1.4806, "step": 14145 }, { "epoch": 0.37961571489909834, "grad_norm": 0.3125, "learning_rate": 0.001170768362319473, "loss": 1.4247, "step": 14146 }, { "epoch": 0.3796425504508373, "grad_norm": 0.27734375, "learning_rate": 0.0011707596970388513, "loss": 1.2333, "step": 14147 }, { "epoch": 0.3796693860025762, "grad_norm": 0.314453125, "learning_rate": 0.0011707510305061506, "loss": 1.458, "step": 14148 }, { "epoch": 0.37969622155431515, "grad_norm": 0.310546875, "learning_rate": 0.00117074236272139, "loss": 1.417, "step": 14149 }, { "epoch": 0.3797230571060541, "grad_norm": 0.314453125, "learning_rate": 0.001170733693684589, "loss": 1.453, "step": 14150 }, { "epoch": 0.379749892657793, "grad_norm": 0.314453125, "learning_rate": 0.0011707250233957656, "loss": 1.4702, "step": 14151 }, { "epoch": 0.379776728209532, "grad_norm": 0.3046875, "learning_rate": 0.0011707163518549397, "loss": 1.3926, "step": 14152 }, { "epoch": 0.37980356376127095, "grad_norm": 0.294921875, "learning_rate": 0.0011707076790621299, "loss": 1.3319, "step": 14153 }, { "epoch": 0.3798303993130099, "grad_norm": 0.318359375, "learning_rate": 0.0011706990050173555, "loss": 1.4617, "step": 14154 }, { "epoch": 0.3798572348647488, "grad_norm": 0.314453125, "learning_rate": 0.0011706903297206354, "loss": 1.3874, "step": 14155 }, { "epoch": 0.37988407041648775, "grad_norm": 0.2890625, "learning_rate": 0.0011706816531719885, "loss": 1.3252, "step": 14156 }, { "epoch": 0.3799109059682267, "grad_norm": 0.306640625, "learning_rate": 0.001170672975371434, "loss": 1.4703, "step": 14157 }, { "epoch": 0.3799377415199657, "grad_norm": 0.3125, "learning_rate": 0.0011706642963189907, "loss": 1.4357, "step": 14158 }, { "epoch": 0.3799645770717046, "grad_norm": 0.298828125, "learning_rate": 0.0011706556160146781, "loss": 1.3972, "step": 14159 }, { "epoch": 0.37999141262344355, "grad_norm": 0.326171875, "learning_rate": 0.001170646934458515, "loss": 1.4598, "step": 14160 }, { "epoch": 0.3800182481751825, "grad_norm": 0.296875, "learning_rate": 0.0011706382516505204, "loss": 1.4261, "step": 14161 }, { "epoch": 0.3800450837269214, "grad_norm": 0.302734375, "learning_rate": 0.001170629567590713, "loss": 1.4353, "step": 14162 }, { "epoch": 0.38007191927866035, "grad_norm": 0.302734375, "learning_rate": 0.0011706208822791124, "loss": 1.4404, "step": 14163 }, { "epoch": 0.3800987548303993, "grad_norm": 0.283203125, "learning_rate": 0.0011706121957157378, "loss": 1.3008, "step": 14164 }, { "epoch": 0.3801255903821383, "grad_norm": 0.294921875, "learning_rate": 0.0011706035079006075, "loss": 1.3524, "step": 14165 }, { "epoch": 0.3801524259338772, "grad_norm": 0.279296875, "learning_rate": 0.0011705948188337413, "loss": 1.2348, "step": 14166 }, { "epoch": 0.38017926148561615, "grad_norm": 0.314453125, "learning_rate": 0.0011705861285151577, "loss": 1.5364, "step": 14167 }, { "epoch": 0.3802060970373551, "grad_norm": 0.310546875, "learning_rate": 0.001170577436944876, "loss": 1.3731, "step": 14168 }, { "epoch": 0.380232932589094, "grad_norm": 0.30078125, "learning_rate": 0.0011705687441229153, "loss": 1.4236, "step": 14169 }, { "epoch": 0.38025976814083295, "grad_norm": 0.30859375, "learning_rate": 0.0011705600500492947, "loss": 1.4205, "step": 14170 }, { "epoch": 0.38028660369257195, "grad_norm": 0.318359375, "learning_rate": 0.001170551354724033, "loss": 1.5503, "step": 14171 }, { "epoch": 0.3803134392443109, "grad_norm": 0.3125, "learning_rate": 0.00117054265814715, "loss": 1.5213, "step": 14172 }, { "epoch": 0.3803402747960498, "grad_norm": 0.310546875, "learning_rate": 0.0011705339603186639, "loss": 1.4167, "step": 14173 }, { "epoch": 0.38036711034778875, "grad_norm": 0.298828125, "learning_rate": 0.001170525261238594, "loss": 1.3612, "step": 14174 }, { "epoch": 0.3803939458995277, "grad_norm": 0.287109375, "learning_rate": 0.0011705165609069597, "loss": 1.3344, "step": 14175 }, { "epoch": 0.3804207814512666, "grad_norm": 0.306640625, "learning_rate": 0.00117050785932378, "loss": 1.4107, "step": 14176 }, { "epoch": 0.38044761700300556, "grad_norm": 0.306640625, "learning_rate": 0.0011704991564890738, "loss": 1.437, "step": 14177 }, { "epoch": 0.38047445255474455, "grad_norm": 0.2890625, "learning_rate": 0.0011704904524028603, "loss": 1.2665, "step": 14178 }, { "epoch": 0.3805012881064835, "grad_norm": 0.298828125, "learning_rate": 0.0011704817470651586, "loss": 1.3658, "step": 14179 }, { "epoch": 0.3805281236582224, "grad_norm": 0.326171875, "learning_rate": 0.001170473040475988, "loss": 1.5251, "step": 14180 }, { "epoch": 0.38055495920996135, "grad_norm": 0.302734375, "learning_rate": 0.0011704643326353671, "loss": 1.4638, "step": 14181 }, { "epoch": 0.3805817947617003, "grad_norm": 0.30078125, "learning_rate": 0.0011704556235433152, "loss": 1.3594, "step": 14182 }, { "epoch": 0.3806086303134392, "grad_norm": 0.322265625, "learning_rate": 0.0011704469131998518, "loss": 1.5664, "step": 14183 }, { "epoch": 0.3806354658651782, "grad_norm": 0.314453125, "learning_rate": 0.0011704382016049955, "loss": 1.4931, "step": 14184 }, { "epoch": 0.38066230141691715, "grad_norm": 0.298828125, "learning_rate": 0.0011704294887587658, "loss": 1.3707, "step": 14185 }, { "epoch": 0.3806891369686561, "grad_norm": 0.310546875, "learning_rate": 0.0011704207746611815, "loss": 1.4017, "step": 14186 }, { "epoch": 0.380715972520395, "grad_norm": 0.30078125, "learning_rate": 0.0011704120593122617, "loss": 1.3247, "step": 14187 }, { "epoch": 0.38074280807213395, "grad_norm": 0.30078125, "learning_rate": 0.0011704033427120258, "loss": 1.3942, "step": 14188 }, { "epoch": 0.3807696436238729, "grad_norm": 0.3203125, "learning_rate": 0.001170394624860493, "loss": 1.4775, "step": 14189 }, { "epoch": 0.3807964791756118, "grad_norm": 0.3125, "learning_rate": 0.0011703859057576819, "loss": 1.4261, "step": 14190 }, { "epoch": 0.3808233147273508, "grad_norm": 0.30859375, "learning_rate": 0.001170377185403612, "loss": 1.4303, "step": 14191 }, { "epoch": 0.38085015027908975, "grad_norm": 0.310546875, "learning_rate": 0.0011703684637983023, "loss": 1.4162, "step": 14192 }, { "epoch": 0.3808769858308287, "grad_norm": 0.306640625, "learning_rate": 0.001170359740941772, "loss": 1.383, "step": 14193 }, { "epoch": 0.3809038213825676, "grad_norm": 0.30078125, "learning_rate": 0.0011703510168340403, "loss": 1.4018, "step": 14194 }, { "epoch": 0.38093065693430656, "grad_norm": 0.32421875, "learning_rate": 0.0011703422914751264, "loss": 1.5701, "step": 14195 }, { "epoch": 0.3809574924860455, "grad_norm": 0.310546875, "learning_rate": 0.001170333564865049, "loss": 1.5219, "step": 14196 }, { "epoch": 0.3809843280377845, "grad_norm": 0.30078125, "learning_rate": 0.001170324837003828, "loss": 1.4386, "step": 14197 }, { "epoch": 0.3810111635895234, "grad_norm": 0.3046875, "learning_rate": 0.0011703161078914816, "loss": 1.3898, "step": 14198 }, { "epoch": 0.38103799914126235, "grad_norm": 0.298828125, "learning_rate": 0.0011703073775280297, "loss": 1.3422, "step": 14199 }, { "epoch": 0.3810648346930013, "grad_norm": 0.31640625, "learning_rate": 0.0011702986459134913, "loss": 1.5015, "step": 14200 }, { "epoch": 0.3810916702447402, "grad_norm": 0.31640625, "learning_rate": 0.0011702899130478853, "loss": 1.4942, "step": 14201 }, { "epoch": 0.38111850579647916, "grad_norm": 0.322265625, "learning_rate": 0.001170281178931231, "loss": 1.5516, "step": 14202 }, { "epoch": 0.3811453413482181, "grad_norm": 0.306640625, "learning_rate": 0.0011702724435635476, "loss": 1.3879, "step": 14203 }, { "epoch": 0.3811721768999571, "grad_norm": 0.287109375, "learning_rate": 0.0011702637069448544, "loss": 1.3605, "step": 14204 }, { "epoch": 0.381199012451696, "grad_norm": 0.291015625, "learning_rate": 0.0011702549690751703, "loss": 1.3501, "step": 14205 }, { "epoch": 0.38122584800343495, "grad_norm": 0.3125, "learning_rate": 0.0011702462299545144, "loss": 1.4326, "step": 14206 }, { "epoch": 0.3812526835551739, "grad_norm": 0.30078125, "learning_rate": 0.001170237489582906, "loss": 1.372, "step": 14207 }, { "epoch": 0.3812795191069128, "grad_norm": 0.341796875, "learning_rate": 0.0011702287479603646, "loss": 1.5393, "step": 14208 }, { "epoch": 0.38130635465865176, "grad_norm": 0.314453125, "learning_rate": 0.001170220005086909, "loss": 1.4701, "step": 14209 }, { "epoch": 0.38133319021039075, "grad_norm": 0.3125, "learning_rate": 0.0011702112609625584, "loss": 1.4807, "step": 14210 }, { "epoch": 0.3813600257621297, "grad_norm": 0.30859375, "learning_rate": 0.0011702025155873321, "loss": 1.4412, "step": 14211 }, { "epoch": 0.3813868613138686, "grad_norm": 0.310546875, "learning_rate": 0.0011701937689612492, "loss": 1.4349, "step": 14212 }, { "epoch": 0.38141369686560755, "grad_norm": 0.31640625, "learning_rate": 0.001170185021084329, "loss": 1.4862, "step": 14213 }, { "epoch": 0.3814405324173465, "grad_norm": 0.306640625, "learning_rate": 0.0011701762719565907, "loss": 1.3612, "step": 14214 }, { "epoch": 0.3814673679690854, "grad_norm": 0.318359375, "learning_rate": 0.0011701675215780533, "loss": 1.4164, "step": 14215 }, { "epoch": 0.3814942035208244, "grad_norm": 0.3203125, "learning_rate": 0.001170158769948736, "loss": 1.511, "step": 14216 }, { "epoch": 0.38152103907256335, "grad_norm": 0.318359375, "learning_rate": 0.0011701500170686581, "loss": 1.4838, "step": 14217 }, { "epoch": 0.3815478746243023, "grad_norm": 0.310546875, "learning_rate": 0.001170141262937839, "loss": 1.4326, "step": 14218 }, { "epoch": 0.3815747101760412, "grad_norm": 0.31640625, "learning_rate": 0.0011701325075562975, "loss": 1.4352, "step": 14219 }, { "epoch": 0.38160154572778016, "grad_norm": 0.291015625, "learning_rate": 0.0011701237509240533, "loss": 1.2906, "step": 14220 }, { "epoch": 0.3816283812795191, "grad_norm": 0.30859375, "learning_rate": 0.0011701149930411251, "loss": 1.405, "step": 14221 }, { "epoch": 0.381655216831258, "grad_norm": 0.306640625, "learning_rate": 0.0011701062339075324, "loss": 1.4284, "step": 14222 }, { "epoch": 0.381682052382997, "grad_norm": 0.296875, "learning_rate": 0.001170097473523294, "loss": 1.3168, "step": 14223 }, { "epoch": 0.38170888793473595, "grad_norm": 0.298828125, "learning_rate": 0.00117008871188843, "loss": 1.351, "step": 14224 }, { "epoch": 0.3817357234864749, "grad_norm": 0.310546875, "learning_rate": 0.0011700799490029588, "loss": 1.4337, "step": 14225 }, { "epoch": 0.3817625590382138, "grad_norm": 0.3203125, "learning_rate": 0.0011700711848669, "loss": 1.4704, "step": 14226 }, { "epoch": 0.38178939458995276, "grad_norm": 0.3125, "learning_rate": 0.0011700624194802727, "loss": 1.4841, "step": 14227 }, { "epoch": 0.3818162301416917, "grad_norm": 0.30078125, "learning_rate": 0.0011700536528430963, "loss": 1.3573, "step": 14228 }, { "epoch": 0.3818430656934307, "grad_norm": 0.296875, "learning_rate": 0.0011700448849553899, "loss": 1.3207, "step": 14229 }, { "epoch": 0.3818699012451696, "grad_norm": 0.30859375, "learning_rate": 0.0011700361158171724, "loss": 1.3776, "step": 14230 }, { "epoch": 0.38189673679690855, "grad_norm": 0.310546875, "learning_rate": 0.0011700273454284637, "loss": 1.4281, "step": 14231 }, { "epoch": 0.3819235723486475, "grad_norm": 0.314453125, "learning_rate": 0.0011700185737892827, "loss": 1.4048, "step": 14232 }, { "epoch": 0.3819504079003864, "grad_norm": 0.30078125, "learning_rate": 0.0011700098008996483, "loss": 1.4118, "step": 14233 }, { "epoch": 0.38197724345212536, "grad_norm": 0.30078125, "learning_rate": 0.0011700010267595806, "loss": 1.3609, "step": 14234 }, { "epoch": 0.3820040790038643, "grad_norm": 0.310546875, "learning_rate": 0.001169992251369098, "loss": 1.4263, "step": 14235 }, { "epoch": 0.3820309145556033, "grad_norm": 0.30078125, "learning_rate": 0.0011699834747282201, "loss": 1.3241, "step": 14236 }, { "epoch": 0.3820577501073422, "grad_norm": 0.30859375, "learning_rate": 0.0011699746968369664, "loss": 1.3768, "step": 14237 }, { "epoch": 0.38208458565908116, "grad_norm": 0.29296875, "learning_rate": 0.0011699659176953557, "loss": 1.3046, "step": 14238 }, { "epoch": 0.3821114212108201, "grad_norm": 0.30859375, "learning_rate": 0.0011699571373034075, "loss": 1.4789, "step": 14239 }, { "epoch": 0.382138256762559, "grad_norm": 0.2890625, "learning_rate": 0.001169948355661141, "loss": 1.3373, "step": 14240 }, { "epoch": 0.38216509231429796, "grad_norm": 0.314453125, "learning_rate": 0.0011699395727685755, "loss": 1.4267, "step": 14241 }, { "epoch": 0.38219192786603695, "grad_norm": 0.287109375, "learning_rate": 0.0011699307886257303, "loss": 1.2624, "step": 14242 }, { "epoch": 0.3822187634177759, "grad_norm": 0.2890625, "learning_rate": 0.0011699220032326246, "loss": 1.2838, "step": 14243 }, { "epoch": 0.3822455989695148, "grad_norm": 0.3359375, "learning_rate": 0.0011699132165892776, "loss": 1.543, "step": 14244 }, { "epoch": 0.38227243452125376, "grad_norm": 0.31640625, "learning_rate": 0.0011699044286957087, "loss": 1.4233, "step": 14245 }, { "epoch": 0.3822992700729927, "grad_norm": 0.306640625, "learning_rate": 0.0011698956395519372, "loss": 1.3158, "step": 14246 }, { "epoch": 0.3823261056247316, "grad_norm": 0.322265625, "learning_rate": 0.0011698868491579826, "loss": 1.4116, "step": 14247 }, { "epoch": 0.38235294117647056, "grad_norm": 0.318359375, "learning_rate": 0.0011698780575138635, "loss": 1.4905, "step": 14248 }, { "epoch": 0.38237977672820955, "grad_norm": 0.3046875, "learning_rate": 0.0011698692646196, "loss": 1.4046, "step": 14249 }, { "epoch": 0.3824066122799485, "grad_norm": 0.314453125, "learning_rate": 0.0011698604704752107, "loss": 1.4429, "step": 14250 }, { "epoch": 0.3824334478316874, "grad_norm": 0.3203125, "learning_rate": 0.0011698516750807153, "loss": 1.4862, "step": 14251 }, { "epoch": 0.38246028338342636, "grad_norm": 0.294921875, "learning_rate": 0.0011698428784361328, "loss": 1.3411, "step": 14252 }, { "epoch": 0.3824871189351653, "grad_norm": 0.298828125, "learning_rate": 0.0011698340805414828, "loss": 1.2978, "step": 14253 }, { "epoch": 0.38251395448690423, "grad_norm": 0.30859375, "learning_rate": 0.0011698252813967846, "loss": 1.4335, "step": 14254 }, { "epoch": 0.3825407900386432, "grad_norm": 0.29296875, "learning_rate": 0.0011698164810020571, "loss": 1.2823, "step": 14255 }, { "epoch": 0.38256762559038215, "grad_norm": 0.33203125, "learning_rate": 0.00116980767935732, "loss": 1.5254, "step": 14256 }, { "epoch": 0.3825944611421211, "grad_norm": 0.302734375, "learning_rate": 0.0011697988764625927, "loss": 1.337, "step": 14257 }, { "epoch": 0.38262129669386, "grad_norm": 0.318359375, "learning_rate": 0.001169790072317894, "loss": 1.4401, "step": 14258 }, { "epoch": 0.38264813224559896, "grad_norm": 0.30078125, "learning_rate": 0.0011697812669232437, "loss": 1.3591, "step": 14259 }, { "epoch": 0.3826749677973379, "grad_norm": 0.306640625, "learning_rate": 0.0011697724602786608, "loss": 1.356, "step": 14260 }, { "epoch": 0.38270180334907683, "grad_norm": 0.318359375, "learning_rate": 0.0011697636523841648, "loss": 1.4012, "step": 14261 }, { "epoch": 0.3827286389008158, "grad_norm": 0.314453125, "learning_rate": 0.0011697548432397752, "loss": 1.5253, "step": 14262 }, { "epoch": 0.38275547445255476, "grad_norm": 0.302734375, "learning_rate": 0.0011697460328455108, "loss": 1.3913, "step": 14263 }, { "epoch": 0.3827823100042937, "grad_norm": 0.318359375, "learning_rate": 0.0011697372212013913, "loss": 1.4664, "step": 14264 }, { "epoch": 0.3828091455560326, "grad_norm": 0.42578125, "learning_rate": 0.001169728408307436, "loss": 1.8471, "step": 14265 }, { "epoch": 0.38283598110777156, "grad_norm": 0.357421875, "learning_rate": 0.001169719594163664, "loss": 1.7327, "step": 14266 }, { "epoch": 0.3828628166595105, "grad_norm": 0.349609375, "learning_rate": 0.001169710778770095, "loss": 1.6527, "step": 14267 }, { "epoch": 0.3828896522112495, "grad_norm": 0.333984375, "learning_rate": 0.0011697019621267485, "loss": 1.6139, "step": 14268 }, { "epoch": 0.3829164877629884, "grad_norm": 0.333984375, "learning_rate": 0.0011696931442336431, "loss": 1.7255, "step": 14269 }, { "epoch": 0.38294332331472736, "grad_norm": 0.345703125, "learning_rate": 0.0011696843250907985, "loss": 1.6595, "step": 14270 }, { "epoch": 0.3829701588664663, "grad_norm": 0.30859375, "learning_rate": 0.0011696755046982342, "loss": 1.5953, "step": 14271 }, { "epoch": 0.3829969944182052, "grad_norm": 0.302734375, "learning_rate": 0.0011696666830559694, "loss": 1.6679, "step": 14272 }, { "epoch": 0.38302382996994416, "grad_norm": 0.310546875, "learning_rate": 0.0011696578601640237, "loss": 1.6997, "step": 14273 }, { "epoch": 0.38305066552168315, "grad_norm": 0.337890625, "learning_rate": 0.001169649036022416, "loss": 1.8198, "step": 14274 }, { "epoch": 0.3830775010734221, "grad_norm": 0.3203125, "learning_rate": 0.001169640210631166, "loss": 1.6885, "step": 14275 }, { "epoch": 0.383104336625161, "grad_norm": 0.33203125, "learning_rate": 0.001169631383990293, "loss": 1.8156, "step": 14276 }, { "epoch": 0.38313117217689996, "grad_norm": 0.328125, "learning_rate": 0.0011696225560998163, "loss": 1.8173, "step": 14277 }, { "epoch": 0.3831580077286389, "grad_norm": 0.306640625, "learning_rate": 0.0011696137269597553, "loss": 1.6582, "step": 14278 }, { "epoch": 0.38318484328037783, "grad_norm": 0.3046875, "learning_rate": 0.0011696048965701294, "loss": 1.6122, "step": 14279 }, { "epoch": 0.38321167883211676, "grad_norm": 0.314453125, "learning_rate": 0.001169596064930958, "loss": 1.6581, "step": 14280 }, { "epoch": 0.38323851438385576, "grad_norm": 0.306640625, "learning_rate": 0.0011695872320422601, "loss": 1.5594, "step": 14281 }, { "epoch": 0.3832653499355947, "grad_norm": 0.296875, "learning_rate": 0.0011695783979040557, "loss": 1.5936, "step": 14282 }, { "epoch": 0.3832921854873336, "grad_norm": 0.306640625, "learning_rate": 0.0011695695625163639, "loss": 1.6452, "step": 14283 }, { "epoch": 0.38331902103907256, "grad_norm": 0.30078125, "learning_rate": 0.0011695607258792038, "loss": 1.5946, "step": 14284 }, { "epoch": 0.3833458565908115, "grad_norm": 0.3125, "learning_rate": 0.0011695518879925953, "loss": 1.739, "step": 14285 }, { "epoch": 0.38337269214255043, "grad_norm": 0.302734375, "learning_rate": 0.0011695430488565574, "loss": 1.5688, "step": 14286 }, { "epoch": 0.3833995276942894, "grad_norm": 0.310546875, "learning_rate": 0.0011695342084711094, "loss": 1.665, "step": 14287 }, { "epoch": 0.38342636324602836, "grad_norm": 0.314453125, "learning_rate": 0.0011695253668362712, "loss": 1.6187, "step": 14288 }, { "epoch": 0.3834531987977673, "grad_norm": 0.31640625, "learning_rate": 0.0011695165239520619, "loss": 1.6644, "step": 14289 }, { "epoch": 0.3834800343495062, "grad_norm": 0.318359375, "learning_rate": 0.0011695076798185007, "loss": 1.7362, "step": 14290 }, { "epoch": 0.38350686990124516, "grad_norm": 0.310546875, "learning_rate": 0.0011694988344356071, "loss": 1.649, "step": 14291 }, { "epoch": 0.3835337054529841, "grad_norm": 0.318359375, "learning_rate": 0.0011694899878034008, "loss": 1.7509, "step": 14292 }, { "epoch": 0.38356054100472303, "grad_norm": 0.298828125, "learning_rate": 0.001169481139921901, "loss": 1.5605, "step": 14293 }, { "epoch": 0.383587376556462, "grad_norm": 0.298828125, "learning_rate": 0.0011694722907911272, "loss": 1.5956, "step": 14294 }, { "epoch": 0.38361421210820096, "grad_norm": 0.326171875, "learning_rate": 0.0011694634404110988, "loss": 1.7844, "step": 14295 }, { "epoch": 0.3836410476599399, "grad_norm": 0.3125, "learning_rate": 0.001169454588781835, "loss": 1.7214, "step": 14296 }, { "epoch": 0.38366788321167883, "grad_norm": 0.306640625, "learning_rate": 0.0011694457359033555, "loss": 1.6222, "step": 14297 }, { "epoch": 0.38369471876341776, "grad_norm": 0.3046875, "learning_rate": 0.0011694368817756794, "loss": 1.5621, "step": 14298 }, { "epoch": 0.3837215543151567, "grad_norm": 0.3046875, "learning_rate": 0.0011694280263988264, "loss": 1.6799, "step": 14299 }, { "epoch": 0.3837483898668957, "grad_norm": 0.31640625, "learning_rate": 0.001169419169772816, "loss": 1.6707, "step": 14300 }, { "epoch": 0.3837752254186346, "grad_norm": 0.30859375, "learning_rate": 0.0011694103118976673, "loss": 1.6407, "step": 14301 }, { "epoch": 0.38380206097037356, "grad_norm": 0.31640625, "learning_rate": 0.0011694014527733998, "loss": 1.7003, "step": 14302 }, { "epoch": 0.3838288965221125, "grad_norm": 0.3125, "learning_rate": 0.0011693925924000331, "loss": 1.7056, "step": 14303 }, { "epoch": 0.38385573207385143, "grad_norm": 0.296875, "learning_rate": 0.0011693837307775866, "loss": 1.5323, "step": 14304 }, { "epoch": 0.38388256762559037, "grad_norm": 0.306640625, "learning_rate": 0.0011693748679060796, "loss": 1.6478, "step": 14305 }, { "epoch": 0.3839094031773293, "grad_norm": 0.306640625, "learning_rate": 0.001169366003785532, "loss": 1.6025, "step": 14306 }, { "epoch": 0.3839362387290683, "grad_norm": 0.32421875, "learning_rate": 0.0011693571384159627, "loss": 1.8234, "step": 14307 }, { "epoch": 0.3839630742808072, "grad_norm": 0.298828125, "learning_rate": 0.0011693482717973912, "loss": 1.5247, "step": 14308 }, { "epoch": 0.38398990983254616, "grad_norm": 0.3046875, "learning_rate": 0.0011693394039298372, "loss": 1.5855, "step": 14309 }, { "epoch": 0.3840167453842851, "grad_norm": 0.29296875, "learning_rate": 0.00116933053481332, "loss": 1.5568, "step": 14310 }, { "epoch": 0.38404358093602403, "grad_norm": 0.333984375, "learning_rate": 0.0011693216644478592, "loss": 1.7311, "step": 14311 }, { "epoch": 0.38407041648776297, "grad_norm": 0.3125, "learning_rate": 0.001169312792833474, "loss": 1.5887, "step": 14312 }, { "epoch": 0.38409725203950196, "grad_norm": 0.30078125, "learning_rate": 0.0011693039199701845, "loss": 1.5486, "step": 14313 }, { "epoch": 0.3841240875912409, "grad_norm": 0.318359375, "learning_rate": 0.0011692950458580091, "loss": 1.6861, "step": 14314 }, { "epoch": 0.3841509231429798, "grad_norm": 0.291015625, "learning_rate": 0.0011692861704969682, "loss": 1.4906, "step": 14315 }, { "epoch": 0.38417775869471876, "grad_norm": 0.29296875, "learning_rate": 0.0011692772938870807, "loss": 1.5275, "step": 14316 }, { "epoch": 0.3842045942464577, "grad_norm": 0.294921875, "learning_rate": 0.0011692684160283665, "loss": 1.6049, "step": 14317 }, { "epoch": 0.38423142979819663, "grad_norm": 0.306640625, "learning_rate": 0.001169259536920845, "loss": 1.6423, "step": 14318 }, { "epoch": 0.38425826534993557, "grad_norm": 0.298828125, "learning_rate": 0.001169250656564535, "loss": 1.5006, "step": 14319 }, { "epoch": 0.38428510090167456, "grad_norm": 0.306640625, "learning_rate": 0.0011692417749594572, "loss": 1.6324, "step": 14320 }, { "epoch": 0.3843119364534135, "grad_norm": 0.3203125, "learning_rate": 0.0011692328921056301, "loss": 1.7298, "step": 14321 }, { "epoch": 0.38433877200515243, "grad_norm": 0.302734375, "learning_rate": 0.0011692240080030734, "loss": 1.539, "step": 14322 }, { "epoch": 0.38436560755689136, "grad_norm": 0.322265625, "learning_rate": 0.0011692151226518068, "loss": 1.6687, "step": 14323 }, { "epoch": 0.3843924431086303, "grad_norm": 0.298828125, "learning_rate": 0.0011692062360518498, "loss": 1.5468, "step": 14324 }, { "epoch": 0.38441927866036923, "grad_norm": 0.302734375, "learning_rate": 0.0011691973482032218, "loss": 1.5159, "step": 14325 }, { "epoch": 0.3844461142121082, "grad_norm": 0.318359375, "learning_rate": 0.001169188459105942, "loss": 1.6782, "step": 14326 }, { "epoch": 0.38447294976384716, "grad_norm": 0.310546875, "learning_rate": 0.0011691795687600304, "loss": 1.6751, "step": 14327 }, { "epoch": 0.3844997853155861, "grad_norm": 0.310546875, "learning_rate": 0.0011691706771655062, "loss": 1.6216, "step": 14328 }, { "epoch": 0.38452662086732503, "grad_norm": 0.302734375, "learning_rate": 0.001169161784322389, "loss": 1.4961, "step": 14329 }, { "epoch": 0.38455345641906397, "grad_norm": 0.30078125, "learning_rate": 0.0011691528902306983, "loss": 1.5964, "step": 14330 }, { "epoch": 0.3845802919708029, "grad_norm": 0.3046875, "learning_rate": 0.0011691439948904537, "loss": 1.5433, "step": 14331 }, { "epoch": 0.38460712752254184, "grad_norm": 0.302734375, "learning_rate": 0.0011691350983016745, "loss": 1.6588, "step": 14332 }, { "epoch": 0.3846339630742808, "grad_norm": 0.3046875, "learning_rate": 0.0011691262004643805, "loss": 1.6279, "step": 14333 }, { "epoch": 0.38466079862601976, "grad_norm": 0.32421875, "learning_rate": 0.0011691173013785908, "loss": 1.6618, "step": 14334 }, { "epoch": 0.3846876341777587, "grad_norm": 0.3046875, "learning_rate": 0.0011691084010443254, "loss": 1.6888, "step": 14335 }, { "epoch": 0.38471446972949763, "grad_norm": 0.31640625, "learning_rate": 0.0011690994994616034, "loss": 1.7219, "step": 14336 }, { "epoch": 0.38474130528123657, "grad_norm": 0.302734375, "learning_rate": 0.0011690905966304447, "loss": 1.5324, "step": 14337 }, { "epoch": 0.3847681408329755, "grad_norm": 0.302734375, "learning_rate": 0.0011690816925508686, "loss": 1.6161, "step": 14338 }, { "epoch": 0.3847949763847145, "grad_norm": 0.30078125, "learning_rate": 0.0011690727872228947, "loss": 1.5127, "step": 14339 }, { "epoch": 0.38482181193645343, "grad_norm": 0.30859375, "learning_rate": 0.0011690638806465426, "loss": 1.6526, "step": 14340 }, { "epoch": 0.38484864748819236, "grad_norm": 0.298828125, "learning_rate": 0.0011690549728218317, "loss": 1.5094, "step": 14341 }, { "epoch": 0.3848754830399313, "grad_norm": 0.30078125, "learning_rate": 0.0011690460637487816, "loss": 1.5364, "step": 14342 }, { "epoch": 0.38490231859167023, "grad_norm": 0.3046875, "learning_rate": 0.0011690371534274118, "loss": 1.6091, "step": 14343 }, { "epoch": 0.38492915414340917, "grad_norm": 0.29296875, "learning_rate": 0.0011690282418577419, "loss": 1.5541, "step": 14344 }, { "epoch": 0.38495598969514816, "grad_norm": 0.3125, "learning_rate": 0.0011690193290397916, "loss": 1.6355, "step": 14345 }, { "epoch": 0.3849828252468871, "grad_norm": 0.298828125, "learning_rate": 0.00116901041497358, "loss": 1.5641, "step": 14346 }, { "epoch": 0.38500966079862603, "grad_norm": 0.3046875, "learning_rate": 0.0011690014996591272, "loss": 1.6809, "step": 14347 }, { "epoch": 0.38503649635036497, "grad_norm": 0.30859375, "learning_rate": 0.0011689925830964526, "loss": 1.6633, "step": 14348 }, { "epoch": 0.3850633319021039, "grad_norm": 0.296875, "learning_rate": 0.0011689836652855755, "loss": 1.5243, "step": 14349 }, { "epoch": 0.38509016745384284, "grad_norm": 0.31640625, "learning_rate": 0.0011689747462265156, "loss": 1.6596, "step": 14350 }, { "epoch": 0.38511700300558177, "grad_norm": 0.3046875, "learning_rate": 0.0011689658259192925, "loss": 1.5852, "step": 14351 }, { "epoch": 0.38514383855732076, "grad_norm": 0.298828125, "learning_rate": 0.001168956904363926, "loss": 1.5075, "step": 14352 }, { "epoch": 0.3851706741090597, "grad_norm": 0.30859375, "learning_rate": 0.0011689479815604352, "loss": 1.6206, "step": 14353 }, { "epoch": 0.38519750966079863, "grad_norm": 0.30078125, "learning_rate": 0.0011689390575088398, "loss": 1.5575, "step": 14354 }, { "epoch": 0.38522434521253757, "grad_norm": 0.322265625, "learning_rate": 0.0011689301322091597, "loss": 1.6574, "step": 14355 }, { "epoch": 0.3852511807642765, "grad_norm": 0.30078125, "learning_rate": 0.0011689212056614144, "loss": 1.5269, "step": 14356 }, { "epoch": 0.38527801631601544, "grad_norm": 0.30859375, "learning_rate": 0.0011689122778656233, "loss": 1.63, "step": 14357 }, { "epoch": 0.3853048518677544, "grad_norm": 0.31640625, "learning_rate": 0.001168903348821806, "loss": 1.6449, "step": 14358 }, { "epoch": 0.38533168741949336, "grad_norm": 0.30078125, "learning_rate": 0.0011688944185299821, "loss": 1.5487, "step": 14359 }, { "epoch": 0.3853585229712323, "grad_norm": 0.296875, "learning_rate": 0.0011688854869901712, "loss": 1.5204, "step": 14360 }, { "epoch": 0.38538535852297123, "grad_norm": 0.302734375, "learning_rate": 0.0011688765542023929, "loss": 1.5363, "step": 14361 }, { "epoch": 0.38541219407471017, "grad_norm": 0.3125, "learning_rate": 0.0011688676201666668, "loss": 1.5164, "step": 14362 }, { "epoch": 0.3854390296264491, "grad_norm": 0.318359375, "learning_rate": 0.0011688586848830123, "loss": 1.6319, "step": 14363 }, { "epoch": 0.38546586517818804, "grad_norm": 0.298828125, "learning_rate": 0.0011688497483514496, "loss": 1.4942, "step": 14364 }, { "epoch": 0.38549270072992703, "grad_norm": 0.302734375, "learning_rate": 0.0011688408105719977, "loss": 1.5548, "step": 14365 }, { "epoch": 0.38551953628166596, "grad_norm": 0.314453125, "learning_rate": 0.0011688318715446764, "loss": 1.6515, "step": 14366 }, { "epoch": 0.3855463718334049, "grad_norm": 0.3046875, "learning_rate": 0.0011688229312695054, "loss": 1.5827, "step": 14367 }, { "epoch": 0.38557320738514383, "grad_norm": 0.298828125, "learning_rate": 0.0011688139897465043, "loss": 1.5039, "step": 14368 }, { "epoch": 0.38560004293688277, "grad_norm": 0.283203125, "learning_rate": 0.0011688050469756926, "loss": 1.4229, "step": 14369 }, { "epoch": 0.3856268784886217, "grad_norm": 0.4296875, "learning_rate": 0.0011687961029570898, "loss": 1.651, "step": 14370 }, { "epoch": 0.3856537140403607, "grad_norm": 0.306640625, "learning_rate": 0.0011687871576907159, "loss": 1.6225, "step": 14371 }, { "epoch": 0.38568054959209963, "grad_norm": 0.314453125, "learning_rate": 0.0011687782111765904, "loss": 1.6205, "step": 14372 }, { "epoch": 0.38570738514383857, "grad_norm": 0.322265625, "learning_rate": 0.0011687692634147326, "loss": 1.5786, "step": 14373 }, { "epoch": 0.3857342206955775, "grad_norm": 0.318359375, "learning_rate": 0.0011687603144051625, "loss": 1.6484, "step": 14374 }, { "epoch": 0.38576105624731644, "grad_norm": 0.291015625, "learning_rate": 0.0011687513641478995, "loss": 1.4687, "step": 14375 }, { "epoch": 0.38578789179905537, "grad_norm": 0.30078125, "learning_rate": 0.0011687424126429634, "loss": 1.4899, "step": 14376 }, { "epoch": 0.3858147273507943, "grad_norm": 0.314453125, "learning_rate": 0.001168733459890374, "loss": 1.565, "step": 14377 }, { "epoch": 0.3858415629025333, "grad_norm": 0.310546875, "learning_rate": 0.0011687245058901502, "loss": 1.5319, "step": 14378 }, { "epoch": 0.38586839845427223, "grad_norm": 0.310546875, "learning_rate": 0.0011687155506423124, "loss": 1.5685, "step": 14379 }, { "epoch": 0.38589523400601117, "grad_norm": 0.314453125, "learning_rate": 0.0011687065941468802, "loss": 1.531, "step": 14380 }, { "epoch": 0.3859220695577501, "grad_norm": 0.3125, "learning_rate": 0.001168697636403873, "loss": 1.5567, "step": 14381 }, { "epoch": 0.38594890510948904, "grad_norm": 0.3046875, "learning_rate": 0.00116868867741331, "loss": 1.493, "step": 14382 }, { "epoch": 0.385975740661228, "grad_norm": 0.330078125, "learning_rate": 0.001168679717175212, "loss": 1.6922, "step": 14383 }, { "epoch": 0.38600257621296696, "grad_norm": 0.30859375, "learning_rate": 0.0011686707556895978, "loss": 1.5943, "step": 14384 }, { "epoch": 0.3860294117647059, "grad_norm": 0.298828125, "learning_rate": 0.0011686617929564872, "loss": 1.5525, "step": 14385 }, { "epoch": 0.38605624731644483, "grad_norm": 0.3125, "learning_rate": 0.0011686528289759, "loss": 1.5998, "step": 14386 }, { "epoch": 0.38608308286818377, "grad_norm": 0.302734375, "learning_rate": 0.0011686438637478556, "loss": 1.6164, "step": 14387 }, { "epoch": 0.3861099184199227, "grad_norm": 0.310546875, "learning_rate": 0.001168634897272374, "loss": 1.6408, "step": 14388 }, { "epoch": 0.38613675397166164, "grad_norm": 0.291015625, "learning_rate": 0.001168625929549475, "loss": 1.4665, "step": 14389 }, { "epoch": 0.3861635895234006, "grad_norm": 0.29296875, "learning_rate": 0.0011686169605791775, "loss": 1.482, "step": 14390 }, { "epoch": 0.38619042507513957, "grad_norm": 0.3125, "learning_rate": 0.001168607990361502, "loss": 1.6344, "step": 14391 }, { "epoch": 0.3862172606268785, "grad_norm": 0.296875, "learning_rate": 0.0011685990188964677, "loss": 1.5326, "step": 14392 }, { "epoch": 0.38624409617861744, "grad_norm": 0.296875, "learning_rate": 0.0011685900461840945, "loss": 1.5254, "step": 14393 }, { "epoch": 0.38627093173035637, "grad_norm": 0.3046875, "learning_rate": 0.0011685810722244022, "loss": 1.5252, "step": 14394 }, { "epoch": 0.3862977672820953, "grad_norm": 0.30859375, "learning_rate": 0.00116857209701741, "loss": 1.5595, "step": 14395 }, { "epoch": 0.38632460283383424, "grad_norm": 0.310546875, "learning_rate": 0.001168563120563138, "loss": 1.593, "step": 14396 }, { "epoch": 0.38635143838557323, "grad_norm": 0.3125, "learning_rate": 0.0011685541428616058, "loss": 1.5566, "step": 14397 }, { "epoch": 0.38637827393731217, "grad_norm": 0.302734375, "learning_rate": 0.0011685451639128332, "loss": 1.4805, "step": 14398 }, { "epoch": 0.3864051094890511, "grad_norm": 0.294921875, "learning_rate": 0.0011685361837168397, "loss": 1.4373, "step": 14399 }, { "epoch": 0.38643194504079004, "grad_norm": 0.306640625, "learning_rate": 0.001168527202273645, "loss": 1.5318, "step": 14400 }, { "epoch": 0.38645878059252897, "grad_norm": 0.3203125, "learning_rate": 0.001168518219583269, "loss": 1.5908, "step": 14401 }, { "epoch": 0.3864856161442679, "grad_norm": 0.318359375, "learning_rate": 0.001168509235645731, "loss": 1.5941, "step": 14402 }, { "epoch": 0.38651245169600684, "grad_norm": 0.30859375, "learning_rate": 0.0011685002504610515, "loss": 1.5238, "step": 14403 }, { "epoch": 0.38653928724774583, "grad_norm": 0.310546875, "learning_rate": 0.0011684912640292492, "loss": 1.6822, "step": 14404 }, { "epoch": 0.38656612279948477, "grad_norm": 0.314453125, "learning_rate": 0.0011684822763503447, "loss": 1.5474, "step": 14405 }, { "epoch": 0.3865929583512237, "grad_norm": 0.310546875, "learning_rate": 0.001168473287424357, "loss": 1.5828, "step": 14406 }, { "epoch": 0.38661979390296264, "grad_norm": 0.306640625, "learning_rate": 0.0011684642972513066, "loss": 1.5082, "step": 14407 }, { "epoch": 0.3866466294547016, "grad_norm": 0.3046875, "learning_rate": 0.0011684553058312125, "loss": 1.5112, "step": 14408 }, { "epoch": 0.3866734650064405, "grad_norm": 0.306640625, "learning_rate": 0.0011684463131640946, "loss": 1.5691, "step": 14409 }, { "epoch": 0.3867003005581795, "grad_norm": 0.29296875, "learning_rate": 0.0011684373192499728, "loss": 1.4949, "step": 14410 }, { "epoch": 0.38672713610991843, "grad_norm": 0.30078125, "learning_rate": 0.0011684283240888669, "loss": 1.5374, "step": 14411 }, { "epoch": 0.38675397166165737, "grad_norm": 0.314453125, "learning_rate": 0.0011684193276807963, "loss": 1.619, "step": 14412 }, { "epoch": 0.3867808072133963, "grad_norm": 0.328125, "learning_rate": 0.0011684103300257812, "loss": 1.655, "step": 14413 }, { "epoch": 0.38680764276513524, "grad_norm": 0.298828125, "learning_rate": 0.0011684013311238407, "loss": 1.4204, "step": 14414 }, { "epoch": 0.3868344783168742, "grad_norm": 0.294921875, "learning_rate": 0.0011683923309749951, "loss": 1.4842, "step": 14415 }, { "epoch": 0.38686131386861317, "grad_norm": 0.3125, "learning_rate": 0.0011683833295792639, "loss": 1.5661, "step": 14416 }, { "epoch": 0.3868881494203521, "grad_norm": 0.31640625, "learning_rate": 0.001168374326936667, "loss": 1.5549, "step": 14417 }, { "epoch": 0.38691498497209104, "grad_norm": 0.318359375, "learning_rate": 0.001168365323047224, "loss": 1.5895, "step": 14418 }, { "epoch": 0.38694182052382997, "grad_norm": 0.302734375, "learning_rate": 0.0011683563179109547, "loss": 1.5136, "step": 14419 }, { "epoch": 0.3869686560755689, "grad_norm": 0.30859375, "learning_rate": 0.0011683473115278787, "loss": 1.538, "step": 14420 }, { "epoch": 0.38699549162730784, "grad_norm": 0.310546875, "learning_rate": 0.0011683383038980162, "loss": 1.5373, "step": 14421 }, { "epoch": 0.3870223271790468, "grad_norm": 0.291015625, "learning_rate": 0.0011683292950213863, "loss": 1.4527, "step": 14422 }, { "epoch": 0.38704916273078577, "grad_norm": 0.31640625, "learning_rate": 0.0011683202848980093, "loss": 1.5714, "step": 14423 }, { "epoch": 0.3870759982825247, "grad_norm": 0.337890625, "learning_rate": 0.001168311273527905, "loss": 1.7219, "step": 14424 }, { "epoch": 0.38710283383426364, "grad_norm": 0.330078125, "learning_rate": 0.0011683022609110928, "loss": 1.6138, "step": 14425 }, { "epoch": 0.3871296693860026, "grad_norm": 0.30859375, "learning_rate": 0.0011682932470475926, "loss": 1.5379, "step": 14426 }, { "epoch": 0.3871565049377415, "grad_norm": 0.302734375, "learning_rate": 0.0011682842319374242, "loss": 1.5344, "step": 14427 }, { "epoch": 0.38718334048948044, "grad_norm": 0.291015625, "learning_rate": 0.0011682752155806075, "loss": 1.4077, "step": 14428 }, { "epoch": 0.38721017604121943, "grad_norm": 0.30859375, "learning_rate": 0.001168266197977162, "loss": 1.5327, "step": 14429 }, { "epoch": 0.38723701159295837, "grad_norm": 0.3125, "learning_rate": 0.001168257179127108, "loss": 1.5801, "step": 14430 }, { "epoch": 0.3872638471446973, "grad_norm": 0.3046875, "learning_rate": 0.0011682481590304646, "loss": 1.4819, "step": 14431 }, { "epoch": 0.38729068269643624, "grad_norm": 0.310546875, "learning_rate": 0.001168239137687252, "loss": 1.581, "step": 14432 }, { "epoch": 0.3873175182481752, "grad_norm": 0.314453125, "learning_rate": 0.0011682301150974898, "loss": 1.5589, "step": 14433 }, { "epoch": 0.3873443537999141, "grad_norm": 0.3203125, "learning_rate": 0.001168221091261198, "loss": 1.5997, "step": 14434 }, { "epoch": 0.38737118935165304, "grad_norm": 0.3125, "learning_rate": 0.0011682120661783964, "loss": 1.5206, "step": 14435 }, { "epoch": 0.38739802490339204, "grad_norm": 0.30078125, "learning_rate": 0.0011682030398491047, "loss": 1.4751, "step": 14436 }, { "epoch": 0.38742486045513097, "grad_norm": 0.314453125, "learning_rate": 0.0011681940122733425, "loss": 1.5579, "step": 14437 }, { "epoch": 0.3874516960068699, "grad_norm": 0.298828125, "learning_rate": 0.00116818498345113, "loss": 1.4431, "step": 14438 }, { "epoch": 0.38747853155860884, "grad_norm": 0.30078125, "learning_rate": 0.0011681759533824867, "loss": 1.3827, "step": 14439 }, { "epoch": 0.3875053671103478, "grad_norm": 0.29296875, "learning_rate": 0.0011681669220674326, "loss": 1.4087, "step": 14440 }, { "epoch": 0.3875322026620867, "grad_norm": 0.3046875, "learning_rate": 0.0011681578895059873, "loss": 1.5351, "step": 14441 }, { "epoch": 0.3875590382138257, "grad_norm": 0.30078125, "learning_rate": 0.0011681488556981709, "loss": 1.4892, "step": 14442 }, { "epoch": 0.38758587376556464, "grad_norm": 0.302734375, "learning_rate": 0.0011681398206440027, "loss": 1.5097, "step": 14443 }, { "epoch": 0.38761270931730357, "grad_norm": 0.3125, "learning_rate": 0.0011681307843435032, "loss": 1.58, "step": 14444 }, { "epoch": 0.3876395448690425, "grad_norm": 0.298828125, "learning_rate": 0.001168121746796692, "loss": 1.4727, "step": 14445 }, { "epoch": 0.38766638042078144, "grad_norm": 0.31640625, "learning_rate": 0.0011681127080035887, "loss": 1.5361, "step": 14446 }, { "epoch": 0.3876932159725204, "grad_norm": 0.302734375, "learning_rate": 0.0011681036679642132, "loss": 1.5302, "step": 14447 }, { "epoch": 0.3877200515242593, "grad_norm": 0.314453125, "learning_rate": 0.0011680946266785855, "loss": 1.5584, "step": 14448 }, { "epoch": 0.3877468870759983, "grad_norm": 0.298828125, "learning_rate": 0.0011680855841467253, "loss": 1.4507, "step": 14449 }, { "epoch": 0.38777372262773724, "grad_norm": 0.314453125, "learning_rate": 0.0011680765403686525, "loss": 1.5803, "step": 14450 }, { "epoch": 0.3878005581794762, "grad_norm": 0.3046875, "learning_rate": 0.0011680674953443867, "loss": 1.5336, "step": 14451 }, { "epoch": 0.3878273937312151, "grad_norm": 0.3203125, "learning_rate": 0.001168058449073948, "loss": 1.572, "step": 14452 }, { "epoch": 0.38785422928295404, "grad_norm": 0.30078125, "learning_rate": 0.0011680494015573564, "loss": 1.4892, "step": 14453 }, { "epoch": 0.387881064834693, "grad_norm": 0.30859375, "learning_rate": 0.0011680403527946314, "loss": 1.4843, "step": 14454 }, { "epoch": 0.38790790038643197, "grad_norm": 0.306640625, "learning_rate": 0.001168031302785793, "loss": 1.4682, "step": 14455 }, { "epoch": 0.3879347359381709, "grad_norm": 0.32421875, "learning_rate": 0.001168022251530861, "loss": 1.5666, "step": 14456 }, { "epoch": 0.38796157148990984, "grad_norm": 0.328125, "learning_rate": 0.0011680131990298552, "loss": 1.6843, "step": 14457 }, { "epoch": 0.3879884070416488, "grad_norm": 0.3046875, "learning_rate": 0.0011680041452827956, "loss": 1.421, "step": 14458 }, { "epoch": 0.3880152425933877, "grad_norm": 0.302734375, "learning_rate": 0.0011679950902897022, "loss": 1.4735, "step": 14459 }, { "epoch": 0.38804207814512665, "grad_norm": 0.30859375, "learning_rate": 0.0011679860340505946, "loss": 1.5327, "step": 14460 }, { "epoch": 0.3880689136968656, "grad_norm": 0.298828125, "learning_rate": 0.0011679769765654926, "loss": 1.4451, "step": 14461 }, { "epoch": 0.38809574924860457, "grad_norm": 0.310546875, "learning_rate": 0.0011679679178344163, "loss": 1.4887, "step": 14462 }, { "epoch": 0.3881225848003435, "grad_norm": 0.306640625, "learning_rate": 0.0011679588578573856, "loss": 1.5853, "step": 14463 }, { "epoch": 0.38814942035208244, "grad_norm": 0.3046875, "learning_rate": 0.0011679497966344203, "loss": 1.5426, "step": 14464 }, { "epoch": 0.3881762559038214, "grad_norm": 0.3046875, "learning_rate": 0.0011679407341655401, "loss": 1.4612, "step": 14465 }, { "epoch": 0.3882030914555603, "grad_norm": 0.318359375, "learning_rate": 0.0011679316704507649, "loss": 1.6097, "step": 14466 }, { "epoch": 0.38822992700729925, "grad_norm": 0.296875, "learning_rate": 0.001167922605490115, "loss": 1.4182, "step": 14467 }, { "epoch": 0.38825676255903824, "grad_norm": 0.32421875, "learning_rate": 0.0011679135392836098, "loss": 1.6263, "step": 14468 }, { "epoch": 0.3882835981107772, "grad_norm": 0.30078125, "learning_rate": 0.0011679044718312694, "loss": 1.4555, "step": 14469 }, { "epoch": 0.3883104336625161, "grad_norm": 0.2890625, "learning_rate": 0.0011678954031331137, "loss": 1.3536, "step": 14470 }, { "epoch": 0.38833726921425504, "grad_norm": 0.310546875, "learning_rate": 0.0011678863331891624, "loss": 1.581, "step": 14471 }, { "epoch": 0.388364104765994, "grad_norm": 0.30078125, "learning_rate": 0.001167877261999436, "loss": 1.4706, "step": 14472 }, { "epoch": 0.3883909403177329, "grad_norm": 0.31640625, "learning_rate": 0.0011678681895639534, "loss": 1.6168, "step": 14473 }, { "epoch": 0.3884177758694719, "grad_norm": 0.322265625, "learning_rate": 0.0011678591158827353, "loss": 1.6269, "step": 14474 }, { "epoch": 0.38844461142121084, "grad_norm": 0.30859375, "learning_rate": 0.0011678500409558015, "loss": 1.5363, "step": 14475 }, { "epoch": 0.3884714469729498, "grad_norm": 0.3125, "learning_rate": 0.0011678409647831714, "loss": 1.5529, "step": 14476 }, { "epoch": 0.3884982825246887, "grad_norm": 0.30859375, "learning_rate": 0.0011678318873648656, "loss": 1.5396, "step": 14477 }, { "epoch": 0.38852511807642764, "grad_norm": 0.32421875, "learning_rate": 0.0011678228087009038, "loss": 1.6793, "step": 14478 }, { "epoch": 0.3885519536281666, "grad_norm": 0.30859375, "learning_rate": 0.0011678137287913057, "loss": 1.5062, "step": 14479 }, { "epoch": 0.3885787891799055, "grad_norm": 0.3125, "learning_rate": 0.0011678046476360912, "loss": 1.5099, "step": 14480 }, { "epoch": 0.3886056247316445, "grad_norm": 0.310546875, "learning_rate": 0.0011677955652352804, "loss": 1.4963, "step": 14481 }, { "epoch": 0.38863246028338344, "grad_norm": 0.314453125, "learning_rate": 0.0011677864815888932, "loss": 1.5463, "step": 14482 }, { "epoch": 0.3886592958351224, "grad_norm": 0.3125, "learning_rate": 0.0011677773966969494, "loss": 1.4645, "step": 14483 }, { "epoch": 0.3886861313868613, "grad_norm": 0.3203125, "learning_rate": 0.0011677683105594694, "loss": 1.5426, "step": 14484 }, { "epoch": 0.38871296693860025, "grad_norm": 0.326171875, "learning_rate": 0.0011677592231764723, "loss": 1.5209, "step": 14485 }, { "epoch": 0.3887398024903392, "grad_norm": 0.306640625, "learning_rate": 0.0011677501345479788, "loss": 1.4806, "step": 14486 }, { "epoch": 0.38876663804207817, "grad_norm": 0.310546875, "learning_rate": 0.0011677410446740086, "loss": 1.4114, "step": 14487 }, { "epoch": 0.3887934735938171, "grad_norm": 0.3203125, "learning_rate": 0.0011677319535545814, "loss": 1.5842, "step": 14488 }, { "epoch": 0.38882030914555604, "grad_norm": 0.283203125, "learning_rate": 0.0011677228611897174, "loss": 1.2595, "step": 14489 }, { "epoch": 0.388847144697295, "grad_norm": 0.337890625, "learning_rate": 0.0011677137675794363, "loss": 1.6443, "step": 14490 }, { "epoch": 0.3888739802490339, "grad_norm": 0.31640625, "learning_rate": 0.0011677046727237585, "loss": 1.5511, "step": 14491 }, { "epoch": 0.38890081580077285, "grad_norm": 0.326171875, "learning_rate": 0.0011676955766227034, "loss": 1.6517, "step": 14492 }, { "epoch": 0.3889276513525118, "grad_norm": 0.31640625, "learning_rate": 0.0011676864792762914, "loss": 1.4891, "step": 14493 }, { "epoch": 0.3889544869042508, "grad_norm": 0.296875, "learning_rate": 0.001167677380684542, "loss": 1.3848, "step": 14494 }, { "epoch": 0.3889813224559897, "grad_norm": 0.318359375, "learning_rate": 0.0011676682808474759, "loss": 1.4948, "step": 14495 }, { "epoch": 0.38900815800772864, "grad_norm": 0.322265625, "learning_rate": 0.0011676591797651124, "loss": 1.5566, "step": 14496 }, { "epoch": 0.3890349935594676, "grad_norm": 0.318359375, "learning_rate": 0.0011676500774374715, "loss": 1.5475, "step": 14497 }, { "epoch": 0.3890618291112065, "grad_norm": 0.314453125, "learning_rate": 0.0011676409738645736, "loss": 1.5559, "step": 14498 }, { "epoch": 0.38908866466294545, "grad_norm": 0.3125, "learning_rate": 0.0011676318690464382, "loss": 1.5151, "step": 14499 }, { "epoch": 0.38911550021468444, "grad_norm": 0.283203125, "learning_rate": 0.0011676227629830857, "loss": 1.3261, "step": 14500 }, { "epoch": 0.3891423357664234, "grad_norm": 0.306640625, "learning_rate": 0.0011676136556745358, "loss": 1.4821, "step": 14501 }, { "epoch": 0.3891691713181623, "grad_norm": 0.3046875, "learning_rate": 0.0011676045471208081, "loss": 1.5296, "step": 14502 }, { "epoch": 0.38919600686990125, "grad_norm": 0.318359375, "learning_rate": 0.0011675954373219235, "loss": 1.5626, "step": 14503 }, { "epoch": 0.3892228424216402, "grad_norm": 0.314453125, "learning_rate": 0.0011675863262779014, "loss": 1.5351, "step": 14504 }, { "epoch": 0.3892496779733791, "grad_norm": 0.322265625, "learning_rate": 0.001167577213988762, "loss": 1.581, "step": 14505 }, { "epoch": 0.38927651352511805, "grad_norm": 0.30078125, "learning_rate": 0.0011675681004545248, "loss": 1.3199, "step": 14506 }, { "epoch": 0.38930334907685704, "grad_norm": 0.302734375, "learning_rate": 0.0011675589856752105, "loss": 1.3978, "step": 14507 }, { "epoch": 0.389330184628596, "grad_norm": 0.302734375, "learning_rate": 0.0011675498696508387, "loss": 1.4392, "step": 14508 }, { "epoch": 0.3893570201803349, "grad_norm": 0.31640625, "learning_rate": 0.0011675407523814294, "loss": 1.4623, "step": 14509 }, { "epoch": 0.38938385573207385, "grad_norm": 0.31640625, "learning_rate": 0.0011675316338670026, "loss": 1.5588, "step": 14510 }, { "epoch": 0.3894106912838128, "grad_norm": 0.32421875, "learning_rate": 0.0011675225141075784, "loss": 1.5674, "step": 14511 }, { "epoch": 0.3894375268355517, "grad_norm": 0.31640625, "learning_rate": 0.0011675133931031768, "loss": 1.5531, "step": 14512 }, { "epoch": 0.3894643623872907, "grad_norm": 0.330078125, "learning_rate": 0.0011675042708538179, "loss": 1.64, "step": 14513 }, { "epoch": 0.38949119793902964, "grad_norm": 0.314453125, "learning_rate": 0.0011674951473595215, "loss": 1.5579, "step": 14514 }, { "epoch": 0.3895180334907686, "grad_norm": 0.322265625, "learning_rate": 0.0011674860226203075, "loss": 1.5593, "step": 14515 }, { "epoch": 0.3895448690425075, "grad_norm": 0.3203125, "learning_rate": 0.0011674768966361965, "loss": 1.5987, "step": 14516 }, { "epoch": 0.38957170459424645, "grad_norm": 0.306640625, "learning_rate": 0.0011674677694072078, "loss": 1.4752, "step": 14517 }, { "epoch": 0.3895985401459854, "grad_norm": 0.306640625, "learning_rate": 0.001167458640933362, "loss": 1.4873, "step": 14518 }, { "epoch": 0.3896253756977243, "grad_norm": 0.3203125, "learning_rate": 0.0011674495112146788, "loss": 1.5283, "step": 14519 }, { "epoch": 0.3896522112494633, "grad_norm": 0.314453125, "learning_rate": 0.0011674403802511783, "loss": 1.5014, "step": 14520 }, { "epoch": 0.38967904680120224, "grad_norm": 0.32421875, "learning_rate": 0.0011674312480428807, "loss": 1.5466, "step": 14521 }, { "epoch": 0.3897058823529412, "grad_norm": 0.306640625, "learning_rate": 0.0011674221145898057, "loss": 1.4222, "step": 14522 }, { "epoch": 0.3897327179046801, "grad_norm": 0.310546875, "learning_rate": 0.0011674129798919736, "loss": 1.4563, "step": 14523 }, { "epoch": 0.38975955345641905, "grad_norm": 0.328125, "learning_rate": 0.0011674038439494043, "loss": 1.5971, "step": 14524 }, { "epoch": 0.389786389008158, "grad_norm": 0.3203125, "learning_rate": 0.001167394706762118, "loss": 1.571, "step": 14525 }, { "epoch": 0.389813224559897, "grad_norm": 0.31640625, "learning_rate": 0.0011673855683301346, "loss": 1.5075, "step": 14526 }, { "epoch": 0.3898400601116359, "grad_norm": 0.306640625, "learning_rate": 0.0011673764286534742, "loss": 1.5209, "step": 14527 }, { "epoch": 0.38986689566337485, "grad_norm": 0.3125, "learning_rate": 0.0011673672877321568, "loss": 1.5506, "step": 14528 }, { "epoch": 0.3898937312151138, "grad_norm": 0.32421875, "learning_rate": 0.0011673581455662026, "loss": 1.6191, "step": 14529 }, { "epoch": 0.3899205667668527, "grad_norm": 0.310546875, "learning_rate": 0.0011673490021556314, "loss": 1.4613, "step": 14530 }, { "epoch": 0.38994740231859165, "grad_norm": 0.318359375, "learning_rate": 0.0011673398575004635, "loss": 1.5392, "step": 14531 }, { "epoch": 0.3899742378703306, "grad_norm": 0.291015625, "learning_rate": 0.0011673307116007187, "loss": 1.3078, "step": 14532 }, { "epoch": 0.3900010734220696, "grad_norm": 0.3203125, "learning_rate": 0.0011673215644564176, "loss": 1.5142, "step": 14533 }, { "epoch": 0.3900279089738085, "grad_norm": 0.30078125, "learning_rate": 0.0011673124160675795, "loss": 1.4324, "step": 14534 }, { "epoch": 0.39005474452554745, "grad_norm": 0.298828125, "learning_rate": 0.001167303266434225, "loss": 1.321, "step": 14535 }, { "epoch": 0.3900815800772864, "grad_norm": 0.33203125, "learning_rate": 0.001167294115556374, "loss": 1.5892, "step": 14536 }, { "epoch": 0.3901084156290253, "grad_norm": 0.330078125, "learning_rate": 0.0011672849634340467, "loss": 1.5917, "step": 14537 }, { "epoch": 0.39013525118076425, "grad_norm": 0.314453125, "learning_rate": 0.001167275810067263, "loss": 1.4766, "step": 14538 }, { "epoch": 0.39016208673250324, "grad_norm": 0.33203125, "learning_rate": 0.0011672666554560433, "loss": 1.5956, "step": 14539 }, { "epoch": 0.3901889222842422, "grad_norm": 0.310546875, "learning_rate": 0.0011672574996004072, "loss": 1.4476, "step": 14540 }, { "epoch": 0.3902157578359811, "grad_norm": 0.318359375, "learning_rate": 0.0011672483425003751, "loss": 1.5247, "step": 14541 }, { "epoch": 0.39024259338772005, "grad_norm": 0.326171875, "learning_rate": 0.001167239184155967, "loss": 1.5194, "step": 14542 }, { "epoch": 0.390269428939459, "grad_norm": 0.326171875, "learning_rate": 0.001167230024567203, "loss": 1.568, "step": 14543 }, { "epoch": 0.3902962644911979, "grad_norm": 0.30859375, "learning_rate": 0.0011672208637341034, "loss": 1.4515, "step": 14544 }, { "epoch": 0.3903231000429369, "grad_norm": 0.30078125, "learning_rate": 0.0011672117016566879, "loss": 1.3872, "step": 14545 }, { "epoch": 0.39034993559467585, "grad_norm": 0.302734375, "learning_rate": 0.0011672025383349769, "loss": 1.4285, "step": 14546 }, { "epoch": 0.3903767711464148, "grad_norm": 0.318359375, "learning_rate": 0.0011671933737689903, "loss": 1.4723, "step": 14547 }, { "epoch": 0.3904036066981537, "grad_norm": 0.357421875, "learning_rate": 0.0011671842079587482, "loss": 1.6404, "step": 14548 }, { "epoch": 0.39043044224989265, "grad_norm": 0.33203125, "learning_rate": 0.0011671750409042712, "loss": 1.5624, "step": 14549 }, { "epoch": 0.3904572778016316, "grad_norm": 0.3359375, "learning_rate": 0.001167165872605579, "loss": 1.6267, "step": 14550 }, { "epoch": 0.3904841133533705, "grad_norm": 0.318359375, "learning_rate": 0.0011671567030626913, "loss": 1.4876, "step": 14551 }, { "epoch": 0.3905109489051095, "grad_norm": 0.318359375, "learning_rate": 0.001167147532275629, "loss": 1.4578, "step": 14552 }, { "epoch": 0.39053778445684845, "grad_norm": 0.302734375, "learning_rate": 0.0011671383602444118, "loss": 1.4108, "step": 14553 }, { "epoch": 0.3905646200085874, "grad_norm": 0.3203125, "learning_rate": 0.0011671291869690598, "loss": 1.5295, "step": 14554 }, { "epoch": 0.3905914555603263, "grad_norm": 0.32421875, "learning_rate": 0.0011671200124495932, "loss": 1.5863, "step": 14555 }, { "epoch": 0.39061829111206525, "grad_norm": 0.322265625, "learning_rate": 0.0011671108366860324, "loss": 1.5525, "step": 14556 }, { "epoch": 0.3906451266638042, "grad_norm": 0.294921875, "learning_rate": 0.001167101659678397, "loss": 1.4117, "step": 14557 }, { "epoch": 0.3906719622155432, "grad_norm": 0.37109375, "learning_rate": 0.0011670924814267075, "loss": 1.6998, "step": 14558 }, { "epoch": 0.3906987977672821, "grad_norm": 0.390625, "learning_rate": 0.0011670833019309839, "loss": 1.8364, "step": 14559 }, { "epoch": 0.39072563331902105, "grad_norm": 0.361328125, "learning_rate": 0.0011670741211912464, "loss": 1.8332, "step": 14560 }, { "epoch": 0.39075246887076, "grad_norm": 0.3359375, "learning_rate": 0.0011670649392075151, "loss": 1.779, "step": 14561 }, { "epoch": 0.3907793044224989, "grad_norm": 0.3359375, "learning_rate": 0.0011670557559798102, "loss": 1.7776, "step": 14562 }, { "epoch": 0.39080613997423785, "grad_norm": 0.341796875, "learning_rate": 0.001167046571508152, "loss": 1.744, "step": 14563 }, { "epoch": 0.3908329755259768, "grad_norm": 0.35546875, "learning_rate": 0.0011670373857925601, "loss": 1.7193, "step": 14564 }, { "epoch": 0.3908598110777158, "grad_norm": 0.3125, "learning_rate": 0.0011670281988330551, "loss": 1.7266, "step": 14565 }, { "epoch": 0.3908866466294547, "grad_norm": 0.326171875, "learning_rate": 0.0011670190106296571, "loss": 1.8294, "step": 14566 }, { "epoch": 0.39091348218119365, "grad_norm": 0.322265625, "learning_rate": 0.0011670098211823863, "loss": 1.7258, "step": 14567 }, { "epoch": 0.3909403177329326, "grad_norm": 0.318359375, "learning_rate": 0.0011670006304912626, "loss": 1.8224, "step": 14568 }, { "epoch": 0.3909671532846715, "grad_norm": 0.318359375, "learning_rate": 0.0011669914385563063, "loss": 1.794, "step": 14569 }, { "epoch": 0.39099398883641046, "grad_norm": 0.30078125, "learning_rate": 0.0011669822453775377, "loss": 1.6439, "step": 14570 }, { "epoch": 0.39102082438814945, "grad_norm": 0.31640625, "learning_rate": 0.001166973050954977, "loss": 1.7594, "step": 14571 }, { "epoch": 0.3910476599398884, "grad_norm": 0.3125, "learning_rate": 0.0011669638552886442, "loss": 1.7496, "step": 14572 }, { "epoch": 0.3910744954916273, "grad_norm": 0.31640625, "learning_rate": 0.0011669546583785591, "loss": 1.7817, "step": 14573 }, { "epoch": 0.39110133104336625, "grad_norm": 0.29296875, "learning_rate": 0.0011669454602247426, "loss": 1.6125, "step": 14574 }, { "epoch": 0.3911281665951052, "grad_norm": 0.296875, "learning_rate": 0.0011669362608272146, "loss": 1.5595, "step": 14575 }, { "epoch": 0.3911550021468441, "grad_norm": 0.32421875, "learning_rate": 0.0011669270601859952, "loss": 1.8396, "step": 14576 }, { "epoch": 0.39118183769858306, "grad_norm": 0.3125, "learning_rate": 0.0011669178583011044, "loss": 1.6691, "step": 14577 }, { "epoch": 0.39120867325032205, "grad_norm": 0.31640625, "learning_rate": 0.001166908655172563, "loss": 1.8674, "step": 14578 }, { "epoch": 0.391235508802061, "grad_norm": 0.3125, "learning_rate": 0.0011668994508003906, "loss": 1.7604, "step": 14579 }, { "epoch": 0.3912623443537999, "grad_norm": 0.318359375, "learning_rate": 0.0011668902451846077, "loss": 1.7981, "step": 14580 }, { "epoch": 0.39128917990553885, "grad_norm": 0.322265625, "learning_rate": 0.001166881038325234, "loss": 1.7985, "step": 14581 }, { "epoch": 0.3913160154572778, "grad_norm": 0.298828125, "learning_rate": 0.0011668718302222905, "loss": 1.6539, "step": 14582 }, { "epoch": 0.3913428510090167, "grad_norm": 0.310546875, "learning_rate": 0.0011668626208757968, "loss": 1.7526, "step": 14583 }, { "epoch": 0.3913696865607557, "grad_norm": 0.314453125, "learning_rate": 0.0011668534102857735, "loss": 1.7698, "step": 14584 }, { "epoch": 0.39139652211249465, "grad_norm": 0.322265625, "learning_rate": 0.0011668441984522403, "loss": 1.6996, "step": 14585 }, { "epoch": 0.3914233576642336, "grad_norm": 0.31640625, "learning_rate": 0.0011668349853752177, "loss": 1.7419, "step": 14586 }, { "epoch": 0.3914501932159725, "grad_norm": 0.310546875, "learning_rate": 0.001166825771054726, "loss": 1.6295, "step": 14587 }, { "epoch": 0.39147702876771145, "grad_norm": 0.3203125, "learning_rate": 0.0011668165554907855, "loss": 1.7877, "step": 14588 }, { "epoch": 0.3915038643194504, "grad_norm": 0.3046875, "learning_rate": 0.001166807338683416, "loss": 1.6193, "step": 14589 }, { "epoch": 0.3915306998711893, "grad_norm": 0.302734375, "learning_rate": 0.0011667981206326381, "loss": 1.6511, "step": 14590 }, { "epoch": 0.3915575354229283, "grad_norm": 0.3046875, "learning_rate": 0.0011667889013384717, "loss": 1.6961, "step": 14591 }, { "epoch": 0.39158437097466725, "grad_norm": 0.31640625, "learning_rate": 0.0011667796808009373, "loss": 1.7141, "step": 14592 }, { "epoch": 0.3916112065264062, "grad_norm": 0.314453125, "learning_rate": 0.001166770459020055, "loss": 1.6458, "step": 14593 }, { "epoch": 0.3916380420781451, "grad_norm": 0.3046875, "learning_rate": 0.0011667612359958451, "loss": 1.6969, "step": 14594 }, { "epoch": 0.39166487762988406, "grad_norm": 0.302734375, "learning_rate": 0.0011667520117283278, "loss": 1.6209, "step": 14595 }, { "epoch": 0.391691713181623, "grad_norm": 0.298828125, "learning_rate": 0.0011667427862175233, "loss": 1.6258, "step": 14596 }, { "epoch": 0.391718548733362, "grad_norm": 0.314453125, "learning_rate": 0.0011667335594634518, "loss": 1.6838, "step": 14597 }, { "epoch": 0.3917453842851009, "grad_norm": 0.314453125, "learning_rate": 0.001166724331466134, "loss": 1.7157, "step": 14598 }, { "epoch": 0.39177221983683985, "grad_norm": 0.32421875, "learning_rate": 0.0011667151022255893, "loss": 1.7357, "step": 14599 }, { "epoch": 0.3917990553885788, "grad_norm": 0.306640625, "learning_rate": 0.0011667058717418385, "loss": 1.5779, "step": 14600 }, { "epoch": 0.3918258909403177, "grad_norm": 0.3203125, "learning_rate": 0.0011666966400149018, "loss": 1.7296, "step": 14601 }, { "epoch": 0.39185272649205666, "grad_norm": 0.3125, "learning_rate": 0.0011666874070447994, "loss": 1.6758, "step": 14602 }, { "epoch": 0.3918795620437956, "grad_norm": 0.326171875, "learning_rate": 0.0011666781728315514, "loss": 1.8421, "step": 14603 }, { "epoch": 0.3919063975955346, "grad_norm": 0.326171875, "learning_rate": 0.0011666689373751784, "loss": 1.8349, "step": 14604 }, { "epoch": 0.3919332331472735, "grad_norm": 0.322265625, "learning_rate": 0.0011666597006757004, "loss": 1.7829, "step": 14605 }, { "epoch": 0.39196006869901245, "grad_norm": 0.322265625, "learning_rate": 0.0011666504627331378, "loss": 1.703, "step": 14606 }, { "epoch": 0.3919869042507514, "grad_norm": 0.3046875, "learning_rate": 0.0011666412235475105, "loss": 1.5606, "step": 14607 }, { "epoch": 0.3920137398024903, "grad_norm": 0.318359375, "learning_rate": 0.0011666319831188394, "loss": 1.7907, "step": 14608 }, { "epoch": 0.39204057535422926, "grad_norm": 0.30078125, "learning_rate": 0.0011666227414471443, "loss": 1.6333, "step": 14609 }, { "epoch": 0.39206741090596825, "grad_norm": 0.310546875, "learning_rate": 0.0011666134985324457, "loss": 1.6579, "step": 14610 }, { "epoch": 0.3920942464577072, "grad_norm": 0.310546875, "learning_rate": 0.0011666042543747636, "loss": 1.6592, "step": 14611 }, { "epoch": 0.3921210820094461, "grad_norm": 0.302734375, "learning_rate": 0.0011665950089741186, "loss": 1.6065, "step": 14612 }, { "epoch": 0.39214791756118506, "grad_norm": 0.298828125, "learning_rate": 0.0011665857623305309, "loss": 1.6094, "step": 14613 }, { "epoch": 0.392174753112924, "grad_norm": 0.322265625, "learning_rate": 0.0011665765144440207, "loss": 1.6936, "step": 14614 }, { "epoch": 0.3922015886646629, "grad_norm": 0.3125, "learning_rate": 0.0011665672653146083, "loss": 1.7224, "step": 14615 }, { "epoch": 0.3922284242164019, "grad_norm": 0.32421875, "learning_rate": 0.001166558014942314, "loss": 1.7233, "step": 14616 }, { "epoch": 0.39225525976814085, "grad_norm": 0.291015625, "learning_rate": 0.0011665487633271579, "loss": 1.5145, "step": 14617 }, { "epoch": 0.3922820953198798, "grad_norm": 0.30859375, "learning_rate": 0.0011665395104691607, "loss": 1.6697, "step": 14618 }, { "epoch": 0.3923089308716187, "grad_norm": 0.3125, "learning_rate": 0.0011665302563683424, "loss": 1.7031, "step": 14619 }, { "epoch": 0.39233576642335766, "grad_norm": 0.3125, "learning_rate": 0.0011665210010247235, "loss": 1.6058, "step": 14620 }, { "epoch": 0.3923626019750966, "grad_norm": 0.314453125, "learning_rate": 0.0011665117444383242, "loss": 1.5916, "step": 14621 }, { "epoch": 0.3923894375268355, "grad_norm": 0.310546875, "learning_rate": 0.0011665024866091647, "loss": 1.6839, "step": 14622 }, { "epoch": 0.3924162730785745, "grad_norm": 0.314453125, "learning_rate": 0.0011664932275372655, "loss": 1.7065, "step": 14623 }, { "epoch": 0.39244310863031345, "grad_norm": 0.302734375, "learning_rate": 0.0011664839672226469, "loss": 1.6096, "step": 14624 }, { "epoch": 0.3924699441820524, "grad_norm": 0.298828125, "learning_rate": 0.001166474705665329, "loss": 1.5387, "step": 14625 }, { "epoch": 0.3924967797337913, "grad_norm": 0.30859375, "learning_rate": 0.0011664654428653323, "loss": 1.6866, "step": 14626 }, { "epoch": 0.39252361528553026, "grad_norm": 0.3125, "learning_rate": 0.001166456178822677, "loss": 1.6871, "step": 14627 }, { "epoch": 0.3925504508372692, "grad_norm": 0.328125, "learning_rate": 0.0011664469135373836, "loss": 1.7474, "step": 14628 }, { "epoch": 0.3925772863890082, "grad_norm": 0.302734375, "learning_rate": 0.001166437647009472, "loss": 1.6031, "step": 14629 }, { "epoch": 0.3926041219407471, "grad_norm": 0.306640625, "learning_rate": 0.0011664283792389633, "loss": 1.6189, "step": 14630 }, { "epoch": 0.39263095749248605, "grad_norm": 0.296875, "learning_rate": 0.0011664191102258772, "loss": 1.5918, "step": 14631 }, { "epoch": 0.392657793044225, "grad_norm": 0.31640625, "learning_rate": 0.0011664098399702342, "loss": 1.6953, "step": 14632 }, { "epoch": 0.3926846285959639, "grad_norm": 0.3203125, "learning_rate": 0.0011664005684720547, "loss": 1.7354, "step": 14633 }, { "epoch": 0.39271146414770286, "grad_norm": 0.302734375, "learning_rate": 0.001166391295731359, "loss": 1.5159, "step": 14634 }, { "epoch": 0.3927382996994418, "grad_norm": 0.30078125, "learning_rate": 0.0011663820217481674, "loss": 1.592, "step": 14635 }, { "epoch": 0.3927651352511808, "grad_norm": 0.306640625, "learning_rate": 0.0011663727465225002, "loss": 1.5862, "step": 14636 }, { "epoch": 0.3927919708029197, "grad_norm": 0.30859375, "learning_rate": 0.001166363470054378, "loss": 1.6143, "step": 14637 }, { "epoch": 0.39281880635465866, "grad_norm": 0.3046875, "learning_rate": 0.0011663541923438209, "loss": 1.5849, "step": 14638 }, { "epoch": 0.3928456419063976, "grad_norm": 0.306640625, "learning_rate": 0.0011663449133908492, "loss": 1.6817, "step": 14639 }, { "epoch": 0.3928724774581365, "grad_norm": 0.314453125, "learning_rate": 0.0011663356331954836, "loss": 1.6771, "step": 14640 }, { "epoch": 0.39289931300987546, "grad_norm": 0.296875, "learning_rate": 0.001166326351757744, "loss": 1.5842, "step": 14641 }, { "epoch": 0.39292614856161445, "grad_norm": 0.29296875, "learning_rate": 0.0011663170690776512, "loss": 1.5117, "step": 14642 }, { "epoch": 0.3929529841133534, "grad_norm": 0.306640625, "learning_rate": 0.0011663077851552252, "loss": 1.594, "step": 14643 }, { "epoch": 0.3929798196650923, "grad_norm": 0.30859375, "learning_rate": 0.0011662984999904866, "loss": 1.6847, "step": 14644 }, { "epoch": 0.39300665521683126, "grad_norm": 0.302734375, "learning_rate": 0.0011662892135834559, "loss": 1.5636, "step": 14645 }, { "epoch": 0.3930334907685702, "grad_norm": 0.328125, "learning_rate": 0.001166279925934153, "loss": 1.61, "step": 14646 }, { "epoch": 0.39306032632030913, "grad_norm": 0.318359375, "learning_rate": 0.0011662706370425984, "loss": 1.7069, "step": 14647 }, { "epoch": 0.39308716187204806, "grad_norm": 0.3203125, "learning_rate": 0.0011662613469088131, "loss": 1.7257, "step": 14648 }, { "epoch": 0.39311399742378705, "grad_norm": 0.310546875, "learning_rate": 0.0011662520555328166, "loss": 1.6068, "step": 14649 }, { "epoch": 0.393140832975526, "grad_norm": 0.3046875, "learning_rate": 0.00116624276291463, "loss": 1.5711, "step": 14650 }, { "epoch": 0.3931676685272649, "grad_norm": 0.30859375, "learning_rate": 0.001166233469054273, "loss": 1.6948, "step": 14651 }, { "epoch": 0.39319450407900386, "grad_norm": 0.306640625, "learning_rate": 0.0011662241739517666, "loss": 1.7306, "step": 14652 }, { "epoch": 0.3932213396307428, "grad_norm": 0.306640625, "learning_rate": 0.001166214877607131, "loss": 1.6588, "step": 14653 }, { "epoch": 0.39324817518248173, "grad_norm": 0.31640625, "learning_rate": 0.0011662055800203863, "loss": 1.725, "step": 14654 }, { "epoch": 0.3932750107342207, "grad_norm": 0.3125, "learning_rate": 0.0011661962811915533, "loss": 1.6256, "step": 14655 }, { "epoch": 0.39330184628595966, "grad_norm": 0.298828125, "learning_rate": 0.0011661869811206522, "loss": 1.5286, "step": 14656 }, { "epoch": 0.3933286818376986, "grad_norm": 0.314453125, "learning_rate": 0.0011661776798077036, "loss": 1.6652, "step": 14657 }, { "epoch": 0.3933555173894375, "grad_norm": 0.314453125, "learning_rate": 0.0011661683772527277, "loss": 1.6801, "step": 14658 }, { "epoch": 0.39338235294117646, "grad_norm": 0.32421875, "learning_rate": 0.0011661590734557447, "loss": 1.6703, "step": 14659 }, { "epoch": 0.3934091884929154, "grad_norm": 0.32421875, "learning_rate": 0.0011661497684167755, "loss": 1.7326, "step": 14660 }, { "epoch": 0.39343602404465433, "grad_norm": 0.310546875, "learning_rate": 0.0011661404621358402, "loss": 1.6345, "step": 14661 }, { "epoch": 0.3934628595963933, "grad_norm": 0.3203125, "learning_rate": 0.0011661311546129593, "loss": 1.7211, "step": 14662 }, { "epoch": 0.39348969514813226, "grad_norm": 0.318359375, "learning_rate": 0.0011661218458481532, "loss": 1.6057, "step": 14663 }, { "epoch": 0.3935165306998712, "grad_norm": 0.32421875, "learning_rate": 0.0011661125358414424, "loss": 1.6887, "step": 14664 }, { "epoch": 0.3935433662516101, "grad_norm": 0.314453125, "learning_rate": 0.0011661032245928469, "loss": 1.6922, "step": 14665 }, { "epoch": 0.39357020180334906, "grad_norm": 0.294921875, "learning_rate": 0.001166093912102388, "loss": 1.6037, "step": 14666 }, { "epoch": 0.393597037355088, "grad_norm": 0.31640625, "learning_rate": 0.0011660845983700852, "loss": 1.692, "step": 14667 }, { "epoch": 0.393623872906827, "grad_norm": 0.318359375, "learning_rate": 0.0011660752833959594, "loss": 1.729, "step": 14668 }, { "epoch": 0.3936507084585659, "grad_norm": 0.306640625, "learning_rate": 0.001166065967180031, "loss": 1.6457, "step": 14669 }, { "epoch": 0.39367754401030486, "grad_norm": 0.310546875, "learning_rate": 0.0011660566497223204, "loss": 1.6236, "step": 14670 }, { "epoch": 0.3937043795620438, "grad_norm": 0.322265625, "learning_rate": 0.0011660473310228482, "loss": 1.8095, "step": 14671 }, { "epoch": 0.39373121511378273, "grad_norm": 0.328125, "learning_rate": 0.0011660380110816344, "loss": 1.6235, "step": 14672 }, { "epoch": 0.39375805066552166, "grad_norm": 0.310546875, "learning_rate": 0.0011660286898987002, "loss": 1.6092, "step": 14673 }, { "epoch": 0.39378488621726065, "grad_norm": 0.310546875, "learning_rate": 0.0011660193674740652, "loss": 1.5981, "step": 14674 }, { "epoch": 0.3938117217689996, "grad_norm": 0.330078125, "learning_rate": 0.0011660100438077503, "loss": 1.7119, "step": 14675 }, { "epoch": 0.3938385573207385, "grad_norm": 0.318359375, "learning_rate": 0.0011660007188997758, "loss": 1.7565, "step": 14676 }, { "epoch": 0.39386539287247746, "grad_norm": 0.31640625, "learning_rate": 0.0011659913927501626, "loss": 1.5821, "step": 14677 }, { "epoch": 0.3938922284242164, "grad_norm": 0.322265625, "learning_rate": 0.0011659820653589304, "loss": 1.6806, "step": 14678 }, { "epoch": 0.39391906397595533, "grad_norm": 0.3125, "learning_rate": 0.0011659727367261002, "loss": 1.6589, "step": 14679 }, { "epoch": 0.39394589952769427, "grad_norm": 0.30859375, "learning_rate": 0.0011659634068516924, "loss": 1.658, "step": 14680 }, { "epoch": 0.39397273507943326, "grad_norm": 0.314453125, "learning_rate": 0.0011659540757357272, "loss": 1.6426, "step": 14681 }, { "epoch": 0.3939995706311722, "grad_norm": 0.330078125, "learning_rate": 0.0011659447433782254, "loss": 1.7622, "step": 14682 }, { "epoch": 0.3940264061829111, "grad_norm": 0.318359375, "learning_rate": 0.0011659354097792074, "loss": 1.6805, "step": 14683 }, { "epoch": 0.39405324173465006, "grad_norm": 0.3046875, "learning_rate": 0.0011659260749386936, "loss": 1.516, "step": 14684 }, { "epoch": 0.394080077286389, "grad_norm": 0.318359375, "learning_rate": 0.0011659167388567045, "loss": 1.59, "step": 14685 }, { "epoch": 0.39410691283812793, "grad_norm": 0.30078125, "learning_rate": 0.0011659074015332605, "loss": 1.4939, "step": 14686 }, { "epoch": 0.3941337483898669, "grad_norm": 0.3125, "learning_rate": 0.001165898062968382, "loss": 1.606, "step": 14687 }, { "epoch": 0.39416058394160586, "grad_norm": 0.32421875, "learning_rate": 0.00116588872316209, "loss": 1.6159, "step": 14688 }, { "epoch": 0.3941874194933448, "grad_norm": 0.30859375, "learning_rate": 0.0011658793821144044, "loss": 1.6079, "step": 14689 }, { "epoch": 0.39421425504508373, "grad_norm": 0.31640625, "learning_rate": 0.001165870039825346, "loss": 1.541, "step": 14690 }, { "epoch": 0.39424109059682266, "grad_norm": 0.3203125, "learning_rate": 0.001165860696294935, "loss": 1.7197, "step": 14691 }, { "epoch": 0.3942679261485616, "grad_norm": 0.328125, "learning_rate": 0.0011658513515231924, "loss": 1.7973, "step": 14692 }, { "epoch": 0.39429476170030053, "grad_norm": 0.298828125, "learning_rate": 0.0011658420055101382, "loss": 1.5101, "step": 14693 }, { "epoch": 0.3943215972520395, "grad_norm": 0.31640625, "learning_rate": 0.0011658326582557933, "loss": 1.6457, "step": 14694 }, { "epoch": 0.39434843280377846, "grad_norm": 0.328125, "learning_rate": 0.001165823309760178, "loss": 1.7501, "step": 14695 }, { "epoch": 0.3943752683555174, "grad_norm": 0.30078125, "learning_rate": 0.0011658139600233127, "loss": 1.5753, "step": 14696 }, { "epoch": 0.39440210390725633, "grad_norm": 0.302734375, "learning_rate": 0.001165804609045218, "loss": 1.5596, "step": 14697 }, { "epoch": 0.39442893945899526, "grad_norm": 0.296875, "learning_rate": 0.0011657952568259148, "loss": 1.4699, "step": 14698 }, { "epoch": 0.3944557750107342, "grad_norm": 0.3046875, "learning_rate": 0.0011657859033654227, "loss": 1.4508, "step": 14699 }, { "epoch": 0.3944826105624732, "grad_norm": 0.31640625, "learning_rate": 0.0011657765486637632, "loss": 1.6033, "step": 14700 }, { "epoch": 0.3945094461142121, "grad_norm": 0.328125, "learning_rate": 0.0011657671927209564, "loss": 1.6651, "step": 14701 }, { "epoch": 0.39453628166595106, "grad_norm": 0.310546875, "learning_rate": 0.0011657578355370225, "loss": 1.5794, "step": 14702 }, { "epoch": 0.39456311721769, "grad_norm": 0.31640625, "learning_rate": 0.0011657484771119828, "loss": 1.6283, "step": 14703 }, { "epoch": 0.39458995276942893, "grad_norm": 0.310546875, "learning_rate": 0.0011657391174458571, "loss": 1.647, "step": 14704 }, { "epoch": 0.39461678832116787, "grad_norm": 0.318359375, "learning_rate": 0.0011657297565386662, "loss": 1.7427, "step": 14705 }, { "epoch": 0.3946436238729068, "grad_norm": 0.3125, "learning_rate": 0.0011657203943904308, "loss": 1.7056, "step": 14706 }, { "epoch": 0.3946704594246458, "grad_norm": 0.29296875, "learning_rate": 0.0011657110310011713, "loss": 1.4395, "step": 14707 }, { "epoch": 0.3946972949763847, "grad_norm": 0.30859375, "learning_rate": 0.0011657016663709082, "loss": 1.5969, "step": 14708 }, { "epoch": 0.39472413052812366, "grad_norm": 0.30078125, "learning_rate": 0.001165692300499662, "loss": 1.5848, "step": 14709 }, { "epoch": 0.3947509660798626, "grad_norm": 0.310546875, "learning_rate": 0.0011656829333874534, "loss": 1.5948, "step": 14710 }, { "epoch": 0.39477780163160153, "grad_norm": 0.322265625, "learning_rate": 0.0011656735650343029, "loss": 1.6742, "step": 14711 }, { "epoch": 0.39480463718334047, "grad_norm": 0.306640625, "learning_rate": 0.0011656641954402307, "loss": 1.5511, "step": 14712 }, { "epoch": 0.39483147273507946, "grad_norm": 0.314453125, "learning_rate": 0.001165654824605258, "loss": 1.6204, "step": 14713 }, { "epoch": 0.3948583082868184, "grad_norm": 0.326171875, "learning_rate": 0.001165645452529405, "loss": 1.7024, "step": 14714 }, { "epoch": 0.39488514383855733, "grad_norm": 0.29296875, "learning_rate": 0.0011656360792126922, "loss": 1.3574, "step": 14715 }, { "epoch": 0.39491197939029626, "grad_norm": 0.3046875, "learning_rate": 0.0011656267046551406, "loss": 1.4989, "step": 14716 }, { "epoch": 0.3949388149420352, "grad_norm": 0.31640625, "learning_rate": 0.00116561732885677, "loss": 1.5513, "step": 14717 }, { "epoch": 0.39496565049377413, "grad_norm": 0.328125, "learning_rate": 0.0011656079518176014, "loss": 1.7395, "step": 14718 }, { "epoch": 0.39499248604551307, "grad_norm": 0.30078125, "learning_rate": 0.0011655985735376554, "loss": 1.515, "step": 14719 }, { "epoch": 0.39501932159725206, "grad_norm": 0.31640625, "learning_rate": 0.0011655891940169528, "loss": 1.6441, "step": 14720 }, { "epoch": 0.395046157148991, "grad_norm": 0.322265625, "learning_rate": 0.0011655798132555136, "loss": 1.5614, "step": 14721 }, { "epoch": 0.39507299270072993, "grad_norm": 0.3125, "learning_rate": 0.0011655704312533589, "loss": 1.6026, "step": 14722 }, { "epoch": 0.39509982825246887, "grad_norm": 0.314453125, "learning_rate": 0.001165561048010509, "loss": 1.5442, "step": 14723 }, { "epoch": 0.3951266638042078, "grad_norm": 0.310546875, "learning_rate": 0.0011655516635269844, "loss": 1.5476, "step": 14724 }, { "epoch": 0.39515349935594674, "grad_norm": 0.3125, "learning_rate": 0.001165542277802806, "loss": 1.5995, "step": 14725 }, { "epoch": 0.3951803349076857, "grad_norm": 0.310546875, "learning_rate": 0.0011655328908379942, "loss": 1.5396, "step": 14726 }, { "epoch": 0.39520717045942466, "grad_norm": 0.318359375, "learning_rate": 0.0011655235026325696, "loss": 1.5764, "step": 14727 }, { "epoch": 0.3952340060111636, "grad_norm": 0.310546875, "learning_rate": 0.0011655141131865529, "loss": 1.5679, "step": 14728 }, { "epoch": 0.39526084156290253, "grad_norm": 0.330078125, "learning_rate": 0.0011655047224999645, "loss": 1.8113, "step": 14729 }, { "epoch": 0.39528767711464147, "grad_norm": 0.314453125, "learning_rate": 0.001165495330572825, "loss": 1.6016, "step": 14730 }, { "epoch": 0.3953145126663804, "grad_norm": 0.333984375, "learning_rate": 0.0011654859374051554, "loss": 1.7485, "step": 14731 }, { "epoch": 0.39534134821811934, "grad_norm": 0.318359375, "learning_rate": 0.0011654765429969759, "loss": 1.6485, "step": 14732 }, { "epoch": 0.39536818376985833, "grad_norm": 0.306640625, "learning_rate": 0.0011654671473483073, "loss": 1.4695, "step": 14733 }, { "epoch": 0.39539501932159726, "grad_norm": 0.322265625, "learning_rate": 0.00116545775045917, "loss": 1.6169, "step": 14734 }, { "epoch": 0.3954218548733362, "grad_norm": 0.3125, "learning_rate": 0.0011654483523295847, "loss": 1.5764, "step": 14735 }, { "epoch": 0.39544869042507513, "grad_norm": 0.306640625, "learning_rate": 0.0011654389529595723, "loss": 1.4714, "step": 14736 }, { "epoch": 0.39547552597681407, "grad_norm": 0.310546875, "learning_rate": 0.0011654295523491531, "loss": 1.5851, "step": 14737 }, { "epoch": 0.395502361528553, "grad_norm": 0.310546875, "learning_rate": 0.0011654201504983479, "loss": 1.5327, "step": 14738 }, { "epoch": 0.395529197080292, "grad_norm": 0.30859375, "learning_rate": 0.001165410747407177, "loss": 1.5459, "step": 14739 }, { "epoch": 0.39555603263203093, "grad_norm": 0.3046875, "learning_rate": 0.0011654013430756613, "loss": 1.5867, "step": 14740 }, { "epoch": 0.39558286818376986, "grad_norm": 0.322265625, "learning_rate": 0.0011653919375038215, "loss": 1.6948, "step": 14741 }, { "epoch": 0.3956097037355088, "grad_norm": 0.314453125, "learning_rate": 0.0011653825306916783, "loss": 1.5613, "step": 14742 }, { "epoch": 0.39563653928724773, "grad_norm": 0.3125, "learning_rate": 0.001165373122639252, "loss": 1.5283, "step": 14743 }, { "epoch": 0.39566337483898667, "grad_norm": 0.3125, "learning_rate": 0.0011653637133465632, "loss": 1.4991, "step": 14744 }, { "epoch": 0.39569021039072566, "grad_norm": 0.31640625, "learning_rate": 0.001165354302813633, "loss": 1.6797, "step": 14745 }, { "epoch": 0.3957170459424646, "grad_norm": 0.302734375, "learning_rate": 0.0011653448910404816, "loss": 1.5191, "step": 14746 }, { "epoch": 0.39574388149420353, "grad_norm": 0.318359375, "learning_rate": 0.00116533547802713, "loss": 1.6502, "step": 14747 }, { "epoch": 0.39577071704594247, "grad_norm": 0.3125, "learning_rate": 0.0011653260637735988, "loss": 1.4981, "step": 14748 }, { "epoch": 0.3957975525976814, "grad_norm": 0.3046875, "learning_rate": 0.0011653166482799083, "loss": 1.4786, "step": 14749 }, { "epoch": 0.39582438814942034, "grad_norm": 0.330078125, "learning_rate": 0.0011653072315460794, "loss": 1.6241, "step": 14750 }, { "epoch": 0.39585122370115927, "grad_norm": 0.31640625, "learning_rate": 0.0011652978135721329, "loss": 1.5478, "step": 14751 }, { "epoch": 0.39587805925289826, "grad_norm": 0.310546875, "learning_rate": 0.0011652883943580894, "loss": 1.5234, "step": 14752 }, { "epoch": 0.3959048948046372, "grad_norm": 0.322265625, "learning_rate": 0.0011652789739039692, "loss": 1.5818, "step": 14753 }, { "epoch": 0.39593173035637613, "grad_norm": 0.330078125, "learning_rate": 0.0011652695522097933, "loss": 1.562, "step": 14754 }, { "epoch": 0.39595856590811507, "grad_norm": 0.314453125, "learning_rate": 0.0011652601292755824, "loss": 1.5616, "step": 14755 }, { "epoch": 0.395985401459854, "grad_norm": 0.3203125, "learning_rate": 0.0011652507051013571, "loss": 1.6213, "step": 14756 }, { "epoch": 0.39601223701159294, "grad_norm": 0.306640625, "learning_rate": 0.0011652412796871378, "loss": 1.5223, "step": 14757 }, { "epoch": 0.39603907256333193, "grad_norm": 0.3203125, "learning_rate": 0.0011652318530329458, "loss": 1.658, "step": 14758 }, { "epoch": 0.39606590811507086, "grad_norm": 0.3203125, "learning_rate": 0.0011652224251388012, "loss": 1.6474, "step": 14759 }, { "epoch": 0.3960927436668098, "grad_norm": 0.32421875, "learning_rate": 0.001165212996004725, "loss": 1.6884, "step": 14760 }, { "epoch": 0.39611957921854873, "grad_norm": 0.314453125, "learning_rate": 0.0011652035656307376, "loss": 1.62, "step": 14761 }, { "epoch": 0.39614641477028767, "grad_norm": 0.32421875, "learning_rate": 0.00116519413401686, "loss": 1.559, "step": 14762 }, { "epoch": 0.3961732503220266, "grad_norm": 0.318359375, "learning_rate": 0.0011651847011631125, "loss": 1.5064, "step": 14763 }, { "epoch": 0.39620008587376554, "grad_norm": 0.306640625, "learning_rate": 0.0011651752670695163, "loss": 1.5419, "step": 14764 }, { "epoch": 0.39622692142550453, "grad_norm": 0.3203125, "learning_rate": 0.0011651658317360918, "loss": 1.6325, "step": 14765 }, { "epoch": 0.39625375697724347, "grad_norm": 0.31640625, "learning_rate": 0.0011651563951628598, "loss": 1.6033, "step": 14766 }, { "epoch": 0.3962805925289824, "grad_norm": 0.318359375, "learning_rate": 0.0011651469573498408, "loss": 1.6, "step": 14767 }, { "epoch": 0.39630742808072134, "grad_norm": 0.33203125, "learning_rate": 0.0011651375182970558, "loss": 1.6346, "step": 14768 }, { "epoch": 0.39633426363246027, "grad_norm": 0.322265625, "learning_rate": 0.0011651280780045252, "loss": 1.5022, "step": 14769 }, { "epoch": 0.3963610991841992, "grad_norm": 0.33203125, "learning_rate": 0.00116511863647227, "loss": 1.5812, "step": 14770 }, { "epoch": 0.3963879347359382, "grad_norm": 0.3203125, "learning_rate": 0.0011651091937003107, "loss": 1.6243, "step": 14771 }, { "epoch": 0.39641477028767713, "grad_norm": 0.314453125, "learning_rate": 0.001165099749688668, "loss": 1.4998, "step": 14772 }, { "epoch": 0.39644160583941607, "grad_norm": 0.32421875, "learning_rate": 0.0011650903044373628, "loss": 1.5751, "step": 14773 }, { "epoch": 0.396468441391155, "grad_norm": 0.328125, "learning_rate": 0.0011650808579464157, "loss": 1.5746, "step": 14774 }, { "epoch": 0.39649527694289394, "grad_norm": 0.330078125, "learning_rate": 0.0011650714102158476, "loss": 1.6445, "step": 14775 }, { "epoch": 0.3965221124946329, "grad_norm": 0.3125, "learning_rate": 0.0011650619612456788, "loss": 1.5147, "step": 14776 }, { "epoch": 0.3965489480463718, "grad_norm": 0.3125, "learning_rate": 0.0011650525110359306, "loss": 1.5574, "step": 14777 }, { "epoch": 0.3965757835981108, "grad_norm": 0.32421875, "learning_rate": 0.0011650430595866233, "loss": 1.5374, "step": 14778 }, { "epoch": 0.39660261914984973, "grad_norm": 0.326171875, "learning_rate": 0.0011650336068977777, "loss": 1.5907, "step": 14779 }, { "epoch": 0.39662945470158867, "grad_norm": 0.3125, "learning_rate": 0.0011650241529694144, "loss": 1.5553, "step": 14780 }, { "epoch": 0.3966562902533276, "grad_norm": 0.31640625, "learning_rate": 0.0011650146978015548, "loss": 1.6581, "step": 14781 }, { "epoch": 0.39668312580506654, "grad_norm": 0.314453125, "learning_rate": 0.0011650052413942188, "loss": 1.6381, "step": 14782 }, { "epoch": 0.3967099613568055, "grad_norm": 0.318359375, "learning_rate": 0.0011649957837474277, "loss": 1.5508, "step": 14783 }, { "epoch": 0.39673679690854446, "grad_norm": 0.318359375, "learning_rate": 0.0011649863248612022, "loss": 1.6285, "step": 14784 }, { "epoch": 0.3967636324602834, "grad_norm": 0.302734375, "learning_rate": 0.0011649768647355627, "loss": 1.4487, "step": 14785 }, { "epoch": 0.39679046801202233, "grad_norm": 0.310546875, "learning_rate": 0.0011649674033705302, "loss": 1.464, "step": 14786 }, { "epoch": 0.39681730356376127, "grad_norm": 0.328125, "learning_rate": 0.0011649579407661255, "loss": 1.5133, "step": 14787 }, { "epoch": 0.3968441391155002, "grad_norm": 0.3125, "learning_rate": 0.0011649484769223693, "loss": 1.4625, "step": 14788 }, { "epoch": 0.39687097466723914, "grad_norm": 0.306640625, "learning_rate": 0.0011649390118392824, "loss": 1.4252, "step": 14789 }, { "epoch": 0.3968978102189781, "grad_norm": 0.33984375, "learning_rate": 0.0011649295455168854, "loss": 1.6327, "step": 14790 }, { "epoch": 0.39692464577071707, "grad_norm": 0.291015625, "learning_rate": 0.0011649200779551992, "loss": 1.3494, "step": 14791 }, { "epoch": 0.396951481322456, "grad_norm": 0.33203125, "learning_rate": 0.0011649106091542445, "loss": 1.6984, "step": 14792 }, { "epoch": 0.39697831687419494, "grad_norm": 0.318359375, "learning_rate": 0.0011649011391140424, "loss": 1.6027, "step": 14793 }, { "epoch": 0.39700515242593387, "grad_norm": 0.31640625, "learning_rate": 0.0011648916678346133, "loss": 1.5368, "step": 14794 }, { "epoch": 0.3970319879776728, "grad_norm": 0.3125, "learning_rate": 0.001164882195315978, "loss": 1.5315, "step": 14795 }, { "epoch": 0.39705882352941174, "grad_norm": 0.322265625, "learning_rate": 0.0011648727215581571, "loss": 1.4225, "step": 14796 }, { "epoch": 0.39708565908115073, "grad_norm": 0.326171875, "learning_rate": 0.001164863246561172, "loss": 1.6507, "step": 14797 }, { "epoch": 0.39711249463288967, "grad_norm": 0.31640625, "learning_rate": 0.0011648537703250429, "loss": 1.5239, "step": 14798 }, { "epoch": 0.3971393301846286, "grad_norm": 0.3125, "learning_rate": 0.0011648442928497908, "loss": 1.5084, "step": 14799 }, { "epoch": 0.39716616573636754, "grad_norm": 0.328125, "learning_rate": 0.0011648348141354366, "loss": 1.5289, "step": 14800 }, { "epoch": 0.3971930012881065, "grad_norm": 0.333984375, "learning_rate": 0.001164825334182001, "loss": 1.6436, "step": 14801 }, { "epoch": 0.3972198368398454, "grad_norm": 0.32421875, "learning_rate": 0.0011648158529895048, "loss": 1.6494, "step": 14802 }, { "epoch": 0.39724667239158434, "grad_norm": 0.318359375, "learning_rate": 0.0011648063705579689, "loss": 1.5434, "step": 14803 }, { "epoch": 0.39727350794332333, "grad_norm": 0.326171875, "learning_rate": 0.0011647968868874138, "loss": 1.6251, "step": 14804 }, { "epoch": 0.39730034349506227, "grad_norm": 0.3046875, "learning_rate": 0.0011647874019778607, "loss": 1.4538, "step": 14805 }, { "epoch": 0.3973271790468012, "grad_norm": 0.318359375, "learning_rate": 0.00116477791582933, "loss": 1.6185, "step": 14806 }, { "epoch": 0.39735401459854014, "grad_norm": 0.30078125, "learning_rate": 0.0011647684284418428, "loss": 1.4553, "step": 14807 }, { "epoch": 0.3973808501502791, "grad_norm": 0.30078125, "learning_rate": 0.0011647589398154198, "loss": 1.5025, "step": 14808 }, { "epoch": 0.397407685702018, "grad_norm": 0.3203125, "learning_rate": 0.0011647494499500818, "loss": 1.4911, "step": 14809 }, { "epoch": 0.397434521253757, "grad_norm": 0.318359375, "learning_rate": 0.0011647399588458496, "loss": 1.5715, "step": 14810 }, { "epoch": 0.39746135680549594, "grad_norm": 0.322265625, "learning_rate": 0.0011647304665027443, "loss": 1.5919, "step": 14811 }, { "epoch": 0.39748819235723487, "grad_norm": 0.314453125, "learning_rate": 0.0011647209729207864, "loss": 1.4933, "step": 14812 }, { "epoch": 0.3975150279089738, "grad_norm": 0.32421875, "learning_rate": 0.001164711478099997, "loss": 1.5874, "step": 14813 }, { "epoch": 0.39754186346071274, "grad_norm": 0.328125, "learning_rate": 0.0011647019820403965, "loss": 1.5374, "step": 14814 }, { "epoch": 0.3975686990124517, "grad_norm": 0.314453125, "learning_rate": 0.0011646924847420062, "loss": 1.5305, "step": 14815 }, { "epoch": 0.39759553456419067, "grad_norm": 0.3203125, "learning_rate": 0.0011646829862048463, "loss": 1.5257, "step": 14816 }, { "epoch": 0.3976223701159296, "grad_norm": 0.30078125, "learning_rate": 0.0011646734864289385, "loss": 1.4265, "step": 14817 }, { "epoch": 0.39764920566766854, "grad_norm": 0.322265625, "learning_rate": 0.0011646639854143031, "loss": 1.594, "step": 14818 }, { "epoch": 0.3976760412194075, "grad_norm": 0.314453125, "learning_rate": 0.0011646544831609609, "loss": 1.4798, "step": 14819 }, { "epoch": 0.3977028767711464, "grad_norm": 0.322265625, "learning_rate": 0.001164644979668933, "loss": 1.4635, "step": 14820 }, { "epoch": 0.39772971232288534, "grad_norm": 0.326171875, "learning_rate": 0.00116463547493824, "loss": 1.5962, "step": 14821 }, { "epoch": 0.3977565478746243, "grad_norm": 0.330078125, "learning_rate": 0.001164625968968903, "loss": 1.5237, "step": 14822 }, { "epoch": 0.39778338342636327, "grad_norm": 0.294921875, "learning_rate": 0.001164616461760943, "loss": 1.3944, "step": 14823 }, { "epoch": 0.3978102189781022, "grad_norm": 0.3125, "learning_rate": 0.0011646069533143802, "loss": 1.5819, "step": 14824 }, { "epoch": 0.39783705452984114, "grad_norm": 0.33984375, "learning_rate": 0.0011645974436292358, "loss": 1.7029, "step": 14825 }, { "epoch": 0.3978638900815801, "grad_norm": 0.310546875, "learning_rate": 0.001164587932705531, "loss": 1.523, "step": 14826 }, { "epoch": 0.397890725633319, "grad_norm": 0.310546875, "learning_rate": 0.0011645784205432865, "loss": 1.54, "step": 14827 }, { "epoch": 0.39791756118505794, "grad_norm": 0.333984375, "learning_rate": 0.0011645689071425227, "loss": 1.616, "step": 14828 }, { "epoch": 0.39794439673679693, "grad_norm": 0.333984375, "learning_rate": 0.001164559392503261, "loss": 1.6245, "step": 14829 }, { "epoch": 0.39797123228853587, "grad_norm": 0.326171875, "learning_rate": 0.0011645498766255221, "loss": 1.6038, "step": 14830 }, { "epoch": 0.3979980678402748, "grad_norm": 0.3203125, "learning_rate": 0.0011645403595093268, "loss": 1.5456, "step": 14831 }, { "epoch": 0.39802490339201374, "grad_norm": 0.310546875, "learning_rate": 0.0011645308411546958, "loss": 1.4537, "step": 14832 }, { "epoch": 0.3980517389437527, "grad_norm": 0.396484375, "learning_rate": 0.0011645213215616505, "loss": 1.9016, "step": 14833 }, { "epoch": 0.3980785744954916, "grad_norm": 0.41796875, "learning_rate": 0.0011645118007302115, "loss": 1.8218, "step": 14834 }, { "epoch": 0.39810541004723055, "grad_norm": 0.3359375, "learning_rate": 0.0011645022786603999, "loss": 1.7501, "step": 14835 }, { "epoch": 0.39813224559896954, "grad_norm": 0.337890625, "learning_rate": 0.0011644927553522359, "loss": 1.8349, "step": 14836 }, { "epoch": 0.39815908115070847, "grad_norm": 0.349609375, "learning_rate": 0.001164483230805741, "loss": 1.9675, "step": 14837 }, { "epoch": 0.3981859167024474, "grad_norm": 0.326171875, "learning_rate": 0.0011644737050209362, "loss": 1.7601, "step": 14838 }, { "epoch": 0.39821275225418634, "grad_norm": 0.326171875, "learning_rate": 0.001164464177997842, "loss": 1.9478, "step": 14839 }, { "epoch": 0.3982395878059253, "grad_norm": 0.3359375, "learning_rate": 0.0011644546497364798, "loss": 1.9699, "step": 14840 }, { "epoch": 0.3982664233576642, "grad_norm": 0.330078125, "learning_rate": 0.0011644451202368698, "loss": 1.8887, "step": 14841 }, { "epoch": 0.3982932589094032, "grad_norm": 0.326171875, "learning_rate": 0.0011644355894990333, "loss": 1.7848, "step": 14842 }, { "epoch": 0.39832009446114214, "grad_norm": 0.31640625, "learning_rate": 0.0011644260575229912, "loss": 1.9154, "step": 14843 }, { "epoch": 0.3983469300128811, "grad_norm": 0.3203125, "learning_rate": 0.0011644165243087643, "loss": 1.8627, "step": 14844 }, { "epoch": 0.39837376556462, "grad_norm": 0.326171875, "learning_rate": 0.0011644069898563737, "loss": 1.9069, "step": 14845 }, { "epoch": 0.39840060111635894, "grad_norm": 0.330078125, "learning_rate": 0.0011643974541658403, "loss": 1.8238, "step": 14846 }, { "epoch": 0.3984274366680979, "grad_norm": 0.31640625, "learning_rate": 0.0011643879172371847, "loss": 1.8011, "step": 14847 }, { "epoch": 0.3984542722198368, "grad_norm": 0.3125, "learning_rate": 0.0011643783790704282, "loss": 1.7456, "step": 14848 }, { "epoch": 0.3984811077715758, "grad_norm": 0.306640625, "learning_rate": 0.0011643688396655917, "loss": 1.7095, "step": 14849 }, { "epoch": 0.39850794332331474, "grad_norm": 0.326171875, "learning_rate": 0.001164359299022696, "loss": 1.8246, "step": 14850 }, { "epoch": 0.3985347788750537, "grad_norm": 0.318359375, "learning_rate": 0.0011643497571417617, "loss": 1.8174, "step": 14851 }, { "epoch": 0.3985616144267926, "grad_norm": 0.3125, "learning_rate": 0.0011643402140228104, "loss": 1.7932, "step": 14852 }, { "epoch": 0.39858844997853154, "grad_norm": 0.314453125, "learning_rate": 0.0011643306696658626, "loss": 1.8566, "step": 14853 }, { "epoch": 0.3986152855302705, "grad_norm": 0.3203125, "learning_rate": 0.0011643211240709393, "loss": 1.8894, "step": 14854 }, { "epoch": 0.39864212108200947, "grad_norm": 0.31640625, "learning_rate": 0.0011643115772380615, "loss": 1.8541, "step": 14855 }, { "epoch": 0.3986689566337484, "grad_norm": 0.326171875, "learning_rate": 0.0011643020291672503, "loss": 1.924, "step": 14856 }, { "epoch": 0.39869579218548734, "grad_norm": 0.31640625, "learning_rate": 0.0011642924798585261, "loss": 1.8947, "step": 14857 }, { "epoch": 0.3987226277372263, "grad_norm": 0.3359375, "learning_rate": 0.0011642829293119104, "loss": 1.9816, "step": 14858 }, { "epoch": 0.3987494632889652, "grad_norm": 0.30859375, "learning_rate": 0.001164273377527424, "loss": 1.7339, "step": 14859 }, { "epoch": 0.39877629884070415, "grad_norm": 0.314453125, "learning_rate": 0.0011642638245050877, "loss": 1.8092, "step": 14860 }, { "epoch": 0.3988031343924431, "grad_norm": 0.306640625, "learning_rate": 0.0011642542702449228, "loss": 1.7649, "step": 14861 }, { "epoch": 0.3988299699441821, "grad_norm": 0.31640625, "learning_rate": 0.0011642447147469498, "loss": 1.822, "step": 14862 }, { "epoch": 0.398856805495921, "grad_norm": 0.31640625, "learning_rate": 0.0011642351580111903, "loss": 1.8707, "step": 14863 }, { "epoch": 0.39888364104765994, "grad_norm": 0.30078125, "learning_rate": 0.0011642256000376646, "loss": 1.6789, "step": 14864 }, { "epoch": 0.3989104765993989, "grad_norm": 0.314453125, "learning_rate": 0.0011642160408263937, "loss": 1.8164, "step": 14865 }, { "epoch": 0.3989373121511378, "grad_norm": 0.314453125, "learning_rate": 0.001164206480377399, "loss": 1.8097, "step": 14866 }, { "epoch": 0.39896414770287675, "grad_norm": 0.306640625, "learning_rate": 0.0011641969186907014, "loss": 1.8064, "step": 14867 }, { "epoch": 0.39899098325461574, "grad_norm": 0.3125, "learning_rate": 0.0011641873557663217, "loss": 1.7651, "step": 14868 }, { "epoch": 0.3990178188063547, "grad_norm": 0.33203125, "learning_rate": 0.0011641777916042808, "loss": 1.8593, "step": 14869 }, { "epoch": 0.3990446543580936, "grad_norm": 0.310546875, "learning_rate": 0.0011641682262045997, "loss": 1.7904, "step": 14870 }, { "epoch": 0.39907148990983254, "grad_norm": 0.328125, "learning_rate": 0.0011641586595672999, "loss": 1.886, "step": 14871 }, { "epoch": 0.3990983254615715, "grad_norm": 0.318359375, "learning_rate": 0.0011641490916924017, "loss": 1.8669, "step": 14872 }, { "epoch": 0.3991251610133104, "grad_norm": 0.326171875, "learning_rate": 0.0011641395225799264, "loss": 1.8828, "step": 14873 }, { "epoch": 0.3991519965650494, "grad_norm": 0.314453125, "learning_rate": 0.001164129952229895, "loss": 1.7989, "step": 14874 }, { "epoch": 0.39917883211678834, "grad_norm": 0.3203125, "learning_rate": 0.0011641203806423285, "loss": 1.8821, "step": 14875 }, { "epoch": 0.3992056676685273, "grad_norm": 0.314453125, "learning_rate": 0.0011641108078172474, "loss": 1.8113, "step": 14876 }, { "epoch": 0.3992325032202662, "grad_norm": 0.298828125, "learning_rate": 0.0011641012337546737, "loss": 1.7214, "step": 14877 }, { "epoch": 0.39925933877200515, "grad_norm": 0.322265625, "learning_rate": 0.0011640916584546278, "loss": 1.7882, "step": 14878 }, { "epoch": 0.3992861743237441, "grad_norm": 0.32421875, "learning_rate": 0.0011640820819171305, "loss": 1.833, "step": 14879 }, { "epoch": 0.399313009875483, "grad_norm": 0.314453125, "learning_rate": 0.0011640725041422032, "loss": 1.7746, "step": 14880 }, { "epoch": 0.399339845427222, "grad_norm": 0.322265625, "learning_rate": 0.0011640629251298668, "loss": 1.8557, "step": 14881 }, { "epoch": 0.39936668097896094, "grad_norm": 0.31640625, "learning_rate": 0.0011640533448801424, "loss": 1.7637, "step": 14882 }, { "epoch": 0.3993935165306999, "grad_norm": 0.30859375, "learning_rate": 0.0011640437633930508, "loss": 1.7498, "step": 14883 }, { "epoch": 0.3994203520824388, "grad_norm": 0.30859375, "learning_rate": 0.001164034180668613, "loss": 1.7247, "step": 14884 }, { "epoch": 0.39944718763417775, "grad_norm": 0.322265625, "learning_rate": 0.0011640245967068504, "loss": 1.8916, "step": 14885 }, { "epoch": 0.3994740231859167, "grad_norm": 0.29296875, "learning_rate": 0.0011640150115077837, "loss": 1.5825, "step": 14886 }, { "epoch": 0.3995008587376557, "grad_norm": 0.3203125, "learning_rate": 0.0011640054250714339, "loss": 1.7936, "step": 14887 }, { "epoch": 0.3995276942893946, "grad_norm": 0.3203125, "learning_rate": 0.0011639958373978222, "loss": 1.8053, "step": 14888 }, { "epoch": 0.39955452984113354, "grad_norm": 0.30859375, "learning_rate": 0.0011639862484869694, "loss": 1.7809, "step": 14889 }, { "epoch": 0.3995813653928725, "grad_norm": 0.318359375, "learning_rate": 0.0011639766583388968, "loss": 1.8434, "step": 14890 }, { "epoch": 0.3996082009446114, "grad_norm": 0.3046875, "learning_rate": 0.0011639670669536253, "loss": 1.6341, "step": 14891 }, { "epoch": 0.39963503649635035, "grad_norm": 0.3203125, "learning_rate": 0.001163957474331176, "loss": 1.7745, "step": 14892 }, { "epoch": 0.3996618720480893, "grad_norm": 0.30859375, "learning_rate": 0.0011639478804715702, "loss": 1.6451, "step": 14893 }, { "epoch": 0.3996887075998283, "grad_norm": 0.333984375, "learning_rate": 0.0011639382853748284, "loss": 1.8731, "step": 14894 }, { "epoch": 0.3997155431515672, "grad_norm": 0.3203125, "learning_rate": 0.001163928689040972, "loss": 1.7521, "step": 14895 }, { "epoch": 0.39974237870330614, "grad_norm": 0.310546875, "learning_rate": 0.001163919091470022, "loss": 1.6768, "step": 14896 }, { "epoch": 0.3997692142550451, "grad_norm": 0.322265625, "learning_rate": 0.0011639094926619992, "loss": 1.74, "step": 14897 }, { "epoch": 0.399796049806784, "grad_norm": 0.326171875, "learning_rate": 0.001163899892616925, "loss": 1.8856, "step": 14898 }, { "epoch": 0.39982288535852295, "grad_norm": 0.3125, "learning_rate": 0.0011638902913348203, "loss": 1.6673, "step": 14899 }, { "epoch": 0.39984972091026194, "grad_norm": 0.30078125, "learning_rate": 0.0011638806888157062, "loss": 1.6978, "step": 14900 }, { "epoch": 0.3998765564620009, "grad_norm": 0.31640625, "learning_rate": 0.0011638710850596038, "loss": 1.7261, "step": 14901 }, { "epoch": 0.3999033920137398, "grad_norm": 0.322265625, "learning_rate": 0.0011638614800665342, "loss": 1.709, "step": 14902 }, { "epoch": 0.39993022756547875, "grad_norm": 0.314453125, "learning_rate": 0.0011638518738365184, "loss": 1.6673, "step": 14903 }, { "epoch": 0.3999570631172177, "grad_norm": 0.314453125, "learning_rate": 0.0011638422663695774, "loss": 1.7992, "step": 14904 }, { "epoch": 0.3999838986689566, "grad_norm": 0.32421875, "learning_rate": 0.0011638326576657323, "loss": 1.7722, "step": 14905 }, { "epoch": 0.40001073422069555, "grad_norm": 0.314453125, "learning_rate": 0.0011638230477250043, "loss": 1.7611, "step": 14906 }, { "epoch": 0.40003756977243454, "grad_norm": 0.314453125, "learning_rate": 0.0011638134365474146, "loss": 1.7711, "step": 14907 }, { "epoch": 0.4000644053241735, "grad_norm": 0.30859375, "learning_rate": 0.0011638038241329838, "loss": 1.738, "step": 14908 }, { "epoch": 0.4000912408759124, "grad_norm": 0.3046875, "learning_rate": 0.0011637942104817335, "loss": 1.666, "step": 14909 }, { "epoch": 0.40011807642765135, "grad_norm": 0.3125, "learning_rate": 0.0011637845955936844, "loss": 1.7318, "step": 14910 }, { "epoch": 0.4001449119793903, "grad_norm": 0.322265625, "learning_rate": 0.0011637749794688578, "loss": 1.7837, "step": 14911 }, { "epoch": 0.4001717475311292, "grad_norm": 0.302734375, "learning_rate": 0.001163765362107275, "loss": 1.6837, "step": 14912 }, { "epoch": 0.4001985830828682, "grad_norm": 0.318359375, "learning_rate": 0.0011637557435089565, "loss": 1.7799, "step": 14913 }, { "epoch": 0.40022541863460714, "grad_norm": 0.3125, "learning_rate": 0.001163746123673924, "loss": 1.6857, "step": 14914 }, { "epoch": 0.4002522541863461, "grad_norm": 0.326171875, "learning_rate": 0.0011637365026021982, "loss": 1.8259, "step": 14915 }, { "epoch": 0.400279089738085, "grad_norm": 0.31640625, "learning_rate": 0.0011637268802938004, "loss": 1.7573, "step": 14916 }, { "epoch": 0.40030592528982395, "grad_norm": 0.31640625, "learning_rate": 0.001163717256748752, "loss": 1.6815, "step": 14917 }, { "epoch": 0.4003327608415629, "grad_norm": 0.318359375, "learning_rate": 0.0011637076319670732, "loss": 1.7926, "step": 14918 }, { "epoch": 0.4003595963933018, "grad_norm": 0.3125, "learning_rate": 0.001163698005948786, "loss": 1.7289, "step": 14919 }, { "epoch": 0.4003864319450408, "grad_norm": 0.3125, "learning_rate": 0.0011636883786939115, "loss": 1.7711, "step": 14920 }, { "epoch": 0.40041326749677975, "grad_norm": 0.310546875, "learning_rate": 0.00116367875020247, "loss": 1.7064, "step": 14921 }, { "epoch": 0.4004401030485187, "grad_norm": 0.310546875, "learning_rate": 0.0011636691204744835, "loss": 1.7791, "step": 14922 }, { "epoch": 0.4004669386002576, "grad_norm": 0.32421875, "learning_rate": 0.0011636594895099728, "loss": 1.7087, "step": 14923 }, { "epoch": 0.40049377415199655, "grad_norm": 0.330078125, "learning_rate": 0.001163649857308959, "loss": 1.8267, "step": 14924 }, { "epoch": 0.4005206097037355, "grad_norm": 0.330078125, "learning_rate": 0.001163640223871463, "loss": 1.7168, "step": 14925 }, { "epoch": 0.4005474452554745, "grad_norm": 0.328125, "learning_rate": 0.0011636305891975065, "loss": 1.709, "step": 14926 }, { "epoch": 0.4005742808072134, "grad_norm": 0.3125, "learning_rate": 0.00116362095328711, "loss": 1.6506, "step": 14927 }, { "epoch": 0.40060111635895235, "grad_norm": 0.3203125, "learning_rate": 0.001163611316140295, "loss": 1.6575, "step": 14928 }, { "epoch": 0.4006279519106913, "grad_norm": 0.32421875, "learning_rate": 0.0011636016777570829, "loss": 1.6953, "step": 14929 }, { "epoch": 0.4006547874624302, "grad_norm": 0.31640625, "learning_rate": 0.001163592038137494, "loss": 1.7065, "step": 14930 }, { "epoch": 0.40068162301416915, "grad_norm": 0.33984375, "learning_rate": 0.0011635823972815503, "loss": 1.8042, "step": 14931 }, { "epoch": 0.4007084585659081, "grad_norm": 0.328125, "learning_rate": 0.0011635727551892727, "loss": 1.8079, "step": 14932 }, { "epoch": 0.4007352941176471, "grad_norm": 0.318359375, "learning_rate": 0.0011635631118606822, "loss": 1.6598, "step": 14933 }, { "epoch": 0.400762129669386, "grad_norm": 0.3203125, "learning_rate": 0.0011635534672958, "loss": 1.7527, "step": 14934 }, { "epoch": 0.40078896522112495, "grad_norm": 0.33984375, "learning_rate": 0.0011635438214946474, "loss": 1.7342, "step": 14935 }, { "epoch": 0.4008158007728639, "grad_norm": 0.318359375, "learning_rate": 0.0011635341744572454, "loss": 1.7715, "step": 14936 }, { "epoch": 0.4008426363246028, "grad_norm": 0.3125, "learning_rate": 0.001163524526183615, "loss": 1.6628, "step": 14937 }, { "epoch": 0.40086947187634175, "grad_norm": 0.318359375, "learning_rate": 0.0011635148766737779, "loss": 1.6974, "step": 14938 }, { "epoch": 0.40089630742808074, "grad_norm": 0.310546875, "learning_rate": 0.0011635052259277548, "loss": 1.659, "step": 14939 }, { "epoch": 0.4009231429798197, "grad_norm": 0.30859375, "learning_rate": 0.0011634955739455671, "loss": 1.6288, "step": 14940 }, { "epoch": 0.4009499785315586, "grad_norm": 0.3125, "learning_rate": 0.001163485920727236, "loss": 1.7222, "step": 14941 }, { "epoch": 0.40097681408329755, "grad_norm": 0.31640625, "learning_rate": 0.0011634762662727823, "loss": 1.8523, "step": 14942 }, { "epoch": 0.4010036496350365, "grad_norm": 0.314453125, "learning_rate": 0.0011634666105822275, "loss": 1.7944, "step": 14943 }, { "epoch": 0.4010304851867754, "grad_norm": 0.31640625, "learning_rate": 0.0011634569536555929, "loss": 1.7045, "step": 14944 }, { "epoch": 0.4010573207385144, "grad_norm": 0.31640625, "learning_rate": 0.0011634472954928994, "loss": 1.6676, "step": 14945 }, { "epoch": 0.40108415629025335, "grad_norm": 0.32421875, "learning_rate": 0.0011634376360941682, "loss": 1.7849, "step": 14946 }, { "epoch": 0.4011109918419923, "grad_norm": 0.310546875, "learning_rate": 0.0011634279754594208, "loss": 1.6288, "step": 14947 }, { "epoch": 0.4011378273937312, "grad_norm": 0.310546875, "learning_rate": 0.001163418313588678, "loss": 1.6151, "step": 14948 }, { "epoch": 0.40116466294547015, "grad_norm": 0.302734375, "learning_rate": 0.0011634086504819614, "loss": 1.6279, "step": 14949 }, { "epoch": 0.4011914984972091, "grad_norm": 0.314453125, "learning_rate": 0.0011633989861392917, "loss": 1.7794, "step": 14950 }, { "epoch": 0.401218334048948, "grad_norm": 0.31640625, "learning_rate": 0.0011633893205606905, "loss": 1.6653, "step": 14951 }, { "epoch": 0.401245169600687, "grad_norm": 0.3203125, "learning_rate": 0.001163379653746179, "loss": 1.7032, "step": 14952 }, { "epoch": 0.40127200515242595, "grad_norm": 0.326171875, "learning_rate": 0.0011633699856957783, "loss": 1.7095, "step": 14953 }, { "epoch": 0.4012988407041649, "grad_norm": 0.318359375, "learning_rate": 0.0011633603164095093, "loss": 1.7294, "step": 14954 }, { "epoch": 0.4013256762559038, "grad_norm": 0.31640625, "learning_rate": 0.0011633506458873938, "loss": 1.7127, "step": 14955 }, { "epoch": 0.40135251180764275, "grad_norm": 0.318359375, "learning_rate": 0.0011633409741294526, "loss": 1.738, "step": 14956 }, { "epoch": 0.4013793473593817, "grad_norm": 0.296875, "learning_rate": 0.001163331301135707, "loss": 1.593, "step": 14957 }, { "epoch": 0.4014061829111207, "grad_norm": 0.328125, "learning_rate": 0.0011633216269061782, "loss": 1.7856, "step": 14958 }, { "epoch": 0.4014330184628596, "grad_norm": 0.33203125, "learning_rate": 0.0011633119514408877, "loss": 1.8455, "step": 14959 }, { "epoch": 0.40145985401459855, "grad_norm": 0.30859375, "learning_rate": 0.0011633022747398565, "loss": 1.5546, "step": 14960 }, { "epoch": 0.4014866895663375, "grad_norm": 0.314453125, "learning_rate": 0.0011632925968031056, "loss": 1.728, "step": 14961 }, { "epoch": 0.4015135251180764, "grad_norm": 0.3125, "learning_rate": 0.0011632829176306566, "loss": 1.6581, "step": 14962 }, { "epoch": 0.40154036066981535, "grad_norm": 0.33984375, "learning_rate": 0.0011632732372225307, "loss": 1.8247, "step": 14963 }, { "epoch": 0.4015671962215543, "grad_norm": 0.306640625, "learning_rate": 0.001163263555578749, "loss": 1.6262, "step": 14964 }, { "epoch": 0.4015940317732933, "grad_norm": 0.30078125, "learning_rate": 0.0011632538726993327, "loss": 1.6231, "step": 14965 }, { "epoch": 0.4016208673250322, "grad_norm": 0.318359375, "learning_rate": 0.001163244188584303, "loss": 1.7414, "step": 14966 }, { "epoch": 0.40164770287677115, "grad_norm": 0.314453125, "learning_rate": 0.0011632345032336815, "loss": 1.7256, "step": 14967 }, { "epoch": 0.4016745384285101, "grad_norm": 0.33203125, "learning_rate": 0.0011632248166474892, "loss": 1.7582, "step": 14968 }, { "epoch": 0.401701373980249, "grad_norm": 0.322265625, "learning_rate": 0.0011632151288257472, "loss": 1.7394, "step": 14969 }, { "epoch": 0.40172820953198796, "grad_norm": 0.31640625, "learning_rate": 0.001163205439768477, "loss": 1.7446, "step": 14970 }, { "epoch": 0.40175504508372695, "grad_norm": 0.326171875, "learning_rate": 0.0011631957494756997, "loss": 1.828, "step": 14971 }, { "epoch": 0.4017818806354659, "grad_norm": 0.3203125, "learning_rate": 0.0011631860579474366, "loss": 1.6392, "step": 14972 }, { "epoch": 0.4018087161872048, "grad_norm": 0.326171875, "learning_rate": 0.0011631763651837092, "loss": 1.7522, "step": 14973 }, { "epoch": 0.40183555173894375, "grad_norm": 0.322265625, "learning_rate": 0.0011631666711845383, "loss": 1.565, "step": 14974 }, { "epoch": 0.4018623872906827, "grad_norm": 0.32421875, "learning_rate": 0.0011631569759499453, "loss": 1.658, "step": 14975 }, { "epoch": 0.4018892228424216, "grad_norm": 0.326171875, "learning_rate": 0.0011631472794799518, "loss": 1.6623, "step": 14976 }, { "epoch": 0.40191605839416056, "grad_norm": 0.32421875, "learning_rate": 0.0011631375817745789, "loss": 1.7699, "step": 14977 }, { "epoch": 0.40194289394589955, "grad_norm": 0.32421875, "learning_rate": 0.0011631278828338478, "loss": 1.7508, "step": 14978 }, { "epoch": 0.4019697294976385, "grad_norm": 0.32421875, "learning_rate": 0.0011631181826577796, "loss": 1.7435, "step": 14979 }, { "epoch": 0.4019965650493774, "grad_norm": 0.318359375, "learning_rate": 0.001163108481246396, "loss": 1.7969, "step": 14980 }, { "epoch": 0.40202340060111635, "grad_norm": 0.296875, "learning_rate": 0.001163098778599718, "loss": 1.5635, "step": 14981 }, { "epoch": 0.4020502361528553, "grad_norm": 0.30859375, "learning_rate": 0.0011630890747177669, "loss": 1.6213, "step": 14982 }, { "epoch": 0.4020770717045942, "grad_norm": 0.337890625, "learning_rate": 0.001163079369600564, "loss": 1.8222, "step": 14983 }, { "epoch": 0.4021039072563332, "grad_norm": 0.318359375, "learning_rate": 0.0011630696632481305, "loss": 1.6554, "step": 14984 }, { "epoch": 0.40213074280807215, "grad_norm": 0.3125, "learning_rate": 0.0011630599556604882, "loss": 1.6113, "step": 14985 }, { "epoch": 0.4021575783598111, "grad_norm": 0.318359375, "learning_rate": 0.0011630502468376578, "loss": 1.6211, "step": 14986 }, { "epoch": 0.40218441391155, "grad_norm": 0.32421875, "learning_rate": 0.0011630405367796606, "loss": 1.7295, "step": 14987 }, { "epoch": 0.40221124946328896, "grad_norm": 0.3203125, "learning_rate": 0.0011630308254865186, "loss": 1.6174, "step": 14988 }, { "epoch": 0.4022380850150279, "grad_norm": 0.318359375, "learning_rate": 0.001163021112958252, "loss": 1.6249, "step": 14989 }, { "epoch": 0.4022649205667668, "grad_norm": 0.322265625, "learning_rate": 0.0011630113991948831, "loss": 1.6137, "step": 14990 }, { "epoch": 0.4022917561185058, "grad_norm": 0.330078125, "learning_rate": 0.001163001684196433, "loss": 1.6864, "step": 14991 }, { "epoch": 0.40231859167024475, "grad_norm": 0.3203125, "learning_rate": 0.0011629919679629226, "loss": 1.7474, "step": 14992 }, { "epoch": 0.4023454272219837, "grad_norm": 0.31640625, "learning_rate": 0.0011629822504943734, "loss": 1.5314, "step": 14993 }, { "epoch": 0.4023722627737226, "grad_norm": 0.322265625, "learning_rate": 0.0011629725317908069, "loss": 1.7208, "step": 14994 }, { "epoch": 0.40239909832546156, "grad_norm": 0.3125, "learning_rate": 0.0011629628118522443, "loss": 1.5605, "step": 14995 }, { "epoch": 0.4024259338772005, "grad_norm": 0.302734375, "learning_rate": 0.001162953090678707, "loss": 1.6259, "step": 14996 }, { "epoch": 0.4024527694289395, "grad_norm": 0.318359375, "learning_rate": 0.001162943368270216, "loss": 1.6716, "step": 14997 }, { "epoch": 0.4024796049806784, "grad_norm": 0.306640625, "learning_rate": 0.001162933644626793, "loss": 1.637, "step": 14998 }, { "epoch": 0.40250644053241735, "grad_norm": 0.337890625, "learning_rate": 0.0011629239197484592, "loss": 1.7216, "step": 14999 }, { "epoch": 0.4025332760841563, "grad_norm": 0.32421875, "learning_rate": 0.001162914193635236, "loss": 1.6698, "step": 15000 }, { "epoch": 0.4025601116358952, "grad_norm": 0.314453125, "learning_rate": 0.0011629044662871445, "loss": 1.6201, "step": 15001 }, { "epoch": 0.40258694718763416, "grad_norm": 0.333984375, "learning_rate": 0.0011628947377042064, "loss": 1.762, "step": 15002 }, { "epoch": 0.4026137827393731, "grad_norm": 0.318359375, "learning_rate": 0.0011628850078864426, "loss": 1.605, "step": 15003 }, { "epoch": 0.4026406182911121, "grad_norm": 0.306640625, "learning_rate": 0.001162875276833875, "loss": 1.5604, "step": 15004 }, { "epoch": 0.402667453842851, "grad_norm": 0.322265625, "learning_rate": 0.0011628655445465245, "loss": 1.746, "step": 15005 }, { "epoch": 0.40269428939458995, "grad_norm": 0.322265625, "learning_rate": 0.0011628558110244125, "loss": 1.6959, "step": 15006 }, { "epoch": 0.4027211249463289, "grad_norm": 0.328125, "learning_rate": 0.0011628460762675606, "loss": 1.7259, "step": 15007 }, { "epoch": 0.4027479604980678, "grad_norm": 0.314453125, "learning_rate": 0.0011628363402759899, "loss": 1.6456, "step": 15008 }, { "epoch": 0.40277479604980676, "grad_norm": 0.33984375, "learning_rate": 0.0011628266030497218, "loss": 1.6575, "step": 15009 }, { "epoch": 0.40280163160154575, "grad_norm": 0.330078125, "learning_rate": 0.0011628168645887779, "loss": 1.7226, "step": 15010 }, { "epoch": 0.4028284671532847, "grad_norm": 0.31640625, "learning_rate": 0.0011628071248931792, "loss": 1.6366, "step": 15011 }, { "epoch": 0.4028553027050236, "grad_norm": 0.322265625, "learning_rate": 0.0011627973839629475, "loss": 1.6139, "step": 15012 }, { "epoch": 0.40288213825676256, "grad_norm": 0.33984375, "learning_rate": 0.0011627876417981038, "loss": 1.7563, "step": 15013 }, { "epoch": 0.4029089738085015, "grad_norm": 0.318359375, "learning_rate": 0.0011627778983986695, "loss": 1.6719, "step": 15014 }, { "epoch": 0.4029358093602404, "grad_norm": 0.330078125, "learning_rate": 0.0011627681537646662, "loss": 1.6927, "step": 15015 }, { "epoch": 0.4029626449119794, "grad_norm": 0.328125, "learning_rate": 0.001162758407896115, "loss": 1.6791, "step": 15016 }, { "epoch": 0.40298948046371835, "grad_norm": 0.34765625, "learning_rate": 0.0011627486607930374, "loss": 1.8463, "step": 15017 }, { "epoch": 0.4030163160154573, "grad_norm": 0.3203125, "learning_rate": 0.0011627389124554547, "loss": 1.6548, "step": 15018 }, { "epoch": 0.4030431515671962, "grad_norm": 0.322265625, "learning_rate": 0.0011627291628833888, "loss": 1.6018, "step": 15019 }, { "epoch": 0.40306998711893516, "grad_norm": 0.318359375, "learning_rate": 0.0011627194120768605, "loss": 1.686, "step": 15020 }, { "epoch": 0.4030968226706741, "grad_norm": 0.33203125, "learning_rate": 0.001162709660035891, "loss": 1.7136, "step": 15021 }, { "epoch": 0.40312365822241303, "grad_norm": 0.328125, "learning_rate": 0.0011626999067605025, "loss": 1.6795, "step": 15022 }, { "epoch": 0.403150493774152, "grad_norm": 0.32421875, "learning_rate": 0.0011626901522507158, "loss": 1.6081, "step": 15023 }, { "epoch": 0.40317732932589095, "grad_norm": 0.31640625, "learning_rate": 0.0011626803965065523, "loss": 1.6391, "step": 15024 }, { "epoch": 0.4032041648776299, "grad_norm": 0.31640625, "learning_rate": 0.001162670639528034, "loss": 1.5817, "step": 15025 }, { "epoch": 0.4032310004293688, "grad_norm": 0.314453125, "learning_rate": 0.0011626608813151815, "loss": 1.5493, "step": 15026 }, { "epoch": 0.40325783598110776, "grad_norm": 0.314453125, "learning_rate": 0.0011626511218680166, "loss": 1.589, "step": 15027 }, { "epoch": 0.4032846715328467, "grad_norm": 0.314453125, "learning_rate": 0.0011626413611865608, "loss": 1.6248, "step": 15028 }, { "epoch": 0.4033115070845857, "grad_norm": 0.33203125, "learning_rate": 0.0011626315992708351, "loss": 1.7421, "step": 15029 }, { "epoch": 0.4033383426363246, "grad_norm": 0.31640625, "learning_rate": 0.0011626218361208613, "loss": 1.6072, "step": 15030 }, { "epoch": 0.40336517818806356, "grad_norm": 0.32421875, "learning_rate": 0.001162612071736661, "loss": 1.6697, "step": 15031 }, { "epoch": 0.4033920137398025, "grad_norm": 0.33203125, "learning_rate": 0.001162602306118255, "loss": 1.707, "step": 15032 }, { "epoch": 0.4034188492915414, "grad_norm": 0.33203125, "learning_rate": 0.0011625925392656654, "loss": 1.7404, "step": 15033 }, { "epoch": 0.40344568484328036, "grad_norm": 0.328125, "learning_rate": 0.001162582771178913, "loss": 1.6827, "step": 15034 }, { "epoch": 0.4034725203950193, "grad_norm": 0.330078125, "learning_rate": 0.0011625730018580197, "loss": 1.7306, "step": 15035 }, { "epoch": 0.4034993559467583, "grad_norm": 0.326171875, "learning_rate": 0.0011625632313030066, "loss": 1.66, "step": 15036 }, { "epoch": 0.4035261914984972, "grad_norm": 0.328125, "learning_rate": 0.0011625534595138952, "loss": 1.7562, "step": 15037 }, { "epoch": 0.40355302705023616, "grad_norm": 0.318359375, "learning_rate": 0.0011625436864907072, "loss": 1.5646, "step": 15038 }, { "epoch": 0.4035798626019751, "grad_norm": 0.33984375, "learning_rate": 0.0011625339122334638, "loss": 1.7928, "step": 15039 }, { "epoch": 0.403606698153714, "grad_norm": 0.330078125, "learning_rate": 0.0011625241367421867, "loss": 1.6204, "step": 15040 }, { "epoch": 0.40363353370545296, "grad_norm": 0.333984375, "learning_rate": 0.0011625143600168968, "loss": 1.6554, "step": 15041 }, { "epoch": 0.40366036925719195, "grad_norm": 0.3203125, "learning_rate": 0.0011625045820576158, "loss": 1.6372, "step": 15042 }, { "epoch": 0.4036872048089309, "grad_norm": 0.31640625, "learning_rate": 0.0011624948028643657, "loss": 1.5382, "step": 15043 }, { "epoch": 0.4037140403606698, "grad_norm": 0.318359375, "learning_rate": 0.0011624850224371674, "loss": 1.6404, "step": 15044 }, { "epoch": 0.40374087591240876, "grad_norm": 0.328125, "learning_rate": 0.0011624752407760422, "loss": 1.7266, "step": 15045 }, { "epoch": 0.4037677114641477, "grad_norm": 0.322265625, "learning_rate": 0.0011624654578810119, "loss": 1.6005, "step": 15046 }, { "epoch": 0.40379454701588663, "grad_norm": 0.322265625, "learning_rate": 0.001162455673752098, "loss": 1.7113, "step": 15047 }, { "epoch": 0.40382138256762556, "grad_norm": 0.31640625, "learning_rate": 0.0011624458883893215, "loss": 1.584, "step": 15048 }, { "epoch": 0.40384821811936455, "grad_norm": 0.326171875, "learning_rate": 0.0011624361017927045, "loss": 1.6977, "step": 15049 }, { "epoch": 0.4038750536711035, "grad_norm": 0.32421875, "learning_rate": 0.001162426313962268, "loss": 1.6281, "step": 15050 }, { "epoch": 0.4039018892228424, "grad_norm": 0.337890625, "learning_rate": 0.0011624165248980338, "loss": 1.743, "step": 15051 }, { "epoch": 0.40392872477458136, "grad_norm": 0.318359375, "learning_rate": 0.0011624067346000231, "loss": 1.5818, "step": 15052 }, { "epoch": 0.4039555603263203, "grad_norm": 0.345703125, "learning_rate": 0.0011623969430682574, "loss": 1.7185, "step": 15053 }, { "epoch": 0.40398239587805923, "grad_norm": 0.318359375, "learning_rate": 0.0011623871503027583, "loss": 1.6191, "step": 15054 }, { "epoch": 0.4040092314297982, "grad_norm": 0.328125, "learning_rate": 0.0011623773563035472, "loss": 1.6773, "step": 15055 }, { "epoch": 0.40403606698153716, "grad_norm": 0.3203125, "learning_rate": 0.001162367561070646, "loss": 1.6676, "step": 15056 }, { "epoch": 0.4040629025332761, "grad_norm": 0.30859375, "learning_rate": 0.0011623577646040755, "loss": 1.6079, "step": 15057 }, { "epoch": 0.404089738085015, "grad_norm": 0.318359375, "learning_rate": 0.0011623479669038575, "loss": 1.5776, "step": 15058 }, { "epoch": 0.40411657363675396, "grad_norm": 0.322265625, "learning_rate": 0.0011623381679700135, "loss": 1.6856, "step": 15059 }, { "epoch": 0.4041434091884929, "grad_norm": 0.3125, "learning_rate": 0.0011623283678025651, "loss": 1.6229, "step": 15060 }, { "epoch": 0.40417024474023183, "grad_norm": 0.31640625, "learning_rate": 0.0011623185664015336, "loss": 1.622, "step": 15061 }, { "epoch": 0.4041970802919708, "grad_norm": 0.30859375, "learning_rate": 0.0011623087637669409, "loss": 1.5709, "step": 15062 }, { "epoch": 0.40422391584370976, "grad_norm": 0.32421875, "learning_rate": 0.001162298959898808, "loss": 1.6219, "step": 15063 }, { "epoch": 0.4042507513954487, "grad_norm": 0.330078125, "learning_rate": 0.0011622891547971566, "loss": 1.666, "step": 15064 }, { "epoch": 0.40427758694718763, "grad_norm": 0.33984375, "learning_rate": 0.0011622793484620082, "loss": 1.6432, "step": 15065 }, { "epoch": 0.40430442249892656, "grad_norm": 0.31640625, "learning_rate": 0.0011622695408933844, "loss": 1.5778, "step": 15066 }, { "epoch": 0.4043312580506655, "grad_norm": 0.33984375, "learning_rate": 0.0011622597320913065, "loss": 1.6604, "step": 15067 }, { "epoch": 0.4043580936024045, "grad_norm": 0.326171875, "learning_rate": 0.0011622499220557963, "loss": 1.672, "step": 15068 }, { "epoch": 0.4043849291541434, "grad_norm": 0.326171875, "learning_rate": 0.0011622401107868753, "loss": 1.7128, "step": 15069 }, { "epoch": 0.40441176470588236, "grad_norm": 0.3125, "learning_rate": 0.0011622302982845646, "loss": 1.5451, "step": 15070 }, { "epoch": 0.4044386002576213, "grad_norm": 0.326171875, "learning_rate": 0.0011622204845488863, "loss": 1.6563, "step": 15071 }, { "epoch": 0.40446543580936023, "grad_norm": 0.32421875, "learning_rate": 0.0011622106695798615, "loss": 1.586, "step": 15072 }, { "epoch": 0.40449227136109916, "grad_norm": 0.322265625, "learning_rate": 0.0011622008533775118, "loss": 1.6141, "step": 15073 }, { "epoch": 0.40451910691283816, "grad_norm": 0.337890625, "learning_rate": 0.0011621910359418592, "loss": 1.6657, "step": 15074 }, { "epoch": 0.4045459424645771, "grad_norm": 0.337890625, "learning_rate": 0.0011621812172729247, "loss": 1.6435, "step": 15075 }, { "epoch": 0.404572778016316, "grad_norm": 0.318359375, "learning_rate": 0.0011621713973707299, "loss": 1.5865, "step": 15076 }, { "epoch": 0.40459961356805496, "grad_norm": 0.326171875, "learning_rate": 0.0011621615762352966, "loss": 1.6228, "step": 15077 }, { "epoch": 0.4046264491197939, "grad_norm": 0.33203125, "learning_rate": 0.0011621517538666461, "loss": 1.5826, "step": 15078 }, { "epoch": 0.40465328467153283, "grad_norm": 0.322265625, "learning_rate": 0.0011621419302648, "loss": 1.5741, "step": 15079 }, { "epoch": 0.40468012022327177, "grad_norm": 0.337890625, "learning_rate": 0.0011621321054297799, "loss": 1.7489, "step": 15080 }, { "epoch": 0.40470695577501076, "grad_norm": 0.32421875, "learning_rate": 0.0011621222793616074, "loss": 1.6075, "step": 15081 }, { "epoch": 0.4047337913267497, "grad_norm": 0.3359375, "learning_rate": 0.0011621124520603042, "loss": 1.6951, "step": 15082 }, { "epoch": 0.4047606268784886, "grad_norm": 0.423828125, "learning_rate": 0.0011621026235258912, "loss": 1.9341, "step": 15083 }, { "epoch": 0.40478746243022756, "grad_norm": 0.376953125, "learning_rate": 0.0011620927937583908, "loss": 1.9149, "step": 15084 }, { "epoch": 0.4048142979819665, "grad_norm": 0.353515625, "learning_rate": 0.001162082962757824, "loss": 1.9383, "step": 15085 }, { "epoch": 0.40484113353370543, "grad_norm": 0.3125, "learning_rate": 0.0011620731305242128, "loss": 1.727, "step": 15086 }, { "epoch": 0.4048679690854444, "grad_norm": 0.3515625, "learning_rate": 0.001162063297057578, "loss": 1.8779, "step": 15087 }, { "epoch": 0.40489480463718336, "grad_norm": 0.3515625, "learning_rate": 0.001162053462357942, "loss": 2.0023, "step": 15088 }, { "epoch": 0.4049216401889223, "grad_norm": 0.322265625, "learning_rate": 0.001162043626425326, "loss": 1.8644, "step": 15089 }, { "epoch": 0.40494847574066123, "grad_norm": 0.30859375, "learning_rate": 0.0011620337892597518, "loss": 1.7699, "step": 15090 }, { "epoch": 0.40497531129240016, "grad_norm": 0.30859375, "learning_rate": 0.0011620239508612407, "loss": 1.6864, "step": 15091 }, { "epoch": 0.4050021468441391, "grad_norm": 0.322265625, "learning_rate": 0.0011620141112298143, "loss": 1.8176, "step": 15092 }, { "epoch": 0.40502898239587803, "grad_norm": 0.322265625, "learning_rate": 0.0011620042703654944, "loss": 1.8003, "step": 15093 }, { "epoch": 0.405055817947617, "grad_norm": 0.33203125, "learning_rate": 0.0011619944282683025, "loss": 1.7716, "step": 15094 }, { "epoch": 0.40508265349935596, "grad_norm": 0.33203125, "learning_rate": 0.0011619845849382602, "loss": 1.8234, "step": 15095 }, { "epoch": 0.4051094890510949, "grad_norm": 0.33203125, "learning_rate": 0.0011619747403753891, "loss": 1.8834, "step": 15096 }, { "epoch": 0.40513632460283383, "grad_norm": 0.333984375, "learning_rate": 0.0011619648945797106, "loss": 1.7955, "step": 15097 }, { "epoch": 0.40516316015457277, "grad_norm": 0.328125, "learning_rate": 0.0011619550475512465, "loss": 1.8746, "step": 15098 }, { "epoch": 0.4051899957063117, "grad_norm": 0.30859375, "learning_rate": 0.0011619451992900182, "loss": 1.8114, "step": 15099 }, { "epoch": 0.4052168312580507, "grad_norm": 0.318359375, "learning_rate": 0.0011619353497960478, "loss": 1.8341, "step": 15100 }, { "epoch": 0.4052436668097896, "grad_norm": 0.30859375, "learning_rate": 0.0011619254990693564, "loss": 1.7254, "step": 15101 }, { "epoch": 0.40527050236152856, "grad_norm": 0.3125, "learning_rate": 0.0011619156471099658, "loss": 1.8165, "step": 15102 }, { "epoch": 0.4052973379132675, "grad_norm": 0.3203125, "learning_rate": 0.0011619057939178973, "loss": 1.8494, "step": 15103 }, { "epoch": 0.40532417346500643, "grad_norm": 0.314453125, "learning_rate": 0.0011618959394931732, "loss": 1.8412, "step": 15104 }, { "epoch": 0.40535100901674537, "grad_norm": 0.314453125, "learning_rate": 0.0011618860838358148, "loss": 1.8279, "step": 15105 }, { "epoch": 0.4053778445684843, "grad_norm": 0.314453125, "learning_rate": 0.0011618762269458432, "loss": 1.8917, "step": 15106 }, { "epoch": 0.4054046801202233, "grad_norm": 0.294921875, "learning_rate": 0.0011618663688232806, "loss": 1.6336, "step": 15107 }, { "epoch": 0.40543151567196223, "grad_norm": 0.318359375, "learning_rate": 0.0011618565094681486, "loss": 1.8778, "step": 15108 }, { "epoch": 0.40545835122370116, "grad_norm": 0.306640625, "learning_rate": 0.0011618466488804687, "loss": 1.6828, "step": 15109 }, { "epoch": 0.4054851867754401, "grad_norm": 0.3203125, "learning_rate": 0.0011618367870602627, "loss": 1.8096, "step": 15110 }, { "epoch": 0.40551202232717903, "grad_norm": 0.32421875, "learning_rate": 0.0011618269240075518, "loss": 1.7736, "step": 15111 }, { "epoch": 0.40553885787891797, "grad_norm": 0.3203125, "learning_rate": 0.001161817059722358, "loss": 1.8302, "step": 15112 }, { "epoch": 0.40556569343065696, "grad_norm": 0.310546875, "learning_rate": 0.001161807194204703, "loss": 1.7154, "step": 15113 }, { "epoch": 0.4055925289823959, "grad_norm": 0.3046875, "learning_rate": 0.0011617973274546081, "loss": 1.7079, "step": 15114 }, { "epoch": 0.40561936453413483, "grad_norm": 0.306640625, "learning_rate": 0.0011617874594720954, "loss": 1.7551, "step": 15115 }, { "epoch": 0.40564620008587376, "grad_norm": 0.30859375, "learning_rate": 0.0011617775902571861, "loss": 1.7595, "step": 15116 }, { "epoch": 0.4056730356376127, "grad_norm": 0.322265625, "learning_rate": 0.0011617677198099022, "loss": 1.865, "step": 15117 }, { "epoch": 0.40569987118935164, "grad_norm": 0.32421875, "learning_rate": 0.0011617578481302648, "loss": 1.8427, "step": 15118 }, { "epoch": 0.40572670674109057, "grad_norm": 0.3125, "learning_rate": 0.0011617479752182964, "loss": 1.7944, "step": 15119 }, { "epoch": 0.40575354229282956, "grad_norm": 0.31640625, "learning_rate": 0.0011617381010740181, "loss": 1.8231, "step": 15120 }, { "epoch": 0.4057803778445685, "grad_norm": 0.3046875, "learning_rate": 0.0011617282256974515, "loss": 1.795, "step": 15121 }, { "epoch": 0.40580721339630743, "grad_norm": 0.30859375, "learning_rate": 0.0011617183490886186, "loss": 1.7881, "step": 15122 }, { "epoch": 0.40583404894804637, "grad_norm": 0.3046875, "learning_rate": 0.001161708471247541, "loss": 1.726, "step": 15123 }, { "epoch": 0.4058608844997853, "grad_norm": 0.3125, "learning_rate": 0.00116169859217424, "loss": 1.7805, "step": 15124 }, { "epoch": 0.40588772005152424, "grad_norm": 0.314453125, "learning_rate": 0.0011616887118687377, "loss": 1.846, "step": 15125 }, { "epoch": 0.4059145556032632, "grad_norm": 0.306640625, "learning_rate": 0.0011616788303310556, "loss": 1.7355, "step": 15126 }, { "epoch": 0.40594139115500216, "grad_norm": 0.310546875, "learning_rate": 0.0011616689475612155, "loss": 1.7459, "step": 15127 }, { "epoch": 0.4059682267067411, "grad_norm": 0.32421875, "learning_rate": 0.0011616590635592388, "loss": 1.7603, "step": 15128 }, { "epoch": 0.40599506225848003, "grad_norm": 0.326171875, "learning_rate": 0.0011616491783251475, "loss": 1.8582, "step": 15129 }, { "epoch": 0.40602189781021897, "grad_norm": 0.310546875, "learning_rate": 0.001161639291858963, "loss": 1.7726, "step": 15130 }, { "epoch": 0.4060487333619579, "grad_norm": 0.328125, "learning_rate": 0.0011616294041607072, "loss": 1.7252, "step": 15131 }, { "epoch": 0.40607556891369684, "grad_norm": 0.310546875, "learning_rate": 0.0011616195152304018, "loss": 1.7505, "step": 15132 }, { "epoch": 0.40610240446543583, "grad_norm": 0.3125, "learning_rate": 0.0011616096250680683, "loss": 1.6732, "step": 15133 }, { "epoch": 0.40612924001717476, "grad_norm": 0.310546875, "learning_rate": 0.0011615997336737285, "loss": 1.6934, "step": 15134 }, { "epoch": 0.4061560755689137, "grad_norm": 0.30859375, "learning_rate": 0.0011615898410474042, "loss": 1.6998, "step": 15135 }, { "epoch": 0.40618291112065263, "grad_norm": 0.298828125, "learning_rate": 0.001161579947189117, "loss": 1.6618, "step": 15136 }, { "epoch": 0.40620974667239157, "grad_norm": 0.353515625, "learning_rate": 0.0011615700520988886, "loss": 1.9106, "step": 15137 }, { "epoch": 0.4062365822241305, "grad_norm": 0.3203125, "learning_rate": 0.0011615601557767405, "loss": 1.708, "step": 15138 }, { "epoch": 0.4062634177758695, "grad_norm": 0.306640625, "learning_rate": 0.0011615502582226949, "loss": 1.73, "step": 15139 }, { "epoch": 0.40629025332760843, "grad_norm": 0.310546875, "learning_rate": 0.001161540359436773, "loss": 1.7522, "step": 15140 }, { "epoch": 0.40631708887934737, "grad_norm": 0.314453125, "learning_rate": 0.001161530459418997, "loss": 1.746, "step": 15141 }, { "epoch": 0.4063439244310863, "grad_norm": 0.314453125, "learning_rate": 0.0011615205581693884, "loss": 1.7602, "step": 15142 }, { "epoch": 0.40637075998282524, "grad_norm": 0.310546875, "learning_rate": 0.0011615106556879687, "loss": 1.7654, "step": 15143 }, { "epoch": 0.40639759553456417, "grad_norm": 0.29296875, "learning_rate": 0.00116150075197476, "loss": 1.635, "step": 15144 }, { "epoch": 0.40642443108630316, "grad_norm": 0.296875, "learning_rate": 0.0011614908470297838, "loss": 1.6132, "step": 15145 }, { "epoch": 0.4064512666380421, "grad_norm": 0.30078125, "learning_rate": 0.0011614809408530618, "loss": 1.6418, "step": 15146 }, { "epoch": 0.40647810218978103, "grad_norm": 0.30859375, "learning_rate": 0.0011614710334446159, "loss": 1.7396, "step": 15147 }, { "epoch": 0.40650493774151997, "grad_norm": 0.306640625, "learning_rate": 0.0011614611248044677, "loss": 1.6613, "step": 15148 }, { "epoch": 0.4065317732932589, "grad_norm": 0.3046875, "learning_rate": 0.001161451214932639, "loss": 1.6746, "step": 15149 }, { "epoch": 0.40655860884499784, "grad_norm": 0.302734375, "learning_rate": 0.0011614413038291515, "loss": 1.6873, "step": 15150 }, { "epoch": 0.4065854443967368, "grad_norm": 0.314453125, "learning_rate": 0.001161431391494027, "loss": 1.7241, "step": 15151 }, { "epoch": 0.40661227994847576, "grad_norm": 0.3046875, "learning_rate": 0.0011614214779272869, "loss": 1.5976, "step": 15152 }, { "epoch": 0.4066391155002147, "grad_norm": 0.33203125, "learning_rate": 0.0011614115631289536, "loss": 1.8351, "step": 15153 }, { "epoch": 0.40666595105195363, "grad_norm": 0.306640625, "learning_rate": 0.0011614016470990484, "loss": 1.6048, "step": 15154 }, { "epoch": 0.40669278660369257, "grad_norm": 0.322265625, "learning_rate": 0.0011613917298375933, "loss": 1.8051, "step": 15155 }, { "epoch": 0.4067196221554315, "grad_norm": 0.3125, "learning_rate": 0.0011613818113446097, "loss": 1.7184, "step": 15156 }, { "epoch": 0.40674645770717044, "grad_norm": 0.306640625, "learning_rate": 0.0011613718916201196, "loss": 1.7474, "step": 15157 }, { "epoch": 0.40677329325890943, "grad_norm": 0.302734375, "learning_rate": 0.0011613619706641448, "loss": 1.6436, "step": 15158 }, { "epoch": 0.40680012881064836, "grad_norm": 0.3125, "learning_rate": 0.0011613520484767069, "loss": 1.724, "step": 15159 }, { "epoch": 0.4068269643623873, "grad_norm": 0.302734375, "learning_rate": 0.001161342125057828, "loss": 1.5902, "step": 15160 }, { "epoch": 0.40685379991412624, "grad_norm": 0.32421875, "learning_rate": 0.0011613322004075293, "loss": 1.8067, "step": 15161 }, { "epoch": 0.40688063546586517, "grad_norm": 0.310546875, "learning_rate": 0.0011613222745258328, "loss": 1.7651, "step": 15162 }, { "epoch": 0.4069074710176041, "grad_norm": 0.318359375, "learning_rate": 0.0011613123474127607, "loss": 1.7432, "step": 15163 }, { "epoch": 0.40693430656934304, "grad_norm": 0.30078125, "learning_rate": 0.0011613024190683344, "loss": 1.6984, "step": 15164 }, { "epoch": 0.40696114212108203, "grad_norm": 0.30859375, "learning_rate": 0.0011612924894925758, "loss": 1.6634, "step": 15165 }, { "epoch": 0.40698797767282097, "grad_norm": 0.30078125, "learning_rate": 0.0011612825586855063, "loss": 1.6253, "step": 15166 }, { "epoch": 0.4070148132245599, "grad_norm": 0.32421875, "learning_rate": 0.0011612726266471483, "loss": 1.7422, "step": 15167 }, { "epoch": 0.40704164877629884, "grad_norm": 0.31640625, "learning_rate": 0.001161262693377523, "loss": 1.7519, "step": 15168 }, { "epoch": 0.40706848432803777, "grad_norm": 0.3125, "learning_rate": 0.001161252758876653, "loss": 1.7368, "step": 15169 }, { "epoch": 0.4070953198797767, "grad_norm": 0.306640625, "learning_rate": 0.001161242823144559, "loss": 1.6842, "step": 15170 }, { "epoch": 0.4071221554315157, "grad_norm": 0.31640625, "learning_rate": 0.0011612328861812637, "loss": 1.7686, "step": 15171 }, { "epoch": 0.40714899098325463, "grad_norm": 0.30859375, "learning_rate": 0.0011612229479867884, "loss": 1.7235, "step": 15172 }, { "epoch": 0.40717582653499357, "grad_norm": 0.306640625, "learning_rate": 0.0011612130085611552, "loss": 1.63, "step": 15173 }, { "epoch": 0.4072026620867325, "grad_norm": 0.298828125, "learning_rate": 0.0011612030679043856, "loss": 1.6484, "step": 15174 }, { "epoch": 0.40722949763847144, "grad_norm": 0.30859375, "learning_rate": 0.0011611931260165017, "loss": 1.7293, "step": 15175 }, { "epoch": 0.4072563331902104, "grad_norm": 0.310546875, "learning_rate": 0.0011611831828975251, "loss": 1.6713, "step": 15176 }, { "epoch": 0.4072831687419493, "grad_norm": 0.3125, "learning_rate": 0.0011611732385474777, "loss": 1.743, "step": 15177 }, { "epoch": 0.4073100042936883, "grad_norm": 0.306640625, "learning_rate": 0.0011611632929663814, "loss": 1.7322, "step": 15178 }, { "epoch": 0.40733683984542723, "grad_norm": 0.3125, "learning_rate": 0.001161153346154258, "loss": 1.7792, "step": 15179 }, { "epoch": 0.40736367539716617, "grad_norm": 0.326171875, "learning_rate": 0.001161143398111129, "loss": 1.7329, "step": 15180 }, { "epoch": 0.4073905109489051, "grad_norm": 0.306640625, "learning_rate": 0.0011611334488370168, "loss": 1.6624, "step": 15181 }, { "epoch": 0.40741734650064404, "grad_norm": 0.328125, "learning_rate": 0.0011611234983319428, "loss": 1.7098, "step": 15182 }, { "epoch": 0.407444182052383, "grad_norm": 0.3046875, "learning_rate": 0.0011611135465959288, "loss": 1.6382, "step": 15183 }, { "epoch": 0.40747101760412197, "grad_norm": 0.318359375, "learning_rate": 0.0011611035936289968, "loss": 1.7506, "step": 15184 }, { "epoch": 0.4074978531558609, "grad_norm": 0.318359375, "learning_rate": 0.0011610936394311686, "loss": 1.7132, "step": 15185 }, { "epoch": 0.40752468870759984, "grad_norm": 0.296875, "learning_rate": 0.001161083684002466, "loss": 1.5391, "step": 15186 }, { "epoch": 0.40755152425933877, "grad_norm": 0.314453125, "learning_rate": 0.001161073727342911, "loss": 1.7574, "step": 15187 }, { "epoch": 0.4075783598110777, "grad_norm": 0.306640625, "learning_rate": 0.0011610637694525253, "loss": 1.6753, "step": 15188 }, { "epoch": 0.40760519536281664, "grad_norm": 0.298828125, "learning_rate": 0.0011610538103313307, "loss": 1.5539, "step": 15189 }, { "epoch": 0.4076320309145556, "grad_norm": 0.306640625, "learning_rate": 0.001161043849979349, "loss": 1.6489, "step": 15190 }, { "epoch": 0.40765886646629457, "grad_norm": 0.3046875, "learning_rate": 0.001161033888396602, "loss": 1.583, "step": 15191 }, { "epoch": 0.4076857020180335, "grad_norm": 0.3203125, "learning_rate": 0.0011610239255831123, "loss": 1.761, "step": 15192 }, { "epoch": 0.40771253756977244, "grad_norm": 0.318359375, "learning_rate": 0.0011610139615389006, "loss": 1.72, "step": 15193 }, { "epoch": 0.4077393731215114, "grad_norm": 0.3203125, "learning_rate": 0.0011610039962639896, "loss": 1.7825, "step": 15194 }, { "epoch": 0.4077662086732503, "grad_norm": 0.30859375, "learning_rate": 0.0011609940297584009, "loss": 1.6389, "step": 15195 }, { "epoch": 0.40779304422498924, "grad_norm": 0.32421875, "learning_rate": 0.0011609840620221564, "loss": 1.8104, "step": 15196 }, { "epoch": 0.40781987977672823, "grad_norm": 0.306640625, "learning_rate": 0.0011609740930552776, "loss": 1.6341, "step": 15197 }, { "epoch": 0.40784671532846717, "grad_norm": 0.29296875, "learning_rate": 0.0011609641228577869, "loss": 1.4933, "step": 15198 }, { "epoch": 0.4078735508802061, "grad_norm": 0.3046875, "learning_rate": 0.001160954151429706, "loss": 1.6865, "step": 15199 }, { "epoch": 0.40790038643194504, "grad_norm": 0.298828125, "learning_rate": 0.0011609441787710567, "loss": 1.5453, "step": 15200 }, { "epoch": 0.407927221983684, "grad_norm": 0.310546875, "learning_rate": 0.0011609342048818608, "loss": 1.6208, "step": 15201 }, { "epoch": 0.4079540575354229, "grad_norm": 0.33203125, "learning_rate": 0.0011609242297621401, "loss": 1.7995, "step": 15202 }, { "epoch": 0.40798089308716184, "grad_norm": 0.3125, "learning_rate": 0.001160914253411917, "loss": 1.6931, "step": 15203 }, { "epoch": 0.40800772863890084, "grad_norm": 0.31640625, "learning_rate": 0.0011609042758312128, "loss": 1.636, "step": 15204 }, { "epoch": 0.40803456419063977, "grad_norm": 0.30859375, "learning_rate": 0.0011608942970200498, "loss": 1.6197, "step": 15205 }, { "epoch": 0.4080613997423787, "grad_norm": 0.306640625, "learning_rate": 0.0011608843169784498, "loss": 1.6627, "step": 15206 }, { "epoch": 0.40808823529411764, "grad_norm": 0.30859375, "learning_rate": 0.0011608743357064345, "loss": 1.7394, "step": 15207 }, { "epoch": 0.4081150708458566, "grad_norm": 0.3203125, "learning_rate": 0.001160864353204026, "loss": 1.8171, "step": 15208 }, { "epoch": 0.4081419063975955, "grad_norm": 0.306640625, "learning_rate": 0.0011608543694712461, "loss": 1.6224, "step": 15209 }, { "epoch": 0.4081687419493345, "grad_norm": 0.310546875, "learning_rate": 0.0011608443845081168, "loss": 1.7166, "step": 15210 }, { "epoch": 0.40819557750107344, "grad_norm": 0.310546875, "learning_rate": 0.0011608343983146598, "loss": 1.7321, "step": 15211 }, { "epoch": 0.40822241305281237, "grad_norm": 0.30859375, "learning_rate": 0.0011608244108908972, "loss": 1.6675, "step": 15212 }, { "epoch": 0.4082492486045513, "grad_norm": 0.3046875, "learning_rate": 0.0011608144222368508, "loss": 1.6028, "step": 15213 }, { "epoch": 0.40827608415629024, "grad_norm": 0.302734375, "learning_rate": 0.0011608044323525423, "loss": 1.6288, "step": 15214 }, { "epoch": 0.4083029197080292, "grad_norm": 0.3046875, "learning_rate": 0.0011607944412379944, "loss": 1.6259, "step": 15215 }, { "epoch": 0.40832975525976817, "grad_norm": 0.3125, "learning_rate": 0.0011607844488932281, "loss": 1.6509, "step": 15216 }, { "epoch": 0.4083565908115071, "grad_norm": 0.302734375, "learning_rate": 0.0011607744553182658, "loss": 1.6224, "step": 15217 }, { "epoch": 0.40838342636324604, "grad_norm": 0.322265625, "learning_rate": 0.0011607644605131293, "loss": 1.6723, "step": 15218 }, { "epoch": 0.408410261914985, "grad_norm": 0.3125, "learning_rate": 0.0011607544644778405, "loss": 1.7057, "step": 15219 }, { "epoch": 0.4084370974667239, "grad_norm": 0.318359375, "learning_rate": 0.0011607444672124213, "loss": 1.7275, "step": 15220 }, { "epoch": 0.40846393301846284, "grad_norm": 0.3125, "learning_rate": 0.0011607344687168941, "loss": 1.6519, "step": 15221 }, { "epoch": 0.4084907685702018, "grad_norm": 0.330078125, "learning_rate": 0.0011607244689912802, "loss": 1.8392, "step": 15222 }, { "epoch": 0.40851760412194077, "grad_norm": 0.3046875, "learning_rate": 0.0011607144680356018, "loss": 1.6025, "step": 15223 }, { "epoch": 0.4085444396736797, "grad_norm": 0.3125, "learning_rate": 0.0011607044658498808, "loss": 1.687, "step": 15224 }, { "epoch": 0.40857127522541864, "grad_norm": 0.302734375, "learning_rate": 0.0011606944624341393, "loss": 1.6005, "step": 15225 }, { "epoch": 0.4085981107771576, "grad_norm": 0.3203125, "learning_rate": 0.001160684457788399, "loss": 1.7487, "step": 15226 }, { "epoch": 0.4086249463288965, "grad_norm": 0.3203125, "learning_rate": 0.001160674451912682, "loss": 1.8203, "step": 15227 }, { "epoch": 0.40865178188063545, "grad_norm": 0.31640625, "learning_rate": 0.00116066444480701, "loss": 1.6812, "step": 15228 }, { "epoch": 0.40867861743237444, "grad_norm": 0.322265625, "learning_rate": 0.0011606544364714051, "loss": 1.7479, "step": 15229 }, { "epoch": 0.40870545298411337, "grad_norm": 0.31640625, "learning_rate": 0.0011606444269058897, "loss": 1.685, "step": 15230 }, { "epoch": 0.4087322885358523, "grad_norm": 0.29296875, "learning_rate": 0.0011606344161104853, "loss": 1.4826, "step": 15231 }, { "epoch": 0.40875912408759124, "grad_norm": 0.3125, "learning_rate": 0.0011606244040852137, "loss": 1.7253, "step": 15232 }, { "epoch": 0.4087859596393302, "grad_norm": 0.3125, "learning_rate": 0.0011606143908300972, "loss": 1.6894, "step": 15233 }, { "epoch": 0.4088127951910691, "grad_norm": 0.310546875, "learning_rate": 0.0011606043763451577, "loss": 1.6623, "step": 15234 }, { "epoch": 0.40883963074280805, "grad_norm": 0.306640625, "learning_rate": 0.0011605943606304173, "loss": 1.6173, "step": 15235 }, { "epoch": 0.40886646629454704, "grad_norm": 0.314453125, "learning_rate": 0.0011605843436858975, "loss": 1.6492, "step": 15236 }, { "epoch": 0.408893301846286, "grad_norm": 0.30859375, "learning_rate": 0.0011605743255116206, "loss": 1.6382, "step": 15237 }, { "epoch": 0.4089201373980249, "grad_norm": 0.298828125, "learning_rate": 0.0011605643061076087, "loss": 1.5622, "step": 15238 }, { "epoch": 0.40894697294976384, "grad_norm": 0.318359375, "learning_rate": 0.0011605542854738837, "loss": 1.7415, "step": 15239 }, { "epoch": 0.4089738085015028, "grad_norm": 0.3203125, "learning_rate": 0.0011605442636104671, "loss": 1.72, "step": 15240 }, { "epoch": 0.4090006440532417, "grad_norm": 0.31640625, "learning_rate": 0.0011605342405173818, "loss": 1.5547, "step": 15241 }, { "epoch": 0.4090274796049807, "grad_norm": 0.3125, "learning_rate": 0.0011605242161946492, "loss": 1.6459, "step": 15242 }, { "epoch": 0.40905431515671964, "grad_norm": 0.30078125, "learning_rate": 0.001160514190642291, "loss": 1.5427, "step": 15243 }, { "epoch": 0.4090811507084586, "grad_norm": 0.314453125, "learning_rate": 0.00116050416386033, "loss": 1.6421, "step": 15244 }, { "epoch": 0.4091079862601975, "grad_norm": 0.314453125, "learning_rate": 0.0011604941358487875, "loss": 1.7174, "step": 15245 }, { "epoch": 0.40913482181193644, "grad_norm": 0.314453125, "learning_rate": 0.0011604841066076858, "loss": 1.695, "step": 15246 }, { "epoch": 0.4091616573636754, "grad_norm": 0.31640625, "learning_rate": 0.001160474076137047, "loss": 1.6729, "step": 15247 }, { "epoch": 0.4091884929154143, "grad_norm": 0.326171875, "learning_rate": 0.001160464044436893, "loss": 1.7359, "step": 15248 }, { "epoch": 0.4092153284671533, "grad_norm": 0.306640625, "learning_rate": 0.0011604540115072457, "loss": 1.5186, "step": 15249 }, { "epoch": 0.40924216401889224, "grad_norm": 0.322265625, "learning_rate": 0.001160443977348127, "loss": 1.7848, "step": 15250 }, { "epoch": 0.4092689995706312, "grad_norm": 0.318359375, "learning_rate": 0.0011604339419595594, "loss": 1.7435, "step": 15251 }, { "epoch": 0.4092958351223701, "grad_norm": 0.310546875, "learning_rate": 0.0011604239053415644, "loss": 1.5189, "step": 15252 }, { "epoch": 0.40932267067410905, "grad_norm": 0.310546875, "learning_rate": 0.0011604138674941645, "loss": 1.6185, "step": 15253 }, { "epoch": 0.409349506225848, "grad_norm": 0.3203125, "learning_rate": 0.001160403828417381, "loss": 1.6124, "step": 15254 }, { "epoch": 0.40937634177758697, "grad_norm": 0.3203125, "learning_rate": 0.0011603937881112367, "loss": 1.6229, "step": 15255 }, { "epoch": 0.4094031773293259, "grad_norm": 0.32421875, "learning_rate": 0.0011603837465757533, "loss": 1.6161, "step": 15256 }, { "epoch": 0.40943001288106484, "grad_norm": 0.322265625, "learning_rate": 0.0011603737038109528, "loss": 1.6747, "step": 15257 }, { "epoch": 0.4094568484328038, "grad_norm": 0.32421875, "learning_rate": 0.0011603636598168572, "loss": 1.6616, "step": 15258 }, { "epoch": 0.4094836839845427, "grad_norm": 0.3046875, "learning_rate": 0.0011603536145934886, "loss": 1.5193, "step": 15259 }, { "epoch": 0.40951051953628165, "grad_norm": 0.322265625, "learning_rate": 0.001160343568140869, "loss": 1.6431, "step": 15260 }, { "epoch": 0.4095373550880206, "grad_norm": 0.30859375, "learning_rate": 0.0011603335204590207, "loss": 1.6011, "step": 15261 }, { "epoch": 0.4095641906397596, "grad_norm": 0.30859375, "learning_rate": 0.0011603234715479652, "loss": 1.5705, "step": 15262 }, { "epoch": 0.4095910261914985, "grad_norm": 0.310546875, "learning_rate": 0.001160313421407725, "loss": 1.6898, "step": 15263 }, { "epoch": 0.40961786174323744, "grad_norm": 0.330078125, "learning_rate": 0.001160303370038322, "loss": 1.7641, "step": 15264 }, { "epoch": 0.4096446972949764, "grad_norm": 0.3359375, "learning_rate": 0.0011602933174397782, "loss": 1.7746, "step": 15265 }, { "epoch": 0.4096715328467153, "grad_norm": 0.326171875, "learning_rate": 0.0011602832636121157, "loss": 1.7303, "step": 15266 }, { "epoch": 0.40969836839845425, "grad_norm": 0.30859375, "learning_rate": 0.0011602732085553567, "loss": 1.6139, "step": 15267 }, { "epoch": 0.40972520395019324, "grad_norm": 0.3046875, "learning_rate": 0.0011602631522695229, "loss": 1.6158, "step": 15268 }, { "epoch": 0.4097520395019322, "grad_norm": 0.314453125, "learning_rate": 0.0011602530947546366, "loss": 1.6776, "step": 15269 }, { "epoch": 0.4097788750536711, "grad_norm": 0.31640625, "learning_rate": 0.0011602430360107198, "loss": 1.6478, "step": 15270 }, { "epoch": 0.40980571060541005, "grad_norm": 0.3125, "learning_rate": 0.0011602329760377948, "loss": 1.597, "step": 15271 }, { "epoch": 0.409832546157149, "grad_norm": 0.3203125, "learning_rate": 0.0011602229148358835, "loss": 1.6326, "step": 15272 }, { "epoch": 0.4098593817088879, "grad_norm": 0.33203125, "learning_rate": 0.0011602128524050078, "loss": 1.7548, "step": 15273 }, { "epoch": 0.4098862172606269, "grad_norm": 0.3203125, "learning_rate": 0.00116020278874519, "loss": 1.639, "step": 15274 }, { "epoch": 0.40991305281236584, "grad_norm": 0.306640625, "learning_rate": 0.001160192723856452, "loss": 1.5899, "step": 15275 }, { "epoch": 0.4099398883641048, "grad_norm": 0.310546875, "learning_rate": 0.001160182657738816, "loss": 1.6148, "step": 15276 }, { "epoch": 0.4099667239158437, "grad_norm": 0.326171875, "learning_rate": 0.001160172590392304, "loss": 1.7201, "step": 15277 }, { "epoch": 0.40999355946758265, "grad_norm": 0.326171875, "learning_rate": 0.0011601625218169381, "loss": 1.6776, "step": 15278 }, { "epoch": 0.4100203950193216, "grad_norm": 0.314453125, "learning_rate": 0.0011601524520127408, "loss": 1.5897, "step": 15279 }, { "epoch": 0.4100472305710605, "grad_norm": 0.3203125, "learning_rate": 0.0011601423809797335, "loss": 1.5658, "step": 15280 }, { "epoch": 0.4100740661227995, "grad_norm": 0.326171875, "learning_rate": 0.0011601323087179387, "loss": 1.6257, "step": 15281 }, { "epoch": 0.41010090167453844, "grad_norm": 0.3125, "learning_rate": 0.0011601222352273784, "loss": 1.606, "step": 15282 }, { "epoch": 0.4101277372262774, "grad_norm": 0.31640625, "learning_rate": 0.0011601121605080747, "loss": 1.5935, "step": 15283 }, { "epoch": 0.4101545727780163, "grad_norm": 0.30859375, "learning_rate": 0.0011601020845600496, "loss": 1.6402, "step": 15284 }, { "epoch": 0.41018140832975525, "grad_norm": 0.330078125, "learning_rate": 0.0011600920073833254, "loss": 1.7276, "step": 15285 }, { "epoch": 0.4102082438814942, "grad_norm": 0.306640625, "learning_rate": 0.0011600819289779242, "loss": 1.5599, "step": 15286 }, { "epoch": 0.4102350794332332, "grad_norm": 0.318359375, "learning_rate": 0.001160071849343868, "loss": 1.5719, "step": 15287 }, { "epoch": 0.4102619149849721, "grad_norm": 0.3046875, "learning_rate": 0.0011600617684811789, "loss": 1.4846, "step": 15288 }, { "epoch": 0.41028875053671104, "grad_norm": 0.3203125, "learning_rate": 0.001160051686389879, "loss": 1.6656, "step": 15289 }, { "epoch": 0.41031558608845, "grad_norm": 0.314453125, "learning_rate": 0.0011600416030699905, "loss": 1.6003, "step": 15290 }, { "epoch": 0.4103424216401889, "grad_norm": 0.30859375, "learning_rate": 0.0011600315185215356, "loss": 1.5744, "step": 15291 }, { "epoch": 0.41036925719192785, "grad_norm": 0.306640625, "learning_rate": 0.0011600214327445362, "loss": 1.5603, "step": 15292 }, { "epoch": 0.4103960927436668, "grad_norm": 0.314453125, "learning_rate": 0.0011600113457390145, "loss": 1.5953, "step": 15293 }, { "epoch": 0.4104229282954058, "grad_norm": 0.306640625, "learning_rate": 0.0011600012575049927, "loss": 1.5933, "step": 15294 }, { "epoch": 0.4104497638471447, "grad_norm": 0.330078125, "learning_rate": 0.0011599911680424929, "loss": 1.6462, "step": 15295 }, { "epoch": 0.41047659939888365, "grad_norm": 0.330078125, "learning_rate": 0.0011599810773515373, "loss": 1.6849, "step": 15296 }, { "epoch": 0.4105034349506226, "grad_norm": 0.314453125, "learning_rate": 0.001159970985432148, "loss": 1.5676, "step": 15297 }, { "epoch": 0.4105302705023615, "grad_norm": 0.322265625, "learning_rate": 0.001159960892284347, "loss": 1.6694, "step": 15298 }, { "epoch": 0.41055710605410045, "grad_norm": 0.30859375, "learning_rate": 0.0011599507979081565, "loss": 1.5852, "step": 15299 }, { "epoch": 0.41058394160583944, "grad_norm": 0.328125, "learning_rate": 0.0011599407023035989, "loss": 1.6746, "step": 15300 }, { "epoch": 0.4106107771575784, "grad_norm": 0.3125, "learning_rate": 0.001159930605470696, "loss": 1.5052, "step": 15301 }, { "epoch": 0.4106376127093173, "grad_norm": 0.298828125, "learning_rate": 0.00115992050740947, "loss": 1.4514, "step": 15302 }, { "epoch": 0.41066444826105625, "grad_norm": 0.328125, "learning_rate": 0.0011599104081199431, "loss": 1.6151, "step": 15303 }, { "epoch": 0.4106912838127952, "grad_norm": 0.32421875, "learning_rate": 0.0011599003076021377, "loss": 1.6294, "step": 15304 }, { "epoch": 0.4107181193645341, "grad_norm": 0.326171875, "learning_rate": 0.0011598902058560757, "loss": 1.5765, "step": 15305 }, { "epoch": 0.41074495491627305, "grad_norm": 0.32421875, "learning_rate": 0.0011598801028817791, "loss": 1.6885, "step": 15306 }, { "epoch": 0.41077179046801204, "grad_norm": 0.3125, "learning_rate": 0.0011598699986792706, "loss": 1.4726, "step": 15307 }, { "epoch": 0.410798626019751, "grad_norm": 0.3359375, "learning_rate": 0.0011598598932485717, "loss": 1.697, "step": 15308 }, { "epoch": 0.4108254615714899, "grad_norm": 0.318359375, "learning_rate": 0.0011598497865897052, "loss": 1.6124, "step": 15309 }, { "epoch": 0.41085229712322885, "grad_norm": 0.310546875, "learning_rate": 0.001159839678702693, "loss": 1.5088, "step": 15310 }, { "epoch": 0.4108791326749678, "grad_norm": 0.3125, "learning_rate": 0.001159829569587557, "loss": 1.6161, "step": 15311 }, { "epoch": 0.4109059682267067, "grad_norm": 0.326171875, "learning_rate": 0.0011598194592443197, "loss": 1.6085, "step": 15312 }, { "epoch": 0.4109328037784457, "grad_norm": 0.328125, "learning_rate": 0.0011598093476730032, "loss": 1.5659, "step": 15313 }, { "epoch": 0.41095963933018465, "grad_norm": 0.318359375, "learning_rate": 0.0011597992348736299, "loss": 1.5749, "step": 15314 }, { "epoch": 0.4109864748819236, "grad_norm": 0.318359375, "learning_rate": 0.0011597891208462215, "loss": 1.5484, "step": 15315 }, { "epoch": 0.4110133104336625, "grad_norm": 0.322265625, "learning_rate": 0.0011597790055908007, "loss": 1.5636, "step": 15316 }, { "epoch": 0.41104014598540145, "grad_norm": 0.3203125, "learning_rate": 0.001159768889107389, "loss": 1.6228, "step": 15317 }, { "epoch": 0.4110669815371404, "grad_norm": 0.3203125, "learning_rate": 0.0011597587713960095, "loss": 1.5848, "step": 15318 }, { "epoch": 0.4110938170888793, "grad_norm": 0.322265625, "learning_rate": 0.001159748652456684, "loss": 1.5898, "step": 15319 }, { "epoch": 0.4111206526406183, "grad_norm": 0.31640625, "learning_rate": 0.0011597385322894343, "loss": 1.638, "step": 15320 }, { "epoch": 0.41114748819235725, "grad_norm": 0.30859375, "learning_rate": 0.0011597284108942833, "loss": 1.5012, "step": 15321 }, { "epoch": 0.4111743237440962, "grad_norm": 0.337890625, "learning_rate": 0.0011597182882712525, "loss": 1.7665, "step": 15322 }, { "epoch": 0.4112011592958351, "grad_norm": 0.328125, "learning_rate": 0.0011597081644203644, "loss": 1.6554, "step": 15323 }, { "epoch": 0.41122799484757405, "grad_norm": 0.3125, "learning_rate": 0.0011596980393416415, "loss": 1.5403, "step": 15324 }, { "epoch": 0.411254830399313, "grad_norm": 0.31640625, "learning_rate": 0.0011596879130351058, "loss": 1.5642, "step": 15325 }, { "epoch": 0.411281665951052, "grad_norm": 0.330078125, "learning_rate": 0.0011596777855007792, "loss": 1.5764, "step": 15326 }, { "epoch": 0.4113085015027909, "grad_norm": 0.328125, "learning_rate": 0.0011596676567386846, "loss": 1.7088, "step": 15327 }, { "epoch": 0.41133533705452985, "grad_norm": 0.37890625, "learning_rate": 0.0011596575267488435, "loss": 1.7185, "step": 15328 }, { "epoch": 0.4113621726062688, "grad_norm": 0.380859375, "learning_rate": 0.0011596473955312787, "loss": 1.8535, "step": 15329 }, { "epoch": 0.4113890081580077, "grad_norm": 0.349609375, "learning_rate": 0.0011596372630860118, "loss": 1.7803, "step": 15330 }, { "epoch": 0.41141584370974665, "grad_norm": 0.330078125, "learning_rate": 0.0011596271294130656, "loss": 1.8193, "step": 15331 }, { "epoch": 0.4114426792614856, "grad_norm": 0.32421875, "learning_rate": 0.0011596169945124622, "loss": 1.8627, "step": 15332 }, { "epoch": 0.4114695148132246, "grad_norm": 0.353515625, "learning_rate": 0.0011596068583842236, "loss": 2.0421, "step": 15333 }, { "epoch": 0.4114963503649635, "grad_norm": 0.32421875, "learning_rate": 0.0011595967210283724, "loss": 1.8996, "step": 15334 }, { "epoch": 0.41152318591670245, "grad_norm": 0.31640625, "learning_rate": 0.0011595865824449304, "loss": 1.7892, "step": 15335 }, { "epoch": 0.4115500214684414, "grad_norm": 0.3203125, "learning_rate": 0.00115957644263392, "loss": 1.9555, "step": 15336 }, { "epoch": 0.4115768570201803, "grad_norm": 0.32421875, "learning_rate": 0.0011595663015953638, "loss": 1.8794, "step": 15337 }, { "epoch": 0.41160369257191926, "grad_norm": 0.310546875, "learning_rate": 0.0011595561593292837, "loss": 1.7487, "step": 15338 }, { "epoch": 0.41163052812365825, "grad_norm": 0.322265625, "learning_rate": 0.001159546015835702, "loss": 1.9818, "step": 15339 }, { "epoch": 0.4116573636753972, "grad_norm": 0.30859375, "learning_rate": 0.0011595358711146409, "loss": 1.6952, "step": 15340 }, { "epoch": 0.4116841992271361, "grad_norm": 0.330078125, "learning_rate": 0.0011595257251661227, "loss": 1.8899, "step": 15341 }, { "epoch": 0.41171103477887505, "grad_norm": 0.32421875, "learning_rate": 0.0011595155779901697, "loss": 1.8715, "step": 15342 }, { "epoch": 0.411737870330614, "grad_norm": 0.326171875, "learning_rate": 0.0011595054295868042, "loss": 1.8614, "step": 15343 }, { "epoch": 0.4117647058823529, "grad_norm": 0.3125, "learning_rate": 0.0011594952799560483, "loss": 1.8321, "step": 15344 }, { "epoch": 0.4117915414340919, "grad_norm": 0.310546875, "learning_rate": 0.0011594851290979244, "loss": 1.7897, "step": 15345 }, { "epoch": 0.41181837698583085, "grad_norm": 0.314453125, "learning_rate": 0.0011594749770124546, "loss": 1.8239, "step": 15346 }, { "epoch": 0.4118452125375698, "grad_norm": 0.31640625, "learning_rate": 0.0011594648236996615, "loss": 1.7266, "step": 15347 }, { "epoch": 0.4118720480893087, "grad_norm": 0.310546875, "learning_rate": 0.0011594546691595673, "loss": 1.7619, "step": 15348 }, { "epoch": 0.41189888364104765, "grad_norm": 0.306640625, "learning_rate": 0.0011594445133921937, "loss": 1.7442, "step": 15349 }, { "epoch": 0.4119257191927866, "grad_norm": 0.318359375, "learning_rate": 0.0011594343563975638, "loss": 1.9003, "step": 15350 }, { "epoch": 0.4119525547445255, "grad_norm": 0.31640625, "learning_rate": 0.0011594241981756994, "loss": 1.8607, "step": 15351 }, { "epoch": 0.4119793902962645, "grad_norm": 0.3125, "learning_rate": 0.001159414038726623, "loss": 1.8513, "step": 15352 }, { "epoch": 0.41200622584800345, "grad_norm": 0.333984375, "learning_rate": 0.0011594038780503564, "loss": 1.9466, "step": 15353 }, { "epoch": 0.4120330613997424, "grad_norm": 0.314453125, "learning_rate": 0.0011593937161469227, "loss": 1.8299, "step": 15354 }, { "epoch": 0.4120598969514813, "grad_norm": 0.31640625, "learning_rate": 0.0011593835530163436, "loss": 1.8732, "step": 15355 }, { "epoch": 0.41208673250322025, "grad_norm": 0.30859375, "learning_rate": 0.0011593733886586415, "loss": 1.8395, "step": 15356 }, { "epoch": 0.4121135680549592, "grad_norm": 0.30859375, "learning_rate": 0.0011593632230738388, "loss": 1.8432, "step": 15357 }, { "epoch": 0.4121404036066982, "grad_norm": 0.3046875, "learning_rate": 0.0011593530562619577, "loss": 1.7004, "step": 15358 }, { "epoch": 0.4121672391584371, "grad_norm": 0.314453125, "learning_rate": 0.0011593428882230208, "loss": 1.7536, "step": 15359 }, { "epoch": 0.41219407471017605, "grad_norm": 0.314453125, "learning_rate": 0.0011593327189570498, "loss": 1.7597, "step": 15360 }, { "epoch": 0.412220910261915, "grad_norm": 0.314453125, "learning_rate": 0.0011593225484640675, "loss": 1.8333, "step": 15361 }, { "epoch": 0.4122477458136539, "grad_norm": 0.306640625, "learning_rate": 0.0011593123767440962, "loss": 1.7112, "step": 15362 }, { "epoch": 0.41227458136539286, "grad_norm": 0.294921875, "learning_rate": 0.001159302203797158, "loss": 1.6885, "step": 15363 }, { "epoch": 0.4123014169171318, "grad_norm": 0.31640625, "learning_rate": 0.0011592920296232755, "loss": 1.8411, "step": 15364 }, { "epoch": 0.4123282524688708, "grad_norm": 0.30859375, "learning_rate": 0.0011592818542224706, "loss": 1.7869, "step": 15365 }, { "epoch": 0.4123550880206097, "grad_norm": 0.310546875, "learning_rate": 0.001159271677594766, "loss": 1.7699, "step": 15366 }, { "epoch": 0.41238192357234865, "grad_norm": 0.3046875, "learning_rate": 0.0011592614997401837, "loss": 1.6928, "step": 15367 }, { "epoch": 0.4124087591240876, "grad_norm": 0.3203125, "learning_rate": 0.0011592513206587465, "loss": 1.7753, "step": 15368 }, { "epoch": 0.4124355946758265, "grad_norm": 0.30859375, "learning_rate": 0.0011592411403504763, "loss": 1.7494, "step": 15369 }, { "epoch": 0.41246243022756546, "grad_norm": 0.306640625, "learning_rate": 0.0011592309588153954, "loss": 1.6119, "step": 15370 }, { "epoch": 0.41248926577930445, "grad_norm": 0.328125, "learning_rate": 0.0011592207760535264, "loss": 1.8571, "step": 15371 }, { "epoch": 0.4125161013310434, "grad_norm": 0.306640625, "learning_rate": 0.0011592105920648916, "loss": 1.6849, "step": 15372 }, { "epoch": 0.4125429368827823, "grad_norm": 0.3046875, "learning_rate": 0.0011592004068495135, "loss": 1.7348, "step": 15373 }, { "epoch": 0.41256977243452125, "grad_norm": 0.314453125, "learning_rate": 0.001159190220407414, "loss": 1.704, "step": 15374 }, { "epoch": 0.4125966079862602, "grad_norm": 0.322265625, "learning_rate": 0.0011591800327386155, "loss": 1.805, "step": 15375 }, { "epoch": 0.4126234435379991, "grad_norm": 0.322265625, "learning_rate": 0.0011591698438431408, "loss": 1.8822, "step": 15376 }, { "epoch": 0.41265027908973806, "grad_norm": 0.3046875, "learning_rate": 0.0011591596537210117, "loss": 1.6383, "step": 15377 }, { "epoch": 0.41267711464147705, "grad_norm": 0.302734375, "learning_rate": 0.001159149462372251, "loss": 1.7479, "step": 15378 }, { "epoch": 0.412703950193216, "grad_norm": 0.30859375, "learning_rate": 0.001159139269796881, "loss": 1.7213, "step": 15379 }, { "epoch": 0.4127307857449549, "grad_norm": 0.29296875, "learning_rate": 0.0011591290759949237, "loss": 1.6257, "step": 15380 }, { "epoch": 0.41275762129669386, "grad_norm": 0.314453125, "learning_rate": 0.0011591188809664018, "loss": 1.812, "step": 15381 }, { "epoch": 0.4127844568484328, "grad_norm": 0.30078125, "learning_rate": 0.0011591086847113376, "loss": 1.7038, "step": 15382 }, { "epoch": 0.4128112924001717, "grad_norm": 0.298828125, "learning_rate": 0.0011590984872297534, "loss": 1.6387, "step": 15383 }, { "epoch": 0.4128381279519107, "grad_norm": 0.314453125, "learning_rate": 0.0011590882885216718, "loss": 1.7675, "step": 15384 }, { "epoch": 0.41286496350364965, "grad_norm": 0.318359375, "learning_rate": 0.0011590780885871149, "loss": 1.8354, "step": 15385 }, { "epoch": 0.4128917990553886, "grad_norm": 0.3046875, "learning_rate": 0.001159067887426105, "loss": 1.663, "step": 15386 }, { "epoch": 0.4129186346071275, "grad_norm": 0.314453125, "learning_rate": 0.0011590576850386645, "loss": 1.828, "step": 15387 }, { "epoch": 0.41294547015886646, "grad_norm": 0.3125, "learning_rate": 0.0011590474814248163, "loss": 1.7951, "step": 15388 }, { "epoch": 0.4129723057106054, "grad_norm": 0.3125, "learning_rate": 0.0011590372765845822, "loss": 1.7509, "step": 15389 }, { "epoch": 0.4129991412623443, "grad_norm": 0.30078125, "learning_rate": 0.0011590270705179847, "loss": 1.6831, "step": 15390 }, { "epoch": 0.4130259768140833, "grad_norm": 0.322265625, "learning_rate": 0.0011590168632250465, "loss": 1.7986, "step": 15391 }, { "epoch": 0.41305281236582225, "grad_norm": 0.3125, "learning_rate": 0.0011590066547057896, "loss": 1.8378, "step": 15392 }, { "epoch": 0.4130796479175612, "grad_norm": 0.3046875, "learning_rate": 0.0011589964449602367, "loss": 1.705, "step": 15393 }, { "epoch": 0.4131064834693001, "grad_norm": 0.310546875, "learning_rate": 0.00115898623398841, "loss": 1.6582, "step": 15394 }, { "epoch": 0.41313331902103906, "grad_norm": 0.30859375, "learning_rate": 0.0011589760217903318, "loss": 1.7273, "step": 15395 }, { "epoch": 0.413160154572778, "grad_norm": 0.314453125, "learning_rate": 0.0011589658083660248, "loss": 1.7564, "step": 15396 }, { "epoch": 0.413186990124517, "grad_norm": 0.3046875, "learning_rate": 0.0011589555937155114, "loss": 1.765, "step": 15397 }, { "epoch": 0.4132138256762559, "grad_norm": 0.318359375, "learning_rate": 0.0011589453778388138, "loss": 1.829, "step": 15398 }, { "epoch": 0.41324066122799485, "grad_norm": 0.322265625, "learning_rate": 0.0011589351607359543, "loss": 1.8029, "step": 15399 }, { "epoch": 0.4132674967797338, "grad_norm": 0.314453125, "learning_rate": 0.0011589249424069556, "loss": 1.7419, "step": 15400 }, { "epoch": 0.4132943323314727, "grad_norm": 0.3203125, "learning_rate": 0.00115891472285184, "loss": 1.8063, "step": 15401 }, { "epoch": 0.41332116788321166, "grad_norm": 0.30859375, "learning_rate": 0.00115890450207063, "loss": 1.6772, "step": 15402 }, { "epoch": 0.4133480034349506, "grad_norm": 0.3046875, "learning_rate": 0.0011588942800633479, "loss": 1.6571, "step": 15403 }, { "epoch": 0.4133748389866896, "grad_norm": 0.31640625, "learning_rate": 0.0011588840568300164, "loss": 1.7394, "step": 15404 }, { "epoch": 0.4134016745384285, "grad_norm": 0.31640625, "learning_rate": 0.0011588738323706574, "loss": 1.7462, "step": 15405 }, { "epoch": 0.41342851009016746, "grad_norm": 0.318359375, "learning_rate": 0.0011588636066852936, "loss": 1.7811, "step": 15406 }, { "epoch": 0.4134553456419064, "grad_norm": 0.322265625, "learning_rate": 0.0011588533797739478, "loss": 1.7341, "step": 15407 }, { "epoch": 0.4134821811936453, "grad_norm": 0.314453125, "learning_rate": 0.0011588431516366417, "loss": 1.6964, "step": 15408 }, { "epoch": 0.41350901674538426, "grad_norm": 0.322265625, "learning_rate": 0.0011588329222733985, "loss": 1.7472, "step": 15409 }, { "epoch": 0.41353585229712325, "grad_norm": 0.296875, "learning_rate": 0.0011588226916842402, "loss": 1.5941, "step": 15410 }, { "epoch": 0.4135626878488622, "grad_norm": 0.306640625, "learning_rate": 0.0011588124598691891, "loss": 1.7292, "step": 15411 }, { "epoch": 0.4135895234006011, "grad_norm": 0.296875, "learning_rate": 0.001158802226828268, "loss": 1.6026, "step": 15412 }, { "epoch": 0.41361635895234006, "grad_norm": 0.3046875, "learning_rate": 0.0011587919925614993, "loss": 1.7322, "step": 15413 }, { "epoch": 0.413643194504079, "grad_norm": 0.30078125, "learning_rate": 0.0011587817570689054, "loss": 1.6639, "step": 15414 }, { "epoch": 0.4136700300558179, "grad_norm": 0.326171875, "learning_rate": 0.0011587715203505086, "loss": 1.8187, "step": 15415 }, { "epoch": 0.4136968656075569, "grad_norm": 0.310546875, "learning_rate": 0.0011587612824063315, "loss": 1.7588, "step": 15416 }, { "epoch": 0.41372370115929585, "grad_norm": 0.29296875, "learning_rate": 0.0011587510432363966, "loss": 1.6059, "step": 15417 }, { "epoch": 0.4137505367110348, "grad_norm": 0.31640625, "learning_rate": 0.0011587408028407263, "loss": 1.7417, "step": 15418 }, { "epoch": 0.4137773722627737, "grad_norm": 0.306640625, "learning_rate": 0.0011587305612193429, "loss": 1.6628, "step": 15419 }, { "epoch": 0.41380420781451266, "grad_norm": 0.30859375, "learning_rate": 0.0011587203183722689, "loss": 1.7387, "step": 15420 }, { "epoch": 0.4138310433662516, "grad_norm": 0.296875, "learning_rate": 0.0011587100742995273, "loss": 1.546, "step": 15421 }, { "epoch": 0.41385787891799053, "grad_norm": 0.30078125, "learning_rate": 0.00115869982900114, "loss": 1.6433, "step": 15422 }, { "epoch": 0.4138847144697295, "grad_norm": 0.3125, "learning_rate": 0.0011586895824771295, "loss": 1.6904, "step": 15423 }, { "epoch": 0.41391155002146846, "grad_norm": 0.314453125, "learning_rate": 0.0011586793347275188, "loss": 1.7253, "step": 15424 }, { "epoch": 0.4139383855732074, "grad_norm": 0.31640625, "learning_rate": 0.0011586690857523297, "loss": 1.7903, "step": 15425 }, { "epoch": 0.4139652211249463, "grad_norm": 0.32421875, "learning_rate": 0.001158658835551585, "loss": 1.8064, "step": 15426 }, { "epoch": 0.41399205667668526, "grad_norm": 0.302734375, "learning_rate": 0.0011586485841253073, "loss": 1.6903, "step": 15427 }, { "epoch": 0.4140188922284242, "grad_norm": 0.30078125, "learning_rate": 0.001158638331473519, "loss": 1.5939, "step": 15428 }, { "epoch": 0.4140457277801632, "grad_norm": 0.3046875, "learning_rate": 0.0011586280775962422, "loss": 1.7245, "step": 15429 }, { "epoch": 0.4140725633319021, "grad_norm": 0.310546875, "learning_rate": 0.0011586178224935002, "loss": 1.7721, "step": 15430 }, { "epoch": 0.41409939888364106, "grad_norm": 0.302734375, "learning_rate": 0.0011586075661653148, "loss": 1.7038, "step": 15431 }, { "epoch": 0.41412623443538, "grad_norm": 0.30859375, "learning_rate": 0.0011585973086117088, "loss": 1.6838, "step": 15432 }, { "epoch": 0.4141530699871189, "grad_norm": 0.30859375, "learning_rate": 0.0011585870498327048, "loss": 1.6614, "step": 15433 }, { "epoch": 0.41417990553885786, "grad_norm": 0.30859375, "learning_rate": 0.0011585767898283248, "loss": 1.7655, "step": 15434 }, { "epoch": 0.4142067410905968, "grad_norm": 0.314453125, "learning_rate": 0.0011585665285985922, "loss": 1.7468, "step": 15435 }, { "epoch": 0.4142335766423358, "grad_norm": 0.31640625, "learning_rate": 0.0011585562661435285, "loss": 1.717, "step": 15436 }, { "epoch": 0.4142604121940747, "grad_norm": 0.30859375, "learning_rate": 0.0011585460024631567, "loss": 1.6693, "step": 15437 }, { "epoch": 0.41428724774581366, "grad_norm": 0.310546875, "learning_rate": 0.0011585357375574995, "loss": 1.7497, "step": 15438 }, { "epoch": 0.4143140832975526, "grad_norm": 0.3125, "learning_rate": 0.001158525471426579, "loss": 1.7466, "step": 15439 }, { "epoch": 0.41434091884929153, "grad_norm": 0.30859375, "learning_rate": 0.0011585152040704183, "loss": 1.6726, "step": 15440 }, { "epoch": 0.41436775440103046, "grad_norm": 0.296875, "learning_rate": 0.0011585049354890392, "loss": 1.5749, "step": 15441 }, { "epoch": 0.41439458995276945, "grad_norm": 0.3203125, "learning_rate": 0.0011584946656824646, "loss": 1.7466, "step": 15442 }, { "epoch": 0.4144214255045084, "grad_norm": 0.322265625, "learning_rate": 0.001158484394650717, "loss": 1.8395, "step": 15443 }, { "epoch": 0.4144482610562473, "grad_norm": 0.318359375, "learning_rate": 0.0011584741223938193, "loss": 1.7777, "step": 15444 }, { "epoch": 0.41447509660798626, "grad_norm": 0.318359375, "learning_rate": 0.0011584638489117933, "loss": 1.7142, "step": 15445 }, { "epoch": 0.4145019321597252, "grad_norm": 0.318359375, "learning_rate": 0.0011584535742046622, "loss": 1.7752, "step": 15446 }, { "epoch": 0.41452876771146413, "grad_norm": 0.3125, "learning_rate": 0.001158443298272448, "loss": 1.6595, "step": 15447 }, { "epoch": 0.41455560326320307, "grad_norm": 0.328125, "learning_rate": 0.0011584330211151736, "loss": 1.747, "step": 15448 }, { "epoch": 0.41458243881494206, "grad_norm": 0.302734375, "learning_rate": 0.0011584227427328614, "loss": 1.5944, "step": 15449 }, { "epoch": 0.414609274366681, "grad_norm": 0.314453125, "learning_rate": 0.0011584124631255342, "loss": 1.7704, "step": 15450 }, { "epoch": 0.4146361099184199, "grad_norm": 0.298828125, "learning_rate": 0.0011584021822932142, "loss": 1.6363, "step": 15451 }, { "epoch": 0.41466294547015886, "grad_norm": 0.318359375, "learning_rate": 0.0011583919002359242, "loss": 1.7566, "step": 15452 }, { "epoch": 0.4146897810218978, "grad_norm": 0.294921875, "learning_rate": 0.0011583816169536865, "loss": 1.5923, "step": 15453 }, { "epoch": 0.41471661657363673, "grad_norm": 0.298828125, "learning_rate": 0.0011583713324465237, "loss": 1.5892, "step": 15454 }, { "epoch": 0.4147434521253757, "grad_norm": 0.30859375, "learning_rate": 0.0011583610467144588, "loss": 1.6005, "step": 15455 }, { "epoch": 0.41477028767711466, "grad_norm": 0.33203125, "learning_rate": 0.001158350759757514, "loss": 1.8174, "step": 15456 }, { "epoch": 0.4147971232288536, "grad_norm": 0.30859375, "learning_rate": 0.0011583404715757115, "loss": 1.5714, "step": 15457 }, { "epoch": 0.4148239587805925, "grad_norm": 0.314453125, "learning_rate": 0.0011583301821690747, "loss": 1.6542, "step": 15458 }, { "epoch": 0.41485079433233146, "grad_norm": 0.310546875, "learning_rate": 0.0011583198915376255, "loss": 1.7128, "step": 15459 }, { "epoch": 0.4148776298840704, "grad_norm": 0.28515625, "learning_rate": 0.0011583095996813867, "loss": 1.4473, "step": 15460 }, { "epoch": 0.41490446543580933, "grad_norm": 0.31640625, "learning_rate": 0.001158299306600381, "loss": 1.7502, "step": 15461 }, { "epoch": 0.4149313009875483, "grad_norm": 0.314453125, "learning_rate": 0.001158289012294631, "loss": 1.7148, "step": 15462 }, { "epoch": 0.41495813653928726, "grad_norm": 0.333984375, "learning_rate": 0.001158278716764159, "loss": 1.7144, "step": 15463 }, { "epoch": 0.4149849720910262, "grad_norm": 0.3125, "learning_rate": 0.0011582684200089877, "loss": 1.6367, "step": 15464 }, { "epoch": 0.41501180764276513, "grad_norm": 0.31640625, "learning_rate": 0.00115825812202914, "loss": 1.7363, "step": 15465 }, { "epoch": 0.41503864319450406, "grad_norm": 0.302734375, "learning_rate": 0.001158247822824638, "loss": 1.5556, "step": 15466 }, { "epoch": 0.415065478746243, "grad_norm": 0.306640625, "learning_rate": 0.0011582375223955046, "loss": 1.622, "step": 15467 }, { "epoch": 0.415092314297982, "grad_norm": 0.3125, "learning_rate": 0.0011582272207417623, "loss": 1.7853, "step": 15468 }, { "epoch": 0.4151191498497209, "grad_norm": 0.31640625, "learning_rate": 0.0011582169178634336, "loss": 1.7103, "step": 15469 }, { "epoch": 0.41514598540145986, "grad_norm": 0.30078125, "learning_rate": 0.0011582066137605413, "loss": 1.5991, "step": 15470 }, { "epoch": 0.4151728209531988, "grad_norm": 0.302734375, "learning_rate": 0.0011581963084331078, "loss": 1.5996, "step": 15471 }, { "epoch": 0.41519965650493773, "grad_norm": 0.3125, "learning_rate": 0.0011581860018811559, "loss": 1.6821, "step": 15472 }, { "epoch": 0.41522649205667667, "grad_norm": 0.314453125, "learning_rate": 0.0011581756941047082, "loss": 1.7275, "step": 15473 }, { "epoch": 0.41525332760841566, "grad_norm": 0.298828125, "learning_rate": 0.001158165385103787, "loss": 1.5143, "step": 15474 }, { "epoch": 0.4152801631601546, "grad_norm": 0.30859375, "learning_rate": 0.0011581550748784154, "loss": 1.6769, "step": 15475 }, { "epoch": 0.4153069987118935, "grad_norm": 0.31640625, "learning_rate": 0.001158144763428616, "loss": 1.7209, "step": 15476 }, { "epoch": 0.41533383426363246, "grad_norm": 0.31640625, "learning_rate": 0.001158134450754411, "loss": 1.7226, "step": 15477 }, { "epoch": 0.4153606698153714, "grad_norm": 0.318359375, "learning_rate": 0.0011581241368558229, "loss": 1.7625, "step": 15478 }, { "epoch": 0.41538750536711033, "grad_norm": 0.306640625, "learning_rate": 0.001158113821732875, "loss": 1.663, "step": 15479 }, { "epoch": 0.41541434091884927, "grad_norm": 0.3125, "learning_rate": 0.0011581035053855895, "loss": 1.7109, "step": 15480 }, { "epoch": 0.41544117647058826, "grad_norm": 0.322265625, "learning_rate": 0.0011580931878139892, "loss": 1.6901, "step": 15481 }, { "epoch": 0.4154680120223272, "grad_norm": 0.306640625, "learning_rate": 0.0011580828690180964, "loss": 1.6768, "step": 15482 }, { "epoch": 0.41549484757406613, "grad_norm": 0.296875, "learning_rate": 0.001158072548997934, "loss": 1.5884, "step": 15483 }, { "epoch": 0.41552168312580506, "grad_norm": 0.32421875, "learning_rate": 0.0011580622277535247, "loss": 1.7226, "step": 15484 }, { "epoch": 0.415548518677544, "grad_norm": 0.30078125, "learning_rate": 0.001158051905284891, "loss": 1.6032, "step": 15485 }, { "epoch": 0.41557535422928293, "grad_norm": 0.3125, "learning_rate": 0.0011580415815920556, "loss": 1.617, "step": 15486 }, { "epoch": 0.4156021897810219, "grad_norm": 0.3203125, "learning_rate": 0.0011580312566750414, "loss": 1.7124, "step": 15487 }, { "epoch": 0.41562902533276086, "grad_norm": 0.3203125, "learning_rate": 0.0011580209305338707, "loss": 1.7195, "step": 15488 }, { "epoch": 0.4156558608844998, "grad_norm": 0.30859375, "learning_rate": 0.001158010603168566, "loss": 1.5898, "step": 15489 }, { "epoch": 0.41568269643623873, "grad_norm": 0.3203125, "learning_rate": 0.0011580002745791504, "loss": 1.6394, "step": 15490 }, { "epoch": 0.41570953198797767, "grad_norm": 0.306640625, "learning_rate": 0.0011579899447656465, "loss": 1.6235, "step": 15491 }, { "epoch": 0.4157363675397166, "grad_norm": 0.3203125, "learning_rate": 0.0011579796137280766, "loss": 1.6295, "step": 15492 }, { "epoch": 0.41576320309145554, "grad_norm": 0.306640625, "learning_rate": 0.0011579692814664636, "loss": 1.6359, "step": 15493 }, { "epoch": 0.4157900386431945, "grad_norm": 0.322265625, "learning_rate": 0.0011579589479808303, "loss": 1.6988, "step": 15494 }, { "epoch": 0.41581687419493346, "grad_norm": 0.306640625, "learning_rate": 0.001157948613271199, "loss": 1.5564, "step": 15495 }, { "epoch": 0.4158437097466724, "grad_norm": 0.3046875, "learning_rate": 0.0011579382773375928, "loss": 1.5666, "step": 15496 }, { "epoch": 0.41587054529841133, "grad_norm": 0.31640625, "learning_rate": 0.001157927940180034, "loss": 1.683, "step": 15497 }, { "epoch": 0.41589738085015027, "grad_norm": 0.30859375, "learning_rate": 0.0011579176017985455, "loss": 1.6072, "step": 15498 }, { "epoch": 0.4159242164018892, "grad_norm": 0.31640625, "learning_rate": 0.0011579072621931501, "loss": 1.6444, "step": 15499 }, { "epoch": 0.4159510519536282, "grad_norm": 0.31640625, "learning_rate": 0.00115789692136387, "loss": 1.572, "step": 15500 }, { "epoch": 0.4159778875053671, "grad_norm": 0.3125, "learning_rate": 0.0011578865793107284, "loss": 1.6872, "step": 15501 }, { "epoch": 0.41600472305710606, "grad_norm": 0.3125, "learning_rate": 0.0011578762360337476, "loss": 1.6614, "step": 15502 }, { "epoch": 0.416031558608845, "grad_norm": 0.30859375, "learning_rate": 0.0011578658915329507, "loss": 1.6711, "step": 15503 }, { "epoch": 0.41605839416058393, "grad_norm": 0.310546875, "learning_rate": 0.00115785554580836, "loss": 1.7355, "step": 15504 }, { "epoch": 0.41608522971232287, "grad_norm": 0.310546875, "learning_rate": 0.0011578451988599986, "loss": 1.6775, "step": 15505 }, { "epoch": 0.4161120652640618, "grad_norm": 0.3046875, "learning_rate": 0.0011578348506878886, "loss": 1.5527, "step": 15506 }, { "epoch": 0.4161389008158008, "grad_norm": 0.3125, "learning_rate": 0.0011578245012920534, "loss": 1.6299, "step": 15507 }, { "epoch": 0.41616573636753973, "grad_norm": 0.322265625, "learning_rate": 0.0011578141506725151, "loss": 1.6886, "step": 15508 }, { "epoch": 0.41619257191927866, "grad_norm": 0.3203125, "learning_rate": 0.0011578037988292968, "loss": 1.6519, "step": 15509 }, { "epoch": 0.4162194074710176, "grad_norm": 0.322265625, "learning_rate": 0.001157793445762421, "loss": 1.6699, "step": 15510 }, { "epoch": 0.41624624302275653, "grad_norm": 0.330078125, "learning_rate": 0.0011577830914719102, "loss": 1.6343, "step": 15511 }, { "epoch": 0.41627307857449547, "grad_norm": 0.322265625, "learning_rate": 0.0011577727359577878, "loss": 1.6865, "step": 15512 }, { "epoch": 0.41629991412623446, "grad_norm": 0.3125, "learning_rate": 0.0011577623792200762, "loss": 1.5792, "step": 15513 }, { "epoch": 0.4163267496779734, "grad_norm": 0.328125, "learning_rate": 0.0011577520212587978, "loss": 1.7193, "step": 15514 }, { "epoch": 0.41635358522971233, "grad_norm": 0.310546875, "learning_rate": 0.0011577416620739756, "loss": 1.6143, "step": 15515 }, { "epoch": 0.41638042078145127, "grad_norm": 0.318359375, "learning_rate": 0.0011577313016656323, "loss": 1.6725, "step": 15516 }, { "epoch": 0.4164072563331902, "grad_norm": 0.30859375, "learning_rate": 0.0011577209400337905, "loss": 1.6515, "step": 15517 }, { "epoch": 0.41643409188492914, "grad_norm": 0.32421875, "learning_rate": 0.0011577105771784731, "loss": 1.6607, "step": 15518 }, { "epoch": 0.41646092743666807, "grad_norm": 0.31640625, "learning_rate": 0.001157700213099703, "loss": 1.5941, "step": 15519 }, { "epoch": 0.41648776298840706, "grad_norm": 0.314453125, "learning_rate": 0.0011576898477975025, "loss": 1.6276, "step": 15520 }, { "epoch": 0.416514598540146, "grad_norm": 0.3046875, "learning_rate": 0.0011576794812718947, "loss": 1.5863, "step": 15521 }, { "epoch": 0.41654143409188493, "grad_norm": 0.3125, "learning_rate": 0.001157669113522902, "loss": 1.664, "step": 15522 }, { "epoch": 0.41656826964362387, "grad_norm": 0.310546875, "learning_rate": 0.0011576587445505473, "loss": 1.5321, "step": 15523 }, { "epoch": 0.4165951051953628, "grad_norm": 0.318359375, "learning_rate": 0.0011576483743548536, "loss": 1.6024, "step": 15524 }, { "epoch": 0.41662194074710174, "grad_norm": 0.333984375, "learning_rate": 0.0011576380029358433, "loss": 1.7216, "step": 15525 }, { "epoch": 0.41664877629884073, "grad_norm": 0.314453125, "learning_rate": 0.0011576276302935394, "loss": 1.6173, "step": 15526 }, { "epoch": 0.41667561185057966, "grad_norm": 0.318359375, "learning_rate": 0.0011576172564279643, "loss": 1.6289, "step": 15527 }, { "epoch": 0.4167024474023186, "grad_norm": 0.322265625, "learning_rate": 0.0011576068813391413, "loss": 1.714, "step": 15528 }, { "epoch": 0.41672928295405753, "grad_norm": 0.3046875, "learning_rate": 0.0011575965050270925, "loss": 1.4147, "step": 15529 }, { "epoch": 0.41675611850579647, "grad_norm": 0.31640625, "learning_rate": 0.0011575861274918412, "loss": 1.5981, "step": 15530 }, { "epoch": 0.4167829540575354, "grad_norm": 0.3125, "learning_rate": 0.00115757574873341, "loss": 1.6668, "step": 15531 }, { "epoch": 0.41680978960927434, "grad_norm": 0.310546875, "learning_rate": 0.0011575653687518216, "loss": 1.5842, "step": 15532 }, { "epoch": 0.41683662516101333, "grad_norm": 0.302734375, "learning_rate": 0.0011575549875470987, "loss": 1.5752, "step": 15533 }, { "epoch": 0.41686346071275227, "grad_norm": 0.3046875, "learning_rate": 0.0011575446051192645, "loss": 1.5604, "step": 15534 }, { "epoch": 0.4168902962644912, "grad_norm": 0.326171875, "learning_rate": 0.0011575342214683413, "loss": 1.6171, "step": 15535 }, { "epoch": 0.41691713181623014, "grad_norm": 0.322265625, "learning_rate": 0.001157523836594352, "loss": 1.6359, "step": 15536 }, { "epoch": 0.41694396736796907, "grad_norm": 0.33203125, "learning_rate": 0.0011575134504973196, "loss": 1.6681, "step": 15537 }, { "epoch": 0.416970802919708, "grad_norm": 0.302734375, "learning_rate": 0.0011575030631772664, "loss": 1.5627, "step": 15538 }, { "epoch": 0.416997638471447, "grad_norm": 0.3046875, "learning_rate": 0.0011574926746342159, "loss": 1.4752, "step": 15539 }, { "epoch": 0.41702447402318593, "grad_norm": 0.314453125, "learning_rate": 0.0011574822848681903, "loss": 1.5711, "step": 15540 }, { "epoch": 0.41705130957492487, "grad_norm": 0.337890625, "learning_rate": 0.0011574718938792124, "loss": 1.7585, "step": 15541 }, { "epoch": 0.4170781451266638, "grad_norm": 0.3046875, "learning_rate": 0.0011574615016673054, "loss": 1.5502, "step": 15542 }, { "epoch": 0.41710498067840274, "grad_norm": 0.30859375, "learning_rate": 0.0011574511082324919, "loss": 1.5904, "step": 15543 }, { "epoch": 0.41713181623014167, "grad_norm": 0.310546875, "learning_rate": 0.0011574407135747946, "loss": 1.5172, "step": 15544 }, { "epoch": 0.41715865178188066, "grad_norm": 0.30859375, "learning_rate": 0.0011574303176942362, "loss": 1.5461, "step": 15545 }, { "epoch": 0.4171854873336196, "grad_norm": 0.318359375, "learning_rate": 0.00115741992059084, "loss": 1.5971, "step": 15546 }, { "epoch": 0.41721232288535853, "grad_norm": 0.33203125, "learning_rate": 0.0011574095222646281, "loss": 1.6156, "step": 15547 }, { "epoch": 0.41723915843709747, "grad_norm": 0.30859375, "learning_rate": 0.001157399122715624, "loss": 1.5893, "step": 15548 }, { "epoch": 0.4172659939888364, "grad_norm": 0.310546875, "learning_rate": 0.00115738872194385, "loss": 1.5787, "step": 15549 }, { "epoch": 0.41729282954057534, "grad_norm": 0.330078125, "learning_rate": 0.0011573783199493293, "loss": 1.7112, "step": 15550 }, { "epoch": 0.4173196650923143, "grad_norm": 0.3125, "learning_rate": 0.0011573679167320846, "loss": 1.5931, "step": 15551 }, { "epoch": 0.41734650064405326, "grad_norm": 0.306640625, "learning_rate": 0.0011573575122921385, "loss": 1.539, "step": 15552 }, { "epoch": 0.4173733361957922, "grad_norm": 0.306640625, "learning_rate": 0.0011573471066295142, "loss": 1.4618, "step": 15553 }, { "epoch": 0.41740017174753113, "grad_norm": 0.328125, "learning_rate": 0.0011573366997442343, "loss": 1.6447, "step": 15554 }, { "epoch": 0.41742700729927007, "grad_norm": 0.32421875, "learning_rate": 0.0011573262916363214, "loss": 1.5947, "step": 15555 }, { "epoch": 0.417453842851009, "grad_norm": 0.314453125, "learning_rate": 0.0011573158823057986, "loss": 1.6279, "step": 15556 }, { "epoch": 0.41748067840274794, "grad_norm": 0.322265625, "learning_rate": 0.001157305471752689, "loss": 1.5953, "step": 15557 }, { "epoch": 0.41750751395448693, "grad_norm": 0.31640625, "learning_rate": 0.0011572950599770151, "loss": 1.4845, "step": 15558 }, { "epoch": 0.41753434950622587, "grad_norm": 0.322265625, "learning_rate": 0.0011572846469787997, "loss": 1.6573, "step": 15559 }, { "epoch": 0.4175611850579648, "grad_norm": 0.32421875, "learning_rate": 0.0011572742327580657, "loss": 1.6406, "step": 15560 }, { "epoch": 0.41758802060970374, "grad_norm": 0.3125, "learning_rate": 0.0011572638173148362, "loss": 1.4516, "step": 15561 }, { "epoch": 0.41761485616144267, "grad_norm": 0.3203125, "learning_rate": 0.0011572534006491337, "loss": 1.5439, "step": 15562 }, { "epoch": 0.4176416917131816, "grad_norm": 0.330078125, "learning_rate": 0.001157242982760981, "loss": 1.7089, "step": 15563 }, { "epoch": 0.41766852726492054, "grad_norm": 0.322265625, "learning_rate": 0.0011572325636504013, "loss": 1.587, "step": 15564 }, { "epoch": 0.41769536281665953, "grad_norm": 0.326171875, "learning_rate": 0.0011572221433174175, "loss": 1.6995, "step": 15565 }, { "epoch": 0.41772219836839847, "grad_norm": 0.322265625, "learning_rate": 0.0011572117217620518, "loss": 1.5705, "step": 15566 }, { "epoch": 0.4177490339201374, "grad_norm": 0.328125, "learning_rate": 0.0011572012989843278, "loss": 1.6985, "step": 15567 }, { "epoch": 0.41777586947187634, "grad_norm": 0.3125, "learning_rate": 0.0011571908749842682, "loss": 1.6225, "step": 15568 }, { "epoch": 0.4178027050236153, "grad_norm": 0.3125, "learning_rate": 0.0011571804497618953, "loss": 1.6145, "step": 15569 }, { "epoch": 0.4178295405753542, "grad_norm": 0.36328125, "learning_rate": 0.0011571700233172328, "loss": 1.7645, "step": 15570 }, { "epoch": 0.4178563761270932, "grad_norm": 0.380859375, "learning_rate": 0.0011571595956503032, "loss": 1.8857, "step": 15571 }, { "epoch": 0.41788321167883213, "grad_norm": 0.357421875, "learning_rate": 0.0011571491667611293, "loss": 1.9482, "step": 15572 }, { "epoch": 0.41791004723057107, "grad_norm": 0.33984375, "learning_rate": 0.0011571387366497339, "loss": 1.8803, "step": 15573 }, { "epoch": 0.41793688278231, "grad_norm": 0.337890625, "learning_rate": 0.0011571283053161403, "loss": 1.9517, "step": 15574 }, { "epoch": 0.41796371833404894, "grad_norm": 0.330078125, "learning_rate": 0.0011571178727603707, "loss": 1.7834, "step": 15575 }, { "epoch": 0.4179905538857879, "grad_norm": 0.337890625, "learning_rate": 0.001157107438982449, "loss": 1.9066, "step": 15576 }, { "epoch": 0.4180173894375268, "grad_norm": 0.322265625, "learning_rate": 0.0011570970039823968, "loss": 1.8836, "step": 15577 }, { "epoch": 0.4180442249892658, "grad_norm": 0.314453125, "learning_rate": 0.0011570865677602382, "loss": 1.8305, "step": 15578 }, { "epoch": 0.41807106054100474, "grad_norm": 0.30078125, "learning_rate": 0.0011570761303159953, "loss": 1.6873, "step": 15579 }, { "epoch": 0.41809789609274367, "grad_norm": 0.31640625, "learning_rate": 0.0011570656916496914, "loss": 1.87, "step": 15580 }, { "epoch": 0.4181247316444826, "grad_norm": 0.30078125, "learning_rate": 0.0011570552517613491, "loss": 1.6899, "step": 15581 }, { "epoch": 0.41815156719622154, "grad_norm": 0.330078125, "learning_rate": 0.0011570448106509917, "loss": 1.9301, "step": 15582 }, { "epoch": 0.4181784027479605, "grad_norm": 0.31640625, "learning_rate": 0.0011570343683186416, "loss": 1.8663, "step": 15583 }, { "epoch": 0.41820523829969947, "grad_norm": 0.30078125, "learning_rate": 0.0011570239247643223, "loss": 1.7951, "step": 15584 }, { "epoch": 0.4182320738514384, "grad_norm": 0.306640625, "learning_rate": 0.001157013479988056, "loss": 1.8722, "step": 15585 }, { "epoch": 0.41825890940317734, "grad_norm": 0.3203125, "learning_rate": 0.0011570030339898662, "loss": 1.8663, "step": 15586 }, { "epoch": 0.41828574495491627, "grad_norm": 0.314453125, "learning_rate": 0.0011569925867697757, "loss": 1.9049, "step": 15587 }, { "epoch": 0.4183125805066552, "grad_norm": 0.302734375, "learning_rate": 0.0011569821383278073, "loss": 1.7939, "step": 15588 }, { "epoch": 0.41833941605839414, "grad_norm": 0.294921875, "learning_rate": 0.0011569716886639838, "loss": 1.7187, "step": 15589 }, { "epoch": 0.4183662516101331, "grad_norm": 0.298828125, "learning_rate": 0.0011569612377783285, "loss": 1.7999, "step": 15590 }, { "epoch": 0.41839308716187207, "grad_norm": 0.310546875, "learning_rate": 0.0011569507856708638, "loss": 1.8611, "step": 15591 }, { "epoch": 0.418419922713611, "grad_norm": 0.302734375, "learning_rate": 0.0011569403323416132, "loss": 1.7468, "step": 15592 }, { "epoch": 0.41844675826534994, "grad_norm": 0.306640625, "learning_rate": 0.0011569298777905992, "loss": 1.7483, "step": 15593 }, { "epoch": 0.4184735938170889, "grad_norm": 0.30078125, "learning_rate": 0.0011569194220178451, "loss": 1.6881, "step": 15594 }, { "epoch": 0.4185004293688278, "grad_norm": 0.306640625, "learning_rate": 0.0011569089650233736, "loss": 1.7946, "step": 15595 }, { "epoch": 0.41852726492056674, "grad_norm": 0.306640625, "learning_rate": 0.0011568985068072075, "loss": 1.7571, "step": 15596 }, { "epoch": 0.41855410047230573, "grad_norm": 0.306640625, "learning_rate": 0.00115688804736937, "loss": 1.8435, "step": 15597 }, { "epoch": 0.41858093602404467, "grad_norm": 0.314453125, "learning_rate": 0.001156877586709884, "loss": 1.8558, "step": 15598 }, { "epoch": 0.4186077715757836, "grad_norm": 0.310546875, "learning_rate": 0.0011568671248287723, "loss": 1.7992, "step": 15599 }, { "epoch": 0.41863460712752254, "grad_norm": 0.318359375, "learning_rate": 0.0011568566617260582, "loss": 1.8866, "step": 15600 }, { "epoch": 0.4186614426792615, "grad_norm": 0.318359375, "learning_rate": 0.0011568461974017642, "loss": 1.9009, "step": 15601 }, { "epoch": 0.4186882782310004, "grad_norm": 0.30859375, "learning_rate": 0.0011568357318559135, "loss": 1.7645, "step": 15602 }, { "epoch": 0.41871511378273935, "grad_norm": 0.306640625, "learning_rate": 0.0011568252650885293, "loss": 1.7166, "step": 15603 }, { "epoch": 0.41874194933447834, "grad_norm": 0.3125, "learning_rate": 0.0011568147970996338, "loss": 1.8348, "step": 15604 }, { "epoch": 0.41876878488621727, "grad_norm": 0.314453125, "learning_rate": 0.0011568043278892507, "loss": 1.8479, "step": 15605 }, { "epoch": 0.4187956204379562, "grad_norm": 0.30859375, "learning_rate": 0.0011567938574574028, "loss": 1.7667, "step": 15606 }, { "epoch": 0.41882245598969514, "grad_norm": 0.3046875, "learning_rate": 0.0011567833858041128, "loss": 1.7894, "step": 15607 }, { "epoch": 0.4188492915414341, "grad_norm": 0.306640625, "learning_rate": 0.001156772912929404, "loss": 1.7727, "step": 15608 }, { "epoch": 0.418876127093173, "grad_norm": 0.3046875, "learning_rate": 0.001156762438833299, "loss": 1.7489, "step": 15609 }, { "epoch": 0.418902962644912, "grad_norm": 0.310546875, "learning_rate": 0.0011567519635158215, "loss": 1.8285, "step": 15610 }, { "epoch": 0.41892979819665094, "grad_norm": 0.306640625, "learning_rate": 0.0011567414869769936, "loss": 1.7539, "step": 15611 }, { "epoch": 0.4189566337483899, "grad_norm": 0.298828125, "learning_rate": 0.001156731009216839, "loss": 1.7316, "step": 15612 }, { "epoch": 0.4189834693001288, "grad_norm": 0.318359375, "learning_rate": 0.00115672053023538, "loss": 1.8024, "step": 15613 }, { "epoch": 0.41901030485186774, "grad_norm": 0.30859375, "learning_rate": 0.00115671005003264, "loss": 1.7617, "step": 15614 }, { "epoch": 0.4190371404036067, "grad_norm": 0.30078125, "learning_rate": 0.0011566995686086421, "loss": 1.6295, "step": 15615 }, { "epoch": 0.41906397595534567, "grad_norm": 0.318359375, "learning_rate": 0.001156689085963409, "loss": 1.7692, "step": 15616 }, { "epoch": 0.4190908115070846, "grad_norm": 0.283203125, "learning_rate": 0.0011566786020969638, "loss": 1.5389, "step": 15617 }, { "epoch": 0.41911764705882354, "grad_norm": 0.30078125, "learning_rate": 0.0011566681170093298, "loss": 1.7454, "step": 15618 }, { "epoch": 0.4191444826105625, "grad_norm": 0.302734375, "learning_rate": 0.0011566576307005293, "loss": 1.7289, "step": 15619 }, { "epoch": 0.4191713181623014, "grad_norm": 0.318359375, "learning_rate": 0.001156647143170586, "loss": 1.8845, "step": 15620 }, { "epoch": 0.41919815371404034, "grad_norm": 0.3046875, "learning_rate": 0.0011566366544195226, "loss": 1.7309, "step": 15621 }, { "epoch": 0.4192249892657793, "grad_norm": 0.310546875, "learning_rate": 0.0011566261644473622, "loss": 1.7863, "step": 15622 }, { "epoch": 0.41925182481751827, "grad_norm": 0.314453125, "learning_rate": 0.0011566156732541277, "loss": 1.8333, "step": 15623 }, { "epoch": 0.4192786603692572, "grad_norm": 0.3046875, "learning_rate": 0.001156605180839842, "loss": 1.6962, "step": 15624 }, { "epoch": 0.41930549592099614, "grad_norm": 0.29296875, "learning_rate": 0.0011565946872045287, "loss": 1.7248, "step": 15625 }, { "epoch": 0.4193323314727351, "grad_norm": 0.30859375, "learning_rate": 0.00115658419234821, "loss": 1.7717, "step": 15626 }, { "epoch": 0.419359167024474, "grad_norm": 0.302734375, "learning_rate": 0.0011565736962709096, "loss": 1.7513, "step": 15627 }, { "epoch": 0.41938600257621295, "grad_norm": 0.314453125, "learning_rate": 0.0011565631989726502, "loss": 1.7316, "step": 15628 }, { "epoch": 0.41941283812795194, "grad_norm": 0.3125, "learning_rate": 0.0011565527004534547, "loss": 1.7605, "step": 15629 }, { "epoch": 0.41943967367969087, "grad_norm": 0.302734375, "learning_rate": 0.0011565422007133465, "loss": 1.6908, "step": 15630 }, { "epoch": 0.4194665092314298, "grad_norm": 0.29296875, "learning_rate": 0.0011565316997523484, "loss": 1.6009, "step": 15631 }, { "epoch": 0.41949334478316874, "grad_norm": 0.314453125, "learning_rate": 0.0011565211975704834, "loss": 1.8108, "step": 15632 }, { "epoch": 0.4195201803349077, "grad_norm": 0.302734375, "learning_rate": 0.0011565106941677748, "loss": 1.6782, "step": 15633 }, { "epoch": 0.4195470158866466, "grad_norm": 0.3046875, "learning_rate": 0.0011565001895442453, "loss": 1.7382, "step": 15634 }, { "epoch": 0.41957385143838555, "grad_norm": 0.314453125, "learning_rate": 0.0011564896836999182, "loss": 1.8475, "step": 15635 }, { "epoch": 0.41960068699012454, "grad_norm": 0.30078125, "learning_rate": 0.0011564791766348163, "loss": 1.6022, "step": 15636 }, { "epoch": 0.4196275225418635, "grad_norm": 0.33203125, "learning_rate": 0.001156468668348963, "loss": 1.7509, "step": 15637 }, { "epoch": 0.4196543580936024, "grad_norm": 0.314453125, "learning_rate": 0.001156458158842381, "loss": 1.8058, "step": 15638 }, { "epoch": 0.41968119364534134, "grad_norm": 0.328125, "learning_rate": 0.0011564476481150938, "loss": 1.8827, "step": 15639 }, { "epoch": 0.4197080291970803, "grad_norm": 0.30078125, "learning_rate": 0.001156437136167124, "loss": 1.746, "step": 15640 }, { "epoch": 0.4197348647488192, "grad_norm": 0.326171875, "learning_rate": 0.0011564266229984947, "loss": 1.6741, "step": 15641 }, { "epoch": 0.4197617003005582, "grad_norm": 0.29296875, "learning_rate": 0.0011564161086092294, "loss": 1.6244, "step": 15642 }, { "epoch": 0.41978853585229714, "grad_norm": 0.3046875, "learning_rate": 0.0011564055929993505, "loss": 1.7439, "step": 15643 }, { "epoch": 0.4198153714040361, "grad_norm": 0.306640625, "learning_rate": 0.0011563950761688817, "loss": 1.7195, "step": 15644 }, { "epoch": 0.419842206955775, "grad_norm": 0.28515625, "learning_rate": 0.0011563845581178457, "loss": 1.5369, "step": 15645 }, { "epoch": 0.41986904250751395, "grad_norm": 0.30859375, "learning_rate": 0.0011563740388462658, "loss": 1.7784, "step": 15646 }, { "epoch": 0.4198958780592529, "grad_norm": 0.318359375, "learning_rate": 0.0011563635183541646, "loss": 1.8107, "step": 15647 }, { "epoch": 0.4199227136109918, "grad_norm": 0.302734375, "learning_rate": 0.0011563529966415657, "loss": 1.7051, "step": 15648 }, { "epoch": 0.4199495491627308, "grad_norm": 0.310546875, "learning_rate": 0.001156342473708492, "loss": 1.714, "step": 15649 }, { "epoch": 0.41997638471446974, "grad_norm": 0.306640625, "learning_rate": 0.0011563319495549666, "loss": 1.721, "step": 15650 }, { "epoch": 0.4200032202662087, "grad_norm": 0.302734375, "learning_rate": 0.0011563214241810127, "loss": 1.7137, "step": 15651 }, { "epoch": 0.4200300558179476, "grad_norm": 0.314453125, "learning_rate": 0.001156310897586653, "loss": 1.7976, "step": 15652 }, { "epoch": 0.42005689136968655, "grad_norm": 0.296875, "learning_rate": 0.001156300369771911, "loss": 1.5863, "step": 15653 }, { "epoch": 0.4200837269214255, "grad_norm": 0.302734375, "learning_rate": 0.0011562898407368098, "loss": 1.7286, "step": 15654 }, { "epoch": 0.4201105624731645, "grad_norm": 0.29296875, "learning_rate": 0.0011562793104813722, "loss": 1.6916, "step": 15655 }, { "epoch": 0.4201373980249034, "grad_norm": 0.3125, "learning_rate": 0.0011562687790056215, "loss": 1.7766, "step": 15656 }, { "epoch": 0.42016423357664234, "grad_norm": 0.306640625, "learning_rate": 0.0011562582463095809, "loss": 1.7115, "step": 15657 }, { "epoch": 0.4201910691283813, "grad_norm": 0.302734375, "learning_rate": 0.001156247712393273, "loss": 1.7196, "step": 15658 }, { "epoch": 0.4202179046801202, "grad_norm": 0.298828125, "learning_rate": 0.0011562371772567215, "loss": 1.6667, "step": 15659 }, { "epoch": 0.42024474023185915, "grad_norm": 0.310546875, "learning_rate": 0.0011562266408999494, "loss": 1.8236, "step": 15660 }, { "epoch": 0.4202715757835981, "grad_norm": 0.29296875, "learning_rate": 0.0011562161033229796, "loss": 1.6301, "step": 15661 }, { "epoch": 0.4202984113353371, "grad_norm": 0.30078125, "learning_rate": 0.0011562055645258351, "loss": 1.683, "step": 15662 }, { "epoch": 0.420325246887076, "grad_norm": 0.30078125, "learning_rate": 0.0011561950245085396, "loss": 1.6994, "step": 15663 }, { "epoch": 0.42035208243881494, "grad_norm": 0.30078125, "learning_rate": 0.0011561844832711154, "loss": 1.6527, "step": 15664 }, { "epoch": 0.4203789179905539, "grad_norm": 0.302734375, "learning_rate": 0.0011561739408135865, "loss": 1.6133, "step": 15665 }, { "epoch": 0.4204057535422928, "grad_norm": 0.31640625, "learning_rate": 0.0011561633971359753, "loss": 1.7889, "step": 15666 }, { "epoch": 0.42043258909403175, "grad_norm": 0.30078125, "learning_rate": 0.0011561528522383055, "loss": 1.6821, "step": 15667 }, { "epoch": 0.42045942464577074, "grad_norm": 0.30859375, "learning_rate": 0.0011561423061205995, "loss": 1.7218, "step": 15668 }, { "epoch": 0.4204862601975097, "grad_norm": 0.3125, "learning_rate": 0.0011561317587828815, "loss": 1.6815, "step": 15669 }, { "epoch": 0.4205130957492486, "grad_norm": 0.294921875, "learning_rate": 0.0011561212102251736, "loss": 1.593, "step": 15670 }, { "epoch": 0.42053993130098755, "grad_norm": 0.287109375, "learning_rate": 0.0011561106604474995, "loss": 1.5827, "step": 15671 }, { "epoch": 0.4205667668527265, "grad_norm": 0.30859375, "learning_rate": 0.0011561001094498823, "loss": 1.7008, "step": 15672 }, { "epoch": 0.4205936024044654, "grad_norm": 0.30078125, "learning_rate": 0.001156089557232345, "loss": 1.6493, "step": 15673 }, { "epoch": 0.4206204379562044, "grad_norm": 0.30859375, "learning_rate": 0.0011560790037949107, "loss": 1.7369, "step": 15674 }, { "epoch": 0.42064727350794334, "grad_norm": 0.287109375, "learning_rate": 0.0011560684491376029, "loss": 1.5777, "step": 15675 }, { "epoch": 0.4206741090596823, "grad_norm": 0.294921875, "learning_rate": 0.0011560578932604444, "loss": 1.624, "step": 15676 }, { "epoch": 0.4207009446114212, "grad_norm": 0.306640625, "learning_rate": 0.0011560473361634585, "loss": 1.619, "step": 15677 }, { "epoch": 0.42072778016316015, "grad_norm": 0.32421875, "learning_rate": 0.0011560367778466684, "loss": 1.7692, "step": 15678 }, { "epoch": 0.4207546157148991, "grad_norm": 0.302734375, "learning_rate": 0.001156026218310097, "loss": 1.6512, "step": 15679 }, { "epoch": 0.420781451266638, "grad_norm": 0.30859375, "learning_rate": 0.001156015657553768, "loss": 1.7119, "step": 15680 }, { "epoch": 0.420808286818377, "grad_norm": 0.310546875, "learning_rate": 0.001156005095577704, "loss": 1.7513, "step": 15681 }, { "epoch": 0.42083512237011594, "grad_norm": 0.322265625, "learning_rate": 0.0011559945323819283, "loss": 1.7859, "step": 15682 }, { "epoch": 0.4208619579218549, "grad_norm": 0.30859375, "learning_rate": 0.0011559839679664643, "loss": 1.6511, "step": 15683 }, { "epoch": 0.4208887934735938, "grad_norm": 0.318359375, "learning_rate": 0.0011559734023313352, "loss": 1.8416, "step": 15684 }, { "epoch": 0.42091562902533275, "grad_norm": 0.29296875, "learning_rate": 0.001155962835476564, "loss": 1.5661, "step": 15685 }, { "epoch": 0.4209424645770717, "grad_norm": 0.3046875, "learning_rate": 0.0011559522674021736, "loss": 1.6103, "step": 15686 }, { "epoch": 0.4209693001288107, "grad_norm": 0.3125, "learning_rate": 0.0011559416981081875, "loss": 1.6741, "step": 15687 }, { "epoch": 0.4209961356805496, "grad_norm": 0.314453125, "learning_rate": 0.001155931127594629, "loss": 1.6197, "step": 15688 }, { "epoch": 0.42102297123228855, "grad_norm": 0.30078125, "learning_rate": 0.0011559205558615213, "loss": 1.7091, "step": 15689 }, { "epoch": 0.4210498067840275, "grad_norm": 0.318359375, "learning_rate": 0.0011559099829088874, "loss": 1.7138, "step": 15690 }, { "epoch": 0.4210766423357664, "grad_norm": 0.3046875, "learning_rate": 0.0011558994087367506, "loss": 1.7253, "step": 15691 }, { "epoch": 0.42110347788750535, "grad_norm": 0.3125, "learning_rate": 0.0011558888333451339, "loss": 1.6648, "step": 15692 }, { "epoch": 0.4211303134392443, "grad_norm": 0.314453125, "learning_rate": 0.0011558782567340606, "loss": 1.79, "step": 15693 }, { "epoch": 0.4211571489909833, "grad_norm": 0.3125, "learning_rate": 0.0011558676789035541, "loss": 1.7448, "step": 15694 }, { "epoch": 0.4211839845427222, "grad_norm": 0.310546875, "learning_rate": 0.0011558570998536374, "loss": 1.6994, "step": 15695 }, { "epoch": 0.42121082009446115, "grad_norm": 0.302734375, "learning_rate": 0.0011558465195843338, "loss": 1.6961, "step": 15696 }, { "epoch": 0.4212376556462001, "grad_norm": 0.314453125, "learning_rate": 0.0011558359380956665, "loss": 1.7586, "step": 15697 }, { "epoch": 0.421264491197939, "grad_norm": 0.302734375, "learning_rate": 0.0011558253553876584, "loss": 1.6426, "step": 15698 }, { "epoch": 0.42129132674967795, "grad_norm": 0.310546875, "learning_rate": 0.0011558147714603333, "loss": 1.7306, "step": 15699 }, { "epoch": 0.42131816230141694, "grad_norm": 0.30859375, "learning_rate": 0.001155804186313714, "loss": 1.7245, "step": 15700 }, { "epoch": 0.4213449978531559, "grad_norm": 0.30859375, "learning_rate": 0.0011557935999478239, "loss": 1.6852, "step": 15701 }, { "epoch": 0.4213718334048948, "grad_norm": 0.30859375, "learning_rate": 0.001155783012362686, "loss": 1.6596, "step": 15702 }, { "epoch": 0.42139866895663375, "grad_norm": 0.31640625, "learning_rate": 0.0011557724235583238, "loss": 1.7187, "step": 15703 }, { "epoch": 0.4214255045083727, "grad_norm": 0.30859375, "learning_rate": 0.0011557618335347605, "loss": 1.6414, "step": 15704 }, { "epoch": 0.4214523400601116, "grad_norm": 0.30859375, "learning_rate": 0.0011557512422920191, "loss": 1.7026, "step": 15705 }, { "epoch": 0.42147917561185055, "grad_norm": 0.298828125, "learning_rate": 0.001155740649830123, "loss": 1.647, "step": 15706 }, { "epoch": 0.42150601116358954, "grad_norm": 0.318359375, "learning_rate": 0.0011557300561490956, "loss": 1.8297, "step": 15707 }, { "epoch": 0.4215328467153285, "grad_norm": 0.296875, "learning_rate": 0.0011557194612489597, "loss": 1.5718, "step": 15708 }, { "epoch": 0.4215596822670674, "grad_norm": 0.310546875, "learning_rate": 0.001155708865129739, "loss": 1.707, "step": 15709 }, { "epoch": 0.42158651781880635, "grad_norm": 0.330078125, "learning_rate": 0.0011556982677914563, "loss": 1.7398, "step": 15710 }, { "epoch": 0.4216133533705453, "grad_norm": 0.318359375, "learning_rate": 0.0011556876692341352, "loss": 1.707, "step": 15711 }, { "epoch": 0.4216401889222842, "grad_norm": 0.32421875, "learning_rate": 0.0011556770694577988, "loss": 1.5036, "step": 15712 }, { "epoch": 0.4216670244740232, "grad_norm": 0.30078125, "learning_rate": 0.0011556664684624705, "loss": 1.5274, "step": 15713 }, { "epoch": 0.42169386002576215, "grad_norm": 0.298828125, "learning_rate": 0.0011556558662481735, "loss": 1.5826, "step": 15714 }, { "epoch": 0.4217206955775011, "grad_norm": 0.2890625, "learning_rate": 0.0011556452628149307, "loss": 1.4943, "step": 15715 }, { "epoch": 0.42174753112924, "grad_norm": 0.3203125, "learning_rate": 0.001155634658162766, "loss": 1.6832, "step": 15716 }, { "epoch": 0.42177436668097895, "grad_norm": 0.3125, "learning_rate": 0.0011556240522917023, "loss": 1.6546, "step": 15717 }, { "epoch": 0.4218012022327179, "grad_norm": 0.322265625, "learning_rate": 0.0011556134452017628, "loss": 1.6997, "step": 15718 }, { "epoch": 0.4218280377844568, "grad_norm": 0.306640625, "learning_rate": 0.0011556028368929709, "loss": 1.6657, "step": 15719 }, { "epoch": 0.4218548733361958, "grad_norm": 0.314453125, "learning_rate": 0.0011555922273653496, "loss": 1.7113, "step": 15720 }, { "epoch": 0.42188170888793475, "grad_norm": 0.3203125, "learning_rate": 0.0011555816166189227, "loss": 1.6769, "step": 15721 }, { "epoch": 0.4219085444396737, "grad_norm": 0.310546875, "learning_rate": 0.0011555710046537132, "loss": 1.713, "step": 15722 }, { "epoch": 0.4219353799914126, "grad_norm": 0.3046875, "learning_rate": 0.0011555603914697441, "loss": 1.5855, "step": 15723 }, { "epoch": 0.42196221554315155, "grad_norm": 0.294921875, "learning_rate": 0.0011555497770670393, "loss": 1.5592, "step": 15724 }, { "epoch": 0.4219890510948905, "grad_norm": 0.318359375, "learning_rate": 0.0011555391614456213, "loss": 1.7305, "step": 15725 }, { "epoch": 0.4220158866466295, "grad_norm": 0.318359375, "learning_rate": 0.0011555285446055144, "loss": 1.7154, "step": 15726 }, { "epoch": 0.4220427221983684, "grad_norm": 0.30859375, "learning_rate": 0.0011555179265467408, "loss": 1.7207, "step": 15727 }, { "epoch": 0.42206955775010735, "grad_norm": 0.314453125, "learning_rate": 0.0011555073072693246, "loss": 1.6824, "step": 15728 }, { "epoch": 0.4220963933018463, "grad_norm": 0.30078125, "learning_rate": 0.0011554966867732888, "loss": 1.5353, "step": 15729 }, { "epoch": 0.4221232288535852, "grad_norm": 0.326171875, "learning_rate": 0.0011554860650586564, "loss": 1.7186, "step": 15730 }, { "epoch": 0.42215006440532415, "grad_norm": 0.310546875, "learning_rate": 0.0011554754421254513, "loss": 1.6899, "step": 15731 }, { "epoch": 0.4221768999570631, "grad_norm": 0.31640625, "learning_rate": 0.0011554648179736965, "loss": 1.7727, "step": 15732 }, { "epoch": 0.4222037355088021, "grad_norm": 0.3046875, "learning_rate": 0.001155454192603415, "loss": 1.587, "step": 15733 }, { "epoch": 0.422230571060541, "grad_norm": 0.294921875, "learning_rate": 0.0011554435660146307, "loss": 1.6416, "step": 15734 }, { "epoch": 0.42225740661227995, "grad_norm": 0.30078125, "learning_rate": 0.0011554329382073665, "loss": 1.5861, "step": 15735 }, { "epoch": 0.4222842421640189, "grad_norm": 0.30859375, "learning_rate": 0.001155422309181646, "loss": 1.6189, "step": 15736 }, { "epoch": 0.4223110777157578, "grad_norm": 0.3125, "learning_rate": 0.0011554116789374922, "loss": 1.6187, "step": 15737 }, { "epoch": 0.42233791326749676, "grad_norm": 0.31640625, "learning_rate": 0.0011554010474749286, "loss": 1.7271, "step": 15738 }, { "epoch": 0.42236474881923575, "grad_norm": 0.318359375, "learning_rate": 0.0011553904147939785, "loss": 1.7243, "step": 15739 }, { "epoch": 0.4223915843709747, "grad_norm": 0.32421875, "learning_rate": 0.0011553797808946653, "loss": 1.6583, "step": 15740 }, { "epoch": 0.4224184199227136, "grad_norm": 0.333984375, "learning_rate": 0.0011553691457770123, "loss": 1.7597, "step": 15741 }, { "epoch": 0.42244525547445255, "grad_norm": 0.322265625, "learning_rate": 0.0011553585094410427, "loss": 1.6797, "step": 15742 }, { "epoch": 0.4224720910261915, "grad_norm": 0.296875, "learning_rate": 0.00115534787188678, "loss": 1.5044, "step": 15743 }, { "epoch": 0.4224989265779304, "grad_norm": 0.32421875, "learning_rate": 0.0011553372331142473, "loss": 1.7705, "step": 15744 }, { "epoch": 0.4225257621296694, "grad_norm": 0.318359375, "learning_rate": 0.0011553265931234682, "loss": 1.7194, "step": 15745 }, { "epoch": 0.42255259768140835, "grad_norm": 0.314453125, "learning_rate": 0.001155315951914466, "loss": 1.7107, "step": 15746 }, { "epoch": 0.4225794332331473, "grad_norm": 0.31640625, "learning_rate": 0.0011553053094872639, "loss": 1.6568, "step": 15747 }, { "epoch": 0.4226062687848862, "grad_norm": 0.302734375, "learning_rate": 0.0011552946658418852, "loss": 1.5221, "step": 15748 }, { "epoch": 0.42263310433662515, "grad_norm": 0.3203125, "learning_rate": 0.0011552840209783535, "loss": 1.691, "step": 15749 }, { "epoch": 0.4226599398883641, "grad_norm": 0.3125, "learning_rate": 0.0011552733748966919, "loss": 1.652, "step": 15750 }, { "epoch": 0.422686775440103, "grad_norm": 0.326171875, "learning_rate": 0.0011552627275969241, "loss": 1.6395, "step": 15751 }, { "epoch": 0.422713610991842, "grad_norm": 0.326171875, "learning_rate": 0.0011552520790790732, "loss": 1.6822, "step": 15752 }, { "epoch": 0.42274044654358095, "grad_norm": 0.3046875, "learning_rate": 0.0011552414293431626, "loss": 1.5619, "step": 15753 }, { "epoch": 0.4227672820953199, "grad_norm": 0.32421875, "learning_rate": 0.0011552307783892154, "loss": 1.6797, "step": 15754 }, { "epoch": 0.4227941176470588, "grad_norm": 0.3203125, "learning_rate": 0.0011552201262172554, "loss": 1.6679, "step": 15755 }, { "epoch": 0.42282095319879776, "grad_norm": 0.328125, "learning_rate": 0.0011552094728273059, "loss": 1.7611, "step": 15756 }, { "epoch": 0.4228477887505367, "grad_norm": 0.302734375, "learning_rate": 0.00115519881821939, "loss": 1.5095, "step": 15757 }, { "epoch": 0.4228746243022757, "grad_norm": 0.296875, "learning_rate": 0.0011551881623935315, "loss": 1.581, "step": 15758 }, { "epoch": 0.4229014598540146, "grad_norm": 0.326171875, "learning_rate": 0.0011551775053497532, "loss": 1.6684, "step": 15759 }, { "epoch": 0.42292829540575355, "grad_norm": 0.298828125, "learning_rate": 0.0011551668470880787, "loss": 1.5901, "step": 15760 }, { "epoch": 0.4229551309574925, "grad_norm": 0.314453125, "learning_rate": 0.0011551561876085319, "loss": 1.6166, "step": 15761 }, { "epoch": 0.4229819665092314, "grad_norm": 0.3046875, "learning_rate": 0.0011551455269111354, "loss": 1.54, "step": 15762 }, { "epoch": 0.42300880206097036, "grad_norm": 0.314453125, "learning_rate": 0.0011551348649959132, "loss": 1.6223, "step": 15763 }, { "epoch": 0.4230356376127093, "grad_norm": 0.3203125, "learning_rate": 0.0011551242018628882, "loss": 1.6449, "step": 15764 }, { "epoch": 0.4230624731644483, "grad_norm": 0.3203125, "learning_rate": 0.001155113537512084, "loss": 1.6438, "step": 15765 }, { "epoch": 0.4230893087161872, "grad_norm": 0.310546875, "learning_rate": 0.0011551028719435242, "loss": 1.6177, "step": 15766 }, { "epoch": 0.42311614426792615, "grad_norm": 0.322265625, "learning_rate": 0.001155092205157232, "loss": 1.7024, "step": 15767 }, { "epoch": 0.4231429798196651, "grad_norm": 0.3125, "learning_rate": 0.0011550815371532308, "loss": 1.5255, "step": 15768 }, { "epoch": 0.423169815371404, "grad_norm": 0.322265625, "learning_rate": 0.001155070867931544, "loss": 1.643, "step": 15769 }, { "epoch": 0.42319665092314296, "grad_norm": 0.302734375, "learning_rate": 0.001155060197492195, "loss": 1.5264, "step": 15770 }, { "epoch": 0.42322348647488195, "grad_norm": 0.330078125, "learning_rate": 0.0011550495258352072, "loss": 1.7191, "step": 15771 }, { "epoch": 0.4232503220266209, "grad_norm": 0.322265625, "learning_rate": 0.001155038852960604, "loss": 1.6386, "step": 15772 }, { "epoch": 0.4232771575783598, "grad_norm": 0.326171875, "learning_rate": 0.0011550281788684088, "loss": 1.6573, "step": 15773 }, { "epoch": 0.42330399313009875, "grad_norm": 0.3203125, "learning_rate": 0.0011550175035586452, "loss": 1.584, "step": 15774 }, { "epoch": 0.4233308286818377, "grad_norm": 0.32421875, "learning_rate": 0.0011550068270313365, "loss": 1.7335, "step": 15775 }, { "epoch": 0.4233576642335766, "grad_norm": 0.318359375, "learning_rate": 0.001154996149286506, "loss": 1.6646, "step": 15776 }, { "epoch": 0.42338449978531556, "grad_norm": 0.3125, "learning_rate": 0.0011549854703241773, "loss": 1.6197, "step": 15777 }, { "epoch": 0.42341133533705455, "grad_norm": 0.3203125, "learning_rate": 0.0011549747901443738, "loss": 1.6448, "step": 15778 }, { "epoch": 0.4234381708887935, "grad_norm": 0.30859375, "learning_rate": 0.0011549641087471188, "loss": 1.6358, "step": 15779 }, { "epoch": 0.4234650064405324, "grad_norm": 0.32421875, "learning_rate": 0.001154953426132436, "loss": 1.6073, "step": 15780 }, { "epoch": 0.42349184199227136, "grad_norm": 0.31640625, "learning_rate": 0.0011549427423003485, "loss": 1.5515, "step": 15781 }, { "epoch": 0.4235186775440103, "grad_norm": 0.31640625, "learning_rate": 0.0011549320572508798, "loss": 1.6455, "step": 15782 }, { "epoch": 0.4235455130957492, "grad_norm": 0.3046875, "learning_rate": 0.0011549213709840535, "loss": 1.615, "step": 15783 }, { "epoch": 0.4235723486474882, "grad_norm": 0.30859375, "learning_rate": 0.001154910683499893, "loss": 1.5994, "step": 15784 }, { "epoch": 0.42359918419922715, "grad_norm": 0.3125, "learning_rate": 0.0011548999947984218, "loss": 1.5997, "step": 15785 }, { "epoch": 0.4236260197509661, "grad_norm": 0.306640625, "learning_rate": 0.001154889304879663, "loss": 1.6151, "step": 15786 }, { "epoch": 0.423652855302705, "grad_norm": 0.326171875, "learning_rate": 0.0011548786137436408, "loss": 1.7131, "step": 15787 }, { "epoch": 0.42367969085444396, "grad_norm": 0.314453125, "learning_rate": 0.0011548679213903778, "loss": 1.5803, "step": 15788 }, { "epoch": 0.4237065264061829, "grad_norm": 0.31640625, "learning_rate": 0.001154857227819898, "loss": 1.6699, "step": 15789 }, { "epoch": 0.42373336195792183, "grad_norm": 0.314453125, "learning_rate": 0.0011548465330322244, "loss": 1.5721, "step": 15790 }, { "epoch": 0.4237601975096608, "grad_norm": 0.30859375, "learning_rate": 0.0011548358370273809, "loss": 1.569, "step": 15791 }, { "epoch": 0.42378703306139975, "grad_norm": 0.31640625, "learning_rate": 0.0011548251398053907, "loss": 1.6067, "step": 15792 }, { "epoch": 0.4238138686131387, "grad_norm": 0.310546875, "learning_rate": 0.0011548144413662776, "loss": 1.5802, "step": 15793 }, { "epoch": 0.4238407041648776, "grad_norm": 0.3046875, "learning_rate": 0.0011548037417100647, "loss": 1.5066, "step": 15794 }, { "epoch": 0.42386753971661656, "grad_norm": 0.291015625, "learning_rate": 0.001154793040836776, "loss": 1.4883, "step": 15795 }, { "epoch": 0.4238943752683555, "grad_norm": 0.31640625, "learning_rate": 0.0011547823387464341, "loss": 1.6147, "step": 15796 }, { "epoch": 0.4239212108200945, "grad_norm": 0.322265625, "learning_rate": 0.001154771635439063, "loss": 1.6451, "step": 15797 }, { "epoch": 0.4239480463718334, "grad_norm": 0.29296875, "learning_rate": 0.0011547609309146864, "loss": 1.404, "step": 15798 }, { "epoch": 0.42397488192357236, "grad_norm": 0.330078125, "learning_rate": 0.0011547502251733274, "loss": 1.6975, "step": 15799 }, { "epoch": 0.4240017174753113, "grad_norm": 0.31640625, "learning_rate": 0.0011547395182150096, "loss": 1.5768, "step": 15800 }, { "epoch": 0.4240285530270502, "grad_norm": 0.31640625, "learning_rate": 0.0011547288100397565, "loss": 1.5952, "step": 15801 }, { "epoch": 0.42405538857878916, "grad_norm": 0.3125, "learning_rate": 0.0011547181006475917, "loss": 1.5637, "step": 15802 }, { "epoch": 0.4240822241305281, "grad_norm": 0.333984375, "learning_rate": 0.0011547073900385386, "loss": 1.649, "step": 15803 }, { "epoch": 0.4241090596822671, "grad_norm": 0.3125, "learning_rate": 0.0011546966782126206, "loss": 1.5712, "step": 15804 }, { "epoch": 0.424135895234006, "grad_norm": 0.31640625, "learning_rate": 0.001154685965169861, "loss": 1.5773, "step": 15805 }, { "epoch": 0.42416273078574496, "grad_norm": 0.337890625, "learning_rate": 0.0011546752509102838, "loss": 1.6502, "step": 15806 }, { "epoch": 0.4241895663374839, "grad_norm": 0.396484375, "learning_rate": 0.0011546645354339126, "loss": 1.9429, "step": 15807 }, { "epoch": 0.4242164018892228, "grad_norm": 0.37109375, "learning_rate": 0.0011546538187407703, "loss": 1.8766, "step": 15808 }, { "epoch": 0.42424323744096176, "grad_norm": 0.330078125, "learning_rate": 0.0011546431008308807, "loss": 1.7938, "step": 15809 }, { "epoch": 0.42427007299270075, "grad_norm": 0.359375, "learning_rate": 0.0011546323817042674, "loss": 2.0731, "step": 15810 }, { "epoch": 0.4242969085444397, "grad_norm": 0.322265625, "learning_rate": 0.0011546216613609538, "loss": 1.8811, "step": 15811 }, { "epoch": 0.4243237440961786, "grad_norm": 0.33984375, "learning_rate": 0.0011546109398009635, "loss": 1.8459, "step": 15812 }, { "epoch": 0.42435057964791756, "grad_norm": 0.330078125, "learning_rate": 0.00115460021702432, "loss": 1.9504, "step": 15813 }, { "epoch": 0.4243774151996565, "grad_norm": 0.326171875, "learning_rate": 0.0011545894930310465, "loss": 2.0399, "step": 15814 }, { "epoch": 0.42440425075139543, "grad_norm": 0.3203125, "learning_rate": 0.0011545787678211672, "loss": 2.0172, "step": 15815 }, { "epoch": 0.4244310863031344, "grad_norm": 0.298828125, "learning_rate": 0.0011545680413947049, "loss": 1.7899, "step": 15816 }, { "epoch": 0.42445792185487335, "grad_norm": 0.302734375, "learning_rate": 0.0011545573137516836, "loss": 1.8282, "step": 15817 }, { "epoch": 0.4244847574066123, "grad_norm": 0.32421875, "learning_rate": 0.0011545465848921267, "loss": 1.9451, "step": 15818 }, { "epoch": 0.4245115929583512, "grad_norm": 0.310546875, "learning_rate": 0.0011545358548160577, "loss": 1.8505, "step": 15819 }, { "epoch": 0.42453842851009016, "grad_norm": 0.322265625, "learning_rate": 0.0011545251235235005, "loss": 1.9361, "step": 15820 }, { "epoch": 0.4245652640618291, "grad_norm": 0.326171875, "learning_rate": 0.0011545143910144779, "loss": 1.9192, "step": 15821 }, { "epoch": 0.42459209961356803, "grad_norm": 0.32421875, "learning_rate": 0.001154503657289014, "loss": 1.9518, "step": 15822 }, { "epoch": 0.424618935165307, "grad_norm": 0.30859375, "learning_rate": 0.0011544929223471325, "loss": 1.7793, "step": 15823 }, { "epoch": 0.42464577071704596, "grad_norm": 0.33203125, "learning_rate": 0.0011544821861888563, "loss": 1.9834, "step": 15824 }, { "epoch": 0.4246726062687849, "grad_norm": 0.310546875, "learning_rate": 0.0011544714488142096, "loss": 1.8981, "step": 15825 }, { "epoch": 0.4246994418205238, "grad_norm": 0.306640625, "learning_rate": 0.0011544607102232155, "loss": 1.8032, "step": 15826 }, { "epoch": 0.42472627737226276, "grad_norm": 0.314453125, "learning_rate": 0.0011544499704158975, "loss": 1.91, "step": 15827 }, { "epoch": 0.4247531129240017, "grad_norm": 0.314453125, "learning_rate": 0.0011544392293922796, "loss": 1.8137, "step": 15828 }, { "epoch": 0.4247799484757407, "grad_norm": 0.326171875, "learning_rate": 0.0011544284871523854, "loss": 2.0166, "step": 15829 }, { "epoch": 0.4248067840274796, "grad_norm": 0.302734375, "learning_rate": 0.0011544177436962379, "loss": 1.8407, "step": 15830 }, { "epoch": 0.42483361957921856, "grad_norm": 0.322265625, "learning_rate": 0.001154406999023861, "loss": 1.9798, "step": 15831 }, { "epoch": 0.4248604551309575, "grad_norm": 0.302734375, "learning_rate": 0.0011543962531352784, "loss": 1.797, "step": 15832 }, { "epoch": 0.42488729068269643, "grad_norm": 0.31640625, "learning_rate": 0.0011543855060305135, "loss": 1.9463, "step": 15833 }, { "epoch": 0.42491412623443536, "grad_norm": 0.3125, "learning_rate": 0.0011543747577095896, "loss": 1.8607, "step": 15834 }, { "epoch": 0.4249409617861743, "grad_norm": 0.30859375, "learning_rate": 0.001154364008172531, "loss": 1.9256, "step": 15835 }, { "epoch": 0.4249677973379133, "grad_norm": 0.314453125, "learning_rate": 0.0011543532574193607, "loss": 1.8667, "step": 15836 }, { "epoch": 0.4249946328896522, "grad_norm": 0.3203125, "learning_rate": 0.0011543425054501024, "loss": 1.856, "step": 15837 }, { "epoch": 0.42502146844139116, "grad_norm": 0.314453125, "learning_rate": 0.0011543317522647797, "loss": 1.8467, "step": 15838 }, { "epoch": 0.4250483039931301, "grad_norm": 0.30859375, "learning_rate": 0.0011543209978634164, "loss": 1.7656, "step": 15839 }, { "epoch": 0.42507513954486903, "grad_norm": 0.3203125, "learning_rate": 0.0011543102422460357, "loss": 1.8517, "step": 15840 }, { "epoch": 0.42510197509660796, "grad_norm": 0.3125, "learning_rate": 0.0011542994854126616, "loss": 1.8175, "step": 15841 }, { "epoch": 0.42512881064834696, "grad_norm": 0.3125, "learning_rate": 0.0011542887273633174, "loss": 1.8735, "step": 15842 }, { "epoch": 0.4251556462000859, "grad_norm": 0.3046875, "learning_rate": 0.0011542779680980269, "loss": 1.7248, "step": 15843 }, { "epoch": 0.4251824817518248, "grad_norm": 0.3046875, "learning_rate": 0.0011542672076168133, "loss": 1.7738, "step": 15844 }, { "epoch": 0.42520931730356376, "grad_norm": 0.314453125, "learning_rate": 0.001154256445919701, "loss": 1.8745, "step": 15845 }, { "epoch": 0.4252361528553027, "grad_norm": 0.310546875, "learning_rate": 0.0011542456830067127, "loss": 1.8464, "step": 15846 }, { "epoch": 0.42526298840704163, "grad_norm": 0.3203125, "learning_rate": 0.0011542349188778725, "loss": 1.9005, "step": 15847 }, { "epoch": 0.42528982395878057, "grad_norm": 0.310546875, "learning_rate": 0.001154224153533204, "loss": 1.8385, "step": 15848 }, { "epoch": 0.42531665951051956, "grad_norm": 0.318359375, "learning_rate": 0.001154213386972731, "loss": 1.8706, "step": 15849 }, { "epoch": 0.4253434950622585, "grad_norm": 0.306640625, "learning_rate": 0.0011542026191964767, "loss": 1.7296, "step": 15850 }, { "epoch": 0.4253703306139974, "grad_norm": 0.3125, "learning_rate": 0.001154191850204465, "loss": 1.8561, "step": 15851 }, { "epoch": 0.42539716616573636, "grad_norm": 0.306640625, "learning_rate": 0.0011541810799967193, "loss": 1.7536, "step": 15852 }, { "epoch": 0.4254240017174753, "grad_norm": 0.318359375, "learning_rate": 0.0011541703085732632, "loss": 1.7762, "step": 15853 }, { "epoch": 0.42545083726921423, "grad_norm": 0.30078125, "learning_rate": 0.0011541595359341207, "loss": 1.7324, "step": 15854 }, { "epoch": 0.4254776728209532, "grad_norm": 0.31640625, "learning_rate": 0.0011541487620793153, "loss": 1.8487, "step": 15855 }, { "epoch": 0.42550450837269216, "grad_norm": 0.3125, "learning_rate": 0.0011541379870088704, "loss": 1.8731, "step": 15856 }, { "epoch": 0.4255313439244311, "grad_norm": 0.306640625, "learning_rate": 0.0011541272107228099, "loss": 1.6613, "step": 15857 }, { "epoch": 0.42555817947617003, "grad_norm": 0.302734375, "learning_rate": 0.001154116433221157, "loss": 1.8044, "step": 15858 }, { "epoch": 0.42558501502790896, "grad_norm": 0.31640625, "learning_rate": 0.001154105654503936, "loss": 1.7958, "step": 15859 }, { "epoch": 0.4256118505796479, "grad_norm": 0.30859375, "learning_rate": 0.00115409487457117, "loss": 1.8184, "step": 15860 }, { "epoch": 0.42563868613138683, "grad_norm": 0.3125, "learning_rate": 0.001154084093422883, "loss": 1.8096, "step": 15861 }, { "epoch": 0.4256655216831258, "grad_norm": 0.30078125, "learning_rate": 0.0011540733110590987, "loss": 1.7294, "step": 15862 }, { "epoch": 0.42569235723486476, "grad_norm": 0.306640625, "learning_rate": 0.0011540625274798402, "loss": 1.7684, "step": 15863 }, { "epoch": 0.4257191927866037, "grad_norm": 0.310546875, "learning_rate": 0.0011540517426851318, "loss": 1.8626, "step": 15864 }, { "epoch": 0.42574602833834263, "grad_norm": 0.30859375, "learning_rate": 0.0011540409566749967, "loss": 1.8344, "step": 15865 }, { "epoch": 0.42577286389008157, "grad_norm": 0.302734375, "learning_rate": 0.001154030169449459, "loss": 1.6773, "step": 15866 }, { "epoch": 0.4257996994418205, "grad_norm": 0.3125, "learning_rate": 0.0011540193810085416, "loss": 1.8568, "step": 15867 }, { "epoch": 0.4258265349935595, "grad_norm": 0.31640625, "learning_rate": 0.001154008591352269, "loss": 1.8729, "step": 15868 }, { "epoch": 0.4258533705452984, "grad_norm": 0.3125, "learning_rate": 0.0011539978004806647, "loss": 1.7183, "step": 15869 }, { "epoch": 0.42588020609703736, "grad_norm": 0.296875, "learning_rate": 0.001153987008393752, "loss": 1.6518, "step": 15870 }, { "epoch": 0.4259070416487763, "grad_norm": 0.310546875, "learning_rate": 0.001153976215091555, "loss": 1.7899, "step": 15871 }, { "epoch": 0.42593387720051523, "grad_norm": 0.306640625, "learning_rate": 0.0011539654205740969, "loss": 1.7825, "step": 15872 }, { "epoch": 0.42596071275225417, "grad_norm": 0.296875, "learning_rate": 0.0011539546248414017, "loss": 1.6648, "step": 15873 }, { "epoch": 0.42598754830399316, "grad_norm": 0.314453125, "learning_rate": 0.001153943827893493, "loss": 1.8532, "step": 15874 }, { "epoch": 0.4260143838557321, "grad_norm": 0.30859375, "learning_rate": 0.0011539330297303946, "loss": 1.7635, "step": 15875 }, { "epoch": 0.42604121940747103, "grad_norm": 0.31640625, "learning_rate": 0.00115392223035213, "loss": 1.8132, "step": 15876 }, { "epoch": 0.42606805495920996, "grad_norm": 0.3359375, "learning_rate": 0.0011539114297587233, "loss": 1.9711, "step": 15877 }, { "epoch": 0.4260948905109489, "grad_norm": 0.296875, "learning_rate": 0.0011539006279501975, "loss": 1.6546, "step": 15878 }, { "epoch": 0.42612172606268783, "grad_norm": 0.298828125, "learning_rate": 0.0011538898249265769, "loss": 1.7142, "step": 15879 }, { "epoch": 0.42614856161442677, "grad_norm": 0.30859375, "learning_rate": 0.001153879020687885, "loss": 1.7372, "step": 15880 }, { "epoch": 0.42617539716616576, "grad_norm": 0.310546875, "learning_rate": 0.0011538682152341453, "loss": 1.7365, "step": 15881 }, { "epoch": 0.4262022327179047, "grad_norm": 0.3125, "learning_rate": 0.001153857408565382, "loss": 1.7593, "step": 15882 }, { "epoch": 0.42622906826964363, "grad_norm": 0.296875, "learning_rate": 0.0011538466006816182, "loss": 1.6946, "step": 15883 }, { "epoch": 0.42625590382138256, "grad_norm": 0.314453125, "learning_rate": 0.001153835791582878, "loss": 1.8405, "step": 15884 }, { "epoch": 0.4262827393731215, "grad_norm": 0.3125, "learning_rate": 0.001153824981269185, "loss": 1.6987, "step": 15885 }, { "epoch": 0.42630957492486043, "grad_norm": 0.30859375, "learning_rate": 0.001153814169740563, "loss": 1.836, "step": 15886 }, { "epoch": 0.4263364104765994, "grad_norm": 0.306640625, "learning_rate": 0.0011538033569970356, "loss": 1.7044, "step": 15887 }, { "epoch": 0.42636324602833836, "grad_norm": 0.29296875, "learning_rate": 0.0011537925430386265, "loss": 1.6089, "step": 15888 }, { "epoch": 0.4263900815800773, "grad_norm": 0.31640625, "learning_rate": 0.0011537817278653593, "loss": 1.7399, "step": 15889 }, { "epoch": 0.42641691713181623, "grad_norm": 0.310546875, "learning_rate": 0.0011537709114772584, "loss": 1.7413, "step": 15890 }, { "epoch": 0.42644375268355517, "grad_norm": 0.310546875, "learning_rate": 0.0011537600938743469, "loss": 1.771, "step": 15891 }, { "epoch": 0.4264705882352941, "grad_norm": 0.306640625, "learning_rate": 0.0011537492750566484, "loss": 1.7372, "step": 15892 }, { "epoch": 0.42649742378703304, "grad_norm": 0.30859375, "learning_rate": 0.001153738455024187, "loss": 1.7632, "step": 15893 }, { "epoch": 0.426524259338772, "grad_norm": 0.318359375, "learning_rate": 0.0011537276337769864, "loss": 1.7463, "step": 15894 }, { "epoch": 0.42655109489051096, "grad_norm": 0.310546875, "learning_rate": 0.0011537168113150704, "loss": 1.7708, "step": 15895 }, { "epoch": 0.4265779304422499, "grad_norm": 0.30859375, "learning_rate": 0.0011537059876384624, "loss": 1.6934, "step": 15896 }, { "epoch": 0.42660476599398883, "grad_norm": 0.314453125, "learning_rate": 0.0011536951627471865, "loss": 1.8141, "step": 15897 }, { "epoch": 0.42663160154572777, "grad_norm": 0.3125, "learning_rate": 0.0011536843366412664, "loss": 1.7144, "step": 15898 }, { "epoch": 0.4266584370974667, "grad_norm": 0.296875, "learning_rate": 0.0011536735093207256, "loss": 1.6567, "step": 15899 }, { "epoch": 0.4266852726492057, "grad_norm": 0.3125, "learning_rate": 0.001153662680785588, "loss": 1.7325, "step": 15900 }, { "epoch": 0.42671210820094463, "grad_norm": 0.318359375, "learning_rate": 0.0011536518510358777, "loss": 1.7771, "step": 15901 }, { "epoch": 0.42673894375268356, "grad_norm": 0.310546875, "learning_rate": 0.0011536410200716177, "loss": 1.7283, "step": 15902 }, { "epoch": 0.4267657793044225, "grad_norm": 0.3046875, "learning_rate": 0.0011536301878928325, "loss": 1.6898, "step": 15903 }, { "epoch": 0.42679261485616143, "grad_norm": 0.30859375, "learning_rate": 0.0011536193544995452, "loss": 1.6887, "step": 15904 }, { "epoch": 0.42681945040790037, "grad_norm": 0.3046875, "learning_rate": 0.0011536085198917802, "loss": 1.7214, "step": 15905 }, { "epoch": 0.4268462859596393, "grad_norm": 0.3125, "learning_rate": 0.001153597684069561, "loss": 1.7967, "step": 15906 }, { "epoch": 0.4268731215113783, "grad_norm": 0.3125, "learning_rate": 0.001153586847032911, "loss": 1.7218, "step": 15907 }, { "epoch": 0.42689995706311723, "grad_norm": 0.3125, "learning_rate": 0.0011535760087818547, "loss": 1.7075, "step": 15908 }, { "epoch": 0.42692679261485617, "grad_norm": 0.314453125, "learning_rate": 0.0011535651693164153, "loss": 1.7758, "step": 15909 }, { "epoch": 0.4269536281665951, "grad_norm": 0.310546875, "learning_rate": 0.001153554328636617, "loss": 1.7719, "step": 15910 }, { "epoch": 0.42698046371833404, "grad_norm": 0.29296875, "learning_rate": 0.001153543486742483, "loss": 1.5937, "step": 15911 }, { "epoch": 0.42700729927007297, "grad_norm": 0.294921875, "learning_rate": 0.0011535326436340379, "loss": 1.6425, "step": 15912 }, { "epoch": 0.42703413482181196, "grad_norm": 0.30859375, "learning_rate": 0.0011535217993113047, "loss": 1.7217, "step": 15913 }, { "epoch": 0.4270609703735509, "grad_norm": 0.30859375, "learning_rate": 0.0011535109537743077, "loss": 1.821, "step": 15914 }, { "epoch": 0.42708780592528983, "grad_norm": 0.3046875, "learning_rate": 0.0011535001070230703, "loss": 1.6662, "step": 15915 }, { "epoch": 0.42711464147702877, "grad_norm": 0.314453125, "learning_rate": 0.0011534892590576167, "loss": 1.7017, "step": 15916 }, { "epoch": 0.4271414770287677, "grad_norm": 0.302734375, "learning_rate": 0.0011534784098779704, "loss": 1.6977, "step": 15917 }, { "epoch": 0.42716831258050664, "grad_norm": 0.296875, "learning_rate": 0.0011534675594841552, "loss": 1.6354, "step": 15918 }, { "epoch": 0.4271951481322456, "grad_norm": 0.302734375, "learning_rate": 0.0011534567078761953, "loss": 1.6253, "step": 15919 }, { "epoch": 0.42722198368398456, "grad_norm": 0.30859375, "learning_rate": 0.001153445855054114, "loss": 1.7044, "step": 15920 }, { "epoch": 0.4272488192357235, "grad_norm": 0.298828125, "learning_rate": 0.0011534350010179355, "loss": 1.5665, "step": 15921 }, { "epoch": 0.42727565478746243, "grad_norm": 0.31640625, "learning_rate": 0.0011534241457676832, "loss": 1.7606, "step": 15922 }, { "epoch": 0.42730249033920137, "grad_norm": 0.294921875, "learning_rate": 0.0011534132893033812, "loss": 1.6505, "step": 15923 }, { "epoch": 0.4273293258909403, "grad_norm": 0.310546875, "learning_rate": 0.0011534024316250534, "loss": 1.72, "step": 15924 }, { "epoch": 0.42735616144267924, "grad_norm": 0.3125, "learning_rate": 0.0011533915727327234, "loss": 1.7456, "step": 15925 }, { "epoch": 0.42738299699441823, "grad_norm": 0.3125, "learning_rate": 0.001153380712626415, "loss": 1.7575, "step": 15926 }, { "epoch": 0.42740983254615716, "grad_norm": 0.302734375, "learning_rate": 0.0011533698513061523, "loss": 1.6368, "step": 15927 }, { "epoch": 0.4274366680978961, "grad_norm": 0.3046875, "learning_rate": 0.001153358988771959, "loss": 1.6986, "step": 15928 }, { "epoch": 0.42746350364963503, "grad_norm": 0.314453125, "learning_rate": 0.0011533481250238587, "loss": 1.6514, "step": 15929 }, { "epoch": 0.42749033920137397, "grad_norm": 0.302734375, "learning_rate": 0.0011533372600618754, "loss": 1.7226, "step": 15930 }, { "epoch": 0.4275171747531129, "grad_norm": 0.302734375, "learning_rate": 0.001153326393886033, "loss": 1.6361, "step": 15931 }, { "epoch": 0.42754401030485184, "grad_norm": 0.30859375, "learning_rate": 0.0011533155264963554, "loss": 1.6608, "step": 15932 }, { "epoch": 0.42757084585659083, "grad_norm": 0.310546875, "learning_rate": 0.0011533046578928662, "loss": 1.7025, "step": 15933 }, { "epoch": 0.42759768140832977, "grad_norm": 0.29296875, "learning_rate": 0.0011532937880755893, "loss": 1.5888, "step": 15934 }, { "epoch": 0.4276245169600687, "grad_norm": 0.31640625, "learning_rate": 0.0011532829170445486, "loss": 1.7543, "step": 15935 }, { "epoch": 0.42765135251180764, "grad_norm": 0.310546875, "learning_rate": 0.0011532720447997682, "loss": 1.6217, "step": 15936 }, { "epoch": 0.42767818806354657, "grad_norm": 0.287109375, "learning_rate": 0.0011532611713412715, "loss": 1.5527, "step": 15937 }, { "epoch": 0.4277050236152855, "grad_norm": 0.318359375, "learning_rate": 0.0011532502966690827, "loss": 1.7001, "step": 15938 }, { "epoch": 0.4277318591670245, "grad_norm": 0.310546875, "learning_rate": 0.0011532394207832252, "loss": 1.7198, "step": 15939 }, { "epoch": 0.42775869471876343, "grad_norm": 0.30078125, "learning_rate": 0.0011532285436837235, "loss": 1.6208, "step": 15940 }, { "epoch": 0.42778553027050237, "grad_norm": 0.302734375, "learning_rate": 0.001153217665370601, "loss": 1.6549, "step": 15941 }, { "epoch": 0.4278123658222413, "grad_norm": 0.33203125, "learning_rate": 0.0011532067858438817, "loss": 1.8805, "step": 15942 }, { "epoch": 0.42783920137398024, "grad_norm": 0.32421875, "learning_rate": 0.0011531959051035896, "loss": 1.838, "step": 15943 }, { "epoch": 0.4278660369257192, "grad_norm": 0.306640625, "learning_rate": 0.0011531850231497483, "loss": 1.7494, "step": 15944 }, { "epoch": 0.42789287247745816, "grad_norm": 0.314453125, "learning_rate": 0.001153174139982382, "loss": 1.7837, "step": 15945 }, { "epoch": 0.4279197080291971, "grad_norm": 0.322265625, "learning_rate": 0.0011531632556015141, "loss": 1.8041, "step": 15946 }, { "epoch": 0.42794654358093603, "grad_norm": 0.302734375, "learning_rate": 0.0011531523700071692, "loss": 1.6461, "step": 15947 }, { "epoch": 0.42797337913267497, "grad_norm": 0.3125, "learning_rate": 0.0011531414831993702, "loss": 1.7214, "step": 15948 }, { "epoch": 0.4280002146844139, "grad_norm": 0.3203125, "learning_rate": 0.0011531305951781418, "loss": 1.7277, "step": 15949 }, { "epoch": 0.42802705023615284, "grad_norm": 0.310546875, "learning_rate": 0.0011531197059435074, "loss": 1.7551, "step": 15950 }, { "epoch": 0.4280538857878918, "grad_norm": 0.310546875, "learning_rate": 0.0011531088154954914, "loss": 1.6673, "step": 15951 }, { "epoch": 0.42808072133963077, "grad_norm": 0.326171875, "learning_rate": 0.0011530979238341173, "loss": 1.8255, "step": 15952 }, { "epoch": 0.4281075568913697, "grad_norm": 0.310546875, "learning_rate": 0.001153087030959409, "loss": 1.7134, "step": 15953 }, { "epoch": 0.42813439244310864, "grad_norm": 0.306640625, "learning_rate": 0.0011530761368713904, "loss": 1.6835, "step": 15954 }, { "epoch": 0.42816122799484757, "grad_norm": 0.3125, "learning_rate": 0.0011530652415700858, "loss": 1.6459, "step": 15955 }, { "epoch": 0.4281880635465865, "grad_norm": 0.302734375, "learning_rate": 0.0011530543450555186, "loss": 1.586, "step": 15956 }, { "epoch": 0.42821489909832544, "grad_norm": 0.322265625, "learning_rate": 0.0011530434473277127, "loss": 1.7549, "step": 15957 }, { "epoch": 0.42824173465006443, "grad_norm": 0.330078125, "learning_rate": 0.0011530325483866922, "loss": 1.7723, "step": 15958 }, { "epoch": 0.42826857020180337, "grad_norm": 0.306640625, "learning_rate": 0.0011530216482324813, "loss": 1.6722, "step": 15959 }, { "epoch": 0.4282954057535423, "grad_norm": 0.3046875, "learning_rate": 0.0011530107468651035, "loss": 1.6022, "step": 15960 }, { "epoch": 0.42832224130528124, "grad_norm": 0.328125, "learning_rate": 0.0011529998442845826, "loss": 1.7738, "step": 15961 }, { "epoch": 0.4283490768570202, "grad_norm": 0.31640625, "learning_rate": 0.001152988940490943, "loss": 1.6142, "step": 15962 }, { "epoch": 0.4283759124087591, "grad_norm": 0.310546875, "learning_rate": 0.001152978035484208, "loss": 1.6862, "step": 15963 }, { "epoch": 0.42840274796049804, "grad_norm": 0.298828125, "learning_rate": 0.0011529671292644023, "loss": 1.6161, "step": 15964 }, { "epoch": 0.42842958351223703, "grad_norm": 0.310546875, "learning_rate": 0.0011529562218315491, "loss": 1.7216, "step": 15965 }, { "epoch": 0.42845641906397597, "grad_norm": 0.30859375, "learning_rate": 0.0011529453131856728, "loss": 1.7013, "step": 15966 }, { "epoch": 0.4284832546157149, "grad_norm": 0.302734375, "learning_rate": 0.001152934403326797, "loss": 1.5766, "step": 15967 }, { "epoch": 0.42851009016745384, "grad_norm": 0.318359375, "learning_rate": 0.0011529234922549461, "loss": 1.7013, "step": 15968 }, { "epoch": 0.4285369257191928, "grad_norm": 0.29296875, "learning_rate": 0.0011529125799701434, "loss": 1.5355, "step": 15969 }, { "epoch": 0.4285637612709317, "grad_norm": 0.3203125, "learning_rate": 0.0011529016664724133, "loss": 1.6887, "step": 15970 }, { "epoch": 0.4285905968226707, "grad_norm": 0.306640625, "learning_rate": 0.0011528907517617795, "loss": 1.6367, "step": 15971 }, { "epoch": 0.42861743237440963, "grad_norm": 0.322265625, "learning_rate": 0.0011528798358382663, "loss": 1.6829, "step": 15972 }, { "epoch": 0.42864426792614857, "grad_norm": 0.30859375, "learning_rate": 0.0011528689187018971, "loss": 1.6078, "step": 15973 }, { "epoch": 0.4286711034778875, "grad_norm": 0.310546875, "learning_rate": 0.0011528580003526965, "loss": 1.6957, "step": 15974 }, { "epoch": 0.42869793902962644, "grad_norm": 0.31640625, "learning_rate": 0.001152847080790688, "loss": 1.6959, "step": 15975 }, { "epoch": 0.4287247745813654, "grad_norm": 0.30078125, "learning_rate": 0.0011528361600158955, "loss": 1.5615, "step": 15976 }, { "epoch": 0.4287516101331043, "grad_norm": 0.310546875, "learning_rate": 0.001152825238028343, "loss": 1.6816, "step": 15977 }, { "epoch": 0.4287784456848433, "grad_norm": 0.330078125, "learning_rate": 0.001152814314828055, "loss": 1.7347, "step": 15978 }, { "epoch": 0.42880528123658224, "grad_norm": 0.322265625, "learning_rate": 0.0011528033904150547, "loss": 1.7328, "step": 15979 }, { "epoch": 0.42883211678832117, "grad_norm": 0.333984375, "learning_rate": 0.0011527924647893664, "loss": 1.7455, "step": 15980 }, { "epoch": 0.4288589523400601, "grad_norm": 0.31640625, "learning_rate": 0.001152781537951014, "loss": 1.6952, "step": 15981 }, { "epoch": 0.42888578789179904, "grad_norm": 0.3046875, "learning_rate": 0.0011527706099000218, "loss": 1.6078, "step": 15982 }, { "epoch": 0.428912623443538, "grad_norm": 0.306640625, "learning_rate": 0.0011527596806364135, "loss": 1.6048, "step": 15983 }, { "epoch": 0.42893945899527697, "grad_norm": 0.302734375, "learning_rate": 0.001152748750160213, "loss": 1.6258, "step": 15984 }, { "epoch": 0.4289662945470159, "grad_norm": 0.3125, "learning_rate": 0.0011527378184714443, "loss": 1.5826, "step": 15985 }, { "epoch": 0.42899313009875484, "grad_norm": 0.3125, "learning_rate": 0.0011527268855701315, "loss": 1.6559, "step": 15986 }, { "epoch": 0.4290199656504938, "grad_norm": 0.314453125, "learning_rate": 0.0011527159514562984, "loss": 1.614, "step": 15987 }, { "epoch": 0.4290468012022327, "grad_norm": 0.310546875, "learning_rate": 0.0011527050161299693, "loss": 1.6724, "step": 15988 }, { "epoch": 0.42907363675397164, "grad_norm": 0.294921875, "learning_rate": 0.001152694079591168, "loss": 1.5684, "step": 15989 }, { "epoch": 0.4291004723057106, "grad_norm": 0.314453125, "learning_rate": 0.0011526831418399183, "loss": 1.6073, "step": 15990 }, { "epoch": 0.42912730785744957, "grad_norm": 0.3125, "learning_rate": 0.0011526722028762447, "loss": 1.7015, "step": 15991 }, { "epoch": 0.4291541434091885, "grad_norm": 0.31640625, "learning_rate": 0.0011526612627001706, "loss": 1.7084, "step": 15992 }, { "epoch": 0.42918097896092744, "grad_norm": 0.310546875, "learning_rate": 0.0011526503213117204, "loss": 1.6623, "step": 15993 }, { "epoch": 0.4292078145126664, "grad_norm": 0.3046875, "learning_rate": 0.001152639378710918, "loss": 1.6201, "step": 15994 }, { "epoch": 0.4292346500644053, "grad_norm": 0.318359375, "learning_rate": 0.0011526284348977874, "loss": 1.6659, "step": 15995 }, { "epoch": 0.42926148561614424, "grad_norm": 0.3203125, "learning_rate": 0.0011526174898723526, "loss": 1.6438, "step": 15996 }, { "epoch": 0.42928832116788324, "grad_norm": 0.31640625, "learning_rate": 0.0011526065436346373, "loss": 1.7129, "step": 15997 }, { "epoch": 0.42931515671962217, "grad_norm": 0.31640625, "learning_rate": 0.0011525955961846663, "loss": 1.6454, "step": 15998 }, { "epoch": 0.4293419922713611, "grad_norm": 0.314453125, "learning_rate": 0.0011525846475224628, "loss": 1.7122, "step": 15999 }, { "epoch": 0.42936882782310004, "grad_norm": 0.314453125, "learning_rate": 0.0011525736976480514, "loss": 1.5441, "step": 16000 }, { "epoch": 0.429395663374839, "grad_norm": 0.33203125, "learning_rate": 0.0011525627465614557, "loss": 1.7426, "step": 16001 }, { "epoch": 0.4294224989265779, "grad_norm": 0.310546875, "learning_rate": 0.0011525517942627, "loss": 1.5977, "step": 16002 }, { "epoch": 0.42944933447831685, "grad_norm": 0.314453125, "learning_rate": 0.0011525408407518083, "loss": 1.6382, "step": 16003 }, { "epoch": 0.42947617003005584, "grad_norm": 0.318359375, "learning_rate": 0.0011525298860288044, "loss": 1.7169, "step": 16004 }, { "epoch": 0.4295030055817948, "grad_norm": 0.32421875, "learning_rate": 0.0011525189300937125, "loss": 1.6492, "step": 16005 }, { "epoch": 0.4295298411335337, "grad_norm": 0.30859375, "learning_rate": 0.0011525079729465565, "loss": 1.5607, "step": 16006 }, { "epoch": 0.42955667668527264, "grad_norm": 0.322265625, "learning_rate": 0.001152497014587361, "loss": 1.7604, "step": 16007 }, { "epoch": 0.4295835122370116, "grad_norm": 0.3125, "learning_rate": 0.0011524860550161493, "loss": 1.6552, "step": 16008 }, { "epoch": 0.4296103477887505, "grad_norm": 0.314453125, "learning_rate": 0.0011524750942329457, "loss": 1.6967, "step": 16009 }, { "epoch": 0.4296371833404895, "grad_norm": 0.3359375, "learning_rate": 0.0011524641322377743, "loss": 1.7177, "step": 16010 }, { "epoch": 0.42966401889222844, "grad_norm": 0.310546875, "learning_rate": 0.0011524531690306592, "loss": 1.6042, "step": 16011 }, { "epoch": 0.4296908544439674, "grad_norm": 0.3203125, "learning_rate": 0.0011524422046116244, "loss": 1.6746, "step": 16012 }, { "epoch": 0.4297176899957063, "grad_norm": 0.3203125, "learning_rate": 0.0011524312389806937, "loss": 1.6249, "step": 16013 }, { "epoch": 0.42974452554744524, "grad_norm": 0.30859375, "learning_rate": 0.0011524202721378916, "loss": 1.5922, "step": 16014 }, { "epoch": 0.4297713610991842, "grad_norm": 0.330078125, "learning_rate": 0.001152409304083242, "loss": 1.7296, "step": 16015 }, { "epoch": 0.42979819665092317, "grad_norm": 0.298828125, "learning_rate": 0.0011523983348167687, "loss": 1.5343, "step": 16016 }, { "epoch": 0.4298250322026621, "grad_norm": 0.30859375, "learning_rate": 0.0011523873643384962, "loss": 1.5939, "step": 16017 }, { "epoch": 0.42985186775440104, "grad_norm": 0.314453125, "learning_rate": 0.0011523763926484481, "loss": 1.5913, "step": 16018 }, { "epoch": 0.42987870330614, "grad_norm": 0.302734375, "learning_rate": 0.001152365419746649, "loss": 1.5947, "step": 16019 }, { "epoch": 0.4299055388578789, "grad_norm": 0.330078125, "learning_rate": 0.0011523544456331224, "loss": 1.7592, "step": 16020 }, { "epoch": 0.42993237440961785, "grad_norm": 0.328125, "learning_rate": 0.0011523434703078928, "loss": 1.7453, "step": 16021 }, { "epoch": 0.4299592099613568, "grad_norm": 0.3046875, "learning_rate": 0.0011523324937709842, "loss": 1.5865, "step": 16022 }, { "epoch": 0.42998604551309577, "grad_norm": 0.3125, "learning_rate": 0.0011523215160224205, "loss": 1.6703, "step": 16023 }, { "epoch": 0.4300128810648347, "grad_norm": 0.31640625, "learning_rate": 0.001152310537062226, "loss": 1.644, "step": 16024 }, { "epoch": 0.43003971661657364, "grad_norm": 0.30859375, "learning_rate": 0.0011522995568904245, "loss": 1.6062, "step": 16025 }, { "epoch": 0.4300665521683126, "grad_norm": 0.296875, "learning_rate": 0.0011522885755070403, "loss": 1.5255, "step": 16026 }, { "epoch": 0.4300933877200515, "grad_norm": 0.33203125, "learning_rate": 0.0011522775929120974, "loss": 1.6277, "step": 16027 }, { "epoch": 0.43012022327179045, "grad_norm": 0.34765625, "learning_rate": 0.00115226660910562, "loss": 1.6306, "step": 16028 }, { "epoch": 0.43014705882352944, "grad_norm": 0.3203125, "learning_rate": 0.0011522556240876323, "loss": 1.613, "step": 16029 }, { "epoch": 0.4301738943752684, "grad_norm": 0.3359375, "learning_rate": 0.001152244637858158, "loss": 1.8178, "step": 16030 }, { "epoch": 0.4302007299270073, "grad_norm": 0.419921875, "learning_rate": 0.0011522336504172215, "loss": 2.0987, "step": 16031 }, { "epoch": 0.43022756547874624, "grad_norm": 0.380859375, "learning_rate": 0.001152222661764847, "loss": 2.0541, "step": 16032 }, { "epoch": 0.4302544010304852, "grad_norm": 0.359375, "learning_rate": 0.0011522116719010582, "loss": 2.1673, "step": 16033 }, { "epoch": 0.4302812365822241, "grad_norm": 0.341796875, "learning_rate": 0.0011522006808258795, "loss": 1.9804, "step": 16034 }, { "epoch": 0.43030807213396305, "grad_norm": 0.3359375, "learning_rate": 0.0011521896885393352, "loss": 2.0405, "step": 16035 }, { "epoch": 0.43033490768570204, "grad_norm": 0.345703125, "learning_rate": 0.001152178695041449, "loss": 2.0153, "step": 16036 }, { "epoch": 0.430361743237441, "grad_norm": 0.3359375, "learning_rate": 0.0011521677003322453, "loss": 2.0563, "step": 16037 }, { "epoch": 0.4303885787891799, "grad_norm": 0.322265625, "learning_rate": 0.001152156704411748, "loss": 1.9948, "step": 16038 }, { "epoch": 0.43041541434091884, "grad_norm": 0.314453125, "learning_rate": 0.0011521457072799814, "loss": 1.9623, "step": 16039 }, { "epoch": 0.4304422498926578, "grad_norm": 0.3203125, "learning_rate": 0.0011521347089369694, "loss": 1.9643, "step": 16040 }, { "epoch": 0.4304690854443967, "grad_norm": 0.326171875, "learning_rate": 0.0011521237093827365, "loss": 2.0131, "step": 16041 }, { "epoch": 0.4304959209961357, "grad_norm": 0.330078125, "learning_rate": 0.0011521127086173064, "loss": 1.9941, "step": 16042 }, { "epoch": 0.43052275654787464, "grad_norm": 0.322265625, "learning_rate": 0.0011521017066407036, "loss": 2.0085, "step": 16043 }, { "epoch": 0.4305495920996136, "grad_norm": 0.337890625, "learning_rate": 0.0011520907034529523, "loss": 2.0927, "step": 16044 }, { "epoch": 0.4305764276513525, "grad_norm": 0.3359375, "learning_rate": 0.0011520796990540762, "loss": 2.0346, "step": 16045 }, { "epoch": 0.43060326320309145, "grad_norm": 0.33203125, "learning_rate": 0.0011520686934440997, "loss": 2.0378, "step": 16046 }, { "epoch": 0.4306300987548304, "grad_norm": 0.314453125, "learning_rate": 0.0011520576866230468, "loss": 1.88, "step": 16047 }, { "epoch": 0.4306569343065693, "grad_norm": 0.318359375, "learning_rate": 0.0011520466785909418, "loss": 1.9162, "step": 16048 }, { "epoch": 0.4306837698583083, "grad_norm": 0.330078125, "learning_rate": 0.0011520356693478088, "loss": 2.0625, "step": 16049 }, { "epoch": 0.43071060541004724, "grad_norm": 0.326171875, "learning_rate": 0.001152024658893672, "loss": 1.9662, "step": 16050 }, { "epoch": 0.4307374409617862, "grad_norm": 0.326171875, "learning_rate": 0.0011520136472285556, "loss": 2.0227, "step": 16051 }, { "epoch": 0.4307642765135251, "grad_norm": 0.3203125, "learning_rate": 0.0011520026343524836, "loss": 1.8899, "step": 16052 }, { "epoch": 0.43079111206526405, "grad_norm": 0.326171875, "learning_rate": 0.00115199162026548, "loss": 1.9865, "step": 16053 }, { "epoch": 0.430817947617003, "grad_norm": 0.31640625, "learning_rate": 0.0011519806049675695, "loss": 1.9153, "step": 16054 }, { "epoch": 0.430844783168742, "grad_norm": 0.322265625, "learning_rate": 0.0011519695884587757, "loss": 2.0085, "step": 16055 }, { "epoch": 0.4308716187204809, "grad_norm": 0.318359375, "learning_rate": 0.001151958570739123, "loss": 1.9456, "step": 16056 }, { "epoch": 0.43089845427221984, "grad_norm": 0.328125, "learning_rate": 0.0011519475518086358, "loss": 1.9409, "step": 16057 }, { "epoch": 0.4309252898239588, "grad_norm": 0.322265625, "learning_rate": 0.001151936531667338, "loss": 1.8652, "step": 16058 }, { "epoch": 0.4309521253756977, "grad_norm": 0.330078125, "learning_rate": 0.0011519255103152539, "loss": 1.9516, "step": 16059 }, { "epoch": 0.43097896092743665, "grad_norm": 0.31640625, "learning_rate": 0.0011519144877524074, "loss": 1.9256, "step": 16060 }, { "epoch": 0.4310057964791756, "grad_norm": 0.322265625, "learning_rate": 0.001151903463978823, "loss": 1.9309, "step": 16061 }, { "epoch": 0.4310326320309146, "grad_norm": 0.30859375, "learning_rate": 0.0011518924389945248, "loss": 1.8823, "step": 16062 }, { "epoch": 0.4310594675826535, "grad_norm": 0.326171875, "learning_rate": 0.001151881412799537, "loss": 1.971, "step": 16063 }, { "epoch": 0.43108630313439245, "grad_norm": 0.330078125, "learning_rate": 0.0011518703853938836, "loss": 2.0101, "step": 16064 }, { "epoch": 0.4311131386861314, "grad_norm": 0.330078125, "learning_rate": 0.001151859356777589, "loss": 2.067, "step": 16065 }, { "epoch": 0.4311399742378703, "grad_norm": 0.314453125, "learning_rate": 0.0011518483269506775, "loss": 1.8735, "step": 16066 }, { "epoch": 0.43116680978960925, "grad_norm": 0.32421875, "learning_rate": 0.0011518372959131728, "loss": 1.9149, "step": 16067 }, { "epoch": 0.43119364534134824, "grad_norm": 0.330078125, "learning_rate": 0.0011518262636650996, "loss": 1.9853, "step": 16068 }, { "epoch": 0.4312204808930872, "grad_norm": 0.3203125, "learning_rate": 0.001151815230206482, "loss": 1.9631, "step": 16069 }, { "epoch": 0.4312473164448261, "grad_norm": 0.330078125, "learning_rate": 0.001151804195537344, "loss": 1.9509, "step": 16070 }, { "epoch": 0.43127415199656505, "grad_norm": 0.31640625, "learning_rate": 0.00115179315965771, "loss": 1.9402, "step": 16071 }, { "epoch": 0.431300987548304, "grad_norm": 0.322265625, "learning_rate": 0.0011517821225676041, "loss": 1.9377, "step": 16072 }, { "epoch": 0.4313278231000429, "grad_norm": 0.326171875, "learning_rate": 0.0011517710842670506, "loss": 1.9798, "step": 16073 }, { "epoch": 0.4313546586517819, "grad_norm": 0.310546875, "learning_rate": 0.0011517600447560736, "loss": 1.8471, "step": 16074 }, { "epoch": 0.43138149420352084, "grad_norm": 0.3203125, "learning_rate": 0.0011517490040346976, "loss": 1.9702, "step": 16075 }, { "epoch": 0.4314083297552598, "grad_norm": 0.31640625, "learning_rate": 0.0011517379621029465, "loss": 1.9568, "step": 16076 }, { "epoch": 0.4314351653069987, "grad_norm": 0.318359375, "learning_rate": 0.0011517269189608445, "loss": 1.9275, "step": 16077 }, { "epoch": 0.43146200085873765, "grad_norm": 0.322265625, "learning_rate": 0.001151715874608416, "loss": 1.9, "step": 16078 }, { "epoch": 0.4314888364104766, "grad_norm": 0.32421875, "learning_rate": 0.0011517048290456853, "loss": 1.8946, "step": 16079 }, { "epoch": 0.4315156719622155, "grad_norm": 0.31640625, "learning_rate": 0.0011516937822726766, "loss": 1.9478, "step": 16080 }, { "epoch": 0.4315425075139545, "grad_norm": 0.3203125, "learning_rate": 0.0011516827342894138, "loss": 1.9229, "step": 16081 }, { "epoch": 0.43156934306569344, "grad_norm": 0.3203125, "learning_rate": 0.0011516716850959216, "loss": 1.9579, "step": 16082 }, { "epoch": 0.4315961786174324, "grad_norm": 0.3203125, "learning_rate": 0.0011516606346922238, "loss": 1.9597, "step": 16083 }, { "epoch": 0.4316230141691713, "grad_norm": 0.330078125, "learning_rate": 0.0011516495830783452, "loss": 1.9655, "step": 16084 }, { "epoch": 0.43164984972091025, "grad_norm": 0.314453125, "learning_rate": 0.0011516385302543094, "loss": 1.8552, "step": 16085 }, { "epoch": 0.4316766852726492, "grad_norm": 0.32421875, "learning_rate": 0.0011516274762201411, "loss": 1.9179, "step": 16086 }, { "epoch": 0.4317035208243882, "grad_norm": 0.30859375, "learning_rate": 0.0011516164209758645, "loss": 1.8408, "step": 16087 }, { "epoch": 0.4317303563761271, "grad_norm": 0.30859375, "learning_rate": 0.0011516053645215037, "loss": 1.8978, "step": 16088 }, { "epoch": 0.43175719192786605, "grad_norm": 0.318359375, "learning_rate": 0.0011515943068570829, "loss": 1.936, "step": 16089 }, { "epoch": 0.431784027479605, "grad_norm": 0.31640625, "learning_rate": 0.0011515832479826267, "loss": 1.9342, "step": 16090 }, { "epoch": 0.4318108630313439, "grad_norm": 0.310546875, "learning_rate": 0.0011515721878981588, "loss": 1.8533, "step": 16091 }, { "epoch": 0.43183769858308285, "grad_norm": 0.31640625, "learning_rate": 0.001151561126603704, "loss": 1.9035, "step": 16092 }, { "epoch": 0.4318645341348218, "grad_norm": 0.310546875, "learning_rate": 0.0011515500640992863, "loss": 1.7876, "step": 16093 }, { "epoch": 0.4318913696865608, "grad_norm": 0.32421875, "learning_rate": 0.0011515390003849302, "loss": 1.9236, "step": 16094 }, { "epoch": 0.4319182052382997, "grad_norm": 0.30859375, "learning_rate": 0.0011515279354606595, "loss": 1.8765, "step": 16095 }, { "epoch": 0.43194504079003865, "grad_norm": 0.310546875, "learning_rate": 0.001151516869326499, "loss": 1.8645, "step": 16096 }, { "epoch": 0.4319718763417776, "grad_norm": 0.318359375, "learning_rate": 0.0011515058019824726, "loss": 1.8431, "step": 16097 }, { "epoch": 0.4319987118935165, "grad_norm": 0.314453125, "learning_rate": 0.0011514947334286048, "loss": 1.8493, "step": 16098 }, { "epoch": 0.43202554744525545, "grad_norm": 0.318359375, "learning_rate": 0.00115148366366492, "loss": 1.8495, "step": 16099 }, { "epoch": 0.43205238299699444, "grad_norm": 0.31640625, "learning_rate": 0.0011514725926914419, "loss": 1.8712, "step": 16100 }, { "epoch": 0.4320792185487334, "grad_norm": 0.310546875, "learning_rate": 0.0011514615205081955, "loss": 1.8589, "step": 16101 }, { "epoch": 0.4321060541004723, "grad_norm": 0.318359375, "learning_rate": 0.0011514504471152045, "loss": 1.9798, "step": 16102 }, { "epoch": 0.43213288965221125, "grad_norm": 0.3203125, "learning_rate": 0.0011514393725124937, "loss": 1.8708, "step": 16103 }, { "epoch": 0.4321597252039502, "grad_norm": 0.306640625, "learning_rate": 0.001151428296700087, "loss": 1.7964, "step": 16104 }, { "epoch": 0.4321865607556891, "grad_norm": 0.31640625, "learning_rate": 0.001151417219678009, "loss": 1.9104, "step": 16105 }, { "epoch": 0.43221339630742805, "grad_norm": 0.31640625, "learning_rate": 0.0011514061414462837, "loss": 1.799, "step": 16106 }, { "epoch": 0.43224023185916705, "grad_norm": 0.30859375, "learning_rate": 0.0011513950620049356, "loss": 1.8045, "step": 16107 }, { "epoch": 0.432267067410906, "grad_norm": 0.322265625, "learning_rate": 0.001151383981353989, "loss": 1.9523, "step": 16108 }, { "epoch": 0.4322939029626449, "grad_norm": 0.310546875, "learning_rate": 0.0011513728994934679, "loss": 1.8858, "step": 16109 }, { "epoch": 0.43232073851438385, "grad_norm": 0.3046875, "learning_rate": 0.0011513618164233973, "loss": 1.7835, "step": 16110 }, { "epoch": 0.4323475740661228, "grad_norm": 0.3203125, "learning_rate": 0.0011513507321438009, "loss": 1.7711, "step": 16111 }, { "epoch": 0.4323744096178617, "grad_norm": 0.310546875, "learning_rate": 0.001151339646654703, "loss": 1.767, "step": 16112 }, { "epoch": 0.4324012451696007, "grad_norm": 0.326171875, "learning_rate": 0.0011513285599561284, "loss": 1.8938, "step": 16113 }, { "epoch": 0.43242808072133965, "grad_norm": 0.326171875, "learning_rate": 0.0011513174720481011, "loss": 1.961, "step": 16114 }, { "epoch": 0.4324549162730786, "grad_norm": 0.326171875, "learning_rate": 0.0011513063829306455, "loss": 1.8918, "step": 16115 }, { "epoch": 0.4324817518248175, "grad_norm": 0.3125, "learning_rate": 0.0011512952926037857, "loss": 1.8555, "step": 16116 }, { "epoch": 0.43250858737655645, "grad_norm": 0.30859375, "learning_rate": 0.0011512842010675463, "loss": 1.8613, "step": 16117 }, { "epoch": 0.4325354229282954, "grad_norm": 0.31640625, "learning_rate": 0.0011512731083219516, "loss": 1.9272, "step": 16118 }, { "epoch": 0.4325622584800343, "grad_norm": 0.322265625, "learning_rate": 0.001151262014367026, "loss": 1.8704, "step": 16119 }, { "epoch": 0.4325890940317733, "grad_norm": 0.322265625, "learning_rate": 0.0011512509192027937, "loss": 1.9263, "step": 16120 }, { "epoch": 0.43261592958351225, "grad_norm": 0.31640625, "learning_rate": 0.0011512398228292788, "loss": 1.8267, "step": 16121 }, { "epoch": 0.4326427651352512, "grad_norm": 0.32421875, "learning_rate": 0.001151228725246506, "loss": 1.8579, "step": 16122 }, { "epoch": 0.4326696006869901, "grad_norm": 0.3203125, "learning_rate": 0.0011512176264544998, "loss": 1.8587, "step": 16123 }, { "epoch": 0.43269643623872905, "grad_norm": 0.32421875, "learning_rate": 0.001151206526453284, "loss": 1.931, "step": 16124 }, { "epoch": 0.432723271790468, "grad_norm": 0.326171875, "learning_rate": 0.0011511954252428833, "loss": 1.9507, "step": 16125 }, { "epoch": 0.432750107342207, "grad_norm": 0.3203125, "learning_rate": 0.001151184322823322, "loss": 1.8194, "step": 16126 }, { "epoch": 0.4327769428939459, "grad_norm": 0.31640625, "learning_rate": 0.0011511732191946248, "loss": 1.94, "step": 16127 }, { "epoch": 0.43280377844568485, "grad_norm": 0.33203125, "learning_rate": 0.0011511621143568153, "loss": 1.9823, "step": 16128 }, { "epoch": 0.4328306139974238, "grad_norm": 0.31640625, "learning_rate": 0.0011511510083099183, "loss": 1.8209, "step": 16129 }, { "epoch": 0.4328574495491627, "grad_norm": 0.36328125, "learning_rate": 0.0011511399010539583, "loss": 1.7968, "step": 16130 }, { "epoch": 0.43288428510090166, "grad_norm": 0.326171875, "learning_rate": 0.0011511287925889595, "loss": 1.9116, "step": 16131 }, { "epoch": 0.4329111206526406, "grad_norm": 0.328125, "learning_rate": 0.0011511176829149463, "loss": 1.931, "step": 16132 }, { "epoch": 0.4329379562043796, "grad_norm": 0.314453125, "learning_rate": 0.001151106572031943, "loss": 1.8102, "step": 16133 }, { "epoch": 0.4329647917561185, "grad_norm": 0.3125, "learning_rate": 0.0011510954599399741, "loss": 1.8292, "step": 16134 }, { "epoch": 0.43299162730785745, "grad_norm": 0.32421875, "learning_rate": 0.0011510843466390635, "loss": 1.8891, "step": 16135 }, { "epoch": 0.4330184628595964, "grad_norm": 0.3125, "learning_rate": 0.0011510732321292365, "loss": 1.8198, "step": 16136 }, { "epoch": 0.4330452984113353, "grad_norm": 0.330078125, "learning_rate": 0.0011510621164105166, "loss": 1.9276, "step": 16137 }, { "epoch": 0.43307213396307426, "grad_norm": 0.326171875, "learning_rate": 0.0011510509994829287, "loss": 1.8495, "step": 16138 }, { "epoch": 0.43309896951481325, "grad_norm": 0.3359375, "learning_rate": 0.0011510398813464968, "loss": 2.0027, "step": 16139 }, { "epoch": 0.4331258050665522, "grad_norm": 0.32421875, "learning_rate": 0.001151028762001246, "loss": 1.8649, "step": 16140 }, { "epoch": 0.4331526406182911, "grad_norm": 0.3203125, "learning_rate": 0.0011510176414471999, "loss": 1.948, "step": 16141 }, { "epoch": 0.43317947617003005, "grad_norm": 0.3203125, "learning_rate": 0.0011510065196843832, "loss": 1.7747, "step": 16142 }, { "epoch": 0.433206311721769, "grad_norm": 0.3125, "learning_rate": 0.0011509953967128203, "loss": 1.8036, "step": 16143 }, { "epoch": 0.4332331472735079, "grad_norm": 0.328125, "learning_rate": 0.0011509842725325357, "loss": 1.8923, "step": 16144 }, { "epoch": 0.4332599828252469, "grad_norm": 0.310546875, "learning_rate": 0.0011509731471435534, "loss": 1.7954, "step": 16145 }, { "epoch": 0.43328681837698585, "grad_norm": 0.3203125, "learning_rate": 0.0011509620205458985, "loss": 1.7787, "step": 16146 }, { "epoch": 0.4333136539287248, "grad_norm": 0.3203125, "learning_rate": 0.001150950892739595, "loss": 1.8292, "step": 16147 }, { "epoch": 0.4333404894804637, "grad_norm": 0.32421875, "learning_rate": 0.0011509397637246672, "loss": 1.9586, "step": 16148 }, { "epoch": 0.43336732503220265, "grad_norm": 0.333984375, "learning_rate": 0.0011509286335011396, "loss": 1.9266, "step": 16149 }, { "epoch": 0.4333941605839416, "grad_norm": 0.3359375, "learning_rate": 0.0011509175020690367, "loss": 1.9658, "step": 16150 }, { "epoch": 0.4334209961356805, "grad_norm": 0.3203125, "learning_rate": 0.001150906369428383, "loss": 1.8954, "step": 16151 }, { "epoch": 0.4334478316874195, "grad_norm": 0.3125, "learning_rate": 0.0011508952355792027, "loss": 1.793, "step": 16152 }, { "epoch": 0.43347466723915845, "grad_norm": 0.314453125, "learning_rate": 0.0011508841005215204, "loss": 1.7781, "step": 16153 }, { "epoch": 0.4335015027908974, "grad_norm": 0.314453125, "learning_rate": 0.0011508729642553603, "loss": 1.7949, "step": 16154 }, { "epoch": 0.4335283383426363, "grad_norm": 0.3203125, "learning_rate": 0.0011508618267807471, "loss": 1.8576, "step": 16155 }, { "epoch": 0.43355517389437526, "grad_norm": 0.310546875, "learning_rate": 0.001150850688097705, "loss": 1.7679, "step": 16156 }, { "epoch": 0.4335820094461142, "grad_norm": 0.32421875, "learning_rate": 0.0011508395482062587, "loss": 1.8613, "step": 16157 }, { "epoch": 0.4336088449978532, "grad_norm": 0.3203125, "learning_rate": 0.0011508284071064326, "loss": 1.8359, "step": 16158 }, { "epoch": 0.4336356805495921, "grad_norm": 0.328125, "learning_rate": 0.0011508172647982508, "loss": 1.8823, "step": 16159 }, { "epoch": 0.43366251610133105, "grad_norm": 0.330078125, "learning_rate": 0.001150806121281738, "loss": 1.8667, "step": 16160 }, { "epoch": 0.43368935165307, "grad_norm": 0.33984375, "learning_rate": 0.0011507949765569188, "loss": 1.9135, "step": 16161 }, { "epoch": 0.4337161872048089, "grad_norm": 0.3125, "learning_rate": 0.0011507838306238172, "loss": 1.7297, "step": 16162 }, { "epoch": 0.43374302275654786, "grad_norm": 0.32421875, "learning_rate": 0.0011507726834824581, "loss": 1.8346, "step": 16163 }, { "epoch": 0.4337698583082868, "grad_norm": 0.326171875, "learning_rate": 0.0011507615351328657, "loss": 1.8373, "step": 16164 }, { "epoch": 0.4337966938600258, "grad_norm": 0.328125, "learning_rate": 0.0011507503855750646, "loss": 1.8539, "step": 16165 }, { "epoch": 0.4338235294117647, "grad_norm": 0.3203125, "learning_rate": 0.001150739234809079, "loss": 1.7941, "step": 16166 }, { "epoch": 0.43385036496350365, "grad_norm": 0.326171875, "learning_rate": 0.001150728082834934, "loss": 1.7748, "step": 16167 }, { "epoch": 0.4338772005152426, "grad_norm": 0.318359375, "learning_rate": 0.0011507169296526532, "loss": 1.8487, "step": 16168 }, { "epoch": 0.4339040360669815, "grad_norm": 0.3359375, "learning_rate": 0.0011507057752622616, "loss": 1.8093, "step": 16169 }, { "epoch": 0.43393087161872046, "grad_norm": 0.318359375, "learning_rate": 0.0011506946196637837, "loss": 1.7228, "step": 16170 }, { "epoch": 0.43395770717045945, "grad_norm": 0.326171875, "learning_rate": 0.0011506834628572434, "loss": 1.883, "step": 16171 }, { "epoch": 0.4339845427221984, "grad_norm": 0.330078125, "learning_rate": 0.001150672304842666, "loss": 1.8303, "step": 16172 }, { "epoch": 0.4340113782739373, "grad_norm": 0.32421875, "learning_rate": 0.0011506611456200753, "loss": 1.8438, "step": 16173 }, { "epoch": 0.43403821382567626, "grad_norm": 0.3359375, "learning_rate": 0.0011506499851894963, "loss": 1.8001, "step": 16174 }, { "epoch": 0.4340650493774152, "grad_norm": 0.32421875, "learning_rate": 0.001150638823550953, "loss": 1.7582, "step": 16175 }, { "epoch": 0.4340918849291541, "grad_norm": 0.337890625, "learning_rate": 0.0011506276607044703, "loss": 1.9372, "step": 16176 }, { "epoch": 0.43411872048089306, "grad_norm": 0.33203125, "learning_rate": 0.0011506164966500723, "loss": 1.8817, "step": 16177 }, { "epoch": 0.43414555603263205, "grad_norm": 0.32421875, "learning_rate": 0.001150605331387784, "loss": 1.7924, "step": 16178 }, { "epoch": 0.434172391584371, "grad_norm": 0.33203125, "learning_rate": 0.0011505941649176295, "loss": 1.8517, "step": 16179 }, { "epoch": 0.4341992271361099, "grad_norm": 0.318359375, "learning_rate": 0.0011505829972396331, "loss": 1.7282, "step": 16180 }, { "epoch": 0.43422606268784886, "grad_norm": 0.326171875, "learning_rate": 0.00115057182835382, "loss": 1.803, "step": 16181 }, { "epoch": 0.4342528982395878, "grad_norm": 0.3125, "learning_rate": 0.001150560658260214, "loss": 1.7039, "step": 16182 }, { "epoch": 0.4342797337913267, "grad_norm": 0.3125, "learning_rate": 0.00115054948695884, "loss": 1.6954, "step": 16183 }, { "epoch": 0.4343065693430657, "grad_norm": 0.328125, "learning_rate": 0.0011505383144497223, "loss": 1.746, "step": 16184 }, { "epoch": 0.43433340489480465, "grad_norm": 0.3359375, "learning_rate": 0.0011505271407328858, "loss": 1.9127, "step": 16185 }, { "epoch": 0.4343602404465436, "grad_norm": 0.318359375, "learning_rate": 0.0011505159658083545, "loss": 1.7208, "step": 16186 }, { "epoch": 0.4343870759982825, "grad_norm": 0.326171875, "learning_rate": 0.0011505047896761531, "loss": 1.7983, "step": 16187 }, { "epoch": 0.43441391155002146, "grad_norm": 0.31640625, "learning_rate": 0.0011504936123363063, "loss": 1.6851, "step": 16188 }, { "epoch": 0.4344407471017604, "grad_norm": 0.32421875, "learning_rate": 0.0011504824337888384, "loss": 1.8271, "step": 16189 }, { "epoch": 0.43446758265349933, "grad_norm": 0.33984375, "learning_rate": 0.001150471254033774, "loss": 1.8607, "step": 16190 }, { "epoch": 0.4344944182052383, "grad_norm": 0.34375, "learning_rate": 0.0011504600730711375, "loss": 1.9707, "step": 16191 }, { "epoch": 0.43452125375697725, "grad_norm": 0.32421875, "learning_rate": 0.0011504488909009538, "loss": 1.7562, "step": 16192 }, { "epoch": 0.4345480893087162, "grad_norm": 0.314453125, "learning_rate": 0.001150437707523247, "loss": 1.6731, "step": 16193 }, { "epoch": 0.4345749248604551, "grad_norm": 0.33203125, "learning_rate": 0.0011504265229380418, "loss": 1.929, "step": 16194 }, { "epoch": 0.43460176041219406, "grad_norm": 0.33203125, "learning_rate": 0.0011504153371453627, "loss": 1.7751, "step": 16195 }, { "epoch": 0.434628595963933, "grad_norm": 0.330078125, "learning_rate": 0.0011504041501452344, "loss": 1.9147, "step": 16196 }, { "epoch": 0.434655431515672, "grad_norm": 0.328125, "learning_rate": 0.0011503929619376813, "loss": 1.876, "step": 16197 }, { "epoch": 0.4346822670674109, "grad_norm": 0.330078125, "learning_rate": 0.0011503817725227279, "loss": 1.7933, "step": 16198 }, { "epoch": 0.43470910261914986, "grad_norm": 0.322265625, "learning_rate": 0.0011503705819003991, "loss": 1.7764, "step": 16199 }, { "epoch": 0.4347359381708888, "grad_norm": 0.322265625, "learning_rate": 0.0011503593900707189, "loss": 1.7834, "step": 16200 }, { "epoch": 0.4347627737226277, "grad_norm": 0.345703125, "learning_rate": 0.0011503481970337123, "loss": 1.9377, "step": 16201 }, { "epoch": 0.43478960927436666, "grad_norm": 0.328125, "learning_rate": 0.0011503370027894034, "loss": 1.7017, "step": 16202 }, { "epoch": 0.4348164448261056, "grad_norm": 0.322265625, "learning_rate": 0.001150325807337817, "loss": 1.7295, "step": 16203 }, { "epoch": 0.4348432803778446, "grad_norm": 0.318359375, "learning_rate": 0.001150314610678978, "loss": 1.7283, "step": 16204 }, { "epoch": 0.4348701159295835, "grad_norm": 0.326171875, "learning_rate": 0.0011503034128129107, "loss": 1.8379, "step": 16205 }, { "epoch": 0.43489695148132246, "grad_norm": 0.330078125, "learning_rate": 0.001150292213739639, "loss": 1.8833, "step": 16206 }, { "epoch": 0.4349237870330614, "grad_norm": 0.330078125, "learning_rate": 0.0011502810134591886, "loss": 1.7611, "step": 16207 }, { "epoch": 0.43495062258480033, "grad_norm": 0.328125, "learning_rate": 0.0011502698119715835, "loss": 1.7938, "step": 16208 }, { "epoch": 0.43497745813653926, "grad_norm": 0.326171875, "learning_rate": 0.0011502586092768483, "loss": 1.8287, "step": 16209 }, { "epoch": 0.43500429368827825, "grad_norm": 0.333984375, "learning_rate": 0.0011502474053750077, "loss": 1.8697, "step": 16210 }, { "epoch": 0.4350311292400172, "grad_norm": 0.333984375, "learning_rate": 0.0011502362002660858, "loss": 1.7885, "step": 16211 }, { "epoch": 0.4350579647917561, "grad_norm": 0.33203125, "learning_rate": 0.001150224993950108, "loss": 1.8217, "step": 16212 }, { "epoch": 0.43508480034349506, "grad_norm": 0.328125, "learning_rate": 0.0011502137864270981, "loss": 1.7612, "step": 16213 }, { "epoch": 0.435111635895234, "grad_norm": 0.333984375, "learning_rate": 0.0011502025776970813, "loss": 1.8465, "step": 16214 }, { "epoch": 0.43513847144697293, "grad_norm": 0.333984375, "learning_rate": 0.0011501913677600818, "loss": 1.7872, "step": 16215 }, { "epoch": 0.4351653069987119, "grad_norm": 0.33984375, "learning_rate": 0.0011501801566161244, "loss": 1.8151, "step": 16216 }, { "epoch": 0.43519214255045086, "grad_norm": 0.33203125, "learning_rate": 0.0011501689442652333, "loss": 1.7855, "step": 16217 }, { "epoch": 0.4352189781021898, "grad_norm": 0.3359375, "learning_rate": 0.0011501577307074338, "loss": 1.7975, "step": 16218 }, { "epoch": 0.4352458136539287, "grad_norm": 0.34375, "learning_rate": 0.0011501465159427498, "loss": 1.7963, "step": 16219 }, { "epoch": 0.43527264920566766, "grad_norm": 0.33203125, "learning_rate": 0.0011501352999712063, "loss": 1.8356, "step": 16220 }, { "epoch": 0.4352994847574066, "grad_norm": 0.330078125, "learning_rate": 0.0011501240827928277, "loss": 1.8334, "step": 16221 }, { "epoch": 0.43532632030914553, "grad_norm": 0.3359375, "learning_rate": 0.0011501128644076388, "loss": 1.8141, "step": 16222 }, { "epoch": 0.4353531558608845, "grad_norm": 0.310546875, "learning_rate": 0.0011501016448156641, "loss": 1.7207, "step": 16223 }, { "epoch": 0.43537999141262346, "grad_norm": 0.33984375, "learning_rate": 0.0011500904240169284, "loss": 1.7514, "step": 16224 }, { "epoch": 0.4354068269643624, "grad_norm": 0.322265625, "learning_rate": 0.001150079202011456, "loss": 1.8467, "step": 16225 }, { "epoch": 0.4354336625161013, "grad_norm": 0.341796875, "learning_rate": 0.0011500679787992715, "loss": 1.9128, "step": 16226 }, { "epoch": 0.43546049806784026, "grad_norm": 0.326171875, "learning_rate": 0.0011500567543804002, "loss": 1.7886, "step": 16227 }, { "epoch": 0.4354873336195792, "grad_norm": 0.330078125, "learning_rate": 0.0011500455287548658, "loss": 1.7507, "step": 16228 }, { "epoch": 0.4355141691713182, "grad_norm": 0.328125, "learning_rate": 0.0011500343019226933, "loss": 1.7521, "step": 16229 }, { "epoch": 0.4355410047230571, "grad_norm": 0.33984375, "learning_rate": 0.0011500230738839077, "loss": 1.8325, "step": 16230 }, { "epoch": 0.43556784027479606, "grad_norm": 0.318359375, "learning_rate": 0.001150011844638533, "loss": 1.6525, "step": 16231 }, { "epoch": 0.435594675826535, "grad_norm": 0.33984375, "learning_rate": 0.0011500006141865941, "loss": 1.9066, "step": 16232 }, { "epoch": 0.43562151137827393, "grad_norm": 0.345703125, "learning_rate": 0.0011499893825281157, "loss": 1.835, "step": 16233 }, { "epoch": 0.43564834693001286, "grad_norm": 0.33203125, "learning_rate": 0.0011499781496631227, "loss": 1.8433, "step": 16234 }, { "epoch": 0.4356751824817518, "grad_norm": 0.451171875, "learning_rate": 0.001149966915591639, "loss": 2.2576, "step": 16235 }, { "epoch": 0.4357020180334908, "grad_norm": 0.400390625, "learning_rate": 0.0011499556803136902, "loss": 2.1687, "step": 16236 }, { "epoch": 0.4357288535852297, "grad_norm": 0.369140625, "learning_rate": 0.0011499444438292999, "loss": 2.1096, "step": 16237 }, { "epoch": 0.43575568913696866, "grad_norm": 0.34375, "learning_rate": 0.0011499332061384935, "loss": 2.0118, "step": 16238 }, { "epoch": 0.4357825246887076, "grad_norm": 0.345703125, "learning_rate": 0.0011499219672412955, "loss": 2.1382, "step": 16239 }, { "epoch": 0.43580936024044653, "grad_norm": 0.34765625, "learning_rate": 0.0011499107271377305, "loss": 2.096, "step": 16240 }, { "epoch": 0.43583619579218547, "grad_norm": 0.35546875, "learning_rate": 0.001149899485827823, "loss": 2.1218, "step": 16241 }, { "epoch": 0.43586303134392446, "grad_norm": 0.3359375, "learning_rate": 0.001149888243311598, "loss": 2.1311, "step": 16242 }, { "epoch": 0.4358898668956634, "grad_norm": 0.322265625, "learning_rate": 0.0011498769995890799, "loss": 2.0149, "step": 16243 }, { "epoch": 0.4359167024474023, "grad_norm": 0.34375, "learning_rate": 0.0011498657546602936, "loss": 2.0834, "step": 16244 }, { "epoch": 0.43594353799914126, "grad_norm": 0.337890625, "learning_rate": 0.0011498545085252634, "loss": 2.0673, "step": 16245 }, { "epoch": 0.4359703735508802, "grad_norm": 0.330078125, "learning_rate": 0.001149843261184014, "loss": 2.0985, "step": 16246 }, { "epoch": 0.43599720910261913, "grad_norm": 0.3203125, "learning_rate": 0.0011498320126365705, "loss": 2.0296, "step": 16247 }, { "epoch": 0.43602404465435807, "grad_norm": 0.3359375, "learning_rate": 0.0011498207628829575, "loss": 2.1334, "step": 16248 }, { "epoch": 0.43605088020609706, "grad_norm": 0.333984375, "learning_rate": 0.0011498095119231993, "loss": 2.0732, "step": 16249 }, { "epoch": 0.436077715757836, "grad_norm": 0.326171875, "learning_rate": 0.0011497982597573207, "loss": 2.0212, "step": 16250 }, { "epoch": 0.43610455130957493, "grad_norm": 0.3359375, "learning_rate": 0.0011497870063853464, "loss": 2.1596, "step": 16251 }, { "epoch": 0.43613138686131386, "grad_norm": 0.3203125, "learning_rate": 0.0011497757518073015, "loss": 1.9988, "step": 16252 }, { "epoch": 0.4361582224130528, "grad_norm": 0.3125, "learning_rate": 0.0011497644960232103, "loss": 1.9467, "step": 16253 }, { "epoch": 0.43618505796479173, "grad_norm": 0.32421875, "learning_rate": 0.0011497532390330972, "loss": 2.0251, "step": 16254 }, { "epoch": 0.4362118935165307, "grad_norm": 0.33984375, "learning_rate": 0.0011497419808369875, "loss": 2.1361, "step": 16255 }, { "epoch": 0.43623872906826966, "grad_norm": 0.337890625, "learning_rate": 0.0011497307214349056, "loss": 2.1185, "step": 16256 }, { "epoch": 0.4362655646200086, "grad_norm": 0.337890625, "learning_rate": 0.0011497194608268762, "loss": 2.0493, "step": 16257 }, { "epoch": 0.43629240017174753, "grad_norm": 0.322265625, "learning_rate": 0.0011497081990129241, "loss": 2.054, "step": 16258 }, { "epoch": 0.43631923572348646, "grad_norm": 0.32421875, "learning_rate": 0.001149696935993074, "loss": 1.997, "step": 16259 }, { "epoch": 0.4363460712752254, "grad_norm": 0.322265625, "learning_rate": 0.0011496856717673502, "loss": 1.9728, "step": 16260 }, { "epoch": 0.43637290682696434, "grad_norm": 0.326171875, "learning_rate": 0.001149674406335778, "loss": 1.9674, "step": 16261 }, { "epoch": 0.4363997423787033, "grad_norm": 0.33984375, "learning_rate": 0.001149663139698382, "loss": 2.0662, "step": 16262 }, { "epoch": 0.43642657793044226, "grad_norm": 0.328125, "learning_rate": 0.0011496518718551866, "loss": 2.0282, "step": 16263 }, { "epoch": 0.4364534134821812, "grad_norm": 0.333984375, "learning_rate": 0.0011496406028062166, "loss": 2.0663, "step": 16264 }, { "epoch": 0.43648024903392013, "grad_norm": 0.330078125, "learning_rate": 0.001149629332551497, "loss": 1.9647, "step": 16265 }, { "epoch": 0.43650708458565907, "grad_norm": 0.30859375, "learning_rate": 0.0011496180610910524, "loss": 1.8147, "step": 16266 }, { "epoch": 0.436533920137398, "grad_norm": 0.3125, "learning_rate": 0.0011496067884249075, "loss": 1.9159, "step": 16267 }, { "epoch": 0.436560755689137, "grad_norm": 0.333984375, "learning_rate": 0.0011495955145530868, "loss": 1.9642, "step": 16268 }, { "epoch": 0.4365875912408759, "grad_norm": 0.326171875, "learning_rate": 0.0011495842394756153, "loss": 1.9425, "step": 16269 }, { "epoch": 0.43661442679261486, "grad_norm": 0.32421875, "learning_rate": 0.0011495729631925177, "loss": 1.9372, "step": 16270 }, { "epoch": 0.4366412623443538, "grad_norm": 0.337890625, "learning_rate": 0.0011495616857038185, "loss": 1.9586, "step": 16271 }, { "epoch": 0.43666809789609273, "grad_norm": 0.3125, "learning_rate": 0.0011495504070095429, "loss": 1.9012, "step": 16272 }, { "epoch": 0.43669493344783167, "grad_norm": 0.328125, "learning_rate": 0.0011495391271097153, "loss": 2.0191, "step": 16273 }, { "epoch": 0.43672176899957066, "grad_norm": 0.33203125, "learning_rate": 0.0011495278460043605, "loss": 2.07, "step": 16274 }, { "epoch": 0.4367486045513096, "grad_norm": 0.32421875, "learning_rate": 0.0011495165636935035, "loss": 1.9419, "step": 16275 }, { "epoch": 0.43677544010304853, "grad_norm": 0.328125, "learning_rate": 0.0011495052801771685, "loss": 1.9757, "step": 16276 }, { "epoch": 0.43680227565478746, "grad_norm": 0.3203125, "learning_rate": 0.0011494939954553808, "loss": 2.0217, "step": 16277 }, { "epoch": 0.4368291112065264, "grad_norm": 0.326171875, "learning_rate": 0.0011494827095281648, "loss": 2.0274, "step": 16278 }, { "epoch": 0.43685594675826533, "grad_norm": 0.33203125, "learning_rate": 0.0011494714223955456, "loss": 1.9673, "step": 16279 }, { "epoch": 0.43688278231000427, "grad_norm": 0.33203125, "learning_rate": 0.0011494601340575478, "loss": 1.9375, "step": 16280 }, { "epoch": 0.43690961786174326, "grad_norm": 0.318359375, "learning_rate": 0.0011494488445141957, "loss": 1.9404, "step": 16281 }, { "epoch": 0.4369364534134822, "grad_norm": 0.33203125, "learning_rate": 0.0011494375537655147, "loss": 2.0219, "step": 16282 }, { "epoch": 0.43696328896522113, "grad_norm": 0.326171875, "learning_rate": 0.0011494262618115295, "loss": 2.0748, "step": 16283 }, { "epoch": 0.43699012451696007, "grad_norm": 0.31640625, "learning_rate": 0.0011494149686522648, "loss": 1.8955, "step": 16284 }, { "epoch": 0.437016960068699, "grad_norm": 0.3203125, "learning_rate": 0.001149403674287745, "loss": 1.9455, "step": 16285 }, { "epoch": 0.43704379562043794, "grad_norm": 0.31640625, "learning_rate": 0.0011493923787179953, "loss": 1.9558, "step": 16286 }, { "epoch": 0.4370706311721769, "grad_norm": 0.326171875, "learning_rate": 0.0011493810819430403, "loss": 1.9892, "step": 16287 }, { "epoch": 0.43709746672391586, "grad_norm": 0.33203125, "learning_rate": 0.001149369783962905, "loss": 2.0372, "step": 16288 }, { "epoch": 0.4371243022756548, "grad_norm": 0.333984375, "learning_rate": 0.001149358484777614, "loss": 1.9924, "step": 16289 }, { "epoch": 0.43715113782739373, "grad_norm": 0.322265625, "learning_rate": 0.001149347184387192, "loss": 1.9256, "step": 16290 }, { "epoch": 0.43717797337913267, "grad_norm": 0.322265625, "learning_rate": 0.0011493358827916638, "loss": 1.9597, "step": 16291 }, { "epoch": 0.4372048089308716, "grad_norm": 0.30859375, "learning_rate": 0.0011493245799910543, "loss": 1.902, "step": 16292 }, { "epoch": 0.43723164448261054, "grad_norm": 0.3203125, "learning_rate": 0.0011493132759853883, "loss": 1.9653, "step": 16293 }, { "epoch": 0.43725848003434953, "grad_norm": 0.326171875, "learning_rate": 0.001149301970774691, "loss": 1.8487, "step": 16294 }, { "epoch": 0.43728531558608846, "grad_norm": 0.32421875, "learning_rate": 0.001149290664358986, "loss": 2.0113, "step": 16295 }, { "epoch": 0.4373121511378274, "grad_norm": 0.32421875, "learning_rate": 0.0011492793567382997, "loss": 2.01, "step": 16296 }, { "epoch": 0.43733898668956633, "grad_norm": 0.318359375, "learning_rate": 0.0011492680479126556, "loss": 1.9276, "step": 16297 }, { "epoch": 0.43736582224130527, "grad_norm": 0.3203125, "learning_rate": 0.0011492567378820792, "loss": 1.9857, "step": 16298 }, { "epoch": 0.4373926577930442, "grad_norm": 0.326171875, "learning_rate": 0.0011492454266465951, "loss": 2.0044, "step": 16299 }, { "epoch": 0.4374194933447832, "grad_norm": 0.3046875, "learning_rate": 0.001149234114206228, "loss": 1.8909, "step": 16300 }, { "epoch": 0.43744632889652213, "grad_norm": 0.322265625, "learning_rate": 0.0011492228005610028, "loss": 1.8824, "step": 16301 }, { "epoch": 0.43747316444826106, "grad_norm": 0.318359375, "learning_rate": 0.0011492114857109446, "loss": 1.9946, "step": 16302 }, { "epoch": 0.4375, "grad_norm": 0.322265625, "learning_rate": 0.001149200169656078, "loss": 1.8909, "step": 16303 }, { "epoch": 0.43752683555173894, "grad_norm": 0.326171875, "learning_rate": 0.0011491888523964273, "loss": 2.0319, "step": 16304 }, { "epoch": 0.43755367110347787, "grad_norm": 0.322265625, "learning_rate": 0.0011491775339320183, "loss": 1.9086, "step": 16305 }, { "epoch": 0.4375805066552168, "grad_norm": 0.333984375, "learning_rate": 0.0011491662142628753, "loss": 2.0092, "step": 16306 }, { "epoch": 0.4376073422069558, "grad_norm": 0.314453125, "learning_rate": 0.0011491548933890232, "loss": 1.8843, "step": 16307 }, { "epoch": 0.43763417775869473, "grad_norm": 0.31640625, "learning_rate": 0.0011491435713104868, "loss": 1.8732, "step": 16308 }, { "epoch": 0.43766101331043367, "grad_norm": 0.314453125, "learning_rate": 0.0011491322480272907, "loss": 1.909, "step": 16309 }, { "epoch": 0.4376878488621726, "grad_norm": 0.3203125, "learning_rate": 0.0011491209235394602, "loss": 1.8303, "step": 16310 }, { "epoch": 0.43771468441391154, "grad_norm": 0.302734375, "learning_rate": 0.00114910959784702, "loss": 1.8044, "step": 16311 }, { "epoch": 0.43774151996565047, "grad_norm": 0.3203125, "learning_rate": 0.001149098270949995, "loss": 1.8667, "step": 16312 }, { "epoch": 0.43776835551738946, "grad_norm": 0.33203125, "learning_rate": 0.00114908694284841, "loss": 1.8853, "step": 16313 }, { "epoch": 0.4377951910691284, "grad_norm": 0.3359375, "learning_rate": 0.0011490756135422895, "loss": 2.0445, "step": 16314 }, { "epoch": 0.43782202662086733, "grad_norm": 0.3203125, "learning_rate": 0.0011490642830316586, "loss": 1.9539, "step": 16315 }, { "epoch": 0.43784886217260627, "grad_norm": 0.318359375, "learning_rate": 0.0011490529513165425, "loss": 1.939, "step": 16316 }, { "epoch": 0.4378756977243452, "grad_norm": 0.314453125, "learning_rate": 0.0011490416183969655, "loss": 1.8702, "step": 16317 }, { "epoch": 0.43790253327608414, "grad_norm": 0.30859375, "learning_rate": 0.001149030284272953, "loss": 1.8837, "step": 16318 }, { "epoch": 0.4379293688278231, "grad_norm": 0.318359375, "learning_rate": 0.0011490189489445294, "loss": 1.9006, "step": 16319 }, { "epoch": 0.43795620437956206, "grad_norm": 0.32421875, "learning_rate": 0.0011490076124117197, "loss": 1.9251, "step": 16320 }, { "epoch": 0.437983039931301, "grad_norm": 0.328125, "learning_rate": 0.001148996274674549, "loss": 1.9063, "step": 16321 }, { "epoch": 0.43800987548303993, "grad_norm": 0.3203125, "learning_rate": 0.0011489849357330419, "loss": 1.893, "step": 16322 }, { "epoch": 0.43803671103477887, "grad_norm": 0.326171875, "learning_rate": 0.0011489735955872232, "loss": 1.9221, "step": 16323 }, { "epoch": 0.4380635465865178, "grad_norm": 0.322265625, "learning_rate": 0.001148962254237118, "loss": 1.8803, "step": 16324 }, { "epoch": 0.43809038213825674, "grad_norm": 0.326171875, "learning_rate": 0.0011489509116827514, "loss": 1.9409, "step": 16325 }, { "epoch": 0.43811721768999573, "grad_norm": 0.314453125, "learning_rate": 0.0011489395679241477, "loss": 1.8539, "step": 16326 }, { "epoch": 0.43814405324173467, "grad_norm": 0.32421875, "learning_rate": 0.0011489282229613321, "loss": 1.917, "step": 16327 }, { "epoch": 0.4381708887934736, "grad_norm": 0.31640625, "learning_rate": 0.0011489168767943297, "loss": 1.8797, "step": 16328 }, { "epoch": 0.43819772434521254, "grad_norm": 0.328125, "learning_rate": 0.001148905529423165, "loss": 1.9178, "step": 16329 }, { "epoch": 0.43822455989695147, "grad_norm": 0.31640625, "learning_rate": 0.001148894180847863, "loss": 1.8404, "step": 16330 }, { "epoch": 0.4382513954486904, "grad_norm": 0.32421875, "learning_rate": 0.0011488828310684488, "loss": 1.9292, "step": 16331 }, { "epoch": 0.43827823100042934, "grad_norm": 0.326171875, "learning_rate": 0.0011488714800849472, "loss": 1.9174, "step": 16332 }, { "epoch": 0.43830506655216833, "grad_norm": 0.3359375, "learning_rate": 0.0011488601278973828, "loss": 1.9175, "step": 16333 }, { "epoch": 0.43833190210390727, "grad_norm": 0.32421875, "learning_rate": 0.001148848774505781, "loss": 1.9229, "step": 16334 }, { "epoch": 0.4383587376556462, "grad_norm": 0.322265625, "learning_rate": 0.0011488374199101665, "loss": 1.9974, "step": 16335 }, { "epoch": 0.43838557320738514, "grad_norm": 0.3203125, "learning_rate": 0.001148826064110564, "loss": 1.832, "step": 16336 }, { "epoch": 0.4384124087591241, "grad_norm": 0.318359375, "learning_rate": 0.0011488147071069986, "loss": 1.8949, "step": 16337 }, { "epoch": 0.438439244310863, "grad_norm": 0.322265625, "learning_rate": 0.0011488033488994953, "loss": 1.8732, "step": 16338 }, { "epoch": 0.438466079862602, "grad_norm": 0.322265625, "learning_rate": 0.0011487919894880786, "loss": 1.9107, "step": 16339 }, { "epoch": 0.43849291541434093, "grad_norm": 0.322265625, "learning_rate": 0.001148780628872774, "loss": 1.8515, "step": 16340 }, { "epoch": 0.43851975096607987, "grad_norm": 0.326171875, "learning_rate": 0.0011487692670536062, "loss": 1.943, "step": 16341 }, { "epoch": 0.4385465865178188, "grad_norm": 0.3203125, "learning_rate": 0.0011487579040306, "loss": 1.8749, "step": 16342 }, { "epoch": 0.43857342206955774, "grad_norm": 0.326171875, "learning_rate": 0.0011487465398037804, "loss": 1.9133, "step": 16343 }, { "epoch": 0.4386002576212967, "grad_norm": 0.32421875, "learning_rate": 0.0011487351743731723, "loss": 1.9193, "step": 16344 }, { "epoch": 0.43862709317303566, "grad_norm": 0.345703125, "learning_rate": 0.0011487238077388007, "loss": 1.9894, "step": 16345 }, { "epoch": 0.4386539287247746, "grad_norm": 0.34765625, "learning_rate": 0.0011487124399006906, "loss": 1.9685, "step": 16346 }, { "epoch": 0.43868076427651354, "grad_norm": 0.3203125, "learning_rate": 0.0011487010708588668, "loss": 1.844, "step": 16347 }, { "epoch": 0.43870759982825247, "grad_norm": 0.333984375, "learning_rate": 0.001148689700613354, "loss": 2.0171, "step": 16348 }, { "epoch": 0.4387344353799914, "grad_norm": 0.32421875, "learning_rate": 0.0011486783291641777, "loss": 2.0016, "step": 16349 }, { "epoch": 0.43876127093173034, "grad_norm": 0.3359375, "learning_rate": 0.0011486669565113627, "loss": 2.0206, "step": 16350 }, { "epoch": 0.4387881064834693, "grad_norm": 0.326171875, "learning_rate": 0.0011486555826549336, "loss": 1.9324, "step": 16351 }, { "epoch": 0.43881494203520827, "grad_norm": 0.326171875, "learning_rate": 0.0011486442075949157, "loss": 1.9689, "step": 16352 }, { "epoch": 0.4388417775869472, "grad_norm": 0.322265625, "learning_rate": 0.0011486328313313335, "loss": 1.919, "step": 16353 }, { "epoch": 0.43886861313868614, "grad_norm": 0.322265625, "learning_rate": 0.0011486214538642125, "loss": 1.8957, "step": 16354 }, { "epoch": 0.43889544869042507, "grad_norm": 0.322265625, "learning_rate": 0.0011486100751935775, "loss": 1.9051, "step": 16355 }, { "epoch": 0.438922284242164, "grad_norm": 0.328125, "learning_rate": 0.0011485986953194535, "loss": 1.8673, "step": 16356 }, { "epoch": 0.43894911979390294, "grad_norm": 0.326171875, "learning_rate": 0.0011485873142418651, "loss": 1.8648, "step": 16357 }, { "epoch": 0.43897595534564193, "grad_norm": 0.3203125, "learning_rate": 0.0011485759319608377, "loss": 1.8786, "step": 16358 }, { "epoch": 0.43900279089738087, "grad_norm": 0.322265625, "learning_rate": 0.001148564548476396, "loss": 1.8201, "step": 16359 }, { "epoch": 0.4390296264491198, "grad_norm": 0.306640625, "learning_rate": 0.0011485531637885652, "loss": 1.7306, "step": 16360 }, { "epoch": 0.43905646200085874, "grad_norm": 0.328125, "learning_rate": 0.0011485417778973699, "loss": 1.8896, "step": 16361 }, { "epoch": 0.4390832975525977, "grad_norm": 0.337890625, "learning_rate": 0.0011485303908028354, "loss": 1.8539, "step": 16362 }, { "epoch": 0.4391101331043366, "grad_norm": 0.31640625, "learning_rate": 0.0011485190025049869, "loss": 1.8233, "step": 16363 }, { "epoch": 0.43913696865607554, "grad_norm": 0.3203125, "learning_rate": 0.0011485076130038489, "loss": 1.7969, "step": 16364 }, { "epoch": 0.43916380420781453, "grad_norm": 0.322265625, "learning_rate": 0.0011484962222994464, "loss": 1.7878, "step": 16365 }, { "epoch": 0.43919063975955347, "grad_norm": 0.326171875, "learning_rate": 0.0011484848303918048, "loss": 1.9056, "step": 16366 }, { "epoch": 0.4392174753112924, "grad_norm": 0.31640625, "learning_rate": 0.0011484734372809488, "loss": 1.8791, "step": 16367 }, { "epoch": 0.43924431086303134, "grad_norm": 0.32421875, "learning_rate": 0.0011484620429669033, "loss": 1.8976, "step": 16368 }, { "epoch": 0.4392711464147703, "grad_norm": 0.3203125, "learning_rate": 0.0011484506474496935, "loss": 1.8221, "step": 16369 }, { "epoch": 0.4392979819665092, "grad_norm": 0.333984375, "learning_rate": 0.0011484392507293444, "loss": 1.8678, "step": 16370 }, { "epoch": 0.4393248175182482, "grad_norm": 0.328125, "learning_rate": 0.0011484278528058809, "loss": 1.8574, "step": 16371 }, { "epoch": 0.43935165306998714, "grad_norm": 0.326171875, "learning_rate": 0.001148416453679328, "loss": 1.8621, "step": 16372 }, { "epoch": 0.43937848862172607, "grad_norm": 0.330078125, "learning_rate": 0.0011484050533497106, "loss": 1.9107, "step": 16373 }, { "epoch": 0.439405324173465, "grad_norm": 0.32421875, "learning_rate": 0.0011483936518170543, "loss": 1.8385, "step": 16374 }, { "epoch": 0.43943215972520394, "grad_norm": 0.318359375, "learning_rate": 0.0011483822490813834, "loss": 1.8109, "step": 16375 }, { "epoch": 0.4394589952769429, "grad_norm": 0.333984375, "learning_rate": 0.001148370845142723, "loss": 1.9438, "step": 16376 }, { "epoch": 0.4394858308286818, "grad_norm": 0.322265625, "learning_rate": 0.0011483594400010985, "loss": 1.7847, "step": 16377 }, { "epoch": 0.4395126663804208, "grad_norm": 0.31640625, "learning_rate": 0.0011483480336565348, "loss": 1.8073, "step": 16378 }, { "epoch": 0.43953950193215974, "grad_norm": 0.322265625, "learning_rate": 0.0011483366261090568, "loss": 1.8048, "step": 16379 }, { "epoch": 0.4395663374838987, "grad_norm": 0.318359375, "learning_rate": 0.0011483252173586894, "loss": 1.9224, "step": 16380 }, { "epoch": 0.4395931730356376, "grad_norm": 0.330078125, "learning_rate": 0.001148313807405458, "loss": 1.9663, "step": 16381 }, { "epoch": 0.43962000858737654, "grad_norm": 0.32421875, "learning_rate": 0.0011483023962493874, "loss": 1.8906, "step": 16382 }, { "epoch": 0.4396468441391155, "grad_norm": 0.337890625, "learning_rate": 0.0011482909838905024, "loss": 1.9248, "step": 16383 }, { "epoch": 0.43967367969085447, "grad_norm": 0.314453125, "learning_rate": 0.0011482795703288285, "loss": 1.8175, "step": 16384 }, { "epoch": 0.4397005152425934, "grad_norm": 0.32421875, "learning_rate": 0.0011482681555643906, "loss": 1.8645, "step": 16385 }, { "epoch": 0.43972735079433234, "grad_norm": 0.330078125, "learning_rate": 0.0011482567395972134, "loss": 1.889, "step": 16386 }, { "epoch": 0.4397541863460713, "grad_norm": 0.326171875, "learning_rate": 0.0011482453224273227, "loss": 1.8125, "step": 16387 }, { "epoch": 0.4397810218978102, "grad_norm": 0.326171875, "learning_rate": 0.0011482339040547425, "loss": 1.8764, "step": 16388 }, { "epoch": 0.43980785744954914, "grad_norm": 0.328125, "learning_rate": 0.0011482224844794988, "loss": 1.8917, "step": 16389 }, { "epoch": 0.4398346930012881, "grad_norm": 0.333984375, "learning_rate": 0.0011482110637016161, "loss": 1.9538, "step": 16390 }, { "epoch": 0.43986152855302707, "grad_norm": 0.322265625, "learning_rate": 0.0011481996417211195, "loss": 1.8094, "step": 16391 }, { "epoch": 0.439888364104766, "grad_norm": 0.3203125, "learning_rate": 0.0011481882185380343, "loss": 1.7951, "step": 16392 }, { "epoch": 0.43991519965650494, "grad_norm": 0.345703125, "learning_rate": 0.0011481767941523854, "loss": 2.0069, "step": 16393 }, { "epoch": 0.4399420352082439, "grad_norm": 0.3359375, "learning_rate": 0.001148165368564198, "loss": 1.8496, "step": 16394 }, { "epoch": 0.4399688707599828, "grad_norm": 0.330078125, "learning_rate": 0.001148153941773497, "loss": 1.8298, "step": 16395 }, { "epoch": 0.43999570631172175, "grad_norm": 0.337890625, "learning_rate": 0.0011481425137803076, "loss": 1.8916, "step": 16396 }, { "epoch": 0.44002254186346074, "grad_norm": 0.341796875, "learning_rate": 0.0011481310845846545, "loss": 1.8757, "step": 16397 }, { "epoch": 0.44004937741519967, "grad_norm": 0.328125, "learning_rate": 0.0011481196541865632, "loss": 1.8237, "step": 16398 }, { "epoch": 0.4400762129669386, "grad_norm": 0.328125, "learning_rate": 0.0011481082225860586, "loss": 1.7349, "step": 16399 }, { "epoch": 0.44010304851867754, "grad_norm": 0.326171875, "learning_rate": 0.0011480967897831659, "loss": 1.8369, "step": 16400 }, { "epoch": 0.4401298840704165, "grad_norm": 0.326171875, "learning_rate": 0.0011480853557779102, "loss": 1.8023, "step": 16401 }, { "epoch": 0.4401567196221554, "grad_norm": 0.337890625, "learning_rate": 0.0011480739205703163, "loss": 1.8764, "step": 16402 }, { "epoch": 0.4401835551738944, "grad_norm": 0.32421875, "learning_rate": 0.0011480624841604095, "loss": 1.8724, "step": 16403 }, { "epoch": 0.44021039072563334, "grad_norm": 0.337890625, "learning_rate": 0.001148051046548215, "loss": 1.9399, "step": 16404 }, { "epoch": 0.4402372262773723, "grad_norm": 0.328125, "learning_rate": 0.0011480396077337576, "loss": 1.8442, "step": 16405 }, { "epoch": 0.4402640618291112, "grad_norm": 0.3203125, "learning_rate": 0.0011480281677170626, "loss": 1.8021, "step": 16406 }, { "epoch": 0.44029089738085014, "grad_norm": 0.345703125, "learning_rate": 0.001148016726498155, "loss": 2.0044, "step": 16407 }, { "epoch": 0.4403177329325891, "grad_norm": 0.341796875, "learning_rate": 0.00114800528407706, "loss": 1.9131, "step": 16408 }, { "epoch": 0.440344568484328, "grad_norm": 0.333984375, "learning_rate": 0.0011479938404538024, "loss": 1.8004, "step": 16409 }, { "epoch": 0.440371404036067, "grad_norm": 0.32421875, "learning_rate": 0.0011479823956284077, "loss": 1.8023, "step": 16410 }, { "epoch": 0.44039823958780594, "grad_norm": 0.34375, "learning_rate": 0.0011479709496009008, "loss": 1.9257, "step": 16411 }, { "epoch": 0.4404250751395449, "grad_norm": 0.330078125, "learning_rate": 0.001147959502371307, "loss": 1.8664, "step": 16412 }, { "epoch": 0.4404519106912838, "grad_norm": 0.33984375, "learning_rate": 0.0011479480539396511, "loss": 1.8734, "step": 16413 }, { "epoch": 0.44047874624302275, "grad_norm": 0.328125, "learning_rate": 0.0011479366043059586, "loss": 1.811, "step": 16414 }, { "epoch": 0.4405055817947617, "grad_norm": 0.330078125, "learning_rate": 0.001147925153470254, "loss": 1.8561, "step": 16415 }, { "epoch": 0.44053241734650067, "grad_norm": 0.337890625, "learning_rate": 0.001147913701432563, "loss": 1.8409, "step": 16416 }, { "epoch": 0.4405592528982396, "grad_norm": 0.322265625, "learning_rate": 0.0011479022481929106, "loss": 1.7145, "step": 16417 }, { "epoch": 0.44058608844997854, "grad_norm": 0.322265625, "learning_rate": 0.0011478907937513218, "loss": 1.7627, "step": 16418 }, { "epoch": 0.4406129240017175, "grad_norm": 0.32421875, "learning_rate": 0.001147879338107822, "loss": 1.7892, "step": 16419 }, { "epoch": 0.4406397595534564, "grad_norm": 0.33203125, "learning_rate": 0.001147867881262436, "loss": 1.7765, "step": 16420 }, { "epoch": 0.44066659510519535, "grad_norm": 0.33203125, "learning_rate": 0.0011478564232151889, "loss": 1.7813, "step": 16421 }, { "epoch": 0.4406934306569343, "grad_norm": 0.333984375, "learning_rate": 0.0011478449639661062, "loss": 1.851, "step": 16422 }, { "epoch": 0.4407202662086733, "grad_norm": 0.322265625, "learning_rate": 0.0011478335035152126, "loss": 1.7649, "step": 16423 }, { "epoch": 0.4407471017604122, "grad_norm": 0.337890625, "learning_rate": 0.0011478220418625336, "loss": 1.7928, "step": 16424 }, { "epoch": 0.44077393731215114, "grad_norm": 0.32421875, "learning_rate": 0.0011478105790080943, "loss": 1.7162, "step": 16425 }, { "epoch": 0.4408007728638901, "grad_norm": 0.326171875, "learning_rate": 0.0011477991149519196, "loss": 1.6924, "step": 16426 }, { "epoch": 0.440827608415629, "grad_norm": 0.3515625, "learning_rate": 0.0011477876496940348, "loss": 1.8365, "step": 16427 }, { "epoch": 0.44085444396736795, "grad_norm": 0.34765625, "learning_rate": 0.0011477761832344652, "loss": 1.8226, "step": 16428 }, { "epoch": 0.44088127951910694, "grad_norm": 0.416015625, "learning_rate": 0.0011477647155732358, "loss": 2.1634, "step": 16429 }, { "epoch": 0.4409081150708459, "grad_norm": 0.38671875, "learning_rate": 0.0011477532467103718, "loss": 2.0587, "step": 16430 }, { "epoch": 0.4409349506225848, "grad_norm": 0.37109375, "learning_rate": 0.001147741776645898, "loss": 2.105, "step": 16431 }, { "epoch": 0.44096178617432374, "grad_norm": 0.34765625, "learning_rate": 0.0011477303053798401, "loss": 2.0887, "step": 16432 }, { "epoch": 0.4409886217260627, "grad_norm": 0.3515625, "learning_rate": 0.0011477188329122232, "loss": 2.1914, "step": 16433 }, { "epoch": 0.4410154572778016, "grad_norm": 0.3515625, "learning_rate": 0.0011477073592430721, "loss": 2.1226, "step": 16434 }, { "epoch": 0.44104229282954055, "grad_norm": 0.337890625, "learning_rate": 0.0011476958843724123, "loss": 2.1176, "step": 16435 }, { "epoch": 0.44106912838127954, "grad_norm": 0.33203125, "learning_rate": 0.001147684408300269, "loss": 2.0624, "step": 16436 }, { "epoch": 0.4410959639330185, "grad_norm": 0.326171875, "learning_rate": 0.001147672931026667, "loss": 2.1137, "step": 16437 }, { "epoch": 0.4411227994847574, "grad_norm": 0.32421875, "learning_rate": 0.001147661452551632, "loss": 1.9971, "step": 16438 }, { "epoch": 0.44114963503649635, "grad_norm": 0.328125, "learning_rate": 0.0011476499728751888, "loss": 2.1243, "step": 16439 }, { "epoch": 0.4411764705882353, "grad_norm": 0.322265625, "learning_rate": 0.0011476384919973624, "loss": 2.0163, "step": 16440 }, { "epoch": 0.4412033061399742, "grad_norm": 0.33203125, "learning_rate": 0.0011476270099181785, "loss": 2.1251, "step": 16441 }, { "epoch": 0.4412301416917132, "grad_norm": 0.32421875, "learning_rate": 0.001147615526637662, "loss": 2.0163, "step": 16442 }, { "epoch": 0.44125697724345214, "grad_norm": 0.3203125, "learning_rate": 0.001147604042155838, "loss": 2.0266, "step": 16443 }, { "epoch": 0.4412838127951911, "grad_norm": 0.33984375, "learning_rate": 0.0011475925564727323, "loss": 2.1034, "step": 16444 }, { "epoch": 0.44131064834693, "grad_norm": 0.328125, "learning_rate": 0.001147581069588369, "loss": 2.0201, "step": 16445 }, { "epoch": 0.44133748389866895, "grad_norm": 0.330078125, "learning_rate": 0.0011475695815027745, "loss": 2.0416, "step": 16446 }, { "epoch": 0.4413643194504079, "grad_norm": 0.318359375, "learning_rate": 0.001147558092215973, "loss": 1.9664, "step": 16447 }, { "epoch": 0.4413911550021468, "grad_norm": 0.34375, "learning_rate": 0.0011475466017279903, "loss": 2.0624, "step": 16448 }, { "epoch": 0.4414179905538858, "grad_norm": 0.322265625, "learning_rate": 0.0011475351100388516, "loss": 2.0532, "step": 16449 }, { "epoch": 0.44144482610562474, "grad_norm": 0.318359375, "learning_rate": 0.0011475236171485817, "loss": 2.0004, "step": 16450 }, { "epoch": 0.4414716616573637, "grad_norm": 0.333984375, "learning_rate": 0.0011475121230572064, "loss": 2.0436, "step": 16451 }, { "epoch": 0.4414984972091026, "grad_norm": 0.318359375, "learning_rate": 0.0011475006277647504, "loss": 2.016, "step": 16452 }, { "epoch": 0.44152533276084155, "grad_norm": 0.328125, "learning_rate": 0.0011474891312712391, "loss": 2.0376, "step": 16453 }, { "epoch": 0.4415521683125805, "grad_norm": 0.328125, "learning_rate": 0.0011474776335766977, "loss": 2.068, "step": 16454 }, { "epoch": 0.4415790038643195, "grad_norm": 0.328125, "learning_rate": 0.0011474661346811514, "loss": 1.9834, "step": 16455 }, { "epoch": 0.4416058394160584, "grad_norm": 0.33203125, "learning_rate": 0.0011474546345846255, "loss": 2.0296, "step": 16456 }, { "epoch": 0.44163267496779735, "grad_norm": 0.322265625, "learning_rate": 0.0011474431332871454, "loss": 1.9455, "step": 16457 }, { "epoch": 0.4416595105195363, "grad_norm": 0.32421875, "learning_rate": 0.0011474316307887359, "loss": 2.0054, "step": 16458 }, { "epoch": 0.4416863460712752, "grad_norm": 0.314453125, "learning_rate": 0.0011474201270894224, "loss": 1.9777, "step": 16459 }, { "epoch": 0.44171318162301415, "grad_norm": 0.31640625, "learning_rate": 0.0011474086221892304, "loss": 1.9187, "step": 16460 }, { "epoch": 0.4417400171747531, "grad_norm": 0.31640625, "learning_rate": 0.0011473971160881847, "loss": 1.9995, "step": 16461 }, { "epoch": 0.4417668527264921, "grad_norm": 0.298828125, "learning_rate": 0.001147385608786311, "loss": 1.7945, "step": 16462 }, { "epoch": 0.441793688278231, "grad_norm": 0.31640625, "learning_rate": 0.001147374100283634, "loss": 2.0202, "step": 16463 }, { "epoch": 0.44182052382996995, "grad_norm": 0.326171875, "learning_rate": 0.0011473625905801797, "loss": 2.0394, "step": 16464 }, { "epoch": 0.4418473593817089, "grad_norm": 0.3203125, "learning_rate": 0.0011473510796759726, "loss": 1.9529, "step": 16465 }, { "epoch": 0.4418741949334478, "grad_norm": 0.328125, "learning_rate": 0.0011473395675710386, "loss": 1.97, "step": 16466 }, { "epoch": 0.44190103048518675, "grad_norm": 0.3203125, "learning_rate": 0.0011473280542654023, "loss": 1.952, "step": 16467 }, { "epoch": 0.44192786603692574, "grad_norm": 0.314453125, "learning_rate": 0.0011473165397590892, "loss": 1.8812, "step": 16468 }, { "epoch": 0.4419547015886647, "grad_norm": 0.318359375, "learning_rate": 0.001147305024052125, "loss": 1.9419, "step": 16469 }, { "epoch": 0.4419815371404036, "grad_norm": 0.326171875, "learning_rate": 0.0011472935071445342, "loss": 2.0938, "step": 16470 }, { "epoch": 0.44200837269214255, "grad_norm": 0.33203125, "learning_rate": 0.0011472819890363426, "loss": 2.0882, "step": 16471 }, { "epoch": 0.4420352082438815, "grad_norm": 0.31640625, "learning_rate": 0.0011472704697275756, "loss": 1.8993, "step": 16472 }, { "epoch": 0.4420620437956204, "grad_norm": 0.333984375, "learning_rate": 0.0011472589492182578, "loss": 2.0851, "step": 16473 }, { "epoch": 0.4420888793473594, "grad_norm": 0.32421875, "learning_rate": 0.001147247427508415, "loss": 2.0939, "step": 16474 }, { "epoch": 0.44211571489909834, "grad_norm": 0.318359375, "learning_rate": 0.0011472359045980726, "loss": 2.0468, "step": 16475 }, { "epoch": 0.4421425504508373, "grad_norm": 0.3125, "learning_rate": 0.0011472243804872553, "loss": 1.8799, "step": 16476 }, { "epoch": 0.4421693860025762, "grad_norm": 0.32421875, "learning_rate": 0.001147212855175989, "loss": 2.0039, "step": 16477 }, { "epoch": 0.44219622155431515, "grad_norm": 0.306640625, "learning_rate": 0.0011472013286642985, "loss": 1.8309, "step": 16478 }, { "epoch": 0.4422230571060541, "grad_norm": 0.31640625, "learning_rate": 0.0011471898009522094, "loss": 1.9594, "step": 16479 }, { "epoch": 0.442249892657793, "grad_norm": 0.3203125, "learning_rate": 0.0011471782720397466, "loss": 1.9406, "step": 16480 }, { "epoch": 0.442276728209532, "grad_norm": 0.330078125, "learning_rate": 0.0011471667419269357, "loss": 1.9714, "step": 16481 }, { "epoch": 0.44230356376127095, "grad_norm": 0.333984375, "learning_rate": 0.0011471552106138022, "loss": 2.0655, "step": 16482 }, { "epoch": 0.4423303993130099, "grad_norm": 0.3203125, "learning_rate": 0.001147143678100371, "loss": 1.8234, "step": 16483 }, { "epoch": 0.4423572348647488, "grad_norm": 0.322265625, "learning_rate": 0.0011471321443866673, "loss": 1.881, "step": 16484 }, { "epoch": 0.44238407041648775, "grad_norm": 0.3203125, "learning_rate": 0.001147120609472717, "loss": 1.9276, "step": 16485 }, { "epoch": 0.4424109059682267, "grad_norm": 0.318359375, "learning_rate": 0.001147109073358545, "loss": 1.9196, "step": 16486 }, { "epoch": 0.4424377415199657, "grad_norm": 0.318359375, "learning_rate": 0.0011470975360441764, "loss": 1.9296, "step": 16487 }, { "epoch": 0.4424645770717046, "grad_norm": 0.318359375, "learning_rate": 0.001147085997529637, "loss": 1.8096, "step": 16488 }, { "epoch": 0.44249141262344355, "grad_norm": 0.326171875, "learning_rate": 0.0011470744578149518, "loss": 1.9302, "step": 16489 }, { "epoch": 0.4425182481751825, "grad_norm": 0.328125, "learning_rate": 0.0011470629169001462, "loss": 1.9943, "step": 16490 }, { "epoch": 0.4425450837269214, "grad_norm": 0.31640625, "learning_rate": 0.0011470513747852457, "loss": 1.8684, "step": 16491 }, { "epoch": 0.44257191927866035, "grad_norm": 0.322265625, "learning_rate": 0.001147039831470275, "loss": 1.9104, "step": 16492 }, { "epoch": 0.4425987548303993, "grad_norm": 0.3125, "learning_rate": 0.0011470282869552604, "loss": 1.8256, "step": 16493 }, { "epoch": 0.4426255903821383, "grad_norm": 0.33203125, "learning_rate": 0.0011470167412402262, "loss": 2.0182, "step": 16494 }, { "epoch": 0.4426524259338772, "grad_norm": 0.322265625, "learning_rate": 0.0011470051943251983, "loss": 1.9197, "step": 16495 }, { "epoch": 0.44267926148561615, "grad_norm": 0.32421875, "learning_rate": 0.0011469936462102021, "loss": 1.9434, "step": 16496 }, { "epoch": 0.4427060970373551, "grad_norm": 0.306640625, "learning_rate": 0.0011469820968952628, "loss": 1.8348, "step": 16497 }, { "epoch": 0.442732932589094, "grad_norm": 0.3203125, "learning_rate": 0.0011469705463804055, "loss": 1.9276, "step": 16498 }, { "epoch": 0.44275976814083295, "grad_norm": 0.318359375, "learning_rate": 0.0011469589946656558, "loss": 1.8861, "step": 16499 }, { "epoch": 0.44278660369257195, "grad_norm": 0.333984375, "learning_rate": 0.0011469474417510391, "loss": 1.9774, "step": 16500 }, { "epoch": 0.4428134392443109, "grad_norm": 0.310546875, "learning_rate": 0.0011469358876365805, "loss": 1.8752, "step": 16501 }, { "epoch": 0.4428402747960498, "grad_norm": 0.322265625, "learning_rate": 0.0011469243323223056, "loss": 1.9329, "step": 16502 }, { "epoch": 0.44286711034778875, "grad_norm": 0.314453125, "learning_rate": 0.0011469127758082394, "loss": 1.9116, "step": 16503 }, { "epoch": 0.4428939458995277, "grad_norm": 0.322265625, "learning_rate": 0.0011469012180944079, "loss": 1.9355, "step": 16504 }, { "epoch": 0.4429207814512666, "grad_norm": 0.337890625, "learning_rate": 0.0011468896591808357, "loss": 2.0034, "step": 16505 }, { "epoch": 0.44294761700300556, "grad_norm": 0.32421875, "learning_rate": 0.0011468780990675485, "loss": 2.0116, "step": 16506 }, { "epoch": 0.44297445255474455, "grad_norm": 0.310546875, "learning_rate": 0.0011468665377545716, "loss": 1.8167, "step": 16507 }, { "epoch": 0.4430012881064835, "grad_norm": 0.326171875, "learning_rate": 0.0011468549752419305, "loss": 1.9987, "step": 16508 }, { "epoch": 0.4430281236582224, "grad_norm": 0.328125, "learning_rate": 0.0011468434115296505, "loss": 1.9085, "step": 16509 }, { "epoch": 0.44305495920996135, "grad_norm": 0.326171875, "learning_rate": 0.0011468318466177569, "loss": 1.9773, "step": 16510 }, { "epoch": 0.4430817947617003, "grad_norm": 0.322265625, "learning_rate": 0.0011468202805062751, "loss": 1.9258, "step": 16511 }, { "epoch": 0.4431086303134392, "grad_norm": 0.31640625, "learning_rate": 0.0011468087131952303, "loss": 1.8885, "step": 16512 }, { "epoch": 0.4431354658651782, "grad_norm": 0.326171875, "learning_rate": 0.0011467971446846483, "loss": 1.9984, "step": 16513 }, { "epoch": 0.44316230141691715, "grad_norm": 0.3125, "learning_rate": 0.001146785574974554, "loss": 1.8823, "step": 16514 }, { "epoch": 0.4431891369686561, "grad_norm": 0.322265625, "learning_rate": 0.0011467740040649733, "loss": 1.8763, "step": 16515 }, { "epoch": 0.443215972520395, "grad_norm": 0.322265625, "learning_rate": 0.0011467624319559313, "loss": 1.9206, "step": 16516 }, { "epoch": 0.44324280807213395, "grad_norm": 0.32421875, "learning_rate": 0.0011467508586474533, "loss": 1.9763, "step": 16517 }, { "epoch": 0.4432696436238729, "grad_norm": 0.318359375, "learning_rate": 0.0011467392841395647, "loss": 1.8993, "step": 16518 }, { "epoch": 0.4432964791756118, "grad_norm": 0.322265625, "learning_rate": 0.001146727708432291, "loss": 1.9682, "step": 16519 }, { "epoch": 0.4433233147273508, "grad_norm": 0.32421875, "learning_rate": 0.0011467161315256574, "loss": 1.9352, "step": 16520 }, { "epoch": 0.44335015027908975, "grad_norm": 0.322265625, "learning_rate": 0.00114670455341969, "loss": 1.9066, "step": 16521 }, { "epoch": 0.4433769858308287, "grad_norm": 0.314453125, "learning_rate": 0.0011466929741144132, "loss": 1.9179, "step": 16522 }, { "epoch": 0.4434038213825676, "grad_norm": 0.322265625, "learning_rate": 0.0011466813936098527, "loss": 1.9499, "step": 16523 }, { "epoch": 0.44343065693430656, "grad_norm": 0.322265625, "learning_rate": 0.0011466698119060343, "loss": 1.9702, "step": 16524 }, { "epoch": 0.4434574924860455, "grad_norm": 0.3203125, "learning_rate": 0.0011466582290029833, "loss": 1.8764, "step": 16525 }, { "epoch": 0.4434843280377845, "grad_norm": 0.31640625, "learning_rate": 0.0011466466449007247, "loss": 1.8505, "step": 16526 }, { "epoch": 0.4435111635895234, "grad_norm": 0.326171875, "learning_rate": 0.0011466350595992843, "loss": 1.8745, "step": 16527 }, { "epoch": 0.44353799914126235, "grad_norm": 0.3125, "learning_rate": 0.0011466234730986874, "loss": 1.8025, "step": 16528 }, { "epoch": 0.4435648346930013, "grad_norm": 0.318359375, "learning_rate": 0.0011466118853989595, "loss": 1.9266, "step": 16529 }, { "epoch": 0.4435916702447402, "grad_norm": 0.330078125, "learning_rate": 0.0011466002965001259, "loss": 1.9063, "step": 16530 }, { "epoch": 0.44361850579647916, "grad_norm": 0.310546875, "learning_rate": 0.001146588706402212, "loss": 1.8155, "step": 16531 }, { "epoch": 0.4436453413482181, "grad_norm": 0.318359375, "learning_rate": 0.0011465771151052434, "loss": 1.8476, "step": 16532 }, { "epoch": 0.4436721768999571, "grad_norm": 0.33203125, "learning_rate": 0.0011465655226092454, "loss": 1.9209, "step": 16533 }, { "epoch": 0.443699012451696, "grad_norm": 0.341796875, "learning_rate": 0.001146553928914243, "loss": 2.0142, "step": 16534 }, { "epoch": 0.44372584800343495, "grad_norm": 0.322265625, "learning_rate": 0.0011465423340202628, "loss": 1.8234, "step": 16535 }, { "epoch": 0.4437526835551739, "grad_norm": 0.3203125, "learning_rate": 0.001146530737927329, "loss": 1.8138, "step": 16536 }, { "epoch": 0.4437795191069128, "grad_norm": 0.314453125, "learning_rate": 0.0011465191406354678, "loss": 1.9002, "step": 16537 }, { "epoch": 0.44380635465865176, "grad_norm": 0.322265625, "learning_rate": 0.0011465075421447043, "loss": 1.8631, "step": 16538 }, { "epoch": 0.44383319021039075, "grad_norm": 0.3125, "learning_rate": 0.001146495942455064, "loss": 1.8693, "step": 16539 }, { "epoch": 0.4438600257621297, "grad_norm": 0.3203125, "learning_rate": 0.0011464843415665725, "loss": 1.8419, "step": 16540 }, { "epoch": 0.4438868613138686, "grad_norm": 0.322265625, "learning_rate": 0.0011464727394792551, "loss": 1.8201, "step": 16541 }, { "epoch": 0.44391369686560755, "grad_norm": 0.318359375, "learning_rate": 0.0011464611361931372, "loss": 1.8934, "step": 16542 }, { "epoch": 0.4439405324173465, "grad_norm": 0.31640625, "learning_rate": 0.0011464495317082444, "loss": 1.8832, "step": 16543 }, { "epoch": 0.4439673679690854, "grad_norm": 0.330078125, "learning_rate": 0.0011464379260246022, "loss": 1.9433, "step": 16544 }, { "epoch": 0.4439942035208244, "grad_norm": 0.326171875, "learning_rate": 0.0011464263191422357, "loss": 1.9387, "step": 16545 }, { "epoch": 0.44402103907256335, "grad_norm": 0.32421875, "learning_rate": 0.0011464147110611708, "loss": 1.8267, "step": 16546 }, { "epoch": 0.4440478746243023, "grad_norm": 0.3203125, "learning_rate": 0.0011464031017814327, "loss": 1.9228, "step": 16547 }, { "epoch": 0.4440747101760412, "grad_norm": 0.322265625, "learning_rate": 0.0011463914913030472, "loss": 1.8399, "step": 16548 }, { "epoch": 0.44410154572778016, "grad_norm": 0.322265625, "learning_rate": 0.0011463798796260392, "loss": 1.8519, "step": 16549 }, { "epoch": 0.4441283812795191, "grad_norm": 0.3203125, "learning_rate": 0.0011463682667504345, "loss": 1.8765, "step": 16550 }, { "epoch": 0.444155216831258, "grad_norm": 0.32421875, "learning_rate": 0.0011463566526762586, "loss": 1.8791, "step": 16551 }, { "epoch": 0.444182052382997, "grad_norm": 0.3203125, "learning_rate": 0.001146345037403537, "loss": 1.9149, "step": 16552 }, { "epoch": 0.44420888793473595, "grad_norm": 0.322265625, "learning_rate": 0.0011463334209322953, "loss": 1.8888, "step": 16553 }, { "epoch": 0.4442357234864749, "grad_norm": 0.326171875, "learning_rate": 0.0011463218032625587, "loss": 1.9359, "step": 16554 }, { "epoch": 0.4442625590382138, "grad_norm": 0.33984375, "learning_rate": 0.0011463101843943526, "loss": 1.9496, "step": 16555 }, { "epoch": 0.44428939458995276, "grad_norm": 0.31640625, "learning_rate": 0.0011462985643277029, "loss": 1.8505, "step": 16556 }, { "epoch": 0.4443162301416917, "grad_norm": 0.33203125, "learning_rate": 0.0011462869430626348, "loss": 1.9281, "step": 16557 }, { "epoch": 0.4443430656934307, "grad_norm": 0.337890625, "learning_rate": 0.0011462753205991737, "loss": 1.9357, "step": 16558 }, { "epoch": 0.4443699012451696, "grad_norm": 0.33203125, "learning_rate": 0.0011462636969373455, "loss": 1.899, "step": 16559 }, { "epoch": 0.44439673679690855, "grad_norm": 0.322265625, "learning_rate": 0.0011462520720771756, "loss": 1.7706, "step": 16560 }, { "epoch": 0.4444235723486475, "grad_norm": 0.330078125, "learning_rate": 0.001146240446018689, "loss": 1.9055, "step": 16561 }, { "epoch": 0.4444504079003864, "grad_norm": 0.32421875, "learning_rate": 0.0011462288187619118, "loss": 1.8399, "step": 16562 }, { "epoch": 0.44447724345212536, "grad_norm": 0.318359375, "learning_rate": 0.0011462171903068692, "loss": 1.8474, "step": 16563 }, { "epoch": 0.4445040790038643, "grad_norm": 0.30859375, "learning_rate": 0.0011462055606535867, "loss": 1.7301, "step": 16564 }, { "epoch": 0.4445309145556033, "grad_norm": 0.3359375, "learning_rate": 0.0011461939298020901, "loss": 1.8684, "step": 16565 }, { "epoch": 0.4445577501073422, "grad_norm": 0.345703125, "learning_rate": 0.0011461822977524045, "loss": 1.8691, "step": 16566 }, { "epoch": 0.44458458565908116, "grad_norm": 0.32421875, "learning_rate": 0.0011461706645045557, "loss": 1.8212, "step": 16567 }, { "epoch": 0.4446114212108201, "grad_norm": 0.337890625, "learning_rate": 0.001146159030058569, "loss": 1.9297, "step": 16568 }, { "epoch": 0.444638256762559, "grad_norm": 0.322265625, "learning_rate": 0.0011461473944144704, "loss": 1.8402, "step": 16569 }, { "epoch": 0.44466509231429796, "grad_norm": 0.3203125, "learning_rate": 0.0011461357575722848, "loss": 1.8717, "step": 16570 }, { "epoch": 0.44469192786603695, "grad_norm": 0.333984375, "learning_rate": 0.0011461241195320382, "loss": 1.8427, "step": 16571 }, { "epoch": 0.4447187634177759, "grad_norm": 0.330078125, "learning_rate": 0.0011461124802937559, "loss": 1.8858, "step": 16572 }, { "epoch": 0.4447455989695148, "grad_norm": 0.326171875, "learning_rate": 0.0011461008398574635, "loss": 1.8764, "step": 16573 }, { "epoch": 0.44477243452125376, "grad_norm": 0.328125, "learning_rate": 0.0011460891982231867, "loss": 1.9035, "step": 16574 }, { "epoch": 0.4447992700729927, "grad_norm": 0.3203125, "learning_rate": 0.0011460775553909506, "loss": 1.847, "step": 16575 }, { "epoch": 0.4448261056247316, "grad_norm": 0.32421875, "learning_rate": 0.001146065911360781, "loss": 1.8463, "step": 16576 }, { "epoch": 0.44485294117647056, "grad_norm": 0.3203125, "learning_rate": 0.0011460542661327036, "loss": 1.8079, "step": 16577 }, { "epoch": 0.44487977672820955, "grad_norm": 0.31640625, "learning_rate": 0.0011460426197067437, "loss": 1.8008, "step": 16578 }, { "epoch": 0.4449066122799485, "grad_norm": 0.31640625, "learning_rate": 0.0011460309720829269, "loss": 1.7734, "step": 16579 }, { "epoch": 0.4449334478316874, "grad_norm": 0.326171875, "learning_rate": 0.0011460193232612787, "loss": 1.8401, "step": 16580 }, { "epoch": 0.44496028338342636, "grad_norm": 0.337890625, "learning_rate": 0.001146007673241825, "loss": 1.961, "step": 16581 }, { "epoch": 0.4449871189351653, "grad_norm": 0.322265625, "learning_rate": 0.001145996022024591, "loss": 1.8148, "step": 16582 }, { "epoch": 0.44501395448690423, "grad_norm": 0.330078125, "learning_rate": 0.0011459843696096023, "loss": 1.8451, "step": 16583 }, { "epoch": 0.4450407900386432, "grad_norm": 0.34375, "learning_rate": 0.0011459727159968847, "loss": 2.058, "step": 16584 }, { "epoch": 0.44506762559038215, "grad_norm": 0.33203125, "learning_rate": 0.0011459610611864635, "loss": 1.8645, "step": 16585 }, { "epoch": 0.4450944611421211, "grad_norm": 0.326171875, "learning_rate": 0.0011459494051783642, "loss": 1.835, "step": 16586 }, { "epoch": 0.44512129669386, "grad_norm": 0.3203125, "learning_rate": 0.0011459377479726125, "loss": 1.8419, "step": 16587 }, { "epoch": 0.44514813224559896, "grad_norm": 0.3203125, "learning_rate": 0.0011459260895692344, "loss": 1.799, "step": 16588 }, { "epoch": 0.4451749677973379, "grad_norm": 0.3359375, "learning_rate": 0.0011459144299682548, "loss": 1.905, "step": 16589 }, { "epoch": 0.44520180334907683, "grad_norm": 0.314453125, "learning_rate": 0.0011459027691696996, "loss": 1.7768, "step": 16590 }, { "epoch": 0.4452286389008158, "grad_norm": 0.33203125, "learning_rate": 0.0011458911071735942, "loss": 1.8618, "step": 16591 }, { "epoch": 0.44525547445255476, "grad_norm": 0.326171875, "learning_rate": 0.0011458794439799644, "loss": 1.8313, "step": 16592 }, { "epoch": 0.4452823100042937, "grad_norm": 0.3359375, "learning_rate": 0.0011458677795888358, "loss": 1.8791, "step": 16593 }, { "epoch": 0.4453091455560326, "grad_norm": 0.3359375, "learning_rate": 0.001145856114000234, "loss": 1.9048, "step": 16594 }, { "epoch": 0.44533598110777156, "grad_norm": 0.318359375, "learning_rate": 0.0011458444472141841, "loss": 1.8126, "step": 16595 }, { "epoch": 0.4453628166595105, "grad_norm": 0.3359375, "learning_rate": 0.0011458327792307121, "loss": 1.8881, "step": 16596 }, { "epoch": 0.4453896522112495, "grad_norm": 0.326171875, "learning_rate": 0.001145821110049844, "loss": 1.8029, "step": 16597 }, { "epoch": 0.4454164877629884, "grad_norm": 0.330078125, "learning_rate": 0.0011458094396716047, "loss": 1.8108, "step": 16598 }, { "epoch": 0.44544332331472736, "grad_norm": 0.34765625, "learning_rate": 0.00114579776809602, "loss": 1.8363, "step": 16599 }, { "epoch": 0.4454701588664663, "grad_norm": 0.333984375, "learning_rate": 0.0011457860953231155, "loss": 1.8422, "step": 16600 }, { "epoch": 0.4454969944182052, "grad_norm": 0.3203125, "learning_rate": 0.0011457744213529172, "loss": 1.7254, "step": 16601 }, { "epoch": 0.44552382996994416, "grad_norm": 0.34375, "learning_rate": 0.00114576274618545, "loss": 1.9332, "step": 16602 }, { "epoch": 0.44555066552168315, "grad_norm": 0.34375, "learning_rate": 0.0011457510698207403, "loss": 1.834, "step": 16603 }, { "epoch": 0.4455775010734221, "grad_norm": 0.3203125, "learning_rate": 0.001145739392258813, "loss": 1.8864, "step": 16604 }, { "epoch": 0.445604336625161, "grad_norm": 0.32421875, "learning_rate": 0.0011457277134996943, "loss": 1.7896, "step": 16605 }, { "epoch": 0.44563117217689996, "grad_norm": 0.345703125, "learning_rate": 0.0011457160335434093, "loss": 1.8595, "step": 16606 }, { "epoch": 0.4456580077286389, "grad_norm": 0.328125, "learning_rate": 0.001145704352389984, "loss": 1.741, "step": 16607 }, { "epoch": 0.44568484328037783, "grad_norm": 0.328125, "learning_rate": 0.0011456926700394439, "loss": 1.7847, "step": 16608 }, { "epoch": 0.44571167883211676, "grad_norm": 0.337890625, "learning_rate": 0.0011456809864918147, "loss": 1.8169, "step": 16609 }, { "epoch": 0.44573851438385576, "grad_norm": 0.328125, "learning_rate": 0.0011456693017471218, "loss": 1.7652, "step": 16610 }, { "epoch": 0.4457653499355947, "grad_norm": 0.3359375, "learning_rate": 0.0011456576158053909, "loss": 1.8965, "step": 16611 }, { "epoch": 0.4457921854873336, "grad_norm": 0.33203125, "learning_rate": 0.001145645928666648, "loss": 1.8362, "step": 16612 }, { "epoch": 0.44581902103907256, "grad_norm": 0.32421875, "learning_rate": 0.0011456342403309183, "loss": 1.7777, "step": 16613 }, { "epoch": 0.4458458565908115, "grad_norm": 0.322265625, "learning_rate": 0.0011456225507982278, "loss": 1.7652, "step": 16614 }, { "epoch": 0.44587269214255043, "grad_norm": 0.33984375, "learning_rate": 0.0011456108600686017, "loss": 1.7876, "step": 16615 }, { "epoch": 0.4458995276942894, "grad_norm": 0.33984375, "learning_rate": 0.001145599168142066, "loss": 1.8959, "step": 16616 }, { "epoch": 0.44592636324602836, "grad_norm": 0.333984375, "learning_rate": 0.0011455874750186462, "loss": 1.8175, "step": 16617 }, { "epoch": 0.4459531987977673, "grad_norm": 0.388671875, "learning_rate": 0.0011455757806983682, "loss": 2.0335, "step": 16618 }, { "epoch": 0.4459800343495062, "grad_norm": 0.431640625, "learning_rate": 0.001145564085181257, "loss": 2.1858, "step": 16619 }, { "epoch": 0.44600686990124516, "grad_norm": 0.369140625, "learning_rate": 0.001145552388467339, "loss": 2.1362, "step": 16620 }, { "epoch": 0.4460337054529841, "grad_norm": 0.359375, "learning_rate": 0.0011455406905566397, "loss": 2.1971, "step": 16621 }, { "epoch": 0.44606054100472303, "grad_norm": 0.35546875, "learning_rate": 0.0011455289914491845, "loss": 2.0962, "step": 16622 }, { "epoch": 0.446087376556462, "grad_norm": 0.3515625, "learning_rate": 0.0011455172911449991, "loss": 2.0819, "step": 16623 }, { "epoch": 0.44611421210820096, "grad_norm": 0.345703125, "learning_rate": 0.0011455055896441094, "loss": 2.1127, "step": 16624 }, { "epoch": 0.4461410476599399, "grad_norm": 0.330078125, "learning_rate": 0.0011454938869465408, "loss": 2.0799, "step": 16625 }, { "epoch": 0.44616788321167883, "grad_norm": 0.33203125, "learning_rate": 0.0011454821830523191, "loss": 2.0688, "step": 16626 }, { "epoch": 0.44619471876341776, "grad_norm": 0.330078125, "learning_rate": 0.0011454704779614699, "loss": 1.9747, "step": 16627 }, { "epoch": 0.4462215543151567, "grad_norm": 0.318359375, "learning_rate": 0.0011454587716740192, "loss": 1.9636, "step": 16628 }, { "epoch": 0.4462483898668957, "grad_norm": 0.328125, "learning_rate": 0.0011454470641899923, "loss": 2.0479, "step": 16629 }, { "epoch": 0.4462752254186346, "grad_norm": 0.326171875, "learning_rate": 0.001145435355509415, "loss": 2.1214, "step": 16630 }, { "epoch": 0.44630206097037356, "grad_norm": 0.328125, "learning_rate": 0.001145423645632313, "loss": 2.0811, "step": 16631 }, { "epoch": 0.4463288965221125, "grad_norm": 0.33203125, "learning_rate": 0.001145411934558712, "loss": 2.0225, "step": 16632 }, { "epoch": 0.44635573207385143, "grad_norm": 0.33203125, "learning_rate": 0.0011454002222886378, "loss": 2.075, "step": 16633 }, { "epoch": 0.44638256762559037, "grad_norm": 0.322265625, "learning_rate": 0.0011453885088221155, "loss": 2.0171, "step": 16634 }, { "epoch": 0.4464094031773293, "grad_norm": 0.330078125, "learning_rate": 0.0011453767941591718, "loss": 2.056, "step": 16635 }, { "epoch": 0.4464362387290683, "grad_norm": 0.32421875, "learning_rate": 0.0011453650782998316, "loss": 2.0436, "step": 16636 }, { "epoch": 0.4464630742808072, "grad_norm": 0.3359375, "learning_rate": 0.001145353361244121, "loss": 2.0672, "step": 16637 }, { "epoch": 0.44648990983254616, "grad_norm": 0.310546875, "learning_rate": 0.0011453416429920654, "loss": 1.9384, "step": 16638 }, { "epoch": 0.4465167453842851, "grad_norm": 0.328125, "learning_rate": 0.001145329923543691, "loss": 2.07, "step": 16639 }, { "epoch": 0.44654358093602403, "grad_norm": 0.33203125, "learning_rate": 0.0011453182028990229, "loss": 2.0763, "step": 16640 }, { "epoch": 0.44657041648776297, "grad_norm": 0.3203125, "learning_rate": 0.0011453064810580873, "loss": 2.0688, "step": 16641 }, { "epoch": 0.44659725203950196, "grad_norm": 0.32421875, "learning_rate": 0.0011452947580209095, "loss": 1.9828, "step": 16642 }, { "epoch": 0.4466240875912409, "grad_norm": 0.30859375, "learning_rate": 0.0011452830337875155, "loss": 1.8516, "step": 16643 }, { "epoch": 0.4466509231429798, "grad_norm": 0.318359375, "learning_rate": 0.001145271308357931, "loss": 1.954, "step": 16644 }, { "epoch": 0.44667775869471876, "grad_norm": 0.326171875, "learning_rate": 0.0011452595817321818, "loss": 1.9692, "step": 16645 }, { "epoch": 0.4467045942464577, "grad_norm": 0.314453125, "learning_rate": 0.0011452478539102932, "loss": 2.0041, "step": 16646 }, { "epoch": 0.44673142979819663, "grad_norm": 0.3203125, "learning_rate": 0.0011452361248922914, "loss": 2.0589, "step": 16647 }, { "epoch": 0.44675826534993557, "grad_norm": 0.31640625, "learning_rate": 0.0011452243946782018, "loss": 2.0533, "step": 16648 }, { "epoch": 0.44678510090167456, "grad_norm": 0.318359375, "learning_rate": 0.0011452126632680506, "loss": 2.0366, "step": 16649 }, { "epoch": 0.4468119364534135, "grad_norm": 0.314453125, "learning_rate": 0.0011452009306618628, "loss": 2.0276, "step": 16650 }, { "epoch": 0.44683877200515243, "grad_norm": 0.32421875, "learning_rate": 0.0011451891968596649, "loss": 2.1037, "step": 16651 }, { "epoch": 0.44686560755689136, "grad_norm": 0.32421875, "learning_rate": 0.0011451774618614823, "loss": 2.0874, "step": 16652 }, { "epoch": 0.4468924431086303, "grad_norm": 0.3125, "learning_rate": 0.0011451657256673405, "loss": 1.9226, "step": 16653 }, { "epoch": 0.44691927866036923, "grad_norm": 0.318359375, "learning_rate": 0.0011451539882772655, "loss": 2.0037, "step": 16654 }, { "epoch": 0.4469461142121082, "grad_norm": 0.328125, "learning_rate": 0.0011451422496912833, "loss": 1.8884, "step": 16655 }, { "epoch": 0.44697294976384716, "grad_norm": 0.314453125, "learning_rate": 0.001145130509909419, "loss": 1.9917, "step": 16656 }, { "epoch": 0.4469997853155861, "grad_norm": 0.302734375, "learning_rate": 0.0011451187689316992, "loss": 1.8464, "step": 16657 }, { "epoch": 0.44702662086732503, "grad_norm": 0.3125, "learning_rate": 0.0011451070267581489, "loss": 2.0001, "step": 16658 }, { "epoch": 0.44705345641906397, "grad_norm": 0.30859375, "learning_rate": 0.0011450952833887943, "loss": 1.9814, "step": 16659 }, { "epoch": 0.4470802919708029, "grad_norm": 0.310546875, "learning_rate": 0.001145083538823661, "loss": 1.9379, "step": 16660 }, { "epoch": 0.44710712752254184, "grad_norm": 0.314453125, "learning_rate": 0.0011450717930627746, "loss": 1.9495, "step": 16661 }, { "epoch": 0.4471339630742808, "grad_norm": 0.3125, "learning_rate": 0.0011450600461061613, "loss": 1.965, "step": 16662 }, { "epoch": 0.44716079862601976, "grad_norm": 0.3125, "learning_rate": 0.0011450482979538463, "loss": 1.9727, "step": 16663 }, { "epoch": 0.4471876341777587, "grad_norm": 0.3125, "learning_rate": 0.001145036548605856, "loss": 1.914, "step": 16664 }, { "epoch": 0.44721446972949763, "grad_norm": 0.310546875, "learning_rate": 0.0011450247980622158, "loss": 1.981, "step": 16665 }, { "epoch": 0.44724130528123657, "grad_norm": 0.3125, "learning_rate": 0.0011450130463229514, "loss": 1.9715, "step": 16666 }, { "epoch": 0.4472681408329755, "grad_norm": 0.322265625, "learning_rate": 0.0011450012933880887, "loss": 1.994, "step": 16667 }, { "epoch": 0.4472949763847145, "grad_norm": 0.314453125, "learning_rate": 0.0011449895392576536, "loss": 2.0729, "step": 16668 }, { "epoch": 0.44732181193645343, "grad_norm": 0.318359375, "learning_rate": 0.001144977783931672, "loss": 2.0351, "step": 16669 }, { "epoch": 0.44734864748819236, "grad_norm": 0.3125, "learning_rate": 0.0011449660274101693, "loss": 1.9407, "step": 16670 }, { "epoch": 0.4473754830399313, "grad_norm": 0.296875, "learning_rate": 0.0011449542696931713, "loss": 1.801, "step": 16671 }, { "epoch": 0.44740231859167023, "grad_norm": 0.31640625, "learning_rate": 0.0011449425107807041, "loss": 1.972, "step": 16672 }, { "epoch": 0.44742915414340917, "grad_norm": 0.310546875, "learning_rate": 0.0011449307506727934, "loss": 1.856, "step": 16673 }, { "epoch": 0.44745598969514816, "grad_norm": 0.3125, "learning_rate": 0.0011449189893694648, "loss": 1.9677, "step": 16674 }, { "epoch": 0.4474828252468871, "grad_norm": 0.314453125, "learning_rate": 0.0011449072268707445, "loss": 1.9478, "step": 16675 }, { "epoch": 0.44750966079862603, "grad_norm": 0.314453125, "learning_rate": 0.001144895463176658, "loss": 1.8941, "step": 16676 }, { "epoch": 0.44753649635036497, "grad_norm": 0.326171875, "learning_rate": 0.001144883698287231, "loss": 2.0276, "step": 16677 }, { "epoch": 0.4475633319021039, "grad_norm": 0.330078125, "learning_rate": 0.0011448719322024895, "loss": 1.9951, "step": 16678 }, { "epoch": 0.44759016745384284, "grad_norm": 0.3125, "learning_rate": 0.0011448601649224593, "loss": 1.8812, "step": 16679 }, { "epoch": 0.44761700300558177, "grad_norm": 0.32421875, "learning_rate": 0.0011448483964471665, "loss": 1.9151, "step": 16680 }, { "epoch": 0.44764383855732076, "grad_norm": 0.302734375, "learning_rate": 0.0011448366267766362, "loss": 1.8206, "step": 16681 }, { "epoch": 0.4476706741090597, "grad_norm": 0.314453125, "learning_rate": 0.001144824855910895, "loss": 1.9041, "step": 16682 }, { "epoch": 0.44769750966079863, "grad_norm": 0.31640625, "learning_rate": 0.0011448130838499682, "loss": 1.9175, "step": 16683 }, { "epoch": 0.44772434521253757, "grad_norm": 0.31640625, "learning_rate": 0.0011448013105938815, "loss": 1.8463, "step": 16684 }, { "epoch": 0.4477511807642765, "grad_norm": 0.3125, "learning_rate": 0.0011447895361426613, "loss": 1.8758, "step": 16685 }, { "epoch": 0.44777801631601544, "grad_norm": 0.3125, "learning_rate": 0.001144777760496333, "loss": 1.9212, "step": 16686 }, { "epoch": 0.4478048518677544, "grad_norm": 0.306640625, "learning_rate": 0.0011447659836549228, "loss": 1.8766, "step": 16687 }, { "epoch": 0.44783168741949336, "grad_norm": 0.32421875, "learning_rate": 0.001144754205618456, "loss": 1.9456, "step": 16688 }, { "epoch": 0.4478585229712323, "grad_norm": 0.32421875, "learning_rate": 0.0011447424263869592, "loss": 1.9525, "step": 16689 }, { "epoch": 0.44788535852297123, "grad_norm": 0.314453125, "learning_rate": 0.0011447306459604573, "loss": 1.8985, "step": 16690 }, { "epoch": 0.44791219407471017, "grad_norm": 0.31640625, "learning_rate": 0.0011447188643389768, "loss": 1.9567, "step": 16691 }, { "epoch": 0.4479390296264491, "grad_norm": 0.306640625, "learning_rate": 0.0011447070815225433, "loss": 1.9126, "step": 16692 }, { "epoch": 0.44796586517818804, "grad_norm": 0.326171875, "learning_rate": 0.0011446952975111828, "loss": 1.9908, "step": 16693 }, { "epoch": 0.44799270072992703, "grad_norm": 0.310546875, "learning_rate": 0.0011446835123049212, "loss": 1.9469, "step": 16694 }, { "epoch": 0.44801953628166596, "grad_norm": 0.3125, "learning_rate": 0.001144671725903784, "loss": 1.9434, "step": 16695 }, { "epoch": 0.4480463718334049, "grad_norm": 0.318359375, "learning_rate": 0.0011446599383077973, "loss": 1.9102, "step": 16696 }, { "epoch": 0.44807320738514383, "grad_norm": 0.328125, "learning_rate": 0.001144648149516987, "loss": 1.9734, "step": 16697 }, { "epoch": 0.44810004293688277, "grad_norm": 0.3203125, "learning_rate": 0.001144636359531379, "loss": 1.9001, "step": 16698 }, { "epoch": 0.4481268784886217, "grad_norm": 0.322265625, "learning_rate": 0.0011446245683509988, "loss": 1.882, "step": 16699 }, { "epoch": 0.4481537140403607, "grad_norm": 0.314453125, "learning_rate": 0.001144612775975873, "loss": 1.8782, "step": 16700 }, { "epoch": 0.44818054959209963, "grad_norm": 0.30859375, "learning_rate": 0.0011446009824060265, "loss": 1.7781, "step": 16701 }, { "epoch": 0.44820738514383857, "grad_norm": 0.31640625, "learning_rate": 0.0011445891876414858, "loss": 1.9411, "step": 16702 }, { "epoch": 0.4482342206955775, "grad_norm": 0.31640625, "learning_rate": 0.0011445773916822768, "loss": 1.9355, "step": 16703 }, { "epoch": 0.44826105624731644, "grad_norm": 0.318359375, "learning_rate": 0.001144565594528425, "loss": 1.9986, "step": 16704 }, { "epoch": 0.44828789179905537, "grad_norm": 0.318359375, "learning_rate": 0.0011445537961799566, "loss": 1.9267, "step": 16705 }, { "epoch": 0.4483147273507943, "grad_norm": 0.326171875, "learning_rate": 0.0011445419966368974, "loss": 1.8743, "step": 16706 }, { "epoch": 0.4483415629025333, "grad_norm": 0.3125, "learning_rate": 0.0011445301958992733, "loss": 1.8606, "step": 16707 }, { "epoch": 0.44836839845427223, "grad_norm": 0.314453125, "learning_rate": 0.00114451839396711, "loss": 1.9088, "step": 16708 }, { "epoch": 0.44839523400601117, "grad_norm": 0.3203125, "learning_rate": 0.0011445065908404336, "loss": 1.8753, "step": 16709 }, { "epoch": 0.4484220695577501, "grad_norm": 0.318359375, "learning_rate": 0.0011444947865192703, "loss": 1.9334, "step": 16710 }, { "epoch": 0.44844890510948904, "grad_norm": 0.330078125, "learning_rate": 0.0011444829810036451, "loss": 1.8919, "step": 16711 }, { "epoch": 0.448475740661228, "grad_norm": 0.31640625, "learning_rate": 0.0011444711742935848, "loss": 1.9098, "step": 16712 }, { "epoch": 0.44850257621296696, "grad_norm": 0.310546875, "learning_rate": 0.0011444593663891146, "loss": 1.8762, "step": 16713 }, { "epoch": 0.4485294117647059, "grad_norm": 0.306640625, "learning_rate": 0.0011444475572902609, "loss": 1.839, "step": 16714 }, { "epoch": 0.44855624731644483, "grad_norm": 0.32421875, "learning_rate": 0.0011444357469970496, "loss": 1.9069, "step": 16715 }, { "epoch": 0.44858308286818377, "grad_norm": 0.328125, "learning_rate": 0.0011444239355095063, "loss": 1.8759, "step": 16716 }, { "epoch": 0.4486099184199227, "grad_norm": 0.32421875, "learning_rate": 0.0011444121228276571, "loss": 1.8601, "step": 16717 }, { "epoch": 0.44863675397166164, "grad_norm": 0.3203125, "learning_rate": 0.0011444003089515276, "loss": 1.8749, "step": 16718 }, { "epoch": 0.4486635895234006, "grad_norm": 0.318359375, "learning_rate": 0.0011443884938811444, "loss": 1.9154, "step": 16719 }, { "epoch": 0.44869042507513957, "grad_norm": 0.318359375, "learning_rate": 0.0011443766776165327, "loss": 1.8813, "step": 16720 }, { "epoch": 0.4487172606268785, "grad_norm": 0.306640625, "learning_rate": 0.001144364860157719, "loss": 1.8297, "step": 16721 }, { "epoch": 0.44874409617861744, "grad_norm": 0.318359375, "learning_rate": 0.0011443530415047288, "loss": 1.8216, "step": 16722 }, { "epoch": 0.44877093173035637, "grad_norm": 0.3125, "learning_rate": 0.0011443412216575882, "loss": 1.8755, "step": 16723 }, { "epoch": 0.4487977672820953, "grad_norm": 0.3125, "learning_rate": 0.001144329400616323, "loss": 1.7992, "step": 16724 }, { "epoch": 0.44882460283383424, "grad_norm": 0.3203125, "learning_rate": 0.0011443175783809594, "loss": 1.9163, "step": 16725 }, { "epoch": 0.44885143838557323, "grad_norm": 0.314453125, "learning_rate": 0.0011443057549515231, "loss": 1.8318, "step": 16726 }, { "epoch": 0.44887827393731217, "grad_norm": 0.318359375, "learning_rate": 0.00114429393032804, "loss": 1.8515, "step": 16727 }, { "epoch": 0.4489051094890511, "grad_norm": 0.314453125, "learning_rate": 0.0011442821045105364, "loss": 1.8317, "step": 16728 }, { "epoch": 0.44893194504079004, "grad_norm": 0.3125, "learning_rate": 0.0011442702774990378, "loss": 1.9258, "step": 16729 }, { "epoch": 0.44895878059252897, "grad_norm": 0.314453125, "learning_rate": 0.0011442584492935704, "loss": 1.9263, "step": 16730 }, { "epoch": 0.4489856161442679, "grad_norm": 0.3125, "learning_rate": 0.00114424661989416, "loss": 1.7543, "step": 16731 }, { "epoch": 0.44901245169600684, "grad_norm": 0.314453125, "learning_rate": 0.0011442347893008329, "loss": 1.8302, "step": 16732 }, { "epoch": 0.44903928724774583, "grad_norm": 0.3203125, "learning_rate": 0.0011442229575136145, "loss": 1.8004, "step": 16733 }, { "epoch": 0.44906612279948477, "grad_norm": 0.328125, "learning_rate": 0.001144211124532531, "loss": 1.8666, "step": 16734 }, { "epoch": 0.4490929583512237, "grad_norm": 0.318359375, "learning_rate": 0.0011441992903576087, "loss": 1.8009, "step": 16735 }, { "epoch": 0.44911979390296264, "grad_norm": 0.31640625, "learning_rate": 0.001144187454988873, "loss": 1.9167, "step": 16736 }, { "epoch": 0.4491466294547016, "grad_norm": 0.3359375, "learning_rate": 0.0011441756184263504, "loss": 1.9823, "step": 16737 }, { "epoch": 0.4491734650064405, "grad_norm": 0.326171875, "learning_rate": 0.0011441637806700663, "loss": 1.9464, "step": 16738 }, { "epoch": 0.4492003005581795, "grad_norm": 0.31640625, "learning_rate": 0.001144151941720047, "loss": 1.851, "step": 16739 }, { "epoch": 0.44922713610991843, "grad_norm": 0.314453125, "learning_rate": 0.0011441401015763186, "loss": 1.8714, "step": 16740 }, { "epoch": 0.44925397166165737, "grad_norm": 0.322265625, "learning_rate": 0.001144128260238907, "loss": 1.9223, "step": 16741 }, { "epoch": 0.4492808072133963, "grad_norm": 0.31640625, "learning_rate": 0.0011441164177078379, "loss": 1.7957, "step": 16742 }, { "epoch": 0.44930764276513524, "grad_norm": 0.318359375, "learning_rate": 0.0011441045739831374, "loss": 1.8657, "step": 16743 }, { "epoch": 0.4493344783168742, "grad_norm": 0.314453125, "learning_rate": 0.0011440927290648315, "loss": 1.8322, "step": 16744 }, { "epoch": 0.44936131386861317, "grad_norm": 0.32421875, "learning_rate": 0.0011440808829529464, "loss": 1.8605, "step": 16745 }, { "epoch": 0.4493881494203521, "grad_norm": 0.32421875, "learning_rate": 0.001144069035647508, "loss": 1.8259, "step": 16746 }, { "epoch": 0.44941498497209104, "grad_norm": 0.318359375, "learning_rate": 0.0011440571871485419, "loss": 1.8482, "step": 16747 }, { "epoch": 0.44944182052382997, "grad_norm": 0.310546875, "learning_rate": 0.0011440453374560746, "loss": 1.8052, "step": 16748 }, { "epoch": 0.4494686560755689, "grad_norm": 0.32421875, "learning_rate": 0.0011440334865701317, "loss": 1.893, "step": 16749 }, { "epoch": 0.44949549162730784, "grad_norm": 0.326171875, "learning_rate": 0.0011440216344907395, "loss": 1.9377, "step": 16750 }, { "epoch": 0.4495223271790468, "grad_norm": 0.326171875, "learning_rate": 0.001144009781217924, "loss": 1.88, "step": 16751 }, { "epoch": 0.44954916273078577, "grad_norm": 0.32421875, "learning_rate": 0.001143997926751711, "loss": 1.9451, "step": 16752 }, { "epoch": 0.4495759982825247, "grad_norm": 0.322265625, "learning_rate": 0.0011439860710921266, "loss": 1.8887, "step": 16753 }, { "epoch": 0.44960283383426364, "grad_norm": 0.318359375, "learning_rate": 0.0011439742142391968, "loss": 1.8506, "step": 16754 }, { "epoch": 0.4496296693860026, "grad_norm": 0.326171875, "learning_rate": 0.0011439623561929476, "loss": 1.8915, "step": 16755 }, { "epoch": 0.4496565049377415, "grad_norm": 0.3203125, "learning_rate": 0.0011439504969534052, "loss": 1.8795, "step": 16756 }, { "epoch": 0.44968334048948044, "grad_norm": 0.322265625, "learning_rate": 0.0011439386365205953, "loss": 1.9464, "step": 16757 }, { "epoch": 0.44971017604121943, "grad_norm": 0.3203125, "learning_rate": 0.001143926774894544, "loss": 1.8866, "step": 16758 }, { "epoch": 0.44973701159295837, "grad_norm": 0.31640625, "learning_rate": 0.0011439149120752773, "loss": 1.8729, "step": 16759 }, { "epoch": 0.4497638471446973, "grad_norm": 0.310546875, "learning_rate": 0.0011439030480628215, "loss": 1.8086, "step": 16760 }, { "epoch": 0.44979068269643624, "grad_norm": 0.32421875, "learning_rate": 0.0011438911828572024, "loss": 1.9029, "step": 16761 }, { "epoch": 0.4498175182481752, "grad_norm": 0.328125, "learning_rate": 0.001143879316458446, "loss": 1.9223, "step": 16762 }, { "epoch": 0.4498443537999141, "grad_norm": 0.318359375, "learning_rate": 0.0011438674488665783, "loss": 1.8045, "step": 16763 }, { "epoch": 0.44987118935165304, "grad_norm": 0.322265625, "learning_rate": 0.0011438555800816256, "loss": 1.8867, "step": 16764 }, { "epoch": 0.44989802490339204, "grad_norm": 0.314453125, "learning_rate": 0.0011438437101036138, "loss": 1.8593, "step": 16765 }, { "epoch": 0.44992486045513097, "grad_norm": 0.32421875, "learning_rate": 0.0011438318389325687, "loss": 1.901, "step": 16766 }, { "epoch": 0.4499516960068699, "grad_norm": 0.3203125, "learning_rate": 0.0011438199665685165, "loss": 1.8249, "step": 16767 }, { "epoch": 0.44997853155860884, "grad_norm": 0.3125, "learning_rate": 0.0011438080930114836, "loss": 1.7233, "step": 16768 }, { "epoch": 0.4500053671103478, "grad_norm": 0.32421875, "learning_rate": 0.0011437962182614956, "loss": 1.821, "step": 16769 }, { "epoch": 0.4500322026620867, "grad_norm": 0.314453125, "learning_rate": 0.0011437843423185788, "loss": 1.7845, "step": 16770 }, { "epoch": 0.4500590382138257, "grad_norm": 0.318359375, "learning_rate": 0.0011437724651827588, "loss": 1.8117, "step": 16771 }, { "epoch": 0.45008587376556464, "grad_norm": 0.3203125, "learning_rate": 0.0011437605868540623, "loss": 1.8403, "step": 16772 }, { "epoch": 0.45011270931730357, "grad_norm": 0.32421875, "learning_rate": 0.0011437487073325148, "loss": 1.8451, "step": 16773 }, { "epoch": 0.4501395448690425, "grad_norm": 0.3203125, "learning_rate": 0.0011437368266181429, "loss": 1.7759, "step": 16774 }, { "epoch": 0.45016638042078144, "grad_norm": 0.326171875, "learning_rate": 0.001143724944710972, "loss": 1.8114, "step": 16775 }, { "epoch": 0.4501932159725204, "grad_norm": 0.31640625, "learning_rate": 0.001143713061611029, "loss": 1.8273, "step": 16776 }, { "epoch": 0.4502200515242593, "grad_norm": 0.328125, "learning_rate": 0.0011437011773183393, "loss": 1.786, "step": 16777 }, { "epoch": 0.4502468870759983, "grad_norm": 0.33203125, "learning_rate": 0.001143689291832929, "loss": 1.8849, "step": 16778 }, { "epoch": 0.45027372262773724, "grad_norm": 0.318359375, "learning_rate": 0.0011436774051548245, "loss": 1.8545, "step": 16779 }, { "epoch": 0.4503005581794762, "grad_norm": 0.314453125, "learning_rate": 0.0011436655172840516, "loss": 1.7429, "step": 16780 }, { "epoch": 0.4503273937312151, "grad_norm": 0.333984375, "learning_rate": 0.0011436536282206367, "loss": 1.8469, "step": 16781 }, { "epoch": 0.45035422928295404, "grad_norm": 0.31640625, "learning_rate": 0.0011436417379646056, "loss": 1.8001, "step": 16782 }, { "epoch": 0.450381064834693, "grad_norm": 0.33203125, "learning_rate": 0.0011436298465159844, "loss": 1.8793, "step": 16783 }, { "epoch": 0.45040790038643197, "grad_norm": 0.32421875, "learning_rate": 0.0011436179538747994, "loss": 1.8425, "step": 16784 }, { "epoch": 0.4504347359381709, "grad_norm": 0.33203125, "learning_rate": 0.0011436060600410764, "loss": 1.9479, "step": 16785 }, { "epoch": 0.45046157148990984, "grad_norm": 0.328125, "learning_rate": 0.0011435941650148418, "loss": 1.834, "step": 16786 }, { "epoch": 0.4504884070416488, "grad_norm": 0.337890625, "learning_rate": 0.0011435822687961214, "loss": 1.8848, "step": 16787 }, { "epoch": 0.4505152425933877, "grad_norm": 0.33203125, "learning_rate": 0.0011435703713849415, "loss": 1.881, "step": 16788 }, { "epoch": 0.45054207814512665, "grad_norm": 0.32421875, "learning_rate": 0.001143558472781328, "loss": 1.8539, "step": 16789 }, { "epoch": 0.4505689136968656, "grad_norm": 0.328125, "learning_rate": 0.001143546572985307, "loss": 1.8494, "step": 16790 }, { "epoch": 0.45059574924860457, "grad_norm": 0.314453125, "learning_rate": 0.001143534671996905, "loss": 1.7827, "step": 16791 }, { "epoch": 0.4506225848003435, "grad_norm": 0.318359375, "learning_rate": 0.0011435227698161478, "loss": 1.8262, "step": 16792 }, { "epoch": 0.45064942035208244, "grad_norm": 0.33203125, "learning_rate": 0.0011435108664430615, "loss": 1.7942, "step": 16793 }, { "epoch": 0.4506762559038214, "grad_norm": 0.34375, "learning_rate": 0.0011434989618776725, "loss": 1.8254, "step": 16794 }, { "epoch": 0.4507030914555603, "grad_norm": 0.322265625, "learning_rate": 0.0011434870561200063, "loss": 1.7235, "step": 16795 }, { "epoch": 0.45072992700729925, "grad_norm": 0.326171875, "learning_rate": 0.0011434751491700896, "loss": 1.7488, "step": 16796 }, { "epoch": 0.45075676255903824, "grad_norm": 0.3203125, "learning_rate": 0.0011434632410279482, "loss": 1.8179, "step": 16797 }, { "epoch": 0.4507835981107772, "grad_norm": 0.322265625, "learning_rate": 0.0011434513316936085, "loss": 1.8529, "step": 16798 }, { "epoch": 0.4508104336625161, "grad_norm": 0.32421875, "learning_rate": 0.0011434394211670964, "loss": 1.7277, "step": 16799 }, { "epoch": 0.45083726921425504, "grad_norm": 0.328125, "learning_rate": 0.001143427509448438, "loss": 1.7606, "step": 16800 }, { "epoch": 0.450864104765994, "grad_norm": 0.328125, "learning_rate": 0.0011434155965376598, "loss": 1.8104, "step": 16801 }, { "epoch": 0.4508909403177329, "grad_norm": 0.361328125, "learning_rate": 0.0011434036824347874, "loss": 1.9325, "step": 16802 }, { "epoch": 0.4509177758694719, "grad_norm": 0.400390625, "learning_rate": 0.0011433917671398472, "loss": 2.189, "step": 16803 }, { "epoch": 0.45094461142121084, "grad_norm": 0.359375, "learning_rate": 0.0011433798506528656, "loss": 2.0376, "step": 16804 }, { "epoch": 0.4509714469729498, "grad_norm": 0.353515625, "learning_rate": 0.001143367932973868, "loss": 2.0783, "step": 16805 }, { "epoch": 0.4509982825246887, "grad_norm": 0.337890625, "learning_rate": 0.0011433560141028813, "loss": 2.0351, "step": 16806 }, { "epoch": 0.45102511807642764, "grad_norm": 0.345703125, "learning_rate": 0.0011433440940399314, "loss": 2.0789, "step": 16807 }, { "epoch": 0.4510519536281666, "grad_norm": 0.333984375, "learning_rate": 0.0011433321727850445, "loss": 2.141, "step": 16808 }, { "epoch": 0.4510787891799055, "grad_norm": 0.36328125, "learning_rate": 0.0011433202503382464, "loss": 2.0718, "step": 16809 }, { "epoch": 0.4511056247316445, "grad_norm": 0.33984375, "learning_rate": 0.0011433083266995637, "loss": 2.2342, "step": 16810 }, { "epoch": 0.45113246028338344, "grad_norm": 0.326171875, "learning_rate": 0.0011432964018690222, "loss": 2.0406, "step": 16811 }, { "epoch": 0.4511592958351224, "grad_norm": 0.318359375, "learning_rate": 0.0011432844758466482, "loss": 2.0576, "step": 16812 }, { "epoch": 0.4511861313868613, "grad_norm": 0.322265625, "learning_rate": 0.0011432725486324683, "loss": 2.0283, "step": 16813 }, { "epoch": 0.45121296693860025, "grad_norm": 0.31640625, "learning_rate": 0.0011432606202265078, "loss": 2.0461, "step": 16814 }, { "epoch": 0.4512398024903392, "grad_norm": 0.328125, "learning_rate": 0.0011432486906287937, "loss": 2.0099, "step": 16815 }, { "epoch": 0.45126663804207817, "grad_norm": 0.333984375, "learning_rate": 0.0011432367598393515, "loss": 2.1056, "step": 16816 }, { "epoch": 0.4512934735938171, "grad_norm": 0.333984375, "learning_rate": 0.0011432248278582077, "loss": 2.1772, "step": 16817 }, { "epoch": 0.45132030914555604, "grad_norm": 0.33203125, "learning_rate": 0.0011432128946853885, "loss": 2.1943, "step": 16818 }, { "epoch": 0.451347144697295, "grad_norm": 0.328125, "learning_rate": 0.00114320096032092, "loss": 2.1271, "step": 16819 }, { "epoch": 0.4513739802490339, "grad_norm": 0.322265625, "learning_rate": 0.0011431890247648286, "loss": 2.057, "step": 16820 }, { "epoch": 0.45140081580077285, "grad_norm": 0.318359375, "learning_rate": 0.0011431770880171402, "loss": 2.0302, "step": 16821 }, { "epoch": 0.4514276513525118, "grad_norm": 0.318359375, "learning_rate": 0.001143165150077881, "loss": 2.005, "step": 16822 }, { "epoch": 0.4514544869042508, "grad_norm": 0.31640625, "learning_rate": 0.0011431532109470773, "loss": 1.9965, "step": 16823 }, { "epoch": 0.4514813224559897, "grad_norm": 0.318359375, "learning_rate": 0.001143141270624755, "loss": 1.9796, "step": 16824 }, { "epoch": 0.45150815800772864, "grad_norm": 0.32421875, "learning_rate": 0.001143129329110941, "loss": 1.9582, "step": 16825 }, { "epoch": 0.4515349935594676, "grad_norm": 0.310546875, "learning_rate": 0.0011431173864056609, "loss": 1.9559, "step": 16826 }, { "epoch": 0.4515618291112065, "grad_norm": 0.333984375, "learning_rate": 0.0011431054425089411, "loss": 2.1194, "step": 16827 }, { "epoch": 0.45158866466294545, "grad_norm": 0.333984375, "learning_rate": 0.0011430934974208074, "loss": 2.118, "step": 16828 }, { "epoch": 0.45161550021468444, "grad_norm": 0.3203125, "learning_rate": 0.0011430815511412867, "loss": 1.9813, "step": 16829 }, { "epoch": 0.4516423357664234, "grad_norm": 0.32421875, "learning_rate": 0.0011430696036704047, "loss": 2.0942, "step": 16830 }, { "epoch": 0.4516691713181623, "grad_norm": 0.318359375, "learning_rate": 0.0011430576550081877, "loss": 2.0816, "step": 16831 }, { "epoch": 0.45169600686990125, "grad_norm": 0.322265625, "learning_rate": 0.0011430457051546622, "loss": 1.9895, "step": 16832 }, { "epoch": 0.4517228424216402, "grad_norm": 0.31640625, "learning_rate": 0.0011430337541098539, "loss": 2.0687, "step": 16833 }, { "epoch": 0.4517496779733791, "grad_norm": 0.314453125, "learning_rate": 0.0011430218018737895, "loss": 2.0351, "step": 16834 }, { "epoch": 0.45177651352511805, "grad_norm": 0.3125, "learning_rate": 0.001143009848446495, "loss": 1.9267, "step": 16835 }, { "epoch": 0.45180334907685704, "grad_norm": 0.32421875, "learning_rate": 0.0011429978938279966, "loss": 1.9922, "step": 16836 }, { "epoch": 0.451830184628596, "grad_norm": 0.330078125, "learning_rate": 0.0011429859380183208, "loss": 2.0719, "step": 16837 }, { "epoch": 0.4518570201803349, "grad_norm": 0.326171875, "learning_rate": 0.0011429739810174932, "loss": 2.0515, "step": 16838 }, { "epoch": 0.45188385573207385, "grad_norm": 0.328125, "learning_rate": 0.0011429620228255407, "loss": 2.0272, "step": 16839 }, { "epoch": 0.4519106912838128, "grad_norm": 0.328125, "learning_rate": 0.001142950063442489, "loss": 2.0871, "step": 16840 }, { "epoch": 0.4519375268355517, "grad_norm": 0.3203125, "learning_rate": 0.001142938102868365, "loss": 2.0087, "step": 16841 }, { "epoch": 0.4519643623872907, "grad_norm": 0.32421875, "learning_rate": 0.0011429261411031943, "loss": 2.0414, "step": 16842 }, { "epoch": 0.45199119793902964, "grad_norm": 0.3203125, "learning_rate": 0.0011429141781470035, "loss": 2.0187, "step": 16843 }, { "epoch": 0.4520180334907686, "grad_norm": 0.31640625, "learning_rate": 0.0011429022139998186, "loss": 1.9865, "step": 16844 }, { "epoch": 0.4520448690425075, "grad_norm": 0.322265625, "learning_rate": 0.001142890248661666, "loss": 1.9945, "step": 16845 }, { "epoch": 0.45207170459424645, "grad_norm": 0.30859375, "learning_rate": 0.0011428782821325718, "loss": 1.9041, "step": 16846 }, { "epoch": 0.4520985401459854, "grad_norm": 0.31640625, "learning_rate": 0.0011428663144125626, "loss": 1.8938, "step": 16847 }, { "epoch": 0.4521253756977243, "grad_norm": 0.314453125, "learning_rate": 0.0011428543455016642, "loss": 2.0332, "step": 16848 }, { "epoch": 0.4521522112494633, "grad_norm": 0.31640625, "learning_rate": 0.0011428423753999033, "loss": 2.0802, "step": 16849 }, { "epoch": 0.45217904680120224, "grad_norm": 0.3046875, "learning_rate": 0.0011428304041073058, "loss": 1.9263, "step": 16850 }, { "epoch": 0.4522058823529412, "grad_norm": 0.318359375, "learning_rate": 0.0011428184316238982, "loss": 1.9253, "step": 16851 }, { "epoch": 0.4522327179046801, "grad_norm": 0.318359375, "learning_rate": 0.0011428064579497065, "loss": 1.901, "step": 16852 }, { "epoch": 0.45225955345641905, "grad_norm": 0.32421875, "learning_rate": 0.0011427944830847572, "loss": 2.0284, "step": 16853 }, { "epoch": 0.452286389008158, "grad_norm": 0.318359375, "learning_rate": 0.0011427825070290764, "loss": 2.035, "step": 16854 }, { "epoch": 0.452313224559897, "grad_norm": 0.31640625, "learning_rate": 0.0011427705297826906, "loss": 1.9522, "step": 16855 }, { "epoch": 0.4523400601116359, "grad_norm": 0.3203125, "learning_rate": 0.001142758551345626, "loss": 2.0758, "step": 16856 }, { "epoch": 0.45236689566337485, "grad_norm": 0.310546875, "learning_rate": 0.0011427465717179087, "loss": 1.9489, "step": 16857 }, { "epoch": 0.4523937312151138, "grad_norm": 0.328125, "learning_rate": 0.0011427345908995652, "loss": 1.9976, "step": 16858 }, { "epoch": 0.4524205667668527, "grad_norm": 0.322265625, "learning_rate": 0.0011427226088906214, "loss": 2.0151, "step": 16859 }, { "epoch": 0.45244740231859165, "grad_norm": 0.326171875, "learning_rate": 0.001142710625691104, "loss": 1.9821, "step": 16860 }, { "epoch": 0.4524742378703306, "grad_norm": 0.31640625, "learning_rate": 0.0011426986413010393, "loss": 1.972, "step": 16861 }, { "epoch": 0.4525010734220696, "grad_norm": 0.33203125, "learning_rate": 0.0011426866557204536, "loss": 2.082, "step": 16862 }, { "epoch": 0.4525279089738085, "grad_norm": 0.310546875, "learning_rate": 0.0011426746689493727, "loss": 1.9496, "step": 16863 }, { "epoch": 0.45255474452554745, "grad_norm": 0.310546875, "learning_rate": 0.0011426626809878234, "loss": 1.978, "step": 16864 }, { "epoch": 0.4525815800772864, "grad_norm": 0.31640625, "learning_rate": 0.0011426506918358317, "loss": 2.0113, "step": 16865 }, { "epoch": 0.4526084156290253, "grad_norm": 0.3125, "learning_rate": 0.0011426387014934242, "loss": 1.9595, "step": 16866 }, { "epoch": 0.45263525118076425, "grad_norm": 0.314453125, "learning_rate": 0.0011426267099606268, "loss": 1.9344, "step": 16867 }, { "epoch": 0.45266208673250324, "grad_norm": 0.31640625, "learning_rate": 0.0011426147172374662, "loss": 1.8733, "step": 16868 }, { "epoch": 0.4526889222842422, "grad_norm": 0.31640625, "learning_rate": 0.0011426027233239686, "loss": 1.9574, "step": 16869 }, { "epoch": 0.4527157578359811, "grad_norm": 0.3203125, "learning_rate": 0.0011425907282201601, "loss": 2.0971, "step": 16870 }, { "epoch": 0.45274259338772005, "grad_norm": 0.3203125, "learning_rate": 0.0011425787319260675, "loss": 1.9756, "step": 16871 }, { "epoch": 0.452769428939459, "grad_norm": 0.3203125, "learning_rate": 0.0011425667344417165, "loss": 1.9462, "step": 16872 }, { "epoch": 0.4527962644911979, "grad_norm": 0.322265625, "learning_rate": 0.0011425547357671337, "loss": 1.9483, "step": 16873 }, { "epoch": 0.4528231000429369, "grad_norm": 0.30859375, "learning_rate": 0.0011425427359023457, "loss": 1.9428, "step": 16874 }, { "epoch": 0.45284993559467585, "grad_norm": 0.3125, "learning_rate": 0.0011425307348473784, "loss": 1.9414, "step": 16875 }, { "epoch": 0.4528767711464148, "grad_norm": 0.326171875, "learning_rate": 0.001142518732602258, "loss": 1.9712, "step": 16876 }, { "epoch": 0.4529036066981537, "grad_norm": 0.302734375, "learning_rate": 0.0011425067291670114, "loss": 1.7876, "step": 16877 }, { "epoch": 0.45293044224989265, "grad_norm": 0.3125, "learning_rate": 0.0011424947245416647, "loss": 1.9199, "step": 16878 }, { "epoch": 0.4529572778016316, "grad_norm": 0.314453125, "learning_rate": 0.0011424827187262439, "loss": 1.8931, "step": 16879 }, { "epoch": 0.4529841133533705, "grad_norm": 0.310546875, "learning_rate": 0.0011424707117207759, "loss": 1.888, "step": 16880 }, { "epoch": 0.4530109489051095, "grad_norm": 0.322265625, "learning_rate": 0.0011424587035252867, "loss": 1.9412, "step": 16881 }, { "epoch": 0.45303778445684845, "grad_norm": 0.3125, "learning_rate": 0.0011424466941398026, "loss": 1.9037, "step": 16882 }, { "epoch": 0.4530646200085874, "grad_norm": 0.31640625, "learning_rate": 0.0011424346835643502, "loss": 1.9722, "step": 16883 }, { "epoch": 0.4530914555603263, "grad_norm": 0.318359375, "learning_rate": 0.0011424226717989556, "loss": 1.9007, "step": 16884 }, { "epoch": 0.45311829111206525, "grad_norm": 0.31640625, "learning_rate": 0.0011424106588436452, "loss": 1.9024, "step": 16885 }, { "epoch": 0.4531451266638042, "grad_norm": 0.3203125, "learning_rate": 0.0011423986446984454, "loss": 1.9895, "step": 16886 }, { "epoch": 0.4531719622155432, "grad_norm": 0.333984375, "learning_rate": 0.0011423866293633826, "loss": 1.9735, "step": 16887 }, { "epoch": 0.4531987977672821, "grad_norm": 0.30859375, "learning_rate": 0.001142374612838483, "loss": 1.9453, "step": 16888 }, { "epoch": 0.45322563331902105, "grad_norm": 0.310546875, "learning_rate": 0.001142362595123773, "loss": 1.8817, "step": 16889 }, { "epoch": 0.45325246887076, "grad_norm": 0.30859375, "learning_rate": 0.0011423505762192792, "loss": 1.8385, "step": 16890 }, { "epoch": 0.4532793044224989, "grad_norm": 0.31640625, "learning_rate": 0.0011423385561250279, "loss": 1.9016, "step": 16891 }, { "epoch": 0.45330613997423785, "grad_norm": 0.3125, "learning_rate": 0.0011423265348410453, "loss": 1.8921, "step": 16892 }, { "epoch": 0.4533329755259768, "grad_norm": 0.31640625, "learning_rate": 0.0011423145123673577, "loss": 1.9452, "step": 16893 }, { "epoch": 0.4533598110777158, "grad_norm": 0.330078125, "learning_rate": 0.0011423024887039918, "loss": 1.9634, "step": 16894 }, { "epoch": 0.4533866466294547, "grad_norm": 0.31640625, "learning_rate": 0.0011422904638509735, "loss": 1.925, "step": 16895 }, { "epoch": 0.45341348218119365, "grad_norm": 0.3203125, "learning_rate": 0.0011422784378083298, "loss": 1.9813, "step": 16896 }, { "epoch": 0.4534403177329326, "grad_norm": 0.3125, "learning_rate": 0.0011422664105760864, "loss": 1.8645, "step": 16897 }, { "epoch": 0.4534671532846715, "grad_norm": 0.31640625, "learning_rate": 0.0011422543821542704, "loss": 1.959, "step": 16898 }, { "epoch": 0.45349398883641046, "grad_norm": 0.310546875, "learning_rate": 0.0011422423525429075, "loss": 1.8146, "step": 16899 }, { "epoch": 0.45352082438814945, "grad_norm": 0.330078125, "learning_rate": 0.0011422303217420247, "loss": 1.9417, "step": 16900 }, { "epoch": 0.4535476599398884, "grad_norm": 0.330078125, "learning_rate": 0.001142218289751648, "loss": 1.8776, "step": 16901 }, { "epoch": 0.4535744954916273, "grad_norm": 0.328125, "learning_rate": 0.0011422062565718037, "loss": 1.9253, "step": 16902 }, { "epoch": 0.45360133104336625, "grad_norm": 0.322265625, "learning_rate": 0.0011421942222025186, "loss": 1.9632, "step": 16903 }, { "epoch": 0.4536281665951052, "grad_norm": 0.322265625, "learning_rate": 0.0011421821866438187, "loss": 1.9691, "step": 16904 }, { "epoch": 0.4536550021468441, "grad_norm": 0.3125, "learning_rate": 0.0011421701498957309, "loss": 1.8496, "step": 16905 }, { "epoch": 0.45368183769858306, "grad_norm": 0.3203125, "learning_rate": 0.001142158111958281, "loss": 1.9158, "step": 16906 }, { "epoch": 0.45370867325032205, "grad_norm": 0.310546875, "learning_rate": 0.001142146072831496, "loss": 1.8021, "step": 16907 }, { "epoch": 0.453735508802061, "grad_norm": 0.328125, "learning_rate": 0.0011421340325154018, "loss": 1.8709, "step": 16908 }, { "epoch": 0.4537623443537999, "grad_norm": 0.314453125, "learning_rate": 0.001142121991010025, "loss": 1.8159, "step": 16909 }, { "epoch": 0.45378917990553885, "grad_norm": 0.30859375, "learning_rate": 0.0011421099483153922, "loss": 1.7925, "step": 16910 }, { "epoch": 0.4538160154572778, "grad_norm": 0.3359375, "learning_rate": 0.0011420979044315296, "loss": 1.9475, "step": 16911 }, { "epoch": 0.4538428510090167, "grad_norm": 0.330078125, "learning_rate": 0.0011420858593584637, "loss": 1.9546, "step": 16912 }, { "epoch": 0.4538696865607557, "grad_norm": 0.333984375, "learning_rate": 0.001142073813096221, "loss": 1.936, "step": 16913 }, { "epoch": 0.45389652211249465, "grad_norm": 0.322265625, "learning_rate": 0.0011420617656448277, "loss": 1.8891, "step": 16914 }, { "epoch": 0.4539233576642336, "grad_norm": 0.33203125, "learning_rate": 0.0011420497170043103, "loss": 2.0055, "step": 16915 }, { "epoch": 0.4539501932159725, "grad_norm": 0.318359375, "learning_rate": 0.0011420376671746953, "loss": 1.8756, "step": 16916 }, { "epoch": 0.45397702876771145, "grad_norm": 0.322265625, "learning_rate": 0.0011420256161560093, "loss": 1.7896, "step": 16917 }, { "epoch": 0.4540038643194504, "grad_norm": 0.3203125, "learning_rate": 0.0011420135639482784, "loss": 1.8945, "step": 16918 }, { "epoch": 0.4540306998711893, "grad_norm": 0.3203125, "learning_rate": 0.0011420015105515293, "loss": 1.8635, "step": 16919 }, { "epoch": 0.4540575354229283, "grad_norm": 0.32421875, "learning_rate": 0.0011419894559657885, "loss": 1.847, "step": 16920 }, { "epoch": 0.45408437097466725, "grad_norm": 0.314453125, "learning_rate": 0.001141977400191082, "loss": 1.8549, "step": 16921 }, { "epoch": 0.4541112065264062, "grad_norm": 0.3125, "learning_rate": 0.0011419653432274367, "loss": 1.7988, "step": 16922 }, { "epoch": 0.4541380420781451, "grad_norm": 0.322265625, "learning_rate": 0.0011419532850748787, "loss": 1.8689, "step": 16923 }, { "epoch": 0.45416487762988406, "grad_norm": 0.3203125, "learning_rate": 0.0011419412257334348, "loss": 1.8606, "step": 16924 }, { "epoch": 0.454191713181623, "grad_norm": 0.3046875, "learning_rate": 0.0011419291652031312, "loss": 1.7278, "step": 16925 }, { "epoch": 0.454218548733362, "grad_norm": 0.32421875, "learning_rate": 0.0011419171034839947, "loss": 1.9061, "step": 16926 }, { "epoch": 0.4542453842851009, "grad_norm": 0.326171875, "learning_rate": 0.0011419050405760513, "loss": 1.9612, "step": 16927 }, { "epoch": 0.45427221983683985, "grad_norm": 0.318359375, "learning_rate": 0.0011418929764793276, "loss": 1.9191, "step": 16928 }, { "epoch": 0.4542990553885788, "grad_norm": 0.31640625, "learning_rate": 0.0011418809111938503, "loss": 1.8346, "step": 16929 }, { "epoch": 0.4543258909403177, "grad_norm": 0.318359375, "learning_rate": 0.0011418688447196455, "loss": 1.8795, "step": 16930 }, { "epoch": 0.45435272649205666, "grad_norm": 0.3125, "learning_rate": 0.0011418567770567403, "loss": 1.8048, "step": 16931 }, { "epoch": 0.4543795620437956, "grad_norm": 0.326171875, "learning_rate": 0.0011418447082051602, "loss": 1.844, "step": 16932 }, { "epoch": 0.4544063975955346, "grad_norm": 0.318359375, "learning_rate": 0.0011418326381649325, "loss": 1.8539, "step": 16933 }, { "epoch": 0.4544332331472735, "grad_norm": 0.333984375, "learning_rate": 0.0011418205669360834, "loss": 1.9725, "step": 16934 }, { "epoch": 0.45446006869901245, "grad_norm": 0.333984375, "learning_rate": 0.0011418084945186394, "loss": 1.956, "step": 16935 }, { "epoch": 0.4544869042507514, "grad_norm": 0.30859375, "learning_rate": 0.001141796420912627, "loss": 1.7963, "step": 16936 }, { "epoch": 0.4545137398024903, "grad_norm": 0.318359375, "learning_rate": 0.0011417843461180726, "loss": 1.9241, "step": 16937 }, { "epoch": 0.45454057535422926, "grad_norm": 0.3125, "learning_rate": 0.0011417722701350026, "loss": 1.8298, "step": 16938 }, { "epoch": 0.45456741090596825, "grad_norm": 0.31640625, "learning_rate": 0.0011417601929634437, "loss": 1.8217, "step": 16939 }, { "epoch": 0.4545942464577072, "grad_norm": 0.30859375, "learning_rate": 0.0011417481146034226, "loss": 1.8278, "step": 16940 }, { "epoch": 0.4546210820094461, "grad_norm": 0.3125, "learning_rate": 0.0011417360350549652, "loss": 1.8382, "step": 16941 }, { "epoch": 0.45464791756118506, "grad_norm": 0.310546875, "learning_rate": 0.0011417239543180986, "loss": 1.789, "step": 16942 }, { "epoch": 0.454674753112924, "grad_norm": 0.314453125, "learning_rate": 0.0011417118723928486, "loss": 1.8598, "step": 16943 }, { "epoch": 0.4547015886646629, "grad_norm": 0.326171875, "learning_rate": 0.0011416997892792424, "loss": 1.8891, "step": 16944 }, { "epoch": 0.4547284242164019, "grad_norm": 0.31640625, "learning_rate": 0.0011416877049773063, "loss": 1.8349, "step": 16945 }, { "epoch": 0.45475525976814085, "grad_norm": 0.330078125, "learning_rate": 0.0011416756194870667, "loss": 1.9355, "step": 16946 }, { "epoch": 0.4547820953198798, "grad_norm": 0.3203125, "learning_rate": 0.0011416635328085501, "loss": 1.8198, "step": 16947 }, { "epoch": 0.4548089308716187, "grad_norm": 0.3203125, "learning_rate": 0.001141651444941783, "loss": 1.8078, "step": 16948 }, { "epoch": 0.45483576642335766, "grad_norm": 0.330078125, "learning_rate": 0.001141639355886792, "loss": 1.8931, "step": 16949 }, { "epoch": 0.4548626019750966, "grad_norm": 0.32421875, "learning_rate": 0.0011416272656436036, "loss": 1.8948, "step": 16950 }, { "epoch": 0.4548894375268355, "grad_norm": 0.318359375, "learning_rate": 0.0011416151742122446, "loss": 1.8281, "step": 16951 }, { "epoch": 0.4549162730785745, "grad_norm": 0.318359375, "learning_rate": 0.001141603081592741, "loss": 1.8307, "step": 16952 }, { "epoch": 0.45494310863031345, "grad_norm": 0.3203125, "learning_rate": 0.0011415909877851194, "loss": 1.8885, "step": 16953 }, { "epoch": 0.4549699441820524, "grad_norm": 0.3359375, "learning_rate": 0.001141578892789407, "loss": 1.9063, "step": 16954 }, { "epoch": 0.4549967797337913, "grad_norm": 0.326171875, "learning_rate": 0.0011415667966056294, "loss": 1.8818, "step": 16955 }, { "epoch": 0.45502361528553026, "grad_norm": 0.328125, "learning_rate": 0.0011415546992338138, "loss": 1.878, "step": 16956 }, { "epoch": 0.4550504508372692, "grad_norm": 0.328125, "learning_rate": 0.0011415426006739864, "loss": 1.9576, "step": 16957 }, { "epoch": 0.4550772863890082, "grad_norm": 0.318359375, "learning_rate": 0.001141530500926174, "loss": 1.74, "step": 16958 }, { "epoch": 0.4551041219407471, "grad_norm": 0.32421875, "learning_rate": 0.0011415183999904028, "loss": 1.9543, "step": 16959 }, { "epoch": 0.45513095749248605, "grad_norm": 0.326171875, "learning_rate": 0.0011415062978666999, "loss": 1.8784, "step": 16960 }, { "epoch": 0.455157793044225, "grad_norm": 0.33203125, "learning_rate": 0.0011414941945550913, "loss": 1.9942, "step": 16961 }, { "epoch": 0.4551846285959639, "grad_norm": 0.333984375, "learning_rate": 0.0011414820900556037, "loss": 1.8417, "step": 16962 }, { "epoch": 0.45521146414770286, "grad_norm": 0.3359375, "learning_rate": 0.0011414699843682638, "loss": 1.8613, "step": 16963 }, { "epoch": 0.4552382996994418, "grad_norm": 0.33203125, "learning_rate": 0.001141457877493098, "loss": 1.811, "step": 16964 }, { "epoch": 0.4552651352511808, "grad_norm": 0.314453125, "learning_rate": 0.0011414457694301327, "loss": 1.7992, "step": 16965 }, { "epoch": 0.4552919708029197, "grad_norm": 0.33203125, "learning_rate": 0.001141433660179395, "loss": 1.8608, "step": 16966 }, { "epoch": 0.45531880635465866, "grad_norm": 0.330078125, "learning_rate": 0.0011414215497409109, "loss": 1.8198, "step": 16967 }, { "epoch": 0.4553456419063976, "grad_norm": 0.337890625, "learning_rate": 0.0011414094381147074, "loss": 1.943, "step": 16968 }, { "epoch": 0.4553724774581365, "grad_norm": 0.330078125, "learning_rate": 0.0011413973253008108, "loss": 1.9112, "step": 16969 }, { "epoch": 0.45539931300987546, "grad_norm": 0.326171875, "learning_rate": 0.0011413852112992476, "loss": 1.8264, "step": 16970 }, { "epoch": 0.45542614856161445, "grad_norm": 0.328125, "learning_rate": 0.0011413730961100448, "loss": 1.8438, "step": 16971 }, { "epoch": 0.4554529841133534, "grad_norm": 0.326171875, "learning_rate": 0.0011413609797332287, "loss": 1.8626, "step": 16972 }, { "epoch": 0.4554798196650923, "grad_norm": 0.333984375, "learning_rate": 0.0011413488621688256, "loss": 1.9003, "step": 16973 }, { "epoch": 0.45550665521683126, "grad_norm": 0.322265625, "learning_rate": 0.0011413367434168625, "loss": 1.8561, "step": 16974 }, { "epoch": 0.4555334907685702, "grad_norm": 0.3203125, "learning_rate": 0.001141324623477366, "loss": 1.8207, "step": 16975 }, { "epoch": 0.45556032632030913, "grad_norm": 0.322265625, "learning_rate": 0.0011413125023503623, "loss": 1.8197, "step": 16976 }, { "epoch": 0.45558716187204806, "grad_norm": 0.326171875, "learning_rate": 0.0011413003800358782, "loss": 1.8449, "step": 16977 }, { "epoch": 0.45561399742378705, "grad_norm": 0.337890625, "learning_rate": 0.0011412882565339406, "loss": 1.9465, "step": 16978 }, { "epoch": 0.455640832975526, "grad_norm": 0.330078125, "learning_rate": 0.0011412761318445757, "loss": 1.8778, "step": 16979 }, { "epoch": 0.4556676685272649, "grad_norm": 0.322265625, "learning_rate": 0.0011412640059678101, "loss": 1.7783, "step": 16980 }, { "epoch": 0.45569450407900386, "grad_norm": 0.32421875, "learning_rate": 0.0011412518789036708, "loss": 1.8103, "step": 16981 }, { "epoch": 0.4557213396307428, "grad_norm": 0.318359375, "learning_rate": 0.0011412397506521837, "loss": 1.7647, "step": 16982 }, { "epoch": 0.45574817518248173, "grad_norm": 0.419921875, "learning_rate": 0.0011412276212133762, "loss": 2.2409, "step": 16983 }, { "epoch": 0.4557750107342207, "grad_norm": 0.3984375, "learning_rate": 0.0011412154905872743, "loss": 2.1841, "step": 16984 }, { "epoch": 0.45580184628595966, "grad_norm": 0.357421875, "learning_rate": 0.0011412033587739048, "loss": 2.0929, "step": 16985 }, { "epoch": 0.4558286818376986, "grad_norm": 0.35546875, "learning_rate": 0.0011411912257732945, "loss": 2.1187, "step": 16986 }, { "epoch": 0.4558555173894375, "grad_norm": 0.337890625, "learning_rate": 0.0011411790915854699, "loss": 2.0789, "step": 16987 }, { "epoch": 0.45588235294117646, "grad_norm": 0.34375, "learning_rate": 0.0011411669562104575, "loss": 2.1239, "step": 16988 }, { "epoch": 0.4559091884929154, "grad_norm": 0.333984375, "learning_rate": 0.001141154819648284, "loss": 2.1244, "step": 16989 }, { "epoch": 0.45593602404465433, "grad_norm": 0.32421875, "learning_rate": 0.001141142681898976, "loss": 2.0855, "step": 16990 }, { "epoch": 0.4559628595963933, "grad_norm": 0.326171875, "learning_rate": 0.0011411305429625603, "loss": 2.1145, "step": 16991 }, { "epoch": 0.45598969514813226, "grad_norm": 0.337890625, "learning_rate": 0.0011411184028390633, "loss": 2.1139, "step": 16992 }, { "epoch": 0.4560165306998712, "grad_norm": 0.337890625, "learning_rate": 0.0011411062615285118, "loss": 2.1649, "step": 16993 }, { "epoch": 0.4560433662516101, "grad_norm": 0.326171875, "learning_rate": 0.0011410941190309323, "loss": 2.0812, "step": 16994 }, { "epoch": 0.45607020180334906, "grad_norm": 0.322265625, "learning_rate": 0.0011410819753463516, "loss": 2.0517, "step": 16995 }, { "epoch": 0.456097037355088, "grad_norm": 0.330078125, "learning_rate": 0.0011410698304747961, "loss": 2.1372, "step": 16996 }, { "epoch": 0.456123872906827, "grad_norm": 0.328125, "learning_rate": 0.0011410576844162927, "loss": 2.1381, "step": 16997 }, { "epoch": 0.4561507084585659, "grad_norm": 0.3203125, "learning_rate": 0.0011410455371708677, "loss": 2.0809, "step": 16998 }, { "epoch": 0.45617754401030486, "grad_norm": 0.33203125, "learning_rate": 0.0011410333887385481, "loss": 2.1438, "step": 16999 }, { "epoch": 0.4562043795620438, "grad_norm": 0.3203125, "learning_rate": 0.0011410212391193605, "loss": 2.0339, "step": 17000 }, { "epoch": 0.45623121511378273, "grad_norm": 0.34375, "learning_rate": 0.0011410090883133314, "loss": 2.1747, "step": 17001 }, { "epoch": 0.45625805066552166, "grad_norm": 0.314453125, "learning_rate": 0.0011409969363204875, "loss": 2.0322, "step": 17002 }, { "epoch": 0.45628488621726065, "grad_norm": 0.3125, "learning_rate": 0.0011409847831408556, "loss": 1.9282, "step": 17003 }, { "epoch": 0.4563117217689996, "grad_norm": 0.3359375, "learning_rate": 0.001140972628774462, "loss": 2.0746, "step": 17004 }, { "epoch": 0.4563385573207385, "grad_norm": 0.330078125, "learning_rate": 0.0011409604732213338, "loss": 2.1561, "step": 17005 }, { "epoch": 0.45636539287247746, "grad_norm": 0.3203125, "learning_rate": 0.0011409483164814974, "loss": 2.0945, "step": 17006 }, { "epoch": 0.4563922284242164, "grad_norm": 0.31640625, "learning_rate": 0.0011409361585549796, "loss": 2.0099, "step": 17007 }, { "epoch": 0.45641906397595533, "grad_norm": 0.32421875, "learning_rate": 0.0011409239994418072, "loss": 2.1033, "step": 17008 }, { "epoch": 0.45644589952769427, "grad_norm": 0.32421875, "learning_rate": 0.0011409118391420064, "loss": 2.0872, "step": 17009 }, { "epoch": 0.45647273507943326, "grad_norm": 0.328125, "learning_rate": 0.0011408996776556043, "loss": 2.0928, "step": 17010 }, { "epoch": 0.4564995706311722, "grad_norm": 0.31640625, "learning_rate": 0.0011408875149826272, "loss": 2.0465, "step": 17011 }, { "epoch": 0.4565264061829111, "grad_norm": 0.314453125, "learning_rate": 0.0011408753511231021, "loss": 2.023, "step": 17012 }, { "epoch": 0.45655324173465006, "grad_norm": 0.322265625, "learning_rate": 0.0011408631860770559, "loss": 2.1151, "step": 17013 }, { "epoch": 0.456580077286389, "grad_norm": 0.31640625, "learning_rate": 0.0011408510198445147, "loss": 2.0192, "step": 17014 }, { "epoch": 0.45660691283812793, "grad_norm": 0.322265625, "learning_rate": 0.0011408388524255058, "loss": 2.0533, "step": 17015 }, { "epoch": 0.4566337483898669, "grad_norm": 0.314453125, "learning_rate": 0.0011408266838200552, "loss": 1.9733, "step": 17016 }, { "epoch": 0.45666058394160586, "grad_norm": 0.326171875, "learning_rate": 0.0011408145140281902, "loss": 2.0521, "step": 17017 }, { "epoch": 0.4566874194933448, "grad_norm": 0.326171875, "learning_rate": 0.0011408023430499372, "loss": 2.076, "step": 17018 }, { "epoch": 0.45671425504508373, "grad_norm": 0.322265625, "learning_rate": 0.0011407901708853229, "loss": 2.0216, "step": 17019 }, { "epoch": 0.45674109059682266, "grad_norm": 0.3125, "learning_rate": 0.0011407779975343743, "loss": 1.9442, "step": 17020 }, { "epoch": 0.4567679261485616, "grad_norm": 0.3203125, "learning_rate": 0.0011407658229971177, "loss": 1.9671, "step": 17021 }, { "epoch": 0.45679476170030053, "grad_norm": 0.32421875, "learning_rate": 0.00114075364727358, "loss": 1.9939, "step": 17022 }, { "epoch": 0.4568215972520395, "grad_norm": 0.328125, "learning_rate": 0.0011407414703637879, "loss": 2.0733, "step": 17023 }, { "epoch": 0.45684843280377846, "grad_norm": 0.330078125, "learning_rate": 0.001140729292267768, "loss": 2.1267, "step": 17024 }, { "epoch": 0.4568752683555174, "grad_norm": 0.3125, "learning_rate": 0.0011407171129855472, "loss": 2.049, "step": 17025 }, { "epoch": 0.45690210390725633, "grad_norm": 0.31640625, "learning_rate": 0.0011407049325171522, "loss": 2.053, "step": 17026 }, { "epoch": 0.45692893945899526, "grad_norm": 0.322265625, "learning_rate": 0.0011406927508626095, "loss": 2.0368, "step": 17027 }, { "epoch": 0.4569557750107342, "grad_norm": 0.3125, "learning_rate": 0.0011406805680219461, "loss": 1.9652, "step": 17028 }, { "epoch": 0.4569826105624732, "grad_norm": 0.318359375, "learning_rate": 0.0011406683839951886, "loss": 1.934, "step": 17029 }, { "epoch": 0.4570094461142121, "grad_norm": 0.3125, "learning_rate": 0.0011406561987823637, "loss": 1.9503, "step": 17030 }, { "epoch": 0.45703628166595106, "grad_norm": 0.314453125, "learning_rate": 0.0011406440123834982, "loss": 2.0233, "step": 17031 }, { "epoch": 0.45706311721769, "grad_norm": 0.326171875, "learning_rate": 0.0011406318247986188, "loss": 1.9766, "step": 17032 }, { "epoch": 0.45708995276942893, "grad_norm": 0.314453125, "learning_rate": 0.001140619636027752, "loss": 1.949, "step": 17033 }, { "epoch": 0.45711678832116787, "grad_norm": 0.32421875, "learning_rate": 0.0011406074460709253, "loss": 2.1338, "step": 17034 }, { "epoch": 0.4571436238729068, "grad_norm": 0.318359375, "learning_rate": 0.0011405952549281644, "loss": 1.9931, "step": 17035 }, { "epoch": 0.4571704594246458, "grad_norm": 0.31640625, "learning_rate": 0.001140583062599497, "loss": 2.0125, "step": 17036 }, { "epoch": 0.4571972949763847, "grad_norm": 0.31640625, "learning_rate": 0.001140570869084949, "loss": 1.9945, "step": 17037 }, { "epoch": 0.45722413052812366, "grad_norm": 0.318359375, "learning_rate": 0.0011405586743845476, "loss": 1.9814, "step": 17038 }, { "epoch": 0.4572509660798626, "grad_norm": 0.3125, "learning_rate": 0.0011405464784983196, "loss": 2.0307, "step": 17039 }, { "epoch": 0.45727780163160153, "grad_norm": 0.30859375, "learning_rate": 0.0011405342814262917, "loss": 1.9493, "step": 17040 }, { "epoch": 0.45730463718334047, "grad_norm": 0.3125, "learning_rate": 0.0011405220831684905, "loss": 2.0044, "step": 17041 }, { "epoch": 0.45733147273507946, "grad_norm": 0.314453125, "learning_rate": 0.0011405098837249429, "loss": 1.9497, "step": 17042 }, { "epoch": 0.4573583082868184, "grad_norm": 0.3125, "learning_rate": 0.0011404976830956758, "loss": 1.9293, "step": 17043 }, { "epoch": 0.45738514383855733, "grad_norm": 0.314453125, "learning_rate": 0.0011404854812807157, "loss": 1.9631, "step": 17044 }, { "epoch": 0.45741197939029626, "grad_norm": 0.302734375, "learning_rate": 0.0011404732782800894, "loss": 1.9377, "step": 17045 }, { "epoch": 0.4574388149420352, "grad_norm": 0.31640625, "learning_rate": 0.001140461074093824, "loss": 2.0247, "step": 17046 }, { "epoch": 0.45746565049377413, "grad_norm": 0.330078125, "learning_rate": 0.0011404488687219458, "loss": 2.0471, "step": 17047 }, { "epoch": 0.45749248604551307, "grad_norm": 0.306640625, "learning_rate": 0.0011404366621644818, "loss": 1.9586, "step": 17048 }, { "epoch": 0.45751932159725206, "grad_norm": 0.32421875, "learning_rate": 0.0011404244544214585, "loss": 1.9675, "step": 17049 }, { "epoch": 0.457546157148991, "grad_norm": 0.318359375, "learning_rate": 0.0011404122454929033, "loss": 1.9855, "step": 17050 }, { "epoch": 0.45757299270072993, "grad_norm": 0.314453125, "learning_rate": 0.0011404000353788424, "loss": 1.9974, "step": 17051 }, { "epoch": 0.45759982825246887, "grad_norm": 0.318359375, "learning_rate": 0.001140387824079303, "loss": 2.0982, "step": 17052 }, { "epoch": 0.4576266638042078, "grad_norm": 0.318359375, "learning_rate": 0.0011403756115943115, "loss": 1.9567, "step": 17053 }, { "epoch": 0.45765349935594674, "grad_norm": 0.31640625, "learning_rate": 0.0011403633979238951, "loss": 1.9656, "step": 17054 }, { "epoch": 0.4576803349076857, "grad_norm": 0.310546875, "learning_rate": 0.0011403511830680804, "loss": 1.9335, "step": 17055 }, { "epoch": 0.45770717045942466, "grad_norm": 0.318359375, "learning_rate": 0.001140338967026894, "loss": 1.9572, "step": 17056 }, { "epoch": 0.4577340060111636, "grad_norm": 0.3125, "learning_rate": 0.001140326749800363, "loss": 1.9435, "step": 17057 }, { "epoch": 0.45776084156290253, "grad_norm": 0.322265625, "learning_rate": 0.0011403145313885138, "loss": 1.9429, "step": 17058 }, { "epoch": 0.45778767711464147, "grad_norm": 0.3125, "learning_rate": 0.0011403023117913737, "loss": 1.9209, "step": 17059 }, { "epoch": 0.4578145126663804, "grad_norm": 0.318359375, "learning_rate": 0.0011402900910089694, "loss": 1.9658, "step": 17060 }, { "epoch": 0.45784134821811934, "grad_norm": 0.3125, "learning_rate": 0.0011402778690413275, "loss": 1.8982, "step": 17061 }, { "epoch": 0.45786818376985833, "grad_norm": 0.322265625, "learning_rate": 0.0011402656458884748, "loss": 1.9971, "step": 17062 }, { "epoch": 0.45789501932159726, "grad_norm": 0.322265625, "learning_rate": 0.0011402534215504384, "loss": 1.9907, "step": 17063 }, { "epoch": 0.4579218548733362, "grad_norm": 0.322265625, "learning_rate": 0.0011402411960272447, "loss": 2.0253, "step": 17064 }, { "epoch": 0.45794869042507513, "grad_norm": 0.314453125, "learning_rate": 0.0011402289693189212, "loss": 1.9103, "step": 17065 }, { "epoch": 0.45797552597681407, "grad_norm": 0.333984375, "learning_rate": 0.0011402167414254938, "loss": 2.0856, "step": 17066 }, { "epoch": 0.458002361528553, "grad_norm": 0.30859375, "learning_rate": 0.00114020451234699, "loss": 1.8668, "step": 17067 }, { "epoch": 0.458029197080292, "grad_norm": 0.328125, "learning_rate": 0.0011401922820834364, "loss": 2.0106, "step": 17068 }, { "epoch": 0.45805603263203093, "grad_norm": 0.3203125, "learning_rate": 0.00114018005063486, "loss": 1.9587, "step": 17069 }, { "epoch": 0.45808286818376986, "grad_norm": 0.310546875, "learning_rate": 0.0011401678180012872, "loss": 1.909, "step": 17070 }, { "epoch": 0.4581097037355088, "grad_norm": 0.326171875, "learning_rate": 0.0011401555841827455, "loss": 1.8825, "step": 17071 }, { "epoch": 0.45813653928724773, "grad_norm": 0.318359375, "learning_rate": 0.0011401433491792612, "loss": 1.8909, "step": 17072 }, { "epoch": 0.45816337483898667, "grad_norm": 0.32421875, "learning_rate": 0.0011401311129908612, "loss": 1.9661, "step": 17073 }, { "epoch": 0.45819021039072566, "grad_norm": 0.318359375, "learning_rate": 0.0011401188756175725, "loss": 1.9133, "step": 17074 }, { "epoch": 0.4582170459424646, "grad_norm": 0.318359375, "learning_rate": 0.0011401066370594218, "loss": 1.9247, "step": 17075 }, { "epoch": 0.45824388149420353, "grad_norm": 0.3203125, "learning_rate": 0.0011400943973164362, "loss": 1.9531, "step": 17076 }, { "epoch": 0.45827071704594247, "grad_norm": 0.3203125, "learning_rate": 0.0011400821563886424, "loss": 1.9302, "step": 17077 }, { "epoch": 0.4582975525976814, "grad_norm": 0.306640625, "learning_rate": 0.0011400699142760672, "loss": 1.8328, "step": 17078 }, { "epoch": 0.45832438814942034, "grad_norm": 0.314453125, "learning_rate": 0.0011400576709787375, "loss": 1.9059, "step": 17079 }, { "epoch": 0.45835122370115927, "grad_norm": 0.32421875, "learning_rate": 0.00114004542649668, "loss": 1.9527, "step": 17080 }, { "epoch": 0.45837805925289826, "grad_norm": 0.3203125, "learning_rate": 0.001140033180829922, "loss": 1.8215, "step": 17081 }, { "epoch": 0.4584048948046372, "grad_norm": 0.314453125, "learning_rate": 0.00114002093397849, "loss": 1.863, "step": 17082 }, { "epoch": 0.45843173035637613, "grad_norm": 0.33984375, "learning_rate": 0.0011400086859424108, "loss": 1.991, "step": 17083 }, { "epoch": 0.45845856590811507, "grad_norm": 0.328125, "learning_rate": 0.0011399964367217114, "loss": 1.9757, "step": 17084 }, { "epoch": 0.458485401459854, "grad_norm": 0.3125, "learning_rate": 0.0011399841863164188, "loss": 1.8701, "step": 17085 }, { "epoch": 0.45851223701159294, "grad_norm": 0.31640625, "learning_rate": 0.0011399719347265599, "loss": 1.9115, "step": 17086 }, { "epoch": 0.45853907256333193, "grad_norm": 0.314453125, "learning_rate": 0.0011399596819521613, "loss": 1.9243, "step": 17087 }, { "epoch": 0.45856590811507086, "grad_norm": 0.3125, "learning_rate": 0.0011399474279932499, "loss": 1.8616, "step": 17088 }, { "epoch": 0.4585927436668098, "grad_norm": 0.322265625, "learning_rate": 0.0011399351728498528, "loss": 1.962, "step": 17089 }, { "epoch": 0.45861957921854873, "grad_norm": 0.310546875, "learning_rate": 0.0011399229165219968, "loss": 1.8498, "step": 17090 }, { "epoch": 0.45864641477028767, "grad_norm": 0.330078125, "learning_rate": 0.0011399106590097086, "loss": 1.9454, "step": 17091 }, { "epoch": 0.4586732503220266, "grad_norm": 0.3125, "learning_rate": 0.0011398984003130155, "loss": 1.8259, "step": 17092 }, { "epoch": 0.45870008587376554, "grad_norm": 0.333984375, "learning_rate": 0.0011398861404319442, "loss": 1.9593, "step": 17093 }, { "epoch": 0.45872692142550453, "grad_norm": 0.326171875, "learning_rate": 0.0011398738793665215, "loss": 1.9781, "step": 17094 }, { "epoch": 0.45875375697724347, "grad_norm": 0.328125, "learning_rate": 0.0011398616171167742, "loss": 1.9664, "step": 17095 }, { "epoch": 0.4587805925289824, "grad_norm": 0.32421875, "learning_rate": 0.0011398493536827292, "loss": 1.8794, "step": 17096 }, { "epoch": 0.45880742808072134, "grad_norm": 0.322265625, "learning_rate": 0.001139837089064414, "loss": 1.9551, "step": 17097 }, { "epoch": 0.45883426363246027, "grad_norm": 0.328125, "learning_rate": 0.0011398248232618547, "loss": 1.9262, "step": 17098 }, { "epoch": 0.4588610991841992, "grad_norm": 0.33203125, "learning_rate": 0.0011398125562750785, "loss": 2.0087, "step": 17099 }, { "epoch": 0.4588879347359382, "grad_norm": 0.3203125, "learning_rate": 0.0011398002881041127, "loss": 1.9177, "step": 17100 }, { "epoch": 0.45891477028767713, "grad_norm": 0.322265625, "learning_rate": 0.0011397880187489836, "loss": 1.9602, "step": 17101 }, { "epoch": 0.45894160583941607, "grad_norm": 0.32421875, "learning_rate": 0.0011397757482097186, "loss": 1.8932, "step": 17102 }, { "epoch": 0.458968441391155, "grad_norm": 0.326171875, "learning_rate": 0.001139763476486344, "loss": 1.8887, "step": 17103 }, { "epoch": 0.45899527694289394, "grad_norm": 0.32421875, "learning_rate": 0.0011397512035788875, "loss": 1.8612, "step": 17104 }, { "epoch": 0.4590221124946329, "grad_norm": 0.322265625, "learning_rate": 0.0011397389294873757, "loss": 1.916, "step": 17105 }, { "epoch": 0.4590489480463718, "grad_norm": 0.32421875, "learning_rate": 0.0011397266542118353, "loss": 1.9593, "step": 17106 }, { "epoch": 0.4590757835981108, "grad_norm": 0.32421875, "learning_rate": 0.0011397143777522936, "loss": 1.9159, "step": 17107 }, { "epoch": 0.45910261914984973, "grad_norm": 0.337890625, "learning_rate": 0.001139702100108777, "loss": 2.0069, "step": 17108 }, { "epoch": 0.45912945470158867, "grad_norm": 0.3203125, "learning_rate": 0.001139689821281313, "loss": 1.8834, "step": 17109 }, { "epoch": 0.4591562902533276, "grad_norm": 0.326171875, "learning_rate": 0.0011396775412699284, "loss": 1.8799, "step": 17110 }, { "epoch": 0.45918312580506654, "grad_norm": 0.31640625, "learning_rate": 0.00113966526007465, "loss": 1.852, "step": 17111 }, { "epoch": 0.4592099613568055, "grad_norm": 0.333984375, "learning_rate": 0.0011396529776955047, "loss": 1.9475, "step": 17112 }, { "epoch": 0.45923679690854446, "grad_norm": 0.328125, "learning_rate": 0.0011396406941325196, "loss": 1.9545, "step": 17113 }, { "epoch": 0.4592636324602834, "grad_norm": 0.314453125, "learning_rate": 0.0011396284093857217, "loss": 1.8821, "step": 17114 }, { "epoch": 0.45929046801202233, "grad_norm": 0.3359375, "learning_rate": 0.0011396161234551376, "loss": 1.9126, "step": 17115 }, { "epoch": 0.45931730356376127, "grad_norm": 0.330078125, "learning_rate": 0.0011396038363407945, "loss": 1.8877, "step": 17116 }, { "epoch": 0.4593441391155002, "grad_norm": 0.328125, "learning_rate": 0.0011395915480427194, "loss": 1.9025, "step": 17117 }, { "epoch": 0.45937097466723914, "grad_norm": 0.322265625, "learning_rate": 0.0011395792585609393, "loss": 1.8569, "step": 17118 }, { "epoch": 0.4593978102189781, "grad_norm": 0.330078125, "learning_rate": 0.0011395669678954808, "loss": 1.8478, "step": 17119 }, { "epoch": 0.45942464577071707, "grad_norm": 0.33203125, "learning_rate": 0.0011395546760463713, "loss": 1.8799, "step": 17120 }, { "epoch": 0.459451481322456, "grad_norm": 0.33203125, "learning_rate": 0.0011395423830136376, "loss": 1.982, "step": 17121 }, { "epoch": 0.45947831687419494, "grad_norm": 0.328125, "learning_rate": 0.0011395300887973067, "loss": 1.9088, "step": 17122 }, { "epoch": 0.45950515242593387, "grad_norm": 0.337890625, "learning_rate": 0.0011395177933974053, "loss": 1.9278, "step": 17123 }, { "epoch": 0.4595319879776728, "grad_norm": 0.3203125, "learning_rate": 0.001139505496813961, "loss": 1.8712, "step": 17124 }, { "epoch": 0.45955882352941174, "grad_norm": 0.328125, "learning_rate": 0.001139493199047, "loss": 1.8124, "step": 17125 }, { "epoch": 0.45958565908115073, "grad_norm": 0.33203125, "learning_rate": 0.0011394809000965496, "loss": 1.8991, "step": 17126 }, { "epoch": 0.45961249463288967, "grad_norm": 0.3203125, "learning_rate": 0.001139468599962637, "loss": 1.8402, "step": 17127 }, { "epoch": 0.4596393301846286, "grad_norm": 0.330078125, "learning_rate": 0.001139456298645289, "loss": 1.9412, "step": 17128 }, { "epoch": 0.45966616573636754, "grad_norm": 0.3359375, "learning_rate": 0.0011394439961445325, "loss": 1.9823, "step": 17129 }, { "epoch": 0.4596930012881065, "grad_norm": 0.328125, "learning_rate": 0.0011394316924603946, "loss": 1.8527, "step": 17130 }, { "epoch": 0.4597198368398454, "grad_norm": 0.322265625, "learning_rate": 0.0011394193875929022, "loss": 1.841, "step": 17131 }, { "epoch": 0.45974667239158434, "grad_norm": 0.330078125, "learning_rate": 0.0011394070815420826, "loss": 1.9594, "step": 17132 }, { "epoch": 0.45977350794332333, "grad_norm": 0.30859375, "learning_rate": 0.0011393947743079623, "loss": 1.772, "step": 17133 }, { "epoch": 0.45980034349506227, "grad_norm": 0.3203125, "learning_rate": 0.0011393824658905685, "loss": 1.9066, "step": 17134 }, { "epoch": 0.4598271790468012, "grad_norm": 0.326171875, "learning_rate": 0.0011393701562899285, "loss": 1.9708, "step": 17135 }, { "epoch": 0.45985401459854014, "grad_norm": 0.337890625, "learning_rate": 0.0011393578455060692, "loss": 1.9033, "step": 17136 }, { "epoch": 0.4598808501502791, "grad_norm": 0.333984375, "learning_rate": 0.0011393455335390172, "loss": 1.9042, "step": 17137 }, { "epoch": 0.459907685702018, "grad_norm": 0.32421875, "learning_rate": 0.0011393332203887998, "loss": 1.871, "step": 17138 }, { "epoch": 0.459934521253757, "grad_norm": 0.326171875, "learning_rate": 0.0011393209060554439, "loss": 1.9497, "step": 17139 }, { "epoch": 0.45996135680549594, "grad_norm": 0.337890625, "learning_rate": 0.0011393085905389768, "loss": 2.0367, "step": 17140 }, { "epoch": 0.45998819235723487, "grad_norm": 0.314453125, "learning_rate": 0.0011392962738394252, "loss": 1.8134, "step": 17141 }, { "epoch": 0.4600150279089738, "grad_norm": 0.333984375, "learning_rate": 0.0011392839559568164, "loss": 1.9153, "step": 17142 }, { "epoch": 0.46004186346071274, "grad_norm": 0.326171875, "learning_rate": 0.001139271636891177, "loss": 1.9311, "step": 17143 }, { "epoch": 0.4600686990124517, "grad_norm": 0.31640625, "learning_rate": 0.0011392593166425346, "loss": 1.8845, "step": 17144 }, { "epoch": 0.46009553456419067, "grad_norm": 0.333984375, "learning_rate": 0.0011392469952109156, "loss": 1.8705, "step": 17145 }, { "epoch": 0.4601223701159296, "grad_norm": 0.322265625, "learning_rate": 0.0011392346725963476, "loss": 1.8442, "step": 17146 }, { "epoch": 0.46014920566766854, "grad_norm": 0.32421875, "learning_rate": 0.0011392223487988573, "loss": 1.7988, "step": 17147 }, { "epoch": 0.4601760412194075, "grad_norm": 0.33984375, "learning_rate": 0.0011392100238184718, "loss": 1.9342, "step": 17148 }, { "epoch": 0.4602028767711464, "grad_norm": 0.3203125, "learning_rate": 0.0011391976976552179, "loss": 1.8101, "step": 17149 }, { "epoch": 0.46022971232288534, "grad_norm": 0.333984375, "learning_rate": 0.001139185370309123, "loss": 1.856, "step": 17150 }, { "epoch": 0.4602565478746243, "grad_norm": 0.33203125, "learning_rate": 0.0011391730417802143, "loss": 1.8604, "step": 17151 }, { "epoch": 0.46028338342636327, "grad_norm": 0.3359375, "learning_rate": 0.0011391607120685183, "loss": 1.8388, "step": 17152 }, { "epoch": 0.4603102189781022, "grad_norm": 0.330078125, "learning_rate": 0.0011391483811740626, "loss": 1.8641, "step": 17153 }, { "epoch": 0.46033705452984114, "grad_norm": 0.328125, "learning_rate": 0.0011391360490968737, "loss": 1.8646, "step": 17154 }, { "epoch": 0.4603638900815801, "grad_norm": 0.337890625, "learning_rate": 0.001139123715836979, "loss": 1.8789, "step": 17155 }, { "epoch": 0.460390725633319, "grad_norm": 0.42578125, "learning_rate": 0.0011391113813944054, "loss": 2.198, "step": 17156 }, { "epoch": 0.46041756118505794, "grad_norm": 0.390625, "learning_rate": 0.0011390990457691803, "loss": 2.1992, "step": 17157 }, { "epoch": 0.46044439673679693, "grad_norm": 0.341796875, "learning_rate": 0.0011390867089613303, "loss": 2.0702, "step": 17158 }, { "epoch": 0.46047123228853587, "grad_norm": 0.345703125, "learning_rate": 0.0011390743709708828, "loss": 2.1791, "step": 17159 }, { "epoch": 0.4604980678402748, "grad_norm": 0.341796875, "learning_rate": 0.0011390620317978644, "loss": 2.1897, "step": 17160 }, { "epoch": 0.46052490339201374, "grad_norm": 0.33203125, "learning_rate": 0.0011390496914423028, "loss": 2.1045, "step": 17161 }, { "epoch": 0.4605517389437527, "grad_norm": 0.341796875, "learning_rate": 0.0011390373499042246, "loss": 2.1305, "step": 17162 }, { "epoch": 0.4605785744954916, "grad_norm": 0.333984375, "learning_rate": 0.0011390250071836572, "loss": 2.2085, "step": 17163 }, { "epoch": 0.46060541004723055, "grad_norm": 0.32421875, "learning_rate": 0.0011390126632806275, "loss": 2.0825, "step": 17164 }, { "epoch": 0.46063224559896954, "grad_norm": 0.314453125, "learning_rate": 0.0011390003181951625, "loss": 2.0803, "step": 17165 }, { "epoch": 0.46065908115070847, "grad_norm": 0.32421875, "learning_rate": 0.0011389879719272892, "loss": 2.0308, "step": 17166 }, { "epoch": 0.4606859167024474, "grad_norm": 0.33203125, "learning_rate": 0.0011389756244770354, "loss": 2.1717, "step": 17167 }, { "epoch": 0.46071275225418634, "grad_norm": 0.31640625, "learning_rate": 0.0011389632758444272, "loss": 2.0911, "step": 17168 }, { "epoch": 0.4607395878059253, "grad_norm": 0.31640625, "learning_rate": 0.0011389509260294924, "loss": 2.0874, "step": 17169 }, { "epoch": 0.4607664233576642, "grad_norm": 0.314453125, "learning_rate": 0.0011389385750322576, "loss": 2.0993, "step": 17170 }, { "epoch": 0.4607932589094032, "grad_norm": 0.318359375, "learning_rate": 0.00113892622285275, "loss": 2.1079, "step": 17171 }, { "epoch": 0.46082009446114214, "grad_norm": 0.326171875, "learning_rate": 0.0011389138694909973, "loss": 2.0916, "step": 17172 }, { "epoch": 0.4608469300128811, "grad_norm": 0.30859375, "learning_rate": 0.0011389015149470259, "loss": 1.9718, "step": 17173 }, { "epoch": 0.46087376556462, "grad_norm": 0.3203125, "learning_rate": 0.001138889159220863, "loss": 2.1318, "step": 17174 }, { "epoch": 0.46090060111635894, "grad_norm": 0.32421875, "learning_rate": 0.0011388768023125359, "loss": 2.1721, "step": 17175 }, { "epoch": 0.4609274366680979, "grad_norm": 0.302734375, "learning_rate": 0.0011388644442220717, "loss": 1.9623, "step": 17176 }, { "epoch": 0.4609542722198368, "grad_norm": 0.322265625, "learning_rate": 0.0011388520849494974, "loss": 2.1037, "step": 17177 }, { "epoch": 0.4609811077715758, "grad_norm": 0.31640625, "learning_rate": 0.0011388397244948402, "loss": 2.0959, "step": 17178 }, { "epoch": 0.46100794332331474, "grad_norm": 0.3203125, "learning_rate": 0.001138827362858127, "loss": 2.1058, "step": 17179 }, { "epoch": 0.4610347788750537, "grad_norm": 0.337890625, "learning_rate": 0.0011388150000393852, "loss": 2.2042, "step": 17180 }, { "epoch": 0.4610616144267926, "grad_norm": 0.3203125, "learning_rate": 0.001138802636038642, "loss": 2.0436, "step": 17181 }, { "epoch": 0.46108844997853154, "grad_norm": 0.32421875, "learning_rate": 0.001138790270855924, "loss": 2.0767, "step": 17182 }, { "epoch": 0.4611152855302705, "grad_norm": 0.328125, "learning_rate": 0.001138777904491259, "loss": 2.1029, "step": 17183 }, { "epoch": 0.46114212108200947, "grad_norm": 0.32421875, "learning_rate": 0.0011387655369446735, "loss": 2.0423, "step": 17184 }, { "epoch": 0.4611689566337484, "grad_norm": 0.318359375, "learning_rate": 0.0011387531682161953, "loss": 2.0336, "step": 17185 }, { "epoch": 0.46119579218548734, "grad_norm": 0.326171875, "learning_rate": 0.0011387407983058505, "loss": 2.1156, "step": 17186 }, { "epoch": 0.4612226277372263, "grad_norm": 0.32421875, "learning_rate": 0.0011387284272136673, "loss": 2.0447, "step": 17187 }, { "epoch": 0.4612494632889652, "grad_norm": 0.3125, "learning_rate": 0.0011387160549396725, "loss": 2.0216, "step": 17188 }, { "epoch": 0.46127629884070415, "grad_norm": 0.330078125, "learning_rate": 0.001138703681483893, "loss": 2.1376, "step": 17189 }, { "epoch": 0.4613031343924431, "grad_norm": 0.314453125, "learning_rate": 0.0011386913068463563, "loss": 2.0402, "step": 17190 }, { "epoch": 0.4613299699441821, "grad_norm": 0.3203125, "learning_rate": 0.0011386789310270894, "loss": 2.0619, "step": 17191 }, { "epoch": 0.461356805495921, "grad_norm": 0.3203125, "learning_rate": 0.001138666554026119, "loss": 2.158, "step": 17192 }, { "epoch": 0.46138364104765994, "grad_norm": 0.318359375, "learning_rate": 0.001138654175843473, "loss": 2.0679, "step": 17193 }, { "epoch": 0.4614104765993989, "grad_norm": 0.310546875, "learning_rate": 0.0011386417964791783, "loss": 1.9859, "step": 17194 }, { "epoch": 0.4614373121511378, "grad_norm": 0.31640625, "learning_rate": 0.0011386294159332615, "loss": 2.0469, "step": 17195 }, { "epoch": 0.46146414770287675, "grad_norm": 0.30859375, "learning_rate": 0.0011386170342057506, "loss": 2.0345, "step": 17196 }, { "epoch": 0.46149098325461574, "grad_norm": 0.32421875, "learning_rate": 0.0011386046512966723, "loss": 2.0262, "step": 17197 }, { "epoch": 0.4615178188063547, "grad_norm": 0.31640625, "learning_rate": 0.0011385922672060538, "loss": 2.0439, "step": 17198 }, { "epoch": 0.4615446543580936, "grad_norm": 0.3203125, "learning_rate": 0.0011385798819339224, "loss": 2.0498, "step": 17199 }, { "epoch": 0.46157148990983254, "grad_norm": 0.322265625, "learning_rate": 0.0011385674954803053, "loss": 2.0104, "step": 17200 }, { "epoch": 0.4615983254615715, "grad_norm": 0.31640625, "learning_rate": 0.0011385551078452294, "loss": 2.056, "step": 17201 }, { "epoch": 0.4616251610133104, "grad_norm": 0.3125, "learning_rate": 0.001138542719028722, "loss": 1.9946, "step": 17202 }, { "epoch": 0.4616519965650494, "grad_norm": 0.3203125, "learning_rate": 0.0011385303290308104, "loss": 2.0543, "step": 17203 }, { "epoch": 0.46167883211678834, "grad_norm": 0.3203125, "learning_rate": 0.0011385179378515218, "loss": 2.0752, "step": 17204 }, { "epoch": 0.4617056676685273, "grad_norm": 0.30859375, "learning_rate": 0.001138505545490883, "loss": 2.03, "step": 17205 }, { "epoch": 0.4617325032202662, "grad_norm": 0.314453125, "learning_rate": 0.0011384931519489217, "loss": 2.0257, "step": 17206 }, { "epoch": 0.46175933877200515, "grad_norm": 0.310546875, "learning_rate": 0.0011384807572256649, "loss": 1.9874, "step": 17207 }, { "epoch": 0.4617861743237441, "grad_norm": 0.3203125, "learning_rate": 0.0011384683613211396, "loss": 2.0507, "step": 17208 }, { "epoch": 0.461813009875483, "grad_norm": 0.310546875, "learning_rate": 0.0011384559642353732, "loss": 1.9862, "step": 17209 }, { "epoch": 0.461839845427222, "grad_norm": 0.328125, "learning_rate": 0.0011384435659683926, "loss": 2.0973, "step": 17210 }, { "epoch": 0.46186668097896094, "grad_norm": 0.30859375, "learning_rate": 0.0011384311665202256, "loss": 1.8895, "step": 17211 }, { "epoch": 0.4618935165306999, "grad_norm": 0.318359375, "learning_rate": 0.0011384187658908989, "loss": 2.0061, "step": 17212 }, { "epoch": 0.4619203520824388, "grad_norm": 0.3203125, "learning_rate": 0.0011384063640804396, "loss": 2.0594, "step": 17213 }, { "epoch": 0.46194718763417775, "grad_norm": 0.31640625, "learning_rate": 0.0011383939610888755, "loss": 1.9784, "step": 17214 }, { "epoch": 0.4619740231859167, "grad_norm": 0.3203125, "learning_rate": 0.0011383815569162331, "loss": 2.0648, "step": 17215 }, { "epoch": 0.4620008587376557, "grad_norm": 0.3125, "learning_rate": 0.0011383691515625403, "loss": 1.984, "step": 17216 }, { "epoch": 0.4620276942893946, "grad_norm": 0.3125, "learning_rate": 0.0011383567450278236, "loss": 1.9875, "step": 17217 }, { "epoch": 0.46205452984113354, "grad_norm": 0.31640625, "learning_rate": 0.0011383443373121109, "loss": 1.9362, "step": 17218 }, { "epoch": 0.4620813653928725, "grad_norm": 0.3125, "learning_rate": 0.001138331928415429, "loss": 2.0438, "step": 17219 }, { "epoch": 0.4621082009446114, "grad_norm": 0.3125, "learning_rate": 0.0011383195183378052, "loss": 2.0133, "step": 17220 }, { "epoch": 0.46213503649635035, "grad_norm": 0.314453125, "learning_rate": 0.001138307107079267, "loss": 1.9464, "step": 17221 }, { "epoch": 0.4621618720480893, "grad_norm": 0.30859375, "learning_rate": 0.001138294694639841, "loss": 1.9396, "step": 17222 }, { "epoch": 0.4621887075998283, "grad_norm": 0.3203125, "learning_rate": 0.0011382822810195549, "loss": 1.9512, "step": 17223 }, { "epoch": 0.4622155431515672, "grad_norm": 0.306640625, "learning_rate": 0.001138269866218436, "loss": 1.9341, "step": 17224 }, { "epoch": 0.46224237870330614, "grad_norm": 0.32421875, "learning_rate": 0.0011382574502365112, "loss": 2.0404, "step": 17225 }, { "epoch": 0.4622692142550451, "grad_norm": 0.3203125, "learning_rate": 0.001138245033073808, "loss": 2.0071, "step": 17226 }, { "epoch": 0.462296049806784, "grad_norm": 0.318359375, "learning_rate": 0.0011382326147303535, "loss": 1.9746, "step": 17227 }, { "epoch": 0.46232288535852295, "grad_norm": 0.314453125, "learning_rate": 0.0011382201952061751, "loss": 1.9887, "step": 17228 }, { "epoch": 0.46234972091026194, "grad_norm": 0.314453125, "learning_rate": 0.0011382077745013, "loss": 1.9403, "step": 17229 }, { "epoch": 0.4623765564620009, "grad_norm": 0.3125, "learning_rate": 0.0011381953526157552, "loss": 2.0244, "step": 17230 }, { "epoch": 0.4624033920137398, "grad_norm": 0.31640625, "learning_rate": 0.0011381829295495681, "loss": 1.9908, "step": 17231 }, { "epoch": 0.46243022756547875, "grad_norm": 0.330078125, "learning_rate": 0.0011381705053027662, "loss": 2.0271, "step": 17232 }, { "epoch": 0.4624570631172177, "grad_norm": 0.341796875, "learning_rate": 0.0011381580798753763, "loss": 2.0139, "step": 17233 }, { "epoch": 0.4624838986689566, "grad_norm": 0.318359375, "learning_rate": 0.0011381456532674262, "loss": 1.8882, "step": 17234 }, { "epoch": 0.46251073422069555, "grad_norm": 0.3046875, "learning_rate": 0.0011381332254789425, "loss": 1.8543, "step": 17235 }, { "epoch": 0.46253756977243454, "grad_norm": 0.322265625, "learning_rate": 0.001138120796509953, "loss": 1.9446, "step": 17236 }, { "epoch": 0.4625644053241735, "grad_norm": 0.32421875, "learning_rate": 0.0011381083663604848, "loss": 1.9282, "step": 17237 }, { "epoch": 0.4625912408759124, "grad_norm": 0.322265625, "learning_rate": 0.0011380959350305653, "loss": 1.9479, "step": 17238 }, { "epoch": 0.46261807642765135, "grad_norm": 0.330078125, "learning_rate": 0.0011380835025202214, "loss": 2.0526, "step": 17239 }, { "epoch": 0.4626449119793903, "grad_norm": 0.3203125, "learning_rate": 0.0011380710688294808, "loss": 1.9617, "step": 17240 }, { "epoch": 0.4626717475311292, "grad_norm": 0.3203125, "learning_rate": 0.0011380586339583706, "loss": 2.0215, "step": 17241 }, { "epoch": 0.4626985830828682, "grad_norm": 0.30859375, "learning_rate": 0.0011380461979069178, "loss": 1.8614, "step": 17242 }, { "epoch": 0.46272541863460714, "grad_norm": 0.322265625, "learning_rate": 0.00113803376067515, "loss": 1.9388, "step": 17243 }, { "epoch": 0.4627522541863461, "grad_norm": 0.326171875, "learning_rate": 0.0011380213222630946, "loss": 2.0232, "step": 17244 }, { "epoch": 0.462779089738085, "grad_norm": 0.322265625, "learning_rate": 0.0011380088826707785, "loss": 1.9868, "step": 17245 }, { "epoch": 0.46280592528982395, "grad_norm": 0.322265625, "learning_rate": 0.0011379964418982292, "loss": 1.9746, "step": 17246 }, { "epoch": 0.4628327608415629, "grad_norm": 0.322265625, "learning_rate": 0.0011379839999454743, "loss": 1.9658, "step": 17247 }, { "epoch": 0.4628595963933018, "grad_norm": 0.3125, "learning_rate": 0.0011379715568125405, "loss": 1.9813, "step": 17248 }, { "epoch": 0.4628864319450408, "grad_norm": 0.3203125, "learning_rate": 0.0011379591124994555, "loss": 2.0262, "step": 17249 }, { "epoch": 0.46291326749677975, "grad_norm": 0.3125, "learning_rate": 0.0011379466670062464, "loss": 1.9096, "step": 17250 }, { "epoch": 0.4629401030485187, "grad_norm": 0.318359375, "learning_rate": 0.0011379342203329406, "loss": 1.9479, "step": 17251 }, { "epoch": 0.4629669386002576, "grad_norm": 0.3203125, "learning_rate": 0.0011379217724795654, "loss": 2.0266, "step": 17252 }, { "epoch": 0.46299377415199655, "grad_norm": 0.326171875, "learning_rate": 0.0011379093234461478, "loss": 1.9836, "step": 17253 }, { "epoch": 0.4630206097037355, "grad_norm": 0.3203125, "learning_rate": 0.0011378968732327157, "loss": 2.0363, "step": 17254 }, { "epoch": 0.4630474452554745, "grad_norm": 0.3203125, "learning_rate": 0.0011378844218392962, "loss": 2.0027, "step": 17255 }, { "epoch": 0.4630742808072134, "grad_norm": 0.3125, "learning_rate": 0.0011378719692659164, "loss": 1.8934, "step": 17256 }, { "epoch": 0.46310111635895235, "grad_norm": 0.31640625, "learning_rate": 0.0011378595155126037, "loss": 1.9247, "step": 17257 }, { "epoch": 0.4631279519106913, "grad_norm": 0.3203125, "learning_rate": 0.0011378470605793855, "loss": 1.9698, "step": 17258 }, { "epoch": 0.4631547874624302, "grad_norm": 0.3203125, "learning_rate": 0.001137834604466289, "loss": 1.9388, "step": 17259 }, { "epoch": 0.46318162301416915, "grad_norm": 0.328125, "learning_rate": 0.0011378221471733418, "loss": 2.0181, "step": 17260 }, { "epoch": 0.4632084585659081, "grad_norm": 0.32421875, "learning_rate": 0.001137809688700571, "loss": 1.9738, "step": 17261 }, { "epoch": 0.4632352941176471, "grad_norm": 0.318359375, "learning_rate": 0.001137797229048004, "loss": 1.9014, "step": 17262 }, { "epoch": 0.463262129669386, "grad_norm": 0.326171875, "learning_rate": 0.001137784768215668, "loss": 1.9314, "step": 17263 }, { "epoch": 0.46328896522112495, "grad_norm": 0.333984375, "learning_rate": 0.0011377723062035905, "loss": 1.9627, "step": 17264 }, { "epoch": 0.4633158007728639, "grad_norm": 0.3203125, "learning_rate": 0.0011377598430117987, "loss": 1.9247, "step": 17265 }, { "epoch": 0.4633426363246028, "grad_norm": 0.3125, "learning_rate": 0.00113774737864032, "loss": 1.9338, "step": 17266 }, { "epoch": 0.46336947187634175, "grad_norm": 0.298828125, "learning_rate": 0.0011377349130891818, "loss": 1.7917, "step": 17267 }, { "epoch": 0.46339630742808074, "grad_norm": 0.318359375, "learning_rate": 0.0011377224463584115, "loss": 1.9197, "step": 17268 }, { "epoch": 0.4634231429798197, "grad_norm": 0.32421875, "learning_rate": 0.0011377099784480361, "loss": 2.01, "step": 17269 }, { "epoch": 0.4634499785315586, "grad_norm": 0.3203125, "learning_rate": 0.0011376975093580835, "loss": 1.9213, "step": 17270 }, { "epoch": 0.46347681408329755, "grad_norm": 0.32421875, "learning_rate": 0.0011376850390885807, "loss": 1.9901, "step": 17271 }, { "epoch": 0.4635036496350365, "grad_norm": 0.322265625, "learning_rate": 0.0011376725676395549, "loss": 1.9427, "step": 17272 }, { "epoch": 0.4635304851867754, "grad_norm": 0.337890625, "learning_rate": 0.0011376600950110337, "loss": 2.0658, "step": 17273 }, { "epoch": 0.4635573207385144, "grad_norm": 0.30859375, "learning_rate": 0.0011376476212030448, "loss": 1.8173, "step": 17274 }, { "epoch": 0.46358415629025335, "grad_norm": 0.3359375, "learning_rate": 0.0011376351462156148, "loss": 2.0176, "step": 17275 }, { "epoch": 0.4636109918419923, "grad_norm": 0.326171875, "learning_rate": 0.0011376226700487717, "loss": 1.9367, "step": 17276 }, { "epoch": 0.4636378273937312, "grad_norm": 0.3125, "learning_rate": 0.0011376101927025426, "loss": 1.9622, "step": 17277 }, { "epoch": 0.46366466294547015, "grad_norm": 0.3203125, "learning_rate": 0.0011375977141769547, "loss": 2.0066, "step": 17278 }, { "epoch": 0.4636914984972091, "grad_norm": 0.318359375, "learning_rate": 0.0011375852344720357, "loss": 1.9234, "step": 17279 }, { "epoch": 0.463718334048948, "grad_norm": 0.328125, "learning_rate": 0.001137572753587813, "loss": 2.0151, "step": 17280 }, { "epoch": 0.463745169600687, "grad_norm": 0.330078125, "learning_rate": 0.0011375602715243136, "loss": 1.9247, "step": 17281 }, { "epoch": 0.46377200515242595, "grad_norm": 0.318359375, "learning_rate": 0.0011375477882815653, "loss": 1.9388, "step": 17282 }, { "epoch": 0.4637988407041649, "grad_norm": 0.328125, "learning_rate": 0.001137535303859595, "loss": 1.8964, "step": 17283 }, { "epoch": 0.4638256762559038, "grad_norm": 0.326171875, "learning_rate": 0.0011375228182584308, "loss": 1.8951, "step": 17284 }, { "epoch": 0.46385251180764275, "grad_norm": 0.330078125, "learning_rate": 0.0011375103314780995, "loss": 1.9829, "step": 17285 }, { "epoch": 0.4638793473593817, "grad_norm": 0.31640625, "learning_rate": 0.0011374978435186285, "loss": 1.849, "step": 17286 }, { "epoch": 0.4639061829111207, "grad_norm": 0.33203125, "learning_rate": 0.0011374853543800456, "loss": 1.9772, "step": 17287 }, { "epoch": 0.4639330184628596, "grad_norm": 0.33203125, "learning_rate": 0.001137472864062378, "loss": 1.9527, "step": 17288 }, { "epoch": 0.46395985401459855, "grad_norm": 0.326171875, "learning_rate": 0.0011374603725656529, "loss": 1.9161, "step": 17289 }, { "epoch": 0.4639866895663375, "grad_norm": 0.310546875, "learning_rate": 0.0011374478798898981, "loss": 1.8908, "step": 17290 }, { "epoch": 0.4640135251180764, "grad_norm": 0.326171875, "learning_rate": 0.0011374353860351405, "loss": 1.8751, "step": 17291 }, { "epoch": 0.46404036066981535, "grad_norm": 0.333984375, "learning_rate": 0.0011374228910014078, "loss": 2.0338, "step": 17292 }, { "epoch": 0.4640671962215543, "grad_norm": 0.3046875, "learning_rate": 0.0011374103947887275, "loss": 1.8731, "step": 17293 }, { "epoch": 0.4640940317732933, "grad_norm": 0.32421875, "learning_rate": 0.001137397897397127, "loss": 1.932, "step": 17294 }, { "epoch": 0.4641208673250322, "grad_norm": 0.310546875, "learning_rate": 0.0011373853988266335, "loss": 1.8488, "step": 17295 }, { "epoch": 0.46414770287677115, "grad_norm": 0.3203125, "learning_rate": 0.0011373728990772744, "loss": 1.9206, "step": 17296 }, { "epoch": 0.4641745384285101, "grad_norm": 0.310546875, "learning_rate": 0.0011373603981490773, "loss": 1.8702, "step": 17297 }, { "epoch": 0.464201373980249, "grad_norm": 0.318359375, "learning_rate": 0.00113734789604207, "loss": 1.8587, "step": 17298 }, { "epoch": 0.46422820953198796, "grad_norm": 0.322265625, "learning_rate": 0.0011373353927562788, "loss": 1.9168, "step": 17299 }, { "epoch": 0.46425504508372695, "grad_norm": 0.318359375, "learning_rate": 0.0011373228882917323, "loss": 1.858, "step": 17300 }, { "epoch": 0.4642818806354659, "grad_norm": 0.3046875, "learning_rate": 0.0011373103826484573, "loss": 1.7722, "step": 17301 }, { "epoch": 0.4643087161872048, "grad_norm": 0.33203125, "learning_rate": 0.0011372978758264817, "loss": 1.9655, "step": 17302 }, { "epoch": 0.46433555173894375, "grad_norm": 0.3203125, "learning_rate": 0.0011372853678258323, "loss": 1.8933, "step": 17303 }, { "epoch": 0.4643623872906827, "grad_norm": 0.322265625, "learning_rate": 0.0011372728586465368, "loss": 1.8321, "step": 17304 }, { "epoch": 0.4643892228424216, "grad_norm": 0.33203125, "learning_rate": 0.001137260348288623, "loss": 1.9593, "step": 17305 }, { "epoch": 0.46441605839416056, "grad_norm": 0.333984375, "learning_rate": 0.001137247836752118, "loss": 1.9555, "step": 17306 }, { "epoch": 0.46444289394589955, "grad_norm": 0.328125, "learning_rate": 0.0011372353240370493, "loss": 1.9758, "step": 17307 }, { "epoch": 0.4644697294976385, "grad_norm": 0.326171875, "learning_rate": 0.0011372228101434443, "loss": 1.8749, "step": 17308 }, { "epoch": 0.4644965650493774, "grad_norm": 0.3203125, "learning_rate": 0.0011372102950713305, "loss": 1.8824, "step": 17309 }, { "epoch": 0.46452340060111635, "grad_norm": 0.330078125, "learning_rate": 0.0011371977788207353, "loss": 1.9549, "step": 17310 }, { "epoch": 0.4645502361528553, "grad_norm": 0.322265625, "learning_rate": 0.0011371852613916863, "loss": 1.9102, "step": 17311 }, { "epoch": 0.4645770717045942, "grad_norm": 0.32421875, "learning_rate": 0.001137172742784211, "loss": 1.873, "step": 17312 }, { "epoch": 0.4646039072563332, "grad_norm": 0.328125, "learning_rate": 0.0011371602229983365, "loss": 1.918, "step": 17313 }, { "epoch": 0.46463074280807215, "grad_norm": 0.3203125, "learning_rate": 0.0011371477020340906, "loss": 1.9542, "step": 17314 }, { "epoch": 0.4646575783598111, "grad_norm": 0.328125, "learning_rate": 0.0011371351798915008, "loss": 1.873, "step": 17315 }, { "epoch": 0.46468441391155, "grad_norm": 0.333984375, "learning_rate": 0.0011371226565705943, "loss": 1.9272, "step": 17316 }, { "epoch": 0.46471124946328896, "grad_norm": 0.33984375, "learning_rate": 0.0011371101320713987, "loss": 1.9413, "step": 17317 }, { "epoch": 0.4647380850150279, "grad_norm": 0.328125, "learning_rate": 0.0011370976063939417, "loss": 1.9069, "step": 17318 }, { "epoch": 0.4647649205667668, "grad_norm": 0.337890625, "learning_rate": 0.0011370850795382505, "loss": 1.8762, "step": 17319 }, { "epoch": 0.4647917561185058, "grad_norm": 0.3359375, "learning_rate": 0.0011370725515043524, "loss": 1.8424, "step": 17320 }, { "epoch": 0.46481859167024475, "grad_norm": 0.32421875, "learning_rate": 0.0011370600222922753, "loss": 1.8051, "step": 17321 }, { "epoch": 0.4648454272219837, "grad_norm": 0.333984375, "learning_rate": 0.0011370474919020466, "loss": 1.938, "step": 17322 }, { "epoch": 0.4648722627737226, "grad_norm": 0.33984375, "learning_rate": 0.0011370349603336935, "loss": 1.9742, "step": 17323 }, { "epoch": 0.46489909832546156, "grad_norm": 0.400390625, "learning_rate": 0.0011370224275872437, "loss": 2.1567, "step": 17324 }, { "epoch": 0.4649259338772005, "grad_norm": 0.3828125, "learning_rate": 0.001137009893662725, "loss": 2.086, "step": 17325 }, { "epoch": 0.4649527694289395, "grad_norm": 0.341796875, "learning_rate": 0.0011369973585601645, "loss": 2.084, "step": 17326 }, { "epoch": 0.4649796049806784, "grad_norm": 0.33203125, "learning_rate": 0.0011369848222795896, "loss": 2.0205, "step": 17327 }, { "epoch": 0.46500644053241735, "grad_norm": 0.326171875, "learning_rate": 0.001136972284821028, "loss": 2.1328, "step": 17328 }, { "epoch": 0.4650332760841563, "grad_norm": 0.326171875, "learning_rate": 0.0011369597461845073, "loss": 2.0434, "step": 17329 }, { "epoch": 0.4650601116358952, "grad_norm": 0.345703125, "learning_rate": 0.0011369472063700549, "loss": 2.1589, "step": 17330 }, { "epoch": 0.46508694718763416, "grad_norm": 0.326171875, "learning_rate": 0.001136934665377698, "loss": 2.0822, "step": 17331 }, { "epoch": 0.4651137827393731, "grad_norm": 0.3125, "learning_rate": 0.0011369221232074647, "loss": 1.979, "step": 17332 }, { "epoch": 0.4651406182911121, "grad_norm": 0.3203125, "learning_rate": 0.0011369095798593823, "loss": 2.0234, "step": 17333 }, { "epoch": 0.465167453842851, "grad_norm": 0.31640625, "learning_rate": 0.001136897035333478, "loss": 2.0533, "step": 17334 }, { "epoch": 0.46519428939458995, "grad_norm": 0.3046875, "learning_rate": 0.0011368844896297798, "loss": 1.9124, "step": 17335 }, { "epoch": 0.4652211249463289, "grad_norm": 0.345703125, "learning_rate": 0.001136871942748315, "loss": 2.118, "step": 17336 }, { "epoch": 0.4652479604980678, "grad_norm": 0.32421875, "learning_rate": 0.0011368593946891109, "loss": 2.0849, "step": 17337 }, { "epoch": 0.46527479604980676, "grad_norm": 0.330078125, "learning_rate": 0.0011368468454521952, "loss": 2.0921, "step": 17338 }, { "epoch": 0.46530163160154575, "grad_norm": 0.3125, "learning_rate": 0.0011368342950375957, "loss": 2.0414, "step": 17339 }, { "epoch": 0.4653284671532847, "grad_norm": 0.3125, "learning_rate": 0.0011368217434453395, "loss": 2.0117, "step": 17340 }, { "epoch": 0.4653553027050236, "grad_norm": 0.30859375, "learning_rate": 0.0011368091906754543, "loss": 1.9724, "step": 17341 }, { "epoch": 0.46538213825676256, "grad_norm": 0.314453125, "learning_rate": 0.0011367966367279677, "loss": 2.0203, "step": 17342 }, { "epoch": 0.4654089738085015, "grad_norm": 0.306640625, "learning_rate": 0.0011367840816029074, "loss": 1.9536, "step": 17343 }, { "epoch": 0.4654358093602404, "grad_norm": 0.310546875, "learning_rate": 0.0011367715253003004, "loss": 2.0109, "step": 17344 }, { "epoch": 0.4654626449119794, "grad_norm": 0.314453125, "learning_rate": 0.0011367589678201748, "loss": 2.0189, "step": 17345 }, { "epoch": 0.46548948046371835, "grad_norm": 0.322265625, "learning_rate": 0.0011367464091625581, "loss": 1.9944, "step": 17346 }, { "epoch": 0.4655163160154573, "grad_norm": 0.302734375, "learning_rate": 0.0011367338493274775, "loss": 1.8701, "step": 17347 }, { "epoch": 0.4655431515671962, "grad_norm": 0.318359375, "learning_rate": 0.0011367212883149607, "loss": 2.0234, "step": 17348 }, { "epoch": 0.46556998711893516, "grad_norm": 0.32421875, "learning_rate": 0.0011367087261250355, "loss": 2.0753, "step": 17349 }, { "epoch": 0.4655968226706741, "grad_norm": 0.322265625, "learning_rate": 0.0011366961627577291, "loss": 2.0969, "step": 17350 }, { "epoch": 0.46562365822241303, "grad_norm": 0.306640625, "learning_rate": 0.0011366835982130692, "loss": 1.9847, "step": 17351 }, { "epoch": 0.465650493774152, "grad_norm": 0.3203125, "learning_rate": 0.0011366710324910835, "loss": 2.0744, "step": 17352 }, { "epoch": 0.46567732932589095, "grad_norm": 0.310546875, "learning_rate": 0.0011366584655917995, "loss": 1.9755, "step": 17353 }, { "epoch": 0.4657041648776299, "grad_norm": 0.318359375, "learning_rate": 0.0011366458975152446, "loss": 2.0249, "step": 17354 }, { "epoch": 0.4657310004293688, "grad_norm": 0.310546875, "learning_rate": 0.0011366333282614464, "loss": 2.0277, "step": 17355 }, { "epoch": 0.46575783598110776, "grad_norm": 0.3125, "learning_rate": 0.0011366207578304326, "loss": 1.9935, "step": 17356 }, { "epoch": 0.4657846715328467, "grad_norm": 0.3046875, "learning_rate": 0.0011366081862222309, "loss": 1.9286, "step": 17357 }, { "epoch": 0.4658115070845857, "grad_norm": 0.322265625, "learning_rate": 0.0011365956134368686, "loss": 1.9791, "step": 17358 }, { "epoch": 0.4658383426363246, "grad_norm": 0.3046875, "learning_rate": 0.0011365830394743732, "loss": 1.9909, "step": 17359 }, { "epoch": 0.46586517818806356, "grad_norm": 0.314453125, "learning_rate": 0.0011365704643347728, "loss": 1.9276, "step": 17360 }, { "epoch": 0.4658920137398025, "grad_norm": 0.306640625, "learning_rate": 0.0011365578880180945, "loss": 1.9443, "step": 17361 }, { "epoch": 0.4659188492915414, "grad_norm": 0.32421875, "learning_rate": 0.001136545310524366, "loss": 2.0509, "step": 17362 }, { "epoch": 0.46594568484328036, "grad_norm": 0.310546875, "learning_rate": 0.001136532731853615, "loss": 1.9812, "step": 17363 }, { "epoch": 0.4659725203950193, "grad_norm": 0.302734375, "learning_rate": 0.0011365201520058693, "loss": 1.9405, "step": 17364 }, { "epoch": 0.4659993559467583, "grad_norm": 0.302734375, "learning_rate": 0.0011365075709811557, "loss": 1.9519, "step": 17365 }, { "epoch": 0.4660261914984972, "grad_norm": 0.330078125, "learning_rate": 0.0011364949887795028, "loss": 2.0992, "step": 17366 }, { "epoch": 0.46605302705023616, "grad_norm": 0.302734375, "learning_rate": 0.0011364824054009375, "loss": 1.9404, "step": 17367 }, { "epoch": 0.4660798626019751, "grad_norm": 0.3125, "learning_rate": 0.0011364698208454877, "loss": 2.0271, "step": 17368 }, { "epoch": 0.466106698153714, "grad_norm": 0.314453125, "learning_rate": 0.0011364572351131809, "loss": 2.0347, "step": 17369 }, { "epoch": 0.46613353370545296, "grad_norm": 0.3125, "learning_rate": 0.0011364446482040448, "loss": 2.0413, "step": 17370 }, { "epoch": 0.46616036925719195, "grad_norm": 0.30078125, "learning_rate": 0.001136432060118107, "loss": 1.9607, "step": 17371 }, { "epoch": 0.4661872048089309, "grad_norm": 0.306640625, "learning_rate": 0.0011364194708553949, "loss": 1.9371, "step": 17372 }, { "epoch": 0.4662140403606698, "grad_norm": 0.3046875, "learning_rate": 0.0011364068804159365, "loss": 1.9001, "step": 17373 }, { "epoch": 0.46624087591240876, "grad_norm": 0.306640625, "learning_rate": 0.0011363942887997593, "loss": 1.9321, "step": 17374 }, { "epoch": 0.4662677114641477, "grad_norm": 0.30078125, "learning_rate": 0.0011363816960068905, "loss": 1.9336, "step": 17375 }, { "epoch": 0.46629454701588663, "grad_norm": 0.30078125, "learning_rate": 0.0011363691020373583, "loss": 1.936, "step": 17376 }, { "epoch": 0.46632138256762556, "grad_norm": 0.3125, "learning_rate": 0.00113635650689119, "loss": 1.9482, "step": 17377 }, { "epoch": 0.46634821811936455, "grad_norm": 0.322265625, "learning_rate": 0.0011363439105684134, "loss": 2.0201, "step": 17378 }, { "epoch": 0.4663750536711035, "grad_norm": 0.326171875, "learning_rate": 0.0011363313130690561, "loss": 2.0499, "step": 17379 }, { "epoch": 0.4664018892228424, "grad_norm": 0.32421875, "learning_rate": 0.0011363187143931456, "loss": 1.9288, "step": 17380 }, { "epoch": 0.46642872477458136, "grad_norm": 0.310546875, "learning_rate": 0.0011363061145407095, "loss": 1.9673, "step": 17381 }, { "epoch": 0.4664555603263203, "grad_norm": 0.3203125, "learning_rate": 0.0011362935135117757, "loss": 2.0264, "step": 17382 }, { "epoch": 0.46648239587805923, "grad_norm": 0.30078125, "learning_rate": 0.0011362809113063719, "loss": 1.8782, "step": 17383 }, { "epoch": 0.4665092314297982, "grad_norm": 0.310546875, "learning_rate": 0.0011362683079245252, "loss": 1.9953, "step": 17384 }, { "epoch": 0.46653606698153716, "grad_norm": 0.30859375, "learning_rate": 0.001136255703366264, "loss": 1.962, "step": 17385 }, { "epoch": 0.4665629025332761, "grad_norm": 0.3125, "learning_rate": 0.0011362430976316151, "loss": 2.0588, "step": 17386 }, { "epoch": 0.466589738085015, "grad_norm": 0.298828125, "learning_rate": 0.001136230490720607, "loss": 1.8128, "step": 17387 }, { "epoch": 0.46661657363675396, "grad_norm": 0.314453125, "learning_rate": 0.0011362178826332667, "loss": 2.0433, "step": 17388 }, { "epoch": 0.4666434091884929, "grad_norm": 0.298828125, "learning_rate": 0.0011362052733696222, "loss": 1.767, "step": 17389 }, { "epoch": 0.46667024474023183, "grad_norm": 0.30078125, "learning_rate": 0.0011361926629297012, "loss": 1.9184, "step": 17390 }, { "epoch": 0.4666970802919708, "grad_norm": 0.30078125, "learning_rate": 0.0011361800513135312, "loss": 1.8409, "step": 17391 }, { "epoch": 0.46672391584370976, "grad_norm": 0.3125, "learning_rate": 0.0011361674385211397, "loss": 1.9505, "step": 17392 }, { "epoch": 0.4667507513954487, "grad_norm": 0.314453125, "learning_rate": 0.001136154824552555, "loss": 1.9709, "step": 17393 }, { "epoch": 0.46677758694718763, "grad_norm": 0.314453125, "learning_rate": 0.001136142209407804, "loss": 1.9978, "step": 17394 }, { "epoch": 0.46680442249892656, "grad_norm": 0.30859375, "learning_rate": 0.001136129593086915, "loss": 1.9316, "step": 17395 }, { "epoch": 0.4668312580506655, "grad_norm": 0.30859375, "learning_rate": 0.0011361169755899152, "loss": 1.9396, "step": 17396 }, { "epoch": 0.4668580936024045, "grad_norm": 0.3125, "learning_rate": 0.0011361043569168324, "loss": 1.8986, "step": 17397 }, { "epoch": 0.4668849291541434, "grad_norm": 0.302734375, "learning_rate": 0.0011360917370676945, "loss": 1.9066, "step": 17398 }, { "epoch": 0.46691176470588236, "grad_norm": 0.310546875, "learning_rate": 0.0011360791160425292, "loss": 1.8859, "step": 17399 }, { "epoch": 0.4669386002576213, "grad_norm": 0.3125, "learning_rate": 0.0011360664938413639, "loss": 1.8821, "step": 17400 }, { "epoch": 0.46696543580936023, "grad_norm": 0.31640625, "learning_rate": 0.0011360538704642265, "loss": 1.9379, "step": 17401 }, { "epoch": 0.46699227136109916, "grad_norm": 0.314453125, "learning_rate": 0.0011360412459111445, "loss": 1.9309, "step": 17402 }, { "epoch": 0.46701910691283816, "grad_norm": 0.3125, "learning_rate": 0.0011360286201821455, "loss": 1.8927, "step": 17403 }, { "epoch": 0.4670459424645771, "grad_norm": 0.3125, "learning_rate": 0.0011360159932772577, "loss": 1.9165, "step": 17404 }, { "epoch": 0.467072778016316, "grad_norm": 0.31640625, "learning_rate": 0.0011360033651965085, "loss": 1.956, "step": 17405 }, { "epoch": 0.46709961356805496, "grad_norm": 0.318359375, "learning_rate": 0.0011359907359399257, "loss": 1.9799, "step": 17406 }, { "epoch": 0.4671264491197939, "grad_norm": 0.30078125, "learning_rate": 0.0011359781055075367, "loss": 1.7647, "step": 17407 }, { "epoch": 0.46715328467153283, "grad_norm": 0.306640625, "learning_rate": 0.0011359654738993695, "loss": 1.8979, "step": 17408 }, { "epoch": 0.46718012022327177, "grad_norm": 0.314453125, "learning_rate": 0.0011359528411154516, "loss": 2.003, "step": 17409 }, { "epoch": 0.46720695577501076, "grad_norm": 0.30859375, "learning_rate": 0.0011359402071558112, "loss": 1.8342, "step": 17410 }, { "epoch": 0.4672337913267497, "grad_norm": 0.298828125, "learning_rate": 0.0011359275720204754, "loss": 1.8763, "step": 17411 }, { "epoch": 0.4672606268784886, "grad_norm": 0.310546875, "learning_rate": 0.0011359149357094724, "loss": 1.8964, "step": 17412 }, { "epoch": 0.46728746243022756, "grad_norm": 0.3046875, "learning_rate": 0.0011359022982228293, "loss": 1.8209, "step": 17413 }, { "epoch": 0.4673142979819665, "grad_norm": 0.31640625, "learning_rate": 0.0011358896595605745, "loss": 1.9486, "step": 17414 }, { "epoch": 0.46734113353370543, "grad_norm": 0.30859375, "learning_rate": 0.0011358770197227354, "loss": 1.8463, "step": 17415 }, { "epoch": 0.4673679690854444, "grad_norm": 0.294921875, "learning_rate": 0.0011358643787093398, "loss": 1.7128, "step": 17416 }, { "epoch": 0.46739480463718336, "grad_norm": 0.306640625, "learning_rate": 0.0011358517365204154, "loss": 1.8674, "step": 17417 }, { "epoch": 0.4674216401889223, "grad_norm": 0.3203125, "learning_rate": 0.00113583909315599, "loss": 1.9942, "step": 17418 }, { "epoch": 0.46744847574066123, "grad_norm": 0.310546875, "learning_rate": 0.0011358264486160913, "loss": 1.8517, "step": 17419 }, { "epoch": 0.46747531129240016, "grad_norm": 0.30859375, "learning_rate": 0.0011358138029007466, "loss": 1.8846, "step": 17420 }, { "epoch": 0.4675021468441391, "grad_norm": 0.3125, "learning_rate": 0.0011358011560099845, "loss": 1.8464, "step": 17421 }, { "epoch": 0.46752898239587803, "grad_norm": 0.314453125, "learning_rate": 0.0011357885079438322, "loss": 1.9203, "step": 17422 }, { "epoch": 0.467555817947617, "grad_norm": 0.306640625, "learning_rate": 0.0011357758587023175, "loss": 1.952, "step": 17423 }, { "epoch": 0.46758265349935596, "grad_norm": 0.310546875, "learning_rate": 0.0011357632082854682, "loss": 1.8786, "step": 17424 }, { "epoch": 0.4676094890510949, "grad_norm": 0.326171875, "learning_rate": 0.001135750556693312, "loss": 1.939, "step": 17425 }, { "epoch": 0.46763632460283383, "grad_norm": 0.310546875, "learning_rate": 0.0011357379039258768, "loss": 1.8757, "step": 17426 }, { "epoch": 0.46766316015457277, "grad_norm": 0.314453125, "learning_rate": 0.0011357252499831902, "loss": 1.8681, "step": 17427 }, { "epoch": 0.4676899957063117, "grad_norm": 0.30859375, "learning_rate": 0.00113571259486528, "loss": 1.8728, "step": 17428 }, { "epoch": 0.4677168312580507, "grad_norm": 0.30859375, "learning_rate": 0.001135699938572174, "loss": 1.9749, "step": 17429 }, { "epoch": 0.4677436668097896, "grad_norm": 0.318359375, "learning_rate": 0.0011356872811039, "loss": 1.9586, "step": 17430 }, { "epoch": 0.46777050236152856, "grad_norm": 0.3125, "learning_rate": 0.0011356746224604856, "loss": 1.9089, "step": 17431 }, { "epoch": 0.4677973379132675, "grad_norm": 0.314453125, "learning_rate": 0.0011356619626419588, "loss": 1.8556, "step": 17432 }, { "epoch": 0.46782417346500643, "grad_norm": 0.310546875, "learning_rate": 0.001135649301648347, "loss": 1.9661, "step": 17433 }, { "epoch": 0.46785100901674537, "grad_norm": 0.322265625, "learning_rate": 0.0011356366394796784, "loss": 1.938, "step": 17434 }, { "epoch": 0.4678778445684843, "grad_norm": 0.314453125, "learning_rate": 0.0011356239761359807, "loss": 1.9367, "step": 17435 }, { "epoch": 0.4679046801202233, "grad_norm": 0.3046875, "learning_rate": 0.0011356113116172814, "loss": 1.9039, "step": 17436 }, { "epoch": 0.46793151567196223, "grad_norm": 0.306640625, "learning_rate": 0.0011355986459236085, "loss": 1.8958, "step": 17437 }, { "epoch": 0.46795835122370116, "grad_norm": 0.3125, "learning_rate": 0.0011355859790549898, "loss": 1.8785, "step": 17438 }, { "epoch": 0.4679851867754401, "grad_norm": 0.306640625, "learning_rate": 0.001135573311011453, "loss": 1.9308, "step": 17439 }, { "epoch": 0.46801202232717903, "grad_norm": 0.3125, "learning_rate": 0.001135560641793026, "loss": 1.931, "step": 17440 }, { "epoch": 0.46803885787891797, "grad_norm": 0.30078125, "learning_rate": 0.0011355479713997365, "loss": 1.8482, "step": 17441 }, { "epoch": 0.46806569343065696, "grad_norm": 0.298828125, "learning_rate": 0.001135535299831612, "loss": 1.7283, "step": 17442 }, { "epoch": 0.4680925289823959, "grad_norm": 0.3125, "learning_rate": 0.001135522627088681, "loss": 1.8182, "step": 17443 }, { "epoch": 0.46811936453413483, "grad_norm": 0.3125, "learning_rate": 0.0011355099531709708, "loss": 1.8594, "step": 17444 }, { "epoch": 0.46814620008587376, "grad_norm": 0.330078125, "learning_rate": 0.0011354972780785093, "loss": 1.9034, "step": 17445 }, { "epoch": 0.4681730356376127, "grad_norm": 0.310546875, "learning_rate": 0.0011354846018113243, "loss": 1.8132, "step": 17446 }, { "epoch": 0.46819987118935164, "grad_norm": 0.3203125, "learning_rate": 0.0011354719243694435, "loss": 1.8904, "step": 17447 }, { "epoch": 0.46822670674109057, "grad_norm": 0.32421875, "learning_rate": 0.0011354592457528952, "loss": 1.9478, "step": 17448 }, { "epoch": 0.46825354229282956, "grad_norm": 0.314453125, "learning_rate": 0.0011354465659617066, "loss": 1.9083, "step": 17449 }, { "epoch": 0.4682803778445685, "grad_norm": 0.318359375, "learning_rate": 0.0011354338849959057, "loss": 1.8351, "step": 17450 }, { "epoch": 0.46830721339630743, "grad_norm": 0.3203125, "learning_rate": 0.0011354212028555205, "loss": 1.8566, "step": 17451 }, { "epoch": 0.46833404894804637, "grad_norm": 0.314453125, "learning_rate": 0.0011354085195405786, "loss": 1.8287, "step": 17452 }, { "epoch": 0.4683608844997853, "grad_norm": 0.318359375, "learning_rate": 0.001135395835051108, "loss": 1.8958, "step": 17453 }, { "epoch": 0.46838772005152424, "grad_norm": 0.3125, "learning_rate": 0.0011353831493871366, "loss": 1.8396, "step": 17454 }, { "epoch": 0.4684145556032632, "grad_norm": 0.31640625, "learning_rate": 0.0011353704625486918, "loss": 1.8932, "step": 17455 }, { "epoch": 0.46844139115500216, "grad_norm": 0.306640625, "learning_rate": 0.0011353577745358018, "loss": 1.7796, "step": 17456 }, { "epoch": 0.4684682267067411, "grad_norm": 0.32421875, "learning_rate": 0.0011353450853484946, "loss": 1.9884, "step": 17457 }, { "epoch": 0.46849506225848003, "grad_norm": 0.3125, "learning_rate": 0.0011353323949867974, "loss": 1.8628, "step": 17458 }, { "epoch": 0.46852189781021897, "grad_norm": 0.322265625, "learning_rate": 0.0011353197034507387, "loss": 1.8688, "step": 17459 }, { "epoch": 0.4685487333619579, "grad_norm": 0.3203125, "learning_rate": 0.001135307010740346, "loss": 1.9382, "step": 17460 }, { "epoch": 0.46857556891369684, "grad_norm": 0.314453125, "learning_rate": 0.0011352943168556472, "loss": 1.8332, "step": 17461 }, { "epoch": 0.46860240446543583, "grad_norm": 0.30859375, "learning_rate": 0.00113528162179667, "loss": 1.8401, "step": 17462 }, { "epoch": 0.46862924001717476, "grad_norm": 0.3125, "learning_rate": 0.0011352689255634426, "loss": 1.8448, "step": 17463 }, { "epoch": 0.4686560755689137, "grad_norm": 0.322265625, "learning_rate": 0.0011352562281559926, "loss": 1.9106, "step": 17464 }, { "epoch": 0.46868291112065263, "grad_norm": 0.31640625, "learning_rate": 0.0011352435295743479, "loss": 1.8668, "step": 17465 }, { "epoch": 0.46870974667239157, "grad_norm": 0.30859375, "learning_rate": 0.0011352308298185362, "loss": 1.7922, "step": 17466 }, { "epoch": 0.4687365822241305, "grad_norm": 0.32421875, "learning_rate": 0.001135218128888586, "loss": 1.9012, "step": 17467 }, { "epoch": 0.4687634177758695, "grad_norm": 0.322265625, "learning_rate": 0.0011352054267845242, "loss": 1.7866, "step": 17468 }, { "epoch": 0.46879025332760843, "grad_norm": 0.322265625, "learning_rate": 0.0011351927235063793, "loss": 1.8903, "step": 17469 }, { "epoch": 0.46881708887934737, "grad_norm": 0.31640625, "learning_rate": 0.0011351800190541792, "loss": 1.8218, "step": 17470 }, { "epoch": 0.4688439244310863, "grad_norm": 0.3125, "learning_rate": 0.0011351673134279514, "loss": 1.8012, "step": 17471 }, { "epoch": 0.46887075998282524, "grad_norm": 0.32421875, "learning_rate": 0.001135154606627724, "loss": 1.9273, "step": 17472 }, { "epoch": 0.46889759553456417, "grad_norm": 0.314453125, "learning_rate": 0.0011351418986535247, "loss": 1.9358, "step": 17473 }, { "epoch": 0.46892443108630316, "grad_norm": 0.318359375, "learning_rate": 0.0011351291895053818, "loss": 1.9143, "step": 17474 }, { "epoch": 0.4689512666380421, "grad_norm": 0.31640625, "learning_rate": 0.0011351164791833226, "loss": 1.8261, "step": 17475 }, { "epoch": 0.46897810218978103, "grad_norm": 0.322265625, "learning_rate": 0.0011351037676873756, "loss": 1.8856, "step": 17476 }, { "epoch": 0.46900493774151997, "grad_norm": 0.314453125, "learning_rate": 0.0011350910550175682, "loss": 1.8452, "step": 17477 }, { "epoch": 0.4690317732932589, "grad_norm": 0.322265625, "learning_rate": 0.0011350783411739284, "loss": 1.8777, "step": 17478 }, { "epoch": 0.46905860884499784, "grad_norm": 0.30859375, "learning_rate": 0.001135065626156484, "loss": 1.759, "step": 17479 }, { "epoch": 0.4690854443967368, "grad_norm": 0.30859375, "learning_rate": 0.0011350529099652634, "loss": 1.8294, "step": 17480 }, { "epoch": 0.46911227994847576, "grad_norm": 0.3125, "learning_rate": 0.001135040192600294, "loss": 1.7611, "step": 17481 }, { "epoch": 0.4691391155002147, "grad_norm": 0.330078125, "learning_rate": 0.0011350274740616038, "loss": 1.9173, "step": 17482 }, { "epoch": 0.46916595105195363, "grad_norm": 0.326171875, "learning_rate": 0.0011350147543492206, "loss": 1.8438, "step": 17483 }, { "epoch": 0.46919278660369257, "grad_norm": 0.3125, "learning_rate": 0.0011350020334631727, "loss": 1.826, "step": 17484 }, { "epoch": 0.4692196221554315, "grad_norm": 0.33203125, "learning_rate": 0.0011349893114034875, "loss": 1.8736, "step": 17485 }, { "epoch": 0.46924645770717044, "grad_norm": 0.318359375, "learning_rate": 0.0011349765881701933, "loss": 1.7663, "step": 17486 }, { "epoch": 0.46927329325890943, "grad_norm": 0.330078125, "learning_rate": 0.0011349638637633177, "loss": 1.9412, "step": 17487 }, { "epoch": 0.46930012881064836, "grad_norm": 0.330078125, "learning_rate": 0.0011349511381828889, "loss": 1.8795, "step": 17488 }, { "epoch": 0.4693269643623873, "grad_norm": 0.330078125, "learning_rate": 0.0011349384114289346, "loss": 1.8096, "step": 17489 }, { "epoch": 0.46935379991412624, "grad_norm": 0.326171875, "learning_rate": 0.001134925683501483, "loss": 1.8905, "step": 17490 }, { "epoch": 0.46938063546586517, "grad_norm": 0.318359375, "learning_rate": 0.0011349129544005617, "loss": 1.7682, "step": 17491 }, { "epoch": 0.4694074710176041, "grad_norm": 0.31640625, "learning_rate": 0.0011349002241261986, "loss": 1.8089, "step": 17492 }, { "epoch": 0.46943430656934304, "grad_norm": 0.376953125, "learning_rate": 0.001134887492678422, "loss": 2.0363, "step": 17493 }, { "epoch": 0.46946114212108203, "grad_norm": 0.392578125, "learning_rate": 0.0011348747600572594, "loss": 2.1132, "step": 17494 }, { "epoch": 0.46948797767282097, "grad_norm": 0.35546875, "learning_rate": 0.0011348620262627393, "loss": 2.0819, "step": 17495 }, { "epoch": 0.4695148132245599, "grad_norm": 0.345703125, "learning_rate": 0.0011348492912948889, "loss": 2.0835, "step": 17496 }, { "epoch": 0.46954164877629884, "grad_norm": 0.333984375, "learning_rate": 0.0011348365551537367, "loss": 2.0502, "step": 17497 }, { "epoch": 0.46956848432803777, "grad_norm": 0.326171875, "learning_rate": 0.0011348238178393102, "loss": 2.0203, "step": 17498 }, { "epoch": 0.4695953198797767, "grad_norm": 0.322265625, "learning_rate": 0.0011348110793516378, "loss": 1.9834, "step": 17499 }, { "epoch": 0.4696221554315157, "grad_norm": 0.330078125, "learning_rate": 0.0011347983396907473, "loss": 2.0815, "step": 17500 }, { "epoch": 0.46964899098325463, "grad_norm": 0.328125, "learning_rate": 0.0011347855988566666, "loss": 2.0966, "step": 17501 }, { "epoch": 0.46967582653499357, "grad_norm": 0.3125, "learning_rate": 0.0011347728568494235, "loss": 2.0385, "step": 17502 }, { "epoch": 0.4697026620867325, "grad_norm": 0.30859375, "learning_rate": 0.001134760113669046, "loss": 2.0403, "step": 17503 }, { "epoch": 0.46972949763847144, "grad_norm": 0.318359375, "learning_rate": 0.0011347473693155623, "loss": 2.0947, "step": 17504 }, { "epoch": 0.4697563331902104, "grad_norm": 0.310546875, "learning_rate": 0.0011347346237890001, "loss": 2.0638, "step": 17505 }, { "epoch": 0.4697831687419493, "grad_norm": 0.30859375, "learning_rate": 0.0011347218770893877, "loss": 1.9974, "step": 17506 }, { "epoch": 0.4698100042936883, "grad_norm": 0.30859375, "learning_rate": 0.0011347091292167526, "loss": 1.9863, "step": 17507 }, { "epoch": 0.46983683984542723, "grad_norm": 0.314453125, "learning_rate": 0.001134696380171123, "loss": 1.9792, "step": 17508 }, { "epoch": 0.46986367539716617, "grad_norm": 0.310546875, "learning_rate": 0.001134683629952527, "loss": 1.9855, "step": 17509 }, { "epoch": 0.4698905109489051, "grad_norm": 0.31640625, "learning_rate": 0.0011346708785609921, "loss": 2.023, "step": 17510 }, { "epoch": 0.46991734650064404, "grad_norm": 0.328125, "learning_rate": 0.0011346581259965467, "loss": 2.0843, "step": 17511 }, { "epoch": 0.469944182052383, "grad_norm": 0.31640625, "learning_rate": 0.001134645372259219, "loss": 2.0389, "step": 17512 }, { "epoch": 0.46997101760412197, "grad_norm": 0.314453125, "learning_rate": 0.0011346326173490363, "loss": 1.9817, "step": 17513 }, { "epoch": 0.4699978531558609, "grad_norm": 0.31640625, "learning_rate": 0.0011346198612660272, "loss": 2.0544, "step": 17514 }, { "epoch": 0.47002468870759984, "grad_norm": 0.314453125, "learning_rate": 0.0011346071040102192, "loss": 2.0614, "step": 17515 }, { "epoch": 0.47005152425933877, "grad_norm": 0.30859375, "learning_rate": 0.0011345943455816406, "loss": 1.955, "step": 17516 }, { "epoch": 0.4700783598110777, "grad_norm": 0.3125, "learning_rate": 0.0011345815859803191, "loss": 1.9964, "step": 17517 }, { "epoch": 0.47010519536281664, "grad_norm": 0.31640625, "learning_rate": 0.001134568825206283, "loss": 2.0536, "step": 17518 }, { "epoch": 0.4701320309145556, "grad_norm": 0.310546875, "learning_rate": 0.0011345560632595602, "loss": 1.9983, "step": 17519 }, { "epoch": 0.47015886646629457, "grad_norm": 0.310546875, "learning_rate": 0.0011345433001401786, "loss": 1.8829, "step": 17520 }, { "epoch": 0.4701857020180335, "grad_norm": 0.318359375, "learning_rate": 0.0011345305358481663, "loss": 2.0451, "step": 17521 }, { "epoch": 0.47021253756977244, "grad_norm": 0.31640625, "learning_rate": 0.0011345177703835515, "loss": 1.9141, "step": 17522 }, { "epoch": 0.4702393731215114, "grad_norm": 0.3046875, "learning_rate": 0.0011345050037463616, "loss": 1.9313, "step": 17523 }, { "epoch": 0.4702662086732503, "grad_norm": 0.30859375, "learning_rate": 0.001134492235936625, "loss": 1.9172, "step": 17524 }, { "epoch": 0.47029304422498924, "grad_norm": 0.3203125, "learning_rate": 0.0011344794669543698, "loss": 2.0174, "step": 17525 }, { "epoch": 0.47031987977672823, "grad_norm": 0.3046875, "learning_rate": 0.001134466696799624, "loss": 1.8852, "step": 17526 }, { "epoch": 0.47034671532846717, "grad_norm": 0.3046875, "learning_rate": 0.0011344539254724154, "loss": 1.9859, "step": 17527 }, { "epoch": 0.4703735508802061, "grad_norm": 0.314453125, "learning_rate": 0.0011344411529727722, "loss": 1.9568, "step": 17528 }, { "epoch": 0.47040038643194504, "grad_norm": 0.310546875, "learning_rate": 0.0011344283793007222, "loss": 1.9318, "step": 17529 }, { "epoch": 0.470427221983684, "grad_norm": 0.31640625, "learning_rate": 0.0011344156044562938, "loss": 1.9597, "step": 17530 }, { "epoch": 0.4704540575354229, "grad_norm": 0.31640625, "learning_rate": 0.0011344028284395145, "loss": 2.0312, "step": 17531 }, { "epoch": 0.47048089308716184, "grad_norm": 0.3046875, "learning_rate": 0.0011343900512504126, "loss": 1.9587, "step": 17532 }, { "epoch": 0.47050772863890084, "grad_norm": 0.3046875, "learning_rate": 0.0011343772728890163, "loss": 1.9519, "step": 17533 }, { "epoch": 0.47053456419063977, "grad_norm": 0.314453125, "learning_rate": 0.0011343644933553537, "loss": 1.8803, "step": 17534 }, { "epoch": 0.4705613997423787, "grad_norm": 0.30859375, "learning_rate": 0.0011343517126494523, "loss": 1.877, "step": 17535 }, { "epoch": 0.47058823529411764, "grad_norm": 0.30859375, "learning_rate": 0.0011343389307713406, "loss": 2.0279, "step": 17536 }, { "epoch": 0.4706150708458566, "grad_norm": 0.3046875, "learning_rate": 0.0011343261477210463, "loss": 1.861, "step": 17537 }, { "epoch": 0.4706419063975955, "grad_norm": 0.3125, "learning_rate": 0.0011343133634985978, "loss": 1.9901, "step": 17538 }, { "epoch": 0.4706687419493345, "grad_norm": 0.3125, "learning_rate": 0.001134300578104023, "loss": 2.0753, "step": 17539 }, { "epoch": 0.47069557750107344, "grad_norm": 0.302734375, "learning_rate": 0.0011342877915373499, "loss": 1.9243, "step": 17540 }, { "epoch": 0.47072241305281237, "grad_norm": 0.310546875, "learning_rate": 0.0011342750037986065, "loss": 1.8905, "step": 17541 }, { "epoch": 0.4707492486045513, "grad_norm": 0.3203125, "learning_rate": 0.001134262214887821, "loss": 1.9963, "step": 17542 }, { "epoch": 0.47077608415629024, "grad_norm": 0.3046875, "learning_rate": 0.0011342494248050214, "loss": 1.9789, "step": 17543 }, { "epoch": 0.4708029197080292, "grad_norm": 0.3046875, "learning_rate": 0.0011342366335502358, "loss": 1.901, "step": 17544 }, { "epoch": 0.47082975525976817, "grad_norm": 0.306640625, "learning_rate": 0.001134223841123492, "loss": 1.96, "step": 17545 }, { "epoch": 0.4708565908115071, "grad_norm": 0.306640625, "learning_rate": 0.0011342110475248183, "loss": 1.9281, "step": 17546 }, { "epoch": 0.47088342636324604, "grad_norm": 0.306640625, "learning_rate": 0.0011341982527542428, "loss": 1.9307, "step": 17547 }, { "epoch": 0.470910261914985, "grad_norm": 0.310546875, "learning_rate": 0.0011341854568117935, "loss": 2.0302, "step": 17548 }, { "epoch": 0.4709370974667239, "grad_norm": 0.3046875, "learning_rate": 0.0011341726596974986, "loss": 1.8571, "step": 17549 }, { "epoch": 0.47096393301846284, "grad_norm": 0.298828125, "learning_rate": 0.0011341598614113858, "loss": 1.9548, "step": 17550 }, { "epoch": 0.4709907685702018, "grad_norm": 0.3046875, "learning_rate": 0.0011341470619534836, "loss": 1.8864, "step": 17551 }, { "epoch": 0.47101760412194077, "grad_norm": 0.3046875, "learning_rate": 0.0011341342613238198, "loss": 1.9619, "step": 17552 }, { "epoch": 0.4710444396736797, "grad_norm": 0.3125, "learning_rate": 0.0011341214595224226, "loss": 1.9632, "step": 17553 }, { "epoch": 0.47107127522541864, "grad_norm": 0.30078125, "learning_rate": 0.00113410865654932, "loss": 1.9118, "step": 17554 }, { "epoch": 0.4710981107771576, "grad_norm": 0.31640625, "learning_rate": 0.0011340958524045403, "loss": 2.0348, "step": 17555 }, { "epoch": 0.4711249463288965, "grad_norm": 0.31640625, "learning_rate": 0.0011340830470881111, "loss": 1.8855, "step": 17556 }, { "epoch": 0.47115178188063545, "grad_norm": 0.3515625, "learning_rate": 0.0011340702406000612, "loss": 1.9089, "step": 17557 }, { "epoch": 0.47117861743237444, "grad_norm": 0.31640625, "learning_rate": 0.001134057432940418, "loss": 2.0201, "step": 17558 }, { "epoch": 0.47120545298411337, "grad_norm": 0.3046875, "learning_rate": 0.0011340446241092101, "loss": 1.9511, "step": 17559 }, { "epoch": 0.4712322885358523, "grad_norm": 0.3046875, "learning_rate": 0.0011340318141064656, "loss": 1.8257, "step": 17560 }, { "epoch": 0.47125912408759124, "grad_norm": 0.31640625, "learning_rate": 0.001134019002932212, "loss": 1.937, "step": 17561 }, { "epoch": 0.4712859596393302, "grad_norm": 0.31640625, "learning_rate": 0.0011340061905864781, "loss": 1.9549, "step": 17562 }, { "epoch": 0.4713127951910691, "grad_norm": 0.3125, "learning_rate": 0.0011339933770692916, "loss": 1.945, "step": 17563 }, { "epoch": 0.47133963074280805, "grad_norm": 0.3046875, "learning_rate": 0.0011339805623806808, "loss": 1.8885, "step": 17564 }, { "epoch": 0.47136646629454704, "grad_norm": 0.306640625, "learning_rate": 0.0011339677465206737, "loss": 1.8422, "step": 17565 }, { "epoch": 0.471393301846286, "grad_norm": 0.326171875, "learning_rate": 0.0011339549294892986, "loss": 2.0309, "step": 17566 }, { "epoch": 0.4714201373980249, "grad_norm": 0.302734375, "learning_rate": 0.0011339421112865832, "loss": 1.8146, "step": 17567 }, { "epoch": 0.47144697294976384, "grad_norm": 0.30078125, "learning_rate": 0.001133929291912556, "loss": 1.8216, "step": 17568 }, { "epoch": 0.4714738085015028, "grad_norm": 0.31640625, "learning_rate": 0.0011339164713672452, "loss": 1.9302, "step": 17569 }, { "epoch": 0.4715006440532417, "grad_norm": 0.31640625, "learning_rate": 0.0011339036496506786, "loss": 1.8748, "step": 17570 }, { "epoch": 0.4715274796049807, "grad_norm": 0.302734375, "learning_rate": 0.0011338908267628845, "loss": 1.8484, "step": 17571 }, { "epoch": 0.47155431515671964, "grad_norm": 0.310546875, "learning_rate": 0.001133878002703891, "loss": 1.9025, "step": 17572 }, { "epoch": 0.4715811507084586, "grad_norm": 0.326171875, "learning_rate": 0.0011338651774737262, "loss": 1.9446, "step": 17573 }, { "epoch": 0.4716079862601975, "grad_norm": 0.306640625, "learning_rate": 0.0011338523510724182, "loss": 1.8956, "step": 17574 }, { "epoch": 0.47163482181193644, "grad_norm": 0.314453125, "learning_rate": 0.0011338395234999955, "loss": 1.9029, "step": 17575 }, { "epoch": 0.4716616573636754, "grad_norm": 0.306640625, "learning_rate": 0.0011338266947564855, "loss": 1.8519, "step": 17576 }, { "epoch": 0.4716884929154143, "grad_norm": 0.306640625, "learning_rate": 0.0011338138648419173, "loss": 1.8854, "step": 17577 }, { "epoch": 0.4717153284671533, "grad_norm": 0.3125, "learning_rate": 0.001133801033756318, "loss": 1.9653, "step": 17578 }, { "epoch": 0.47174216401889224, "grad_norm": 0.310546875, "learning_rate": 0.0011337882014997166, "loss": 1.9406, "step": 17579 }, { "epoch": 0.4717689995706312, "grad_norm": 0.314453125, "learning_rate": 0.001133775368072141, "loss": 1.9255, "step": 17580 }, { "epoch": 0.4717958351223701, "grad_norm": 0.3203125, "learning_rate": 0.001133762533473619, "loss": 1.9823, "step": 17581 }, { "epoch": 0.47182267067410905, "grad_norm": 0.310546875, "learning_rate": 0.0011337496977041792, "loss": 1.81, "step": 17582 }, { "epoch": 0.471849506225848, "grad_norm": 0.3125, "learning_rate": 0.0011337368607638497, "loss": 1.9146, "step": 17583 }, { "epoch": 0.47187634177758697, "grad_norm": 0.318359375, "learning_rate": 0.0011337240226526584, "loss": 1.8573, "step": 17584 }, { "epoch": 0.4719031773293259, "grad_norm": 0.322265625, "learning_rate": 0.0011337111833706335, "loss": 1.9433, "step": 17585 }, { "epoch": 0.47193001288106484, "grad_norm": 0.3203125, "learning_rate": 0.0011336983429178036, "loss": 1.9038, "step": 17586 }, { "epoch": 0.4719568484328038, "grad_norm": 0.310546875, "learning_rate": 0.0011336855012941962, "loss": 1.9914, "step": 17587 }, { "epoch": 0.4719836839845427, "grad_norm": 0.314453125, "learning_rate": 0.00113367265849984, "loss": 1.9531, "step": 17588 }, { "epoch": 0.47201051953628165, "grad_norm": 0.318359375, "learning_rate": 0.0011336598145347632, "loss": 1.9662, "step": 17589 }, { "epoch": 0.4720373550880206, "grad_norm": 0.296875, "learning_rate": 0.0011336469693989935, "loss": 1.7812, "step": 17590 }, { "epoch": 0.4720641906397596, "grad_norm": 0.322265625, "learning_rate": 0.0011336341230925593, "loss": 1.9351, "step": 17591 }, { "epoch": 0.4720910261914985, "grad_norm": 0.314453125, "learning_rate": 0.001133621275615489, "loss": 1.9075, "step": 17592 }, { "epoch": 0.47211786174323744, "grad_norm": 0.31640625, "learning_rate": 0.0011336084269678104, "loss": 1.9016, "step": 17593 }, { "epoch": 0.4721446972949764, "grad_norm": 0.32421875, "learning_rate": 0.0011335955771495524, "loss": 1.8906, "step": 17594 }, { "epoch": 0.4721715328467153, "grad_norm": 0.310546875, "learning_rate": 0.0011335827261607422, "loss": 1.9279, "step": 17595 }, { "epoch": 0.47219836839845425, "grad_norm": 0.330078125, "learning_rate": 0.0011335698740014086, "loss": 1.8448, "step": 17596 }, { "epoch": 0.47222520395019324, "grad_norm": 0.3125, "learning_rate": 0.0011335570206715798, "loss": 1.7732, "step": 17597 }, { "epoch": 0.4722520395019322, "grad_norm": 0.3046875, "learning_rate": 0.0011335441661712838, "loss": 1.8485, "step": 17598 }, { "epoch": 0.4722788750536711, "grad_norm": 0.30078125, "learning_rate": 0.0011335313105005488, "loss": 1.7441, "step": 17599 }, { "epoch": 0.47230571060541005, "grad_norm": 0.294921875, "learning_rate": 0.001133518453659403, "loss": 1.7717, "step": 17600 }, { "epoch": 0.472332546157149, "grad_norm": 0.3125, "learning_rate": 0.001133505595647875, "loss": 1.8398, "step": 17601 }, { "epoch": 0.4723593817088879, "grad_norm": 0.318359375, "learning_rate": 0.0011334927364659924, "loss": 1.9332, "step": 17602 }, { "epoch": 0.4723862172606269, "grad_norm": 0.3203125, "learning_rate": 0.001133479876113784, "loss": 1.8541, "step": 17603 }, { "epoch": 0.47241305281236584, "grad_norm": 0.3125, "learning_rate": 0.0011334670145912774, "loss": 1.8979, "step": 17604 }, { "epoch": 0.4724398883641048, "grad_norm": 0.3125, "learning_rate": 0.001133454151898501, "loss": 1.9394, "step": 17605 }, { "epoch": 0.4724667239158437, "grad_norm": 0.302734375, "learning_rate": 0.0011334412880354833, "loss": 1.8525, "step": 17606 }, { "epoch": 0.47249355946758265, "grad_norm": 0.298828125, "learning_rate": 0.0011334284230022525, "loss": 1.7881, "step": 17607 }, { "epoch": 0.4725203950193216, "grad_norm": 0.30859375, "learning_rate": 0.0011334155567988367, "loss": 1.8522, "step": 17608 }, { "epoch": 0.4725472305710605, "grad_norm": 0.314453125, "learning_rate": 0.0011334026894252638, "loss": 1.8919, "step": 17609 }, { "epoch": 0.4725740661227995, "grad_norm": 0.314453125, "learning_rate": 0.0011333898208815626, "loss": 1.8853, "step": 17610 }, { "epoch": 0.47260090167453844, "grad_norm": 0.318359375, "learning_rate": 0.001133376951167761, "loss": 1.8955, "step": 17611 }, { "epoch": 0.4726277372262774, "grad_norm": 0.3203125, "learning_rate": 0.0011333640802838872, "loss": 1.8957, "step": 17612 }, { "epoch": 0.4726545727780163, "grad_norm": 0.326171875, "learning_rate": 0.0011333512082299696, "loss": 1.9596, "step": 17613 }, { "epoch": 0.47268140832975525, "grad_norm": 0.314453125, "learning_rate": 0.0011333383350060364, "loss": 1.874, "step": 17614 }, { "epoch": 0.4727082438814942, "grad_norm": 0.306640625, "learning_rate": 0.0011333254606121156, "loss": 1.8443, "step": 17615 }, { "epoch": 0.4727350794332332, "grad_norm": 0.31640625, "learning_rate": 0.0011333125850482358, "loss": 1.9023, "step": 17616 }, { "epoch": 0.4727619149849721, "grad_norm": 0.31640625, "learning_rate": 0.001133299708314425, "loss": 1.8226, "step": 17617 }, { "epoch": 0.47278875053671104, "grad_norm": 0.310546875, "learning_rate": 0.0011332868304107118, "loss": 1.8626, "step": 17618 }, { "epoch": 0.47281558608845, "grad_norm": 0.298828125, "learning_rate": 0.0011332739513371238, "loss": 1.7938, "step": 17619 }, { "epoch": 0.4728424216401889, "grad_norm": 0.3203125, "learning_rate": 0.00113326107109369, "loss": 1.9355, "step": 17620 }, { "epoch": 0.47286925719192785, "grad_norm": 0.3046875, "learning_rate": 0.0011332481896804381, "loss": 1.8179, "step": 17621 }, { "epoch": 0.4728960927436668, "grad_norm": 0.3125, "learning_rate": 0.0011332353070973966, "loss": 1.8372, "step": 17622 }, { "epoch": 0.4729229282954058, "grad_norm": 0.30859375, "learning_rate": 0.0011332224233445937, "loss": 1.8711, "step": 17623 }, { "epoch": 0.4729497638471447, "grad_norm": 0.3125, "learning_rate": 0.0011332095384220576, "loss": 1.8568, "step": 17624 }, { "epoch": 0.47297659939888365, "grad_norm": 0.31640625, "learning_rate": 0.0011331966523298168, "loss": 1.7815, "step": 17625 }, { "epoch": 0.4730034349506226, "grad_norm": 0.328125, "learning_rate": 0.0011331837650678995, "loss": 1.8634, "step": 17626 }, { "epoch": 0.4730302705023615, "grad_norm": 0.310546875, "learning_rate": 0.0011331708766363336, "loss": 1.8627, "step": 17627 }, { "epoch": 0.47305710605410045, "grad_norm": 0.3125, "learning_rate": 0.001133157987035148, "loss": 1.892, "step": 17628 }, { "epoch": 0.47308394160583944, "grad_norm": 0.32421875, "learning_rate": 0.0011331450962643703, "loss": 1.9425, "step": 17629 }, { "epoch": 0.4731107771575784, "grad_norm": 0.30859375, "learning_rate": 0.0011331322043240293, "loss": 1.8806, "step": 17630 }, { "epoch": 0.4731376127093173, "grad_norm": 0.310546875, "learning_rate": 0.0011331193112141531, "loss": 1.7608, "step": 17631 }, { "epoch": 0.47316444826105625, "grad_norm": 0.306640625, "learning_rate": 0.00113310641693477, "loss": 1.7786, "step": 17632 }, { "epoch": 0.4731912838127952, "grad_norm": 0.322265625, "learning_rate": 0.001133093521485908, "loss": 1.8882, "step": 17633 }, { "epoch": 0.4732181193645341, "grad_norm": 0.302734375, "learning_rate": 0.0011330806248675959, "loss": 1.7757, "step": 17634 }, { "epoch": 0.47324495491627305, "grad_norm": 0.310546875, "learning_rate": 0.0011330677270798616, "loss": 1.8388, "step": 17635 }, { "epoch": 0.47327179046801204, "grad_norm": 0.318359375, "learning_rate": 0.0011330548281227337, "loss": 1.8311, "step": 17636 }, { "epoch": 0.473298626019751, "grad_norm": 0.314453125, "learning_rate": 0.0011330419279962402, "loss": 1.8646, "step": 17637 }, { "epoch": 0.4733254615714899, "grad_norm": 0.314453125, "learning_rate": 0.0011330290267004095, "loss": 1.9028, "step": 17638 }, { "epoch": 0.47335229712322885, "grad_norm": 0.318359375, "learning_rate": 0.00113301612423527, "loss": 1.9044, "step": 17639 }, { "epoch": 0.4733791326749678, "grad_norm": 0.314453125, "learning_rate": 0.0011330032206008501, "loss": 1.8583, "step": 17640 }, { "epoch": 0.4734059682267067, "grad_norm": 0.326171875, "learning_rate": 0.0011329903157971779, "loss": 1.8978, "step": 17641 }, { "epoch": 0.4734328037784457, "grad_norm": 0.3203125, "learning_rate": 0.0011329774098242816, "loss": 1.8131, "step": 17642 }, { "epoch": 0.47345963933018465, "grad_norm": 0.314453125, "learning_rate": 0.0011329645026821897, "loss": 1.8419, "step": 17643 }, { "epoch": 0.4734864748819236, "grad_norm": 0.314453125, "learning_rate": 0.0011329515943709306, "loss": 1.8347, "step": 17644 }, { "epoch": 0.4735133104336625, "grad_norm": 0.310546875, "learning_rate": 0.0011329386848905323, "loss": 1.7586, "step": 17645 }, { "epoch": 0.47354014598540145, "grad_norm": 0.318359375, "learning_rate": 0.0011329257742410232, "loss": 1.8775, "step": 17646 }, { "epoch": 0.4735669815371404, "grad_norm": 0.318359375, "learning_rate": 0.001132912862422432, "loss": 1.8133, "step": 17647 }, { "epoch": 0.4735938170888793, "grad_norm": 0.3125, "learning_rate": 0.0011328999494347868, "loss": 1.8332, "step": 17648 }, { "epoch": 0.4736206526406183, "grad_norm": 0.310546875, "learning_rate": 0.0011328870352781159, "loss": 1.7533, "step": 17649 }, { "epoch": 0.47364748819235725, "grad_norm": 0.31640625, "learning_rate": 0.0011328741199524476, "loss": 1.8583, "step": 17650 }, { "epoch": 0.4736743237440962, "grad_norm": 0.31640625, "learning_rate": 0.0011328612034578103, "loss": 1.7799, "step": 17651 }, { "epoch": 0.4737011592958351, "grad_norm": 0.3125, "learning_rate": 0.0011328482857942321, "loss": 1.7823, "step": 17652 }, { "epoch": 0.47372799484757405, "grad_norm": 0.32421875, "learning_rate": 0.0011328353669617414, "loss": 1.9236, "step": 17653 }, { "epoch": 0.473754830399313, "grad_norm": 0.326171875, "learning_rate": 0.001132822446960367, "loss": 1.8242, "step": 17654 }, { "epoch": 0.473781665951052, "grad_norm": 0.31640625, "learning_rate": 0.0011328095257901367, "loss": 1.7673, "step": 17655 }, { "epoch": 0.4738085015027909, "grad_norm": 0.318359375, "learning_rate": 0.0011327966034510794, "loss": 1.8288, "step": 17656 }, { "epoch": 0.47383533705452985, "grad_norm": 0.326171875, "learning_rate": 0.0011327836799432227, "loss": 1.8792, "step": 17657 }, { "epoch": 0.4738621726062688, "grad_norm": 0.40234375, "learning_rate": 0.0011327707552665955, "loss": 2.2347, "step": 17658 }, { "epoch": 0.4738890081580077, "grad_norm": 0.376953125, "learning_rate": 0.001132757829421226, "loss": 2.1301, "step": 17659 }, { "epoch": 0.47391584370974665, "grad_norm": 0.357421875, "learning_rate": 0.0011327449024071427, "loss": 2.0805, "step": 17660 }, { "epoch": 0.4739426792614856, "grad_norm": 0.337890625, "learning_rate": 0.0011327319742243736, "loss": 2.065, "step": 17661 }, { "epoch": 0.4739695148132246, "grad_norm": 0.333984375, "learning_rate": 0.0011327190448729473, "loss": 2.0953, "step": 17662 }, { "epoch": 0.4739963503649635, "grad_norm": 0.333984375, "learning_rate": 0.0011327061143528922, "loss": 2.0754, "step": 17663 }, { "epoch": 0.47402318591670245, "grad_norm": 0.33203125, "learning_rate": 0.0011326931826642365, "loss": 2.0681, "step": 17664 }, { "epoch": 0.4740500214684414, "grad_norm": 0.337890625, "learning_rate": 0.0011326802498070091, "loss": 2.0741, "step": 17665 }, { "epoch": 0.4740768570201803, "grad_norm": 0.318359375, "learning_rate": 0.0011326673157812375, "loss": 2.0833, "step": 17666 }, { "epoch": 0.47410369257191926, "grad_norm": 0.314453125, "learning_rate": 0.0011326543805869507, "loss": 2.0164, "step": 17667 }, { "epoch": 0.47413052812365825, "grad_norm": 0.33203125, "learning_rate": 0.001132641444224177, "loss": 2.1512, "step": 17668 }, { "epoch": 0.4741573636753972, "grad_norm": 0.3125, "learning_rate": 0.0011326285066929444, "loss": 2.0241, "step": 17669 }, { "epoch": 0.4741841992271361, "grad_norm": 0.322265625, "learning_rate": 0.0011326155679932818, "loss": 2.1119, "step": 17670 }, { "epoch": 0.47421103477887505, "grad_norm": 0.314453125, "learning_rate": 0.0011326026281252173, "loss": 2.0396, "step": 17671 }, { "epoch": 0.474237870330614, "grad_norm": 0.306640625, "learning_rate": 0.0011325896870887791, "loss": 2.0244, "step": 17672 }, { "epoch": 0.4742647058823529, "grad_norm": 0.32421875, "learning_rate": 0.001132576744883996, "loss": 2.1265, "step": 17673 }, { "epoch": 0.4742915414340919, "grad_norm": 0.326171875, "learning_rate": 0.0011325638015108963, "loss": 2.0336, "step": 17674 }, { "epoch": 0.47431837698583085, "grad_norm": 0.31640625, "learning_rate": 0.0011325508569695081, "loss": 1.9842, "step": 17675 }, { "epoch": 0.4743452125375698, "grad_norm": 0.3046875, "learning_rate": 0.0011325379112598602, "loss": 1.9602, "step": 17676 }, { "epoch": 0.4743720480893087, "grad_norm": 0.310546875, "learning_rate": 0.0011325249643819807, "loss": 2.0464, "step": 17677 }, { "epoch": 0.47439888364104765, "grad_norm": 0.32421875, "learning_rate": 0.0011325120163358981, "loss": 2.1007, "step": 17678 }, { "epoch": 0.4744257191927866, "grad_norm": 0.328125, "learning_rate": 0.0011324990671216408, "loss": 2.1325, "step": 17679 }, { "epoch": 0.4744525547445255, "grad_norm": 0.326171875, "learning_rate": 0.0011324861167392372, "loss": 2.1946, "step": 17680 }, { "epoch": 0.4744793902962645, "grad_norm": 0.322265625, "learning_rate": 0.0011324731651887157, "loss": 2.0963, "step": 17681 }, { "epoch": 0.47450622584800345, "grad_norm": 0.3125, "learning_rate": 0.001132460212470105, "loss": 1.9766, "step": 17682 }, { "epoch": 0.4745330613997424, "grad_norm": 0.3125, "learning_rate": 0.0011324472585834328, "loss": 1.9969, "step": 17683 }, { "epoch": 0.4745598969514813, "grad_norm": 0.314453125, "learning_rate": 0.0011324343035287283, "loss": 2.0817, "step": 17684 }, { "epoch": 0.47458673250322025, "grad_norm": 0.30859375, "learning_rate": 0.0011324213473060192, "loss": 1.9661, "step": 17685 }, { "epoch": 0.4746135680549592, "grad_norm": 0.314453125, "learning_rate": 0.0011324083899153347, "loss": 2.0063, "step": 17686 }, { "epoch": 0.4746404036066982, "grad_norm": 0.31640625, "learning_rate": 0.0011323954313567028, "loss": 2.1111, "step": 17687 }, { "epoch": 0.4746672391584371, "grad_norm": 0.3125, "learning_rate": 0.0011323824716301517, "loss": 1.993, "step": 17688 }, { "epoch": 0.47469407471017605, "grad_norm": 0.30078125, "learning_rate": 0.0011323695107357103, "loss": 1.9614, "step": 17689 }, { "epoch": 0.474720910261915, "grad_norm": 0.310546875, "learning_rate": 0.0011323565486734068, "loss": 1.9938, "step": 17690 }, { "epoch": 0.4747477458136539, "grad_norm": 0.314453125, "learning_rate": 0.0011323435854432694, "loss": 2.0578, "step": 17691 }, { "epoch": 0.47477458136539286, "grad_norm": 0.30859375, "learning_rate": 0.0011323306210453271, "loss": 1.9662, "step": 17692 }, { "epoch": 0.4748014169171318, "grad_norm": 0.306640625, "learning_rate": 0.001132317655479608, "loss": 1.9524, "step": 17693 }, { "epoch": 0.4748282524688708, "grad_norm": 0.302734375, "learning_rate": 0.0011323046887461404, "loss": 1.8697, "step": 17694 }, { "epoch": 0.4748550880206097, "grad_norm": 0.306640625, "learning_rate": 0.001132291720844953, "loss": 1.9241, "step": 17695 }, { "epoch": 0.47488192357234865, "grad_norm": 0.3203125, "learning_rate": 0.0011322787517760744, "loss": 2.0733, "step": 17696 }, { "epoch": 0.4749087591240876, "grad_norm": 0.314453125, "learning_rate": 0.0011322657815395324, "loss": 1.974, "step": 17697 }, { "epoch": 0.4749355946758265, "grad_norm": 0.326171875, "learning_rate": 0.0011322528101353562, "loss": 2.0786, "step": 17698 }, { "epoch": 0.47496243022756546, "grad_norm": 0.322265625, "learning_rate": 0.0011322398375635738, "loss": 2.029, "step": 17699 }, { "epoch": 0.47498926577930445, "grad_norm": 0.314453125, "learning_rate": 0.0011322268638242138, "loss": 2.0956, "step": 17700 }, { "epoch": 0.4750161013310434, "grad_norm": 0.3203125, "learning_rate": 0.0011322138889173048, "loss": 1.9527, "step": 17701 }, { "epoch": 0.4750429368827823, "grad_norm": 0.30859375, "learning_rate": 0.0011322009128428747, "loss": 2.0418, "step": 17702 }, { "epoch": 0.47506977243452125, "grad_norm": 0.314453125, "learning_rate": 0.0011321879356009526, "loss": 2.1007, "step": 17703 }, { "epoch": 0.4750966079862602, "grad_norm": 0.30859375, "learning_rate": 0.0011321749571915668, "loss": 1.9381, "step": 17704 }, { "epoch": 0.4751234435379991, "grad_norm": 0.3046875, "learning_rate": 0.0011321619776147457, "loss": 1.9397, "step": 17705 }, { "epoch": 0.47515027908973806, "grad_norm": 0.310546875, "learning_rate": 0.0011321489968705177, "loss": 2.0424, "step": 17706 }, { "epoch": 0.47517711464147705, "grad_norm": 0.302734375, "learning_rate": 0.0011321360149589114, "loss": 1.9943, "step": 17707 }, { "epoch": 0.475203950193216, "grad_norm": 0.310546875, "learning_rate": 0.0011321230318799553, "loss": 1.9423, "step": 17708 }, { "epoch": 0.4752307857449549, "grad_norm": 0.31640625, "learning_rate": 0.0011321100476336778, "loss": 2.0271, "step": 17709 }, { "epoch": 0.47525762129669386, "grad_norm": 0.314453125, "learning_rate": 0.0011320970622201074, "loss": 1.9629, "step": 17710 }, { "epoch": 0.4752844568484328, "grad_norm": 0.310546875, "learning_rate": 0.0011320840756392726, "loss": 2.0133, "step": 17711 }, { "epoch": 0.4753112924001717, "grad_norm": 0.306640625, "learning_rate": 0.0011320710878912018, "loss": 1.9284, "step": 17712 }, { "epoch": 0.4753381279519107, "grad_norm": 0.30859375, "learning_rate": 0.0011320580989759236, "loss": 1.9479, "step": 17713 }, { "epoch": 0.47536496350364965, "grad_norm": 0.302734375, "learning_rate": 0.0011320451088934667, "loss": 1.9787, "step": 17714 }, { "epoch": 0.4753917990553886, "grad_norm": 0.30859375, "learning_rate": 0.0011320321176438591, "loss": 1.9522, "step": 17715 }, { "epoch": 0.4754186346071275, "grad_norm": 0.30078125, "learning_rate": 0.0011320191252271294, "loss": 1.9156, "step": 17716 }, { "epoch": 0.47544547015886646, "grad_norm": 0.30078125, "learning_rate": 0.0011320061316433066, "loss": 1.85, "step": 17717 }, { "epoch": 0.4754723057106054, "grad_norm": 0.306640625, "learning_rate": 0.0011319931368924188, "loss": 2.0155, "step": 17718 }, { "epoch": 0.4754991412623443, "grad_norm": 0.310546875, "learning_rate": 0.0011319801409744943, "loss": 2.0417, "step": 17719 }, { "epoch": 0.4755259768140833, "grad_norm": 0.318359375, "learning_rate": 0.0011319671438895622, "loss": 2.0504, "step": 17720 }, { "epoch": 0.47555281236582225, "grad_norm": 0.310546875, "learning_rate": 0.0011319541456376507, "loss": 1.9945, "step": 17721 }, { "epoch": 0.4755796479175612, "grad_norm": 0.3046875, "learning_rate": 0.001131941146218788, "loss": 2.0183, "step": 17722 }, { "epoch": 0.4756064834693001, "grad_norm": 0.302734375, "learning_rate": 0.0011319281456330032, "loss": 1.9711, "step": 17723 }, { "epoch": 0.47563331902103906, "grad_norm": 0.3046875, "learning_rate": 0.0011319151438803246, "loss": 1.9342, "step": 17724 }, { "epoch": 0.475660154572778, "grad_norm": 0.30859375, "learning_rate": 0.0011319021409607803, "loss": 1.8884, "step": 17725 }, { "epoch": 0.475686990124517, "grad_norm": 0.298828125, "learning_rate": 0.0011318891368743995, "loss": 1.932, "step": 17726 }, { "epoch": 0.4757138256762559, "grad_norm": 0.30859375, "learning_rate": 0.0011318761316212102, "loss": 1.907, "step": 17727 }, { "epoch": 0.47574066122799485, "grad_norm": 0.310546875, "learning_rate": 0.0011318631252012414, "loss": 1.9709, "step": 17728 }, { "epoch": 0.4757674967797338, "grad_norm": 0.326171875, "learning_rate": 0.0011318501176145212, "loss": 2.0106, "step": 17729 }, { "epoch": 0.4757943323314727, "grad_norm": 0.32421875, "learning_rate": 0.0011318371088610783, "loss": 2.0327, "step": 17730 }, { "epoch": 0.47582116788321166, "grad_norm": 0.330078125, "learning_rate": 0.0011318240989409414, "loss": 2.0711, "step": 17731 }, { "epoch": 0.4758480034349506, "grad_norm": 0.318359375, "learning_rate": 0.0011318110878541388, "loss": 2.0129, "step": 17732 }, { "epoch": 0.4758748389866896, "grad_norm": 0.30859375, "learning_rate": 0.0011317980756006991, "loss": 1.9363, "step": 17733 }, { "epoch": 0.4759016745384285, "grad_norm": 0.318359375, "learning_rate": 0.001131785062180651, "loss": 1.9794, "step": 17734 }, { "epoch": 0.47592851009016746, "grad_norm": 0.302734375, "learning_rate": 0.001131772047594023, "loss": 1.905, "step": 17735 }, { "epoch": 0.4759553456419064, "grad_norm": 0.32421875, "learning_rate": 0.0011317590318408433, "loss": 1.9785, "step": 17736 }, { "epoch": 0.4759821811936453, "grad_norm": 0.31640625, "learning_rate": 0.0011317460149211408, "loss": 2.0057, "step": 17737 }, { "epoch": 0.47600901674538426, "grad_norm": 0.294921875, "learning_rate": 0.0011317329968349441, "loss": 1.8, "step": 17738 }, { "epoch": 0.47603585229712325, "grad_norm": 0.3125, "learning_rate": 0.0011317199775822816, "loss": 1.929, "step": 17739 }, { "epoch": 0.4760626878488622, "grad_norm": 0.298828125, "learning_rate": 0.001131706957163182, "loss": 1.8508, "step": 17740 }, { "epoch": 0.4760895234006011, "grad_norm": 0.3125, "learning_rate": 0.0011316939355776737, "loss": 1.9809, "step": 17741 }, { "epoch": 0.47611635895234006, "grad_norm": 0.318359375, "learning_rate": 0.0011316809128257852, "loss": 2.0061, "step": 17742 }, { "epoch": 0.476143194504079, "grad_norm": 0.318359375, "learning_rate": 0.0011316678889075453, "loss": 1.9206, "step": 17743 }, { "epoch": 0.4761700300558179, "grad_norm": 0.306640625, "learning_rate": 0.0011316548638229825, "loss": 1.921, "step": 17744 }, { "epoch": 0.4761968656075569, "grad_norm": 0.328125, "learning_rate": 0.0011316418375721252, "loss": 2.0126, "step": 17745 }, { "epoch": 0.47622370115929585, "grad_norm": 0.314453125, "learning_rate": 0.0011316288101550023, "loss": 1.9232, "step": 17746 }, { "epoch": 0.4762505367110348, "grad_norm": 0.310546875, "learning_rate": 0.0011316157815716421, "loss": 1.8842, "step": 17747 }, { "epoch": 0.4762773722627737, "grad_norm": 0.31640625, "learning_rate": 0.0011316027518220733, "loss": 1.9594, "step": 17748 }, { "epoch": 0.47630420781451266, "grad_norm": 0.314453125, "learning_rate": 0.0011315897209063246, "loss": 1.8921, "step": 17749 }, { "epoch": 0.4763310433662516, "grad_norm": 0.3125, "learning_rate": 0.0011315766888244242, "loss": 1.9275, "step": 17750 }, { "epoch": 0.47635787891799053, "grad_norm": 0.3203125, "learning_rate": 0.001131563655576401, "loss": 2.0079, "step": 17751 }, { "epoch": 0.4763847144697295, "grad_norm": 0.31640625, "learning_rate": 0.0011315506211622835, "loss": 1.952, "step": 17752 }, { "epoch": 0.47641155002146846, "grad_norm": 0.3046875, "learning_rate": 0.0011315375855821006, "loss": 1.8711, "step": 17753 }, { "epoch": 0.4764383855732074, "grad_norm": 0.302734375, "learning_rate": 0.0011315245488358805, "loss": 1.8962, "step": 17754 }, { "epoch": 0.4764652211249463, "grad_norm": 0.3046875, "learning_rate": 0.0011315115109236517, "loss": 1.9004, "step": 17755 }, { "epoch": 0.47649205667668526, "grad_norm": 0.314453125, "learning_rate": 0.001131498471845443, "loss": 1.9628, "step": 17756 }, { "epoch": 0.4765188922284242, "grad_norm": 0.31640625, "learning_rate": 0.0011314854316012834, "loss": 1.9724, "step": 17757 }, { "epoch": 0.4765457277801632, "grad_norm": 0.3125, "learning_rate": 0.0011314723901912009, "loss": 1.8732, "step": 17758 }, { "epoch": 0.4765725633319021, "grad_norm": 0.322265625, "learning_rate": 0.0011314593476152244, "loss": 1.9352, "step": 17759 }, { "epoch": 0.47659939888364106, "grad_norm": 0.3203125, "learning_rate": 0.0011314463038733821, "loss": 1.9896, "step": 17760 }, { "epoch": 0.47662623443538, "grad_norm": 0.310546875, "learning_rate": 0.0011314332589657035, "loss": 1.8054, "step": 17761 }, { "epoch": 0.4766530699871189, "grad_norm": 0.31640625, "learning_rate": 0.0011314202128922164, "loss": 1.9255, "step": 17762 }, { "epoch": 0.47667990553885786, "grad_norm": 0.3203125, "learning_rate": 0.0011314071656529498, "loss": 1.9763, "step": 17763 }, { "epoch": 0.4767067410905968, "grad_norm": 0.306640625, "learning_rate": 0.001131394117247932, "loss": 1.8581, "step": 17764 }, { "epoch": 0.4767335766423358, "grad_norm": 0.314453125, "learning_rate": 0.001131381067677192, "loss": 1.9489, "step": 17765 }, { "epoch": 0.4767604121940747, "grad_norm": 0.3203125, "learning_rate": 0.0011313680169407585, "loss": 1.8934, "step": 17766 }, { "epoch": 0.47678724774581366, "grad_norm": 0.3125, "learning_rate": 0.0011313549650386597, "loss": 1.8751, "step": 17767 }, { "epoch": 0.4768140832975526, "grad_norm": 0.322265625, "learning_rate": 0.0011313419119709242, "loss": 1.9463, "step": 17768 }, { "epoch": 0.47684091884929153, "grad_norm": 0.31640625, "learning_rate": 0.001131328857737581, "loss": 1.9064, "step": 17769 }, { "epoch": 0.47686775440103046, "grad_norm": 0.328125, "learning_rate": 0.0011313158023386588, "loss": 2.0028, "step": 17770 }, { "epoch": 0.47689458995276945, "grad_norm": 0.3203125, "learning_rate": 0.001131302745774186, "loss": 1.8292, "step": 17771 }, { "epoch": 0.4769214255045084, "grad_norm": 0.3203125, "learning_rate": 0.0011312896880441912, "loss": 1.8831, "step": 17772 }, { "epoch": 0.4769482610562473, "grad_norm": 0.326171875, "learning_rate": 0.001131276629148703, "loss": 1.9557, "step": 17773 }, { "epoch": 0.47697509660798626, "grad_norm": 0.302734375, "learning_rate": 0.0011312635690877504, "loss": 1.76, "step": 17774 }, { "epoch": 0.4770019321597252, "grad_norm": 0.310546875, "learning_rate": 0.0011312505078613618, "loss": 1.8818, "step": 17775 }, { "epoch": 0.47702876771146413, "grad_norm": 0.30078125, "learning_rate": 0.001131237445469566, "loss": 1.8039, "step": 17776 }, { "epoch": 0.47705560326320307, "grad_norm": 0.318359375, "learning_rate": 0.0011312243819123912, "loss": 1.9122, "step": 17777 }, { "epoch": 0.47708243881494206, "grad_norm": 0.322265625, "learning_rate": 0.0011312113171898666, "loss": 1.95, "step": 17778 }, { "epoch": 0.477109274366681, "grad_norm": 0.328125, "learning_rate": 0.0011311982513020206, "loss": 1.9729, "step": 17779 }, { "epoch": 0.4771361099184199, "grad_norm": 0.30859375, "learning_rate": 0.001131185184248882, "loss": 1.8369, "step": 17780 }, { "epoch": 0.47716294547015886, "grad_norm": 0.314453125, "learning_rate": 0.0011311721160304792, "loss": 1.8031, "step": 17781 }, { "epoch": 0.4771897810218978, "grad_norm": 0.3125, "learning_rate": 0.0011311590466468412, "loss": 1.9009, "step": 17782 }, { "epoch": 0.47721661657363673, "grad_norm": 0.314453125, "learning_rate": 0.0011311459760979966, "loss": 1.8843, "step": 17783 }, { "epoch": 0.4772434521253757, "grad_norm": 0.30859375, "learning_rate": 0.001131132904383974, "loss": 1.8367, "step": 17784 }, { "epoch": 0.47727028767711466, "grad_norm": 0.328125, "learning_rate": 0.0011311198315048017, "loss": 1.9615, "step": 17785 }, { "epoch": 0.4772971232288536, "grad_norm": 0.3046875, "learning_rate": 0.0011311067574605092, "loss": 1.7893, "step": 17786 }, { "epoch": 0.4773239587805925, "grad_norm": 0.3203125, "learning_rate": 0.0011310936822511244, "loss": 1.933, "step": 17787 }, { "epoch": 0.47735079433233146, "grad_norm": 0.3203125, "learning_rate": 0.0011310806058766765, "loss": 1.8532, "step": 17788 }, { "epoch": 0.4773776298840704, "grad_norm": 0.322265625, "learning_rate": 0.001131067528337194, "loss": 1.9236, "step": 17789 }, { "epoch": 0.47740446543580933, "grad_norm": 0.3125, "learning_rate": 0.0011310544496327057, "loss": 1.8791, "step": 17790 }, { "epoch": 0.4774313009875483, "grad_norm": 0.31640625, "learning_rate": 0.0011310413697632397, "loss": 1.9083, "step": 17791 }, { "epoch": 0.47745813653928726, "grad_norm": 0.314453125, "learning_rate": 0.0011310282887288256, "loss": 1.9139, "step": 17792 }, { "epoch": 0.4774849720910262, "grad_norm": 0.298828125, "learning_rate": 0.0011310152065294916, "loss": 1.8217, "step": 17793 }, { "epoch": 0.47751180764276513, "grad_norm": 0.306640625, "learning_rate": 0.0011310021231652663, "loss": 1.799, "step": 17794 }, { "epoch": 0.47753864319450406, "grad_norm": 0.330078125, "learning_rate": 0.0011309890386361787, "loss": 2.004, "step": 17795 }, { "epoch": 0.477565478746243, "grad_norm": 0.328125, "learning_rate": 0.0011309759529422574, "loss": 1.9036, "step": 17796 }, { "epoch": 0.477592314297982, "grad_norm": 0.31640625, "learning_rate": 0.001130962866083531, "loss": 1.829, "step": 17797 }, { "epoch": 0.4776191498497209, "grad_norm": 0.314453125, "learning_rate": 0.0011309497780600283, "loss": 1.8953, "step": 17798 }, { "epoch": 0.47764598540145986, "grad_norm": 0.32421875, "learning_rate": 0.001130936688871778, "loss": 1.8814, "step": 17799 }, { "epoch": 0.4776728209531988, "grad_norm": 0.314453125, "learning_rate": 0.0011309235985188088, "loss": 1.8511, "step": 17800 }, { "epoch": 0.47769965650493773, "grad_norm": 0.3203125, "learning_rate": 0.0011309105070011493, "loss": 1.8763, "step": 17801 }, { "epoch": 0.47772649205667667, "grad_norm": 0.32421875, "learning_rate": 0.0011308974143188287, "loss": 1.9283, "step": 17802 }, { "epoch": 0.47775332760841566, "grad_norm": 0.345703125, "learning_rate": 0.001130884320471875, "loss": 2.0326, "step": 17803 }, { "epoch": 0.4777801631601546, "grad_norm": 0.32421875, "learning_rate": 0.0011308712254603174, "loss": 1.8445, "step": 17804 }, { "epoch": 0.4778069987118935, "grad_norm": 0.318359375, "learning_rate": 0.0011308581292841847, "loss": 1.8433, "step": 17805 }, { "epoch": 0.47783383426363246, "grad_norm": 0.318359375, "learning_rate": 0.001130845031943505, "loss": 1.8662, "step": 17806 }, { "epoch": 0.4778606698153714, "grad_norm": 0.3125, "learning_rate": 0.0011308319334383079, "loss": 1.7718, "step": 17807 }, { "epoch": 0.47788750536711033, "grad_norm": 0.3359375, "learning_rate": 0.0011308188337686214, "loss": 1.9266, "step": 17808 }, { "epoch": 0.47791434091884927, "grad_norm": 0.318359375, "learning_rate": 0.001130805732934475, "loss": 1.8851, "step": 17809 }, { "epoch": 0.47794117647058826, "grad_norm": 0.328125, "learning_rate": 0.0011307926309358967, "loss": 1.8561, "step": 17810 }, { "epoch": 0.4779680120223272, "grad_norm": 0.31640625, "learning_rate": 0.0011307795277729154, "loss": 1.8701, "step": 17811 }, { "epoch": 0.47799484757406613, "grad_norm": 0.330078125, "learning_rate": 0.0011307664234455601, "loss": 1.8322, "step": 17812 }, { "epoch": 0.47802168312580506, "grad_norm": 0.3125, "learning_rate": 0.0011307533179538594, "loss": 1.7488, "step": 17813 }, { "epoch": 0.478048518677544, "grad_norm": 0.326171875, "learning_rate": 0.0011307402112978422, "loss": 1.8028, "step": 17814 }, { "epoch": 0.47807535422928293, "grad_norm": 0.337890625, "learning_rate": 0.0011307271034775369, "loss": 1.9183, "step": 17815 }, { "epoch": 0.4781021897810219, "grad_norm": 0.357421875, "learning_rate": 0.0011307139944929728, "loss": 2.0023, "step": 17816 }, { "epoch": 0.47812902533276086, "grad_norm": 0.388671875, "learning_rate": 0.001130700884344178, "loss": 2.099, "step": 17817 }, { "epoch": 0.4781558608844998, "grad_norm": 0.3828125, "learning_rate": 0.0011306877730311816, "loss": 2.1747, "step": 17818 }, { "epoch": 0.47818269643623873, "grad_norm": 0.37890625, "learning_rate": 0.0011306746605540127, "loss": 2.2684, "step": 17819 }, { "epoch": 0.47820953198797767, "grad_norm": 0.333984375, "learning_rate": 0.0011306615469126994, "loss": 2.0915, "step": 17820 }, { "epoch": 0.4782363675397166, "grad_norm": 0.345703125, "learning_rate": 0.001130648432107271, "loss": 2.2282, "step": 17821 }, { "epoch": 0.47826320309145554, "grad_norm": 0.328125, "learning_rate": 0.0011306353161377558, "loss": 2.1196, "step": 17822 }, { "epoch": 0.4782900386431945, "grad_norm": 0.330078125, "learning_rate": 0.001130622199004183, "loss": 2.1348, "step": 17823 }, { "epoch": 0.47831687419493346, "grad_norm": 0.314453125, "learning_rate": 0.0011306090807065813, "loss": 2.0814, "step": 17824 }, { "epoch": 0.4783437097466724, "grad_norm": 0.30859375, "learning_rate": 0.0011305959612449793, "loss": 2.1043, "step": 17825 }, { "epoch": 0.47837054529841133, "grad_norm": 0.326171875, "learning_rate": 0.0011305828406194059, "loss": 2.1814, "step": 17826 }, { "epoch": 0.47839738085015027, "grad_norm": 0.326171875, "learning_rate": 0.0011305697188298898, "loss": 2.1558, "step": 17827 }, { "epoch": 0.4784242164018892, "grad_norm": 0.318359375, "learning_rate": 0.0011305565958764597, "loss": 2.1355, "step": 17828 }, { "epoch": 0.4784510519536282, "grad_norm": 0.326171875, "learning_rate": 0.0011305434717591447, "loss": 2.0711, "step": 17829 }, { "epoch": 0.4784778875053671, "grad_norm": 0.32421875, "learning_rate": 0.0011305303464779733, "loss": 2.0714, "step": 17830 }, { "epoch": 0.47850472305710606, "grad_norm": 0.32421875, "learning_rate": 0.0011305172200329744, "loss": 2.1508, "step": 17831 }, { "epoch": 0.478531558608845, "grad_norm": 0.31640625, "learning_rate": 0.0011305040924241769, "loss": 2.0786, "step": 17832 }, { "epoch": 0.47855839416058393, "grad_norm": 0.3125, "learning_rate": 0.0011304909636516095, "loss": 2.085, "step": 17833 }, { "epoch": 0.47858522971232287, "grad_norm": 0.3125, "learning_rate": 0.001130477833715301, "loss": 2.0422, "step": 17834 }, { "epoch": 0.4786120652640618, "grad_norm": 0.302734375, "learning_rate": 0.00113046470261528, "loss": 2.0424, "step": 17835 }, { "epoch": 0.4786389008158008, "grad_norm": 0.314453125, "learning_rate": 0.0011304515703515758, "loss": 2.0938, "step": 17836 }, { "epoch": 0.47866573636753973, "grad_norm": 0.310546875, "learning_rate": 0.0011304384369242167, "loss": 2.0646, "step": 17837 }, { "epoch": 0.47869257191927866, "grad_norm": 0.310546875, "learning_rate": 0.001130425302333232, "loss": 2.0635, "step": 17838 }, { "epoch": 0.4787194074710176, "grad_norm": 0.3125, "learning_rate": 0.00113041216657865, "loss": 2.0986, "step": 17839 }, { "epoch": 0.47874624302275653, "grad_norm": 0.318359375, "learning_rate": 0.0011303990296604998, "loss": 2.0909, "step": 17840 }, { "epoch": 0.47877307857449547, "grad_norm": 0.31640625, "learning_rate": 0.0011303858915788102, "loss": 2.0257, "step": 17841 }, { "epoch": 0.47879991412623446, "grad_norm": 0.314453125, "learning_rate": 0.00113037275233361, "loss": 2.1118, "step": 17842 }, { "epoch": 0.4788267496779734, "grad_norm": 0.322265625, "learning_rate": 0.0011303596119249279, "loss": 2.143, "step": 17843 }, { "epoch": 0.47885358522971233, "grad_norm": 0.31640625, "learning_rate": 0.001130346470352793, "loss": 2.0476, "step": 17844 }, { "epoch": 0.47888042078145127, "grad_norm": 0.314453125, "learning_rate": 0.001130333327617234, "loss": 2.0411, "step": 17845 }, { "epoch": 0.4789072563331902, "grad_norm": 0.3203125, "learning_rate": 0.0011303201837182795, "loss": 2.0804, "step": 17846 }, { "epoch": 0.47893409188492914, "grad_norm": 0.306640625, "learning_rate": 0.0011303070386559585, "loss": 2.0685, "step": 17847 }, { "epoch": 0.47896092743666807, "grad_norm": 0.310546875, "learning_rate": 0.0011302938924303001, "loss": 2.0339, "step": 17848 }, { "epoch": 0.47898776298840706, "grad_norm": 0.314453125, "learning_rate": 0.001130280745041333, "loss": 2.0497, "step": 17849 }, { "epoch": 0.479014598540146, "grad_norm": 0.31640625, "learning_rate": 0.0011302675964890857, "loss": 2.0999, "step": 17850 }, { "epoch": 0.47904143409188493, "grad_norm": 0.314453125, "learning_rate": 0.0011302544467735873, "loss": 2.0059, "step": 17851 }, { "epoch": 0.47906826964362387, "grad_norm": 0.318359375, "learning_rate": 0.001130241295894867, "loss": 2.0957, "step": 17852 }, { "epoch": 0.4790951051953628, "grad_norm": 0.326171875, "learning_rate": 0.001130228143852953, "loss": 2.0955, "step": 17853 }, { "epoch": 0.47912194074710174, "grad_norm": 0.310546875, "learning_rate": 0.0011302149906478743, "loss": 1.9601, "step": 17854 }, { "epoch": 0.47914877629884073, "grad_norm": 0.314453125, "learning_rate": 0.0011302018362796603, "loss": 2.0146, "step": 17855 }, { "epoch": 0.47917561185057966, "grad_norm": 0.30859375, "learning_rate": 0.0011301886807483393, "loss": 2.0253, "step": 17856 }, { "epoch": 0.4792024474023186, "grad_norm": 0.31640625, "learning_rate": 0.0011301755240539402, "loss": 2.0471, "step": 17857 }, { "epoch": 0.47922928295405753, "grad_norm": 0.3125, "learning_rate": 0.0011301623661964918, "loss": 2.0367, "step": 17858 }, { "epoch": 0.47925611850579647, "grad_norm": 0.31640625, "learning_rate": 0.0011301492071760237, "loss": 2.035, "step": 17859 }, { "epoch": 0.4792829540575354, "grad_norm": 0.3046875, "learning_rate": 0.001130136046992564, "loss": 1.9815, "step": 17860 }, { "epoch": 0.47930978960927434, "grad_norm": 0.31640625, "learning_rate": 0.0011301228856461416, "loss": 1.9935, "step": 17861 }, { "epoch": 0.47933662516101333, "grad_norm": 0.314453125, "learning_rate": 0.0011301097231367855, "loss": 2.0333, "step": 17862 }, { "epoch": 0.47936346071275227, "grad_norm": 0.3125, "learning_rate": 0.0011300965594645248, "loss": 2.0044, "step": 17863 }, { "epoch": 0.4793902962644912, "grad_norm": 0.31640625, "learning_rate": 0.0011300833946293882, "loss": 2.0317, "step": 17864 }, { "epoch": 0.47941713181623014, "grad_norm": 0.314453125, "learning_rate": 0.0011300702286314046, "loss": 2.0325, "step": 17865 }, { "epoch": 0.47944396736796907, "grad_norm": 0.306640625, "learning_rate": 0.0011300570614706028, "loss": 2.0191, "step": 17866 }, { "epoch": 0.479470802919708, "grad_norm": 0.32421875, "learning_rate": 0.001130043893147012, "loss": 2.0318, "step": 17867 }, { "epoch": 0.479497638471447, "grad_norm": 0.31640625, "learning_rate": 0.0011300307236606604, "loss": 1.9819, "step": 17868 }, { "epoch": 0.47952447402318593, "grad_norm": 0.30859375, "learning_rate": 0.0011300175530115778, "loss": 1.9932, "step": 17869 }, { "epoch": 0.47955130957492487, "grad_norm": 0.3046875, "learning_rate": 0.0011300043811997922, "loss": 1.9517, "step": 17870 }, { "epoch": 0.4795781451266638, "grad_norm": 0.322265625, "learning_rate": 0.0011299912082253331, "loss": 2.0756, "step": 17871 }, { "epoch": 0.47960498067840274, "grad_norm": 0.310546875, "learning_rate": 0.0011299780340882292, "loss": 2.0166, "step": 17872 }, { "epoch": 0.47963181623014167, "grad_norm": 0.3203125, "learning_rate": 0.0011299648587885095, "loss": 2.0168, "step": 17873 }, { "epoch": 0.47965865178188066, "grad_norm": 0.33984375, "learning_rate": 0.0011299516823262026, "loss": 1.9809, "step": 17874 }, { "epoch": 0.4796854873336196, "grad_norm": 0.314453125, "learning_rate": 0.0011299385047013378, "loss": 1.9815, "step": 17875 }, { "epoch": 0.47971232288535853, "grad_norm": 0.310546875, "learning_rate": 0.001129925325913944, "loss": 2.0383, "step": 17876 }, { "epoch": 0.47973915843709747, "grad_norm": 0.314453125, "learning_rate": 0.0011299121459640497, "loss": 2.0552, "step": 17877 }, { "epoch": 0.4797659939888364, "grad_norm": 0.314453125, "learning_rate": 0.0011298989648516838, "loss": 1.9498, "step": 17878 }, { "epoch": 0.47979282954057534, "grad_norm": 0.310546875, "learning_rate": 0.0011298857825768759, "loss": 1.9707, "step": 17879 }, { "epoch": 0.4798196650923143, "grad_norm": 0.3203125, "learning_rate": 0.0011298725991396544, "loss": 2.075, "step": 17880 }, { "epoch": 0.47984650064405326, "grad_norm": 0.30078125, "learning_rate": 0.0011298594145400483, "loss": 1.9287, "step": 17881 }, { "epoch": 0.4798733361957922, "grad_norm": 0.32421875, "learning_rate": 0.0011298462287780864, "loss": 2.1281, "step": 17882 }, { "epoch": 0.47990017174753113, "grad_norm": 0.31640625, "learning_rate": 0.0011298330418537978, "loss": 2.0091, "step": 17883 }, { "epoch": 0.47992700729927007, "grad_norm": 0.322265625, "learning_rate": 0.0011298198537672114, "loss": 2.1206, "step": 17884 }, { "epoch": 0.479953842851009, "grad_norm": 0.3125, "learning_rate": 0.0011298066645183563, "loss": 1.9942, "step": 17885 }, { "epoch": 0.47998067840274794, "grad_norm": 0.3125, "learning_rate": 0.001129793474107261, "loss": 1.9703, "step": 17886 }, { "epoch": 0.48000751395448693, "grad_norm": 0.3203125, "learning_rate": 0.001129780282533955, "loss": 1.9822, "step": 17887 }, { "epoch": 0.48003434950622587, "grad_norm": 0.306640625, "learning_rate": 0.0011297670897984667, "loss": 2.0127, "step": 17888 }, { "epoch": 0.4800611850579648, "grad_norm": 0.32421875, "learning_rate": 0.0011297538959008251, "loss": 2.0955, "step": 17889 }, { "epoch": 0.4800611850579648, "eval_loss": 2.720414161682129, "eval_runtime": 588.5775, "eval_samples_per_second": 81.447, "eval_steps_per_second": 20.363, "step": 17889 }, { "epoch": 0.48008802060970374, "grad_norm": 0.314453125, "learning_rate": 0.0011297407008410594, "loss": 1.9597, "step": 17890 }, { "epoch": 0.48011485616144267, "grad_norm": 0.314453125, "learning_rate": 0.0011297275046191987, "loss": 2.0008, "step": 17891 }, { "epoch": 0.4801416917131816, "grad_norm": 0.3203125, "learning_rate": 0.0011297143072352715, "loss": 2.0368, "step": 17892 }, { "epoch": 0.48016852726492054, "grad_norm": 0.310546875, "learning_rate": 0.001129701108689307, "loss": 1.9577, "step": 17893 }, { "epoch": 0.48019536281665953, "grad_norm": 0.3046875, "learning_rate": 0.001129687908981334, "loss": 1.9209, "step": 17894 }, { "epoch": 0.48022219836839847, "grad_norm": 0.30859375, "learning_rate": 0.0011296747081113816, "loss": 1.9523, "step": 17895 }, { "epoch": 0.4802490339201374, "grad_norm": 0.32421875, "learning_rate": 0.001129661506079479, "loss": 2.0316, "step": 17896 }, { "epoch": 0.48027586947187634, "grad_norm": 0.3125, "learning_rate": 0.0011296483028856547, "loss": 1.9609, "step": 17897 }, { "epoch": 0.4803027050236153, "grad_norm": 0.31640625, "learning_rate": 0.0011296350985299378, "loss": 1.9467, "step": 17898 }, { "epoch": 0.4803295405753542, "grad_norm": 0.314453125, "learning_rate": 0.0011296218930123573, "loss": 2.0362, "step": 17899 }, { "epoch": 0.4803563761270932, "grad_norm": 0.31640625, "learning_rate": 0.0011296086863329421, "loss": 1.9962, "step": 17900 }, { "epoch": 0.48038321167883213, "grad_norm": 0.3125, "learning_rate": 0.0011295954784917215, "loss": 1.8946, "step": 17901 }, { "epoch": 0.48041004723057107, "grad_norm": 0.33203125, "learning_rate": 0.0011295822694887242, "loss": 2.0413, "step": 17902 }, { "epoch": 0.48043688278231, "grad_norm": 0.310546875, "learning_rate": 0.0011295690593239789, "loss": 1.9935, "step": 17903 }, { "epoch": 0.48046371833404894, "grad_norm": 0.310546875, "learning_rate": 0.0011295558479975153, "loss": 1.904, "step": 17904 }, { "epoch": 0.4804905538857879, "grad_norm": 0.306640625, "learning_rate": 0.0011295426355093616, "loss": 1.8965, "step": 17905 }, { "epoch": 0.4805173894375268, "grad_norm": 0.32421875, "learning_rate": 0.0011295294218595475, "loss": 2.0187, "step": 17906 }, { "epoch": 0.4805442249892658, "grad_norm": 0.314453125, "learning_rate": 0.0011295162070481012, "loss": 1.9797, "step": 17907 }, { "epoch": 0.48057106054100474, "grad_norm": 0.3125, "learning_rate": 0.0011295029910750524, "loss": 1.9214, "step": 17908 }, { "epoch": 0.48059789609274367, "grad_norm": 0.3125, "learning_rate": 0.0011294897739404298, "loss": 1.9822, "step": 17909 }, { "epoch": 0.4806247316444826, "grad_norm": 0.298828125, "learning_rate": 0.0011294765556442625, "loss": 1.8419, "step": 17910 }, { "epoch": 0.48065156719622154, "grad_norm": 0.3046875, "learning_rate": 0.0011294633361865793, "loss": 1.9325, "step": 17911 }, { "epoch": 0.4806784027479605, "grad_norm": 0.318359375, "learning_rate": 0.0011294501155674093, "loss": 2.0035, "step": 17912 }, { "epoch": 0.48070523829969947, "grad_norm": 0.3203125, "learning_rate": 0.0011294368937867815, "loss": 2.0256, "step": 17913 }, { "epoch": 0.4807320738514384, "grad_norm": 0.32421875, "learning_rate": 0.001129423670844725, "loss": 2.038, "step": 17914 }, { "epoch": 0.48075890940317734, "grad_norm": 0.322265625, "learning_rate": 0.0011294104467412687, "loss": 1.9806, "step": 17915 }, { "epoch": 0.48078574495491627, "grad_norm": 0.33203125, "learning_rate": 0.0011293972214764419, "loss": 2.0484, "step": 17916 }, { "epoch": 0.4808125805066552, "grad_norm": 0.30859375, "learning_rate": 0.0011293839950502727, "loss": 1.8885, "step": 17917 }, { "epoch": 0.48083941605839414, "grad_norm": 0.322265625, "learning_rate": 0.0011293707674627914, "loss": 1.941, "step": 17918 }, { "epoch": 0.4808662516101331, "grad_norm": 0.310546875, "learning_rate": 0.001129357538714026, "loss": 1.9086, "step": 17919 }, { "epoch": 0.48089308716187207, "grad_norm": 0.326171875, "learning_rate": 0.001129344308804006, "loss": 2.0715, "step": 17920 }, { "epoch": 0.480919922713611, "grad_norm": 0.318359375, "learning_rate": 0.0011293310777327605, "loss": 1.9766, "step": 17921 }, { "epoch": 0.48094675826534994, "grad_norm": 0.3203125, "learning_rate": 0.001129317845500318, "loss": 1.9847, "step": 17922 }, { "epoch": 0.4809735938170889, "grad_norm": 0.3203125, "learning_rate": 0.001129304612106708, "loss": 2.026, "step": 17923 }, { "epoch": 0.4810004293688278, "grad_norm": 0.31640625, "learning_rate": 0.0011292913775519595, "loss": 1.9391, "step": 17924 }, { "epoch": 0.48102726492056674, "grad_norm": 0.31640625, "learning_rate": 0.0011292781418361015, "loss": 1.9666, "step": 17925 }, { "epoch": 0.48105410047230573, "grad_norm": 0.3125, "learning_rate": 0.001129264904959163, "loss": 1.938, "step": 17926 }, { "epoch": 0.48108093602404467, "grad_norm": 0.310546875, "learning_rate": 0.0011292516669211728, "loss": 1.8577, "step": 17927 }, { "epoch": 0.4811077715757836, "grad_norm": 0.310546875, "learning_rate": 0.00112923842772216, "loss": 1.9529, "step": 17928 }, { "epoch": 0.48113460712752254, "grad_norm": 0.310546875, "learning_rate": 0.0011292251873621542, "loss": 1.8822, "step": 17929 }, { "epoch": 0.4811614426792615, "grad_norm": 0.31640625, "learning_rate": 0.0011292119458411838, "loss": 1.9779, "step": 17930 }, { "epoch": 0.4811882782310004, "grad_norm": 0.318359375, "learning_rate": 0.0011291987031592783, "loss": 1.9075, "step": 17931 }, { "epoch": 0.48121511378273935, "grad_norm": 0.310546875, "learning_rate": 0.0011291854593164664, "loss": 1.8897, "step": 17932 }, { "epoch": 0.48124194933447834, "grad_norm": 0.326171875, "learning_rate": 0.0011291722143127773, "loss": 1.9091, "step": 17933 }, { "epoch": 0.48126878488621727, "grad_norm": 0.314453125, "learning_rate": 0.0011291589681482399, "loss": 1.9806, "step": 17934 }, { "epoch": 0.4812956204379562, "grad_norm": 0.310546875, "learning_rate": 0.0011291457208228836, "loss": 1.9219, "step": 17935 }, { "epoch": 0.48132245598969514, "grad_norm": 0.32421875, "learning_rate": 0.0011291324723367374, "loss": 1.9764, "step": 17936 }, { "epoch": 0.4813492915414341, "grad_norm": 0.31640625, "learning_rate": 0.0011291192226898297, "loss": 1.8944, "step": 17937 }, { "epoch": 0.481376127093173, "grad_norm": 0.322265625, "learning_rate": 0.0011291059718821904, "loss": 1.9582, "step": 17938 }, { "epoch": 0.481402962644912, "grad_norm": 0.326171875, "learning_rate": 0.0011290927199138483, "loss": 1.9945, "step": 17939 }, { "epoch": 0.48142979819665094, "grad_norm": 0.3125, "learning_rate": 0.0011290794667848324, "loss": 1.9161, "step": 17940 }, { "epoch": 0.4814566337483899, "grad_norm": 0.314453125, "learning_rate": 0.001129066212495172, "loss": 1.9537, "step": 17941 }, { "epoch": 0.4814834693001288, "grad_norm": 0.322265625, "learning_rate": 0.0011290529570448958, "loss": 1.8889, "step": 17942 }, { "epoch": 0.48151030485186774, "grad_norm": 0.314453125, "learning_rate": 0.0011290397004340331, "loss": 1.9049, "step": 17943 }, { "epoch": 0.4815371404036067, "grad_norm": 0.322265625, "learning_rate": 0.001129026442662613, "loss": 1.9245, "step": 17944 }, { "epoch": 0.48156397595534567, "grad_norm": 0.3125, "learning_rate": 0.0011290131837306643, "loss": 1.9295, "step": 17945 }, { "epoch": 0.4815908115070846, "grad_norm": 0.30859375, "learning_rate": 0.0011289999236382164, "loss": 1.89, "step": 17946 }, { "epoch": 0.48161764705882354, "grad_norm": 0.322265625, "learning_rate": 0.0011289866623852983, "loss": 1.8976, "step": 17947 }, { "epoch": 0.4816444826105625, "grad_norm": 0.314453125, "learning_rate": 0.0011289733999719392, "loss": 1.8435, "step": 17948 }, { "epoch": 0.4816713181623014, "grad_norm": 0.310546875, "learning_rate": 0.0011289601363981682, "loss": 1.8446, "step": 17949 }, { "epoch": 0.48169815371404034, "grad_norm": 0.3125, "learning_rate": 0.001128946871664014, "loss": 1.8592, "step": 17950 }, { "epoch": 0.4817249892657793, "grad_norm": 0.31640625, "learning_rate": 0.001128933605769506, "loss": 1.8696, "step": 17951 }, { "epoch": 0.48175182481751827, "grad_norm": 0.31640625, "learning_rate": 0.0011289203387146736, "loss": 1.8984, "step": 17952 }, { "epoch": 0.4817786603692572, "grad_norm": 0.31640625, "learning_rate": 0.0011289070704995452, "loss": 1.8898, "step": 17953 }, { "epoch": 0.48180549592099614, "grad_norm": 0.330078125, "learning_rate": 0.0011288938011241505, "loss": 1.9308, "step": 17954 }, { "epoch": 0.4818323314727351, "grad_norm": 0.3046875, "learning_rate": 0.0011288805305885185, "loss": 1.8578, "step": 17955 }, { "epoch": 0.481859167024474, "grad_norm": 0.330078125, "learning_rate": 0.001128867258892678, "loss": 1.8147, "step": 17956 }, { "epoch": 0.48188600257621295, "grad_norm": 0.3203125, "learning_rate": 0.0011288539860366583, "loss": 1.9302, "step": 17957 }, { "epoch": 0.48191283812795194, "grad_norm": 0.322265625, "learning_rate": 0.0011288407120204887, "loss": 1.8543, "step": 17958 }, { "epoch": 0.48193967367969087, "grad_norm": 0.328125, "learning_rate": 0.0011288274368441982, "loss": 1.8436, "step": 17959 }, { "epoch": 0.4819665092314298, "grad_norm": 0.333984375, "learning_rate": 0.0011288141605078157, "loss": 2.0037, "step": 17960 }, { "epoch": 0.48199334478316874, "grad_norm": 0.314453125, "learning_rate": 0.0011288008830113707, "loss": 1.8427, "step": 17961 }, { "epoch": 0.4820201803349077, "grad_norm": 0.310546875, "learning_rate": 0.001128787604354892, "loss": 1.8721, "step": 17962 }, { "epoch": 0.4820470158866466, "grad_norm": 0.314453125, "learning_rate": 0.001128774324538409, "loss": 1.784, "step": 17963 }, { "epoch": 0.48207385143838555, "grad_norm": 0.33203125, "learning_rate": 0.0011287610435619507, "loss": 1.9257, "step": 17964 }, { "epoch": 0.48210068699012454, "grad_norm": 0.322265625, "learning_rate": 0.0011287477614255461, "loss": 1.8559, "step": 17965 }, { "epoch": 0.4821275225418635, "grad_norm": 0.310546875, "learning_rate": 0.0011287344781292245, "loss": 1.859, "step": 17966 }, { "epoch": 0.4821543580936024, "grad_norm": 0.333984375, "learning_rate": 0.0011287211936730152, "loss": 1.8733, "step": 17967 }, { "epoch": 0.48218119364534134, "grad_norm": 0.31640625, "learning_rate": 0.001128707908056947, "loss": 1.8693, "step": 17968 }, { "epoch": 0.4822080291970803, "grad_norm": 0.32421875, "learning_rate": 0.0011286946212810491, "loss": 1.8797, "step": 17969 }, { "epoch": 0.4822348647488192, "grad_norm": 0.33984375, "learning_rate": 0.0011286813333453508, "loss": 1.9555, "step": 17970 }, { "epoch": 0.4822617003005582, "grad_norm": 0.3828125, "learning_rate": 0.0011286680442498813, "loss": 2.078, "step": 17971 }, { "epoch": 0.48228853585229714, "grad_norm": 0.375, "learning_rate": 0.0011286547539946694, "loss": 2.18, "step": 17972 }, { "epoch": 0.4823153714040361, "grad_norm": 0.3515625, "learning_rate": 0.0011286414625797448, "loss": 2.1516, "step": 17973 }, { "epoch": 0.482342206955775, "grad_norm": 0.326171875, "learning_rate": 0.0011286281700051363, "loss": 2.046, "step": 17974 }, { "epoch": 0.48236904250751395, "grad_norm": 0.34375, "learning_rate": 0.0011286148762708731, "loss": 2.2379, "step": 17975 }, { "epoch": 0.4823958780592529, "grad_norm": 0.33984375, "learning_rate": 0.0011286015813769842, "loss": 2.2341, "step": 17976 }, { "epoch": 0.4824227136109918, "grad_norm": 0.322265625, "learning_rate": 0.0011285882853234992, "loss": 2.0961, "step": 17977 }, { "epoch": 0.4824495491627308, "grad_norm": 0.3125, "learning_rate": 0.001128574988110447, "loss": 1.9916, "step": 17978 }, { "epoch": 0.48247638471446974, "grad_norm": 0.32421875, "learning_rate": 0.0011285616897378566, "loss": 2.1739, "step": 17979 }, { "epoch": 0.4825032202662087, "grad_norm": 0.318359375, "learning_rate": 0.0011285483902057575, "loss": 2.128, "step": 17980 }, { "epoch": 0.4825300558179476, "grad_norm": 0.3046875, "learning_rate": 0.0011285350895141788, "loss": 2.0762, "step": 17981 }, { "epoch": 0.48255689136968655, "grad_norm": 0.32421875, "learning_rate": 0.0011285217876631493, "loss": 2.1126, "step": 17982 }, { "epoch": 0.4825837269214255, "grad_norm": 0.3203125, "learning_rate": 0.0011285084846526987, "loss": 2.1042, "step": 17983 }, { "epoch": 0.4826105624731645, "grad_norm": 0.326171875, "learning_rate": 0.001128495180482856, "loss": 2.1198, "step": 17984 }, { "epoch": 0.4826373980249034, "grad_norm": 0.314453125, "learning_rate": 0.0011284818751536503, "loss": 2.0699, "step": 17985 }, { "epoch": 0.48266423357664234, "grad_norm": 0.318359375, "learning_rate": 0.001128468568665111, "loss": 2.0939, "step": 17986 }, { "epoch": 0.4826910691283813, "grad_norm": 0.310546875, "learning_rate": 0.0011284552610172668, "loss": 1.9827, "step": 17987 }, { "epoch": 0.4827179046801202, "grad_norm": 0.310546875, "learning_rate": 0.0011284419522101475, "loss": 2.0671, "step": 17988 }, { "epoch": 0.48274474023185915, "grad_norm": 0.314453125, "learning_rate": 0.001128428642243782, "loss": 2.0889, "step": 17989 }, { "epoch": 0.4827715757835981, "grad_norm": 0.3046875, "learning_rate": 0.0011284153311181995, "loss": 2.0794, "step": 17990 }, { "epoch": 0.4827984113353371, "grad_norm": 0.314453125, "learning_rate": 0.0011284020188334293, "loss": 2.0961, "step": 17991 }, { "epoch": 0.482825246887076, "grad_norm": 0.314453125, "learning_rate": 0.0011283887053895004, "loss": 2.1229, "step": 17992 }, { "epoch": 0.48285208243881494, "grad_norm": 0.3203125, "learning_rate": 0.001128375390786442, "loss": 2.1191, "step": 17993 }, { "epoch": 0.4828789179905539, "grad_norm": 0.318359375, "learning_rate": 0.0011283620750242838, "loss": 2.0701, "step": 17994 }, { "epoch": 0.4829057535422928, "grad_norm": 0.298828125, "learning_rate": 0.0011283487581030544, "loss": 2.0369, "step": 17995 }, { "epoch": 0.48293258909403175, "grad_norm": 0.314453125, "learning_rate": 0.0011283354400227835, "loss": 2.0552, "step": 17996 }, { "epoch": 0.48295942464577074, "grad_norm": 0.30859375, "learning_rate": 0.0011283221207835, "loss": 2.0753, "step": 17997 }, { "epoch": 0.4829862601975097, "grad_norm": 0.310546875, "learning_rate": 0.001128308800385233, "loss": 2.1054, "step": 17998 }, { "epoch": 0.4830130957492486, "grad_norm": 0.314453125, "learning_rate": 0.001128295478828012, "loss": 2.0974, "step": 17999 }, { "epoch": 0.48303993130098755, "grad_norm": 0.3046875, "learning_rate": 0.0011282821561118663, "loss": 1.9991, "step": 18000 }, { "epoch": 0.4830667668527265, "grad_norm": 0.30859375, "learning_rate": 0.0011282688322368248, "loss": 2.0413, "step": 18001 }, { "epoch": 0.4830936024044654, "grad_norm": 0.3125, "learning_rate": 0.0011282555072029171, "loss": 2.0253, "step": 18002 }, { "epoch": 0.4831204379562044, "grad_norm": 0.30859375, "learning_rate": 0.001128242181010172, "loss": 2.0749, "step": 18003 }, { "epoch": 0.48314727350794334, "grad_norm": 0.302734375, "learning_rate": 0.001128228853658619, "loss": 2.071, "step": 18004 }, { "epoch": 0.4831741090596823, "grad_norm": 0.30859375, "learning_rate": 0.0011282155251482874, "loss": 2.0329, "step": 18005 }, { "epoch": 0.4832009446114212, "grad_norm": 0.30859375, "learning_rate": 0.0011282021954792064, "loss": 2.0878, "step": 18006 }, { "epoch": 0.48322778016316015, "grad_norm": 0.314453125, "learning_rate": 0.001128188864651405, "loss": 2.0522, "step": 18007 }, { "epoch": 0.4832546157148991, "grad_norm": 0.314453125, "learning_rate": 0.0011281755326649128, "loss": 2.1134, "step": 18008 }, { "epoch": 0.483281451266638, "grad_norm": 0.3125, "learning_rate": 0.0011281621995197586, "loss": 2.0387, "step": 18009 }, { "epoch": 0.483308286818377, "grad_norm": 0.306640625, "learning_rate": 0.0011281488652159723, "loss": 2.0919, "step": 18010 }, { "epoch": 0.48333512237011594, "grad_norm": 0.296875, "learning_rate": 0.0011281355297535827, "loss": 1.9509, "step": 18011 }, { "epoch": 0.4833619579218549, "grad_norm": 0.310546875, "learning_rate": 0.001128122193132619, "loss": 2.0619, "step": 18012 }, { "epoch": 0.4833887934735938, "grad_norm": 0.3203125, "learning_rate": 0.0011281088553531105, "loss": 1.9553, "step": 18013 }, { "epoch": 0.48341562902533275, "grad_norm": 0.30859375, "learning_rate": 0.0011280955164150867, "loss": 2.0746, "step": 18014 }, { "epoch": 0.4834424645770717, "grad_norm": 0.306640625, "learning_rate": 0.0011280821763185766, "loss": 2.0267, "step": 18015 }, { "epoch": 0.4834693001288107, "grad_norm": 0.3125, "learning_rate": 0.0011280688350636097, "loss": 2.0457, "step": 18016 }, { "epoch": 0.4834961356805496, "grad_norm": 0.31640625, "learning_rate": 0.0011280554926502149, "loss": 2.0546, "step": 18017 }, { "epoch": 0.48352297123228855, "grad_norm": 0.310546875, "learning_rate": 0.001128042149078422, "loss": 2.0499, "step": 18018 }, { "epoch": 0.4835498067840275, "grad_norm": 0.31640625, "learning_rate": 0.0011280288043482598, "loss": 2.0617, "step": 18019 }, { "epoch": 0.4835766423357664, "grad_norm": 0.314453125, "learning_rate": 0.0011280154584597578, "loss": 2.0025, "step": 18020 }, { "epoch": 0.48360347788750535, "grad_norm": 0.314453125, "learning_rate": 0.0011280021114129452, "loss": 2.0591, "step": 18021 }, { "epoch": 0.4836303134392443, "grad_norm": 0.3125, "learning_rate": 0.0011279887632078514, "loss": 2.0701, "step": 18022 }, { "epoch": 0.4836571489909833, "grad_norm": 0.298828125, "learning_rate": 0.0011279754138445054, "loss": 1.9077, "step": 18023 }, { "epoch": 0.4836839845427222, "grad_norm": 0.314453125, "learning_rate": 0.0011279620633229369, "loss": 2.1036, "step": 18024 }, { "epoch": 0.48371082009446115, "grad_norm": 0.31640625, "learning_rate": 0.0011279487116431749, "loss": 2.0577, "step": 18025 }, { "epoch": 0.4837376556462001, "grad_norm": 0.310546875, "learning_rate": 0.0011279353588052486, "loss": 2.024, "step": 18026 }, { "epoch": 0.483764491197939, "grad_norm": 0.296875, "learning_rate": 0.0011279220048091875, "loss": 1.9396, "step": 18027 }, { "epoch": 0.48379132674967795, "grad_norm": 0.3125, "learning_rate": 0.001127908649655021, "loss": 2.0648, "step": 18028 }, { "epoch": 0.48381816230141694, "grad_norm": 0.310546875, "learning_rate": 0.001127895293342778, "loss": 2.0096, "step": 18029 }, { "epoch": 0.4838449978531559, "grad_norm": 0.310546875, "learning_rate": 0.0011278819358724883, "loss": 1.9703, "step": 18030 }, { "epoch": 0.4838718334048948, "grad_norm": 0.30859375, "learning_rate": 0.0011278685772441808, "loss": 2.0324, "step": 18031 }, { "epoch": 0.48389866895663375, "grad_norm": 0.30859375, "learning_rate": 0.001127855217457885, "loss": 1.9777, "step": 18032 }, { "epoch": 0.4839255045083727, "grad_norm": 0.3125, "learning_rate": 0.0011278418565136299, "loss": 1.9986, "step": 18033 }, { "epoch": 0.4839523400601116, "grad_norm": 0.3125, "learning_rate": 0.0011278284944114454, "loss": 2.0657, "step": 18034 }, { "epoch": 0.48397917561185055, "grad_norm": 0.314453125, "learning_rate": 0.0011278151311513603, "loss": 2.0223, "step": 18035 }, { "epoch": 0.48400601116358954, "grad_norm": 0.314453125, "learning_rate": 0.0011278017667334039, "loss": 2.0566, "step": 18036 }, { "epoch": 0.4840328467153285, "grad_norm": 0.30859375, "learning_rate": 0.001127788401157606, "loss": 1.9795, "step": 18037 }, { "epoch": 0.4840596822670674, "grad_norm": 0.306640625, "learning_rate": 0.0011277750344239957, "loss": 2.0441, "step": 18038 }, { "epoch": 0.48408651781880635, "grad_norm": 0.30859375, "learning_rate": 0.0011277616665326018, "loss": 1.9132, "step": 18039 }, { "epoch": 0.4841133533705453, "grad_norm": 0.306640625, "learning_rate": 0.0011277482974834544, "loss": 1.9657, "step": 18040 }, { "epoch": 0.4841401889222842, "grad_norm": 0.314453125, "learning_rate": 0.0011277349272765826, "loss": 2.0381, "step": 18041 }, { "epoch": 0.4841670244740232, "grad_norm": 0.3125, "learning_rate": 0.0011277215559120156, "loss": 1.9933, "step": 18042 }, { "epoch": 0.48419386002576215, "grad_norm": 0.302734375, "learning_rate": 0.0011277081833897826, "loss": 1.9746, "step": 18043 }, { "epoch": 0.4842206955775011, "grad_norm": 0.3203125, "learning_rate": 0.0011276948097099133, "loss": 1.9724, "step": 18044 }, { "epoch": 0.48424753112924, "grad_norm": 0.314453125, "learning_rate": 0.0011276814348724364, "loss": 2.008, "step": 18045 }, { "epoch": 0.48427436668097895, "grad_norm": 0.306640625, "learning_rate": 0.001127668058877382, "loss": 1.9958, "step": 18046 }, { "epoch": 0.4843012022327179, "grad_norm": 0.302734375, "learning_rate": 0.0011276546817247793, "loss": 1.9155, "step": 18047 }, { "epoch": 0.4843280377844568, "grad_norm": 0.318359375, "learning_rate": 0.001127641303414657, "loss": 2.055, "step": 18048 }, { "epoch": 0.4843548733361958, "grad_norm": 0.322265625, "learning_rate": 0.0011276279239470454, "loss": 2.0008, "step": 18049 }, { "epoch": 0.48438170888793475, "grad_norm": 0.31640625, "learning_rate": 0.001127614543321973, "loss": 1.9781, "step": 18050 }, { "epoch": 0.4844085444396737, "grad_norm": 0.3125, "learning_rate": 0.0011276011615394697, "loss": 1.8988, "step": 18051 }, { "epoch": 0.4844353799914126, "grad_norm": 0.31640625, "learning_rate": 0.0011275877785995648, "loss": 1.9906, "step": 18052 }, { "epoch": 0.48446221554315155, "grad_norm": 0.314453125, "learning_rate": 0.0011275743945022873, "loss": 2.0129, "step": 18053 }, { "epoch": 0.4844890510948905, "grad_norm": 0.310546875, "learning_rate": 0.0011275610092476669, "loss": 1.9875, "step": 18054 }, { "epoch": 0.4845158866466295, "grad_norm": 0.31640625, "learning_rate": 0.0011275476228357329, "loss": 1.9914, "step": 18055 }, { "epoch": 0.4845427221983684, "grad_norm": 0.314453125, "learning_rate": 0.0011275342352665143, "loss": 2.0033, "step": 18056 }, { "epoch": 0.48456955775010735, "grad_norm": 0.31640625, "learning_rate": 0.001127520846540041, "loss": 1.9034, "step": 18057 }, { "epoch": 0.4845963933018463, "grad_norm": 0.314453125, "learning_rate": 0.0011275074566563423, "loss": 2.0467, "step": 18058 }, { "epoch": 0.4846232288535852, "grad_norm": 0.3125, "learning_rate": 0.0011274940656154473, "loss": 1.927, "step": 18059 }, { "epoch": 0.48465006440532415, "grad_norm": 0.30078125, "learning_rate": 0.0011274806734173856, "loss": 1.9171, "step": 18060 }, { "epoch": 0.4846768999570631, "grad_norm": 0.302734375, "learning_rate": 0.0011274672800621864, "loss": 1.8571, "step": 18061 }, { "epoch": 0.4847037355088021, "grad_norm": 0.314453125, "learning_rate": 0.0011274538855498791, "loss": 1.9114, "step": 18062 }, { "epoch": 0.484730571060541, "grad_norm": 0.310546875, "learning_rate": 0.0011274404898804933, "loss": 1.8945, "step": 18063 }, { "epoch": 0.48475740661227995, "grad_norm": 0.32421875, "learning_rate": 0.0011274270930540582, "loss": 1.9585, "step": 18064 }, { "epoch": 0.4847842421640189, "grad_norm": 0.306640625, "learning_rate": 0.0011274136950706034, "loss": 1.9602, "step": 18065 }, { "epoch": 0.4848110777157578, "grad_norm": 0.30859375, "learning_rate": 0.001127400295930158, "loss": 1.9687, "step": 18066 }, { "epoch": 0.48483791326749676, "grad_norm": 0.30859375, "learning_rate": 0.0011273868956327513, "loss": 2.0002, "step": 18067 }, { "epoch": 0.48486474881923575, "grad_norm": 0.30859375, "learning_rate": 0.001127373494178413, "loss": 1.9914, "step": 18068 }, { "epoch": 0.4848915843709747, "grad_norm": 0.32421875, "learning_rate": 0.0011273600915671725, "loss": 1.9431, "step": 18069 }, { "epoch": 0.4849184199227136, "grad_norm": 0.318359375, "learning_rate": 0.001127346687799059, "loss": 1.993, "step": 18070 }, { "epoch": 0.48494525547445255, "grad_norm": 0.30078125, "learning_rate": 0.001127333282874102, "loss": 1.8366, "step": 18071 }, { "epoch": 0.4849720910261915, "grad_norm": 0.302734375, "learning_rate": 0.0011273198767923313, "loss": 1.9113, "step": 18072 }, { "epoch": 0.4849989265779304, "grad_norm": 0.318359375, "learning_rate": 0.0011273064695537756, "loss": 1.9844, "step": 18073 }, { "epoch": 0.4850257621296694, "grad_norm": 0.314453125, "learning_rate": 0.0011272930611584647, "loss": 2.0551, "step": 18074 }, { "epoch": 0.48505259768140835, "grad_norm": 0.306640625, "learning_rate": 0.001127279651606428, "loss": 1.882, "step": 18075 }, { "epoch": 0.4850794332331473, "grad_norm": 0.3125, "learning_rate": 0.0011272662408976948, "loss": 1.9423, "step": 18076 }, { "epoch": 0.4851062687848862, "grad_norm": 0.3125, "learning_rate": 0.0011272528290322944, "loss": 1.9755, "step": 18077 }, { "epoch": 0.48513310433662515, "grad_norm": 0.3125, "learning_rate": 0.0011272394160102568, "loss": 1.952, "step": 18078 }, { "epoch": 0.4851599398883641, "grad_norm": 0.314453125, "learning_rate": 0.0011272260018316109, "loss": 1.9559, "step": 18079 }, { "epoch": 0.485186775440103, "grad_norm": 0.31640625, "learning_rate": 0.0011272125864963862, "loss": 1.9666, "step": 18080 }, { "epoch": 0.485213610991842, "grad_norm": 0.318359375, "learning_rate": 0.0011271991700046123, "loss": 1.9996, "step": 18081 }, { "epoch": 0.48524044654358095, "grad_norm": 0.32421875, "learning_rate": 0.0011271857523563185, "loss": 2.0414, "step": 18082 }, { "epoch": 0.4852672820953199, "grad_norm": 0.3203125, "learning_rate": 0.0011271723335515341, "loss": 1.981, "step": 18083 }, { "epoch": 0.4852941176470588, "grad_norm": 0.314453125, "learning_rate": 0.0011271589135902888, "loss": 1.8998, "step": 18084 }, { "epoch": 0.48532095319879776, "grad_norm": 0.32421875, "learning_rate": 0.001127145492472612, "loss": 1.9682, "step": 18085 }, { "epoch": 0.4853477887505367, "grad_norm": 0.31640625, "learning_rate": 0.001127132070198533, "loss": 2.0115, "step": 18086 }, { "epoch": 0.4853746243022757, "grad_norm": 0.3203125, "learning_rate": 0.0011271186467680815, "loss": 1.9735, "step": 18087 }, { "epoch": 0.4854014598540146, "grad_norm": 0.326171875, "learning_rate": 0.0011271052221812864, "loss": 1.9391, "step": 18088 }, { "epoch": 0.48542829540575355, "grad_norm": 0.30859375, "learning_rate": 0.0011270917964381778, "loss": 1.8442, "step": 18089 }, { "epoch": 0.4854551309574925, "grad_norm": 0.3203125, "learning_rate": 0.0011270783695387849, "loss": 1.9432, "step": 18090 }, { "epoch": 0.4854819665092314, "grad_norm": 0.333984375, "learning_rate": 0.0011270649414831368, "loss": 1.835, "step": 18091 }, { "epoch": 0.48550880206097036, "grad_norm": 0.33203125, "learning_rate": 0.0011270515122712636, "loss": 1.9414, "step": 18092 }, { "epoch": 0.4855356376127093, "grad_norm": 0.318359375, "learning_rate": 0.0011270380819031944, "loss": 1.8914, "step": 18093 }, { "epoch": 0.4855624731644483, "grad_norm": 0.306640625, "learning_rate": 0.0011270246503789587, "loss": 1.8723, "step": 18094 }, { "epoch": 0.4855893087161872, "grad_norm": 0.330078125, "learning_rate": 0.001127011217698586, "loss": 1.9396, "step": 18095 }, { "epoch": 0.48561614426792615, "grad_norm": 0.3125, "learning_rate": 0.0011269977838621055, "loss": 1.924, "step": 18096 }, { "epoch": 0.4856429798196651, "grad_norm": 0.314453125, "learning_rate": 0.0011269843488695473, "loss": 1.894, "step": 18097 }, { "epoch": 0.485669815371404, "grad_norm": 0.3203125, "learning_rate": 0.0011269709127209402, "loss": 1.9565, "step": 18098 }, { "epoch": 0.48569665092314296, "grad_norm": 0.318359375, "learning_rate": 0.0011269574754163138, "loss": 1.921, "step": 18099 }, { "epoch": 0.48572348647488195, "grad_norm": 0.3125, "learning_rate": 0.001126944036955698, "loss": 1.9144, "step": 18100 }, { "epoch": 0.4857503220266209, "grad_norm": 0.318359375, "learning_rate": 0.001126930597339122, "loss": 1.9327, "step": 18101 }, { "epoch": 0.4857771575783598, "grad_norm": 0.31640625, "learning_rate": 0.0011269171565666152, "loss": 1.8817, "step": 18102 }, { "epoch": 0.48580399313009875, "grad_norm": 0.33203125, "learning_rate": 0.0011269037146382072, "loss": 2.005, "step": 18103 }, { "epoch": 0.4858308286818377, "grad_norm": 0.314453125, "learning_rate": 0.0011268902715539275, "loss": 1.8405, "step": 18104 }, { "epoch": 0.4858576642335766, "grad_norm": 0.326171875, "learning_rate": 0.0011268768273138057, "loss": 1.9815, "step": 18105 }, { "epoch": 0.48588449978531556, "grad_norm": 0.32421875, "learning_rate": 0.001126863381917871, "loss": 1.9885, "step": 18106 }, { "epoch": 0.48591133533705455, "grad_norm": 0.3203125, "learning_rate": 0.0011268499353661529, "loss": 1.8653, "step": 18107 }, { "epoch": 0.4859381708887935, "grad_norm": 0.3203125, "learning_rate": 0.0011268364876586814, "loss": 1.8701, "step": 18108 }, { "epoch": 0.4859650064405324, "grad_norm": 0.3203125, "learning_rate": 0.0011268230387954852, "loss": 1.8842, "step": 18109 }, { "epoch": 0.48599184199227136, "grad_norm": 0.314453125, "learning_rate": 0.0011268095887765946, "loss": 1.9467, "step": 18110 }, { "epoch": 0.4860186775440103, "grad_norm": 0.318359375, "learning_rate": 0.0011267961376020387, "loss": 1.8523, "step": 18111 }, { "epoch": 0.4860455130957492, "grad_norm": 0.322265625, "learning_rate": 0.0011267826852718469, "loss": 1.898, "step": 18112 }, { "epoch": 0.4860723486474882, "grad_norm": 0.328125, "learning_rate": 0.001126769231786049, "loss": 1.9097, "step": 18113 }, { "epoch": 0.48609918419922715, "grad_norm": 0.32421875, "learning_rate": 0.0011267557771446743, "loss": 1.9292, "step": 18114 }, { "epoch": 0.4861260197509661, "grad_norm": 0.322265625, "learning_rate": 0.0011267423213477525, "loss": 1.8586, "step": 18115 }, { "epoch": 0.486152855302705, "grad_norm": 0.3203125, "learning_rate": 0.001126728864395313, "loss": 1.8428, "step": 18116 }, { "epoch": 0.48617969085444396, "grad_norm": 0.318359375, "learning_rate": 0.0011267154062873852, "loss": 1.8796, "step": 18117 }, { "epoch": 0.4862065264061829, "grad_norm": 0.32421875, "learning_rate": 0.0011267019470239989, "loss": 1.9026, "step": 18118 }, { "epoch": 0.48623336195792183, "grad_norm": 0.330078125, "learning_rate": 0.0011266884866051835, "loss": 1.9299, "step": 18119 }, { "epoch": 0.4862601975096608, "grad_norm": 0.328125, "learning_rate": 0.0011266750250309683, "loss": 1.88, "step": 18120 }, { "epoch": 0.48628703306139975, "grad_norm": 0.330078125, "learning_rate": 0.0011266615623013833, "loss": 1.9214, "step": 18121 }, { "epoch": 0.4863138686131387, "grad_norm": 0.3203125, "learning_rate": 0.0011266480984164578, "loss": 1.9509, "step": 18122 }, { "epoch": 0.4863407041648776, "grad_norm": 0.384765625, "learning_rate": 0.001126634633376221, "loss": 2.1638, "step": 18123 }, { "epoch": 0.48636753971661656, "grad_norm": 0.357421875, "learning_rate": 0.001126621167180703, "loss": 2.1503, "step": 18124 }, { "epoch": 0.4863943752683555, "grad_norm": 0.359375, "learning_rate": 0.0011266076998299331, "loss": 2.2235, "step": 18125 }, { "epoch": 0.4864212108200945, "grad_norm": 0.3515625, "learning_rate": 0.0011265942313239408, "loss": 2.2204, "step": 18126 }, { "epoch": 0.4864480463718334, "grad_norm": 0.32421875, "learning_rate": 0.0011265807616627557, "loss": 2.1414, "step": 18127 }, { "epoch": 0.48647488192357236, "grad_norm": 0.34765625, "learning_rate": 0.0011265672908464073, "loss": 2.1853, "step": 18128 }, { "epoch": 0.4865017174753113, "grad_norm": 0.326171875, "learning_rate": 0.001126553818874925, "loss": 2.1664, "step": 18129 }, { "epoch": 0.4865285530270502, "grad_norm": 0.3125, "learning_rate": 0.001126540345748339, "loss": 2.0488, "step": 18130 }, { "epoch": 0.48655538857878916, "grad_norm": 0.322265625, "learning_rate": 0.001126526871466678, "loss": 2.1229, "step": 18131 }, { "epoch": 0.4865822241305281, "grad_norm": 0.322265625, "learning_rate": 0.001126513396029972, "loss": 2.1, "step": 18132 }, { "epoch": 0.4866090596822671, "grad_norm": 0.3203125, "learning_rate": 0.0011264999194382506, "loss": 2.0942, "step": 18133 }, { "epoch": 0.486635895234006, "grad_norm": 0.314453125, "learning_rate": 0.0011264864416915433, "loss": 2.0559, "step": 18134 }, { "epoch": 0.48666273078574496, "grad_norm": 0.3203125, "learning_rate": 0.0011264729627898796, "loss": 2.1754, "step": 18135 }, { "epoch": 0.4866895663374839, "grad_norm": 0.32421875, "learning_rate": 0.0011264594827332891, "loss": 2.1478, "step": 18136 }, { "epoch": 0.4867164018892228, "grad_norm": 0.318359375, "learning_rate": 0.0011264460015218015, "loss": 2.0817, "step": 18137 }, { "epoch": 0.48674323744096176, "grad_norm": 0.322265625, "learning_rate": 0.001126432519155446, "loss": 2.1302, "step": 18138 }, { "epoch": 0.48677007299270075, "grad_norm": 0.30859375, "learning_rate": 0.0011264190356342525, "loss": 2.0782, "step": 18139 }, { "epoch": 0.4867969085444397, "grad_norm": 0.314453125, "learning_rate": 0.0011264055509582506, "loss": 2.079, "step": 18140 }, { "epoch": 0.4868237440961786, "grad_norm": 0.318359375, "learning_rate": 0.0011263920651274698, "loss": 2.1181, "step": 18141 }, { "epoch": 0.48685057964791756, "grad_norm": 0.3125, "learning_rate": 0.0011263785781419396, "loss": 2.0751, "step": 18142 }, { "epoch": 0.4868774151996565, "grad_norm": 0.318359375, "learning_rate": 0.0011263650900016897, "loss": 2.111, "step": 18143 }, { "epoch": 0.48690425075139543, "grad_norm": 0.3125, "learning_rate": 0.0011263516007067498, "loss": 2.0549, "step": 18144 }, { "epoch": 0.4869310863031344, "grad_norm": 0.302734375, "learning_rate": 0.001126338110257149, "loss": 2.0016, "step": 18145 }, { "epoch": 0.48695792185487335, "grad_norm": 0.306640625, "learning_rate": 0.0011263246186529175, "loss": 2.0351, "step": 18146 }, { "epoch": 0.4869847574066123, "grad_norm": 0.3125, "learning_rate": 0.0011263111258940845, "loss": 2.1277, "step": 18147 }, { "epoch": 0.4870115929583512, "grad_norm": 0.3046875, "learning_rate": 0.0011262976319806799, "loss": 2.0443, "step": 18148 }, { "epoch": 0.48703842851009016, "grad_norm": 0.3125, "learning_rate": 0.001126284136912733, "loss": 2.112, "step": 18149 }, { "epoch": 0.4870652640618291, "grad_norm": 0.30859375, "learning_rate": 0.0011262706406902736, "loss": 2.0656, "step": 18150 }, { "epoch": 0.48709209961356803, "grad_norm": 0.314453125, "learning_rate": 0.0011262571433133313, "loss": 2.0797, "step": 18151 }, { "epoch": 0.487118935165307, "grad_norm": 0.30859375, "learning_rate": 0.0011262436447819354, "loss": 2.0928, "step": 18152 }, { "epoch": 0.48714577071704596, "grad_norm": 0.302734375, "learning_rate": 0.001126230145096116, "loss": 2.0726, "step": 18153 }, { "epoch": 0.4871726062687849, "grad_norm": 0.310546875, "learning_rate": 0.0011262166442559025, "loss": 2.123, "step": 18154 }, { "epoch": 0.4871994418205238, "grad_norm": 0.3125, "learning_rate": 0.0011262031422613246, "loss": 2.0401, "step": 18155 }, { "epoch": 0.48722627737226276, "grad_norm": 0.30078125, "learning_rate": 0.0011261896391124115, "loss": 1.9747, "step": 18156 }, { "epoch": 0.4872531129240017, "grad_norm": 0.314453125, "learning_rate": 0.0011261761348091933, "loss": 2.1355, "step": 18157 }, { "epoch": 0.4872799484757407, "grad_norm": 0.3125, "learning_rate": 0.0011261626293516994, "loss": 2.0615, "step": 18158 }, { "epoch": 0.4873067840274796, "grad_norm": 0.31640625, "learning_rate": 0.0011261491227399596, "loss": 2.0828, "step": 18159 }, { "epoch": 0.48733361957921856, "grad_norm": 0.32421875, "learning_rate": 0.0011261356149740033, "loss": 2.1723, "step": 18160 }, { "epoch": 0.4873604551309575, "grad_norm": 0.31640625, "learning_rate": 0.0011261221060538604, "loss": 2.0816, "step": 18161 }, { "epoch": 0.48738729068269643, "grad_norm": 0.318359375, "learning_rate": 0.0011261085959795602, "loss": 2.0571, "step": 18162 }, { "epoch": 0.48741412623443536, "grad_norm": 0.30859375, "learning_rate": 0.0011260950847511327, "loss": 2.0471, "step": 18163 }, { "epoch": 0.4874409617861743, "grad_norm": 0.310546875, "learning_rate": 0.001126081572368607, "loss": 1.9879, "step": 18164 }, { "epoch": 0.4874677973379133, "grad_norm": 0.314453125, "learning_rate": 0.0011260680588320137, "loss": 2.0307, "step": 18165 }, { "epoch": 0.4874946328896522, "grad_norm": 0.318359375, "learning_rate": 0.0011260545441413815, "loss": 2.108, "step": 18166 }, { "epoch": 0.48752146844139116, "grad_norm": 0.30078125, "learning_rate": 0.0011260410282967403, "loss": 2.0046, "step": 18167 }, { "epoch": 0.4875483039931301, "grad_norm": 0.302734375, "learning_rate": 0.0011260275112981199, "loss": 1.9746, "step": 18168 }, { "epoch": 0.48757513954486903, "grad_norm": 0.3125, "learning_rate": 0.00112601399314555, "loss": 2.0504, "step": 18169 }, { "epoch": 0.48760197509660796, "grad_norm": 0.3125, "learning_rate": 0.0011260004738390603, "loss": 1.9738, "step": 18170 }, { "epoch": 0.48762881064834696, "grad_norm": 0.3046875, "learning_rate": 0.00112598695337868, "loss": 1.9889, "step": 18171 }, { "epoch": 0.4876556462000859, "grad_norm": 0.3125, "learning_rate": 0.0011259734317644395, "loss": 1.9881, "step": 18172 }, { "epoch": 0.4876824817518248, "grad_norm": 0.306640625, "learning_rate": 0.0011259599089963675, "loss": 1.9901, "step": 18173 }, { "epoch": 0.48770931730356376, "grad_norm": 0.318359375, "learning_rate": 0.0011259463850744945, "loss": 2.0485, "step": 18174 }, { "epoch": 0.4877361528553027, "grad_norm": 0.3046875, "learning_rate": 0.00112593285999885, "loss": 1.9618, "step": 18175 }, { "epoch": 0.48776298840704163, "grad_norm": 0.302734375, "learning_rate": 0.0011259193337694632, "loss": 1.9391, "step": 18176 }, { "epoch": 0.48778982395878057, "grad_norm": 0.310546875, "learning_rate": 0.0011259058063863643, "loss": 1.998, "step": 18177 }, { "epoch": 0.48781665951051956, "grad_norm": 0.3125, "learning_rate": 0.0011258922778495826, "loss": 2.039, "step": 18178 }, { "epoch": 0.4878434950622585, "grad_norm": 0.314453125, "learning_rate": 0.0011258787481591482, "loss": 2.0296, "step": 18179 }, { "epoch": 0.4878703306139974, "grad_norm": 0.314453125, "learning_rate": 0.0011258652173150906, "loss": 2.0583, "step": 18180 }, { "epoch": 0.48789716616573636, "grad_norm": 0.302734375, "learning_rate": 0.0011258516853174393, "loss": 1.9602, "step": 18181 }, { "epoch": 0.4879240017174753, "grad_norm": 0.302734375, "learning_rate": 0.001125838152166224, "loss": 1.9544, "step": 18182 }, { "epoch": 0.48795083726921423, "grad_norm": 0.3203125, "learning_rate": 0.0011258246178614745, "loss": 2.0681, "step": 18183 }, { "epoch": 0.4879776728209532, "grad_norm": 0.3203125, "learning_rate": 0.0011258110824032207, "loss": 2.0592, "step": 18184 }, { "epoch": 0.48800450837269216, "grad_norm": 0.3125, "learning_rate": 0.0011257975457914919, "loss": 2.0067, "step": 18185 }, { "epoch": 0.4880313439244311, "grad_norm": 0.314453125, "learning_rate": 0.001125784008026318, "loss": 1.9358, "step": 18186 }, { "epoch": 0.48805817947617003, "grad_norm": 0.30859375, "learning_rate": 0.001125770469107729, "loss": 1.9938, "step": 18187 }, { "epoch": 0.48808501502790896, "grad_norm": 0.306640625, "learning_rate": 0.0011257569290357538, "loss": 1.9313, "step": 18188 }, { "epoch": 0.4881118505796479, "grad_norm": 0.322265625, "learning_rate": 0.001125743387810423, "loss": 2.0005, "step": 18189 }, { "epoch": 0.48813868613138683, "grad_norm": 0.30859375, "learning_rate": 0.0011257298454317654, "loss": 2.0127, "step": 18190 }, { "epoch": 0.4881655216831258, "grad_norm": 0.298828125, "learning_rate": 0.0011257163018998113, "loss": 1.9048, "step": 18191 }, { "epoch": 0.48819235723486476, "grad_norm": 0.30859375, "learning_rate": 0.0011257027572145904, "loss": 1.9754, "step": 18192 }, { "epoch": 0.4882191927866037, "grad_norm": 0.31640625, "learning_rate": 0.0011256892113761326, "loss": 1.98, "step": 18193 }, { "epoch": 0.48824602833834263, "grad_norm": 0.306640625, "learning_rate": 0.001125675664384467, "loss": 1.8902, "step": 18194 }, { "epoch": 0.48827286389008157, "grad_norm": 0.306640625, "learning_rate": 0.0011256621162396238, "loss": 1.9326, "step": 18195 }, { "epoch": 0.4882996994418205, "grad_norm": 0.3125, "learning_rate": 0.0011256485669416325, "loss": 2.0142, "step": 18196 }, { "epoch": 0.4883265349935595, "grad_norm": 0.328125, "learning_rate": 0.0011256350164905228, "loss": 2.0089, "step": 18197 }, { "epoch": 0.4883533705452984, "grad_norm": 0.30859375, "learning_rate": 0.0011256214648863248, "loss": 1.9597, "step": 18198 }, { "epoch": 0.48838020609703736, "grad_norm": 0.322265625, "learning_rate": 0.0011256079121290674, "loss": 2.0261, "step": 18199 }, { "epoch": 0.4884070416487763, "grad_norm": 0.3046875, "learning_rate": 0.0011255943582187814, "loss": 1.9003, "step": 18200 }, { "epoch": 0.48843387720051523, "grad_norm": 0.31640625, "learning_rate": 0.001125580803155496, "loss": 2.0017, "step": 18201 }, { "epoch": 0.48846071275225417, "grad_norm": 0.31640625, "learning_rate": 0.0011255672469392406, "loss": 2.0439, "step": 18202 }, { "epoch": 0.48848754830399316, "grad_norm": 0.306640625, "learning_rate": 0.0011255536895700454, "loss": 1.9045, "step": 18203 }, { "epoch": 0.4885143838557321, "grad_norm": 0.31640625, "learning_rate": 0.0011255401310479403, "loss": 1.9568, "step": 18204 }, { "epoch": 0.48854121940747103, "grad_norm": 0.314453125, "learning_rate": 0.0011255265713729545, "loss": 1.9786, "step": 18205 }, { "epoch": 0.48856805495920996, "grad_norm": 0.3046875, "learning_rate": 0.001125513010545118, "loss": 1.8488, "step": 18206 }, { "epoch": 0.4885948905109489, "grad_norm": 0.3203125, "learning_rate": 0.0011254994485644608, "loss": 1.9331, "step": 18207 }, { "epoch": 0.48862172606268783, "grad_norm": 0.306640625, "learning_rate": 0.0011254858854310122, "loss": 1.9306, "step": 18208 }, { "epoch": 0.48864856161442677, "grad_norm": 0.3125, "learning_rate": 0.0011254723211448023, "loss": 1.9757, "step": 18209 }, { "epoch": 0.48867539716616576, "grad_norm": 0.30859375, "learning_rate": 0.0011254587557058605, "loss": 1.9705, "step": 18210 }, { "epoch": 0.4887022327179047, "grad_norm": 0.30859375, "learning_rate": 0.001125445189114217, "loss": 1.9352, "step": 18211 }, { "epoch": 0.48872906826964363, "grad_norm": 0.31640625, "learning_rate": 0.0011254316213699014, "loss": 1.9701, "step": 18212 }, { "epoch": 0.48875590382138256, "grad_norm": 0.318359375, "learning_rate": 0.001125418052472943, "loss": 2.0088, "step": 18213 }, { "epoch": 0.4887827393731215, "grad_norm": 0.314453125, "learning_rate": 0.0011254044824233725, "loss": 2.021, "step": 18214 }, { "epoch": 0.48880957492486043, "grad_norm": 0.3125, "learning_rate": 0.001125390911221219, "loss": 1.9442, "step": 18215 }, { "epoch": 0.4888364104765994, "grad_norm": 0.3125, "learning_rate": 0.0011253773388665122, "loss": 1.9812, "step": 18216 }, { "epoch": 0.48886324602833836, "grad_norm": 0.310546875, "learning_rate": 0.0011253637653592821, "loss": 1.9707, "step": 18217 }, { "epoch": 0.4888900815800773, "grad_norm": 0.3203125, "learning_rate": 0.0011253501906995586, "loss": 1.9886, "step": 18218 }, { "epoch": 0.48891691713181623, "grad_norm": 0.318359375, "learning_rate": 0.0011253366148873715, "loss": 2.0183, "step": 18219 }, { "epoch": 0.48894375268355517, "grad_norm": 0.328125, "learning_rate": 0.0011253230379227501, "loss": 2.0839, "step": 18220 }, { "epoch": 0.4889705882352941, "grad_norm": 0.32421875, "learning_rate": 0.0011253094598057246, "loss": 2.0416, "step": 18221 }, { "epoch": 0.48899742378703304, "grad_norm": 0.31640625, "learning_rate": 0.0011252958805363247, "loss": 1.953, "step": 18222 }, { "epoch": 0.489024259338772, "grad_norm": 0.30859375, "learning_rate": 0.0011252823001145803, "loss": 1.9346, "step": 18223 }, { "epoch": 0.48905109489051096, "grad_norm": 0.31640625, "learning_rate": 0.001125268718540521, "loss": 1.9227, "step": 18224 }, { "epoch": 0.4890779304422499, "grad_norm": 0.3203125, "learning_rate": 0.0011252551358141764, "loss": 1.9225, "step": 18225 }, { "epoch": 0.48910476599398883, "grad_norm": 0.326171875, "learning_rate": 0.001125241551935577, "loss": 1.9115, "step": 18226 }, { "epoch": 0.48913160154572777, "grad_norm": 0.31640625, "learning_rate": 0.001125227966904752, "loss": 1.9322, "step": 18227 }, { "epoch": 0.4891584370974667, "grad_norm": 0.328125, "learning_rate": 0.0011252143807217313, "loss": 1.9206, "step": 18228 }, { "epoch": 0.4891852726492057, "grad_norm": 0.32421875, "learning_rate": 0.0011252007933865449, "loss": 1.9499, "step": 18229 }, { "epoch": 0.48921210820094463, "grad_norm": 0.31640625, "learning_rate": 0.0011251872048992223, "loss": 1.8305, "step": 18230 }, { "epoch": 0.48923894375268356, "grad_norm": 0.322265625, "learning_rate": 0.0011251736152597936, "loss": 1.9017, "step": 18231 }, { "epoch": 0.4892657793044225, "grad_norm": 0.322265625, "learning_rate": 0.0011251600244682886, "loss": 1.9083, "step": 18232 }, { "epoch": 0.48929261485616143, "grad_norm": 0.314453125, "learning_rate": 0.001125146432524737, "loss": 1.9081, "step": 18233 }, { "epoch": 0.48931945040790037, "grad_norm": 0.310546875, "learning_rate": 0.0011251328394291684, "loss": 1.8846, "step": 18234 }, { "epoch": 0.4893462859596393, "grad_norm": 0.3203125, "learning_rate": 0.0011251192451816128, "loss": 1.8947, "step": 18235 }, { "epoch": 0.4893731215113783, "grad_norm": 0.314453125, "learning_rate": 0.0011251056497821002, "loss": 1.9119, "step": 18236 }, { "epoch": 0.48939995706311723, "grad_norm": 0.31640625, "learning_rate": 0.0011250920532306604, "loss": 1.9096, "step": 18237 }, { "epoch": 0.48942679261485617, "grad_norm": 0.3046875, "learning_rate": 0.0011250784555273232, "loss": 1.8216, "step": 18238 }, { "epoch": 0.4894536281665951, "grad_norm": 0.310546875, "learning_rate": 0.001125064856672118, "loss": 1.9054, "step": 18239 }, { "epoch": 0.48948046371833404, "grad_norm": 0.318359375, "learning_rate": 0.0011250512566650752, "loss": 1.9292, "step": 18240 }, { "epoch": 0.48950729927007297, "grad_norm": 0.314453125, "learning_rate": 0.0011250376555062243, "loss": 1.8812, "step": 18241 }, { "epoch": 0.48953413482181196, "grad_norm": 0.31640625, "learning_rate": 0.0011250240531955954, "loss": 1.9494, "step": 18242 }, { "epoch": 0.4895609703735509, "grad_norm": 0.30859375, "learning_rate": 0.0011250104497332182, "loss": 1.878, "step": 18243 }, { "epoch": 0.48958780592528983, "grad_norm": 0.326171875, "learning_rate": 0.0011249968451191223, "loss": 1.9573, "step": 18244 }, { "epoch": 0.48961464147702877, "grad_norm": 0.322265625, "learning_rate": 0.0011249832393533381, "loss": 1.9178, "step": 18245 }, { "epoch": 0.4896414770287677, "grad_norm": 0.322265625, "learning_rate": 0.0011249696324358949, "loss": 1.8794, "step": 18246 }, { "epoch": 0.48966831258050664, "grad_norm": 0.330078125, "learning_rate": 0.0011249560243668229, "loss": 2.0233, "step": 18247 }, { "epoch": 0.4896951481322456, "grad_norm": 0.310546875, "learning_rate": 0.0011249424151461517, "loss": 1.8223, "step": 18248 }, { "epoch": 0.48972198368398456, "grad_norm": 0.3125, "learning_rate": 0.0011249288047739114, "loss": 1.8682, "step": 18249 }, { "epoch": 0.4897488192357235, "grad_norm": 0.3203125, "learning_rate": 0.0011249151932501316, "loss": 1.9118, "step": 18250 }, { "epoch": 0.48977565478746243, "grad_norm": 0.326171875, "learning_rate": 0.0011249015805748425, "loss": 1.9348, "step": 18251 }, { "epoch": 0.48980249033920137, "grad_norm": 0.32421875, "learning_rate": 0.0011248879667480737, "loss": 1.9816, "step": 18252 }, { "epoch": 0.4898293258909403, "grad_norm": 0.32421875, "learning_rate": 0.0011248743517698549, "loss": 1.9521, "step": 18253 }, { "epoch": 0.48985616144267924, "grad_norm": 0.3125, "learning_rate": 0.0011248607356402166, "loss": 2.0107, "step": 18254 }, { "epoch": 0.48988299699441823, "grad_norm": 0.306640625, "learning_rate": 0.001124847118359188, "loss": 1.8662, "step": 18255 }, { "epoch": 0.48990983254615716, "grad_norm": 0.310546875, "learning_rate": 0.0011248334999267992, "loss": 1.8666, "step": 18256 }, { "epoch": 0.4899366680978961, "grad_norm": 0.310546875, "learning_rate": 0.0011248198803430801, "loss": 1.8146, "step": 18257 }, { "epoch": 0.48996350364963503, "grad_norm": 0.314453125, "learning_rate": 0.0011248062596080606, "loss": 1.8934, "step": 18258 }, { "epoch": 0.48999033920137397, "grad_norm": 0.33203125, "learning_rate": 0.0011247926377217707, "loss": 1.8984, "step": 18259 }, { "epoch": 0.4900171747531129, "grad_norm": 0.314453125, "learning_rate": 0.00112477901468424, "loss": 1.9118, "step": 18260 }, { "epoch": 0.49004401030485184, "grad_norm": 0.330078125, "learning_rate": 0.0011247653904954988, "loss": 1.9305, "step": 18261 }, { "epoch": 0.49007084585659083, "grad_norm": 0.318359375, "learning_rate": 0.0011247517651555763, "loss": 1.8931, "step": 18262 }, { "epoch": 0.49009768140832977, "grad_norm": 0.31640625, "learning_rate": 0.0011247381386645032, "loss": 1.8418, "step": 18263 }, { "epoch": 0.4901245169600687, "grad_norm": 0.32421875, "learning_rate": 0.0011247245110223088, "loss": 1.8949, "step": 18264 }, { "epoch": 0.49015135251180764, "grad_norm": 0.322265625, "learning_rate": 0.0011247108822290232, "loss": 1.9498, "step": 18265 }, { "epoch": 0.49017818806354657, "grad_norm": 0.3203125, "learning_rate": 0.0011246972522846762, "loss": 1.8694, "step": 18266 }, { "epoch": 0.4902050236152855, "grad_norm": 0.31640625, "learning_rate": 0.0011246836211892982, "loss": 1.8525, "step": 18267 }, { "epoch": 0.4902318591670245, "grad_norm": 0.3203125, "learning_rate": 0.0011246699889429182, "loss": 1.8429, "step": 18268 }, { "epoch": 0.49025869471876343, "grad_norm": 0.328125, "learning_rate": 0.0011246563555455669, "loss": 1.9635, "step": 18269 }, { "epoch": 0.49028553027050237, "grad_norm": 0.326171875, "learning_rate": 0.0011246427209972737, "loss": 1.8594, "step": 18270 }, { "epoch": 0.4903123658222413, "grad_norm": 0.39453125, "learning_rate": 0.0011246290852980692, "loss": 2.1683, "step": 18271 }, { "epoch": 0.49033920137398024, "grad_norm": 0.3671875, "learning_rate": 0.0011246154484479823, "loss": 2.0866, "step": 18272 }, { "epoch": 0.4903660369257192, "grad_norm": 0.357421875, "learning_rate": 0.0011246018104470437, "loss": 2.1111, "step": 18273 }, { "epoch": 0.49039287247745816, "grad_norm": 0.349609375, "learning_rate": 0.0011245881712952832, "loss": 2.1883, "step": 18274 }, { "epoch": 0.4904197080291971, "grad_norm": 0.337890625, "learning_rate": 0.0011245745309927304, "loss": 2.1289, "step": 18275 }, { "epoch": 0.49044654358093603, "grad_norm": 0.3359375, "learning_rate": 0.0011245608895394154, "loss": 2.0675, "step": 18276 }, { "epoch": 0.49047337913267497, "grad_norm": 0.333984375, "learning_rate": 0.0011245472469353683, "loss": 2.1643, "step": 18277 }, { "epoch": 0.4905002146844139, "grad_norm": 0.3203125, "learning_rate": 0.0011245336031806183, "loss": 2.1178, "step": 18278 }, { "epoch": 0.49052705023615284, "grad_norm": 0.314453125, "learning_rate": 0.0011245199582751966, "loss": 2.0827, "step": 18279 }, { "epoch": 0.4905538857878918, "grad_norm": 0.32421875, "learning_rate": 0.001124506312219132, "loss": 2.0954, "step": 18280 }, { "epoch": 0.49058072133963077, "grad_norm": 0.314453125, "learning_rate": 0.0011244926650124552, "loss": 2.0128, "step": 18281 }, { "epoch": 0.4906075568913697, "grad_norm": 0.32421875, "learning_rate": 0.0011244790166551955, "loss": 2.0769, "step": 18282 }, { "epoch": 0.49063439244310864, "grad_norm": 0.3125, "learning_rate": 0.0011244653671473833, "loss": 2.1131, "step": 18283 }, { "epoch": 0.49066122799484757, "grad_norm": 0.31640625, "learning_rate": 0.0011244517164890483, "loss": 2.0754, "step": 18284 }, { "epoch": 0.4906880635465865, "grad_norm": 0.310546875, "learning_rate": 0.0011244380646802206, "loss": 2.0641, "step": 18285 }, { "epoch": 0.49071489909832544, "grad_norm": 0.31640625, "learning_rate": 0.00112442441172093, "loss": 2.0306, "step": 18286 }, { "epoch": 0.49074173465006443, "grad_norm": 0.322265625, "learning_rate": 0.0011244107576112066, "loss": 2.0498, "step": 18287 }, { "epoch": 0.49076857020180337, "grad_norm": 0.3125, "learning_rate": 0.0011243971023510802, "loss": 2.0786, "step": 18288 }, { "epoch": 0.4907954057535423, "grad_norm": 0.314453125, "learning_rate": 0.001124383445940581, "loss": 2.0078, "step": 18289 }, { "epoch": 0.49082224130528124, "grad_norm": 0.322265625, "learning_rate": 0.0011243697883797387, "loss": 2.1199, "step": 18290 }, { "epoch": 0.4908490768570202, "grad_norm": 0.326171875, "learning_rate": 0.0011243561296685833, "loss": 2.0772, "step": 18291 }, { "epoch": 0.4908759124087591, "grad_norm": 0.322265625, "learning_rate": 0.001124342469807145, "loss": 2.1474, "step": 18292 }, { "epoch": 0.49090274796049804, "grad_norm": 0.31640625, "learning_rate": 0.0011243288087954533, "loss": 2.0312, "step": 18293 }, { "epoch": 0.49092958351223703, "grad_norm": 0.3203125, "learning_rate": 0.0011243151466335387, "loss": 2.0721, "step": 18294 }, { "epoch": 0.49095641906397597, "grad_norm": 0.3125, "learning_rate": 0.001124301483321431, "loss": 2.0429, "step": 18295 }, { "epoch": 0.4909832546157149, "grad_norm": 0.31640625, "learning_rate": 0.0011242878188591598, "loss": 2.0671, "step": 18296 }, { "epoch": 0.49101009016745384, "grad_norm": 0.302734375, "learning_rate": 0.0011242741532467555, "loss": 2.0035, "step": 18297 }, { "epoch": 0.4910369257191928, "grad_norm": 0.306640625, "learning_rate": 0.001124260486484248, "loss": 2.0571, "step": 18298 }, { "epoch": 0.4910637612709317, "grad_norm": 0.30859375, "learning_rate": 0.001124246818571667, "loss": 1.9297, "step": 18299 }, { "epoch": 0.4910905968226707, "grad_norm": 0.310546875, "learning_rate": 0.001124233149509043, "loss": 1.9321, "step": 18300 }, { "epoch": 0.49111743237440963, "grad_norm": 0.30859375, "learning_rate": 0.0011242194792964054, "loss": 2.0498, "step": 18301 }, { "epoch": 0.49114426792614857, "grad_norm": 0.30078125, "learning_rate": 0.0011242058079337848, "loss": 1.9972, "step": 18302 }, { "epoch": 0.4911711034778875, "grad_norm": 0.302734375, "learning_rate": 0.0011241921354212107, "loss": 2.0278, "step": 18303 }, { "epoch": 0.49119793902962644, "grad_norm": 0.30078125, "learning_rate": 0.0011241784617587135, "loss": 1.997, "step": 18304 }, { "epoch": 0.4912247745813654, "grad_norm": 0.306640625, "learning_rate": 0.0011241647869463228, "loss": 1.9633, "step": 18305 }, { "epoch": 0.4912516101331043, "grad_norm": 0.306640625, "learning_rate": 0.0011241511109840686, "loss": 2.0022, "step": 18306 }, { "epoch": 0.4912784456848433, "grad_norm": 0.30078125, "learning_rate": 0.0011241374338719816, "loss": 1.989, "step": 18307 }, { "epoch": 0.49130528123658224, "grad_norm": 0.3046875, "learning_rate": 0.0011241237556100909, "loss": 1.9967, "step": 18308 }, { "epoch": 0.49133211678832117, "grad_norm": 0.3046875, "learning_rate": 0.0011241100761984267, "loss": 2.0495, "step": 18309 }, { "epoch": 0.4913589523400601, "grad_norm": 0.306640625, "learning_rate": 0.0011240963956370194, "loss": 2.075, "step": 18310 }, { "epoch": 0.49138578789179904, "grad_norm": 0.30859375, "learning_rate": 0.0011240827139258989, "loss": 1.9468, "step": 18311 }, { "epoch": 0.491412623443538, "grad_norm": 0.30859375, "learning_rate": 0.001124069031065095, "loss": 2.0224, "step": 18312 }, { "epoch": 0.49143945899527697, "grad_norm": 0.294921875, "learning_rate": 0.0011240553470546377, "loss": 1.8918, "step": 18313 }, { "epoch": 0.4914662945470159, "grad_norm": 0.3046875, "learning_rate": 0.0011240416618945575, "loss": 2.0211, "step": 18314 }, { "epoch": 0.49149313009875484, "grad_norm": 0.3125, "learning_rate": 0.0011240279755848838, "loss": 2.0539, "step": 18315 }, { "epoch": 0.4915199656504938, "grad_norm": 0.30859375, "learning_rate": 0.0011240142881256468, "loss": 2.0097, "step": 18316 }, { "epoch": 0.4915468012022327, "grad_norm": 0.318359375, "learning_rate": 0.001124000599516877, "loss": 1.9879, "step": 18317 }, { "epoch": 0.49157363675397164, "grad_norm": 0.30859375, "learning_rate": 0.0011239869097586037, "loss": 2.0221, "step": 18318 }, { "epoch": 0.4916004723057106, "grad_norm": 0.318359375, "learning_rate": 0.0011239732188508575, "loss": 2.0702, "step": 18319 }, { "epoch": 0.49162730785744957, "grad_norm": 0.314453125, "learning_rate": 0.001123959526793668, "loss": 1.9249, "step": 18320 }, { "epoch": 0.4916541434091885, "grad_norm": 0.3046875, "learning_rate": 0.0011239458335870656, "loss": 2.0403, "step": 18321 }, { "epoch": 0.49168097896092744, "grad_norm": 0.306640625, "learning_rate": 0.00112393213923108, "loss": 1.9812, "step": 18322 }, { "epoch": 0.4917078145126664, "grad_norm": 0.30859375, "learning_rate": 0.0011239184437257417, "loss": 2.0561, "step": 18323 }, { "epoch": 0.4917346500644053, "grad_norm": 0.30859375, "learning_rate": 0.0011239047470710804, "loss": 2.0605, "step": 18324 }, { "epoch": 0.49176148561614424, "grad_norm": 0.3125, "learning_rate": 0.001123891049267126, "loss": 2.0687, "step": 18325 }, { "epoch": 0.49178832116788324, "grad_norm": 0.298828125, "learning_rate": 0.001123877350313909, "loss": 1.9664, "step": 18326 }, { "epoch": 0.49181515671962217, "grad_norm": 0.314453125, "learning_rate": 0.0011238636502114592, "loss": 2.0539, "step": 18327 }, { "epoch": 0.4918419922713611, "grad_norm": 0.326171875, "learning_rate": 0.0011238499489598067, "loss": 2.0203, "step": 18328 }, { "epoch": 0.49186882782310004, "grad_norm": 0.298828125, "learning_rate": 0.0011238362465589816, "loss": 1.9287, "step": 18329 }, { "epoch": 0.491895663374839, "grad_norm": 0.30078125, "learning_rate": 0.0011238225430090136, "loss": 1.8975, "step": 18330 }, { "epoch": 0.4919224989265779, "grad_norm": 0.306640625, "learning_rate": 0.0011238088383099332, "loss": 1.9651, "step": 18331 }, { "epoch": 0.49194933447831685, "grad_norm": 0.310546875, "learning_rate": 0.0011237951324617702, "loss": 1.9906, "step": 18332 }, { "epoch": 0.49197617003005584, "grad_norm": 0.3125, "learning_rate": 0.001123781425464555, "loss": 1.9208, "step": 18333 }, { "epoch": 0.4920030055817948, "grad_norm": 0.30859375, "learning_rate": 0.0011237677173183173, "loss": 1.9873, "step": 18334 }, { "epoch": 0.4920298411335337, "grad_norm": 0.310546875, "learning_rate": 0.0011237540080230874, "loss": 1.9958, "step": 18335 }, { "epoch": 0.49205667668527264, "grad_norm": 0.322265625, "learning_rate": 0.0011237402975788954, "loss": 2.0308, "step": 18336 }, { "epoch": 0.4920835122370116, "grad_norm": 0.31640625, "learning_rate": 0.001123726585985771, "loss": 2.0079, "step": 18337 }, { "epoch": 0.4921103477887505, "grad_norm": 0.30859375, "learning_rate": 0.0011237128732437447, "loss": 1.9822, "step": 18338 }, { "epoch": 0.4921371833404895, "grad_norm": 0.310546875, "learning_rate": 0.0011236991593528466, "loss": 1.9763, "step": 18339 }, { "epoch": 0.49216401889222844, "grad_norm": 0.310546875, "learning_rate": 0.0011236854443131064, "loss": 2.0075, "step": 18340 }, { "epoch": 0.4921908544439674, "grad_norm": 0.30859375, "learning_rate": 0.0011236717281245543, "loss": 1.9728, "step": 18341 }, { "epoch": 0.4922176899957063, "grad_norm": 0.3046875, "learning_rate": 0.001123658010787221, "loss": 1.9694, "step": 18342 }, { "epoch": 0.49224452554744524, "grad_norm": 0.302734375, "learning_rate": 0.0011236442923011356, "loss": 1.8412, "step": 18343 }, { "epoch": 0.4922713610991842, "grad_norm": 0.322265625, "learning_rate": 0.0011236305726663287, "loss": 1.9913, "step": 18344 }, { "epoch": 0.49229819665092317, "grad_norm": 0.314453125, "learning_rate": 0.0011236168518828306, "loss": 2.0098, "step": 18345 }, { "epoch": 0.4923250322026621, "grad_norm": 0.306640625, "learning_rate": 0.001123603129950671, "loss": 1.8914, "step": 18346 }, { "epoch": 0.49235186775440104, "grad_norm": 0.3046875, "learning_rate": 0.0011235894068698804, "loss": 1.8761, "step": 18347 }, { "epoch": 0.49237870330614, "grad_norm": 0.314453125, "learning_rate": 0.0011235756826404883, "loss": 1.9877, "step": 18348 }, { "epoch": 0.4924055388578789, "grad_norm": 0.302734375, "learning_rate": 0.0011235619572625257, "loss": 1.829, "step": 18349 }, { "epoch": 0.49243237440961785, "grad_norm": 0.3046875, "learning_rate": 0.001123548230736022, "loss": 1.8567, "step": 18350 }, { "epoch": 0.4924592099613568, "grad_norm": 0.322265625, "learning_rate": 0.0011235345030610074, "loss": 2.0527, "step": 18351 }, { "epoch": 0.49248604551309577, "grad_norm": 0.322265625, "learning_rate": 0.001123520774237512, "loss": 1.965, "step": 18352 }, { "epoch": 0.4925128810648347, "grad_norm": 0.318359375, "learning_rate": 0.0011235070442655662, "loss": 1.9235, "step": 18353 }, { "epoch": 0.49253971661657364, "grad_norm": 0.306640625, "learning_rate": 0.0011234933131452, "loss": 1.9487, "step": 18354 }, { "epoch": 0.4925665521683126, "grad_norm": 0.30078125, "learning_rate": 0.0011234795808764434, "loss": 1.8412, "step": 18355 }, { "epoch": 0.4925933877200515, "grad_norm": 0.302734375, "learning_rate": 0.0011234658474593268, "loss": 1.9107, "step": 18356 }, { "epoch": 0.49262022327179045, "grad_norm": 0.306640625, "learning_rate": 0.00112345211289388, "loss": 1.8536, "step": 18357 }, { "epoch": 0.49264705882352944, "grad_norm": 0.32421875, "learning_rate": 0.0011234383771801333, "loss": 1.9479, "step": 18358 }, { "epoch": 0.4926738943752684, "grad_norm": 0.318359375, "learning_rate": 0.0011234246403181167, "loss": 1.9268, "step": 18359 }, { "epoch": 0.4927007299270073, "grad_norm": 0.310546875, "learning_rate": 0.0011234109023078606, "loss": 1.9958, "step": 18360 }, { "epoch": 0.49272756547874624, "grad_norm": 0.318359375, "learning_rate": 0.0011233971631493948, "loss": 1.9076, "step": 18361 }, { "epoch": 0.4927544010304852, "grad_norm": 0.314453125, "learning_rate": 0.0011233834228427497, "loss": 1.9497, "step": 18362 }, { "epoch": 0.4927812365822241, "grad_norm": 0.3125, "learning_rate": 0.0011233696813879553, "loss": 1.9538, "step": 18363 }, { "epoch": 0.49280807213396305, "grad_norm": 0.31640625, "learning_rate": 0.0011233559387850418, "loss": 1.9929, "step": 18364 }, { "epoch": 0.49283490768570204, "grad_norm": 0.3125, "learning_rate": 0.0011233421950340395, "loss": 1.9529, "step": 18365 }, { "epoch": 0.492861743237441, "grad_norm": 0.30078125, "learning_rate": 0.0011233284501349782, "loss": 1.8788, "step": 18366 }, { "epoch": 0.4928885787891799, "grad_norm": 0.3125, "learning_rate": 0.0011233147040878885, "loss": 1.8637, "step": 18367 }, { "epoch": 0.49291541434091884, "grad_norm": 0.298828125, "learning_rate": 0.0011233009568928, "loss": 1.9052, "step": 18368 }, { "epoch": 0.4929422498926578, "grad_norm": 0.314453125, "learning_rate": 0.0011232872085497434, "loss": 1.9, "step": 18369 }, { "epoch": 0.4929690854443967, "grad_norm": 0.3203125, "learning_rate": 0.0011232734590587484, "loss": 1.9789, "step": 18370 }, { "epoch": 0.4929959209961357, "grad_norm": 0.32421875, "learning_rate": 0.0011232597084198455, "loss": 1.9711, "step": 18371 }, { "epoch": 0.49302275654787464, "grad_norm": 0.30859375, "learning_rate": 0.0011232459566330648, "loss": 1.8548, "step": 18372 }, { "epoch": 0.4930495920996136, "grad_norm": 0.314453125, "learning_rate": 0.0011232322036984362, "loss": 1.9162, "step": 18373 }, { "epoch": 0.4930764276513525, "grad_norm": 0.30859375, "learning_rate": 0.0011232184496159903, "loss": 1.8712, "step": 18374 }, { "epoch": 0.49310326320309145, "grad_norm": 0.3125, "learning_rate": 0.001123204694385757, "loss": 1.874, "step": 18375 }, { "epoch": 0.4931300987548304, "grad_norm": 0.314453125, "learning_rate": 0.0011231909380077663, "loss": 1.9428, "step": 18376 }, { "epoch": 0.4931569343065693, "grad_norm": 0.3125, "learning_rate": 0.001123177180482049, "loss": 1.9923, "step": 18377 }, { "epoch": 0.4931837698583083, "grad_norm": 0.3203125, "learning_rate": 0.0011231634218086346, "loss": 1.9136, "step": 18378 }, { "epoch": 0.49321060541004724, "grad_norm": 0.314453125, "learning_rate": 0.0011231496619875537, "loss": 1.9114, "step": 18379 }, { "epoch": 0.4932374409617862, "grad_norm": 0.310546875, "learning_rate": 0.0011231359010188365, "loss": 1.897, "step": 18380 }, { "epoch": 0.4932642765135251, "grad_norm": 0.3125, "learning_rate": 0.0011231221389025126, "loss": 1.9254, "step": 18381 }, { "epoch": 0.49329111206526405, "grad_norm": 0.314453125, "learning_rate": 0.0011231083756386131, "loss": 1.9743, "step": 18382 }, { "epoch": 0.493317947617003, "grad_norm": 0.30859375, "learning_rate": 0.0011230946112271674, "loss": 1.9098, "step": 18383 }, { "epoch": 0.493344783168742, "grad_norm": 0.3125, "learning_rate": 0.0011230808456682063, "loss": 1.9504, "step": 18384 }, { "epoch": 0.4933716187204809, "grad_norm": 0.310546875, "learning_rate": 0.0011230670789617594, "loss": 1.8749, "step": 18385 }, { "epoch": 0.49339845427221984, "grad_norm": 0.30859375, "learning_rate": 0.0011230533111078575, "loss": 1.8506, "step": 18386 }, { "epoch": 0.4934252898239588, "grad_norm": 0.318359375, "learning_rate": 0.0011230395421065304, "loss": 1.9143, "step": 18387 }, { "epoch": 0.4934521253756977, "grad_norm": 0.3203125, "learning_rate": 0.0011230257719578082, "loss": 1.9373, "step": 18388 }, { "epoch": 0.49347896092743665, "grad_norm": 0.31640625, "learning_rate": 0.0011230120006617216, "loss": 1.8578, "step": 18389 }, { "epoch": 0.4935057964791756, "grad_norm": 0.318359375, "learning_rate": 0.0011229982282183004, "loss": 1.9233, "step": 18390 }, { "epoch": 0.4935326320309146, "grad_norm": 0.310546875, "learning_rate": 0.0011229844546275752, "loss": 1.8794, "step": 18391 }, { "epoch": 0.4935594675826535, "grad_norm": 0.326171875, "learning_rate": 0.0011229706798895757, "loss": 1.8581, "step": 18392 }, { "epoch": 0.49358630313439245, "grad_norm": 0.314453125, "learning_rate": 0.0011229569040043326, "loss": 1.8032, "step": 18393 }, { "epoch": 0.4936131386861314, "grad_norm": 0.31640625, "learning_rate": 0.0011229431269718759, "loss": 1.9119, "step": 18394 }, { "epoch": 0.4936399742378703, "grad_norm": 0.31640625, "learning_rate": 0.0011229293487922357, "loss": 1.8894, "step": 18395 }, { "epoch": 0.49366680978960925, "grad_norm": 0.310546875, "learning_rate": 0.0011229155694654424, "loss": 1.8299, "step": 18396 }, { "epoch": 0.49369364534134824, "grad_norm": 0.326171875, "learning_rate": 0.0011229017889915261, "loss": 2.0562, "step": 18397 }, { "epoch": 0.4937204808930872, "grad_norm": 0.3203125, "learning_rate": 0.0011228880073705172, "loss": 1.8928, "step": 18398 }, { "epoch": 0.4937473164448261, "grad_norm": 0.3203125, "learning_rate": 0.0011228742246024459, "loss": 1.9094, "step": 18399 }, { "epoch": 0.49377415199656505, "grad_norm": 0.30859375, "learning_rate": 0.0011228604406873424, "loss": 1.8825, "step": 18400 }, { "epoch": 0.493800987548304, "grad_norm": 0.333984375, "learning_rate": 0.0011228466556252369, "loss": 2.0547, "step": 18401 }, { "epoch": 0.4938278231000429, "grad_norm": 0.314453125, "learning_rate": 0.0011228328694161595, "loss": 1.839, "step": 18402 }, { "epoch": 0.4938546586517819, "grad_norm": 0.306640625, "learning_rate": 0.0011228190820601407, "loss": 1.9381, "step": 18403 }, { "epoch": 0.49388149420352084, "grad_norm": 0.318359375, "learning_rate": 0.0011228052935572108, "loss": 1.9151, "step": 18404 }, { "epoch": 0.4939083297552598, "grad_norm": 0.314453125, "learning_rate": 0.0011227915039073997, "loss": 1.9196, "step": 18405 }, { "epoch": 0.4939351653069987, "grad_norm": 0.31640625, "learning_rate": 0.001122777713110738, "loss": 1.9023, "step": 18406 }, { "epoch": 0.49396200085873765, "grad_norm": 0.32421875, "learning_rate": 0.0011227639211672555, "loss": 1.9697, "step": 18407 }, { "epoch": 0.4939888364104766, "grad_norm": 0.32421875, "learning_rate": 0.0011227501280769833, "loss": 1.9587, "step": 18408 }, { "epoch": 0.4940156719622155, "grad_norm": 0.326171875, "learning_rate": 0.0011227363338399505, "loss": 1.9668, "step": 18409 }, { "epoch": 0.4940425075139545, "grad_norm": 0.322265625, "learning_rate": 0.0011227225384561883, "loss": 1.8777, "step": 18410 }, { "epoch": 0.49406934306569344, "grad_norm": 0.326171875, "learning_rate": 0.0011227087419257266, "loss": 1.9291, "step": 18411 }, { "epoch": 0.4940961786174324, "grad_norm": 0.318359375, "learning_rate": 0.0011226949442485958, "loss": 1.8788, "step": 18412 }, { "epoch": 0.4941230141691713, "grad_norm": 0.330078125, "learning_rate": 0.001122681145424826, "loss": 1.8453, "step": 18413 }, { "epoch": 0.49414984972091025, "grad_norm": 0.326171875, "learning_rate": 0.0011226673454544475, "loss": 1.8563, "step": 18414 }, { "epoch": 0.4941766852726492, "grad_norm": 0.31640625, "learning_rate": 0.0011226535443374906, "loss": 1.8362, "step": 18415 }, { "epoch": 0.4942035208243882, "grad_norm": 0.314453125, "learning_rate": 0.0011226397420739857, "loss": 1.8223, "step": 18416 }, { "epoch": 0.4942303563761271, "grad_norm": 0.400390625, "learning_rate": 0.0011226259386639627, "loss": 2.176, "step": 18417 }, { "epoch": 0.49425719192786605, "grad_norm": 0.37890625, "learning_rate": 0.0011226121341074526, "loss": 2.1224, "step": 18418 }, { "epoch": 0.494284027479605, "grad_norm": 0.357421875, "learning_rate": 0.0011225983284044849, "loss": 2.2458, "step": 18419 }, { "epoch": 0.4943108630313439, "grad_norm": 0.345703125, "learning_rate": 0.0011225845215550904, "loss": 2.2297, "step": 18420 }, { "epoch": 0.49433769858308285, "grad_norm": 0.326171875, "learning_rate": 0.001122570713559299, "loss": 2.0576, "step": 18421 }, { "epoch": 0.4943645341348218, "grad_norm": 0.32421875, "learning_rate": 0.0011225569044171413, "loss": 2.0584, "step": 18422 }, { "epoch": 0.4943913696865608, "grad_norm": 0.337890625, "learning_rate": 0.0011225430941286475, "loss": 2.1021, "step": 18423 }, { "epoch": 0.4944182052382997, "grad_norm": 0.328125, "learning_rate": 0.0011225292826938479, "loss": 2.1439, "step": 18424 }, { "epoch": 0.49444504079003865, "grad_norm": 0.31640625, "learning_rate": 0.0011225154701127726, "loss": 2.1371, "step": 18425 }, { "epoch": 0.4944718763417776, "grad_norm": 0.314453125, "learning_rate": 0.0011225016563854522, "loss": 2.1135, "step": 18426 }, { "epoch": 0.4944987118935165, "grad_norm": 0.30859375, "learning_rate": 0.0011224878415119171, "loss": 2.0599, "step": 18427 }, { "epoch": 0.49452554744525545, "grad_norm": 0.3125, "learning_rate": 0.0011224740254921972, "loss": 2.063, "step": 18428 }, { "epoch": 0.49455238299699444, "grad_norm": 0.31640625, "learning_rate": 0.001122460208326323, "loss": 2.1013, "step": 18429 }, { "epoch": 0.4945792185487334, "grad_norm": 0.310546875, "learning_rate": 0.0011224463900143247, "loss": 2.0496, "step": 18430 }, { "epoch": 0.4946060541004723, "grad_norm": 0.31640625, "learning_rate": 0.0011224325705562331, "loss": 2.0955, "step": 18431 }, { "epoch": 0.49463288965221125, "grad_norm": 0.30859375, "learning_rate": 0.001122418749952078, "loss": 2.0536, "step": 18432 }, { "epoch": 0.4946597252039502, "grad_norm": 0.3125, "learning_rate": 0.0011224049282018897, "loss": 2.092, "step": 18433 }, { "epoch": 0.4946865607556891, "grad_norm": 0.30859375, "learning_rate": 0.0011223911053056988, "loss": 2.0603, "step": 18434 }, { "epoch": 0.49471339630742805, "grad_norm": 0.306640625, "learning_rate": 0.0011223772812635354, "loss": 1.98, "step": 18435 }, { "epoch": 0.49474023185916705, "grad_norm": 0.298828125, "learning_rate": 0.00112236345607543, "loss": 2.0116, "step": 18436 }, { "epoch": 0.494767067410906, "grad_norm": 0.30859375, "learning_rate": 0.0011223496297414128, "loss": 1.9694, "step": 18437 }, { "epoch": 0.4947939029626449, "grad_norm": 0.302734375, "learning_rate": 0.001122335802261514, "loss": 2.0083, "step": 18438 }, { "epoch": 0.49482073851438385, "grad_norm": 0.31640625, "learning_rate": 0.0011223219736357646, "loss": 2.0619, "step": 18439 }, { "epoch": 0.4948475740661228, "grad_norm": 0.3125, "learning_rate": 0.0011223081438641942, "loss": 2.029, "step": 18440 }, { "epoch": 0.4948744096178617, "grad_norm": 0.306640625, "learning_rate": 0.0011222943129468334, "loss": 1.9843, "step": 18441 }, { "epoch": 0.4949012451696007, "grad_norm": 0.298828125, "learning_rate": 0.0011222804808837124, "loss": 2.0384, "step": 18442 }, { "epoch": 0.49492808072133965, "grad_norm": 0.3125, "learning_rate": 0.001122266647674862, "loss": 2.1336, "step": 18443 }, { "epoch": 0.4949549162730786, "grad_norm": 0.302734375, "learning_rate": 0.0011222528133203122, "loss": 2.004, "step": 18444 }, { "epoch": 0.4949817518248175, "grad_norm": 0.31640625, "learning_rate": 0.0011222389778200931, "loss": 2.1243, "step": 18445 }, { "epoch": 0.49500858737655645, "grad_norm": 0.3046875, "learning_rate": 0.0011222251411742354, "loss": 2.0489, "step": 18446 }, { "epoch": 0.4950354229282954, "grad_norm": 0.30859375, "learning_rate": 0.0011222113033827696, "loss": 1.9848, "step": 18447 }, { "epoch": 0.4950622584800343, "grad_norm": 0.3125, "learning_rate": 0.0011221974644457257, "loss": 2.1066, "step": 18448 }, { "epoch": 0.4950890940317733, "grad_norm": 0.298828125, "learning_rate": 0.001122183624363134, "loss": 2.0251, "step": 18449 }, { "epoch": 0.49511592958351225, "grad_norm": 0.306640625, "learning_rate": 0.0011221697831350253, "loss": 2.0322, "step": 18450 }, { "epoch": 0.4951427651352512, "grad_norm": 0.30859375, "learning_rate": 0.0011221559407614296, "loss": 1.9679, "step": 18451 }, { "epoch": 0.4951696006869901, "grad_norm": 0.3125, "learning_rate": 0.0011221420972423774, "loss": 2.0644, "step": 18452 }, { "epoch": 0.49519643623872905, "grad_norm": 0.326171875, "learning_rate": 0.0011221282525778991, "loss": 2.0208, "step": 18453 }, { "epoch": 0.495223271790468, "grad_norm": 0.3046875, "learning_rate": 0.001122114406768025, "loss": 2.0388, "step": 18454 }, { "epoch": 0.495250107342207, "grad_norm": 0.298828125, "learning_rate": 0.0011221005598127858, "loss": 2.0113, "step": 18455 }, { "epoch": 0.4952769428939459, "grad_norm": 0.306640625, "learning_rate": 0.0011220867117122113, "loss": 2.0279, "step": 18456 }, { "epoch": 0.49530377844568485, "grad_norm": 0.30078125, "learning_rate": 0.001122072862466332, "loss": 2.0408, "step": 18457 }, { "epoch": 0.4953306139974238, "grad_norm": 0.3046875, "learning_rate": 0.0011220590120751787, "loss": 2.0314, "step": 18458 }, { "epoch": 0.4953574495491627, "grad_norm": 0.302734375, "learning_rate": 0.0011220451605387814, "loss": 2.0498, "step": 18459 }, { "epoch": 0.49538428510090166, "grad_norm": 0.294921875, "learning_rate": 0.0011220313078571706, "loss": 1.8785, "step": 18460 }, { "epoch": 0.4954111206526406, "grad_norm": 0.302734375, "learning_rate": 0.0011220174540303768, "loss": 1.9444, "step": 18461 }, { "epoch": 0.4954379562043796, "grad_norm": 0.298828125, "learning_rate": 0.0011220035990584301, "loss": 1.8963, "step": 18462 }, { "epoch": 0.4954647917561185, "grad_norm": 0.306640625, "learning_rate": 0.0011219897429413613, "loss": 1.9849, "step": 18463 }, { "epoch": 0.49549162730785745, "grad_norm": 0.306640625, "learning_rate": 0.0011219758856792002, "loss": 2.0304, "step": 18464 }, { "epoch": 0.4955184628595964, "grad_norm": 0.30859375, "learning_rate": 0.001121962027271978, "loss": 1.9802, "step": 18465 }, { "epoch": 0.4955452984113353, "grad_norm": 0.294921875, "learning_rate": 0.0011219481677197244, "loss": 1.8602, "step": 18466 }, { "epoch": 0.49557213396307426, "grad_norm": 0.3046875, "learning_rate": 0.0011219343070224702, "loss": 1.9831, "step": 18467 }, { "epoch": 0.49559896951481325, "grad_norm": 0.30078125, "learning_rate": 0.0011219204451802457, "loss": 1.9565, "step": 18468 }, { "epoch": 0.4956258050665522, "grad_norm": 0.326171875, "learning_rate": 0.0011219065821930813, "loss": 2.0776, "step": 18469 }, { "epoch": 0.4956526406182911, "grad_norm": 0.296875, "learning_rate": 0.0011218927180610073, "loss": 1.887, "step": 18470 }, { "epoch": 0.49567947617003005, "grad_norm": 0.302734375, "learning_rate": 0.0011218788527840545, "loss": 1.9743, "step": 18471 }, { "epoch": 0.495706311721769, "grad_norm": 0.302734375, "learning_rate": 0.0011218649863622527, "loss": 1.978, "step": 18472 }, { "epoch": 0.4957331472735079, "grad_norm": 0.30078125, "learning_rate": 0.0011218511187956327, "loss": 1.9398, "step": 18473 }, { "epoch": 0.4957599828252469, "grad_norm": 0.30859375, "learning_rate": 0.0011218372500842251, "loss": 2.0113, "step": 18474 }, { "epoch": 0.49578681837698585, "grad_norm": 0.30859375, "learning_rate": 0.00112182338022806, "loss": 2.0301, "step": 18475 }, { "epoch": 0.4958136539287248, "grad_norm": 0.302734375, "learning_rate": 0.001121809509227168, "loss": 2.0149, "step": 18476 }, { "epoch": 0.4958404894804637, "grad_norm": 0.30078125, "learning_rate": 0.001121795637081579, "loss": 2.0061, "step": 18477 }, { "epoch": 0.49586732503220265, "grad_norm": 0.306640625, "learning_rate": 0.0011217817637913243, "loss": 2.0021, "step": 18478 }, { "epoch": 0.4958941605839416, "grad_norm": 0.314453125, "learning_rate": 0.0011217678893564338, "loss": 2.0293, "step": 18479 }, { "epoch": 0.4959209961356805, "grad_norm": 0.310546875, "learning_rate": 0.0011217540137769381, "loss": 2.0336, "step": 18480 }, { "epoch": 0.4959478316874195, "grad_norm": 0.296875, "learning_rate": 0.0011217401370528675, "loss": 1.903, "step": 18481 }, { "epoch": 0.49597466723915845, "grad_norm": 0.30859375, "learning_rate": 0.0011217262591842528, "loss": 1.9939, "step": 18482 }, { "epoch": 0.4960015027908974, "grad_norm": 0.306640625, "learning_rate": 0.0011217123801711242, "loss": 1.9822, "step": 18483 }, { "epoch": 0.4960283383426363, "grad_norm": 0.314453125, "learning_rate": 0.0011216985000135117, "loss": 2.0048, "step": 18484 }, { "epoch": 0.49605517389437526, "grad_norm": 0.30859375, "learning_rate": 0.0011216846187114464, "loss": 2.0139, "step": 18485 }, { "epoch": 0.4960820094461142, "grad_norm": 0.302734375, "learning_rate": 0.0011216707362649587, "loss": 1.936, "step": 18486 }, { "epoch": 0.4961088449978532, "grad_norm": 0.310546875, "learning_rate": 0.0011216568526740787, "loss": 1.9795, "step": 18487 }, { "epoch": 0.4961356805495921, "grad_norm": 0.3125, "learning_rate": 0.001121642967938837, "loss": 1.9028, "step": 18488 }, { "epoch": 0.49616251610133105, "grad_norm": 0.3046875, "learning_rate": 0.0011216290820592642, "loss": 1.9968, "step": 18489 }, { "epoch": 0.49618935165307, "grad_norm": 0.3203125, "learning_rate": 0.0011216151950353907, "loss": 2.0003, "step": 18490 }, { "epoch": 0.4962161872048089, "grad_norm": 0.318359375, "learning_rate": 0.001121601306867247, "loss": 1.9916, "step": 18491 }, { "epoch": 0.49624302275654786, "grad_norm": 0.30859375, "learning_rate": 0.0011215874175548634, "loss": 1.9972, "step": 18492 }, { "epoch": 0.4962698583082868, "grad_norm": 0.3046875, "learning_rate": 0.0011215735270982703, "loss": 1.9552, "step": 18493 }, { "epoch": 0.4962966938600258, "grad_norm": 0.306640625, "learning_rate": 0.0011215596354974984, "loss": 1.9554, "step": 18494 }, { "epoch": 0.4963235294117647, "grad_norm": 0.3046875, "learning_rate": 0.0011215457427525782, "loss": 1.9893, "step": 18495 }, { "epoch": 0.49635036496350365, "grad_norm": 0.302734375, "learning_rate": 0.00112153184886354, "loss": 1.9034, "step": 18496 }, { "epoch": 0.4963772005152426, "grad_norm": 0.30859375, "learning_rate": 0.0011215179538304142, "loss": 1.9705, "step": 18497 }, { "epoch": 0.4964040360669815, "grad_norm": 0.3046875, "learning_rate": 0.0011215040576532317, "loss": 1.9247, "step": 18498 }, { "epoch": 0.49643087161872046, "grad_norm": 0.3125, "learning_rate": 0.0011214901603320228, "loss": 1.9928, "step": 18499 }, { "epoch": 0.49645770717045945, "grad_norm": 0.302734375, "learning_rate": 0.0011214762618668177, "loss": 1.9097, "step": 18500 }, { "epoch": 0.4964845427221984, "grad_norm": 0.31640625, "learning_rate": 0.0011214623622576472, "loss": 1.9744, "step": 18501 }, { "epoch": 0.4965113782739373, "grad_norm": 0.318359375, "learning_rate": 0.0011214484615045415, "loss": 1.9227, "step": 18502 }, { "epoch": 0.49653821382567626, "grad_norm": 0.3125, "learning_rate": 0.0011214345596075315, "loss": 1.9611, "step": 18503 }, { "epoch": 0.4965650493774152, "grad_norm": 0.314453125, "learning_rate": 0.0011214206565666475, "loss": 1.9433, "step": 18504 }, { "epoch": 0.4965918849291541, "grad_norm": 0.3125, "learning_rate": 0.00112140675238192, "loss": 2.0246, "step": 18505 }, { "epoch": 0.49661872048089306, "grad_norm": 0.30078125, "learning_rate": 0.0011213928470533792, "loss": 1.8876, "step": 18506 }, { "epoch": 0.49664555603263205, "grad_norm": 0.31640625, "learning_rate": 0.001121378940581056, "loss": 1.9421, "step": 18507 }, { "epoch": 0.496672391584371, "grad_norm": 0.298828125, "learning_rate": 0.001121365032964981, "loss": 1.9406, "step": 18508 }, { "epoch": 0.4966992271361099, "grad_norm": 0.314453125, "learning_rate": 0.0011213511242051844, "loss": 1.9504, "step": 18509 }, { "epoch": 0.49672606268784886, "grad_norm": 0.30859375, "learning_rate": 0.0011213372143016969, "loss": 1.8277, "step": 18510 }, { "epoch": 0.4967528982395878, "grad_norm": 0.30859375, "learning_rate": 0.0011213233032545489, "loss": 1.8785, "step": 18511 }, { "epoch": 0.4967797337913267, "grad_norm": 0.326171875, "learning_rate": 0.0011213093910637708, "loss": 1.9969, "step": 18512 }, { "epoch": 0.4968065693430657, "grad_norm": 0.31640625, "learning_rate": 0.0011212954777293934, "loss": 2.0037, "step": 18513 }, { "epoch": 0.49683340489480465, "grad_norm": 0.318359375, "learning_rate": 0.001121281563251447, "loss": 1.9633, "step": 18514 }, { "epoch": 0.4968602404465436, "grad_norm": 0.310546875, "learning_rate": 0.0011212676476299623, "loss": 2.0109, "step": 18515 }, { "epoch": 0.4968870759982825, "grad_norm": 0.3046875, "learning_rate": 0.0011212537308649697, "loss": 1.8911, "step": 18516 }, { "epoch": 0.49691391155002146, "grad_norm": 0.306640625, "learning_rate": 0.0011212398129564999, "loss": 1.9257, "step": 18517 }, { "epoch": 0.4969407471017604, "grad_norm": 0.29296875, "learning_rate": 0.001121225893904583, "loss": 1.8538, "step": 18518 }, { "epoch": 0.49696758265349933, "grad_norm": 0.3203125, "learning_rate": 0.00112121197370925, "loss": 1.875, "step": 18519 }, { "epoch": 0.4969944182052383, "grad_norm": 0.3125, "learning_rate": 0.0011211980523705314, "loss": 1.9282, "step": 18520 }, { "epoch": 0.49702125375697725, "grad_norm": 0.3203125, "learning_rate": 0.0011211841298884573, "loss": 1.9658, "step": 18521 }, { "epoch": 0.4970480893087162, "grad_norm": 0.306640625, "learning_rate": 0.0011211702062630588, "loss": 1.9124, "step": 18522 }, { "epoch": 0.4970749248604551, "grad_norm": 0.30859375, "learning_rate": 0.0011211562814943663, "loss": 1.8528, "step": 18523 }, { "epoch": 0.49710176041219406, "grad_norm": 0.310546875, "learning_rate": 0.00112114235558241, "loss": 1.8952, "step": 18524 }, { "epoch": 0.497128595963933, "grad_norm": 0.310546875, "learning_rate": 0.0011211284285272208, "loss": 1.8265, "step": 18525 }, { "epoch": 0.497155431515672, "grad_norm": 0.3125, "learning_rate": 0.001121114500328829, "loss": 1.874, "step": 18526 }, { "epoch": 0.4971822670674109, "grad_norm": 0.3125, "learning_rate": 0.0011211005709872656, "loss": 1.9463, "step": 18527 }, { "epoch": 0.49720910261914986, "grad_norm": 0.3125, "learning_rate": 0.0011210866405025608, "loss": 1.8497, "step": 18528 }, { "epoch": 0.4972359381708888, "grad_norm": 0.318359375, "learning_rate": 0.001121072708874745, "loss": 1.9513, "step": 18529 }, { "epoch": 0.4972627737226277, "grad_norm": 0.31640625, "learning_rate": 0.001121058776103849, "loss": 1.889, "step": 18530 }, { "epoch": 0.49728960927436666, "grad_norm": 0.310546875, "learning_rate": 0.0011210448421899036, "loss": 1.9326, "step": 18531 }, { "epoch": 0.4973164448261056, "grad_norm": 0.314453125, "learning_rate": 0.0011210309071329387, "loss": 1.903, "step": 18532 }, { "epoch": 0.4973432803778446, "grad_norm": 0.31640625, "learning_rate": 0.0011210169709329856, "loss": 1.9724, "step": 18533 }, { "epoch": 0.4973701159295835, "grad_norm": 0.314453125, "learning_rate": 0.0011210030335900745, "loss": 1.9074, "step": 18534 }, { "epoch": 0.49739695148132246, "grad_norm": 0.328125, "learning_rate": 0.0011209890951042359, "loss": 1.9019, "step": 18535 }, { "epoch": 0.4974237870330614, "grad_norm": 0.328125, "learning_rate": 0.0011209751554755007, "loss": 1.9918, "step": 18536 }, { "epoch": 0.49745062258480033, "grad_norm": 0.314453125, "learning_rate": 0.0011209612147038992, "loss": 1.9536, "step": 18537 }, { "epoch": 0.49747745813653926, "grad_norm": 0.310546875, "learning_rate": 0.001120947272789462, "loss": 1.8748, "step": 18538 }, { "epoch": 0.49750429368827825, "grad_norm": 0.302734375, "learning_rate": 0.0011209333297322198, "loss": 1.8838, "step": 18539 }, { "epoch": 0.4975311292400172, "grad_norm": 0.30859375, "learning_rate": 0.0011209193855322031, "loss": 1.8373, "step": 18540 }, { "epoch": 0.4975579647917561, "grad_norm": 0.318359375, "learning_rate": 0.0011209054401894426, "loss": 1.9468, "step": 18541 }, { "epoch": 0.49758480034349506, "grad_norm": 0.3125, "learning_rate": 0.0011208914937039687, "loss": 1.9015, "step": 18542 }, { "epoch": 0.497611635895234, "grad_norm": 0.310546875, "learning_rate": 0.0011208775460758123, "loss": 1.9139, "step": 18543 }, { "epoch": 0.49763847144697293, "grad_norm": 0.31640625, "learning_rate": 0.0011208635973050034, "loss": 1.938, "step": 18544 }, { "epoch": 0.4976653069987119, "grad_norm": 0.3203125, "learning_rate": 0.0011208496473915734, "loss": 1.9494, "step": 18545 }, { "epoch": 0.49769214255045086, "grad_norm": 0.302734375, "learning_rate": 0.0011208356963355523, "loss": 1.8195, "step": 18546 }, { "epoch": 0.4977189781021898, "grad_norm": 0.3125, "learning_rate": 0.0011208217441369712, "loss": 1.9723, "step": 18547 }, { "epoch": 0.4977458136539287, "grad_norm": 0.31640625, "learning_rate": 0.0011208077907958604, "loss": 1.9185, "step": 18548 }, { "epoch": 0.49777264920566766, "grad_norm": 0.310546875, "learning_rate": 0.0011207938363122503, "loss": 1.9023, "step": 18549 }, { "epoch": 0.4977994847574066, "grad_norm": 0.3125, "learning_rate": 0.0011207798806861717, "loss": 1.8781, "step": 18550 }, { "epoch": 0.49782632030914553, "grad_norm": 0.3203125, "learning_rate": 0.0011207659239176554, "loss": 1.8758, "step": 18551 }, { "epoch": 0.4978531558608845, "grad_norm": 0.318359375, "learning_rate": 0.0011207519660067318, "loss": 1.9334, "step": 18552 }, { "epoch": 0.49787999141262346, "grad_norm": 0.3359375, "learning_rate": 0.0011207380069534316, "loss": 1.9371, "step": 18553 }, { "epoch": 0.4979068269643624, "grad_norm": 0.318359375, "learning_rate": 0.0011207240467577856, "loss": 1.8212, "step": 18554 }, { "epoch": 0.4979336625161013, "grad_norm": 0.318359375, "learning_rate": 0.001120710085419824, "loss": 1.9778, "step": 18555 }, { "epoch": 0.49796049806784026, "grad_norm": 0.310546875, "learning_rate": 0.0011206961229395778, "loss": 1.8242, "step": 18556 }, { "epoch": 0.4979873336195792, "grad_norm": 0.32421875, "learning_rate": 0.0011206821593170773, "loss": 1.8719, "step": 18557 }, { "epoch": 0.4980141691713182, "grad_norm": 0.31640625, "learning_rate": 0.0011206681945523536, "loss": 1.8717, "step": 18558 }, { "epoch": 0.4980410047230571, "grad_norm": 0.330078125, "learning_rate": 0.001120654228645437, "loss": 1.947, "step": 18559 }, { "epoch": 0.49806784027479606, "grad_norm": 0.392578125, "learning_rate": 0.001120640261596358, "loss": 2.1336, "step": 18560 }, { "epoch": 0.498094675826535, "grad_norm": 0.37109375, "learning_rate": 0.0011206262934051476, "loss": 2.2352, "step": 18561 }, { "epoch": 0.49812151137827393, "grad_norm": 0.341796875, "learning_rate": 0.0011206123240718364, "loss": 2.1097, "step": 18562 }, { "epoch": 0.49814834693001286, "grad_norm": 0.33984375, "learning_rate": 0.0011205983535964548, "loss": 2.1284, "step": 18563 }, { "epoch": 0.4981751824817518, "grad_norm": 0.33203125, "learning_rate": 0.0011205843819790337, "loss": 2.1355, "step": 18564 }, { "epoch": 0.4982020180334908, "grad_norm": 0.326171875, "learning_rate": 0.0011205704092196035, "loss": 2.0843, "step": 18565 }, { "epoch": 0.4982288535852297, "grad_norm": 0.3359375, "learning_rate": 0.0011205564353181949, "loss": 2.1193, "step": 18566 }, { "epoch": 0.49825568913696866, "grad_norm": 0.314453125, "learning_rate": 0.0011205424602748388, "loss": 2.0759, "step": 18567 }, { "epoch": 0.4982825246887076, "grad_norm": 0.314453125, "learning_rate": 0.0011205284840895655, "loss": 2.1154, "step": 18568 }, { "epoch": 0.49830936024044653, "grad_norm": 0.31640625, "learning_rate": 0.001120514506762406, "loss": 2.0835, "step": 18569 }, { "epoch": 0.49833619579218547, "grad_norm": 0.32421875, "learning_rate": 0.0011205005282933906, "loss": 2.0576, "step": 18570 }, { "epoch": 0.49836303134392446, "grad_norm": 0.326171875, "learning_rate": 0.0011204865486825505, "loss": 2.0498, "step": 18571 }, { "epoch": 0.4983898668956634, "grad_norm": 0.3125, "learning_rate": 0.0011204725679299157, "loss": 2.0762, "step": 18572 }, { "epoch": 0.4984167024474023, "grad_norm": 0.322265625, "learning_rate": 0.0011204585860355174, "loss": 2.107, "step": 18573 }, { "epoch": 0.49844353799914126, "grad_norm": 0.3203125, "learning_rate": 0.0011204446029993863, "loss": 2.1409, "step": 18574 }, { "epoch": 0.4984703735508802, "grad_norm": 0.322265625, "learning_rate": 0.0011204306188215525, "loss": 2.0827, "step": 18575 }, { "epoch": 0.49849720910261913, "grad_norm": 0.314453125, "learning_rate": 0.001120416633502047, "loss": 2.1482, "step": 18576 }, { "epoch": 0.49852404465435807, "grad_norm": 0.314453125, "learning_rate": 0.0011204026470409008, "loss": 2.1539, "step": 18577 }, { "epoch": 0.49855088020609706, "grad_norm": 0.31640625, "learning_rate": 0.0011203886594381442, "loss": 2.1375, "step": 18578 }, { "epoch": 0.498577715757836, "grad_norm": 0.3125, "learning_rate": 0.001120374670693808, "loss": 2.0453, "step": 18579 }, { "epoch": 0.49860455130957493, "grad_norm": 0.302734375, "learning_rate": 0.0011203606808079228, "loss": 1.9912, "step": 18580 }, { "epoch": 0.49863138686131386, "grad_norm": 0.310546875, "learning_rate": 0.0011203466897805195, "loss": 2.1126, "step": 18581 }, { "epoch": 0.4986582224130528, "grad_norm": 0.302734375, "learning_rate": 0.0011203326976116283, "loss": 2.0419, "step": 18582 }, { "epoch": 0.49868505796479173, "grad_norm": 0.310546875, "learning_rate": 0.0011203187043012806, "loss": 2.1089, "step": 18583 }, { "epoch": 0.4987118935165307, "grad_norm": 0.31640625, "learning_rate": 0.0011203047098495068, "loss": 2.1346, "step": 18584 }, { "epoch": 0.49873872906826966, "grad_norm": 0.310546875, "learning_rate": 0.0011202907142563371, "loss": 2.0536, "step": 18585 }, { "epoch": 0.4987655646200086, "grad_norm": 0.30078125, "learning_rate": 0.001120276717521803, "loss": 1.9993, "step": 18586 }, { "epoch": 0.49879240017174753, "grad_norm": 0.30078125, "learning_rate": 0.001120262719645935, "loss": 1.9951, "step": 18587 }, { "epoch": 0.49881923572348646, "grad_norm": 0.30859375, "learning_rate": 0.0011202487206287634, "loss": 2.0252, "step": 18588 }, { "epoch": 0.4988460712752254, "grad_norm": 0.306640625, "learning_rate": 0.001120234720470319, "loss": 1.9981, "step": 18589 }, { "epoch": 0.49887290682696434, "grad_norm": 0.3125, "learning_rate": 0.0011202207191706329, "loss": 2.0583, "step": 18590 }, { "epoch": 0.4988997423787033, "grad_norm": 0.310546875, "learning_rate": 0.0011202067167297356, "loss": 1.9896, "step": 18591 }, { "epoch": 0.49892657793044226, "grad_norm": 0.310546875, "learning_rate": 0.0011201927131476576, "loss": 2.0324, "step": 18592 }, { "epoch": 0.4989534134821812, "grad_norm": 0.3125, "learning_rate": 0.0011201787084244301, "loss": 2.0953, "step": 18593 }, { "epoch": 0.49898024903392013, "grad_norm": 0.302734375, "learning_rate": 0.0011201647025600833, "loss": 2.0352, "step": 18594 }, { "epoch": 0.49900708458565907, "grad_norm": 0.294921875, "learning_rate": 0.0011201506955546485, "loss": 2.0338, "step": 18595 }, { "epoch": 0.499033920137398, "grad_norm": 0.30078125, "learning_rate": 0.0011201366874081559, "loss": 2.0751, "step": 18596 }, { "epoch": 0.499060755689137, "grad_norm": 0.30859375, "learning_rate": 0.0011201226781206363, "loss": 2.0426, "step": 18597 }, { "epoch": 0.4990875912408759, "grad_norm": 0.30859375, "learning_rate": 0.0011201086676921209, "loss": 2.0561, "step": 18598 }, { "epoch": 0.49911442679261486, "grad_norm": 0.298828125, "learning_rate": 0.00112009465612264, "loss": 1.9602, "step": 18599 }, { "epoch": 0.4991412623443538, "grad_norm": 0.314453125, "learning_rate": 0.0011200806434122242, "loss": 2.0911, "step": 18600 }, { "epoch": 0.49916809789609273, "grad_norm": 0.306640625, "learning_rate": 0.0011200666295609048, "loss": 2.091, "step": 18601 }, { "epoch": 0.49919493344783167, "grad_norm": 0.3046875, "learning_rate": 0.001120052614568712, "loss": 2.0035, "step": 18602 }, { "epoch": 0.49922176899957066, "grad_norm": 0.306640625, "learning_rate": 0.0011200385984356767, "loss": 2.0165, "step": 18603 }, { "epoch": 0.4992486045513096, "grad_norm": 0.3046875, "learning_rate": 0.0011200245811618299, "loss": 1.9582, "step": 18604 }, { "epoch": 0.49927544010304853, "grad_norm": 0.3046875, "learning_rate": 0.0011200105627472022, "loss": 1.9488, "step": 18605 }, { "epoch": 0.49930227565478746, "grad_norm": 0.3046875, "learning_rate": 0.0011199965431918243, "loss": 1.9759, "step": 18606 }, { "epoch": 0.4993291112065264, "grad_norm": 0.302734375, "learning_rate": 0.0011199825224957269, "loss": 2.0421, "step": 18607 }, { "epoch": 0.49935594675826533, "grad_norm": 0.294921875, "learning_rate": 0.0011199685006589408, "loss": 1.9909, "step": 18608 }, { "epoch": 0.49938278231000427, "grad_norm": 0.302734375, "learning_rate": 0.0011199544776814968, "loss": 2.0113, "step": 18609 }, { "epoch": 0.49940961786174326, "grad_norm": 0.296875, "learning_rate": 0.0011199404535634256, "loss": 1.9855, "step": 18610 }, { "epoch": 0.4994364534134822, "grad_norm": 0.306640625, "learning_rate": 0.001119926428304758, "loss": 2.0012, "step": 18611 }, { "epoch": 0.49946328896522113, "grad_norm": 0.30078125, "learning_rate": 0.001119912401905525, "loss": 1.9959, "step": 18612 }, { "epoch": 0.49949012451696007, "grad_norm": 0.30078125, "learning_rate": 0.0011198983743657567, "loss": 1.9446, "step": 18613 }, { "epoch": 0.499516960068699, "grad_norm": 0.310546875, "learning_rate": 0.0011198843456854848, "loss": 2.0341, "step": 18614 }, { "epoch": 0.49954379562043794, "grad_norm": 0.30078125, "learning_rate": 0.0011198703158647396, "loss": 1.9871, "step": 18615 }, { "epoch": 0.4995706311721769, "grad_norm": 0.296875, "learning_rate": 0.0011198562849035515, "loss": 1.9338, "step": 18616 }, { "epoch": 0.49959746672391586, "grad_norm": 0.294921875, "learning_rate": 0.001119842252801952, "loss": 1.9088, "step": 18617 }, { "epoch": 0.4996243022756548, "grad_norm": 0.30078125, "learning_rate": 0.0011198282195599713, "loss": 1.9374, "step": 18618 }, { "epoch": 0.49965113782739373, "grad_norm": 0.3046875, "learning_rate": 0.0011198141851776404, "loss": 1.9779, "step": 18619 }, { "epoch": 0.49967797337913267, "grad_norm": 0.302734375, "learning_rate": 0.0011198001496549904, "loss": 1.976, "step": 18620 }, { "epoch": 0.4997048089308716, "grad_norm": 0.3046875, "learning_rate": 0.0011197861129920515, "loss": 1.9779, "step": 18621 }, { "epoch": 0.49973164448261054, "grad_norm": 0.30859375, "learning_rate": 0.0011197720751888551, "loss": 1.9988, "step": 18622 }, { "epoch": 0.49975848003434953, "grad_norm": 0.318359375, "learning_rate": 0.0011197580362454313, "loss": 2.0647, "step": 18623 }, { "epoch": 0.49978531558608846, "grad_norm": 0.3046875, "learning_rate": 0.0011197439961618118, "loss": 1.9979, "step": 18624 }, { "epoch": 0.4998121511378274, "grad_norm": 0.31640625, "learning_rate": 0.0011197299549380263, "loss": 2.1231, "step": 18625 }, { "epoch": 0.49983898668956633, "grad_norm": 0.314453125, "learning_rate": 0.0011197159125741064, "loss": 2.0345, "step": 18626 }, { "epoch": 0.49986582224130527, "grad_norm": 0.3125, "learning_rate": 0.001119701869070083, "loss": 2.002, "step": 18627 }, { "epoch": 0.4998926577930442, "grad_norm": 0.306640625, "learning_rate": 0.001119687824425986, "loss": 1.989, "step": 18628 }, { "epoch": 0.4999194933447832, "grad_norm": 0.3046875, "learning_rate": 0.0011196737786418472, "loss": 2.0178, "step": 18629 }, { "epoch": 0.49994632889652213, "grad_norm": 0.30078125, "learning_rate": 0.0011196597317176971, "loss": 2.0174, "step": 18630 }, { "epoch": 0.49997316444826106, "grad_norm": 0.294921875, "learning_rate": 0.0011196456836535663, "loss": 1.918, "step": 18631 }, { "epoch": 0.5, "grad_norm": 0.306640625, "learning_rate": 0.0011196316344494858, "loss": 2.008, "step": 18632 }, { "epoch": 0.500026835551739, "grad_norm": 0.302734375, "learning_rate": 0.0011196175841054863, "loss": 2.0, "step": 18633 }, { "epoch": 0.5000536711034779, "grad_norm": 0.3046875, "learning_rate": 0.0011196035326215988, "loss": 1.9554, "step": 18634 }, { "epoch": 0.5000805066552169, "grad_norm": 0.302734375, "learning_rate": 0.0011195894799978538, "loss": 1.9795, "step": 18635 }, { "epoch": 0.5001073422069557, "grad_norm": 0.30078125, "learning_rate": 0.0011195754262342825, "loss": 1.9349, "step": 18636 }, { "epoch": 0.5001341777586947, "grad_norm": 0.30859375, "learning_rate": 0.0011195613713309155, "loss": 1.8989, "step": 18637 }, { "epoch": 0.5001610133104336, "grad_norm": 0.314453125, "learning_rate": 0.001119547315287784, "loss": 2.0268, "step": 18638 }, { "epoch": 0.5001878488621726, "grad_norm": 0.3046875, "learning_rate": 0.0011195332581049181, "loss": 1.9525, "step": 18639 }, { "epoch": 0.5002146844139116, "grad_norm": 0.30078125, "learning_rate": 0.0011195191997823493, "loss": 1.9126, "step": 18640 }, { "epoch": 0.5002415199656505, "grad_norm": 0.30078125, "learning_rate": 0.0011195051403201084, "loss": 1.8734, "step": 18641 }, { "epoch": 0.5002683555173895, "grad_norm": 0.310546875, "learning_rate": 0.0011194910797182257, "loss": 1.9998, "step": 18642 }, { "epoch": 0.5002951910691283, "grad_norm": 0.306640625, "learning_rate": 0.0011194770179767327, "loss": 1.9718, "step": 18643 }, { "epoch": 0.5003220266208673, "grad_norm": 0.314453125, "learning_rate": 0.0011194629550956598, "loss": 2.0507, "step": 18644 }, { "epoch": 0.5003488621726063, "grad_norm": 0.318359375, "learning_rate": 0.001119448891075038, "loss": 1.9594, "step": 18645 }, { "epoch": 0.5003756977243452, "grad_norm": 0.310546875, "learning_rate": 0.0011194348259148982, "loss": 1.9323, "step": 18646 }, { "epoch": 0.5004025332760842, "grad_norm": 0.310546875, "learning_rate": 0.0011194207596152714, "loss": 1.9786, "step": 18647 }, { "epoch": 0.5004293688278231, "grad_norm": 0.296875, "learning_rate": 0.001119406692176188, "loss": 1.8754, "step": 18648 }, { "epoch": 0.5004562043795621, "grad_norm": 0.310546875, "learning_rate": 0.0011193926235976792, "loss": 1.9315, "step": 18649 }, { "epoch": 0.5004830399313009, "grad_norm": 0.31640625, "learning_rate": 0.0011193785538797757, "loss": 1.9028, "step": 18650 }, { "epoch": 0.5005098754830399, "grad_norm": 0.298828125, "learning_rate": 0.0011193644830225086, "loss": 1.8417, "step": 18651 }, { "epoch": 0.5005367110347789, "grad_norm": 0.3203125, "learning_rate": 0.0011193504110259087, "loss": 1.9557, "step": 18652 }, { "epoch": 0.5005635465865178, "grad_norm": 0.30859375, "learning_rate": 0.0011193363378900066, "loss": 1.933, "step": 18653 }, { "epoch": 0.5005903821382568, "grad_norm": 0.310546875, "learning_rate": 0.0011193222636148334, "loss": 1.944, "step": 18654 }, { "epoch": 0.5006172176899957, "grad_norm": 0.318359375, "learning_rate": 0.00111930818820042, "loss": 1.949, "step": 18655 }, { "epoch": 0.5006440532417347, "grad_norm": 0.310546875, "learning_rate": 0.0011192941116467973, "loss": 1.9715, "step": 18656 }, { "epoch": 0.5006708887934735, "grad_norm": 0.31640625, "learning_rate": 0.001119280033953996, "loss": 2.0103, "step": 18657 }, { "epoch": 0.5006977243452125, "grad_norm": 0.322265625, "learning_rate": 0.0011192659551220471, "loss": 1.7907, "step": 18658 }, { "epoch": 0.5007245598969515, "grad_norm": 0.302734375, "learning_rate": 0.0011192518751509813, "loss": 1.8822, "step": 18659 }, { "epoch": 0.5007513954486904, "grad_norm": 0.322265625, "learning_rate": 0.00111923779404083, "loss": 1.9673, "step": 18660 }, { "epoch": 0.5007782310004294, "grad_norm": 0.3125, "learning_rate": 0.0011192237117916234, "loss": 1.9048, "step": 18661 }, { "epoch": 0.5008050665521683, "grad_norm": 0.3046875, "learning_rate": 0.001119209628403393, "loss": 1.8482, "step": 18662 }, { "epoch": 0.5008319021039073, "grad_norm": 0.310546875, "learning_rate": 0.001119195543876169, "loss": 1.9172, "step": 18663 }, { "epoch": 0.5008587376556461, "grad_norm": 0.30859375, "learning_rate": 0.0011191814582099833, "loss": 1.8791, "step": 18664 }, { "epoch": 0.5008855732073851, "grad_norm": 0.3125, "learning_rate": 0.0011191673714048658, "loss": 1.944, "step": 18665 }, { "epoch": 0.5009124087591241, "grad_norm": 0.31640625, "learning_rate": 0.001119153283460848, "loss": 1.8793, "step": 18666 }, { "epoch": 0.500939244310863, "grad_norm": 0.3125, "learning_rate": 0.0011191391943779607, "loss": 1.8734, "step": 18667 }, { "epoch": 0.500966079862602, "grad_norm": 0.310546875, "learning_rate": 0.0011191251041562345, "loss": 1.9155, "step": 18668 }, { "epoch": 0.5009929154143409, "grad_norm": 0.310546875, "learning_rate": 0.0011191110127957008, "loss": 1.9709, "step": 18669 }, { "epoch": 0.5010197509660799, "grad_norm": 0.3125, "learning_rate": 0.00111909692029639, "loss": 1.9313, "step": 18670 }, { "epoch": 0.5010465865178189, "grad_norm": 0.314453125, "learning_rate": 0.0011190828266583338, "loss": 1.9687, "step": 18671 }, { "epoch": 0.5010734220695577, "grad_norm": 0.306640625, "learning_rate": 0.001119068731881562, "loss": 1.9329, "step": 18672 }, { "epoch": 0.5011002576212967, "grad_norm": 0.306640625, "learning_rate": 0.0011190546359661064, "loss": 1.81, "step": 18673 }, { "epoch": 0.5011270931730356, "grad_norm": 0.3203125, "learning_rate": 0.0011190405389119977, "loss": 1.9561, "step": 18674 }, { "epoch": 0.5011539287247746, "grad_norm": 0.3046875, "learning_rate": 0.0011190264407192666, "loss": 1.7839, "step": 18675 }, { "epoch": 0.5011807642765135, "grad_norm": 0.3203125, "learning_rate": 0.0011190123413879443, "loss": 1.9275, "step": 18676 }, { "epoch": 0.5012075998282525, "grad_norm": 0.328125, "learning_rate": 0.0011189982409180615, "loss": 1.9414, "step": 18677 }, { "epoch": 0.5012344353799915, "grad_norm": 0.326171875, "learning_rate": 0.0011189841393096493, "loss": 1.9129, "step": 18678 }, { "epoch": 0.5012612709317303, "grad_norm": 0.314453125, "learning_rate": 0.0011189700365627385, "loss": 1.9243, "step": 18679 }, { "epoch": 0.5012881064834693, "grad_norm": 0.318359375, "learning_rate": 0.0011189559326773603, "loss": 1.9331, "step": 18680 }, { "epoch": 0.5013149420352082, "grad_norm": 0.302734375, "learning_rate": 0.0011189418276535456, "loss": 1.8728, "step": 18681 }, { "epoch": 0.5013417775869472, "grad_norm": 0.30859375, "learning_rate": 0.0011189277214913248, "loss": 1.8667, "step": 18682 }, { "epoch": 0.5013686131386861, "grad_norm": 0.3125, "learning_rate": 0.0011189136141907296, "loss": 1.9223, "step": 18683 }, { "epoch": 0.5013954486904251, "grad_norm": 0.318359375, "learning_rate": 0.0011188995057517903, "loss": 1.9423, "step": 18684 }, { "epoch": 0.5014222842421641, "grad_norm": 0.314453125, "learning_rate": 0.0011188853961745383, "loss": 1.8673, "step": 18685 }, { "epoch": 0.5014491197939029, "grad_norm": 0.310546875, "learning_rate": 0.0011188712854590043, "loss": 1.9654, "step": 18686 }, { "epoch": 0.5014759553456419, "grad_norm": 0.314453125, "learning_rate": 0.0011188571736052196, "loss": 1.8967, "step": 18687 }, { "epoch": 0.5015027908973808, "grad_norm": 0.310546875, "learning_rate": 0.0011188430606132145, "loss": 1.8143, "step": 18688 }, { "epoch": 0.5015296264491198, "grad_norm": 0.3046875, "learning_rate": 0.0011188289464830207, "loss": 1.8141, "step": 18689 }, { "epoch": 0.5015564620008587, "grad_norm": 0.32421875, "learning_rate": 0.0011188148312146688, "loss": 1.9427, "step": 18690 }, { "epoch": 0.5015832975525977, "grad_norm": 0.31640625, "learning_rate": 0.0011188007148081897, "loss": 1.9289, "step": 18691 }, { "epoch": 0.5016101331043367, "grad_norm": 0.3046875, "learning_rate": 0.0011187865972636145, "loss": 1.8308, "step": 18692 }, { "epoch": 0.5016369686560755, "grad_norm": 0.314453125, "learning_rate": 0.0011187724785809742, "loss": 1.8755, "step": 18693 }, { "epoch": 0.5016638042078145, "grad_norm": 0.318359375, "learning_rate": 0.0011187583587602995, "loss": 1.893, "step": 18694 }, { "epoch": 0.5016906397595534, "grad_norm": 0.3046875, "learning_rate": 0.001118744237801622, "loss": 1.8672, "step": 18695 }, { "epoch": 0.5017174753112924, "grad_norm": 0.31640625, "learning_rate": 0.001118730115704972, "loss": 1.9117, "step": 18696 }, { "epoch": 0.5017443108630314, "grad_norm": 0.30859375, "learning_rate": 0.0011187159924703808, "loss": 1.875, "step": 18697 }, { "epoch": 0.5017711464147703, "grad_norm": 0.3125, "learning_rate": 0.0011187018680978793, "loss": 1.8962, "step": 18698 }, { "epoch": 0.5017979819665093, "grad_norm": 0.37109375, "learning_rate": 0.0011186877425874984, "loss": 2.1589, "step": 18699 }, { "epoch": 0.5018248175182481, "grad_norm": 0.40234375, "learning_rate": 0.0011186736159392691, "loss": 2.2166, "step": 18700 }, { "epoch": 0.5018516530699871, "grad_norm": 0.345703125, "learning_rate": 0.0011186594881532226, "loss": 2.1149, "step": 18701 }, { "epoch": 0.501878488621726, "grad_norm": 0.345703125, "learning_rate": 0.0011186453592293899, "loss": 2.1912, "step": 18702 }, { "epoch": 0.501905324173465, "grad_norm": 0.33984375, "learning_rate": 0.0011186312291678018, "loss": 2.1672, "step": 18703 }, { "epoch": 0.501932159725204, "grad_norm": 0.3359375, "learning_rate": 0.0011186170979684894, "loss": 2.1872, "step": 18704 }, { "epoch": 0.5019589952769429, "grad_norm": 0.33203125, "learning_rate": 0.0011186029656314836, "loss": 2.1786, "step": 18705 }, { "epoch": 0.5019858308286819, "grad_norm": 0.306640625, "learning_rate": 0.0011185888321568155, "loss": 2.0404, "step": 18706 }, { "epoch": 0.5020126663804207, "grad_norm": 0.310546875, "learning_rate": 0.001118574697544516, "loss": 2.1122, "step": 18707 }, { "epoch": 0.5020395019321597, "grad_norm": 0.310546875, "learning_rate": 0.0011185605617946162, "loss": 2.0375, "step": 18708 }, { "epoch": 0.5020663374838986, "grad_norm": 0.314453125, "learning_rate": 0.0011185464249071472, "loss": 2.1224, "step": 18709 }, { "epoch": 0.5020931730356376, "grad_norm": 0.310546875, "learning_rate": 0.0011185322868821398, "loss": 2.0744, "step": 18710 }, { "epoch": 0.5021200085873766, "grad_norm": 0.314453125, "learning_rate": 0.0011185181477196251, "loss": 2.1217, "step": 18711 }, { "epoch": 0.5021468441391155, "grad_norm": 0.30859375, "learning_rate": 0.0011185040074196341, "loss": 2.067, "step": 18712 }, { "epoch": 0.5021736796908545, "grad_norm": 0.314453125, "learning_rate": 0.0011184898659821981, "loss": 2.0568, "step": 18713 }, { "epoch": 0.5022005152425933, "grad_norm": 0.306640625, "learning_rate": 0.0011184757234073478, "loss": 2.0643, "step": 18714 }, { "epoch": 0.5022273507943323, "grad_norm": 0.314453125, "learning_rate": 0.0011184615796951142, "loss": 2.0813, "step": 18715 }, { "epoch": 0.5022541863460713, "grad_norm": 0.3125, "learning_rate": 0.0011184474348455284, "loss": 2.1123, "step": 18716 }, { "epoch": 0.5022810218978102, "grad_norm": 0.310546875, "learning_rate": 0.0011184332888586215, "loss": 2.1053, "step": 18717 }, { "epoch": 0.5023078574495492, "grad_norm": 0.310546875, "learning_rate": 0.0011184191417344246, "loss": 2.0551, "step": 18718 }, { "epoch": 0.5023346930012881, "grad_norm": 0.3125, "learning_rate": 0.0011184049934729683, "loss": 2.071, "step": 18719 }, { "epoch": 0.5023615285530271, "grad_norm": 0.310546875, "learning_rate": 0.0011183908440742843, "loss": 2.0856, "step": 18720 }, { "epoch": 0.502388364104766, "grad_norm": 0.3046875, "learning_rate": 0.0011183766935384033, "loss": 2.0309, "step": 18721 }, { "epoch": 0.5024151996565049, "grad_norm": 0.306640625, "learning_rate": 0.001118362541865356, "loss": 2.1081, "step": 18722 }, { "epoch": 0.5024420352082439, "grad_norm": 0.302734375, "learning_rate": 0.001118348389055174, "loss": 2.0344, "step": 18723 }, { "epoch": 0.5024688707599828, "grad_norm": 0.314453125, "learning_rate": 0.0011183342351078884, "loss": 2.1171, "step": 18724 }, { "epoch": 0.5024957063117218, "grad_norm": 0.306640625, "learning_rate": 0.0011183200800235296, "loss": 2.086, "step": 18725 }, { "epoch": 0.5025225418634607, "grad_norm": 0.306640625, "learning_rate": 0.001118305923802129, "loss": 2.0707, "step": 18726 }, { "epoch": 0.5025493774151997, "grad_norm": 0.3046875, "learning_rate": 0.001118291766443718, "loss": 2.0457, "step": 18727 }, { "epoch": 0.5025762129669386, "grad_norm": 0.298828125, "learning_rate": 0.001118277607948327, "loss": 2.0233, "step": 18728 }, { "epoch": 0.5026030485186775, "grad_norm": 0.291015625, "learning_rate": 0.0011182634483159878, "loss": 1.8935, "step": 18729 }, { "epoch": 0.5026298840704165, "grad_norm": 0.3125, "learning_rate": 0.0011182492875467308, "loss": 2.1277, "step": 18730 }, { "epoch": 0.5026567196221554, "grad_norm": 0.298828125, "learning_rate": 0.0011182351256405875, "loss": 2.0252, "step": 18731 }, { "epoch": 0.5026835551738944, "grad_norm": 0.318359375, "learning_rate": 0.0011182209625975887, "loss": 2.0905, "step": 18732 }, { "epoch": 0.5027103907256333, "grad_norm": 0.30078125, "learning_rate": 0.0011182067984177656, "loss": 1.9637, "step": 18733 }, { "epoch": 0.5027372262773723, "grad_norm": 0.302734375, "learning_rate": 0.0011181926331011493, "loss": 1.9955, "step": 18734 }, { "epoch": 0.5027640618291112, "grad_norm": 0.30859375, "learning_rate": 0.0011181784666477706, "loss": 2.0536, "step": 18735 }, { "epoch": 0.5027908973808501, "grad_norm": 0.314453125, "learning_rate": 0.001118164299057661, "loss": 2.1068, "step": 18736 }, { "epoch": 0.5028177329325891, "grad_norm": 0.2890625, "learning_rate": 0.0011181501303308512, "loss": 1.9579, "step": 18737 }, { "epoch": 0.502844568484328, "grad_norm": 0.306640625, "learning_rate": 0.0011181359604673727, "loss": 2.0326, "step": 18738 }, { "epoch": 0.502871404036067, "grad_norm": 0.302734375, "learning_rate": 0.0011181217894672562, "loss": 1.9933, "step": 18739 }, { "epoch": 0.5028982395878059, "grad_norm": 0.302734375, "learning_rate": 0.001118107617330533, "loss": 2.056, "step": 18740 }, { "epoch": 0.5029250751395449, "grad_norm": 0.32421875, "learning_rate": 0.001118093444057234, "loss": 2.0812, "step": 18741 }, { "epoch": 0.5029519106912839, "grad_norm": 0.30078125, "learning_rate": 0.0011180792696473905, "loss": 1.9859, "step": 18742 }, { "epoch": 0.5029787462430227, "grad_norm": 0.296875, "learning_rate": 0.0011180650941010335, "loss": 1.9762, "step": 18743 }, { "epoch": 0.5030055817947617, "grad_norm": 0.294921875, "learning_rate": 0.001118050917418194, "loss": 1.9469, "step": 18744 }, { "epoch": 0.5030324173465006, "grad_norm": 0.298828125, "learning_rate": 0.0011180367395989034, "loss": 2.0424, "step": 18745 }, { "epoch": 0.5030592528982396, "grad_norm": 0.30078125, "learning_rate": 0.0011180225606431924, "loss": 2.0033, "step": 18746 }, { "epoch": 0.5030860884499785, "grad_norm": 0.30859375, "learning_rate": 0.0011180083805510924, "loss": 2.0487, "step": 18747 }, { "epoch": 0.5031129240017175, "grad_norm": 0.306640625, "learning_rate": 0.0011179941993226343, "loss": 1.91, "step": 18748 }, { "epoch": 0.5031397595534565, "grad_norm": 0.3046875, "learning_rate": 0.0011179800169578496, "loss": 1.9504, "step": 18749 }, { "epoch": 0.5031665951051953, "grad_norm": 0.3125, "learning_rate": 0.0011179658334567687, "loss": 2.0229, "step": 18750 }, { "epoch": 0.5031934306569343, "grad_norm": 0.318359375, "learning_rate": 0.0011179516488194234, "loss": 2.0161, "step": 18751 }, { "epoch": 0.5032202662086732, "grad_norm": 0.30078125, "learning_rate": 0.0011179374630458448, "loss": 1.8598, "step": 18752 }, { "epoch": 0.5032471017604122, "grad_norm": 0.310546875, "learning_rate": 0.0011179232761360633, "loss": 1.9522, "step": 18753 }, { "epoch": 0.5032739373121511, "grad_norm": 0.31640625, "learning_rate": 0.001117909088090111, "loss": 2.0177, "step": 18754 }, { "epoch": 0.5033007728638901, "grad_norm": 0.296875, "learning_rate": 0.001117894898908018, "loss": 1.9242, "step": 18755 }, { "epoch": 0.5033276084156291, "grad_norm": 0.302734375, "learning_rate": 0.0011178807085898165, "loss": 1.9102, "step": 18756 }, { "epoch": 0.503354443967368, "grad_norm": 0.302734375, "learning_rate": 0.0011178665171355368, "loss": 1.922, "step": 18757 }, { "epoch": 0.5033812795191069, "grad_norm": 0.30078125, "learning_rate": 0.0011178523245452105, "loss": 1.987, "step": 18758 }, { "epoch": 0.5034081150708458, "grad_norm": 0.30078125, "learning_rate": 0.0011178381308188682, "loss": 1.9343, "step": 18759 }, { "epoch": 0.5034349506225848, "grad_norm": 0.30078125, "learning_rate": 0.0011178239359565416, "loss": 1.9969, "step": 18760 }, { "epoch": 0.5034617861743237, "grad_norm": 0.298828125, "learning_rate": 0.0011178097399582618, "loss": 1.9091, "step": 18761 }, { "epoch": 0.5034886217260627, "grad_norm": 0.30078125, "learning_rate": 0.0011177955428240596, "loss": 1.9894, "step": 18762 }, { "epoch": 0.5035154572778017, "grad_norm": 0.310546875, "learning_rate": 0.0011177813445539663, "loss": 1.993, "step": 18763 }, { "epoch": 0.5035422928295405, "grad_norm": 0.310546875, "learning_rate": 0.001117767145148013, "loss": 1.9561, "step": 18764 }, { "epoch": 0.5035691283812795, "grad_norm": 0.31640625, "learning_rate": 0.001117752944606231, "loss": 2.0126, "step": 18765 }, { "epoch": 0.5035959639330184, "grad_norm": 0.318359375, "learning_rate": 0.0011177387429286514, "loss": 2.047, "step": 18766 }, { "epoch": 0.5036227994847574, "grad_norm": 0.31640625, "learning_rate": 0.0011177245401153052, "loss": 1.979, "step": 18767 }, { "epoch": 0.5036496350364964, "grad_norm": 0.306640625, "learning_rate": 0.0011177103361662238, "loss": 1.9817, "step": 18768 }, { "epoch": 0.5036764705882353, "grad_norm": 0.318359375, "learning_rate": 0.0011176961310814383, "loss": 2.0202, "step": 18769 }, { "epoch": 0.5037033061399743, "grad_norm": 0.326171875, "learning_rate": 0.0011176819248609796, "loss": 2.0976, "step": 18770 }, { "epoch": 0.5037301416917132, "grad_norm": 0.302734375, "learning_rate": 0.0011176677175048792, "loss": 1.9143, "step": 18771 }, { "epoch": 0.5037569772434521, "grad_norm": 0.296875, "learning_rate": 0.001117653509013168, "loss": 1.9758, "step": 18772 }, { "epoch": 0.503783812795191, "grad_norm": 0.310546875, "learning_rate": 0.0011176392993858775, "loss": 2.0731, "step": 18773 }, { "epoch": 0.50381064834693, "grad_norm": 0.314453125, "learning_rate": 0.0011176250886230385, "loss": 2.0456, "step": 18774 }, { "epoch": 0.503837483898669, "grad_norm": 0.318359375, "learning_rate": 0.0011176108767246822, "loss": 1.9848, "step": 18775 }, { "epoch": 0.5038643194504079, "grad_norm": 0.298828125, "learning_rate": 0.0011175966636908403, "loss": 1.9399, "step": 18776 }, { "epoch": 0.5038911550021469, "grad_norm": 0.306640625, "learning_rate": 0.0011175824495215432, "loss": 1.9483, "step": 18777 }, { "epoch": 0.5039179905538858, "grad_norm": 0.302734375, "learning_rate": 0.0011175682342168228, "loss": 1.931, "step": 18778 }, { "epoch": 0.5039448261056247, "grad_norm": 0.302734375, "learning_rate": 0.0011175540177767098, "loss": 1.8439, "step": 18779 }, { "epoch": 0.5039716616573636, "grad_norm": 0.314453125, "learning_rate": 0.0011175398002012356, "loss": 2.001, "step": 18780 }, { "epoch": 0.5039984972091026, "grad_norm": 0.302734375, "learning_rate": 0.0011175255814904314, "loss": 1.8657, "step": 18781 }, { "epoch": 0.5040253327608416, "grad_norm": 0.30078125, "learning_rate": 0.0011175113616443283, "loss": 1.8759, "step": 18782 }, { "epoch": 0.5040521683125805, "grad_norm": 0.3125, "learning_rate": 0.0011174971406629575, "loss": 2.0411, "step": 18783 }, { "epoch": 0.5040790038643195, "grad_norm": 0.310546875, "learning_rate": 0.0011174829185463502, "loss": 1.9747, "step": 18784 }, { "epoch": 0.5041058394160584, "grad_norm": 0.302734375, "learning_rate": 0.0011174686952945376, "loss": 1.9248, "step": 18785 }, { "epoch": 0.5041326749677973, "grad_norm": 0.3125, "learning_rate": 0.0011174544709075512, "loss": 1.9831, "step": 18786 }, { "epoch": 0.5041595105195363, "grad_norm": 0.310546875, "learning_rate": 0.0011174402453854215, "loss": 1.9128, "step": 18787 }, { "epoch": 0.5041863460712752, "grad_norm": 0.310546875, "learning_rate": 0.0011174260187281804, "loss": 1.9031, "step": 18788 }, { "epoch": 0.5042131816230142, "grad_norm": 0.30078125, "learning_rate": 0.001117411790935859, "loss": 1.9169, "step": 18789 }, { "epoch": 0.5042400171747531, "grad_norm": 0.30078125, "learning_rate": 0.001117397562008488, "loss": 1.9707, "step": 18790 }, { "epoch": 0.5042668527264921, "grad_norm": 0.302734375, "learning_rate": 0.001117383331946099, "loss": 1.8521, "step": 18791 }, { "epoch": 0.504293688278231, "grad_norm": 0.30859375, "learning_rate": 0.0011173691007487234, "loss": 1.9293, "step": 18792 }, { "epoch": 0.50432052382997, "grad_norm": 0.326171875, "learning_rate": 0.001117354868416392, "loss": 2.0499, "step": 18793 }, { "epoch": 0.5043473593817089, "grad_norm": 0.310546875, "learning_rate": 0.0011173406349491363, "loss": 1.9378, "step": 18794 }, { "epoch": 0.5043741949334478, "grad_norm": 0.318359375, "learning_rate": 0.0011173264003469876, "loss": 2.024, "step": 18795 }, { "epoch": 0.5044010304851868, "grad_norm": 0.310546875, "learning_rate": 0.0011173121646099768, "loss": 1.8307, "step": 18796 }, { "epoch": 0.5044278660369257, "grad_norm": 0.314453125, "learning_rate": 0.0011172979277381354, "loss": 2.0496, "step": 18797 }, { "epoch": 0.5044547015886647, "grad_norm": 0.298828125, "learning_rate": 0.0011172836897314945, "loss": 1.9803, "step": 18798 }, { "epoch": 0.5044815371404036, "grad_norm": 0.306640625, "learning_rate": 0.0011172694505900852, "loss": 1.9362, "step": 18799 }, { "epoch": 0.5045083726921425, "grad_norm": 0.3125, "learning_rate": 0.0011172552103139392, "loss": 1.9966, "step": 18800 }, { "epoch": 0.5045352082438815, "grad_norm": 0.30859375, "learning_rate": 0.0011172409689030873, "loss": 1.9478, "step": 18801 }, { "epoch": 0.5045620437956204, "grad_norm": 0.310546875, "learning_rate": 0.001117226726357561, "loss": 1.9403, "step": 18802 }, { "epoch": 0.5045888793473594, "grad_norm": 0.3125, "learning_rate": 0.0011172124826773914, "loss": 1.8409, "step": 18803 }, { "epoch": 0.5046157148990983, "grad_norm": 0.302734375, "learning_rate": 0.0011171982378626096, "loss": 1.8352, "step": 18804 }, { "epoch": 0.5046425504508373, "grad_norm": 0.30078125, "learning_rate": 0.0011171839919132472, "loss": 1.8108, "step": 18805 }, { "epoch": 0.5046693860025762, "grad_norm": 0.3125, "learning_rate": 0.0011171697448293352, "loss": 1.896, "step": 18806 }, { "epoch": 0.5046962215543151, "grad_norm": 0.322265625, "learning_rate": 0.001117155496610905, "loss": 2.0022, "step": 18807 }, { "epoch": 0.5047230571060541, "grad_norm": 0.3125, "learning_rate": 0.0011171412472579876, "loss": 1.8994, "step": 18808 }, { "epoch": 0.504749892657793, "grad_norm": 0.3125, "learning_rate": 0.0011171269967706148, "loss": 1.8477, "step": 18809 }, { "epoch": 0.504776728209532, "grad_norm": 0.3046875, "learning_rate": 0.0011171127451488172, "loss": 1.8916, "step": 18810 }, { "epoch": 0.5048035637612709, "grad_norm": 0.318359375, "learning_rate": 0.0011170984923926264, "loss": 1.9642, "step": 18811 }, { "epoch": 0.5048303993130099, "grad_norm": 0.322265625, "learning_rate": 0.0011170842385020737, "loss": 2.0405, "step": 18812 }, { "epoch": 0.5048572348647489, "grad_norm": 0.310546875, "learning_rate": 0.0011170699834771906, "loss": 1.914, "step": 18813 }, { "epoch": 0.5048840704164878, "grad_norm": 0.30859375, "learning_rate": 0.0011170557273180077, "loss": 1.8643, "step": 18814 }, { "epoch": 0.5049109059682267, "grad_norm": 0.31640625, "learning_rate": 0.0011170414700245569, "loss": 1.9243, "step": 18815 }, { "epoch": 0.5049377415199656, "grad_norm": 0.310546875, "learning_rate": 0.001117027211596869, "loss": 1.8636, "step": 18816 }, { "epoch": 0.5049645770717046, "grad_norm": 0.318359375, "learning_rate": 0.0011170129520349757, "loss": 1.9303, "step": 18817 }, { "epoch": 0.5049914126234435, "grad_norm": 0.3125, "learning_rate": 0.001116998691338908, "loss": 2.0014, "step": 18818 }, { "epoch": 0.5050182481751825, "grad_norm": 0.310546875, "learning_rate": 0.0011169844295086974, "loss": 1.9267, "step": 18819 }, { "epoch": 0.5050450837269215, "grad_norm": 0.3203125, "learning_rate": 0.001116970166544375, "loss": 1.8935, "step": 18820 }, { "epoch": 0.5050719192786604, "grad_norm": 0.322265625, "learning_rate": 0.0011169559024459719, "loss": 1.9016, "step": 18821 }, { "epoch": 0.5050987548303993, "grad_norm": 0.3046875, "learning_rate": 0.0011169416372135198, "loss": 1.8289, "step": 18822 }, { "epoch": 0.5051255903821382, "grad_norm": 0.3125, "learning_rate": 0.0011169273708470499, "loss": 1.8731, "step": 18823 }, { "epoch": 0.5051524259338772, "grad_norm": 0.328125, "learning_rate": 0.0011169131033465934, "loss": 1.9575, "step": 18824 }, { "epoch": 0.5051792614856161, "grad_norm": 0.3203125, "learning_rate": 0.0011168988347121816, "loss": 1.9572, "step": 18825 }, { "epoch": 0.5052060970373551, "grad_norm": 0.32421875, "learning_rate": 0.0011168845649438459, "loss": 1.9193, "step": 18826 }, { "epoch": 0.5052329325890941, "grad_norm": 0.326171875, "learning_rate": 0.0011168702940416173, "loss": 1.8747, "step": 18827 }, { "epoch": 0.505259768140833, "grad_norm": 0.3125, "learning_rate": 0.0011168560220055277, "loss": 1.9314, "step": 18828 }, { "epoch": 0.505286603692572, "grad_norm": 0.322265625, "learning_rate": 0.0011168417488356078, "loss": 1.9206, "step": 18829 }, { "epoch": 0.5053134392443108, "grad_norm": 0.322265625, "learning_rate": 0.0011168274745318894, "loss": 1.9548, "step": 18830 }, { "epoch": 0.5053402747960498, "grad_norm": 0.322265625, "learning_rate": 0.0011168131990944034, "loss": 1.9046, "step": 18831 }, { "epoch": 0.5053671103477888, "grad_norm": 0.318359375, "learning_rate": 0.0011167989225231813, "loss": 1.8799, "step": 18832 }, { "epoch": 0.5053939458995277, "grad_norm": 0.31640625, "learning_rate": 0.0011167846448182544, "loss": 1.8772, "step": 18833 }, { "epoch": 0.5054207814512667, "grad_norm": 0.3203125, "learning_rate": 0.001116770365979654, "loss": 1.9322, "step": 18834 }, { "epoch": 0.5054476170030056, "grad_norm": 0.328125, "learning_rate": 0.0011167560860074115, "loss": 1.9749, "step": 18835 }, { "epoch": 0.5054744525547445, "grad_norm": 0.384765625, "learning_rate": 0.001116741804901558, "loss": 2.1789, "step": 18836 }, { "epoch": 0.5055012881064834, "grad_norm": 0.365234375, "learning_rate": 0.0011167275226621253, "loss": 2.1642, "step": 18837 }, { "epoch": 0.5055281236582224, "grad_norm": 0.353515625, "learning_rate": 0.0011167132392891444, "loss": 2.1958, "step": 18838 }, { "epoch": 0.5055549592099614, "grad_norm": 0.333984375, "learning_rate": 0.0011166989547826466, "loss": 2.1406, "step": 18839 }, { "epoch": 0.5055817947617003, "grad_norm": 0.330078125, "learning_rate": 0.001116684669142663, "loss": 2.0975, "step": 18840 }, { "epoch": 0.5056086303134393, "grad_norm": 0.328125, "learning_rate": 0.0011166703823692259, "loss": 2.1442, "step": 18841 }, { "epoch": 0.5056354658651782, "grad_norm": 0.32421875, "learning_rate": 0.0011166560944623656, "loss": 2.2705, "step": 18842 }, { "epoch": 0.5056623014169171, "grad_norm": 0.314453125, "learning_rate": 0.0011166418054221138, "loss": 2.0866, "step": 18843 }, { "epoch": 0.505689136968656, "grad_norm": 0.3125, "learning_rate": 0.0011166275152485019, "loss": 2.1105, "step": 18844 }, { "epoch": 0.505715972520395, "grad_norm": 0.31640625, "learning_rate": 0.0011166132239415613, "loss": 2.1033, "step": 18845 }, { "epoch": 0.505742808072134, "grad_norm": 0.314453125, "learning_rate": 0.0011165989315013232, "loss": 2.1684, "step": 18846 }, { "epoch": 0.5057696436238729, "grad_norm": 0.30859375, "learning_rate": 0.001116584637927819, "loss": 2.0582, "step": 18847 }, { "epoch": 0.5057964791756119, "grad_norm": 0.322265625, "learning_rate": 0.0011165703432210801, "loss": 2.1168, "step": 18848 }, { "epoch": 0.5058233147273508, "grad_norm": 0.306640625, "learning_rate": 0.0011165560473811382, "loss": 2.0898, "step": 18849 }, { "epoch": 0.5058501502790897, "grad_norm": 0.296875, "learning_rate": 0.0011165417504080239, "loss": 2.0327, "step": 18850 }, { "epoch": 0.5058769858308286, "grad_norm": 0.30859375, "learning_rate": 0.0011165274523017691, "loss": 2.0143, "step": 18851 }, { "epoch": 0.5059038213825676, "grad_norm": 0.30859375, "learning_rate": 0.001116513153062405, "loss": 2.0502, "step": 18852 }, { "epoch": 0.5059306569343066, "grad_norm": 0.30859375, "learning_rate": 0.001116498852689963, "loss": 2.0145, "step": 18853 }, { "epoch": 0.5059574924860455, "grad_norm": 0.306640625, "learning_rate": 0.0011164845511844744, "loss": 2.0392, "step": 18854 }, { "epoch": 0.5059843280377845, "grad_norm": 0.302734375, "learning_rate": 0.0011164702485459707, "loss": 1.9129, "step": 18855 }, { "epoch": 0.5060111635895234, "grad_norm": 0.3125, "learning_rate": 0.0011164559447744834, "loss": 2.052, "step": 18856 }, { "epoch": 0.5060379991412624, "grad_norm": 0.326171875, "learning_rate": 0.0011164416398700436, "loss": 2.1699, "step": 18857 }, { "epoch": 0.5060648346930013, "grad_norm": 0.3046875, "learning_rate": 0.0011164273338326828, "loss": 2.0237, "step": 18858 }, { "epoch": 0.5060916702447402, "grad_norm": 0.318359375, "learning_rate": 0.0011164130266624324, "loss": 2.1196, "step": 18859 }, { "epoch": 0.5061185057964792, "grad_norm": 0.30078125, "learning_rate": 0.0011163987183593236, "loss": 2.0577, "step": 18860 }, { "epoch": 0.5061453413482181, "grad_norm": 0.3046875, "learning_rate": 0.001116384408923388, "loss": 2.0106, "step": 18861 }, { "epoch": 0.5061721768999571, "grad_norm": 0.30078125, "learning_rate": 0.0011163700983546569, "loss": 2.0174, "step": 18862 }, { "epoch": 0.506199012451696, "grad_norm": 0.296875, "learning_rate": 0.0011163557866531617, "loss": 1.9928, "step": 18863 }, { "epoch": 0.506225848003435, "grad_norm": 0.3046875, "learning_rate": 0.0011163414738189338, "loss": 2.0896, "step": 18864 }, { "epoch": 0.5062526835551739, "grad_norm": 0.306640625, "learning_rate": 0.0011163271598520048, "loss": 2.0831, "step": 18865 }, { "epoch": 0.5062795191069128, "grad_norm": 0.31640625, "learning_rate": 0.0011163128447524056, "loss": 2.0749, "step": 18866 }, { "epoch": 0.5063063546586518, "grad_norm": 0.30078125, "learning_rate": 0.0011162985285201681, "loss": 1.9462, "step": 18867 }, { "epoch": 0.5063331902103907, "grad_norm": 0.30859375, "learning_rate": 0.0011162842111553236, "loss": 2.0587, "step": 18868 }, { "epoch": 0.5063600257621297, "grad_norm": 0.30859375, "learning_rate": 0.0011162698926579034, "loss": 2.0113, "step": 18869 }, { "epoch": 0.5063868613138686, "grad_norm": 0.310546875, "learning_rate": 0.001116255573027939, "loss": 2.1034, "step": 18870 }, { "epoch": 0.5064136968656076, "grad_norm": 0.306640625, "learning_rate": 0.0011162412522654616, "loss": 2.048, "step": 18871 }, { "epoch": 0.5064405324173465, "grad_norm": 0.310546875, "learning_rate": 0.001116226930370503, "loss": 2.036, "step": 18872 }, { "epoch": 0.5064673679690854, "grad_norm": 0.30078125, "learning_rate": 0.0011162126073430941, "loss": 1.9545, "step": 18873 }, { "epoch": 0.5064942035208244, "grad_norm": 0.298828125, "learning_rate": 0.0011161982831832669, "loss": 1.9677, "step": 18874 }, { "epoch": 0.5065210390725633, "grad_norm": 0.310546875, "learning_rate": 0.0011161839578910523, "loss": 2.042, "step": 18875 }, { "epoch": 0.5065478746243023, "grad_norm": 0.296875, "learning_rate": 0.001116169631466482, "loss": 1.9564, "step": 18876 }, { "epoch": 0.5065747101760412, "grad_norm": 0.32421875, "learning_rate": 0.0011161553039095876, "loss": 2.1816, "step": 18877 }, { "epoch": 0.5066015457277802, "grad_norm": 0.306640625, "learning_rate": 0.0011161409752204002, "loss": 2.0394, "step": 18878 }, { "epoch": 0.5066283812795191, "grad_norm": 0.3046875, "learning_rate": 0.0011161266453989511, "loss": 2.0483, "step": 18879 }, { "epoch": 0.506655216831258, "grad_norm": 0.3046875, "learning_rate": 0.0011161123144452722, "loss": 1.9643, "step": 18880 }, { "epoch": 0.506682052382997, "grad_norm": 0.30859375, "learning_rate": 0.0011160979823593947, "loss": 1.9754, "step": 18881 }, { "epoch": 0.5067088879347359, "grad_norm": 0.30078125, "learning_rate": 0.0011160836491413502, "loss": 1.9331, "step": 18882 }, { "epoch": 0.5067357234864749, "grad_norm": 0.306640625, "learning_rate": 0.0011160693147911698, "loss": 1.9492, "step": 18883 }, { "epoch": 0.5067625590382139, "grad_norm": 0.302734375, "learning_rate": 0.0011160549793088853, "loss": 1.965, "step": 18884 }, { "epoch": 0.5067893945899528, "grad_norm": 0.310546875, "learning_rate": 0.001116040642694528, "loss": 2.0472, "step": 18885 }, { "epoch": 0.5068162301416917, "grad_norm": 0.302734375, "learning_rate": 0.0011160263049481292, "loss": 1.9662, "step": 18886 }, { "epoch": 0.5068430656934306, "grad_norm": 0.3046875, "learning_rate": 0.0011160119660697206, "loss": 1.9539, "step": 18887 }, { "epoch": 0.5068699012451696, "grad_norm": 0.298828125, "learning_rate": 0.0011159976260593336, "loss": 1.8997, "step": 18888 }, { "epoch": 0.5068967367969085, "grad_norm": 0.306640625, "learning_rate": 0.0011159832849169996, "loss": 1.9833, "step": 18889 }, { "epoch": 0.5069235723486475, "grad_norm": 0.30859375, "learning_rate": 0.00111596894264275, "loss": 2.0167, "step": 18890 }, { "epoch": 0.5069504079003865, "grad_norm": 0.294921875, "learning_rate": 0.0011159545992366164, "loss": 1.9429, "step": 18891 }, { "epoch": 0.5069772434521254, "grad_norm": 0.3203125, "learning_rate": 0.0011159402546986302, "loss": 2.0178, "step": 18892 }, { "epoch": 0.5070040790038643, "grad_norm": 0.30859375, "learning_rate": 0.001115925909028823, "loss": 1.9891, "step": 18893 }, { "epoch": 0.5070309145556032, "grad_norm": 0.318359375, "learning_rate": 0.001115911562227226, "loss": 2.0352, "step": 18894 }, { "epoch": 0.5070577501073422, "grad_norm": 0.31640625, "learning_rate": 0.0011158972142938707, "loss": 2.0869, "step": 18895 }, { "epoch": 0.5070845856590811, "grad_norm": 0.3046875, "learning_rate": 0.001115882865228789, "loss": 1.8649, "step": 18896 }, { "epoch": 0.5071114212108201, "grad_norm": 0.298828125, "learning_rate": 0.0011158685150320117, "loss": 1.8919, "step": 18897 }, { "epoch": 0.5071382567625591, "grad_norm": 0.3125, "learning_rate": 0.001115854163703571, "loss": 1.9564, "step": 18898 }, { "epoch": 0.507165092314298, "grad_norm": 0.302734375, "learning_rate": 0.001115839811243498, "loss": 1.9369, "step": 18899 }, { "epoch": 0.507191927866037, "grad_norm": 0.3125, "learning_rate": 0.001115825457651824, "loss": 1.9976, "step": 18900 }, { "epoch": 0.5072187634177758, "grad_norm": 0.31640625, "learning_rate": 0.0011158111029285806, "loss": 1.9864, "step": 18901 }, { "epoch": 0.5072455989695148, "grad_norm": 0.298828125, "learning_rate": 0.0011157967470737998, "loss": 1.919, "step": 18902 }, { "epoch": 0.5072724345212538, "grad_norm": 0.306640625, "learning_rate": 0.0011157823900875125, "loss": 1.905, "step": 18903 }, { "epoch": 0.5072992700729927, "grad_norm": 0.306640625, "learning_rate": 0.0011157680319697503, "loss": 1.9608, "step": 18904 }, { "epoch": 0.5073261056247317, "grad_norm": 0.30078125, "learning_rate": 0.0011157536727205448, "loss": 1.9139, "step": 18905 }, { "epoch": 0.5073529411764706, "grad_norm": 0.3046875, "learning_rate": 0.0011157393123399273, "loss": 1.9696, "step": 18906 }, { "epoch": 0.5073797767282096, "grad_norm": 0.3125, "learning_rate": 0.0011157249508279299, "loss": 1.9259, "step": 18907 }, { "epoch": 0.5074066122799484, "grad_norm": 0.306640625, "learning_rate": 0.0011157105881845835, "loss": 1.9628, "step": 18908 }, { "epoch": 0.5074334478316874, "grad_norm": 0.314453125, "learning_rate": 0.0011156962244099197, "loss": 1.995, "step": 18909 }, { "epoch": 0.5074602833834264, "grad_norm": 0.30078125, "learning_rate": 0.00111568185950397, "loss": 1.9382, "step": 18910 }, { "epoch": 0.5074871189351653, "grad_norm": 0.3046875, "learning_rate": 0.0011156674934667665, "loss": 1.9165, "step": 18911 }, { "epoch": 0.5075139544869043, "grad_norm": 0.30078125, "learning_rate": 0.00111565312629834, "loss": 1.9563, "step": 18912 }, { "epoch": 0.5075407900386432, "grad_norm": 0.302734375, "learning_rate": 0.0011156387579987221, "loss": 1.9496, "step": 18913 }, { "epoch": 0.5075676255903822, "grad_norm": 0.31640625, "learning_rate": 0.0011156243885679445, "loss": 1.8892, "step": 18914 }, { "epoch": 0.507594461142121, "grad_norm": 0.30859375, "learning_rate": 0.0011156100180060387, "loss": 1.9069, "step": 18915 }, { "epoch": 0.50762129669386, "grad_norm": 0.30859375, "learning_rate": 0.0011155956463130361, "loss": 1.9488, "step": 18916 }, { "epoch": 0.507648132245599, "grad_norm": 0.310546875, "learning_rate": 0.0011155812734889687, "loss": 2.0313, "step": 18917 }, { "epoch": 0.5076749677973379, "grad_norm": 0.306640625, "learning_rate": 0.0011155668995338672, "loss": 1.8634, "step": 18918 }, { "epoch": 0.5077018033490769, "grad_norm": 0.306640625, "learning_rate": 0.001115552524447764, "loss": 1.8992, "step": 18919 }, { "epoch": 0.5077286389008158, "grad_norm": 0.3046875, "learning_rate": 0.0011155381482306902, "loss": 1.9155, "step": 18920 }, { "epoch": 0.5077554744525548, "grad_norm": 0.302734375, "learning_rate": 0.0011155237708826771, "loss": 1.9176, "step": 18921 }, { "epoch": 0.5077823100042936, "grad_norm": 0.31640625, "learning_rate": 0.0011155093924037568, "loss": 1.9736, "step": 18922 }, { "epoch": 0.5078091455560326, "grad_norm": 0.298828125, "learning_rate": 0.0011154950127939605, "loss": 1.8789, "step": 18923 }, { "epoch": 0.5078359811077716, "grad_norm": 0.310546875, "learning_rate": 0.0011154806320533197, "loss": 1.937, "step": 18924 }, { "epoch": 0.5078628166595105, "grad_norm": 0.31640625, "learning_rate": 0.0011154662501818662, "loss": 1.9944, "step": 18925 }, { "epoch": 0.5078896522112495, "grad_norm": 0.3125, "learning_rate": 0.0011154518671796312, "loss": 1.9606, "step": 18926 }, { "epoch": 0.5079164877629884, "grad_norm": 0.31640625, "learning_rate": 0.0011154374830466467, "loss": 2.0016, "step": 18927 }, { "epoch": 0.5079433233147274, "grad_norm": 0.302734375, "learning_rate": 0.0011154230977829438, "loss": 1.8521, "step": 18928 }, { "epoch": 0.5079701588664663, "grad_norm": 0.3125, "learning_rate": 0.0011154087113885544, "loss": 1.9227, "step": 18929 }, { "epoch": 0.5079969944182052, "grad_norm": 0.314453125, "learning_rate": 0.0011153943238635098, "loss": 1.9517, "step": 18930 }, { "epoch": 0.5080238299699442, "grad_norm": 0.3125, "learning_rate": 0.0011153799352078416, "loss": 1.8615, "step": 18931 }, { "epoch": 0.5080506655216831, "grad_norm": 0.306640625, "learning_rate": 0.001115365545421582, "loss": 1.8708, "step": 18932 }, { "epoch": 0.5080775010734221, "grad_norm": 0.3046875, "learning_rate": 0.0011153511545047613, "loss": 1.9516, "step": 18933 }, { "epoch": 0.508104336625161, "grad_norm": 0.30859375, "learning_rate": 0.001115336762457412, "loss": 1.9207, "step": 18934 }, { "epoch": 0.5081311721769, "grad_norm": 0.310546875, "learning_rate": 0.0011153223692795659, "loss": 1.9261, "step": 18935 }, { "epoch": 0.508158007728639, "grad_norm": 0.3046875, "learning_rate": 0.0011153079749712535, "loss": 1.8516, "step": 18936 }, { "epoch": 0.5081848432803778, "grad_norm": 0.3203125, "learning_rate": 0.0011152935795325075, "loss": 2.0178, "step": 18937 }, { "epoch": 0.5082116788321168, "grad_norm": 0.310546875, "learning_rate": 0.0011152791829633587, "loss": 1.9749, "step": 18938 }, { "epoch": 0.5082385143838557, "grad_norm": 0.322265625, "learning_rate": 0.001115264785263839, "loss": 2.0261, "step": 18939 }, { "epoch": 0.5082653499355947, "grad_norm": 0.30859375, "learning_rate": 0.00111525038643398, "loss": 1.8955, "step": 18940 }, { "epoch": 0.5082921854873336, "grad_norm": 0.30859375, "learning_rate": 0.0011152359864738132, "loss": 1.9033, "step": 18941 }, { "epoch": 0.5083190210390726, "grad_norm": 0.30859375, "learning_rate": 0.0011152215853833703, "loss": 1.9228, "step": 18942 }, { "epoch": 0.5083458565908116, "grad_norm": 0.310546875, "learning_rate": 0.0011152071831626827, "loss": 1.8844, "step": 18943 }, { "epoch": 0.5083726921425504, "grad_norm": 0.30859375, "learning_rate": 0.0011151927798117823, "loss": 1.8711, "step": 18944 }, { "epoch": 0.5083995276942894, "grad_norm": 0.3203125, "learning_rate": 0.0011151783753307004, "loss": 1.9659, "step": 18945 }, { "epoch": 0.5084263632460283, "grad_norm": 0.3125, "learning_rate": 0.0011151639697194687, "loss": 1.9732, "step": 18946 }, { "epoch": 0.5084531987977673, "grad_norm": 0.302734375, "learning_rate": 0.001115149562978119, "loss": 1.7982, "step": 18947 }, { "epoch": 0.5084800343495062, "grad_norm": 0.30859375, "learning_rate": 0.0011151351551066827, "loss": 1.9168, "step": 18948 }, { "epoch": 0.5085068699012452, "grad_norm": 0.314453125, "learning_rate": 0.0011151207461051911, "loss": 1.9144, "step": 18949 }, { "epoch": 0.5085337054529842, "grad_norm": 0.3125, "learning_rate": 0.0011151063359736762, "loss": 1.8972, "step": 18950 }, { "epoch": 0.508560541004723, "grad_norm": 0.318359375, "learning_rate": 0.0011150919247121698, "loss": 1.9649, "step": 18951 }, { "epoch": 0.508587376556462, "grad_norm": 0.326171875, "learning_rate": 0.0011150775123207031, "loss": 1.938, "step": 18952 }, { "epoch": 0.5086142121082009, "grad_norm": 0.3125, "learning_rate": 0.0011150630987993078, "loss": 1.8371, "step": 18953 }, { "epoch": 0.5086410476599399, "grad_norm": 0.318359375, "learning_rate": 0.0011150486841480158, "loss": 2.0208, "step": 18954 }, { "epoch": 0.5086678832116789, "grad_norm": 0.302734375, "learning_rate": 0.0011150342683668585, "loss": 1.8072, "step": 18955 }, { "epoch": 0.5086947187634178, "grad_norm": 0.3125, "learning_rate": 0.0011150198514558673, "loss": 1.9032, "step": 18956 }, { "epoch": 0.5087215543151568, "grad_norm": 0.310546875, "learning_rate": 0.0011150054334150743, "loss": 1.8961, "step": 18957 }, { "epoch": 0.5087483898668956, "grad_norm": 0.310546875, "learning_rate": 0.0011149910142445108, "loss": 1.8915, "step": 18958 }, { "epoch": 0.5087752254186346, "grad_norm": 0.3046875, "learning_rate": 0.0011149765939442084, "loss": 1.8145, "step": 18959 }, { "epoch": 0.5088020609703735, "grad_norm": 0.310546875, "learning_rate": 0.001114962172514199, "loss": 1.863, "step": 18960 }, { "epoch": 0.5088288965221125, "grad_norm": 0.3046875, "learning_rate": 0.0011149477499545143, "loss": 1.8366, "step": 18961 }, { "epoch": 0.5088557320738515, "grad_norm": 0.328125, "learning_rate": 0.0011149333262651857, "loss": 1.95, "step": 18962 }, { "epoch": 0.5088825676255904, "grad_norm": 0.310546875, "learning_rate": 0.0011149189014462446, "loss": 1.8823, "step": 18963 }, { "epoch": 0.5089094031773294, "grad_norm": 0.31640625, "learning_rate": 0.0011149044754977231, "loss": 1.8528, "step": 18964 }, { "epoch": 0.5089362387290682, "grad_norm": 0.318359375, "learning_rate": 0.0011148900484196525, "loss": 1.8391, "step": 18965 }, { "epoch": 0.5089630742808072, "grad_norm": 0.322265625, "learning_rate": 0.0011148756202120648, "loss": 1.91, "step": 18966 }, { "epoch": 0.5089899098325461, "grad_norm": 0.328125, "learning_rate": 0.0011148611908749914, "loss": 1.8904, "step": 18967 }, { "epoch": 0.5090167453842851, "grad_norm": 0.322265625, "learning_rate": 0.001114846760408464, "loss": 1.9128, "step": 18968 }, { "epoch": 0.5090435809360241, "grad_norm": 0.328125, "learning_rate": 0.0011148323288125144, "loss": 1.9388, "step": 18969 }, { "epoch": 0.509070416487763, "grad_norm": 0.322265625, "learning_rate": 0.001114817896087174, "loss": 1.9281, "step": 18970 }, { "epoch": 0.509097252039502, "grad_norm": 0.365234375, "learning_rate": 0.0011148034622324744, "loss": 2.0912, "step": 18971 }, { "epoch": 0.5091240875912408, "grad_norm": 0.384765625, "learning_rate": 0.001114789027248448, "loss": 2.1636, "step": 18972 }, { "epoch": 0.5091509231429798, "grad_norm": 0.349609375, "learning_rate": 0.0011147745911351255, "loss": 2.0366, "step": 18973 }, { "epoch": 0.5091777586947188, "grad_norm": 0.34765625, "learning_rate": 0.0011147601538925391, "loss": 2.1334, "step": 18974 }, { "epoch": 0.5092045942464577, "grad_norm": 0.341796875, "learning_rate": 0.0011147457155207205, "loss": 2.0806, "step": 18975 }, { "epoch": 0.5092314297981967, "grad_norm": 0.33984375, "learning_rate": 0.001114731276019701, "loss": 2.1416, "step": 18976 }, { "epoch": 0.5092582653499356, "grad_norm": 0.365234375, "learning_rate": 0.0011147168353895127, "loss": 2.1891, "step": 18977 }, { "epoch": 0.5092851009016746, "grad_norm": 0.330078125, "learning_rate": 0.001114702393630187, "loss": 2.1541, "step": 18978 }, { "epoch": 0.5093119364534134, "grad_norm": 0.3203125, "learning_rate": 0.0011146879507417558, "loss": 2.1083, "step": 18979 }, { "epoch": 0.5093387720051524, "grad_norm": 0.3125, "learning_rate": 0.0011146735067242505, "loss": 2.1593, "step": 18980 }, { "epoch": 0.5093656075568914, "grad_norm": 0.302734375, "learning_rate": 0.0011146590615777028, "loss": 2.0124, "step": 18981 }, { "epoch": 0.5093924431086303, "grad_norm": 0.30078125, "learning_rate": 0.0011146446153021449, "loss": 1.9916, "step": 18982 }, { "epoch": 0.5094192786603693, "grad_norm": 0.30078125, "learning_rate": 0.001114630167897608, "loss": 2.045, "step": 18983 }, { "epoch": 0.5094461142121082, "grad_norm": 0.306640625, "learning_rate": 0.001114615719364124, "loss": 2.0879, "step": 18984 }, { "epoch": 0.5094729497638472, "grad_norm": 0.3125, "learning_rate": 0.0011146012697017243, "loss": 2.0781, "step": 18985 }, { "epoch": 0.509499785315586, "grad_norm": 0.306640625, "learning_rate": 0.0011145868189104409, "loss": 2.0692, "step": 18986 }, { "epoch": 0.509526620867325, "grad_norm": 0.3125, "learning_rate": 0.0011145723669903054, "loss": 2.0234, "step": 18987 }, { "epoch": 0.509553456419064, "grad_norm": 0.318359375, "learning_rate": 0.0011145579139413496, "loss": 2.1193, "step": 18988 }, { "epoch": 0.5095802919708029, "grad_norm": 0.3046875, "learning_rate": 0.001114543459763605, "loss": 2.007, "step": 18989 }, { "epoch": 0.5096071275225419, "grad_norm": 0.31640625, "learning_rate": 0.0011145290044571037, "loss": 2.0689, "step": 18990 }, { "epoch": 0.5096339630742808, "grad_norm": 0.30859375, "learning_rate": 0.0011145145480218768, "loss": 2.0531, "step": 18991 }, { "epoch": 0.5096607986260198, "grad_norm": 0.294921875, "learning_rate": 0.0011145000904579565, "loss": 1.9484, "step": 18992 }, { "epoch": 0.5096876341777586, "grad_norm": 0.3203125, "learning_rate": 0.0011144856317653745, "loss": 2.1881, "step": 18993 }, { "epoch": 0.5097144697294976, "grad_norm": 0.296875, "learning_rate": 0.0011144711719441622, "loss": 2.0337, "step": 18994 }, { "epoch": 0.5097413052812366, "grad_norm": 0.30859375, "learning_rate": 0.0011144567109943515, "loss": 2.1056, "step": 18995 }, { "epoch": 0.5097681408329755, "grad_norm": 0.314453125, "learning_rate": 0.0011144422489159742, "loss": 2.0476, "step": 18996 }, { "epoch": 0.5097949763847145, "grad_norm": 0.30078125, "learning_rate": 0.001114427785709062, "loss": 2.059, "step": 18997 }, { "epoch": 0.5098218119364534, "grad_norm": 0.306640625, "learning_rate": 0.0011144133213736465, "loss": 2.0107, "step": 18998 }, { "epoch": 0.5098486474881924, "grad_norm": 0.296875, "learning_rate": 0.0011143988559097597, "loss": 2.0491, "step": 18999 }, { "epoch": 0.5098754830399314, "grad_norm": 0.298828125, "learning_rate": 0.0011143843893174328, "loss": 2.0141, "step": 19000 }, { "epoch": 0.5099023185916702, "grad_norm": 0.296875, "learning_rate": 0.001114369921596698, "loss": 1.9375, "step": 19001 }, { "epoch": 0.5099291541434092, "grad_norm": 0.306640625, "learning_rate": 0.0011143554527475873, "loss": 2.0503, "step": 19002 }, { "epoch": 0.5099559896951481, "grad_norm": 0.291015625, "learning_rate": 0.0011143409827701315, "loss": 1.9521, "step": 19003 }, { "epoch": 0.5099828252468871, "grad_norm": 0.306640625, "learning_rate": 0.0011143265116643633, "loss": 2.0256, "step": 19004 }, { "epoch": 0.510009660798626, "grad_norm": 0.302734375, "learning_rate": 0.001114312039430314, "loss": 2.0064, "step": 19005 }, { "epoch": 0.510036496350365, "grad_norm": 0.294921875, "learning_rate": 0.0011142975660680154, "loss": 1.9551, "step": 19006 }, { "epoch": 0.510063331902104, "grad_norm": 0.302734375, "learning_rate": 0.001114283091577499, "loss": 2.0428, "step": 19007 }, { "epoch": 0.5100901674538428, "grad_norm": 0.306640625, "learning_rate": 0.001114268615958797, "loss": 2.0377, "step": 19008 }, { "epoch": 0.5101170030055818, "grad_norm": 0.30078125, "learning_rate": 0.001114254139211941, "loss": 1.9762, "step": 19009 }, { "epoch": 0.5101438385573207, "grad_norm": 0.306640625, "learning_rate": 0.0011142396613369626, "loss": 1.9956, "step": 19010 }, { "epoch": 0.5101706741090597, "grad_norm": 0.298828125, "learning_rate": 0.0011142251823338936, "loss": 1.9627, "step": 19011 }, { "epoch": 0.5101975096607986, "grad_norm": 0.30078125, "learning_rate": 0.001114210702202766, "loss": 1.9999, "step": 19012 }, { "epoch": 0.5102243452125376, "grad_norm": 0.3046875, "learning_rate": 0.0011141962209436113, "loss": 2.004, "step": 19013 }, { "epoch": 0.5102511807642766, "grad_norm": 0.298828125, "learning_rate": 0.0011141817385564615, "loss": 1.9696, "step": 19014 }, { "epoch": 0.5102780163160154, "grad_norm": 0.302734375, "learning_rate": 0.0011141672550413482, "loss": 2.0809, "step": 19015 }, { "epoch": 0.5103048518677544, "grad_norm": 0.306640625, "learning_rate": 0.001114152770398303, "loss": 2.0744, "step": 19016 }, { "epoch": 0.5103316874194933, "grad_norm": 0.298828125, "learning_rate": 0.0011141382846273583, "loss": 1.9117, "step": 19017 }, { "epoch": 0.5103585229712323, "grad_norm": 0.287109375, "learning_rate": 0.0011141237977285452, "loss": 1.9054, "step": 19018 }, { "epoch": 0.5103853585229712, "grad_norm": 0.298828125, "learning_rate": 0.0011141093097018957, "loss": 1.9783, "step": 19019 }, { "epoch": 0.5104121940747102, "grad_norm": 0.310546875, "learning_rate": 0.0011140948205474418, "loss": 1.9551, "step": 19020 }, { "epoch": 0.5104390296264492, "grad_norm": 0.298828125, "learning_rate": 0.0011140803302652148, "loss": 1.935, "step": 19021 }, { "epoch": 0.510465865178188, "grad_norm": 0.3046875, "learning_rate": 0.0011140658388552472, "loss": 1.952, "step": 19022 }, { "epoch": 0.510492700729927, "grad_norm": 0.3046875, "learning_rate": 0.0011140513463175701, "loss": 1.9839, "step": 19023 }, { "epoch": 0.5105195362816659, "grad_norm": 0.306640625, "learning_rate": 0.0011140368526522159, "loss": 2.0041, "step": 19024 }, { "epoch": 0.5105463718334049, "grad_norm": 0.306640625, "learning_rate": 0.0011140223578592159, "loss": 2.0132, "step": 19025 }, { "epoch": 0.5105732073851439, "grad_norm": 0.296875, "learning_rate": 0.0011140078619386018, "loss": 2.0026, "step": 19026 }, { "epoch": 0.5106000429368828, "grad_norm": 0.302734375, "learning_rate": 0.001113993364890406, "loss": 1.9704, "step": 19027 }, { "epoch": 0.5106268784886218, "grad_norm": 0.306640625, "learning_rate": 0.00111397886671466, "loss": 1.9746, "step": 19028 }, { "epoch": 0.5106537140403606, "grad_norm": 0.3125, "learning_rate": 0.0011139643674113954, "loss": 2.0085, "step": 19029 }, { "epoch": 0.5106805495920996, "grad_norm": 0.30078125, "learning_rate": 0.001113949866980644, "loss": 1.9139, "step": 19030 }, { "epoch": 0.5107073851438385, "grad_norm": 0.298828125, "learning_rate": 0.0011139353654224383, "loss": 1.937, "step": 19031 }, { "epoch": 0.5107342206955775, "grad_norm": 0.310546875, "learning_rate": 0.0011139208627368095, "loss": 2.0154, "step": 19032 }, { "epoch": 0.5107610562473165, "grad_norm": 0.296875, "learning_rate": 0.0011139063589237894, "loss": 1.8952, "step": 19033 }, { "epoch": 0.5107878917990554, "grad_norm": 0.306640625, "learning_rate": 0.00111389185398341, "loss": 1.983, "step": 19034 }, { "epoch": 0.5108147273507944, "grad_norm": 0.296875, "learning_rate": 0.0011138773479157029, "loss": 1.9165, "step": 19035 }, { "epoch": 0.5108415629025332, "grad_norm": 0.3046875, "learning_rate": 0.0011138628407207003, "loss": 1.9598, "step": 19036 }, { "epoch": 0.5108683984542722, "grad_norm": 0.302734375, "learning_rate": 0.0011138483323984336, "loss": 1.9268, "step": 19037 }, { "epoch": 0.5108952340060111, "grad_norm": 0.296875, "learning_rate": 0.001113833822948935, "loss": 1.9203, "step": 19038 }, { "epoch": 0.5109220695577501, "grad_norm": 0.298828125, "learning_rate": 0.0011138193123722358, "loss": 1.908, "step": 19039 }, { "epoch": 0.5109489051094891, "grad_norm": 0.298828125, "learning_rate": 0.0011138048006683686, "loss": 1.9854, "step": 19040 }, { "epoch": 0.510975740661228, "grad_norm": 0.29296875, "learning_rate": 0.0011137902878373648, "loss": 1.8446, "step": 19041 }, { "epoch": 0.511002576212967, "grad_norm": 0.314453125, "learning_rate": 0.0011137757738792563, "loss": 2.0294, "step": 19042 }, { "epoch": 0.5110294117647058, "grad_norm": 0.314453125, "learning_rate": 0.0011137612587940747, "loss": 2.0964, "step": 19043 }, { "epoch": 0.5110562473164448, "grad_norm": 0.314453125, "learning_rate": 0.0011137467425818522, "loss": 2.0109, "step": 19044 }, { "epoch": 0.5110830828681838, "grad_norm": 0.30859375, "learning_rate": 0.0011137322252426202, "loss": 1.9766, "step": 19045 }, { "epoch": 0.5111099184199227, "grad_norm": 0.302734375, "learning_rate": 0.0011137177067764112, "loss": 2.0344, "step": 19046 }, { "epoch": 0.5111367539716617, "grad_norm": 0.298828125, "learning_rate": 0.0011137031871832567, "loss": 1.9406, "step": 19047 }, { "epoch": 0.5111635895234006, "grad_norm": 0.30078125, "learning_rate": 0.0011136886664631883, "loss": 1.9615, "step": 19048 }, { "epoch": 0.5111904250751396, "grad_norm": 0.306640625, "learning_rate": 0.0011136741446162383, "loss": 1.9867, "step": 19049 }, { "epoch": 0.5112172606268784, "grad_norm": 0.30078125, "learning_rate": 0.0011136596216424382, "loss": 1.9185, "step": 19050 }, { "epoch": 0.5112440961786174, "grad_norm": 0.296875, "learning_rate": 0.00111364509754182, "loss": 1.9065, "step": 19051 }, { "epoch": 0.5112709317303564, "grad_norm": 0.30078125, "learning_rate": 0.0011136305723144157, "loss": 1.8834, "step": 19052 }, { "epoch": 0.5112977672820953, "grad_norm": 0.31640625, "learning_rate": 0.0011136160459602569, "loss": 2.0141, "step": 19053 }, { "epoch": 0.5113246028338343, "grad_norm": 0.3046875, "learning_rate": 0.0011136015184793756, "loss": 1.9748, "step": 19054 }, { "epoch": 0.5113514383855732, "grad_norm": 0.31640625, "learning_rate": 0.0011135869898718038, "loss": 1.9599, "step": 19055 }, { "epoch": 0.5113782739373122, "grad_norm": 0.310546875, "learning_rate": 0.0011135724601375732, "loss": 1.9721, "step": 19056 }, { "epoch": 0.511405109489051, "grad_norm": 0.310546875, "learning_rate": 0.0011135579292767157, "loss": 1.9554, "step": 19057 }, { "epoch": 0.51143194504079, "grad_norm": 0.294921875, "learning_rate": 0.0011135433972892632, "loss": 1.8068, "step": 19058 }, { "epoch": 0.511458780592529, "grad_norm": 0.298828125, "learning_rate": 0.0011135288641752476, "loss": 1.8874, "step": 19059 }, { "epoch": 0.5114856161442679, "grad_norm": 0.298828125, "learning_rate": 0.0011135143299347008, "loss": 1.8846, "step": 19060 }, { "epoch": 0.5115124516960069, "grad_norm": 0.306640625, "learning_rate": 0.0011134997945676545, "loss": 1.941, "step": 19061 }, { "epoch": 0.5115392872477458, "grad_norm": 0.3046875, "learning_rate": 0.0011134852580741408, "loss": 1.9401, "step": 19062 }, { "epoch": 0.5115661227994848, "grad_norm": 0.314453125, "learning_rate": 0.0011134707204541916, "loss": 1.9711, "step": 19063 }, { "epoch": 0.5115929583512236, "grad_norm": 0.31640625, "learning_rate": 0.0011134561817078385, "loss": 1.9564, "step": 19064 }, { "epoch": 0.5116197939029626, "grad_norm": 0.310546875, "learning_rate": 0.0011134416418351139, "loss": 1.9116, "step": 19065 }, { "epoch": 0.5116466294547016, "grad_norm": 0.30859375, "learning_rate": 0.001113427100836049, "loss": 1.9372, "step": 19066 }, { "epoch": 0.5116734650064405, "grad_norm": 0.306640625, "learning_rate": 0.0011134125587106763, "loss": 2.0083, "step": 19067 }, { "epoch": 0.5117003005581795, "grad_norm": 0.302734375, "learning_rate": 0.0011133980154590275, "loss": 1.8883, "step": 19068 }, { "epoch": 0.5117271361099184, "grad_norm": 0.3125, "learning_rate": 0.0011133834710811347, "loss": 1.9189, "step": 19069 }, { "epoch": 0.5117539716616574, "grad_norm": 0.310546875, "learning_rate": 0.0011133689255770293, "loss": 1.9599, "step": 19070 }, { "epoch": 0.5117808072133964, "grad_norm": 0.314453125, "learning_rate": 0.0011133543789467438, "loss": 1.9152, "step": 19071 }, { "epoch": 0.5118076427651352, "grad_norm": 0.306640625, "learning_rate": 0.0011133398311903096, "loss": 1.9689, "step": 19072 }, { "epoch": 0.5118344783168742, "grad_norm": 0.306640625, "learning_rate": 0.0011133252823077588, "loss": 1.8503, "step": 19073 }, { "epoch": 0.5118613138686131, "grad_norm": 0.3125, "learning_rate": 0.0011133107322991232, "loss": 1.8913, "step": 19074 }, { "epoch": 0.5118881494203521, "grad_norm": 0.30078125, "learning_rate": 0.0011132961811644352, "loss": 1.8952, "step": 19075 }, { "epoch": 0.511914984972091, "grad_norm": 0.3125, "learning_rate": 0.0011132816289037262, "loss": 1.9595, "step": 19076 }, { "epoch": 0.51194182052383, "grad_norm": 0.302734375, "learning_rate": 0.0011132670755170286, "loss": 1.8986, "step": 19077 }, { "epoch": 0.511968656075569, "grad_norm": 0.30078125, "learning_rate": 0.0011132525210043737, "loss": 1.8639, "step": 19078 }, { "epoch": 0.5119954916273078, "grad_norm": 0.3125, "learning_rate": 0.001113237965365794, "loss": 1.9227, "step": 19079 }, { "epoch": 0.5120223271790468, "grad_norm": 0.3125, "learning_rate": 0.0011132234086013212, "loss": 1.9634, "step": 19080 }, { "epoch": 0.5120491627307857, "grad_norm": 0.302734375, "learning_rate": 0.0011132088507109872, "loss": 1.8274, "step": 19081 }, { "epoch": 0.5120759982825247, "grad_norm": 0.318359375, "learning_rate": 0.0011131942916948238, "loss": 1.9696, "step": 19082 }, { "epoch": 0.5121028338342636, "grad_norm": 0.3203125, "learning_rate": 0.0011131797315528632, "loss": 1.8908, "step": 19083 }, { "epoch": 0.5121296693860026, "grad_norm": 0.318359375, "learning_rate": 0.0011131651702851373, "loss": 1.9824, "step": 19084 }, { "epoch": 0.5121565049377416, "grad_norm": 0.306640625, "learning_rate": 0.001113150607891678, "loss": 1.937, "step": 19085 }, { "epoch": 0.5121833404894804, "grad_norm": 0.3046875, "learning_rate": 0.0011131360443725172, "loss": 1.9251, "step": 19086 }, { "epoch": 0.5122101760412194, "grad_norm": 0.30859375, "learning_rate": 0.001113121479727687, "loss": 1.8858, "step": 19087 }, { "epoch": 0.5122370115929583, "grad_norm": 0.306640625, "learning_rate": 0.001113106913957219, "loss": 1.8563, "step": 19088 }, { "epoch": 0.5122638471446973, "grad_norm": 0.3125, "learning_rate": 0.0011130923470611454, "loss": 1.8563, "step": 19089 }, { "epoch": 0.5122906826964362, "grad_norm": 0.306640625, "learning_rate": 0.0011130777790394984, "loss": 1.8904, "step": 19090 }, { "epoch": 0.5123175182481752, "grad_norm": 0.310546875, "learning_rate": 0.0011130632098923096, "loss": 1.8805, "step": 19091 }, { "epoch": 0.5123443537999142, "grad_norm": 0.3046875, "learning_rate": 0.0011130486396196111, "loss": 1.8045, "step": 19092 }, { "epoch": 0.512371189351653, "grad_norm": 0.31640625, "learning_rate": 0.0011130340682214347, "loss": 1.9061, "step": 19093 }, { "epoch": 0.512398024903392, "grad_norm": 0.296875, "learning_rate": 0.0011130194956978125, "loss": 1.7555, "step": 19094 }, { "epoch": 0.5124248604551309, "grad_norm": 0.298828125, "learning_rate": 0.0011130049220487764, "loss": 1.7789, "step": 19095 }, { "epoch": 0.5124516960068699, "grad_norm": 0.310546875, "learning_rate": 0.0011129903472743587, "loss": 1.8436, "step": 19096 }, { "epoch": 0.5124785315586089, "grad_norm": 0.306640625, "learning_rate": 0.001112975771374591, "loss": 1.8956, "step": 19097 }, { "epoch": 0.5125053671103478, "grad_norm": 0.30859375, "learning_rate": 0.0011129611943495052, "loss": 1.8425, "step": 19098 }, { "epoch": 0.5125322026620868, "grad_norm": 0.326171875, "learning_rate": 0.0011129466161991337, "loss": 1.861, "step": 19099 }, { "epoch": 0.5125590382138256, "grad_norm": 0.314453125, "learning_rate": 0.001112932036923508, "loss": 1.8335, "step": 19100 }, { "epoch": 0.5125858737655646, "grad_norm": 0.318359375, "learning_rate": 0.0011129174565226603, "loss": 1.9442, "step": 19101 }, { "epoch": 0.5126127093173035, "grad_norm": 0.33203125, "learning_rate": 0.0011129028749966227, "loss": 1.9769, "step": 19102 }, { "epoch": 0.5126395448690425, "grad_norm": 0.373046875, "learning_rate": 0.0011128882923454272, "loss": 2.2088, "step": 19103 }, { "epoch": 0.5126663804207815, "grad_norm": 0.359375, "learning_rate": 0.0011128737085691054, "loss": 2.1009, "step": 19104 }, { "epoch": 0.5126932159725204, "grad_norm": 0.33984375, "learning_rate": 0.00111285912366769, "loss": 2.0645, "step": 19105 }, { "epoch": 0.5127200515242594, "grad_norm": 0.326171875, "learning_rate": 0.0011128445376412124, "loss": 2.0213, "step": 19106 }, { "epoch": 0.5127468870759982, "grad_norm": 0.3203125, "learning_rate": 0.0011128299504897046, "loss": 2.1315, "step": 19107 }, { "epoch": 0.5127737226277372, "grad_norm": 0.322265625, "learning_rate": 0.0011128153622131987, "loss": 2.1428, "step": 19108 }, { "epoch": 0.5128005581794761, "grad_norm": 0.33203125, "learning_rate": 0.001112800772811727, "loss": 2.0633, "step": 19109 }, { "epoch": 0.5128273937312151, "grad_norm": 0.318359375, "learning_rate": 0.0011127861822853213, "loss": 2.1399, "step": 19110 }, { "epoch": 0.5128542292829541, "grad_norm": 0.306640625, "learning_rate": 0.0011127715906340135, "loss": 2.1397, "step": 19111 }, { "epoch": 0.512881064834693, "grad_norm": 0.31640625, "learning_rate": 0.0011127569978578358, "loss": 2.1462, "step": 19112 }, { "epoch": 0.512907900386432, "grad_norm": 0.302734375, "learning_rate": 0.00111274240395682, "loss": 1.997, "step": 19113 }, { "epoch": 0.5129347359381708, "grad_norm": 0.306640625, "learning_rate": 0.0011127278089309981, "loss": 2.0583, "step": 19114 }, { "epoch": 0.5129615714899098, "grad_norm": 0.302734375, "learning_rate": 0.0011127132127804024, "loss": 2.0294, "step": 19115 }, { "epoch": 0.5129884070416488, "grad_norm": 0.310546875, "learning_rate": 0.0011126986155050645, "loss": 2.0491, "step": 19116 }, { "epoch": 0.5130152425933877, "grad_norm": 0.302734375, "learning_rate": 0.001112684017105017, "loss": 1.9972, "step": 19117 }, { "epoch": 0.5130420781451267, "grad_norm": 0.30078125, "learning_rate": 0.0011126694175802915, "loss": 2.0091, "step": 19118 }, { "epoch": 0.5130689136968656, "grad_norm": 0.318359375, "learning_rate": 0.00111265481693092, "loss": 2.0914, "step": 19119 }, { "epoch": 0.5130957492486046, "grad_norm": 0.306640625, "learning_rate": 0.0011126402151569348, "loss": 2.1427, "step": 19120 }, { "epoch": 0.5131225848003435, "grad_norm": 0.302734375, "learning_rate": 0.0011126256122583676, "loss": 2.1235, "step": 19121 }, { "epoch": 0.5131494203520824, "grad_norm": 0.314453125, "learning_rate": 0.0011126110082352507, "loss": 2.0635, "step": 19122 }, { "epoch": 0.5131762559038214, "grad_norm": 0.294921875, "learning_rate": 0.0011125964030876162, "loss": 1.9938, "step": 19123 }, { "epoch": 0.5132030914555603, "grad_norm": 0.296875, "learning_rate": 0.001112581796815496, "loss": 2.0264, "step": 19124 }, { "epoch": 0.5132299270072993, "grad_norm": 0.30078125, "learning_rate": 0.001112567189418922, "loss": 1.9994, "step": 19125 }, { "epoch": 0.5132567625590382, "grad_norm": 0.294921875, "learning_rate": 0.0011125525808979265, "loss": 2.0126, "step": 19126 }, { "epoch": 0.5132835981107772, "grad_norm": 0.291015625, "learning_rate": 0.0011125379712525413, "loss": 1.9587, "step": 19127 }, { "epoch": 0.513310433662516, "grad_norm": 0.3046875, "learning_rate": 0.0011125233604827986, "loss": 1.9805, "step": 19128 }, { "epoch": 0.513337269214255, "grad_norm": 0.310546875, "learning_rate": 0.0011125087485887306, "loss": 2.0127, "step": 19129 }, { "epoch": 0.513364104765994, "grad_norm": 0.302734375, "learning_rate": 0.001112494135570369, "loss": 1.9945, "step": 19130 }, { "epoch": 0.5133909403177329, "grad_norm": 0.298828125, "learning_rate": 0.0011124795214277462, "loss": 1.9945, "step": 19131 }, { "epoch": 0.5134177758694719, "grad_norm": 0.30859375, "learning_rate": 0.001112464906160894, "loss": 2.0646, "step": 19132 }, { "epoch": 0.5134446114212108, "grad_norm": 0.310546875, "learning_rate": 0.0011124502897698447, "loss": 2.0221, "step": 19133 }, { "epoch": 0.5134714469729498, "grad_norm": 0.3046875, "learning_rate": 0.0011124356722546302, "loss": 2.0121, "step": 19134 }, { "epoch": 0.5134982825246887, "grad_norm": 0.298828125, "learning_rate": 0.0011124210536152823, "loss": 1.9793, "step": 19135 }, { "epoch": 0.5135251180764276, "grad_norm": 0.306640625, "learning_rate": 0.0011124064338518336, "loss": 2.0069, "step": 19136 }, { "epoch": 0.5135519536281666, "grad_norm": 0.30078125, "learning_rate": 0.001112391812964316, "loss": 1.9684, "step": 19137 }, { "epoch": 0.5135787891799055, "grad_norm": 0.294921875, "learning_rate": 0.0011123771909527613, "loss": 2.0006, "step": 19138 }, { "epoch": 0.5136056247316445, "grad_norm": 0.30078125, "learning_rate": 0.001112362567817202, "loss": 2.0458, "step": 19139 }, { "epoch": 0.5136324602833834, "grad_norm": 0.310546875, "learning_rate": 0.0011123479435576698, "loss": 2.0573, "step": 19140 }, { "epoch": 0.5136592958351224, "grad_norm": 0.302734375, "learning_rate": 0.0011123333181741971, "loss": 2.0355, "step": 19141 }, { "epoch": 0.5136861313868614, "grad_norm": 0.31640625, "learning_rate": 0.0011123186916668158, "loss": 2.0222, "step": 19142 }, { "epoch": 0.5137129669386002, "grad_norm": 0.31640625, "learning_rate": 0.001112304064035558, "loss": 2.0561, "step": 19143 }, { "epoch": 0.5137398024903392, "grad_norm": 0.298828125, "learning_rate": 0.0011122894352804558, "loss": 2.0102, "step": 19144 }, { "epoch": 0.5137666380420781, "grad_norm": 0.310546875, "learning_rate": 0.0011122748054015412, "loss": 1.9617, "step": 19145 }, { "epoch": 0.5137934735938171, "grad_norm": 0.296875, "learning_rate": 0.0011122601743988466, "loss": 1.9796, "step": 19146 }, { "epoch": 0.513820309145556, "grad_norm": 0.30078125, "learning_rate": 0.0011122455422724036, "loss": 1.9304, "step": 19147 }, { "epoch": 0.513847144697295, "grad_norm": 0.30859375, "learning_rate": 0.001112230909022245, "loss": 1.9422, "step": 19148 }, { "epoch": 0.513873980249034, "grad_norm": 0.2890625, "learning_rate": 0.001112216274648402, "loss": 1.8828, "step": 19149 }, { "epoch": 0.5139008158007728, "grad_norm": 0.296875, "learning_rate": 0.0011122016391509075, "loss": 1.9366, "step": 19150 }, { "epoch": 0.5139276513525118, "grad_norm": 0.296875, "learning_rate": 0.0011121870025297932, "loss": 1.9223, "step": 19151 }, { "epoch": 0.5139544869042507, "grad_norm": 0.306640625, "learning_rate": 0.0011121723647850913, "loss": 1.9338, "step": 19152 }, { "epoch": 0.5139813224559897, "grad_norm": 0.30859375, "learning_rate": 0.001112157725916834, "loss": 2.0297, "step": 19153 }, { "epoch": 0.5140081580077286, "grad_norm": 0.3046875, "learning_rate": 0.0011121430859250532, "loss": 2.0232, "step": 19154 }, { "epoch": 0.5140349935594676, "grad_norm": 0.294921875, "learning_rate": 0.0011121284448097812, "loss": 1.9527, "step": 19155 }, { "epoch": 0.5140618291112066, "grad_norm": 0.30078125, "learning_rate": 0.00111211380257105, "loss": 1.9379, "step": 19156 }, { "epoch": 0.5140886646629454, "grad_norm": 0.30078125, "learning_rate": 0.001112099159208892, "loss": 1.9735, "step": 19157 }, { "epoch": 0.5141155002146844, "grad_norm": 0.306640625, "learning_rate": 0.001112084514723339, "loss": 1.9581, "step": 19158 }, { "epoch": 0.5141423357664233, "grad_norm": 0.302734375, "learning_rate": 0.001112069869114423, "loss": 2.0137, "step": 19159 }, { "epoch": 0.5141691713181623, "grad_norm": 0.30078125, "learning_rate": 0.0011120552223821767, "loss": 2.0007, "step": 19160 }, { "epoch": 0.5141960068699012, "grad_norm": 0.3125, "learning_rate": 0.0011120405745266317, "loss": 2.0491, "step": 19161 }, { "epoch": 0.5142228424216402, "grad_norm": 0.306640625, "learning_rate": 0.0011120259255478205, "loss": 2.017, "step": 19162 }, { "epoch": 0.5142496779733792, "grad_norm": 0.294921875, "learning_rate": 0.001112011275445775, "loss": 1.8748, "step": 19163 }, { "epoch": 0.514276513525118, "grad_norm": 0.283203125, "learning_rate": 0.0011119966242205271, "loss": 1.8263, "step": 19164 }, { "epoch": 0.514303349076857, "grad_norm": 0.30859375, "learning_rate": 0.0011119819718721094, "loss": 2.0517, "step": 19165 }, { "epoch": 0.5143301846285959, "grad_norm": 0.294921875, "learning_rate": 0.0011119673184005541, "loss": 1.8918, "step": 19166 }, { "epoch": 0.5143570201803349, "grad_norm": 0.310546875, "learning_rate": 0.0011119526638058929, "loss": 1.9777, "step": 19167 }, { "epoch": 0.5143838557320739, "grad_norm": 0.298828125, "learning_rate": 0.0011119380080881582, "loss": 1.915, "step": 19168 }, { "epoch": 0.5144106912838128, "grad_norm": 0.30078125, "learning_rate": 0.0011119233512473822, "loss": 1.9623, "step": 19169 }, { "epoch": 0.5144375268355518, "grad_norm": 0.298828125, "learning_rate": 0.0011119086932835968, "loss": 1.9402, "step": 19170 }, { "epoch": 0.5144643623872907, "grad_norm": 0.3046875, "learning_rate": 0.0011118940341968346, "loss": 1.9614, "step": 19171 }, { "epoch": 0.5144911979390296, "grad_norm": 0.296875, "learning_rate": 0.0011118793739871274, "loss": 1.9798, "step": 19172 }, { "epoch": 0.5145180334907685, "grad_norm": 0.3046875, "learning_rate": 0.0011118647126545074, "loss": 1.9947, "step": 19173 }, { "epoch": 0.5145448690425075, "grad_norm": 0.302734375, "learning_rate": 0.001111850050199007, "loss": 1.9485, "step": 19174 }, { "epoch": 0.5145717045942465, "grad_norm": 0.302734375, "learning_rate": 0.0011118353866206578, "loss": 1.9304, "step": 19175 }, { "epoch": 0.5145985401459854, "grad_norm": 0.3125, "learning_rate": 0.0011118207219194926, "loss": 1.9593, "step": 19176 }, { "epoch": 0.5146253756977244, "grad_norm": 0.3046875, "learning_rate": 0.0011118060560955433, "loss": 1.9173, "step": 19177 }, { "epoch": 0.5146522112494633, "grad_norm": 0.298828125, "learning_rate": 0.001111791389148842, "loss": 1.8714, "step": 19178 }, { "epoch": 0.5146790468012022, "grad_norm": 0.3125, "learning_rate": 0.001111776721079421, "loss": 2.0293, "step": 19179 }, { "epoch": 0.5147058823529411, "grad_norm": 0.294921875, "learning_rate": 0.0011117620518873127, "loss": 1.8244, "step": 19180 }, { "epoch": 0.5147327179046801, "grad_norm": 0.330078125, "learning_rate": 0.0011117473815725485, "loss": 2.0659, "step": 19181 }, { "epoch": 0.5147595534564191, "grad_norm": 0.306640625, "learning_rate": 0.0011117327101351614, "loss": 1.9446, "step": 19182 }, { "epoch": 0.514786389008158, "grad_norm": 0.298828125, "learning_rate": 0.0011117180375751835, "loss": 1.9602, "step": 19183 }, { "epoch": 0.514813224559897, "grad_norm": 0.29296875, "learning_rate": 0.0011117033638926464, "loss": 1.8386, "step": 19184 }, { "epoch": 0.5148400601116359, "grad_norm": 0.296875, "learning_rate": 0.0011116886890875828, "loss": 1.827, "step": 19185 }, { "epoch": 0.5148668956633748, "grad_norm": 0.296875, "learning_rate": 0.0011116740131600247, "loss": 1.8658, "step": 19186 }, { "epoch": 0.5148937312151138, "grad_norm": 0.291015625, "learning_rate": 0.0011116593361100047, "loss": 1.834, "step": 19187 }, { "epoch": 0.5149205667668527, "grad_norm": 0.298828125, "learning_rate": 0.0011116446579375544, "loss": 1.9502, "step": 19188 }, { "epoch": 0.5149474023185917, "grad_norm": 0.3125, "learning_rate": 0.0011116299786427062, "loss": 1.9595, "step": 19189 }, { "epoch": 0.5149742378703306, "grad_norm": 0.3046875, "learning_rate": 0.0011116152982254923, "loss": 1.9765, "step": 19190 }, { "epoch": 0.5150010734220696, "grad_norm": 0.314453125, "learning_rate": 0.001111600616685945, "loss": 1.9367, "step": 19191 }, { "epoch": 0.5150279089738085, "grad_norm": 0.294921875, "learning_rate": 0.0011115859340240966, "loss": 1.9208, "step": 19192 }, { "epoch": 0.5150547445255474, "grad_norm": 0.31640625, "learning_rate": 0.001111571250239979, "loss": 1.9928, "step": 19193 }, { "epoch": 0.5150815800772864, "grad_norm": 0.30859375, "learning_rate": 0.0011115565653336246, "loss": 1.9462, "step": 19194 }, { "epoch": 0.5151084156290253, "grad_norm": 0.3046875, "learning_rate": 0.0011115418793050656, "loss": 1.8547, "step": 19195 }, { "epoch": 0.5151352511807643, "grad_norm": 0.296875, "learning_rate": 0.0011115271921543343, "loss": 1.8196, "step": 19196 }, { "epoch": 0.5151620867325032, "grad_norm": 0.3125, "learning_rate": 0.0011115125038814629, "loss": 2.0045, "step": 19197 }, { "epoch": 0.5151889222842422, "grad_norm": 0.296875, "learning_rate": 0.0011114978144864834, "loss": 1.8445, "step": 19198 }, { "epoch": 0.5152157578359811, "grad_norm": 0.306640625, "learning_rate": 0.0011114831239694282, "loss": 1.8832, "step": 19199 }, { "epoch": 0.51524259338772, "grad_norm": 0.3046875, "learning_rate": 0.0011114684323303295, "loss": 1.9153, "step": 19200 }, { "epoch": 0.515269428939459, "grad_norm": 0.306640625, "learning_rate": 0.0011114537395692195, "loss": 1.88, "step": 19201 }, { "epoch": 0.5152962644911979, "grad_norm": 0.306640625, "learning_rate": 0.0011114390456861307, "loss": 1.9145, "step": 19202 }, { "epoch": 0.5153231000429369, "grad_norm": 0.30078125, "learning_rate": 0.001111424350681095, "loss": 1.8417, "step": 19203 }, { "epoch": 0.5153499355946758, "grad_norm": 0.3125, "learning_rate": 0.0011114096545541447, "loss": 1.9293, "step": 19204 }, { "epoch": 0.5153767711464148, "grad_norm": 0.31640625, "learning_rate": 0.0011113949573053118, "loss": 1.9566, "step": 19205 }, { "epoch": 0.5154036066981537, "grad_norm": 0.3046875, "learning_rate": 0.001111380258934629, "loss": 1.9018, "step": 19206 }, { "epoch": 0.5154304422498927, "grad_norm": 0.30859375, "learning_rate": 0.0011113655594421287, "loss": 1.9316, "step": 19207 }, { "epoch": 0.5154572778016316, "grad_norm": 0.31640625, "learning_rate": 0.0011113508588278425, "loss": 1.8719, "step": 19208 }, { "epoch": 0.5154841133533705, "grad_norm": 0.302734375, "learning_rate": 0.001111336157091803, "loss": 1.8549, "step": 19209 }, { "epoch": 0.5155109489051095, "grad_norm": 0.318359375, "learning_rate": 0.0011113214542340423, "loss": 1.9874, "step": 19210 }, { "epoch": 0.5155377844568484, "grad_norm": 0.3046875, "learning_rate": 0.0011113067502545929, "loss": 1.911, "step": 19211 }, { "epoch": 0.5155646200085874, "grad_norm": 0.318359375, "learning_rate": 0.001111292045153487, "loss": 1.9359, "step": 19212 }, { "epoch": 0.5155914555603264, "grad_norm": 0.322265625, "learning_rate": 0.0011112773389307566, "loss": 1.9871, "step": 19213 }, { "epoch": 0.5156182911120653, "grad_norm": 0.31640625, "learning_rate": 0.0011112626315864342, "loss": 1.9605, "step": 19214 }, { "epoch": 0.5156451266638042, "grad_norm": 0.30859375, "learning_rate": 0.0011112479231205519, "loss": 1.9323, "step": 19215 }, { "epoch": 0.5156719622155431, "grad_norm": 0.302734375, "learning_rate": 0.0011112332135331422, "loss": 1.813, "step": 19216 }, { "epoch": 0.5156987977672821, "grad_norm": 0.318359375, "learning_rate": 0.0011112185028242373, "loss": 1.9531, "step": 19217 }, { "epoch": 0.515725633319021, "grad_norm": 0.314453125, "learning_rate": 0.0011112037909938692, "loss": 2.0106, "step": 19218 }, { "epoch": 0.51575246887076, "grad_norm": 0.31640625, "learning_rate": 0.0011111890780420707, "loss": 1.8697, "step": 19219 }, { "epoch": 0.515779304422499, "grad_norm": 0.30859375, "learning_rate": 0.0011111743639688736, "loss": 1.8434, "step": 19220 }, { "epoch": 0.5158061399742379, "grad_norm": 0.32421875, "learning_rate": 0.0011111596487743103, "loss": 1.9116, "step": 19221 }, { "epoch": 0.5158329755259768, "grad_norm": 0.306640625, "learning_rate": 0.001111144932458413, "loss": 1.8699, "step": 19222 }, { "epoch": 0.5158598110777157, "grad_norm": 0.3125, "learning_rate": 0.0011111302150212143, "loss": 1.9119, "step": 19223 }, { "epoch": 0.5158866466294547, "grad_norm": 0.30859375, "learning_rate": 0.001111115496462746, "loss": 1.8257, "step": 19224 }, { "epoch": 0.5159134821811936, "grad_norm": 0.30859375, "learning_rate": 0.001111100776783041, "loss": 1.8572, "step": 19225 }, { "epoch": 0.5159403177329326, "grad_norm": 0.30859375, "learning_rate": 0.001111086055982131, "loss": 1.9175, "step": 19226 }, { "epoch": 0.5159671532846716, "grad_norm": 0.3046875, "learning_rate": 0.0011110713340600489, "loss": 1.8468, "step": 19227 }, { "epoch": 0.5159939888364105, "grad_norm": 0.30859375, "learning_rate": 0.0011110566110168264, "loss": 1.869, "step": 19228 }, { "epoch": 0.5160208243881494, "grad_norm": 0.322265625, "learning_rate": 0.001111041886852496, "loss": 1.8809, "step": 19229 }, { "epoch": 0.5160476599398883, "grad_norm": 0.322265625, "learning_rate": 0.0011110271615670903, "loss": 1.8628, "step": 19230 }, { "epoch": 0.5160744954916273, "grad_norm": 0.310546875, "learning_rate": 0.001111012435160641, "loss": 1.9229, "step": 19231 }, { "epoch": 0.5161013310433663, "grad_norm": 0.318359375, "learning_rate": 0.001110997707633181, "loss": 1.9871, "step": 19232 }, { "epoch": 0.5161281665951052, "grad_norm": 0.359375, "learning_rate": 0.0011109829789847424, "loss": 2.0762, "step": 19233 }, { "epoch": 0.5161550021468442, "grad_norm": 0.359375, "learning_rate": 0.0011109682492153573, "loss": 2.1778, "step": 19234 }, { "epoch": 0.5161818376985831, "grad_norm": 0.357421875, "learning_rate": 0.0011109535183250585, "loss": 2.1793, "step": 19235 }, { "epoch": 0.516208673250322, "grad_norm": 0.359375, "learning_rate": 0.0011109387863138776, "loss": 2.1251, "step": 19236 }, { "epoch": 0.5162355088020609, "grad_norm": 0.333984375, "learning_rate": 0.0011109240531818477, "loss": 2.1401, "step": 19237 }, { "epoch": 0.5162623443537999, "grad_norm": 0.341796875, "learning_rate": 0.0011109093189290004, "loss": 2.1468, "step": 19238 }, { "epoch": 0.5162891799055389, "grad_norm": 0.3203125, "learning_rate": 0.0011108945835553686, "loss": 2.0788, "step": 19239 }, { "epoch": 0.5163160154572778, "grad_norm": 0.30859375, "learning_rate": 0.0011108798470609842, "loss": 2.1039, "step": 19240 }, { "epoch": 0.5163428510090168, "grad_norm": 0.31640625, "learning_rate": 0.00111086510944588, "loss": 2.0015, "step": 19241 }, { "epoch": 0.5163696865607557, "grad_norm": 0.30078125, "learning_rate": 0.0011108503707100879, "loss": 2.0028, "step": 19242 }, { "epoch": 0.5163965221124946, "grad_norm": 0.30078125, "learning_rate": 0.0011108356308536403, "loss": 2.0539, "step": 19243 }, { "epoch": 0.5164233576642335, "grad_norm": 0.314453125, "learning_rate": 0.0011108208898765698, "loss": 2.0706, "step": 19244 }, { "epoch": 0.5164501932159725, "grad_norm": 0.306640625, "learning_rate": 0.0011108061477789085, "loss": 2.0558, "step": 19245 }, { "epoch": 0.5164770287677115, "grad_norm": 0.3125, "learning_rate": 0.0011107914045606886, "loss": 2.156, "step": 19246 }, { "epoch": 0.5165038643194504, "grad_norm": 0.3125, "learning_rate": 0.0011107766602219426, "loss": 2.0669, "step": 19247 }, { "epoch": 0.5165306998711894, "grad_norm": 0.306640625, "learning_rate": 0.0011107619147627031, "loss": 2.0801, "step": 19248 }, { "epoch": 0.5165575354229283, "grad_norm": 0.314453125, "learning_rate": 0.0011107471681830023, "loss": 2.1184, "step": 19249 }, { "epoch": 0.5165843709746673, "grad_norm": 0.314453125, "learning_rate": 0.0011107324204828724, "loss": 2.0651, "step": 19250 }, { "epoch": 0.5166112065264061, "grad_norm": 0.306640625, "learning_rate": 0.0011107176716623457, "loss": 2.1326, "step": 19251 }, { "epoch": 0.5166380420781451, "grad_norm": 0.3046875, "learning_rate": 0.0011107029217214549, "loss": 2.048, "step": 19252 }, { "epoch": 0.5166648776298841, "grad_norm": 0.298828125, "learning_rate": 0.001110688170660232, "loss": 2.0162, "step": 19253 }, { "epoch": 0.516691713181623, "grad_norm": 0.302734375, "learning_rate": 0.0011106734184787093, "loss": 2.0788, "step": 19254 }, { "epoch": 0.516718548733362, "grad_norm": 0.294921875, "learning_rate": 0.0011106586651769197, "loss": 1.9806, "step": 19255 }, { "epoch": 0.5167453842851009, "grad_norm": 0.30078125, "learning_rate": 0.001110643910754895, "loss": 2.0301, "step": 19256 }, { "epoch": 0.5167722198368399, "grad_norm": 0.474609375, "learning_rate": 0.001110629155212668, "loss": 2.0436, "step": 19257 }, { "epoch": 0.5167990553885788, "grad_norm": 0.30859375, "learning_rate": 0.0011106143985502707, "loss": 2.0458, "step": 19258 }, { "epoch": 0.5168258909403177, "grad_norm": 0.3046875, "learning_rate": 0.0011105996407677356, "loss": 2.003, "step": 19259 }, { "epoch": 0.5168527264920567, "grad_norm": 0.302734375, "learning_rate": 0.0011105848818650952, "loss": 1.9906, "step": 19260 }, { "epoch": 0.5168795620437956, "grad_norm": 0.294921875, "learning_rate": 0.0011105701218423818, "loss": 1.9734, "step": 19261 }, { "epoch": 0.5169063975955346, "grad_norm": 0.296875, "learning_rate": 0.0011105553606996278, "loss": 2.0204, "step": 19262 }, { "epoch": 0.5169332331472735, "grad_norm": 0.298828125, "learning_rate": 0.0011105405984368655, "loss": 1.9421, "step": 19263 }, { "epoch": 0.5169600686990125, "grad_norm": 0.3046875, "learning_rate": 0.0011105258350541274, "loss": 2.0116, "step": 19264 }, { "epoch": 0.5169869042507514, "grad_norm": 0.3046875, "learning_rate": 0.0011105110705514458, "loss": 2.0858, "step": 19265 }, { "epoch": 0.5170137398024903, "grad_norm": 0.3046875, "learning_rate": 0.0011104963049288529, "loss": 2.0188, "step": 19266 }, { "epoch": 0.5170405753542293, "grad_norm": 0.30859375, "learning_rate": 0.0011104815381863813, "loss": 2.1416, "step": 19267 }, { "epoch": 0.5170674109059682, "grad_norm": 0.298828125, "learning_rate": 0.0011104667703240637, "loss": 1.945, "step": 19268 }, { "epoch": 0.5170942464577072, "grad_norm": 0.302734375, "learning_rate": 0.001110452001341932, "loss": 2.0733, "step": 19269 }, { "epoch": 0.5171210820094461, "grad_norm": 0.30078125, "learning_rate": 0.0011104372312400188, "loss": 2.0244, "step": 19270 }, { "epoch": 0.517147917561185, "grad_norm": 0.291015625, "learning_rate": 0.0011104224600183565, "loss": 1.9322, "step": 19271 }, { "epoch": 0.517174753112924, "grad_norm": 0.296875, "learning_rate": 0.0011104076876769775, "loss": 1.991, "step": 19272 }, { "epoch": 0.5172015886646629, "grad_norm": 0.3046875, "learning_rate": 0.001110392914215914, "loss": 2.0764, "step": 19273 }, { "epoch": 0.5172284242164019, "grad_norm": 0.29296875, "learning_rate": 0.001110378139635199, "loss": 1.8991, "step": 19274 }, { "epoch": 0.5172552597681408, "grad_norm": 0.3046875, "learning_rate": 0.0011103633639348643, "loss": 2.0075, "step": 19275 }, { "epoch": 0.5172820953198798, "grad_norm": 0.302734375, "learning_rate": 0.0011103485871149425, "loss": 2.0274, "step": 19276 }, { "epoch": 0.5173089308716187, "grad_norm": 0.296875, "learning_rate": 0.001110333809175466, "loss": 1.9518, "step": 19277 }, { "epoch": 0.5173357664233577, "grad_norm": 0.296875, "learning_rate": 0.0011103190301164675, "loss": 1.9765, "step": 19278 }, { "epoch": 0.5173626019750966, "grad_norm": 0.29296875, "learning_rate": 0.0011103042499379788, "loss": 1.905, "step": 19279 }, { "epoch": 0.5173894375268355, "grad_norm": 0.302734375, "learning_rate": 0.0011102894686400332, "loss": 2.0222, "step": 19280 }, { "epoch": 0.5174162730785745, "grad_norm": 0.298828125, "learning_rate": 0.0011102746862226623, "loss": 1.9118, "step": 19281 }, { "epoch": 0.5174431086303134, "grad_norm": 0.306640625, "learning_rate": 0.001110259902685899, "loss": 2.0846, "step": 19282 }, { "epoch": 0.5174699441820524, "grad_norm": 0.310546875, "learning_rate": 0.0011102451180297756, "loss": 2.0749, "step": 19283 }, { "epoch": 0.5174967797337914, "grad_norm": 0.29296875, "learning_rate": 0.0011102303322543246, "loss": 1.9527, "step": 19284 }, { "epoch": 0.5175236152855303, "grad_norm": 0.302734375, "learning_rate": 0.001110215545359578, "loss": 1.9443, "step": 19285 }, { "epoch": 0.5175504508372692, "grad_norm": 0.306640625, "learning_rate": 0.0011102007573455688, "loss": 2.0303, "step": 19286 }, { "epoch": 0.5175772863890081, "grad_norm": 0.294921875, "learning_rate": 0.0011101859682123292, "loss": 1.9276, "step": 19287 }, { "epoch": 0.5176041219407471, "grad_norm": 0.29296875, "learning_rate": 0.0011101711779598917, "loss": 1.8388, "step": 19288 }, { "epoch": 0.517630957492486, "grad_norm": 0.314453125, "learning_rate": 0.0011101563865882889, "loss": 2.0339, "step": 19289 }, { "epoch": 0.517657793044225, "grad_norm": 0.294921875, "learning_rate": 0.0011101415940975529, "loss": 1.9691, "step": 19290 }, { "epoch": 0.517684628595964, "grad_norm": 0.30078125, "learning_rate": 0.0011101268004877166, "loss": 1.9214, "step": 19291 }, { "epoch": 0.5177114641477029, "grad_norm": 0.30078125, "learning_rate": 0.001110112005758812, "loss": 1.9265, "step": 19292 }, { "epoch": 0.5177382996994419, "grad_norm": 0.298828125, "learning_rate": 0.0011100972099108714, "loss": 1.8784, "step": 19293 }, { "epoch": 0.5177651352511807, "grad_norm": 0.306640625, "learning_rate": 0.001110082412943928, "loss": 2.0393, "step": 19294 }, { "epoch": 0.5177919708029197, "grad_norm": 0.30859375, "learning_rate": 0.0011100676148580139, "loss": 2.016, "step": 19295 }, { "epoch": 0.5178188063546586, "grad_norm": 0.298828125, "learning_rate": 0.0011100528156531614, "loss": 1.9141, "step": 19296 }, { "epoch": 0.5178456419063976, "grad_norm": 0.3046875, "learning_rate": 0.001110038015329403, "loss": 1.9627, "step": 19297 }, { "epoch": 0.5178724774581366, "grad_norm": 0.298828125, "learning_rate": 0.0011100232138867713, "loss": 1.9776, "step": 19298 }, { "epoch": 0.5178993130098755, "grad_norm": 0.298828125, "learning_rate": 0.0011100084113252987, "loss": 1.9836, "step": 19299 }, { "epoch": 0.5179261485616145, "grad_norm": 0.296875, "learning_rate": 0.0011099936076450177, "loss": 1.8897, "step": 19300 }, { "epoch": 0.5179529841133533, "grad_norm": 0.3125, "learning_rate": 0.001109978802845961, "loss": 2.0126, "step": 19301 }, { "epoch": 0.5179798196650923, "grad_norm": 0.30078125, "learning_rate": 0.0011099639969281603, "loss": 1.9891, "step": 19302 }, { "epoch": 0.5180066552168313, "grad_norm": 0.30078125, "learning_rate": 0.0011099491898916491, "loss": 1.9517, "step": 19303 }, { "epoch": 0.5180334907685702, "grad_norm": 0.298828125, "learning_rate": 0.0011099343817364592, "loss": 1.9999, "step": 19304 }, { "epoch": 0.5180603263203092, "grad_norm": 0.29296875, "learning_rate": 0.0011099195724626233, "loss": 1.917, "step": 19305 }, { "epoch": 0.5180871618720481, "grad_norm": 0.294921875, "learning_rate": 0.001109904762070174, "loss": 1.9603, "step": 19306 }, { "epoch": 0.518113997423787, "grad_norm": 0.294921875, "learning_rate": 0.0011098899505591435, "loss": 1.937, "step": 19307 }, { "epoch": 0.5181408329755259, "grad_norm": 0.296875, "learning_rate": 0.0011098751379295647, "loss": 1.9562, "step": 19308 }, { "epoch": 0.5181676685272649, "grad_norm": 0.30078125, "learning_rate": 0.0011098603241814697, "loss": 1.9016, "step": 19309 }, { "epoch": 0.5181945040790039, "grad_norm": 0.310546875, "learning_rate": 0.0011098455093148912, "loss": 1.9934, "step": 19310 }, { "epoch": 0.5182213396307428, "grad_norm": 0.30078125, "learning_rate": 0.0011098306933298616, "loss": 1.8805, "step": 19311 }, { "epoch": 0.5182481751824818, "grad_norm": 0.3046875, "learning_rate": 0.0011098158762264136, "loss": 1.9333, "step": 19312 }, { "epoch": 0.5182750107342207, "grad_norm": 0.306640625, "learning_rate": 0.0011098010580045793, "loss": 1.8773, "step": 19313 }, { "epoch": 0.5183018462859597, "grad_norm": 0.30078125, "learning_rate": 0.0011097862386643916, "loss": 1.9035, "step": 19314 }, { "epoch": 0.5183286818376985, "grad_norm": 0.30859375, "learning_rate": 0.001109771418205883, "loss": 1.9983, "step": 19315 }, { "epoch": 0.5183555173894375, "grad_norm": 0.3046875, "learning_rate": 0.0011097565966290855, "loss": 1.9896, "step": 19316 }, { "epoch": 0.5183823529411765, "grad_norm": 0.30859375, "learning_rate": 0.0011097417739340323, "loss": 1.8824, "step": 19317 }, { "epoch": 0.5184091884929154, "grad_norm": 0.3046875, "learning_rate": 0.0011097269501207556, "loss": 2.0536, "step": 19318 }, { "epoch": 0.5184360240446544, "grad_norm": 0.310546875, "learning_rate": 0.0011097121251892879, "loss": 2.019, "step": 19319 }, { "epoch": 0.5184628595963933, "grad_norm": 0.302734375, "learning_rate": 0.0011096972991396618, "loss": 1.9324, "step": 19320 }, { "epoch": 0.5184896951481323, "grad_norm": 0.296875, "learning_rate": 0.0011096824719719098, "loss": 1.9035, "step": 19321 }, { "epoch": 0.5185165306998711, "grad_norm": 0.306640625, "learning_rate": 0.0011096676436860645, "loss": 1.9889, "step": 19322 }, { "epoch": 0.5185433662516101, "grad_norm": 0.294921875, "learning_rate": 0.001109652814282158, "loss": 1.8918, "step": 19323 }, { "epoch": 0.5185702018033491, "grad_norm": 0.3046875, "learning_rate": 0.0011096379837602234, "loss": 1.9661, "step": 19324 }, { "epoch": 0.518597037355088, "grad_norm": 0.296875, "learning_rate": 0.0011096231521202928, "loss": 1.9062, "step": 19325 }, { "epoch": 0.518623872906827, "grad_norm": 0.302734375, "learning_rate": 0.0011096083193623991, "loss": 1.8812, "step": 19326 }, { "epoch": 0.5186507084585659, "grad_norm": 0.302734375, "learning_rate": 0.0011095934854865749, "loss": 1.8861, "step": 19327 }, { "epoch": 0.5186775440103049, "grad_norm": 0.3125, "learning_rate": 0.0011095786504928522, "loss": 2.0192, "step": 19328 }, { "epoch": 0.5187043795620438, "grad_norm": 0.314453125, "learning_rate": 0.0011095638143812639, "loss": 1.8823, "step": 19329 }, { "epoch": 0.5187312151137827, "grad_norm": 0.30859375, "learning_rate": 0.0011095489771518426, "loss": 1.821, "step": 19330 }, { "epoch": 0.5187580506655217, "grad_norm": 0.310546875, "learning_rate": 0.0011095341388046207, "loss": 1.9561, "step": 19331 }, { "epoch": 0.5187848862172606, "grad_norm": 0.318359375, "learning_rate": 0.0011095192993396308, "loss": 1.925, "step": 19332 }, { "epoch": 0.5188117217689996, "grad_norm": 0.31640625, "learning_rate": 0.0011095044587569054, "loss": 2.0019, "step": 19333 }, { "epoch": 0.5188385573207385, "grad_norm": 0.322265625, "learning_rate": 0.0011094896170564773, "loss": 1.911, "step": 19334 }, { "epoch": 0.5188653928724775, "grad_norm": 0.30859375, "learning_rate": 0.0011094747742383788, "loss": 1.8777, "step": 19335 }, { "epoch": 0.5188922284242165, "grad_norm": 0.314453125, "learning_rate": 0.0011094599303026424, "loss": 1.9055, "step": 19336 }, { "epoch": 0.5189190639759553, "grad_norm": 0.31640625, "learning_rate": 0.0011094450852493008, "loss": 1.9378, "step": 19337 }, { "epoch": 0.5189458995276943, "grad_norm": 0.302734375, "learning_rate": 0.0011094302390783869, "loss": 1.8689, "step": 19338 }, { "epoch": 0.5189727350794332, "grad_norm": 0.3125, "learning_rate": 0.0011094153917899326, "loss": 1.9767, "step": 19339 }, { "epoch": 0.5189995706311722, "grad_norm": 0.302734375, "learning_rate": 0.0011094005433839708, "loss": 1.9798, "step": 19340 }, { "epoch": 0.5190264061829111, "grad_norm": 0.3125, "learning_rate": 0.0011093856938605342, "loss": 1.9527, "step": 19341 }, { "epoch": 0.5190532417346501, "grad_norm": 0.31640625, "learning_rate": 0.0011093708432196553, "loss": 1.9409, "step": 19342 }, { "epoch": 0.519080077286389, "grad_norm": 0.3046875, "learning_rate": 0.0011093559914613666, "loss": 1.8854, "step": 19343 }, { "epoch": 0.5191069128381279, "grad_norm": 0.306640625, "learning_rate": 0.0011093411385857007, "loss": 1.7963, "step": 19344 }, { "epoch": 0.5191337483898669, "grad_norm": 0.3046875, "learning_rate": 0.00110932628459269, "loss": 1.9621, "step": 19345 }, { "epoch": 0.5191605839416058, "grad_norm": 0.3046875, "learning_rate": 0.0011093114294823676, "loss": 1.888, "step": 19346 }, { "epoch": 0.5191874194933448, "grad_norm": 0.326171875, "learning_rate": 0.0011092965732547656, "loss": 1.8932, "step": 19347 }, { "epoch": 0.5192142550450837, "grad_norm": 0.31640625, "learning_rate": 0.0011092817159099166, "loss": 1.8091, "step": 19348 }, { "epoch": 0.5192410905968227, "grad_norm": 0.310546875, "learning_rate": 0.0011092668574478536, "loss": 1.8883, "step": 19349 }, { "epoch": 0.5192679261485617, "grad_norm": 0.314453125, "learning_rate": 0.0011092519978686088, "loss": 1.8802, "step": 19350 }, { "epoch": 0.5192947617003005, "grad_norm": 0.30078125, "learning_rate": 0.001109237137172215, "loss": 1.7441, "step": 19351 }, { "epoch": 0.5193215972520395, "grad_norm": 0.353515625, "learning_rate": 0.0011092222753587048, "loss": 1.9172, "step": 19352 }, { "epoch": 0.5193484328037784, "grad_norm": 0.322265625, "learning_rate": 0.0011092074124281106, "loss": 1.9391, "step": 19353 }, { "epoch": 0.5193752683555174, "grad_norm": 0.310546875, "learning_rate": 0.0011091925483804653, "loss": 1.8384, "step": 19354 }, { "epoch": 0.5194021039072564, "grad_norm": 0.318359375, "learning_rate": 0.0011091776832158012, "loss": 1.9743, "step": 19355 }, { "epoch": 0.5194289394589953, "grad_norm": 0.314453125, "learning_rate": 0.001109162816934151, "loss": 1.7387, "step": 19356 }, { "epoch": 0.5194557750107343, "grad_norm": 0.32421875, "learning_rate": 0.0011091479495355476, "loss": 1.8648, "step": 19357 }, { "epoch": 0.5194826105624731, "grad_norm": 0.318359375, "learning_rate": 0.0011091330810200234, "loss": 1.8561, "step": 19358 }, { "epoch": 0.5195094461142121, "grad_norm": 0.37890625, "learning_rate": 0.0011091182113876108, "loss": 2.1073, "step": 19359 }, { "epoch": 0.519536281665951, "grad_norm": 0.380859375, "learning_rate": 0.0011091033406383428, "loss": 2.1622, "step": 19360 }, { "epoch": 0.51956311721769, "grad_norm": 0.34375, "learning_rate": 0.0011090884687722517, "loss": 2.094, "step": 19361 }, { "epoch": 0.519589952769429, "grad_norm": 0.32421875, "learning_rate": 0.0011090735957893704, "loss": 2.0554, "step": 19362 }, { "epoch": 0.5196167883211679, "grad_norm": 0.330078125, "learning_rate": 0.0011090587216897314, "loss": 2.1445, "step": 19363 }, { "epoch": 0.5196436238729069, "grad_norm": 0.318359375, "learning_rate": 0.0011090438464733674, "loss": 2.0974, "step": 19364 }, { "epoch": 0.5196704594246457, "grad_norm": 0.33203125, "learning_rate": 0.001109028970140311, "loss": 2.0868, "step": 19365 }, { "epoch": 0.5196972949763847, "grad_norm": 0.314453125, "learning_rate": 0.0011090140926905945, "loss": 2.0398, "step": 19366 }, { "epoch": 0.5197241305281236, "grad_norm": 0.310546875, "learning_rate": 0.0011089992141242512, "loss": 2.1168, "step": 19367 }, { "epoch": 0.5197509660798626, "grad_norm": 0.3125, "learning_rate": 0.0011089843344413131, "loss": 2.0616, "step": 19368 }, { "epoch": 0.5197778016316016, "grad_norm": 0.31640625, "learning_rate": 0.0011089694536418131, "loss": 2.1743, "step": 19369 }, { "epoch": 0.5198046371833405, "grad_norm": 0.314453125, "learning_rate": 0.001108954571725784, "loss": 1.9896, "step": 19370 }, { "epoch": 0.5198314727350795, "grad_norm": 0.298828125, "learning_rate": 0.0011089396886932582, "loss": 2.0211, "step": 19371 }, { "epoch": 0.5198583082868183, "grad_norm": 0.3125, "learning_rate": 0.0011089248045442687, "loss": 2.0492, "step": 19372 }, { "epoch": 0.5198851438385573, "grad_norm": 0.306640625, "learning_rate": 0.0011089099192788476, "loss": 2.0654, "step": 19373 }, { "epoch": 0.5199119793902963, "grad_norm": 0.302734375, "learning_rate": 0.0011088950328970281, "loss": 1.9913, "step": 19374 }, { "epoch": 0.5199388149420352, "grad_norm": 0.306640625, "learning_rate": 0.0011088801453988423, "loss": 2.0539, "step": 19375 }, { "epoch": 0.5199656504937742, "grad_norm": 0.31640625, "learning_rate": 0.0011088652567843235, "loss": 2.1298, "step": 19376 }, { "epoch": 0.5199924860455131, "grad_norm": 0.3125, "learning_rate": 0.0011088503670535041, "loss": 2.0265, "step": 19377 }, { "epoch": 0.5200193215972521, "grad_norm": 0.314453125, "learning_rate": 0.0011088354762064164, "loss": 2.1539, "step": 19378 }, { "epoch": 0.5200461571489909, "grad_norm": 0.306640625, "learning_rate": 0.0011088205842430935, "loss": 1.9834, "step": 19379 }, { "epoch": 0.5200729927007299, "grad_norm": 0.294921875, "learning_rate": 0.0011088056911635678, "loss": 1.9221, "step": 19380 }, { "epoch": 0.5200998282524689, "grad_norm": 0.302734375, "learning_rate": 0.0011087907969678722, "loss": 2.0854, "step": 19381 }, { "epoch": 0.5201266638042078, "grad_norm": 0.29296875, "learning_rate": 0.0011087759016560393, "loss": 1.9559, "step": 19382 }, { "epoch": 0.5201534993559468, "grad_norm": 0.3046875, "learning_rate": 0.001108761005228102, "loss": 2.0726, "step": 19383 }, { "epoch": 0.5201803349076857, "grad_norm": 0.3203125, "learning_rate": 0.0011087461076840924, "loss": 2.0674, "step": 19384 }, { "epoch": 0.5202071704594247, "grad_norm": 0.30078125, "learning_rate": 0.0011087312090240435, "loss": 2.0813, "step": 19385 }, { "epoch": 0.5202340060111635, "grad_norm": 0.298828125, "learning_rate": 0.0011087163092479882, "loss": 2.0609, "step": 19386 }, { "epoch": 0.5202608415629025, "grad_norm": 0.30078125, "learning_rate": 0.0011087014083559588, "loss": 2.0405, "step": 19387 }, { "epoch": 0.5202876771146415, "grad_norm": 0.296875, "learning_rate": 0.0011086865063479883, "loss": 2.032, "step": 19388 }, { "epoch": 0.5203145126663804, "grad_norm": 0.30078125, "learning_rate": 0.0011086716032241092, "loss": 2.0127, "step": 19389 }, { "epoch": 0.5203413482181194, "grad_norm": 0.298828125, "learning_rate": 0.0011086566989843543, "loss": 1.9901, "step": 19390 }, { "epoch": 0.5203681837698583, "grad_norm": 0.294921875, "learning_rate": 0.0011086417936287563, "loss": 2.0306, "step": 19391 }, { "epoch": 0.5203950193215973, "grad_norm": 0.30078125, "learning_rate": 0.0011086268871573477, "loss": 2.0419, "step": 19392 }, { "epoch": 0.5204218548733361, "grad_norm": 0.291015625, "learning_rate": 0.0011086119795701615, "loss": 2.0044, "step": 19393 }, { "epoch": 0.5204486904250751, "grad_norm": 0.2890625, "learning_rate": 0.0011085970708672303, "loss": 1.9203, "step": 19394 }, { "epoch": 0.5204755259768141, "grad_norm": 0.30078125, "learning_rate": 0.0011085821610485865, "loss": 2.0611, "step": 19395 }, { "epoch": 0.520502361528553, "grad_norm": 0.306640625, "learning_rate": 0.0011085672501142633, "loss": 2.0291, "step": 19396 }, { "epoch": 0.520529197080292, "grad_norm": 0.298828125, "learning_rate": 0.001108552338064293, "loss": 2.031, "step": 19397 }, { "epoch": 0.5205560326320309, "grad_norm": 0.291015625, "learning_rate": 0.0011085374248987086, "loss": 1.9637, "step": 19398 }, { "epoch": 0.5205828681837699, "grad_norm": 0.30078125, "learning_rate": 0.0011085225106175428, "loss": 1.9983, "step": 19399 }, { "epoch": 0.5206097037355089, "grad_norm": 0.30078125, "learning_rate": 0.001108507595220828, "loss": 2.0662, "step": 19400 }, { "epoch": 0.5206365392872477, "grad_norm": 0.30859375, "learning_rate": 0.0011084926787085973, "loss": 2.0307, "step": 19401 }, { "epoch": 0.5206633748389867, "grad_norm": 0.306640625, "learning_rate": 0.001108477761080883, "loss": 2.1044, "step": 19402 }, { "epoch": 0.5206902103907256, "grad_norm": 0.302734375, "learning_rate": 0.0011084628423377186, "loss": 2.0704, "step": 19403 }, { "epoch": 0.5207170459424646, "grad_norm": 0.298828125, "learning_rate": 0.0011084479224791359, "loss": 2.0059, "step": 19404 }, { "epoch": 0.5207438814942035, "grad_norm": 0.3125, "learning_rate": 0.001108433001505168, "loss": 1.9381, "step": 19405 }, { "epoch": 0.5207707170459425, "grad_norm": 0.30078125, "learning_rate": 0.001108418079415848, "loss": 1.9327, "step": 19406 }, { "epoch": 0.5207975525976815, "grad_norm": 0.310546875, "learning_rate": 0.001108403156211208, "loss": 1.9801, "step": 19407 }, { "epoch": 0.5208243881494203, "grad_norm": 0.302734375, "learning_rate": 0.0011083882318912812, "loss": 2.0682, "step": 19408 }, { "epoch": 0.5208512237011593, "grad_norm": 0.296875, "learning_rate": 0.0011083733064561003, "loss": 2.0318, "step": 19409 }, { "epoch": 0.5208780592528982, "grad_norm": 0.3125, "learning_rate": 0.0011083583799056977, "loss": 2.0372, "step": 19410 }, { "epoch": 0.5209048948046372, "grad_norm": 0.294921875, "learning_rate": 0.0011083434522401066, "loss": 1.9494, "step": 19411 }, { "epoch": 0.5209317303563761, "grad_norm": 0.296875, "learning_rate": 0.0011083285234593594, "loss": 1.9266, "step": 19412 }, { "epoch": 0.5209585659081151, "grad_norm": 0.302734375, "learning_rate": 0.001108313593563489, "loss": 1.9379, "step": 19413 }, { "epoch": 0.5209854014598541, "grad_norm": 0.30078125, "learning_rate": 0.001108298662552528, "loss": 1.9818, "step": 19414 }, { "epoch": 0.5210122370115929, "grad_norm": 0.30078125, "learning_rate": 0.0011082837304265095, "loss": 1.9481, "step": 19415 }, { "epoch": 0.5210390725633319, "grad_norm": 0.294921875, "learning_rate": 0.0011082687971854658, "loss": 1.9131, "step": 19416 }, { "epoch": 0.5210659081150708, "grad_norm": 0.30078125, "learning_rate": 0.00110825386282943, "loss": 2.0076, "step": 19417 }, { "epoch": 0.5210927436668098, "grad_norm": 0.294921875, "learning_rate": 0.001108238927358435, "loss": 1.97, "step": 19418 }, { "epoch": 0.5211195792185487, "grad_norm": 0.298828125, "learning_rate": 0.001108223990772513, "loss": 1.9751, "step": 19419 }, { "epoch": 0.5211464147702877, "grad_norm": 0.30078125, "learning_rate": 0.0011082090530716972, "loss": 2.1013, "step": 19420 }, { "epoch": 0.5211732503220267, "grad_norm": 0.296875, "learning_rate": 0.0011081941142560203, "loss": 1.9411, "step": 19421 }, { "epoch": 0.5212000858737655, "grad_norm": 0.28515625, "learning_rate": 0.001108179174325515, "loss": 1.9182, "step": 19422 }, { "epoch": 0.5212269214255045, "grad_norm": 0.2890625, "learning_rate": 0.001108164233280214, "loss": 1.8576, "step": 19423 }, { "epoch": 0.5212537569772434, "grad_norm": 0.302734375, "learning_rate": 0.00110814929112015, "loss": 2.0155, "step": 19424 }, { "epoch": 0.5212805925289824, "grad_norm": 0.298828125, "learning_rate": 0.0011081343478453562, "loss": 1.9007, "step": 19425 }, { "epoch": 0.5213074280807214, "grad_norm": 0.306640625, "learning_rate": 0.0011081194034558654, "loss": 1.975, "step": 19426 }, { "epoch": 0.5213342636324603, "grad_norm": 0.30859375, "learning_rate": 0.0011081044579517097, "loss": 1.9872, "step": 19427 }, { "epoch": 0.5213610991841993, "grad_norm": 0.302734375, "learning_rate": 0.0011080895113329224, "loss": 2.0111, "step": 19428 }, { "epoch": 0.5213879347359381, "grad_norm": 0.302734375, "learning_rate": 0.0011080745635995363, "loss": 1.9986, "step": 19429 }, { "epoch": 0.5214147702876771, "grad_norm": 0.29296875, "learning_rate": 0.0011080596147515838, "loss": 1.9161, "step": 19430 }, { "epoch": 0.521441605839416, "grad_norm": 0.30859375, "learning_rate": 0.0011080446647890984, "loss": 2.0159, "step": 19431 }, { "epoch": 0.521468441391155, "grad_norm": 0.294921875, "learning_rate": 0.0011080297137121124, "loss": 1.9081, "step": 19432 }, { "epoch": 0.521495276942894, "grad_norm": 0.306640625, "learning_rate": 0.0011080147615206584, "loss": 2.0016, "step": 19433 }, { "epoch": 0.5215221124946329, "grad_norm": 0.302734375, "learning_rate": 0.0011079998082147698, "loss": 1.8771, "step": 19434 }, { "epoch": 0.5215489480463719, "grad_norm": 0.30078125, "learning_rate": 0.001107984853794479, "loss": 1.8395, "step": 19435 }, { "epoch": 0.5215757835981107, "grad_norm": 0.306640625, "learning_rate": 0.0011079698982598186, "loss": 1.9178, "step": 19436 }, { "epoch": 0.5216026191498497, "grad_norm": 0.3046875, "learning_rate": 0.001107954941610822, "loss": 2.0225, "step": 19437 }, { "epoch": 0.5216294547015886, "grad_norm": 0.30859375, "learning_rate": 0.0011079399838475215, "loss": 1.8926, "step": 19438 }, { "epoch": 0.5216562902533276, "grad_norm": 0.310546875, "learning_rate": 0.0011079250249699502, "loss": 1.9425, "step": 19439 }, { "epoch": 0.5216831258050666, "grad_norm": 0.294921875, "learning_rate": 0.0011079100649781409, "loss": 1.8667, "step": 19440 }, { "epoch": 0.5217099613568055, "grad_norm": 0.302734375, "learning_rate": 0.0011078951038721262, "loss": 1.9106, "step": 19441 }, { "epoch": 0.5217367969085445, "grad_norm": 0.30078125, "learning_rate": 0.0011078801416519392, "loss": 1.8553, "step": 19442 }, { "epoch": 0.5217636324602833, "grad_norm": 0.30859375, "learning_rate": 0.0011078651783176124, "loss": 1.911, "step": 19443 }, { "epoch": 0.5217904680120223, "grad_norm": 0.296875, "learning_rate": 0.001107850213869179, "loss": 1.865, "step": 19444 }, { "epoch": 0.5218173035637613, "grad_norm": 0.306640625, "learning_rate": 0.0011078352483066716, "loss": 1.9416, "step": 19445 }, { "epoch": 0.5218441391155002, "grad_norm": 0.296875, "learning_rate": 0.0011078202816301231, "loss": 1.8515, "step": 19446 }, { "epoch": 0.5218709746672392, "grad_norm": 0.31640625, "learning_rate": 0.0011078053138395662, "loss": 1.9507, "step": 19447 }, { "epoch": 0.5218978102189781, "grad_norm": 0.318359375, "learning_rate": 0.001107790344935034, "loss": 1.9281, "step": 19448 }, { "epoch": 0.5219246457707171, "grad_norm": 0.30078125, "learning_rate": 0.001107775374916559, "loss": 1.9776, "step": 19449 }, { "epoch": 0.521951481322456, "grad_norm": 0.302734375, "learning_rate": 0.0011077604037841742, "loss": 1.9573, "step": 19450 }, { "epoch": 0.5219783168741949, "grad_norm": 0.3125, "learning_rate": 0.0011077454315379126, "loss": 1.9347, "step": 19451 }, { "epoch": 0.5220051524259339, "grad_norm": 0.3046875, "learning_rate": 0.0011077304581778068, "loss": 1.8475, "step": 19452 }, { "epoch": 0.5220319879776728, "grad_norm": 0.3046875, "learning_rate": 0.0011077154837038899, "loss": 1.8804, "step": 19453 }, { "epoch": 0.5220588235294118, "grad_norm": 0.314453125, "learning_rate": 0.0011077005081161946, "loss": 1.9154, "step": 19454 }, { "epoch": 0.5220856590811507, "grad_norm": 0.306640625, "learning_rate": 0.0011076855314147537, "loss": 1.8797, "step": 19455 }, { "epoch": 0.5221124946328897, "grad_norm": 0.302734375, "learning_rate": 0.0011076705535995998, "loss": 1.8689, "step": 19456 }, { "epoch": 0.5221393301846285, "grad_norm": 0.310546875, "learning_rate": 0.0011076555746707664, "loss": 1.9366, "step": 19457 }, { "epoch": 0.5221661657363675, "grad_norm": 0.318359375, "learning_rate": 0.001107640594628286, "loss": 1.9534, "step": 19458 }, { "epoch": 0.5221930012881065, "grad_norm": 0.306640625, "learning_rate": 0.0011076256134721916, "loss": 1.8379, "step": 19459 }, { "epoch": 0.5222198368398454, "grad_norm": 0.3046875, "learning_rate": 0.0011076106312025156, "loss": 1.8901, "step": 19460 }, { "epoch": 0.5222466723915844, "grad_norm": 0.310546875, "learning_rate": 0.0011075956478192915, "loss": 1.9063, "step": 19461 }, { "epoch": 0.5222735079433233, "grad_norm": 0.310546875, "learning_rate": 0.001107580663322552, "loss": 1.871, "step": 19462 }, { "epoch": 0.5223003434950623, "grad_norm": 0.3125, "learning_rate": 0.0011075656777123296, "loss": 1.8908, "step": 19463 }, { "epoch": 0.5223271790468011, "grad_norm": 0.30078125, "learning_rate": 0.0011075506909886575, "loss": 1.8488, "step": 19464 }, { "epoch": 0.5223540145985401, "grad_norm": 0.296875, "learning_rate": 0.0011075357031515684, "loss": 1.8772, "step": 19465 }, { "epoch": 0.5223808501502791, "grad_norm": 0.310546875, "learning_rate": 0.0011075207142010953, "loss": 1.8563, "step": 19466 }, { "epoch": 0.522407685702018, "grad_norm": 0.322265625, "learning_rate": 0.0011075057241372711, "loss": 1.9288, "step": 19467 }, { "epoch": 0.522434521253757, "grad_norm": 0.30859375, "learning_rate": 0.0011074907329601288, "loss": 1.9246, "step": 19468 }, { "epoch": 0.5224613568054959, "grad_norm": 0.30859375, "learning_rate": 0.001107475740669701, "loss": 1.8975, "step": 19469 }, { "epoch": 0.5224881923572349, "grad_norm": 0.298828125, "learning_rate": 0.001107460747266021, "loss": 1.7778, "step": 19470 }, { "epoch": 0.5225150279089739, "grad_norm": 0.314453125, "learning_rate": 0.001107445752749121, "loss": 1.9411, "step": 19471 }, { "epoch": 0.5225418634607127, "grad_norm": 0.310546875, "learning_rate": 0.0011074307571190345, "loss": 1.9112, "step": 19472 }, { "epoch": 0.5225686990124517, "grad_norm": 0.318359375, "learning_rate": 0.0011074157603757943, "loss": 1.943, "step": 19473 }, { "epoch": 0.5225955345641906, "grad_norm": 0.302734375, "learning_rate": 0.001107400762519433, "loss": 1.8787, "step": 19474 }, { "epoch": 0.5226223701159296, "grad_norm": 0.29296875, "learning_rate": 0.0011073857635499839, "loss": 1.7752, "step": 19475 }, { "epoch": 0.5226492056676685, "grad_norm": 0.3125, "learning_rate": 0.0011073707634674797, "loss": 1.8333, "step": 19476 }, { "epoch": 0.5226760412194075, "grad_norm": 0.314453125, "learning_rate": 0.0011073557622719532, "loss": 1.8858, "step": 19477 }, { "epoch": 0.5227028767711465, "grad_norm": 0.3125, "learning_rate": 0.0011073407599634373, "loss": 1.8822, "step": 19478 }, { "epoch": 0.5227297123228853, "grad_norm": 0.3125, "learning_rate": 0.0011073257565419653, "loss": 1.8897, "step": 19479 }, { "epoch": 0.5227565478746243, "grad_norm": 0.314453125, "learning_rate": 0.0011073107520075697, "loss": 1.8274, "step": 19480 }, { "epoch": 0.5227833834263632, "grad_norm": 0.310546875, "learning_rate": 0.0011072957463602836, "loss": 1.8881, "step": 19481 }, { "epoch": 0.5228102189781022, "grad_norm": 0.310546875, "learning_rate": 0.0011072807396001398, "loss": 1.8327, "step": 19482 }, { "epoch": 0.5228370545298411, "grad_norm": 0.326171875, "learning_rate": 0.0011072657317271713, "loss": 1.971, "step": 19483 }, { "epoch": 0.5228638900815801, "grad_norm": 0.400390625, "learning_rate": 0.0011072507227414114, "loss": 2.2721, "step": 19484 }, { "epoch": 0.5228907256333191, "grad_norm": 0.353515625, "learning_rate": 0.0011072357126428922, "loss": 2.0906, "step": 19485 }, { "epoch": 0.5229175611850579, "grad_norm": 0.337890625, "learning_rate": 0.0011072207014316473, "loss": 2.1687, "step": 19486 }, { "epoch": 0.5229443967367969, "grad_norm": 0.330078125, "learning_rate": 0.0011072056891077093, "loss": 2.188, "step": 19487 }, { "epoch": 0.5229712322885358, "grad_norm": 0.33203125, "learning_rate": 0.0011071906756711112, "loss": 2.1482, "step": 19488 }, { "epoch": 0.5229980678402748, "grad_norm": 0.322265625, "learning_rate": 0.0011071756611218864, "loss": 2.0487, "step": 19489 }, { "epoch": 0.5230249033920137, "grad_norm": 0.3203125, "learning_rate": 0.001107160645460067, "loss": 2.0734, "step": 19490 }, { "epoch": 0.5230517389437527, "grad_norm": 0.310546875, "learning_rate": 0.0011071456286856861, "loss": 2.0357, "step": 19491 }, { "epoch": 0.5230785744954917, "grad_norm": 0.314453125, "learning_rate": 0.0011071306107987774, "loss": 2.1187, "step": 19492 }, { "epoch": 0.5231054100472305, "grad_norm": 0.314453125, "learning_rate": 0.0011071155917993733, "loss": 2.0374, "step": 19493 }, { "epoch": 0.5231322455989695, "grad_norm": 0.322265625, "learning_rate": 0.0011071005716875065, "loss": 2.0967, "step": 19494 }, { "epoch": 0.5231590811507084, "grad_norm": 0.310546875, "learning_rate": 0.0011070855504632104, "loss": 2.1022, "step": 19495 }, { "epoch": 0.5231859167024474, "grad_norm": 0.3046875, "learning_rate": 0.0011070705281265177, "loss": 2.0307, "step": 19496 }, { "epoch": 0.5232127522541864, "grad_norm": 0.318359375, "learning_rate": 0.0011070555046774617, "loss": 2.0552, "step": 19497 }, { "epoch": 0.5232395878059253, "grad_norm": 0.302734375, "learning_rate": 0.0011070404801160748, "loss": 2.0248, "step": 19498 }, { "epoch": 0.5232664233576643, "grad_norm": 0.30859375, "learning_rate": 0.0011070254544423904, "loss": 2.0229, "step": 19499 }, { "epoch": 0.5232932589094031, "grad_norm": 0.3046875, "learning_rate": 0.0011070104276564414, "loss": 2.0081, "step": 19500 }, { "epoch": 0.5233200944611421, "grad_norm": 0.298828125, "learning_rate": 0.0011069953997582605, "loss": 1.9686, "step": 19501 }, { "epoch": 0.523346930012881, "grad_norm": 0.314453125, "learning_rate": 0.001106980370747881, "loss": 2.0179, "step": 19502 }, { "epoch": 0.52337376556462, "grad_norm": 0.30078125, "learning_rate": 0.0011069653406253356, "loss": 2.025, "step": 19503 }, { "epoch": 0.523400601116359, "grad_norm": 0.3046875, "learning_rate": 0.0011069503093906576, "loss": 2.1051, "step": 19504 }, { "epoch": 0.5234274366680979, "grad_norm": 0.29296875, "learning_rate": 0.0011069352770438797, "loss": 1.9875, "step": 19505 }, { "epoch": 0.5234542722198369, "grad_norm": 0.3125, "learning_rate": 0.001106920243585035, "loss": 2.0949, "step": 19506 }, { "epoch": 0.5234811077715757, "grad_norm": 0.291015625, "learning_rate": 0.001106905209014156, "loss": 1.9375, "step": 19507 }, { "epoch": 0.5235079433233147, "grad_norm": 0.294921875, "learning_rate": 0.0011068901733312763, "loss": 1.9829, "step": 19508 }, { "epoch": 0.5235347788750536, "grad_norm": 0.296875, "learning_rate": 0.0011068751365364289, "loss": 2.0636, "step": 19509 }, { "epoch": 0.5235616144267926, "grad_norm": 0.296875, "learning_rate": 0.0011068600986296464, "loss": 1.9792, "step": 19510 }, { "epoch": 0.5235884499785316, "grad_norm": 0.287109375, "learning_rate": 0.0011068450596109622, "loss": 1.9176, "step": 19511 }, { "epoch": 0.5236152855302705, "grad_norm": 0.296875, "learning_rate": 0.0011068300194804089, "loss": 1.9719, "step": 19512 }, { "epoch": 0.5236421210820095, "grad_norm": 0.306640625, "learning_rate": 0.0011068149782380195, "loss": 2.0457, "step": 19513 }, { "epoch": 0.5236689566337484, "grad_norm": 0.294921875, "learning_rate": 0.0011067999358838274, "loss": 1.9437, "step": 19514 }, { "epoch": 0.5236957921854873, "grad_norm": 0.310546875, "learning_rate": 0.001106784892417865, "loss": 2.0817, "step": 19515 }, { "epoch": 0.5237226277372263, "grad_norm": 0.3046875, "learning_rate": 0.001106769847840166, "loss": 2.0189, "step": 19516 }, { "epoch": 0.5237494632889652, "grad_norm": 0.298828125, "learning_rate": 0.0011067548021507629, "loss": 1.9861, "step": 19517 }, { "epoch": 0.5237762988407042, "grad_norm": 0.294921875, "learning_rate": 0.001106739755349689, "loss": 1.9528, "step": 19518 }, { "epoch": 0.5238031343924431, "grad_norm": 0.28515625, "learning_rate": 0.001106724707436977, "loss": 1.9559, "step": 19519 }, { "epoch": 0.5238299699441821, "grad_norm": 0.296875, "learning_rate": 0.0011067096584126599, "loss": 1.9911, "step": 19520 }, { "epoch": 0.523856805495921, "grad_norm": 0.30078125, "learning_rate": 0.0011066946082767713, "loss": 2.0012, "step": 19521 }, { "epoch": 0.5238836410476599, "grad_norm": 0.29296875, "learning_rate": 0.0011066795570293436, "loss": 1.9023, "step": 19522 }, { "epoch": 0.5239104765993989, "grad_norm": 0.2890625, "learning_rate": 0.0011066645046704099, "loss": 1.9188, "step": 19523 }, { "epoch": 0.5239373121511378, "grad_norm": 0.296875, "learning_rate": 0.0011066494512000035, "loss": 1.9357, "step": 19524 }, { "epoch": 0.5239641477028768, "grad_norm": 0.28515625, "learning_rate": 0.0011066343966181574, "loss": 1.8139, "step": 19525 }, { "epoch": 0.5239909832546157, "grad_norm": 0.302734375, "learning_rate": 0.001106619340924904, "loss": 1.947, "step": 19526 }, { "epoch": 0.5240178188063547, "grad_norm": 0.298828125, "learning_rate": 0.0011066042841202772, "loss": 1.9374, "step": 19527 }, { "epoch": 0.5240446543580936, "grad_norm": 0.302734375, "learning_rate": 0.0011065892262043095, "loss": 1.9614, "step": 19528 }, { "epoch": 0.5240714899098325, "grad_norm": 0.302734375, "learning_rate": 0.0011065741671770343, "loss": 1.9651, "step": 19529 }, { "epoch": 0.5240983254615715, "grad_norm": 0.298828125, "learning_rate": 0.0011065591070384842, "loss": 1.954, "step": 19530 }, { "epoch": 0.5241251610133104, "grad_norm": 0.302734375, "learning_rate": 0.0011065440457886926, "loss": 1.9461, "step": 19531 }, { "epoch": 0.5241519965650494, "grad_norm": 0.3046875, "learning_rate": 0.0011065289834276922, "loss": 1.9257, "step": 19532 }, { "epoch": 0.5241788321167883, "grad_norm": 0.291015625, "learning_rate": 0.0011065139199555162, "loss": 1.9088, "step": 19533 }, { "epoch": 0.5242056676685273, "grad_norm": 0.306640625, "learning_rate": 0.001106498855372198, "loss": 1.9609, "step": 19534 }, { "epoch": 0.5242325032202662, "grad_norm": 0.30078125, "learning_rate": 0.0011064837896777701, "loss": 1.9575, "step": 19535 }, { "epoch": 0.5242593387720051, "grad_norm": 0.310546875, "learning_rate": 0.0011064687228722657, "loss": 1.8857, "step": 19536 }, { "epoch": 0.5242861743237441, "grad_norm": 0.294921875, "learning_rate": 0.0011064536549557182, "loss": 1.8982, "step": 19537 }, { "epoch": 0.524313009875483, "grad_norm": 0.294921875, "learning_rate": 0.0011064385859281601, "loss": 1.919, "step": 19538 }, { "epoch": 0.524339845427222, "grad_norm": 0.302734375, "learning_rate": 0.001106423515789625, "loss": 1.9299, "step": 19539 }, { "epoch": 0.5243666809789609, "grad_norm": 0.318359375, "learning_rate": 0.0011064084445401453, "loss": 2.0628, "step": 19540 }, { "epoch": 0.5243935165306999, "grad_norm": 0.29296875, "learning_rate": 0.001106393372179755, "loss": 1.9233, "step": 19541 }, { "epoch": 0.5244203520824389, "grad_norm": 0.302734375, "learning_rate": 0.0011063782987084862, "loss": 1.9699, "step": 19542 }, { "epoch": 0.5244471876341777, "grad_norm": 0.291015625, "learning_rate": 0.0011063632241263726, "loss": 1.9754, "step": 19543 }, { "epoch": 0.5244740231859167, "grad_norm": 0.29296875, "learning_rate": 0.0011063481484334468, "loss": 1.932, "step": 19544 }, { "epoch": 0.5245008587376556, "grad_norm": 0.29296875, "learning_rate": 0.0011063330716297424, "loss": 1.8702, "step": 19545 }, { "epoch": 0.5245276942893946, "grad_norm": 0.291015625, "learning_rate": 0.0011063179937152922, "loss": 1.876, "step": 19546 }, { "epoch": 0.5245545298411335, "grad_norm": 0.296875, "learning_rate": 0.0011063029146901292, "loss": 1.9202, "step": 19547 }, { "epoch": 0.5245813653928725, "grad_norm": 0.294921875, "learning_rate": 0.0011062878345542866, "loss": 1.9238, "step": 19548 }, { "epoch": 0.5246082009446115, "grad_norm": 0.291015625, "learning_rate": 0.0011062727533077974, "loss": 1.8674, "step": 19549 }, { "epoch": 0.5246350364963503, "grad_norm": 0.296875, "learning_rate": 0.0011062576709506948, "loss": 1.9043, "step": 19550 }, { "epoch": 0.5246618720480893, "grad_norm": 0.30078125, "learning_rate": 0.0011062425874830118, "loss": 1.885, "step": 19551 }, { "epoch": 0.5246887075998282, "grad_norm": 0.296875, "learning_rate": 0.0011062275029047814, "loss": 1.8924, "step": 19552 }, { "epoch": 0.5247155431515672, "grad_norm": 0.296875, "learning_rate": 0.0011062124172160368, "loss": 1.9583, "step": 19553 }, { "epoch": 0.5247423787033061, "grad_norm": 0.296875, "learning_rate": 0.0011061973304168113, "loss": 1.9144, "step": 19554 }, { "epoch": 0.5247692142550451, "grad_norm": 0.2890625, "learning_rate": 0.0011061822425071375, "loss": 1.8501, "step": 19555 }, { "epoch": 0.5247960498067841, "grad_norm": 0.30078125, "learning_rate": 0.001106167153487049, "loss": 1.9236, "step": 19556 }, { "epoch": 0.524822885358523, "grad_norm": 0.296875, "learning_rate": 0.0011061520633565785, "loss": 1.8963, "step": 19557 }, { "epoch": 0.5248497209102619, "grad_norm": 0.302734375, "learning_rate": 0.0011061369721157594, "loss": 1.9068, "step": 19558 }, { "epoch": 0.5248765564620008, "grad_norm": 0.302734375, "learning_rate": 0.0011061218797646247, "loss": 1.9312, "step": 19559 }, { "epoch": 0.5249033920137398, "grad_norm": 0.298828125, "learning_rate": 0.0011061067863032074, "loss": 1.8709, "step": 19560 }, { "epoch": 0.5249302275654787, "grad_norm": 0.298828125, "learning_rate": 0.0011060916917315408, "loss": 1.9401, "step": 19561 }, { "epoch": 0.5249570631172177, "grad_norm": 0.310546875, "learning_rate": 0.0011060765960496579, "loss": 2.0038, "step": 19562 }, { "epoch": 0.5249838986689567, "grad_norm": 0.306640625, "learning_rate": 0.0011060614992575918, "loss": 1.9964, "step": 19563 }, { "epoch": 0.5250107342206956, "grad_norm": 0.294921875, "learning_rate": 0.0011060464013553757, "loss": 1.9002, "step": 19564 }, { "epoch": 0.5250375697724345, "grad_norm": 0.3046875, "learning_rate": 0.0011060313023430427, "loss": 1.9005, "step": 19565 }, { "epoch": 0.5250644053241734, "grad_norm": 0.306640625, "learning_rate": 0.001106016202220626, "loss": 1.925, "step": 19566 }, { "epoch": 0.5250912408759124, "grad_norm": 0.294921875, "learning_rate": 0.0011060011009881583, "loss": 1.7911, "step": 19567 }, { "epoch": 0.5251180764276514, "grad_norm": 0.322265625, "learning_rate": 0.0011059859986456732, "loss": 2.0151, "step": 19568 }, { "epoch": 0.5251449119793903, "grad_norm": 0.29296875, "learning_rate": 0.0011059708951932038, "loss": 1.8621, "step": 19569 }, { "epoch": 0.5251717475311293, "grad_norm": 0.298828125, "learning_rate": 0.001105955790630783, "loss": 1.9494, "step": 19570 }, { "epoch": 0.5251985830828682, "grad_norm": 0.302734375, "learning_rate": 0.001105940684958444, "loss": 1.9473, "step": 19571 }, { "epoch": 0.5252254186346071, "grad_norm": 0.296875, "learning_rate": 0.0011059255781762202, "loss": 1.9112, "step": 19572 }, { "epoch": 0.525252254186346, "grad_norm": 0.302734375, "learning_rate": 0.0011059104702841442, "loss": 1.9824, "step": 19573 }, { "epoch": 0.525279089738085, "grad_norm": 0.298828125, "learning_rate": 0.0011058953612822497, "loss": 1.9114, "step": 19574 }, { "epoch": 0.525305925289824, "grad_norm": 0.30859375, "learning_rate": 0.0011058802511705699, "loss": 1.8736, "step": 19575 }, { "epoch": 0.5253327608415629, "grad_norm": 0.306640625, "learning_rate": 0.001105865139949137, "loss": 1.8734, "step": 19576 }, { "epoch": 0.5253595963933019, "grad_norm": 0.3046875, "learning_rate": 0.0011058500276179854, "loss": 1.9184, "step": 19577 }, { "epoch": 0.5253864319450408, "grad_norm": 0.30859375, "learning_rate": 0.0011058349141771474, "loss": 1.8354, "step": 19578 }, { "epoch": 0.5254132674967797, "grad_norm": 0.318359375, "learning_rate": 0.0011058197996266564, "loss": 2.0112, "step": 19579 }, { "epoch": 0.5254401030485186, "grad_norm": 0.296875, "learning_rate": 0.0011058046839665457, "loss": 1.8659, "step": 19580 }, { "epoch": 0.5254669386002576, "grad_norm": 0.3046875, "learning_rate": 0.0011057895671968481, "loss": 1.8413, "step": 19581 }, { "epoch": 0.5254937741519966, "grad_norm": 0.30078125, "learning_rate": 0.0011057744493175971, "loss": 1.8729, "step": 19582 }, { "epoch": 0.5255206097037355, "grad_norm": 0.310546875, "learning_rate": 0.001105759330328826, "loss": 1.866, "step": 19583 }, { "epoch": 0.5255474452554745, "grad_norm": 0.30859375, "learning_rate": 0.0011057442102305674, "loss": 1.9414, "step": 19584 }, { "epoch": 0.5255742808072134, "grad_norm": 0.30859375, "learning_rate": 0.001105729089022855, "loss": 1.9451, "step": 19585 }, { "epoch": 0.5256011163589523, "grad_norm": 0.310546875, "learning_rate": 0.0011057139667057217, "loss": 1.821, "step": 19586 }, { "epoch": 0.5256279519106913, "grad_norm": 0.306640625, "learning_rate": 0.0011056988432792006, "loss": 1.8513, "step": 19587 }, { "epoch": 0.5256547874624302, "grad_norm": 0.30078125, "learning_rate": 0.0011056837187433252, "loss": 1.8399, "step": 19588 }, { "epoch": 0.5256816230141692, "grad_norm": 0.294921875, "learning_rate": 0.0011056685930981283, "loss": 1.8937, "step": 19589 }, { "epoch": 0.5257084585659081, "grad_norm": 0.298828125, "learning_rate": 0.0011056534663436435, "loss": 1.8756, "step": 19590 }, { "epoch": 0.5257352941176471, "grad_norm": 0.314453125, "learning_rate": 0.0011056383384799038, "loss": 1.9639, "step": 19591 }, { "epoch": 0.525762129669386, "grad_norm": 0.306640625, "learning_rate": 0.0011056232095069422, "loss": 1.862, "step": 19592 }, { "epoch": 0.525788965221125, "grad_norm": 0.302734375, "learning_rate": 0.001105608079424792, "loss": 1.9096, "step": 19593 }, { "epoch": 0.5258158007728639, "grad_norm": 0.306640625, "learning_rate": 0.0011055929482334866, "loss": 1.898, "step": 19594 }, { "epoch": 0.5258426363246028, "grad_norm": 0.3125, "learning_rate": 0.0011055778159330587, "loss": 1.8253, "step": 19595 }, { "epoch": 0.5258694718763418, "grad_norm": 0.296875, "learning_rate": 0.001105562682523542, "loss": 1.7721, "step": 19596 }, { "epoch": 0.5258963074280807, "grad_norm": 0.30859375, "learning_rate": 0.0011055475480049697, "loss": 1.8539, "step": 19597 }, { "epoch": 0.5259231429798197, "grad_norm": 0.3125, "learning_rate": 0.0011055324123773744, "loss": 1.9495, "step": 19598 }, { "epoch": 0.5259499785315586, "grad_norm": 0.306640625, "learning_rate": 0.00110551727564079, "loss": 1.84, "step": 19599 }, { "epoch": 0.5259768140832976, "grad_norm": 0.31640625, "learning_rate": 0.0011055021377952493, "loss": 1.8262, "step": 19600 }, { "epoch": 0.5260036496350365, "grad_norm": 0.30078125, "learning_rate": 0.0011054869988407858, "loss": 1.8264, "step": 19601 }, { "epoch": 0.5260304851867754, "grad_norm": 0.30859375, "learning_rate": 0.0011054718587774326, "loss": 1.8337, "step": 19602 }, { "epoch": 0.5260573207385144, "grad_norm": 0.306640625, "learning_rate": 0.0011054567176052227, "loss": 1.8143, "step": 19603 }, { "epoch": 0.5260841562902533, "grad_norm": 0.32421875, "learning_rate": 0.0011054415753241895, "loss": 1.9483, "step": 19604 }, { "epoch": 0.5261109918419923, "grad_norm": 0.314453125, "learning_rate": 0.0011054264319343662, "loss": 1.934, "step": 19605 }, { "epoch": 0.5261378273937312, "grad_norm": 0.388671875, "learning_rate": 0.001105411287435786, "loss": 2.0452, "step": 19606 }, { "epoch": 0.5261646629454702, "grad_norm": 0.349609375, "learning_rate": 0.001105396141828482, "loss": 2.0516, "step": 19607 }, { "epoch": 0.5261914984972091, "grad_norm": 0.328125, "learning_rate": 0.0011053809951124876, "loss": 2.0879, "step": 19608 }, { "epoch": 0.526218334048948, "grad_norm": 0.337890625, "learning_rate": 0.001105365847287836, "loss": 2.1677, "step": 19609 }, { "epoch": 0.526245169600687, "grad_norm": 0.33203125, "learning_rate": 0.0011053506983545605, "loss": 2.1548, "step": 19610 }, { "epoch": 0.5262720051524259, "grad_norm": 0.326171875, "learning_rate": 0.0011053355483126941, "loss": 2.0526, "step": 19611 }, { "epoch": 0.5262988407041649, "grad_norm": 0.32421875, "learning_rate": 0.0011053203971622704, "loss": 2.1384, "step": 19612 }, { "epoch": 0.5263256762559039, "grad_norm": 0.330078125, "learning_rate": 0.0011053052449033222, "loss": 2.0943, "step": 19613 }, { "epoch": 0.5263525118076428, "grad_norm": 0.30078125, "learning_rate": 0.0011052900915358831, "loss": 2.1172, "step": 19614 }, { "epoch": 0.5263793473593817, "grad_norm": 0.31640625, "learning_rate": 0.001105274937059986, "loss": 2.0802, "step": 19615 }, { "epoch": 0.5264061829111206, "grad_norm": 0.3046875, "learning_rate": 0.0011052597814756647, "loss": 2.0172, "step": 19616 }, { "epoch": 0.5264330184628596, "grad_norm": 0.30859375, "learning_rate": 0.0011052446247829517, "loss": 2.1451, "step": 19617 }, { "epoch": 0.5264598540145985, "grad_norm": 0.3125, "learning_rate": 0.001105229466981881, "loss": 2.1155, "step": 19618 }, { "epoch": 0.5264866895663375, "grad_norm": 0.302734375, "learning_rate": 0.0011052143080724853, "loss": 2.0684, "step": 19619 }, { "epoch": 0.5265135251180765, "grad_norm": 0.3046875, "learning_rate": 0.0011051991480547979, "loss": 2.0115, "step": 19620 }, { "epoch": 0.5265403606698154, "grad_norm": 0.310546875, "learning_rate": 0.0011051839869288525, "loss": 2.178, "step": 19621 }, { "epoch": 0.5265671962215543, "grad_norm": 0.302734375, "learning_rate": 0.0011051688246946818, "loss": 2.0409, "step": 19622 }, { "epoch": 0.5265940317732932, "grad_norm": 0.302734375, "learning_rate": 0.0011051536613523195, "loss": 2.0547, "step": 19623 }, { "epoch": 0.5266208673250322, "grad_norm": 0.306640625, "learning_rate": 0.0011051384969017987, "loss": 2.0606, "step": 19624 }, { "epoch": 0.5266477028767711, "grad_norm": 0.29296875, "learning_rate": 0.0011051233313431525, "loss": 1.9705, "step": 19625 }, { "epoch": 0.5266745384285101, "grad_norm": 0.291015625, "learning_rate": 0.0011051081646764143, "loss": 2.011, "step": 19626 }, { "epoch": 0.5267013739802491, "grad_norm": 0.3046875, "learning_rate": 0.0011050929969016175, "loss": 2.0562, "step": 19627 }, { "epoch": 0.526728209531988, "grad_norm": 0.294921875, "learning_rate": 0.0011050778280187951, "loss": 1.9903, "step": 19628 }, { "epoch": 0.526755045083727, "grad_norm": 0.296875, "learning_rate": 0.0011050626580279808, "loss": 1.923, "step": 19629 }, { "epoch": 0.5267818806354658, "grad_norm": 0.302734375, "learning_rate": 0.0011050474869292075, "loss": 1.9787, "step": 19630 }, { "epoch": 0.5268087161872048, "grad_norm": 0.294921875, "learning_rate": 0.0011050323147225086, "loss": 2.0018, "step": 19631 }, { "epoch": 0.5268355517389438, "grad_norm": 0.314453125, "learning_rate": 0.0011050171414079175, "loss": 2.06, "step": 19632 }, { "epoch": 0.5268623872906827, "grad_norm": 0.30078125, "learning_rate": 0.0011050019669854674, "loss": 2.0441, "step": 19633 }, { "epoch": 0.5268892228424217, "grad_norm": 0.29296875, "learning_rate": 0.0011049867914551912, "loss": 1.9444, "step": 19634 }, { "epoch": 0.5269160583941606, "grad_norm": 0.30078125, "learning_rate": 0.0011049716148171227, "loss": 1.9812, "step": 19635 }, { "epoch": 0.5269428939458995, "grad_norm": 0.3046875, "learning_rate": 0.0011049564370712952, "loss": 2.0146, "step": 19636 }, { "epoch": 0.5269697294976384, "grad_norm": 0.3125, "learning_rate": 0.0011049412582177418, "loss": 2.0687, "step": 19637 }, { "epoch": 0.5269965650493774, "grad_norm": 0.30078125, "learning_rate": 0.0011049260782564957, "loss": 2.0434, "step": 19638 }, { "epoch": 0.5270234006011164, "grad_norm": 0.302734375, "learning_rate": 0.0011049108971875905, "loss": 2.0089, "step": 19639 }, { "epoch": 0.5270502361528553, "grad_norm": 0.296875, "learning_rate": 0.001104895715011059, "loss": 1.9461, "step": 19640 }, { "epoch": 0.5270770717045943, "grad_norm": 0.296875, "learning_rate": 0.0011048805317269352, "loss": 1.9923, "step": 19641 }, { "epoch": 0.5271039072563332, "grad_norm": 0.31640625, "learning_rate": 0.0011048653473352522, "loss": 1.9782, "step": 19642 }, { "epoch": 0.5271307428080722, "grad_norm": 0.314453125, "learning_rate": 0.0011048501618360427, "loss": 2.0392, "step": 19643 }, { "epoch": 0.527157578359811, "grad_norm": 0.30078125, "learning_rate": 0.0011048349752293407, "loss": 1.9926, "step": 19644 }, { "epoch": 0.52718441391155, "grad_norm": 0.30078125, "learning_rate": 0.0011048197875151795, "loss": 1.9955, "step": 19645 }, { "epoch": 0.527211249463289, "grad_norm": 0.302734375, "learning_rate": 0.0011048045986935917, "loss": 2.0116, "step": 19646 }, { "epoch": 0.5272380850150279, "grad_norm": 0.298828125, "learning_rate": 0.0011047894087646115, "loss": 1.915, "step": 19647 }, { "epoch": 0.5272649205667669, "grad_norm": 0.287109375, "learning_rate": 0.0011047742177282718, "loss": 1.9072, "step": 19648 }, { "epoch": 0.5272917561185058, "grad_norm": 0.2890625, "learning_rate": 0.0011047590255846061, "loss": 1.9445, "step": 19649 }, { "epoch": 0.5273185916702448, "grad_norm": 0.29296875, "learning_rate": 0.0011047438323336474, "loss": 1.9177, "step": 19650 }, { "epoch": 0.5273454272219836, "grad_norm": 0.30078125, "learning_rate": 0.0011047286379754291, "loss": 2.0551, "step": 19651 }, { "epoch": 0.5273722627737226, "grad_norm": 0.291015625, "learning_rate": 0.0011047134425099848, "loss": 1.9652, "step": 19652 }, { "epoch": 0.5273990983254616, "grad_norm": 0.30078125, "learning_rate": 0.0011046982459373476, "loss": 1.9528, "step": 19653 }, { "epoch": 0.5274259338772005, "grad_norm": 0.3125, "learning_rate": 0.0011046830482575511, "loss": 1.9964, "step": 19654 }, { "epoch": 0.5274527694289395, "grad_norm": 0.296875, "learning_rate": 0.0011046678494706285, "loss": 1.9642, "step": 19655 }, { "epoch": 0.5274796049806784, "grad_norm": 0.306640625, "learning_rate": 0.0011046526495766132, "loss": 2.0394, "step": 19656 }, { "epoch": 0.5275064405324174, "grad_norm": 0.298828125, "learning_rate": 0.0011046374485755382, "loss": 1.9244, "step": 19657 }, { "epoch": 0.5275332760841563, "grad_norm": 0.3046875, "learning_rate": 0.0011046222464674374, "loss": 1.9958, "step": 19658 }, { "epoch": 0.5275601116358952, "grad_norm": 0.294921875, "learning_rate": 0.0011046070432523434, "loss": 1.8448, "step": 19659 }, { "epoch": 0.5275869471876342, "grad_norm": 0.291015625, "learning_rate": 0.0011045918389302903, "loss": 1.9397, "step": 19660 }, { "epoch": 0.5276137827393731, "grad_norm": 0.302734375, "learning_rate": 0.0011045766335013112, "loss": 2.0043, "step": 19661 }, { "epoch": 0.5276406182911121, "grad_norm": 0.296875, "learning_rate": 0.0011045614269654391, "loss": 1.9481, "step": 19662 }, { "epoch": 0.527667453842851, "grad_norm": 0.2890625, "learning_rate": 0.001104546219322708, "loss": 1.8519, "step": 19663 }, { "epoch": 0.52769428939459, "grad_norm": 0.294921875, "learning_rate": 0.0011045310105731508, "loss": 1.9236, "step": 19664 }, { "epoch": 0.527721124946329, "grad_norm": 0.30859375, "learning_rate": 0.0011045158007168012, "loss": 2.0075, "step": 19665 }, { "epoch": 0.5277479604980678, "grad_norm": 0.296875, "learning_rate": 0.001104500589753692, "loss": 1.9333, "step": 19666 }, { "epoch": 0.5277747960498068, "grad_norm": 0.30078125, "learning_rate": 0.0011044853776838572, "loss": 1.9517, "step": 19667 }, { "epoch": 0.5278016316015457, "grad_norm": 0.3046875, "learning_rate": 0.0011044701645073298, "loss": 1.9786, "step": 19668 }, { "epoch": 0.5278284671532847, "grad_norm": 0.294921875, "learning_rate": 0.0011044549502241434, "loss": 1.9031, "step": 19669 }, { "epoch": 0.5278553027050236, "grad_norm": 0.291015625, "learning_rate": 0.0011044397348343312, "loss": 1.8694, "step": 19670 }, { "epoch": 0.5278821382567626, "grad_norm": 0.294921875, "learning_rate": 0.0011044245183379267, "loss": 1.9862, "step": 19671 }, { "epoch": 0.5279089738085015, "grad_norm": 0.294921875, "learning_rate": 0.001104409300734963, "loss": 1.8685, "step": 19672 }, { "epoch": 0.5279358093602404, "grad_norm": 0.294921875, "learning_rate": 0.0011043940820254737, "loss": 1.8811, "step": 19673 }, { "epoch": 0.5279626449119794, "grad_norm": 0.30078125, "learning_rate": 0.0011043788622094924, "loss": 1.9395, "step": 19674 }, { "epoch": 0.5279894804637183, "grad_norm": 0.29296875, "learning_rate": 0.0011043636412870522, "loss": 1.8872, "step": 19675 }, { "epoch": 0.5280163160154573, "grad_norm": 0.30859375, "learning_rate": 0.0011043484192581863, "loss": 2.0411, "step": 19676 }, { "epoch": 0.5280431515671962, "grad_norm": 0.30078125, "learning_rate": 0.0011043331961229288, "loss": 1.8959, "step": 19677 }, { "epoch": 0.5280699871189352, "grad_norm": 0.298828125, "learning_rate": 0.0011043179718813122, "loss": 1.8973, "step": 19678 }, { "epoch": 0.5280968226706741, "grad_norm": 0.31640625, "learning_rate": 0.0011043027465333706, "loss": 1.9653, "step": 19679 }, { "epoch": 0.528123658222413, "grad_norm": 0.298828125, "learning_rate": 0.0011042875200791371, "loss": 2.0215, "step": 19680 }, { "epoch": 0.528150493774152, "grad_norm": 0.291015625, "learning_rate": 0.001104272292518645, "loss": 1.8594, "step": 19681 }, { "epoch": 0.5281773293258909, "grad_norm": 0.30078125, "learning_rate": 0.001104257063851928, "loss": 1.9698, "step": 19682 }, { "epoch": 0.5282041648776299, "grad_norm": 0.302734375, "learning_rate": 0.0011042418340790193, "loss": 1.9947, "step": 19683 }, { "epoch": 0.5282310004293689, "grad_norm": 0.294921875, "learning_rate": 0.0011042266031999525, "loss": 1.8944, "step": 19684 }, { "epoch": 0.5282578359811078, "grad_norm": 0.30078125, "learning_rate": 0.0011042113712147609, "loss": 1.9292, "step": 19685 }, { "epoch": 0.5282846715328468, "grad_norm": 0.30859375, "learning_rate": 0.0011041961381234777, "loss": 1.9188, "step": 19686 }, { "epoch": 0.5283115070845856, "grad_norm": 0.291015625, "learning_rate": 0.0011041809039261366, "loss": 1.8078, "step": 19687 }, { "epoch": 0.5283383426363246, "grad_norm": 0.306640625, "learning_rate": 0.001104165668622771, "loss": 1.9401, "step": 19688 }, { "epoch": 0.5283651781880635, "grad_norm": 0.2890625, "learning_rate": 0.001104150432213414, "loss": 1.8212, "step": 19689 }, { "epoch": 0.5283920137398025, "grad_norm": 0.302734375, "learning_rate": 0.0011041351946980995, "loss": 1.8834, "step": 19690 }, { "epoch": 0.5284188492915415, "grad_norm": 0.302734375, "learning_rate": 0.0011041199560768605, "loss": 1.9806, "step": 19691 }, { "epoch": 0.5284456848432804, "grad_norm": 0.29296875, "learning_rate": 0.001104104716349731, "loss": 1.8595, "step": 19692 }, { "epoch": 0.5284725203950194, "grad_norm": 0.3046875, "learning_rate": 0.0011040894755167435, "loss": 1.9073, "step": 19693 }, { "epoch": 0.5284993559467582, "grad_norm": 0.30859375, "learning_rate": 0.0011040742335779324, "loss": 1.937, "step": 19694 }, { "epoch": 0.5285261914984972, "grad_norm": 0.296875, "learning_rate": 0.0011040589905333307, "loss": 1.8389, "step": 19695 }, { "epoch": 0.5285530270502361, "grad_norm": 0.3046875, "learning_rate": 0.001104043746382972, "loss": 1.8059, "step": 19696 }, { "epoch": 0.5285798626019751, "grad_norm": 0.30078125, "learning_rate": 0.0011040285011268893, "loss": 1.9246, "step": 19697 }, { "epoch": 0.5286066981537141, "grad_norm": 0.302734375, "learning_rate": 0.0011040132547651166, "loss": 2.0528, "step": 19698 }, { "epoch": 0.528633533705453, "grad_norm": 0.29296875, "learning_rate": 0.001103998007297687, "loss": 1.837, "step": 19699 }, { "epoch": 0.528660369257192, "grad_norm": 0.302734375, "learning_rate": 0.001103982758724634, "loss": 1.9747, "step": 19700 }, { "epoch": 0.5286872048089308, "grad_norm": 0.296875, "learning_rate": 0.001103967509045991, "loss": 1.9066, "step": 19701 }, { "epoch": 0.5287140403606698, "grad_norm": 0.30859375, "learning_rate": 0.0011039522582617918, "loss": 1.9754, "step": 19702 }, { "epoch": 0.5287408759124088, "grad_norm": 0.30078125, "learning_rate": 0.0011039370063720697, "loss": 1.8605, "step": 19703 }, { "epoch": 0.5287677114641477, "grad_norm": 0.30859375, "learning_rate": 0.0011039217533768578, "loss": 1.8729, "step": 19704 }, { "epoch": 0.5287945470158867, "grad_norm": 0.30859375, "learning_rate": 0.0011039064992761898, "loss": 1.9483, "step": 19705 }, { "epoch": 0.5288213825676256, "grad_norm": 0.306640625, "learning_rate": 0.0011038912440700993, "loss": 1.8834, "step": 19706 }, { "epoch": 0.5288482181193646, "grad_norm": 0.310546875, "learning_rate": 0.0011038759877586197, "loss": 1.9619, "step": 19707 }, { "epoch": 0.5288750536711034, "grad_norm": 0.294921875, "learning_rate": 0.0011038607303417844, "loss": 1.8778, "step": 19708 }, { "epoch": 0.5289018892228424, "grad_norm": 0.30078125, "learning_rate": 0.0011038454718196268, "loss": 1.8652, "step": 19709 }, { "epoch": 0.5289287247745814, "grad_norm": 0.3046875, "learning_rate": 0.0011038302121921806, "loss": 1.8264, "step": 19710 }, { "epoch": 0.5289555603263203, "grad_norm": 0.306640625, "learning_rate": 0.0011038149514594792, "loss": 1.9312, "step": 19711 }, { "epoch": 0.5289823958780593, "grad_norm": 0.30078125, "learning_rate": 0.0011037996896215558, "loss": 1.8578, "step": 19712 }, { "epoch": 0.5290092314297982, "grad_norm": 0.302734375, "learning_rate": 0.0011037844266784443, "loss": 1.8834, "step": 19713 }, { "epoch": 0.5290360669815372, "grad_norm": 0.302734375, "learning_rate": 0.0011037691626301779, "loss": 1.81, "step": 19714 }, { "epoch": 0.529062902533276, "grad_norm": 0.310546875, "learning_rate": 0.0011037538974767902, "loss": 1.8503, "step": 19715 }, { "epoch": 0.529089738085015, "grad_norm": 0.3125, "learning_rate": 0.0011037386312183145, "loss": 1.9002, "step": 19716 }, { "epoch": 0.529116573636754, "grad_norm": 0.298828125, "learning_rate": 0.0011037233638547847, "loss": 1.8114, "step": 19717 }, { "epoch": 0.5291434091884929, "grad_norm": 0.314453125, "learning_rate": 0.001103708095386234, "loss": 1.8185, "step": 19718 }, { "epoch": 0.5291702447402319, "grad_norm": 0.2890625, "learning_rate": 0.0011036928258126957, "loss": 1.7488, "step": 19719 }, { "epoch": 0.5291970802919708, "grad_norm": 0.3046875, "learning_rate": 0.0011036775551342035, "loss": 1.922, "step": 19720 }, { "epoch": 0.5292239158437098, "grad_norm": 0.302734375, "learning_rate": 0.0011036622833507912, "loss": 1.8351, "step": 19721 }, { "epoch": 0.5292507513954486, "grad_norm": 0.294921875, "learning_rate": 0.001103647010462492, "loss": 1.7668, "step": 19722 }, { "epoch": 0.5292775869471876, "grad_norm": 0.296875, "learning_rate": 0.0011036317364693394, "loss": 1.758, "step": 19723 }, { "epoch": 0.5293044224989266, "grad_norm": 0.31640625, "learning_rate": 0.0011036164613713666, "loss": 1.919, "step": 19724 }, { "epoch": 0.5293312580506655, "grad_norm": 0.302734375, "learning_rate": 0.001103601185168608, "loss": 1.8444, "step": 19725 }, { "epoch": 0.5293580936024045, "grad_norm": 0.314453125, "learning_rate": 0.0011035859078610962, "loss": 1.9071, "step": 19726 }, { "epoch": 0.5293849291541434, "grad_norm": 0.31640625, "learning_rate": 0.0011035706294488653, "loss": 1.887, "step": 19727 }, { "epoch": 0.5294117647058824, "grad_norm": 0.359375, "learning_rate": 0.0011035553499319484, "loss": 2.1456, "step": 19728 }, { "epoch": 0.5294386002576214, "grad_norm": 0.353515625, "learning_rate": 0.0011035400693103793, "loss": 2.1238, "step": 19729 }, { "epoch": 0.5294654358093602, "grad_norm": 0.33203125, "learning_rate": 0.0011035247875841913, "loss": 2.1063, "step": 19730 }, { "epoch": 0.5294922713610992, "grad_norm": 0.330078125, "learning_rate": 0.0011035095047534182, "loss": 2.1151, "step": 19731 }, { "epoch": 0.5295191069128381, "grad_norm": 0.330078125, "learning_rate": 0.0011034942208180934, "loss": 2.0698, "step": 19732 }, { "epoch": 0.5295459424645771, "grad_norm": 0.322265625, "learning_rate": 0.0011034789357782504, "loss": 2.0561, "step": 19733 }, { "epoch": 0.529572778016316, "grad_norm": 0.3125, "learning_rate": 0.0011034636496339229, "loss": 2.1635, "step": 19734 }, { "epoch": 0.529599613568055, "grad_norm": 0.302734375, "learning_rate": 0.001103448362385144, "loss": 2.066, "step": 19735 }, { "epoch": 0.529626449119794, "grad_norm": 0.318359375, "learning_rate": 0.0011034330740319475, "loss": 2.1175, "step": 19736 }, { "epoch": 0.5296532846715328, "grad_norm": 0.30078125, "learning_rate": 0.001103417784574367, "loss": 2.044, "step": 19737 }, { "epoch": 0.5296801202232718, "grad_norm": 0.294921875, "learning_rate": 0.001103402494012436, "loss": 2.1116, "step": 19738 }, { "epoch": 0.5297069557750107, "grad_norm": 0.318359375, "learning_rate": 0.0011033872023461884, "loss": 2.0882, "step": 19739 }, { "epoch": 0.5297337913267497, "grad_norm": 0.306640625, "learning_rate": 0.0011033719095756567, "loss": 2.0525, "step": 19740 }, { "epoch": 0.5297606268784886, "grad_norm": 0.3046875, "learning_rate": 0.0011033566157008755, "loss": 2.0462, "step": 19741 }, { "epoch": 0.5297874624302276, "grad_norm": 0.296875, "learning_rate": 0.001103341320721878, "loss": 1.9864, "step": 19742 }, { "epoch": 0.5298142979819666, "grad_norm": 0.30078125, "learning_rate": 0.0011033260246386977, "loss": 2.0883, "step": 19743 }, { "epoch": 0.5298411335337054, "grad_norm": 0.30078125, "learning_rate": 0.001103310727451368, "loss": 2.0544, "step": 19744 }, { "epoch": 0.5298679690854444, "grad_norm": 0.306640625, "learning_rate": 0.0011032954291599229, "loss": 1.9622, "step": 19745 }, { "epoch": 0.5298948046371833, "grad_norm": 0.291015625, "learning_rate": 0.0011032801297643955, "loss": 2.0363, "step": 19746 }, { "epoch": 0.5299216401889223, "grad_norm": 0.30859375, "learning_rate": 0.0011032648292648199, "loss": 2.0477, "step": 19747 }, { "epoch": 0.5299484757406612, "grad_norm": 0.3046875, "learning_rate": 0.0011032495276612289, "loss": 2.0578, "step": 19748 }, { "epoch": 0.5299753112924002, "grad_norm": 0.3046875, "learning_rate": 0.0011032342249536566, "loss": 2.0413, "step": 19749 }, { "epoch": 0.5300021468441392, "grad_norm": 0.306640625, "learning_rate": 0.0011032189211421365, "loss": 2.077, "step": 19750 }, { "epoch": 0.530028982395878, "grad_norm": 0.30078125, "learning_rate": 0.0011032036162267023, "loss": 1.9825, "step": 19751 }, { "epoch": 0.530055817947617, "grad_norm": 0.3046875, "learning_rate": 0.0011031883102073873, "loss": 2.0602, "step": 19752 }, { "epoch": 0.5300826534993559, "grad_norm": 0.306640625, "learning_rate": 0.0011031730030842252, "loss": 2.0564, "step": 19753 }, { "epoch": 0.5301094890510949, "grad_norm": 0.302734375, "learning_rate": 0.0011031576948572494, "loss": 2.009, "step": 19754 }, { "epoch": 0.5301363246028339, "grad_norm": 0.287109375, "learning_rate": 0.0011031423855264939, "loss": 2.0077, "step": 19755 }, { "epoch": 0.5301631601545728, "grad_norm": 0.298828125, "learning_rate": 0.0011031270750919918, "loss": 2.0311, "step": 19756 }, { "epoch": 0.5301899957063118, "grad_norm": 0.298828125, "learning_rate": 0.0011031117635537772, "loss": 2.0007, "step": 19757 }, { "epoch": 0.5302168312580506, "grad_norm": 0.28515625, "learning_rate": 0.0011030964509118832, "loss": 1.9107, "step": 19758 }, { "epoch": 0.5302436668097896, "grad_norm": 0.291015625, "learning_rate": 0.0011030811371663437, "loss": 1.9656, "step": 19759 }, { "epoch": 0.5302705023615285, "grad_norm": 0.296875, "learning_rate": 0.001103065822317192, "loss": 2.0462, "step": 19760 }, { "epoch": 0.5302973379132675, "grad_norm": 0.287109375, "learning_rate": 0.001103050506364462, "loss": 1.8682, "step": 19761 }, { "epoch": 0.5303241734650065, "grad_norm": 0.294921875, "learning_rate": 0.0011030351893081872, "loss": 2.0404, "step": 19762 }, { "epoch": 0.5303510090167454, "grad_norm": 0.294921875, "learning_rate": 0.0011030198711484013, "loss": 1.9531, "step": 19763 }, { "epoch": 0.5303778445684844, "grad_norm": 0.298828125, "learning_rate": 0.0011030045518851377, "loss": 2.02, "step": 19764 }, { "epoch": 0.5304046801202232, "grad_norm": 0.298828125, "learning_rate": 0.00110298923151843, "loss": 1.9587, "step": 19765 }, { "epoch": 0.5304315156719622, "grad_norm": 0.302734375, "learning_rate": 0.001102973910048312, "loss": 2.0915, "step": 19766 }, { "epoch": 0.5304583512237011, "grad_norm": 0.296875, "learning_rate": 0.0011029585874748173, "loss": 1.9931, "step": 19767 }, { "epoch": 0.5304851867754401, "grad_norm": 0.30078125, "learning_rate": 0.0011029432637979794, "loss": 2.0258, "step": 19768 }, { "epoch": 0.5305120223271791, "grad_norm": 0.30078125, "learning_rate": 0.0011029279390178319, "loss": 1.968, "step": 19769 }, { "epoch": 0.530538857878918, "grad_norm": 0.296875, "learning_rate": 0.0011029126131344084, "loss": 1.999, "step": 19770 }, { "epoch": 0.530565693430657, "grad_norm": 0.2890625, "learning_rate": 0.0011028972861477427, "loss": 1.9695, "step": 19771 }, { "epoch": 0.5305925289823958, "grad_norm": 0.294921875, "learning_rate": 0.0011028819580578683, "loss": 1.9152, "step": 19772 }, { "epoch": 0.5306193645341348, "grad_norm": 0.291015625, "learning_rate": 0.0011028666288648188, "loss": 1.8841, "step": 19773 }, { "epoch": 0.5306462000858738, "grad_norm": 0.30078125, "learning_rate": 0.001102851298568628, "loss": 2.0043, "step": 19774 }, { "epoch": 0.5306730356376127, "grad_norm": 0.287109375, "learning_rate": 0.0011028359671693291, "loss": 1.8298, "step": 19775 }, { "epoch": 0.5306998711893517, "grad_norm": 0.294921875, "learning_rate": 0.0011028206346669562, "loss": 1.972, "step": 19776 }, { "epoch": 0.5307267067410906, "grad_norm": 0.30078125, "learning_rate": 0.0011028053010615428, "loss": 2.0372, "step": 19777 }, { "epoch": 0.5307535422928296, "grad_norm": 0.3046875, "learning_rate": 0.0011027899663531224, "loss": 2.0565, "step": 19778 }, { "epoch": 0.5307803778445684, "grad_norm": 0.3125, "learning_rate": 0.0011027746305417288, "loss": 2.0827, "step": 19779 }, { "epoch": 0.5308072133963074, "grad_norm": 0.296875, "learning_rate": 0.0011027592936273956, "loss": 2.0112, "step": 19780 }, { "epoch": 0.5308340489480464, "grad_norm": 0.298828125, "learning_rate": 0.0011027439556101563, "loss": 1.9373, "step": 19781 }, { "epoch": 0.5308608844997853, "grad_norm": 0.294921875, "learning_rate": 0.0011027286164900446, "loss": 2.0221, "step": 19782 }, { "epoch": 0.5308877200515243, "grad_norm": 0.3046875, "learning_rate": 0.0011027132762670944, "loss": 2.068, "step": 19783 }, { "epoch": 0.5309145556032632, "grad_norm": 0.291015625, "learning_rate": 0.0011026979349413393, "loss": 1.9681, "step": 19784 }, { "epoch": 0.5309413911550022, "grad_norm": 0.296875, "learning_rate": 0.0011026825925128127, "loss": 2.0201, "step": 19785 }, { "epoch": 0.530968226706741, "grad_norm": 0.287109375, "learning_rate": 0.0011026672489815482, "loss": 1.9396, "step": 19786 }, { "epoch": 0.53099506225848, "grad_norm": 0.30078125, "learning_rate": 0.0011026519043475797, "loss": 2.0363, "step": 19787 }, { "epoch": 0.531021897810219, "grad_norm": 0.306640625, "learning_rate": 0.001102636558610941, "loss": 2.0682, "step": 19788 }, { "epoch": 0.5310487333619579, "grad_norm": 0.2890625, "learning_rate": 0.0011026212117716656, "loss": 1.9941, "step": 19789 }, { "epoch": 0.5310755689136969, "grad_norm": 0.2890625, "learning_rate": 0.0011026058638297868, "loss": 1.8746, "step": 19790 }, { "epoch": 0.5311024044654358, "grad_norm": 0.28515625, "learning_rate": 0.0011025905147853388, "loss": 1.8625, "step": 19791 }, { "epoch": 0.5311292400171748, "grad_norm": 0.294921875, "learning_rate": 0.0011025751646383552, "loss": 1.8701, "step": 19792 }, { "epoch": 0.5311560755689136, "grad_norm": 0.298828125, "learning_rate": 0.0011025598133888694, "loss": 1.8989, "step": 19793 }, { "epoch": 0.5311829111206526, "grad_norm": 0.296875, "learning_rate": 0.0011025444610369152, "loss": 1.9172, "step": 19794 }, { "epoch": 0.5312097466723916, "grad_norm": 0.29296875, "learning_rate": 0.0011025291075825263, "loss": 1.8614, "step": 19795 }, { "epoch": 0.5312365822241305, "grad_norm": 0.298828125, "learning_rate": 0.0011025137530257364, "loss": 1.8979, "step": 19796 }, { "epoch": 0.5312634177758695, "grad_norm": 0.296875, "learning_rate": 0.0011024983973665792, "loss": 1.8756, "step": 19797 }, { "epoch": 0.5312902533276084, "grad_norm": 0.310546875, "learning_rate": 0.0011024830406050885, "loss": 1.9583, "step": 19798 }, { "epoch": 0.5313170888793474, "grad_norm": 0.3125, "learning_rate": 0.0011024676827412977, "loss": 1.958, "step": 19799 }, { "epoch": 0.5313439244310864, "grad_norm": 0.310546875, "learning_rate": 0.0011024523237752406, "loss": 1.9613, "step": 19800 }, { "epoch": 0.5313707599828252, "grad_norm": 0.298828125, "learning_rate": 0.001102436963706951, "loss": 1.9172, "step": 19801 }, { "epoch": 0.5313975955345642, "grad_norm": 0.29296875, "learning_rate": 0.0011024216025364625, "loss": 1.9028, "step": 19802 }, { "epoch": 0.5314244310863031, "grad_norm": 0.302734375, "learning_rate": 0.0011024062402638089, "loss": 1.8104, "step": 19803 }, { "epoch": 0.5314512666380421, "grad_norm": 0.302734375, "learning_rate": 0.0011023908768890234, "loss": 1.9443, "step": 19804 }, { "epoch": 0.531478102189781, "grad_norm": 0.3046875, "learning_rate": 0.0011023755124121408, "loss": 1.955, "step": 19805 }, { "epoch": 0.53150493774152, "grad_norm": 0.294921875, "learning_rate": 0.0011023601468331935, "loss": 1.9568, "step": 19806 }, { "epoch": 0.531531773293259, "grad_norm": 0.296875, "learning_rate": 0.0011023447801522162, "loss": 1.9197, "step": 19807 }, { "epoch": 0.5315586088449978, "grad_norm": 0.294921875, "learning_rate": 0.0011023294123692423, "loss": 1.8645, "step": 19808 }, { "epoch": 0.5315854443967368, "grad_norm": 0.30078125, "learning_rate": 0.0011023140434843052, "loss": 1.8458, "step": 19809 }, { "epoch": 0.5316122799484757, "grad_norm": 0.3046875, "learning_rate": 0.0011022986734974388, "loss": 1.9097, "step": 19810 }, { "epoch": 0.5316391155002147, "grad_norm": 0.29296875, "learning_rate": 0.0011022833024086772, "loss": 1.8314, "step": 19811 }, { "epoch": 0.5316659510519536, "grad_norm": 0.3125, "learning_rate": 0.0011022679302180535, "loss": 2.0188, "step": 19812 }, { "epoch": 0.5316927866036926, "grad_norm": 0.306640625, "learning_rate": 0.001102252556925602, "loss": 1.9622, "step": 19813 }, { "epoch": 0.5317196221554316, "grad_norm": 0.30859375, "learning_rate": 0.001102237182531356, "loss": 1.9446, "step": 19814 }, { "epoch": 0.5317464577071704, "grad_norm": 0.30078125, "learning_rate": 0.0011022218070353496, "loss": 1.9248, "step": 19815 }, { "epoch": 0.5317732932589094, "grad_norm": 0.3046875, "learning_rate": 0.001102206430437616, "loss": 1.844, "step": 19816 }, { "epoch": 0.5318001288106483, "grad_norm": 0.298828125, "learning_rate": 0.0011021910527381894, "loss": 1.8487, "step": 19817 }, { "epoch": 0.5318269643623873, "grad_norm": 0.298828125, "learning_rate": 0.0011021756739371035, "loss": 1.8707, "step": 19818 }, { "epoch": 0.5318537999141262, "grad_norm": 0.310546875, "learning_rate": 0.0011021602940343917, "loss": 1.8788, "step": 19819 }, { "epoch": 0.5318806354658652, "grad_norm": 0.298828125, "learning_rate": 0.001102144913030088, "loss": 1.8625, "step": 19820 }, { "epoch": 0.5319074710176042, "grad_norm": 0.3046875, "learning_rate": 0.0011021295309242261, "loss": 1.8537, "step": 19821 }, { "epoch": 0.531934306569343, "grad_norm": 0.306640625, "learning_rate": 0.0011021141477168398, "loss": 1.9057, "step": 19822 }, { "epoch": 0.531961142121082, "grad_norm": 0.30859375, "learning_rate": 0.0011020987634079629, "loss": 1.9758, "step": 19823 }, { "epoch": 0.5319879776728209, "grad_norm": 0.29296875, "learning_rate": 0.0011020833779976287, "loss": 1.8382, "step": 19824 }, { "epoch": 0.5320148132245599, "grad_norm": 0.3046875, "learning_rate": 0.0011020679914858715, "loss": 1.8847, "step": 19825 }, { "epoch": 0.5320416487762989, "grad_norm": 0.302734375, "learning_rate": 0.0011020526038727247, "loss": 1.877, "step": 19826 }, { "epoch": 0.5320684843280378, "grad_norm": 0.30078125, "learning_rate": 0.0011020372151582226, "loss": 1.8278, "step": 19827 }, { "epoch": 0.5320953198797768, "grad_norm": 0.310546875, "learning_rate": 0.001102021825342398, "loss": 1.9189, "step": 19828 }, { "epoch": 0.5321221554315156, "grad_norm": 0.30078125, "learning_rate": 0.0011020064344252855, "loss": 1.9013, "step": 19829 }, { "epoch": 0.5321489909832546, "grad_norm": 0.30078125, "learning_rate": 0.0011019910424069186, "loss": 1.8473, "step": 19830 }, { "epoch": 0.5321758265349935, "grad_norm": 0.294921875, "learning_rate": 0.0011019756492873308, "loss": 1.8741, "step": 19831 }, { "epoch": 0.5322026620867325, "grad_norm": 0.30859375, "learning_rate": 0.0011019602550665566, "loss": 1.9064, "step": 19832 }, { "epoch": 0.5322294976384715, "grad_norm": 0.314453125, "learning_rate": 0.0011019448597446287, "loss": 1.9939, "step": 19833 }, { "epoch": 0.5322563331902104, "grad_norm": 0.29296875, "learning_rate": 0.0011019294633215818, "loss": 1.8182, "step": 19834 }, { "epoch": 0.5322831687419494, "grad_norm": 0.298828125, "learning_rate": 0.0011019140657974491, "loss": 1.8709, "step": 19835 }, { "epoch": 0.5323100042936882, "grad_norm": 0.3125, "learning_rate": 0.0011018986671722647, "loss": 1.8597, "step": 19836 }, { "epoch": 0.5323368398454272, "grad_norm": 0.31640625, "learning_rate": 0.0011018832674460626, "loss": 1.9035, "step": 19837 }, { "epoch": 0.5323636753971661, "grad_norm": 0.3046875, "learning_rate": 0.001101867866618876, "loss": 1.8475, "step": 19838 }, { "epoch": 0.5323905109489051, "grad_norm": 0.29296875, "learning_rate": 0.0011018524646907387, "loss": 1.8276, "step": 19839 }, { "epoch": 0.5324173465006441, "grad_norm": 0.3046875, "learning_rate": 0.0011018370616616853, "loss": 1.8523, "step": 19840 }, { "epoch": 0.532444182052383, "grad_norm": 0.30078125, "learning_rate": 0.0011018216575317487, "loss": 1.8341, "step": 19841 }, { "epoch": 0.532471017604122, "grad_norm": 0.30859375, "learning_rate": 0.001101806252300963, "loss": 1.8463, "step": 19842 }, { "epoch": 0.5324978531558608, "grad_norm": 0.30859375, "learning_rate": 0.0011017908459693621, "loss": 1.9063, "step": 19843 }, { "epoch": 0.5325246887075998, "grad_norm": 0.365234375, "learning_rate": 0.0011017754385369797, "loss": 2.0813, "step": 19844 }, { "epoch": 0.5325515242593388, "grad_norm": 0.35546875, "learning_rate": 0.0011017600300038495, "loss": 2.0898, "step": 19845 }, { "epoch": 0.5325783598110777, "grad_norm": 0.35546875, "learning_rate": 0.0011017446203700054, "loss": 2.141, "step": 19846 }, { "epoch": 0.5326051953628167, "grad_norm": 0.337890625, "learning_rate": 0.0011017292096354816, "loss": 2.1224, "step": 19847 }, { "epoch": 0.5326320309145556, "grad_norm": 0.318359375, "learning_rate": 0.001101713797800311, "loss": 2.0986, "step": 19848 }, { "epoch": 0.5326588664662946, "grad_norm": 0.326171875, "learning_rate": 0.0011016983848645282, "loss": 2.0582, "step": 19849 }, { "epoch": 0.5326857020180334, "grad_norm": 0.310546875, "learning_rate": 0.0011016829708281667, "loss": 2.0857, "step": 19850 }, { "epoch": 0.5327125375697724, "grad_norm": 0.30078125, "learning_rate": 0.0011016675556912606, "loss": 2.0965, "step": 19851 }, { "epoch": 0.5327393731215114, "grad_norm": 0.302734375, "learning_rate": 0.001101652139453843, "loss": 2.1513, "step": 19852 }, { "epoch": 0.5327662086732503, "grad_norm": 0.3046875, "learning_rate": 0.0011016367221159487, "loss": 2.1245, "step": 19853 }, { "epoch": 0.5327930442249893, "grad_norm": 0.298828125, "learning_rate": 0.0011016213036776105, "loss": 2.0566, "step": 19854 }, { "epoch": 0.5328198797767282, "grad_norm": 0.298828125, "learning_rate": 0.0011016058841388631, "loss": 1.9899, "step": 19855 }, { "epoch": 0.5328467153284672, "grad_norm": 0.306640625, "learning_rate": 0.0011015904634997398, "loss": 2.0468, "step": 19856 }, { "epoch": 0.532873550880206, "grad_norm": 0.30078125, "learning_rate": 0.0011015750417602747, "loss": 1.9556, "step": 19857 }, { "epoch": 0.532900386431945, "grad_norm": 0.31640625, "learning_rate": 0.0011015596189205017, "loss": 2.0715, "step": 19858 }, { "epoch": 0.532927221983684, "grad_norm": 0.31640625, "learning_rate": 0.0011015441949804542, "loss": 2.0729, "step": 19859 }, { "epoch": 0.5329540575354229, "grad_norm": 0.3046875, "learning_rate": 0.0011015287699401661, "loss": 1.9675, "step": 19860 }, { "epoch": 0.5329808930871619, "grad_norm": 0.30078125, "learning_rate": 0.0011015133437996718, "loss": 2.1326, "step": 19861 }, { "epoch": 0.5330077286389008, "grad_norm": 0.298828125, "learning_rate": 0.0011014979165590046, "loss": 1.9982, "step": 19862 }, { "epoch": 0.5330345641906398, "grad_norm": 0.294921875, "learning_rate": 0.0011014824882181987, "loss": 2.021, "step": 19863 }, { "epoch": 0.5330613997423786, "grad_norm": 0.298828125, "learning_rate": 0.0011014670587772874, "loss": 1.985, "step": 19864 }, { "epoch": 0.5330882352941176, "grad_norm": 0.302734375, "learning_rate": 0.0011014516282363052, "loss": 1.9994, "step": 19865 }, { "epoch": 0.5331150708458566, "grad_norm": 0.294921875, "learning_rate": 0.0011014361965952855, "loss": 1.9755, "step": 19866 }, { "epoch": 0.5331419063975955, "grad_norm": 0.28515625, "learning_rate": 0.0011014207638542625, "loss": 1.8918, "step": 19867 }, { "epoch": 0.5331687419493345, "grad_norm": 0.296875, "learning_rate": 0.0011014053300132697, "loss": 1.9921, "step": 19868 }, { "epoch": 0.5331955775010734, "grad_norm": 0.294921875, "learning_rate": 0.0011013898950723413, "loss": 1.9428, "step": 19869 }, { "epoch": 0.5332224130528124, "grad_norm": 0.30078125, "learning_rate": 0.001101374459031511, "loss": 2.034, "step": 19870 }, { "epoch": 0.5332492486045514, "grad_norm": 0.28515625, "learning_rate": 0.0011013590218908124, "loss": 1.9108, "step": 19871 }, { "epoch": 0.5332760841562902, "grad_norm": 0.291015625, "learning_rate": 0.0011013435836502797, "loss": 1.9525, "step": 19872 }, { "epoch": 0.5333029197080292, "grad_norm": 0.29296875, "learning_rate": 0.0011013281443099465, "loss": 1.9775, "step": 19873 }, { "epoch": 0.5333297552597681, "grad_norm": 0.302734375, "learning_rate": 0.001101312703869847, "loss": 2.0446, "step": 19874 }, { "epoch": 0.5333565908115071, "grad_norm": 0.30078125, "learning_rate": 0.0011012972623300151, "loss": 1.994, "step": 19875 }, { "epoch": 0.533383426363246, "grad_norm": 0.298828125, "learning_rate": 0.0011012818196904843, "loss": 1.9846, "step": 19876 }, { "epoch": 0.533410261914985, "grad_norm": 0.3046875, "learning_rate": 0.0011012663759512888, "loss": 2.0324, "step": 19877 }, { "epoch": 0.533437097466724, "grad_norm": 0.291015625, "learning_rate": 0.001101250931112462, "loss": 1.9281, "step": 19878 }, { "epoch": 0.5334639330184628, "grad_norm": 0.302734375, "learning_rate": 0.0011012354851740384, "loss": 1.9937, "step": 19879 }, { "epoch": 0.5334907685702018, "grad_norm": 0.287109375, "learning_rate": 0.0011012200381360516, "loss": 1.9365, "step": 19880 }, { "epoch": 0.5335176041219407, "grad_norm": 0.294921875, "learning_rate": 0.0011012045899985354, "loss": 2.0113, "step": 19881 }, { "epoch": 0.5335444396736797, "grad_norm": 0.291015625, "learning_rate": 0.001101189140761524, "loss": 1.9759, "step": 19882 }, { "epoch": 0.5335712752254186, "grad_norm": 0.287109375, "learning_rate": 0.0011011736904250511, "loss": 1.9712, "step": 19883 }, { "epoch": 0.5335981107771576, "grad_norm": 0.306640625, "learning_rate": 0.0011011582389891504, "loss": 2.0126, "step": 19884 }, { "epoch": 0.5336249463288966, "grad_norm": 0.298828125, "learning_rate": 0.0011011427864538559, "loss": 2.0378, "step": 19885 }, { "epoch": 0.5336517818806354, "grad_norm": 0.310546875, "learning_rate": 0.0011011273328192016, "loss": 2.0675, "step": 19886 }, { "epoch": 0.5336786174323744, "grad_norm": 0.302734375, "learning_rate": 0.0011011118780852216, "loss": 2.0246, "step": 19887 }, { "epoch": 0.5337054529841133, "grad_norm": 0.294921875, "learning_rate": 0.0011010964222519493, "loss": 1.9663, "step": 19888 }, { "epoch": 0.5337322885358523, "grad_norm": 0.294921875, "learning_rate": 0.001101080965319419, "loss": 1.9217, "step": 19889 }, { "epoch": 0.5337591240875912, "grad_norm": 0.294921875, "learning_rate": 0.0011010655072876644, "loss": 1.8523, "step": 19890 }, { "epoch": 0.5337859596393302, "grad_norm": 0.294921875, "learning_rate": 0.0011010500481567194, "loss": 1.9803, "step": 19891 }, { "epoch": 0.5338127951910692, "grad_norm": 0.291015625, "learning_rate": 0.001101034587926618, "loss": 1.8696, "step": 19892 }, { "epoch": 0.533839630742808, "grad_norm": 0.294921875, "learning_rate": 0.0011010191265973944, "loss": 1.9351, "step": 19893 }, { "epoch": 0.533866466294547, "grad_norm": 0.29296875, "learning_rate": 0.001101003664169082, "loss": 1.8903, "step": 19894 }, { "epoch": 0.5338933018462859, "grad_norm": 0.28515625, "learning_rate": 0.0011009882006417153, "loss": 1.8455, "step": 19895 }, { "epoch": 0.5339201373980249, "grad_norm": 0.291015625, "learning_rate": 0.0011009727360153277, "loss": 1.8752, "step": 19896 }, { "epoch": 0.5339469729497639, "grad_norm": 0.3046875, "learning_rate": 0.001100957270289953, "loss": 1.9728, "step": 19897 }, { "epoch": 0.5339738085015028, "grad_norm": 0.30859375, "learning_rate": 0.0011009418034656257, "loss": 1.9974, "step": 19898 }, { "epoch": 0.5340006440532418, "grad_norm": 0.296875, "learning_rate": 0.0011009263355423794, "loss": 1.8837, "step": 19899 }, { "epoch": 0.5340274796049806, "grad_norm": 0.294921875, "learning_rate": 0.0011009108665202482, "loss": 1.8771, "step": 19900 }, { "epoch": 0.5340543151567196, "grad_norm": 0.302734375, "learning_rate": 0.0011008953963992658, "loss": 1.9091, "step": 19901 }, { "epoch": 0.5340811507084585, "grad_norm": 0.29296875, "learning_rate": 0.0011008799251794664, "loss": 1.894, "step": 19902 }, { "epoch": 0.5341079862601975, "grad_norm": 0.294921875, "learning_rate": 0.0011008644528608838, "loss": 1.8887, "step": 19903 }, { "epoch": 0.5341348218119365, "grad_norm": 0.30859375, "learning_rate": 0.0011008489794435518, "loss": 2.0437, "step": 19904 }, { "epoch": 0.5341616573636754, "grad_norm": 0.306640625, "learning_rate": 0.0011008335049275046, "loss": 1.9868, "step": 19905 }, { "epoch": 0.5341884929154144, "grad_norm": 0.291015625, "learning_rate": 0.0011008180293127759, "loss": 1.9455, "step": 19906 }, { "epoch": 0.5342153284671532, "grad_norm": 0.298828125, "learning_rate": 0.0011008025525994, "loss": 2.0193, "step": 19907 }, { "epoch": 0.5342421640188922, "grad_norm": 0.294921875, "learning_rate": 0.0011007870747874104, "loss": 1.888, "step": 19908 }, { "epoch": 0.5342689995706311, "grad_norm": 0.3046875, "learning_rate": 0.0011007715958768415, "loss": 2.0003, "step": 19909 }, { "epoch": 0.5342958351223701, "grad_norm": 0.298828125, "learning_rate": 0.0011007561158677271, "loss": 1.9569, "step": 19910 }, { "epoch": 0.5343226706741091, "grad_norm": 0.3125, "learning_rate": 0.0011007406347601008, "loss": 1.959, "step": 19911 }, { "epoch": 0.534349506225848, "grad_norm": 0.291015625, "learning_rate": 0.001100725152553997, "loss": 1.8758, "step": 19912 }, { "epoch": 0.534376341777587, "grad_norm": 0.30078125, "learning_rate": 0.0011007096692494498, "loss": 2.0045, "step": 19913 }, { "epoch": 0.5344031773293259, "grad_norm": 0.298828125, "learning_rate": 0.0011006941848464924, "loss": 1.9543, "step": 19914 }, { "epoch": 0.5344300128810648, "grad_norm": 0.302734375, "learning_rate": 0.0011006786993451596, "loss": 1.9826, "step": 19915 }, { "epoch": 0.5344568484328038, "grad_norm": 0.291015625, "learning_rate": 0.001100663212745485, "loss": 1.8903, "step": 19916 }, { "epoch": 0.5344836839845427, "grad_norm": 0.294921875, "learning_rate": 0.0011006477250475027, "loss": 1.9006, "step": 19917 }, { "epoch": 0.5345105195362817, "grad_norm": 0.29296875, "learning_rate": 0.0011006322362512465, "loss": 1.8931, "step": 19918 }, { "epoch": 0.5345373550880206, "grad_norm": 0.298828125, "learning_rate": 0.0011006167463567503, "loss": 1.9409, "step": 19919 }, { "epoch": 0.5345641906397596, "grad_norm": 0.30859375, "learning_rate": 0.0011006012553640484, "loss": 2.0101, "step": 19920 }, { "epoch": 0.5345910261914985, "grad_norm": 0.302734375, "learning_rate": 0.0011005857632731745, "loss": 1.8605, "step": 19921 }, { "epoch": 0.5346178617432374, "grad_norm": 0.294921875, "learning_rate": 0.0011005702700841629, "loss": 1.8952, "step": 19922 }, { "epoch": 0.5346446972949764, "grad_norm": 0.306640625, "learning_rate": 0.0011005547757970474, "loss": 1.9666, "step": 19923 }, { "epoch": 0.5346715328467153, "grad_norm": 0.294921875, "learning_rate": 0.0011005392804118617, "loss": 1.7796, "step": 19924 }, { "epoch": 0.5346983683984543, "grad_norm": 0.306640625, "learning_rate": 0.0011005237839286403, "loss": 1.9978, "step": 19925 }, { "epoch": 0.5347252039501932, "grad_norm": 0.30078125, "learning_rate": 0.001100508286347417, "loss": 1.9159, "step": 19926 }, { "epoch": 0.5347520395019322, "grad_norm": 0.294921875, "learning_rate": 0.0011004927876682256, "loss": 1.8364, "step": 19927 }, { "epoch": 0.534778875053671, "grad_norm": 0.3125, "learning_rate": 0.0011004772878911004, "loss": 1.9505, "step": 19928 }, { "epoch": 0.53480571060541, "grad_norm": 0.318359375, "learning_rate": 0.0011004617870160754, "loss": 2.0086, "step": 19929 }, { "epoch": 0.534832546157149, "grad_norm": 0.310546875, "learning_rate": 0.0011004462850431843, "loss": 1.9414, "step": 19930 }, { "epoch": 0.5348593817088879, "grad_norm": 0.28515625, "learning_rate": 0.0011004307819724613, "loss": 1.7606, "step": 19931 }, { "epoch": 0.5348862172606269, "grad_norm": 0.30859375, "learning_rate": 0.0011004152778039404, "loss": 1.7959, "step": 19932 }, { "epoch": 0.5349130528123658, "grad_norm": 0.310546875, "learning_rate": 0.0011003997725376555, "loss": 1.9447, "step": 19933 }, { "epoch": 0.5349398883641048, "grad_norm": 0.30859375, "learning_rate": 0.001100384266173641, "loss": 1.9767, "step": 19934 }, { "epoch": 0.5349667239158437, "grad_norm": 0.296875, "learning_rate": 0.0011003687587119303, "loss": 1.8205, "step": 19935 }, { "epoch": 0.5349935594675826, "grad_norm": 0.3125, "learning_rate": 0.001100353250152558, "loss": 1.9153, "step": 19936 }, { "epoch": 0.5350203950193216, "grad_norm": 0.3125, "learning_rate": 0.001100337740495558, "loss": 1.9077, "step": 19937 }, { "epoch": 0.5350472305710605, "grad_norm": 0.310546875, "learning_rate": 0.0011003222297409638, "loss": 1.9281, "step": 19938 }, { "epoch": 0.5350740661227995, "grad_norm": 0.310546875, "learning_rate": 0.0011003067178888101, "loss": 1.8402, "step": 19939 }, { "epoch": 0.5351009016745384, "grad_norm": 0.30078125, "learning_rate": 0.0011002912049391305, "loss": 1.8397, "step": 19940 }, { "epoch": 0.5351277372262774, "grad_norm": 0.30078125, "learning_rate": 0.0011002756908919593, "loss": 1.8755, "step": 19941 }, { "epoch": 0.5351545727780164, "grad_norm": 0.314453125, "learning_rate": 0.0011002601757473304, "loss": 1.9078, "step": 19942 }, { "epoch": 0.5351814083297552, "grad_norm": 0.30078125, "learning_rate": 0.0011002446595052779, "loss": 1.8446, "step": 19943 }, { "epoch": 0.5352082438814942, "grad_norm": 0.3046875, "learning_rate": 0.0011002291421658356, "loss": 1.8844, "step": 19944 }, { "epoch": 0.5352350794332331, "grad_norm": 0.3125, "learning_rate": 0.0011002136237290379, "loss": 1.939, "step": 19945 }, { "epoch": 0.5352619149849721, "grad_norm": 0.306640625, "learning_rate": 0.0011001981041949187, "loss": 1.8933, "step": 19946 }, { "epoch": 0.535288750536711, "grad_norm": 0.306640625, "learning_rate": 0.0011001825835635119, "loss": 1.8634, "step": 19947 }, { "epoch": 0.53531558608845, "grad_norm": 0.310546875, "learning_rate": 0.001100167061834852, "loss": 1.9172, "step": 19948 }, { "epoch": 0.535342421640189, "grad_norm": 0.30859375, "learning_rate": 0.0011001515390089722, "loss": 1.8836, "step": 19949 }, { "epoch": 0.5353692571919278, "grad_norm": 0.314453125, "learning_rate": 0.0011001360150859072, "loss": 1.9723, "step": 19950 }, { "epoch": 0.5353960927436668, "grad_norm": 0.296875, "learning_rate": 0.0011001204900656912, "loss": 1.8752, "step": 19951 }, { "epoch": 0.5354229282954057, "grad_norm": 0.294921875, "learning_rate": 0.0011001049639483577, "loss": 1.8334, "step": 19952 }, { "epoch": 0.5354497638471447, "grad_norm": 0.314453125, "learning_rate": 0.0011000894367339412, "loss": 1.8769, "step": 19953 }, { "epoch": 0.5354765993988836, "grad_norm": 0.310546875, "learning_rate": 0.0011000739084224755, "loss": 1.8306, "step": 19954 }, { "epoch": 0.5355034349506226, "grad_norm": 0.30859375, "learning_rate": 0.0011000583790139948, "loss": 1.8843, "step": 19955 }, { "epoch": 0.5355302705023616, "grad_norm": 0.3046875, "learning_rate": 0.0011000428485085333, "loss": 1.9061, "step": 19956 }, { "epoch": 0.5355571060541005, "grad_norm": 0.310546875, "learning_rate": 0.0011000273169061246, "loss": 1.9149, "step": 19957 }, { "epoch": 0.5355839416058394, "grad_norm": 0.302734375, "learning_rate": 0.0011000117842068034, "loss": 1.7917, "step": 19958 }, { "epoch": 0.5356107771575783, "grad_norm": 0.349609375, "learning_rate": 0.001099996250410603, "loss": 2.0888, "step": 19959 }, { "epoch": 0.5356376127093173, "grad_norm": 0.361328125, "learning_rate": 0.0010999807155175584, "loss": 2.1387, "step": 19960 }, { "epoch": 0.5356644482610562, "grad_norm": 0.333984375, "learning_rate": 0.001099965179527703, "loss": 2.1305, "step": 19961 }, { "epoch": 0.5356912838127952, "grad_norm": 0.33984375, "learning_rate": 0.001099949642441071, "loss": 2.0521, "step": 19962 }, { "epoch": 0.5357181193645342, "grad_norm": 0.318359375, "learning_rate": 0.001099934104257697, "loss": 1.9879, "step": 19963 }, { "epoch": 0.535744954916273, "grad_norm": 0.314453125, "learning_rate": 0.0010999185649776142, "loss": 2.1112, "step": 19964 }, { "epoch": 0.535771790468012, "grad_norm": 0.330078125, "learning_rate": 0.0010999030246008573, "loss": 2.1917, "step": 19965 }, { "epoch": 0.5357986260197509, "grad_norm": 0.31640625, "learning_rate": 0.0010998874831274603, "loss": 2.1373, "step": 19966 }, { "epoch": 0.5358254615714899, "grad_norm": 0.306640625, "learning_rate": 0.001099871940557457, "loss": 1.9904, "step": 19967 }, { "epoch": 0.5358522971232289, "grad_norm": 0.298828125, "learning_rate": 0.001099856396890882, "loss": 2.0569, "step": 19968 }, { "epoch": 0.5358791326749678, "grad_norm": 0.306640625, "learning_rate": 0.001099840852127769, "loss": 2.1076, "step": 19969 }, { "epoch": 0.5359059682267068, "grad_norm": 0.29296875, "learning_rate": 0.0010998253062681521, "loss": 2.113, "step": 19970 }, { "epoch": 0.5359328037784457, "grad_norm": 0.310546875, "learning_rate": 0.001099809759312066, "loss": 2.1192, "step": 19971 }, { "epoch": 0.5359596393301846, "grad_norm": 0.30859375, "learning_rate": 0.0010997942112595438, "loss": 2.0684, "step": 19972 }, { "epoch": 0.5359864748819235, "grad_norm": 0.291015625, "learning_rate": 0.0010997786621106203, "loss": 1.9842, "step": 19973 }, { "epoch": 0.5360133104336625, "grad_norm": 0.306640625, "learning_rate": 0.0010997631118653296, "loss": 2.0147, "step": 19974 }, { "epoch": 0.5360401459854015, "grad_norm": 0.30078125, "learning_rate": 0.0010997475605237055, "loss": 2.0686, "step": 19975 }, { "epoch": 0.5360669815371404, "grad_norm": 0.302734375, "learning_rate": 0.0010997320080857824, "loss": 2.0241, "step": 19976 }, { "epoch": 0.5360938170888794, "grad_norm": 0.294921875, "learning_rate": 0.0010997164545515942, "loss": 2.036, "step": 19977 }, { "epoch": 0.5361206526406183, "grad_norm": 0.31640625, "learning_rate": 0.0010997008999211752, "loss": 2.1473, "step": 19978 }, { "epoch": 0.5361474881923572, "grad_norm": 0.30859375, "learning_rate": 0.0010996853441945593, "loss": 2.1202, "step": 19979 }, { "epoch": 0.5361743237440961, "grad_norm": 0.296875, "learning_rate": 0.001099669787371781, "loss": 1.9829, "step": 19980 }, { "epoch": 0.5362011592958351, "grad_norm": 0.31640625, "learning_rate": 0.001099654229452874, "loss": 2.0566, "step": 19981 }, { "epoch": 0.5362279948475741, "grad_norm": 0.30859375, "learning_rate": 0.0010996386704378728, "loss": 2.1033, "step": 19982 }, { "epoch": 0.536254830399313, "grad_norm": 0.296875, "learning_rate": 0.0010996231103268111, "loss": 2.0546, "step": 19983 }, { "epoch": 0.536281665951052, "grad_norm": 0.296875, "learning_rate": 0.0010996075491197235, "loss": 2.0625, "step": 19984 }, { "epoch": 0.5363085015027909, "grad_norm": 0.3125, "learning_rate": 0.0010995919868166436, "loss": 2.0661, "step": 19985 }, { "epoch": 0.5363353370545298, "grad_norm": 0.291015625, "learning_rate": 0.0010995764234176062, "loss": 2.0248, "step": 19986 }, { "epoch": 0.5363621726062688, "grad_norm": 0.287109375, "learning_rate": 0.001099560858922645, "loss": 1.9574, "step": 19987 }, { "epoch": 0.5363890081580077, "grad_norm": 0.29296875, "learning_rate": 0.0010995452933317944, "loss": 2.0019, "step": 19988 }, { "epoch": 0.5364158437097467, "grad_norm": 0.2890625, "learning_rate": 0.0010995297266450882, "loss": 1.9256, "step": 19989 }, { "epoch": 0.5364426792614856, "grad_norm": 0.294921875, "learning_rate": 0.001099514158862561, "loss": 2.0434, "step": 19990 }, { "epoch": 0.5364695148132246, "grad_norm": 0.294921875, "learning_rate": 0.0010994985899842463, "loss": 1.9969, "step": 19991 }, { "epoch": 0.5364963503649635, "grad_norm": 0.306640625, "learning_rate": 0.0010994830200101787, "loss": 2.0875, "step": 19992 }, { "epoch": 0.5365231859167024, "grad_norm": 0.30078125, "learning_rate": 0.0010994674489403925, "loss": 1.9827, "step": 19993 }, { "epoch": 0.5365500214684414, "grad_norm": 0.298828125, "learning_rate": 0.0010994518767749217, "loss": 2.0368, "step": 19994 }, { "epoch": 0.5365768570201803, "grad_norm": 0.291015625, "learning_rate": 0.0010994363035138003, "loss": 1.9468, "step": 19995 }, { "epoch": 0.5366036925719193, "grad_norm": 0.30859375, "learning_rate": 0.0010994207291570627, "loss": 2.0247, "step": 19996 }, { "epoch": 0.5366305281236582, "grad_norm": 0.296875, "learning_rate": 0.0010994051537047429, "loss": 2.0011, "step": 19997 }, { "epoch": 0.5366573636753972, "grad_norm": 0.298828125, "learning_rate": 0.001099389577156875, "loss": 1.9603, "step": 19998 }, { "epoch": 0.5366841992271361, "grad_norm": 0.296875, "learning_rate": 0.0010993739995134933, "loss": 1.9899, "step": 19999 }, { "epoch": 0.536711034778875, "grad_norm": 0.2890625, "learning_rate": 0.001099358420774632, "loss": 1.9176, "step": 20000 }, { "epoch": 0.536737870330614, "grad_norm": 0.294921875, "learning_rate": 0.0010993428409403253, "loss": 1.9305, "step": 20001 }, { "epoch": 0.5367647058823529, "grad_norm": 0.296875, "learning_rate": 0.0010993272600106074, "loss": 2.0111, "step": 20002 }, { "epoch": 0.5367915414340919, "grad_norm": 0.302734375, "learning_rate": 0.0010993116779855123, "loss": 2.008, "step": 20003 }, { "epoch": 0.5368183769858308, "grad_norm": 0.296875, "learning_rate": 0.0010992960948650742, "loss": 1.948, "step": 20004 }, { "epoch": 0.5368452125375698, "grad_norm": 0.3046875, "learning_rate": 0.0010992805106493275, "loss": 2.0183, "step": 20005 }, { "epoch": 0.5368720480893087, "grad_norm": 0.298828125, "learning_rate": 0.0010992649253383062, "loss": 2.0924, "step": 20006 }, { "epoch": 0.5368988836410477, "grad_norm": 0.296875, "learning_rate": 0.0010992493389320446, "loss": 2.0434, "step": 20007 }, { "epoch": 0.5369257191927866, "grad_norm": 0.296875, "learning_rate": 0.0010992337514305766, "loss": 1.9221, "step": 20008 }, { "epoch": 0.5369525547445255, "grad_norm": 0.291015625, "learning_rate": 0.001099218162833937, "loss": 1.8773, "step": 20009 }, { "epoch": 0.5369793902962645, "grad_norm": 0.287109375, "learning_rate": 0.0010992025731421592, "loss": 1.8814, "step": 20010 }, { "epoch": 0.5370062258480034, "grad_norm": 0.3046875, "learning_rate": 0.0010991869823552781, "loss": 2.0042, "step": 20011 }, { "epoch": 0.5370330613997424, "grad_norm": 0.298828125, "learning_rate": 0.0010991713904733276, "loss": 2.0104, "step": 20012 }, { "epoch": 0.5370598969514814, "grad_norm": 0.306640625, "learning_rate": 0.0010991557974963416, "loss": 2.0329, "step": 20013 }, { "epoch": 0.5370867325032203, "grad_norm": 0.294921875, "learning_rate": 0.0010991402034243552, "loss": 1.9826, "step": 20014 }, { "epoch": 0.5371135680549592, "grad_norm": 0.291015625, "learning_rate": 0.0010991246082574016, "loss": 1.8751, "step": 20015 }, { "epoch": 0.5371404036066981, "grad_norm": 0.30078125, "learning_rate": 0.0010991090119955157, "loss": 2.0122, "step": 20016 }, { "epoch": 0.5371672391584371, "grad_norm": 0.29296875, "learning_rate": 0.0010990934146387313, "loss": 1.9927, "step": 20017 }, { "epoch": 0.537194074710176, "grad_norm": 0.30078125, "learning_rate": 0.001099077816187083, "loss": 1.9393, "step": 20018 }, { "epoch": 0.537220910261915, "grad_norm": 0.294921875, "learning_rate": 0.0010990622166406046, "loss": 1.9517, "step": 20019 }, { "epoch": 0.537247745813654, "grad_norm": 0.298828125, "learning_rate": 0.0010990466159993305, "loss": 1.953, "step": 20020 }, { "epoch": 0.5372745813653929, "grad_norm": 0.298828125, "learning_rate": 0.001099031014263295, "loss": 1.9392, "step": 20021 }, { "epoch": 0.5373014169171318, "grad_norm": 0.2890625, "learning_rate": 0.0010990154114325324, "loss": 1.8825, "step": 20022 }, { "epoch": 0.5373282524688707, "grad_norm": 0.291015625, "learning_rate": 0.0010989998075070766, "loss": 1.9133, "step": 20023 }, { "epoch": 0.5373550880206097, "grad_norm": 0.3125, "learning_rate": 0.0010989842024869622, "loss": 1.9504, "step": 20024 }, { "epoch": 0.5373819235723486, "grad_norm": 0.306640625, "learning_rate": 0.0010989685963722232, "loss": 1.9542, "step": 20025 }, { "epoch": 0.5374087591240876, "grad_norm": 0.298828125, "learning_rate": 0.001098952989162894, "loss": 1.9589, "step": 20026 }, { "epoch": 0.5374355946758266, "grad_norm": 0.310546875, "learning_rate": 0.0010989373808590085, "loss": 1.9782, "step": 20027 }, { "epoch": 0.5374624302275655, "grad_norm": 0.28125, "learning_rate": 0.0010989217714606012, "loss": 1.8234, "step": 20028 }, { "epoch": 0.5374892657793044, "grad_norm": 0.298828125, "learning_rate": 0.0010989061609677066, "loss": 2.0064, "step": 20029 }, { "epoch": 0.5375161013310433, "grad_norm": 0.28515625, "learning_rate": 0.0010988905493803584, "loss": 1.8592, "step": 20030 }, { "epoch": 0.5375429368827823, "grad_norm": 0.291015625, "learning_rate": 0.0010988749366985912, "loss": 1.9314, "step": 20031 }, { "epoch": 0.5375697724345213, "grad_norm": 0.3046875, "learning_rate": 0.0010988593229224392, "loss": 1.9861, "step": 20032 }, { "epoch": 0.5375966079862602, "grad_norm": 0.302734375, "learning_rate": 0.0010988437080519365, "loss": 1.9584, "step": 20033 }, { "epoch": 0.5376234435379992, "grad_norm": 0.3046875, "learning_rate": 0.0010988280920871177, "loss": 1.9788, "step": 20034 }, { "epoch": 0.5376502790897381, "grad_norm": 0.30859375, "learning_rate": 0.0010988124750280165, "loss": 1.959, "step": 20035 }, { "epoch": 0.537677114641477, "grad_norm": 0.291015625, "learning_rate": 0.0010987968568746678, "loss": 1.9331, "step": 20036 }, { "epoch": 0.5377039501932159, "grad_norm": 0.298828125, "learning_rate": 0.0010987812376271053, "loss": 1.9064, "step": 20037 }, { "epoch": 0.5377307857449549, "grad_norm": 0.296875, "learning_rate": 0.0010987656172853637, "loss": 1.8583, "step": 20038 }, { "epoch": 0.5377576212966939, "grad_norm": 0.296875, "learning_rate": 0.0010987499958494769, "loss": 1.8581, "step": 20039 }, { "epoch": 0.5377844568484328, "grad_norm": 0.306640625, "learning_rate": 0.0010987343733194794, "loss": 1.9655, "step": 20040 }, { "epoch": 0.5378112924001718, "grad_norm": 0.283203125, "learning_rate": 0.0010987187496954054, "loss": 1.7843, "step": 20041 }, { "epoch": 0.5378381279519107, "grad_norm": 0.29296875, "learning_rate": 0.0010987031249772893, "loss": 1.907, "step": 20042 }, { "epoch": 0.5378649635036497, "grad_norm": 0.296875, "learning_rate": 0.001098687499165165, "loss": 1.8982, "step": 20043 }, { "epoch": 0.5378917990553885, "grad_norm": 0.30078125, "learning_rate": 0.0010986718722590673, "loss": 2.0002, "step": 20044 }, { "epoch": 0.5379186346071275, "grad_norm": 0.3046875, "learning_rate": 0.0010986562442590303, "loss": 1.9238, "step": 20045 }, { "epoch": 0.5379454701588665, "grad_norm": 0.294921875, "learning_rate": 0.001098640615165088, "loss": 1.8398, "step": 20046 }, { "epoch": 0.5379723057106054, "grad_norm": 0.3046875, "learning_rate": 0.001098624984977275, "loss": 1.9058, "step": 20047 }, { "epoch": 0.5379991412623444, "grad_norm": 0.30078125, "learning_rate": 0.0010986093536956255, "loss": 1.8986, "step": 20048 }, { "epoch": 0.5380259768140833, "grad_norm": 0.294921875, "learning_rate": 0.0010985937213201734, "loss": 1.9357, "step": 20049 }, { "epoch": 0.5380528123658223, "grad_norm": 0.296875, "learning_rate": 0.0010985780878509537, "loss": 1.8092, "step": 20050 }, { "epoch": 0.5380796479175611, "grad_norm": 0.3046875, "learning_rate": 0.0010985624532880004, "loss": 1.9377, "step": 20051 }, { "epoch": 0.5381064834693001, "grad_norm": 0.3046875, "learning_rate": 0.0010985468176313478, "loss": 1.9301, "step": 20052 }, { "epoch": 0.5381333190210391, "grad_norm": 0.298828125, "learning_rate": 0.00109853118088103, "loss": 1.8945, "step": 20053 }, { "epoch": 0.538160154572778, "grad_norm": 0.294921875, "learning_rate": 0.0010985155430370814, "loss": 1.888, "step": 20054 }, { "epoch": 0.538186990124517, "grad_norm": 0.294921875, "learning_rate": 0.0010984999040995365, "loss": 1.8456, "step": 20055 }, { "epoch": 0.5382138256762559, "grad_norm": 0.3046875, "learning_rate": 0.0010984842640684295, "loss": 1.8442, "step": 20056 }, { "epoch": 0.5382406612279949, "grad_norm": 0.306640625, "learning_rate": 0.0010984686229437944, "loss": 1.9666, "step": 20057 }, { "epoch": 0.5382674967797338, "grad_norm": 0.30859375, "learning_rate": 0.001098452980725666, "loss": 1.9407, "step": 20058 }, { "epoch": 0.5382943323314727, "grad_norm": 0.302734375, "learning_rate": 0.0010984373374140785, "loss": 1.888, "step": 20059 }, { "epoch": 0.5383211678832117, "grad_norm": 0.302734375, "learning_rate": 0.001098421693009066, "loss": 1.8787, "step": 20060 }, { "epoch": 0.5383480034349506, "grad_norm": 0.30859375, "learning_rate": 0.001098406047510663, "loss": 1.9055, "step": 20061 }, { "epoch": 0.5383748389866896, "grad_norm": 0.302734375, "learning_rate": 0.0010983904009189038, "loss": 1.9354, "step": 20062 }, { "epoch": 0.5384016745384285, "grad_norm": 0.306640625, "learning_rate": 0.0010983747532338226, "loss": 1.8506, "step": 20063 }, { "epoch": 0.5384285100901675, "grad_norm": 0.302734375, "learning_rate": 0.001098359104455454, "loss": 1.8907, "step": 20064 }, { "epoch": 0.5384553456419064, "grad_norm": 0.3046875, "learning_rate": 0.0010983434545838318, "loss": 1.8361, "step": 20065 }, { "epoch": 0.5384821811936453, "grad_norm": 0.310546875, "learning_rate": 0.0010983278036189909, "loss": 1.9216, "step": 20066 }, { "epoch": 0.5385090167453843, "grad_norm": 0.30859375, "learning_rate": 0.0010983121515609653, "loss": 2.0017, "step": 20067 }, { "epoch": 0.5385358522971232, "grad_norm": 0.298828125, "learning_rate": 0.0010982964984097897, "loss": 1.9245, "step": 20068 }, { "epoch": 0.5385626878488622, "grad_norm": 0.310546875, "learning_rate": 0.001098280844165498, "loss": 1.8594, "step": 20069 }, { "epoch": 0.5385895234006011, "grad_norm": 0.30859375, "learning_rate": 0.0010982651888281248, "loss": 1.8704, "step": 20070 }, { "epoch": 0.5386163589523401, "grad_norm": 0.294921875, "learning_rate": 0.0010982495323977042, "loss": 1.8654, "step": 20071 }, { "epoch": 0.538643194504079, "grad_norm": 0.30078125, "learning_rate": 0.0010982338748742708, "loss": 1.8479, "step": 20072 }, { "epoch": 0.5386700300558179, "grad_norm": 0.33984375, "learning_rate": 0.0010982182162578588, "loss": 2.0746, "step": 20073 }, { "epoch": 0.5386968656075569, "grad_norm": 0.359375, "learning_rate": 0.0010982025565485028, "loss": 2.1389, "step": 20074 }, { "epoch": 0.5387237011592958, "grad_norm": 0.33984375, "learning_rate": 0.0010981868957462366, "loss": 2.1354, "step": 20075 }, { "epoch": 0.5387505367110348, "grad_norm": 0.337890625, "learning_rate": 0.0010981712338510954, "loss": 2.1341, "step": 20076 }, { "epoch": 0.5387773722627737, "grad_norm": 0.33203125, "learning_rate": 0.0010981555708631128, "loss": 2.1832, "step": 20077 }, { "epoch": 0.5388042078145127, "grad_norm": 0.330078125, "learning_rate": 0.0010981399067823236, "loss": 2.0864, "step": 20078 }, { "epoch": 0.5388310433662516, "grad_norm": 0.310546875, "learning_rate": 0.0010981242416087617, "loss": 2.0932, "step": 20079 }, { "epoch": 0.5388578789179905, "grad_norm": 0.306640625, "learning_rate": 0.001098108575342462, "loss": 2.0239, "step": 20080 }, { "epoch": 0.5388847144697295, "grad_norm": 0.306640625, "learning_rate": 0.0010980929079834586, "loss": 2.0706, "step": 20081 }, { "epoch": 0.5389115500214684, "grad_norm": 0.30078125, "learning_rate": 0.001098077239531786, "loss": 2.0231, "step": 20082 }, { "epoch": 0.5389383855732074, "grad_norm": 0.310546875, "learning_rate": 0.0010980615699874782, "loss": 2.1593, "step": 20083 }, { "epoch": 0.5389652211249464, "grad_norm": 0.30859375, "learning_rate": 0.00109804589935057, "loss": 2.0667, "step": 20084 }, { "epoch": 0.5389920566766853, "grad_norm": 0.3046875, "learning_rate": 0.0010980302276210956, "loss": 1.9847, "step": 20085 }, { "epoch": 0.5390188922284243, "grad_norm": 0.302734375, "learning_rate": 0.0010980145547990893, "loss": 2.0801, "step": 20086 }, { "epoch": 0.5390457277801631, "grad_norm": 0.310546875, "learning_rate": 0.001097998880884586, "loss": 2.0884, "step": 20087 }, { "epoch": 0.5390725633319021, "grad_norm": 0.3046875, "learning_rate": 0.0010979832058776193, "loss": 2.0124, "step": 20088 }, { "epoch": 0.539099398883641, "grad_norm": 0.298828125, "learning_rate": 0.0010979675297782242, "loss": 2.0243, "step": 20089 }, { "epoch": 0.53912623443538, "grad_norm": 0.294921875, "learning_rate": 0.0010979518525864346, "loss": 1.9048, "step": 20090 }, { "epoch": 0.539153069987119, "grad_norm": 0.302734375, "learning_rate": 0.0010979361743022852, "loss": 2.0709, "step": 20091 }, { "epoch": 0.5391799055388579, "grad_norm": 0.30859375, "learning_rate": 0.0010979204949258102, "loss": 2.0611, "step": 20092 }, { "epoch": 0.5392067410905969, "grad_norm": 0.30078125, "learning_rate": 0.0010979048144570444, "loss": 2.0856, "step": 20093 }, { "epoch": 0.5392335766423357, "grad_norm": 0.298828125, "learning_rate": 0.0010978891328960218, "loss": 2.0343, "step": 20094 }, { "epoch": 0.5392604121940747, "grad_norm": 0.30078125, "learning_rate": 0.0010978734502427767, "loss": 1.9815, "step": 20095 }, { "epoch": 0.5392872477458136, "grad_norm": 0.298828125, "learning_rate": 0.001097857766497344, "loss": 2.0267, "step": 20096 }, { "epoch": 0.5393140832975526, "grad_norm": 0.30078125, "learning_rate": 0.0010978420816597579, "loss": 1.9697, "step": 20097 }, { "epoch": 0.5393409188492916, "grad_norm": 0.30078125, "learning_rate": 0.0010978263957300526, "loss": 2.0254, "step": 20098 }, { "epoch": 0.5393677544010305, "grad_norm": 0.3046875, "learning_rate": 0.0010978107087082625, "loss": 1.9243, "step": 20099 }, { "epoch": 0.5393945899527695, "grad_norm": 0.30078125, "learning_rate": 0.0010977950205944224, "loss": 1.9466, "step": 20100 }, { "epoch": 0.5394214255045083, "grad_norm": 0.306640625, "learning_rate": 0.0010977793313885664, "loss": 2.0222, "step": 20101 }, { "epoch": 0.5394482610562473, "grad_norm": 0.302734375, "learning_rate": 0.001097763641090729, "loss": 2.1383, "step": 20102 }, { "epoch": 0.5394750966079863, "grad_norm": 0.296875, "learning_rate": 0.0010977479497009447, "loss": 2.0539, "step": 20103 }, { "epoch": 0.5395019321597252, "grad_norm": 0.296875, "learning_rate": 0.0010977322572192476, "loss": 2.0238, "step": 20104 }, { "epoch": 0.5395287677114642, "grad_norm": 0.296875, "learning_rate": 0.0010977165636456726, "loss": 2.07, "step": 20105 }, { "epoch": 0.5395556032632031, "grad_norm": 0.291015625, "learning_rate": 0.0010977008689802537, "loss": 1.8212, "step": 20106 }, { "epoch": 0.5395824388149421, "grad_norm": 0.294921875, "learning_rate": 0.0010976851732230257, "loss": 2.0011, "step": 20107 }, { "epoch": 0.5396092743666809, "grad_norm": 0.296875, "learning_rate": 0.0010976694763740226, "loss": 2.0268, "step": 20108 }, { "epoch": 0.5396361099184199, "grad_norm": 0.294921875, "learning_rate": 0.0010976537784332793, "loss": 1.9756, "step": 20109 }, { "epoch": 0.5396629454701589, "grad_norm": 0.302734375, "learning_rate": 0.00109763807940083, "loss": 2.0479, "step": 20110 }, { "epoch": 0.5396897810218978, "grad_norm": 0.287109375, "learning_rate": 0.0010976223792767094, "loss": 2.0048, "step": 20111 }, { "epoch": 0.5397166165736368, "grad_norm": 0.294921875, "learning_rate": 0.0010976066780609515, "loss": 1.9683, "step": 20112 }, { "epoch": 0.5397434521253757, "grad_norm": 0.3046875, "learning_rate": 0.0010975909757535906, "loss": 2.0268, "step": 20113 }, { "epoch": 0.5397702876771147, "grad_norm": 0.298828125, "learning_rate": 0.001097575272354662, "loss": 1.9758, "step": 20114 }, { "epoch": 0.5397971232288535, "grad_norm": 0.298828125, "learning_rate": 0.0010975595678641991, "loss": 2.0043, "step": 20115 }, { "epoch": 0.5398239587805925, "grad_norm": 0.298828125, "learning_rate": 0.0010975438622822375, "loss": 1.9571, "step": 20116 }, { "epoch": 0.5398507943323315, "grad_norm": 0.298828125, "learning_rate": 0.0010975281556088107, "loss": 1.9895, "step": 20117 }, { "epoch": 0.5398776298840704, "grad_norm": 0.298828125, "learning_rate": 0.0010975124478439537, "loss": 2.0122, "step": 20118 }, { "epoch": 0.5399044654358094, "grad_norm": 0.302734375, "learning_rate": 0.0010974967389877006, "loss": 2.0251, "step": 20119 }, { "epoch": 0.5399313009875483, "grad_norm": 0.30078125, "learning_rate": 0.0010974810290400859, "loss": 1.9323, "step": 20120 }, { "epoch": 0.5399581365392873, "grad_norm": 0.302734375, "learning_rate": 0.0010974653180011445, "loss": 1.9819, "step": 20121 }, { "epoch": 0.5399849720910261, "grad_norm": 0.302734375, "learning_rate": 0.0010974496058709102, "loss": 2.0732, "step": 20122 }, { "epoch": 0.5400118076427651, "grad_norm": 0.29296875, "learning_rate": 0.001097433892649418, "loss": 1.9834, "step": 20123 }, { "epoch": 0.5400386431945041, "grad_norm": 0.29296875, "learning_rate": 0.0010974181783367024, "loss": 1.9538, "step": 20124 }, { "epoch": 0.540065478746243, "grad_norm": 0.291015625, "learning_rate": 0.0010974024629327973, "loss": 1.9211, "step": 20125 }, { "epoch": 0.540092314297982, "grad_norm": 0.296875, "learning_rate": 0.0010973867464377377, "loss": 1.9293, "step": 20126 }, { "epoch": 0.5401191498497209, "grad_norm": 0.291015625, "learning_rate": 0.0010973710288515578, "loss": 1.8881, "step": 20127 }, { "epoch": 0.5401459854014599, "grad_norm": 0.314453125, "learning_rate": 0.0010973553101742922, "loss": 2.005, "step": 20128 }, { "epoch": 0.5401728209531989, "grad_norm": 0.302734375, "learning_rate": 0.0010973395904059755, "loss": 2.0744, "step": 20129 }, { "epoch": 0.5401996565049377, "grad_norm": 0.302734375, "learning_rate": 0.001097323869546642, "loss": 1.941, "step": 20130 }, { "epoch": 0.5402264920566767, "grad_norm": 0.30859375, "learning_rate": 0.0010973081475963264, "loss": 2.0762, "step": 20131 }, { "epoch": 0.5402533276084156, "grad_norm": 0.294921875, "learning_rate": 0.001097292424555063, "loss": 1.9542, "step": 20132 }, { "epoch": 0.5402801631601546, "grad_norm": 0.29296875, "learning_rate": 0.001097276700422886, "loss": 1.9886, "step": 20133 }, { "epoch": 0.5403069987118935, "grad_norm": 0.29296875, "learning_rate": 0.0010972609751998305, "loss": 1.9307, "step": 20134 }, { "epoch": 0.5403338342636325, "grad_norm": 0.294921875, "learning_rate": 0.0010972452488859307, "loss": 1.9635, "step": 20135 }, { "epoch": 0.5403606698153715, "grad_norm": 0.314453125, "learning_rate": 0.001097229521481221, "loss": 1.9827, "step": 20136 }, { "epoch": 0.5403875053671103, "grad_norm": 0.291015625, "learning_rate": 0.001097213792985736, "loss": 1.8753, "step": 20137 }, { "epoch": 0.5404143409188493, "grad_norm": 0.296875, "learning_rate": 0.0010971980633995104, "loss": 1.9173, "step": 20138 }, { "epoch": 0.5404411764705882, "grad_norm": 0.294921875, "learning_rate": 0.0010971823327225784, "loss": 1.8957, "step": 20139 }, { "epoch": 0.5404680120223272, "grad_norm": 0.30078125, "learning_rate": 0.0010971666009549747, "loss": 1.959, "step": 20140 }, { "epoch": 0.5404948475740661, "grad_norm": 0.302734375, "learning_rate": 0.0010971508680967337, "loss": 1.9581, "step": 20141 }, { "epoch": 0.5405216831258051, "grad_norm": 0.29296875, "learning_rate": 0.00109713513414789, "loss": 1.9101, "step": 20142 }, { "epoch": 0.540548518677544, "grad_norm": 0.29296875, "learning_rate": 0.0010971193991084782, "loss": 1.9016, "step": 20143 }, { "epoch": 0.5405753542292829, "grad_norm": 0.298828125, "learning_rate": 0.0010971036629785324, "loss": 1.8814, "step": 20144 }, { "epoch": 0.5406021897810219, "grad_norm": 0.29296875, "learning_rate": 0.0010970879257580877, "loss": 1.9346, "step": 20145 }, { "epoch": 0.5406290253327608, "grad_norm": 0.306640625, "learning_rate": 0.001097072187447178, "loss": 1.9469, "step": 20146 }, { "epoch": 0.5406558608844998, "grad_norm": 0.3046875, "learning_rate": 0.0010970564480458384, "loss": 2.0153, "step": 20147 }, { "epoch": 0.5406826964362387, "grad_norm": 0.296875, "learning_rate": 0.001097040707554103, "loss": 1.9062, "step": 20148 }, { "epoch": 0.5407095319879777, "grad_norm": 0.294921875, "learning_rate": 0.0010970249659720069, "loss": 1.865, "step": 20149 }, { "epoch": 0.5407363675397167, "grad_norm": 0.30859375, "learning_rate": 0.0010970092232995838, "loss": 2.033, "step": 20150 }, { "epoch": 0.5407632030914555, "grad_norm": 0.3046875, "learning_rate": 0.001096993479536869, "loss": 1.9554, "step": 20151 }, { "epoch": 0.5407900386431945, "grad_norm": 0.29296875, "learning_rate": 0.0010969777346838967, "loss": 1.8393, "step": 20152 }, { "epoch": 0.5408168741949334, "grad_norm": 0.28515625, "learning_rate": 0.0010969619887407013, "loss": 1.764, "step": 20153 }, { "epoch": 0.5408437097466724, "grad_norm": 0.3046875, "learning_rate": 0.0010969462417073177, "loss": 1.9306, "step": 20154 }, { "epoch": 0.5408705452984114, "grad_norm": 0.3046875, "learning_rate": 0.00109693049358378, "loss": 1.9079, "step": 20155 }, { "epoch": 0.5408973808501503, "grad_norm": 0.3046875, "learning_rate": 0.001096914744370123, "loss": 1.9138, "step": 20156 }, { "epoch": 0.5409242164018893, "grad_norm": 0.30078125, "learning_rate": 0.0010968989940663817, "loss": 1.8166, "step": 20157 }, { "epoch": 0.5409510519536281, "grad_norm": 0.3046875, "learning_rate": 0.0010968832426725898, "loss": 1.8736, "step": 20158 }, { "epoch": 0.5409778875053671, "grad_norm": 0.3125, "learning_rate": 0.0010968674901887824, "loss": 1.9677, "step": 20159 }, { "epoch": 0.541004723057106, "grad_norm": 0.296875, "learning_rate": 0.0010968517366149939, "loss": 1.9317, "step": 20160 }, { "epoch": 0.541031558608845, "grad_norm": 0.294921875, "learning_rate": 0.0010968359819512586, "loss": 1.8255, "step": 20161 }, { "epoch": 0.541058394160584, "grad_norm": 0.29296875, "learning_rate": 0.0010968202261976115, "loss": 1.8841, "step": 20162 }, { "epoch": 0.5410852297123229, "grad_norm": 0.296875, "learning_rate": 0.0010968044693540871, "loss": 1.8336, "step": 20163 }, { "epoch": 0.5411120652640619, "grad_norm": 0.30078125, "learning_rate": 0.0010967887114207199, "loss": 1.8538, "step": 20164 }, { "epoch": 0.5411389008158007, "grad_norm": 0.30078125, "learning_rate": 0.0010967729523975443, "loss": 1.9255, "step": 20165 }, { "epoch": 0.5411657363675397, "grad_norm": 0.3046875, "learning_rate": 0.0010967571922845948, "loss": 1.8991, "step": 20166 }, { "epoch": 0.5411925719192786, "grad_norm": 0.306640625, "learning_rate": 0.0010967414310819065, "loss": 1.873, "step": 20167 }, { "epoch": 0.5412194074710176, "grad_norm": 0.302734375, "learning_rate": 0.0010967256687895134, "loss": 1.8813, "step": 20168 }, { "epoch": 0.5412462430227566, "grad_norm": 0.30859375, "learning_rate": 0.0010967099054074506, "loss": 1.9049, "step": 20169 }, { "epoch": 0.5412730785744955, "grad_norm": 0.298828125, "learning_rate": 0.001096694140935752, "loss": 1.8494, "step": 20170 }, { "epoch": 0.5412999141262345, "grad_norm": 0.298828125, "learning_rate": 0.001096678375374453, "loss": 1.8539, "step": 20171 }, { "epoch": 0.5413267496779733, "grad_norm": 0.306640625, "learning_rate": 0.0010966626087235876, "loss": 1.8495, "step": 20172 }, { "epoch": 0.5413535852297123, "grad_norm": 0.30859375, "learning_rate": 0.0010966468409831904, "loss": 1.9294, "step": 20173 }, { "epoch": 0.5413804207814513, "grad_norm": 0.3046875, "learning_rate": 0.0010966310721532964, "loss": 1.9278, "step": 20174 }, { "epoch": 0.5414072563331902, "grad_norm": 0.296875, "learning_rate": 0.0010966153022339397, "loss": 1.8225, "step": 20175 }, { "epoch": 0.5414340918849292, "grad_norm": 0.3046875, "learning_rate": 0.0010965995312251552, "loss": 1.8202, "step": 20176 }, { "epoch": 0.5414609274366681, "grad_norm": 0.302734375, "learning_rate": 0.0010965837591269776, "loss": 1.8872, "step": 20177 }, { "epoch": 0.5414877629884071, "grad_norm": 0.30078125, "learning_rate": 0.0010965679859394413, "loss": 1.9249, "step": 20178 }, { "epoch": 0.5415145985401459, "grad_norm": 0.30078125, "learning_rate": 0.001096552211662581, "loss": 1.9198, "step": 20179 }, { "epoch": 0.5415414340918849, "grad_norm": 0.302734375, "learning_rate": 0.001096536436296431, "loss": 1.8877, "step": 20180 }, { "epoch": 0.5415682696436239, "grad_norm": 0.310546875, "learning_rate": 0.0010965206598410261, "loss": 1.908, "step": 20181 }, { "epoch": 0.5415951051953628, "grad_norm": 0.298828125, "learning_rate": 0.0010965048822964012, "loss": 1.8854, "step": 20182 }, { "epoch": 0.5416219407471018, "grad_norm": 0.294921875, "learning_rate": 0.0010964891036625906, "loss": 1.855, "step": 20183 }, { "epoch": 0.5416487762988407, "grad_norm": 0.310546875, "learning_rate": 0.0010964733239396287, "loss": 1.8826, "step": 20184 }, { "epoch": 0.5416756118505797, "grad_norm": 0.365234375, "learning_rate": 0.0010964575431275509, "loss": 2.1409, "step": 20185 }, { "epoch": 0.5417024474023185, "grad_norm": 0.341796875, "learning_rate": 0.0010964417612263908, "loss": 2.0681, "step": 20186 }, { "epoch": 0.5417292829540575, "grad_norm": 0.330078125, "learning_rate": 0.0010964259782361837, "loss": 2.0492, "step": 20187 }, { "epoch": 0.5417561185057965, "grad_norm": 0.3203125, "learning_rate": 0.001096410194156964, "loss": 2.0601, "step": 20188 }, { "epoch": 0.5417829540575354, "grad_norm": 0.326171875, "learning_rate": 0.0010963944089887666, "loss": 2.0839, "step": 20189 }, { "epoch": 0.5418097896092744, "grad_norm": 0.326171875, "learning_rate": 0.0010963786227316259, "loss": 2.0482, "step": 20190 }, { "epoch": 0.5418366251610133, "grad_norm": 0.30859375, "learning_rate": 0.0010963628353855765, "loss": 2.095, "step": 20191 }, { "epoch": 0.5418634607127523, "grad_norm": 0.298828125, "learning_rate": 0.001096347046950653, "loss": 2.0671, "step": 20192 }, { "epoch": 0.5418902962644911, "grad_norm": 0.298828125, "learning_rate": 0.00109633125742689, "loss": 2.0246, "step": 20193 }, { "epoch": 0.5419171318162301, "grad_norm": 0.3125, "learning_rate": 0.0010963154668143223, "loss": 2.1904, "step": 20194 }, { "epoch": 0.5419439673679691, "grad_norm": 0.28515625, "learning_rate": 0.0010962996751129846, "loss": 2.0147, "step": 20195 }, { "epoch": 0.541970802919708, "grad_norm": 0.296875, "learning_rate": 0.0010962838823229113, "loss": 2.0177, "step": 20196 }, { "epoch": 0.541997638471447, "grad_norm": 0.302734375, "learning_rate": 0.0010962680884441375, "loss": 1.9869, "step": 20197 }, { "epoch": 0.5420244740231859, "grad_norm": 0.291015625, "learning_rate": 0.001096252293476697, "loss": 1.9642, "step": 20198 }, { "epoch": 0.5420513095749249, "grad_norm": 0.306640625, "learning_rate": 0.0010962364974206253, "loss": 2.0422, "step": 20199 }, { "epoch": 0.5420781451266639, "grad_norm": 0.30859375, "learning_rate": 0.0010962207002759568, "loss": 2.0569, "step": 20200 }, { "epoch": 0.5421049806784027, "grad_norm": 0.294921875, "learning_rate": 0.0010962049020427258, "loss": 1.9129, "step": 20201 }, { "epoch": 0.5421318162301417, "grad_norm": 0.302734375, "learning_rate": 0.0010961891027209674, "loss": 2.0646, "step": 20202 }, { "epoch": 0.5421586517818806, "grad_norm": 0.302734375, "learning_rate": 0.001096173302310716, "loss": 2.0399, "step": 20203 }, { "epoch": 0.5421854873336196, "grad_norm": 0.291015625, "learning_rate": 0.0010961575008120062, "loss": 2.0095, "step": 20204 }, { "epoch": 0.5422123228853585, "grad_norm": 0.294921875, "learning_rate": 0.0010961416982248732, "loss": 2.0197, "step": 20205 }, { "epoch": 0.5422391584370975, "grad_norm": 0.2890625, "learning_rate": 0.0010961258945493511, "loss": 2.0024, "step": 20206 }, { "epoch": 0.5422659939888365, "grad_norm": 0.291015625, "learning_rate": 0.0010961100897854746, "loss": 2.0554, "step": 20207 }, { "epoch": 0.5422928295405753, "grad_norm": 0.294921875, "learning_rate": 0.0010960942839332787, "loss": 1.9825, "step": 20208 }, { "epoch": 0.5423196650923143, "grad_norm": 0.296875, "learning_rate": 0.001096078476992798, "loss": 2.0, "step": 20209 }, { "epoch": 0.5423465006440532, "grad_norm": 0.294921875, "learning_rate": 0.001096062668964067, "loss": 2.0256, "step": 20210 }, { "epoch": 0.5423733361957922, "grad_norm": 0.294921875, "learning_rate": 0.0010960468598471202, "loss": 2.0431, "step": 20211 }, { "epoch": 0.5424001717475311, "grad_norm": 0.291015625, "learning_rate": 0.0010960310496419927, "loss": 1.9453, "step": 20212 }, { "epoch": 0.5424270072992701, "grad_norm": 0.2890625, "learning_rate": 0.0010960152383487191, "loss": 1.9052, "step": 20213 }, { "epoch": 0.5424538428510091, "grad_norm": 0.283203125, "learning_rate": 0.0010959994259673337, "loss": 1.9003, "step": 20214 }, { "epoch": 0.5424806784027479, "grad_norm": 0.294921875, "learning_rate": 0.001095983612497872, "loss": 1.9859, "step": 20215 }, { "epoch": 0.5425075139544869, "grad_norm": 0.298828125, "learning_rate": 0.0010959677979403676, "loss": 1.9492, "step": 20216 }, { "epoch": 0.5425343495062258, "grad_norm": 0.2890625, "learning_rate": 0.001095951982294856, "loss": 1.9436, "step": 20217 }, { "epoch": 0.5425611850579648, "grad_norm": 0.296875, "learning_rate": 0.0010959361655613717, "loss": 1.9363, "step": 20218 }, { "epoch": 0.5425880206097037, "grad_norm": 0.283203125, "learning_rate": 0.0010959203477399495, "loss": 1.8687, "step": 20219 }, { "epoch": 0.5426148561614427, "grad_norm": 0.298828125, "learning_rate": 0.0010959045288306236, "loss": 1.9984, "step": 20220 }, { "epoch": 0.5426416917131817, "grad_norm": 0.2890625, "learning_rate": 0.0010958887088334294, "loss": 1.9629, "step": 20221 }, { "epoch": 0.5426685272649205, "grad_norm": 0.29296875, "learning_rate": 0.001095872887748401, "loss": 1.9381, "step": 20222 }, { "epoch": 0.5426953628166595, "grad_norm": 0.291015625, "learning_rate": 0.0010958570655755736, "loss": 1.9268, "step": 20223 }, { "epoch": 0.5427221983683984, "grad_norm": 0.29296875, "learning_rate": 0.0010958412423149816, "loss": 1.9331, "step": 20224 }, { "epoch": 0.5427490339201374, "grad_norm": 0.29296875, "learning_rate": 0.00109582541796666, "loss": 1.9212, "step": 20225 }, { "epoch": 0.5427758694718764, "grad_norm": 0.30078125, "learning_rate": 0.001095809592530643, "loss": 1.9339, "step": 20226 }, { "epoch": 0.5428027050236153, "grad_norm": 0.310546875, "learning_rate": 0.0010957937660069656, "loss": 2.1039, "step": 20227 }, { "epoch": 0.5428295405753543, "grad_norm": 0.291015625, "learning_rate": 0.0010957779383956626, "loss": 1.8482, "step": 20228 }, { "epoch": 0.5428563761270931, "grad_norm": 0.29296875, "learning_rate": 0.0010957621096967687, "loss": 1.9718, "step": 20229 }, { "epoch": 0.5428832116788321, "grad_norm": 0.294921875, "learning_rate": 0.0010957462799103188, "loss": 2.0261, "step": 20230 }, { "epoch": 0.542910047230571, "grad_norm": 0.296875, "learning_rate": 0.001095730449036347, "loss": 1.9462, "step": 20231 }, { "epoch": 0.54293688278231, "grad_norm": 0.291015625, "learning_rate": 0.0010957146170748888, "loss": 1.9363, "step": 20232 }, { "epoch": 0.542963718334049, "grad_norm": 0.287109375, "learning_rate": 0.0010956987840259785, "loss": 1.9183, "step": 20233 }, { "epoch": 0.5429905538857879, "grad_norm": 0.29296875, "learning_rate": 0.0010956829498896508, "loss": 1.8527, "step": 20234 }, { "epoch": 0.5430173894375269, "grad_norm": 0.30078125, "learning_rate": 0.0010956671146659404, "loss": 1.9174, "step": 20235 }, { "epoch": 0.5430442249892657, "grad_norm": 0.30859375, "learning_rate": 0.0010956512783548825, "loss": 2.071, "step": 20236 }, { "epoch": 0.5430710605410047, "grad_norm": 0.287109375, "learning_rate": 0.0010956354409565113, "loss": 1.9542, "step": 20237 }, { "epoch": 0.5430978960927436, "grad_norm": 0.302734375, "learning_rate": 0.001095619602470862, "loss": 2.0068, "step": 20238 }, { "epoch": 0.5431247316444826, "grad_norm": 0.298828125, "learning_rate": 0.0010956037628979689, "loss": 1.9106, "step": 20239 }, { "epoch": 0.5431515671962216, "grad_norm": 0.298828125, "learning_rate": 0.0010955879222378669, "loss": 1.9712, "step": 20240 }, { "epoch": 0.5431784027479605, "grad_norm": 0.30078125, "learning_rate": 0.001095572080490591, "loss": 1.9981, "step": 20241 }, { "epoch": 0.5432052382996995, "grad_norm": 0.2890625, "learning_rate": 0.0010955562376561757, "loss": 1.87, "step": 20242 }, { "epoch": 0.5432320738514383, "grad_norm": 0.30078125, "learning_rate": 0.0010955403937346557, "loss": 2.0555, "step": 20243 }, { "epoch": 0.5432589094031773, "grad_norm": 0.291015625, "learning_rate": 0.001095524548726066, "loss": 1.8937, "step": 20244 }, { "epoch": 0.5432857449549163, "grad_norm": 0.296875, "learning_rate": 0.001095508702630441, "loss": 1.9947, "step": 20245 }, { "epoch": 0.5433125805066552, "grad_norm": 0.296875, "learning_rate": 0.0010954928554478159, "loss": 1.8691, "step": 20246 }, { "epoch": 0.5433394160583942, "grad_norm": 0.29296875, "learning_rate": 0.0010954770071782251, "loss": 1.8952, "step": 20247 }, { "epoch": 0.5433662516101331, "grad_norm": 0.294921875, "learning_rate": 0.0010954611578217037, "loss": 1.9415, "step": 20248 }, { "epoch": 0.5433930871618721, "grad_norm": 0.294921875, "learning_rate": 0.0010954453073782862, "loss": 1.8978, "step": 20249 }, { "epoch": 0.543419922713611, "grad_norm": 0.298828125, "learning_rate": 0.0010954294558480073, "loss": 1.9651, "step": 20250 }, { "epoch": 0.5434467582653499, "grad_norm": 0.291015625, "learning_rate": 0.0010954136032309022, "loss": 1.8414, "step": 20251 }, { "epoch": 0.5434735938170889, "grad_norm": 0.291015625, "learning_rate": 0.0010953977495270053, "loss": 1.8511, "step": 20252 }, { "epoch": 0.5435004293688278, "grad_norm": 0.294921875, "learning_rate": 0.0010953818947363513, "loss": 1.9174, "step": 20253 }, { "epoch": 0.5435272649205668, "grad_norm": 0.296875, "learning_rate": 0.0010953660388589755, "loss": 1.8604, "step": 20254 }, { "epoch": 0.5435541004723057, "grad_norm": 0.306640625, "learning_rate": 0.001095350181894912, "loss": 2.02, "step": 20255 }, { "epoch": 0.5435809360240447, "grad_norm": 0.3046875, "learning_rate": 0.0010953343238441964, "loss": 2.0339, "step": 20256 }, { "epoch": 0.5436077715757835, "grad_norm": 0.30078125, "learning_rate": 0.0010953184647068627, "loss": 1.9531, "step": 20257 }, { "epoch": 0.5436346071275225, "grad_norm": 0.302734375, "learning_rate": 0.001095302604482946, "loss": 1.9417, "step": 20258 }, { "epoch": 0.5436614426792615, "grad_norm": 0.294921875, "learning_rate": 0.0010952867431724812, "loss": 1.865, "step": 20259 }, { "epoch": 0.5436882782310004, "grad_norm": 0.298828125, "learning_rate": 0.001095270880775503, "loss": 1.8658, "step": 20260 }, { "epoch": 0.5437151137827394, "grad_norm": 0.298828125, "learning_rate": 0.001095255017292046, "loss": 1.9382, "step": 20261 }, { "epoch": 0.5437419493344783, "grad_norm": 0.296875, "learning_rate": 0.0010952391527221455, "loss": 1.9283, "step": 20262 }, { "epoch": 0.5437687848862173, "grad_norm": 0.3046875, "learning_rate": 0.001095223287065836, "loss": 1.9759, "step": 20263 }, { "epoch": 0.5437956204379562, "grad_norm": 0.314453125, "learning_rate": 0.001095207420323152, "loss": 2.0313, "step": 20264 }, { "epoch": 0.5438224559896951, "grad_norm": 0.294921875, "learning_rate": 0.001095191552494129, "loss": 1.9579, "step": 20265 }, { "epoch": 0.5438492915414341, "grad_norm": 0.296875, "learning_rate": 0.001095175683578801, "loss": 1.8152, "step": 20266 }, { "epoch": 0.543876127093173, "grad_norm": 0.30078125, "learning_rate": 0.0010951598135772035, "loss": 1.9214, "step": 20267 }, { "epoch": 0.543902962644912, "grad_norm": 0.296875, "learning_rate": 0.0010951439424893709, "loss": 1.9371, "step": 20268 }, { "epoch": 0.5439297981966509, "grad_norm": 0.291015625, "learning_rate": 0.0010951280703153383, "loss": 1.8301, "step": 20269 }, { "epoch": 0.5439566337483899, "grad_norm": 0.34765625, "learning_rate": 0.0010951121970551405, "loss": 1.9163, "step": 20270 }, { "epoch": 0.5439834693001289, "grad_norm": 0.302734375, "learning_rate": 0.001095096322708812, "loss": 1.9272, "step": 20271 }, { "epoch": 0.5440103048518677, "grad_norm": 0.3046875, "learning_rate": 0.0010950804472763878, "loss": 1.9508, "step": 20272 }, { "epoch": 0.5440371404036067, "grad_norm": 0.29296875, "learning_rate": 0.0010950645707579027, "loss": 1.7906, "step": 20273 }, { "epoch": 0.5440639759553456, "grad_norm": 0.30859375, "learning_rate": 0.0010950486931533918, "loss": 1.9291, "step": 20274 }, { "epoch": 0.5440908115070846, "grad_norm": 0.298828125, "learning_rate": 0.0010950328144628896, "loss": 1.8722, "step": 20275 }, { "epoch": 0.5441176470588235, "grad_norm": 0.3125, "learning_rate": 0.001095016934686431, "loss": 1.9909, "step": 20276 }, { "epoch": 0.5441444826105625, "grad_norm": 0.30859375, "learning_rate": 0.0010950010538240509, "loss": 1.9094, "step": 20277 }, { "epoch": 0.5441713181623015, "grad_norm": 0.30078125, "learning_rate": 0.0010949851718757844, "loss": 1.8434, "step": 20278 }, { "epoch": 0.5441981537140403, "grad_norm": 0.3046875, "learning_rate": 0.0010949692888416658, "loss": 1.8983, "step": 20279 }, { "epoch": 0.5442249892657793, "grad_norm": 0.314453125, "learning_rate": 0.0010949534047217302, "loss": 1.968, "step": 20280 }, { "epoch": 0.5442518248175182, "grad_norm": 0.29296875, "learning_rate": 0.0010949375195160123, "loss": 1.8142, "step": 20281 }, { "epoch": 0.5442786603692572, "grad_norm": 0.306640625, "learning_rate": 0.0010949216332245475, "loss": 1.8803, "step": 20282 }, { "epoch": 0.5443054959209961, "grad_norm": 0.302734375, "learning_rate": 0.0010949057458473697, "loss": 1.8342, "step": 20283 }, { "epoch": 0.5443323314727351, "grad_norm": 0.30859375, "learning_rate": 0.0010948898573845149, "loss": 1.929, "step": 20284 }, { "epoch": 0.5443591670244741, "grad_norm": 0.302734375, "learning_rate": 0.001094873967836017, "loss": 1.9101, "step": 20285 }, { "epoch": 0.544386002576213, "grad_norm": 0.306640625, "learning_rate": 0.001094858077201911, "loss": 1.8517, "step": 20286 }, { "epoch": 0.5444128381279519, "grad_norm": 0.30859375, "learning_rate": 0.0010948421854822324, "loss": 1.8721, "step": 20287 }, { "epoch": 0.5444396736796908, "grad_norm": 0.314453125, "learning_rate": 0.0010948262926770154, "loss": 1.8704, "step": 20288 }, { "epoch": 0.5444665092314298, "grad_norm": 0.30859375, "learning_rate": 0.0010948103987862953, "loss": 1.8488, "step": 20289 }, { "epoch": 0.5444933447831687, "grad_norm": 0.30078125, "learning_rate": 0.0010947945038101065, "loss": 1.8318, "step": 20290 }, { "epoch": 0.5445201803349077, "grad_norm": 0.30078125, "learning_rate": 0.0010947786077484842, "loss": 1.8908, "step": 20291 }, { "epoch": 0.5445470158866467, "grad_norm": 0.294921875, "learning_rate": 0.0010947627106014633, "loss": 1.8236, "step": 20292 }, { "epoch": 0.5445738514383855, "grad_norm": 0.34765625, "learning_rate": 0.0010947468123690786, "loss": 2.0505, "step": 20293 }, { "epoch": 0.5446006869901245, "grad_norm": 0.38671875, "learning_rate": 0.0010947309130513647, "loss": 2.2546, "step": 20294 }, { "epoch": 0.5446275225418634, "grad_norm": 0.33984375, "learning_rate": 0.001094715012648357, "loss": 2.1, "step": 20295 }, { "epoch": 0.5446543580936024, "grad_norm": 0.31640625, "learning_rate": 0.00109469911116009, "loss": 2.0526, "step": 20296 }, { "epoch": 0.5446811936453414, "grad_norm": 0.318359375, "learning_rate": 0.0010946832085865987, "loss": 2.0502, "step": 20297 }, { "epoch": 0.5447080291970803, "grad_norm": 0.330078125, "learning_rate": 0.0010946673049279179, "loss": 1.9824, "step": 20298 }, { "epoch": 0.5447348647488193, "grad_norm": 0.3203125, "learning_rate": 0.0010946514001840828, "loss": 2.0573, "step": 20299 }, { "epoch": 0.5447617003005581, "grad_norm": 0.3125, "learning_rate": 0.0010946354943551278, "loss": 2.085, "step": 20300 }, { "epoch": 0.5447885358522971, "grad_norm": 0.306640625, "learning_rate": 0.0010946195874410884, "loss": 2.0576, "step": 20301 }, { "epoch": 0.544815371404036, "grad_norm": 0.310546875, "learning_rate": 0.0010946036794419987, "loss": 2.0964, "step": 20302 }, { "epoch": 0.544842206955775, "grad_norm": 0.302734375, "learning_rate": 0.0010945877703578944, "loss": 2.1462, "step": 20303 }, { "epoch": 0.544869042507514, "grad_norm": 0.302734375, "learning_rate": 0.0010945718601888099, "loss": 1.9851, "step": 20304 }, { "epoch": 0.5448958780592529, "grad_norm": 0.306640625, "learning_rate": 0.00109455594893478, "loss": 2.1306, "step": 20305 }, { "epoch": 0.5449227136109919, "grad_norm": 0.294921875, "learning_rate": 0.0010945400365958403, "loss": 2.0178, "step": 20306 }, { "epoch": 0.5449495491627308, "grad_norm": 0.28515625, "learning_rate": 0.001094524123172025, "loss": 1.9745, "step": 20307 }, { "epoch": 0.5449763847144697, "grad_norm": 0.2890625, "learning_rate": 0.0010945082086633692, "loss": 1.9192, "step": 20308 }, { "epoch": 0.5450032202662086, "grad_norm": 0.302734375, "learning_rate": 0.001094492293069908, "loss": 2.0662, "step": 20309 }, { "epoch": 0.5450300558179476, "grad_norm": 0.3046875, "learning_rate": 0.0010944763763916761, "loss": 2.0277, "step": 20310 }, { "epoch": 0.5450568913696866, "grad_norm": 0.294921875, "learning_rate": 0.0010944604586287086, "loss": 1.9737, "step": 20311 }, { "epoch": 0.5450837269214255, "grad_norm": 0.296875, "learning_rate": 0.0010944445397810402, "loss": 1.951, "step": 20312 }, { "epoch": 0.5451105624731645, "grad_norm": 0.29296875, "learning_rate": 0.0010944286198487062, "loss": 1.8795, "step": 20313 }, { "epoch": 0.5451373980249034, "grad_norm": 0.2890625, "learning_rate": 0.001094412698831741, "loss": 1.9639, "step": 20314 }, { "epoch": 0.5451642335766423, "grad_norm": 0.296875, "learning_rate": 0.00109439677673018, "loss": 1.9619, "step": 20315 }, { "epoch": 0.5451910691283813, "grad_norm": 0.30078125, "learning_rate": 0.0010943808535440577, "loss": 2.053, "step": 20316 }, { "epoch": 0.5452179046801202, "grad_norm": 0.302734375, "learning_rate": 0.0010943649292734095, "loss": 2.0083, "step": 20317 }, { "epoch": 0.5452447402318592, "grad_norm": 0.296875, "learning_rate": 0.0010943490039182699, "loss": 2.0724, "step": 20318 }, { "epoch": 0.5452715757835981, "grad_norm": 0.296875, "learning_rate": 0.0010943330774786742, "loss": 1.9711, "step": 20319 }, { "epoch": 0.5452984113353371, "grad_norm": 0.294921875, "learning_rate": 0.0010943171499546572, "loss": 1.963, "step": 20320 }, { "epoch": 0.545325246887076, "grad_norm": 0.291015625, "learning_rate": 0.0010943012213462536, "loss": 1.9679, "step": 20321 }, { "epoch": 0.545352082438815, "grad_norm": 0.30859375, "learning_rate": 0.0010942852916534986, "loss": 2.0936, "step": 20322 }, { "epoch": 0.5453789179905539, "grad_norm": 0.3046875, "learning_rate": 0.001094269360876427, "loss": 2.0339, "step": 20323 }, { "epoch": 0.5454057535422928, "grad_norm": 0.306640625, "learning_rate": 0.001094253429015074, "loss": 1.9984, "step": 20324 }, { "epoch": 0.5454325890940318, "grad_norm": 0.30078125, "learning_rate": 0.0010942374960694744, "loss": 2.0496, "step": 20325 }, { "epoch": 0.5454594246457707, "grad_norm": 0.298828125, "learning_rate": 0.0010942215620396631, "loss": 1.905, "step": 20326 }, { "epoch": 0.5454862601975097, "grad_norm": 0.29296875, "learning_rate": 0.001094205626925675, "loss": 1.9258, "step": 20327 }, { "epoch": 0.5455130957492486, "grad_norm": 0.294921875, "learning_rate": 0.0010941896907275452, "loss": 1.8945, "step": 20328 }, { "epoch": 0.5455399313009875, "grad_norm": 0.3046875, "learning_rate": 0.0010941737534453087, "loss": 2.0317, "step": 20329 }, { "epoch": 0.5455667668527265, "grad_norm": 0.296875, "learning_rate": 0.0010941578150790003, "loss": 1.9062, "step": 20330 }, { "epoch": 0.5455936024044654, "grad_norm": 0.298828125, "learning_rate": 0.001094141875628655, "loss": 2.0285, "step": 20331 }, { "epoch": 0.5456204379562044, "grad_norm": 0.294921875, "learning_rate": 0.0010941259350943078, "loss": 2.0362, "step": 20332 }, { "epoch": 0.5456472735079433, "grad_norm": 0.287109375, "learning_rate": 0.001094109993475994, "loss": 1.9835, "step": 20333 }, { "epoch": 0.5456741090596823, "grad_norm": 0.294921875, "learning_rate": 0.0010940940507737477, "loss": 1.9488, "step": 20334 }, { "epoch": 0.5457009446114212, "grad_norm": 0.294921875, "learning_rate": 0.0010940781069876048, "loss": 1.975, "step": 20335 }, { "epoch": 0.5457277801631601, "grad_norm": 0.2890625, "learning_rate": 0.0010940621621176, "loss": 1.9474, "step": 20336 }, { "epoch": 0.5457546157148991, "grad_norm": 0.298828125, "learning_rate": 0.0010940462161637677, "loss": 1.9653, "step": 20337 }, { "epoch": 0.545781451266638, "grad_norm": 0.298828125, "learning_rate": 0.0010940302691261436, "loss": 1.9717, "step": 20338 }, { "epoch": 0.545808286818377, "grad_norm": 0.2890625, "learning_rate": 0.0010940143210047624, "loss": 1.8927, "step": 20339 }, { "epoch": 0.5458351223701159, "grad_norm": 0.314453125, "learning_rate": 0.001093998371799659, "loss": 2.0731, "step": 20340 }, { "epoch": 0.5458619579218549, "grad_norm": 0.294921875, "learning_rate": 0.0010939824215108688, "loss": 1.8939, "step": 20341 }, { "epoch": 0.5458887934735939, "grad_norm": 0.294921875, "learning_rate": 0.0010939664701384265, "loss": 1.9443, "step": 20342 }, { "epoch": 0.5459156290253327, "grad_norm": 0.302734375, "learning_rate": 0.0010939505176823668, "loss": 1.9895, "step": 20343 }, { "epoch": 0.5459424645770717, "grad_norm": 0.30078125, "learning_rate": 0.0010939345641427253, "loss": 2.0221, "step": 20344 }, { "epoch": 0.5459693001288106, "grad_norm": 0.30078125, "learning_rate": 0.0010939186095195366, "loss": 1.9555, "step": 20345 }, { "epoch": 0.5459961356805496, "grad_norm": 0.28125, "learning_rate": 0.0010939026538128356, "loss": 1.8882, "step": 20346 }, { "epoch": 0.5460229712322885, "grad_norm": 0.298828125, "learning_rate": 0.0010938866970226576, "loss": 1.8901, "step": 20347 }, { "epoch": 0.5460498067840275, "grad_norm": 0.291015625, "learning_rate": 0.0010938707391490376, "loss": 1.9004, "step": 20348 }, { "epoch": 0.5460766423357665, "grad_norm": 0.29296875, "learning_rate": 0.0010938547801920104, "loss": 2.0115, "step": 20349 }, { "epoch": 0.5461034778875054, "grad_norm": 0.30078125, "learning_rate": 0.001093838820151611, "loss": 1.956, "step": 20350 }, { "epoch": 0.5461303134392443, "grad_norm": 0.294921875, "learning_rate": 0.0010938228590278746, "loss": 1.8655, "step": 20351 }, { "epoch": 0.5461571489909832, "grad_norm": 0.296875, "learning_rate": 0.0010938068968208362, "loss": 2.0134, "step": 20352 }, { "epoch": 0.5461839845427222, "grad_norm": 0.30078125, "learning_rate": 0.0010937909335305305, "loss": 1.9185, "step": 20353 }, { "epoch": 0.5462108200944611, "grad_norm": 0.30078125, "learning_rate": 0.001093774969156993, "loss": 1.954, "step": 20354 }, { "epoch": 0.5462376556462001, "grad_norm": 0.298828125, "learning_rate": 0.0010937590037002586, "loss": 1.9332, "step": 20355 }, { "epoch": 0.5462644911979391, "grad_norm": 0.291015625, "learning_rate": 0.0010937430371603621, "loss": 1.8641, "step": 20356 }, { "epoch": 0.546291326749678, "grad_norm": 0.287109375, "learning_rate": 0.0010937270695373384, "loss": 1.9343, "step": 20357 }, { "epoch": 0.5463181623014169, "grad_norm": 0.29296875, "learning_rate": 0.001093711100831223, "loss": 2.0004, "step": 20358 }, { "epoch": 0.5463449978531558, "grad_norm": 0.291015625, "learning_rate": 0.0010936951310420507, "loss": 1.9405, "step": 20359 }, { "epoch": 0.5463718334048948, "grad_norm": 0.3046875, "learning_rate": 0.0010936791601698565, "loss": 1.9419, "step": 20360 }, { "epoch": 0.5463986689566337, "grad_norm": 0.294921875, "learning_rate": 0.0010936631882146755, "loss": 1.9057, "step": 20361 }, { "epoch": 0.5464255045083727, "grad_norm": 0.294921875, "learning_rate": 0.0010936472151765427, "loss": 1.8965, "step": 20362 }, { "epoch": 0.5464523400601117, "grad_norm": 0.2890625, "learning_rate": 0.001093631241055493, "loss": 1.9007, "step": 20363 }, { "epoch": 0.5464791756118506, "grad_norm": 0.287109375, "learning_rate": 0.0010936152658515617, "loss": 1.8115, "step": 20364 }, { "epoch": 0.5465060111635895, "grad_norm": 0.306640625, "learning_rate": 0.0010935992895647836, "loss": 1.9017, "step": 20365 }, { "epoch": 0.5465328467153284, "grad_norm": 0.291015625, "learning_rate": 0.0010935833121951941, "loss": 1.9308, "step": 20366 }, { "epoch": 0.5465596822670674, "grad_norm": 0.30078125, "learning_rate": 0.0010935673337428278, "loss": 1.9124, "step": 20367 }, { "epoch": 0.5465865178188064, "grad_norm": 0.298828125, "learning_rate": 0.0010935513542077201, "loss": 1.8591, "step": 20368 }, { "epoch": 0.5466133533705453, "grad_norm": 0.28515625, "learning_rate": 0.001093535373589906, "loss": 1.7966, "step": 20369 }, { "epoch": 0.5466401889222843, "grad_norm": 0.296875, "learning_rate": 0.0010935193918894205, "loss": 1.8747, "step": 20370 }, { "epoch": 0.5466670244740232, "grad_norm": 0.294921875, "learning_rate": 0.0010935034091062985, "loss": 1.8178, "step": 20371 }, { "epoch": 0.5466938600257621, "grad_norm": 0.298828125, "learning_rate": 0.0010934874252405754, "loss": 1.8662, "step": 20372 }, { "epoch": 0.546720695577501, "grad_norm": 0.30078125, "learning_rate": 0.0010934714402922859, "loss": 1.8815, "step": 20373 }, { "epoch": 0.54674753112924, "grad_norm": 0.3046875, "learning_rate": 0.0010934554542614654, "loss": 1.9135, "step": 20374 }, { "epoch": 0.546774366680979, "grad_norm": 0.294921875, "learning_rate": 0.0010934394671481486, "loss": 1.8099, "step": 20375 }, { "epoch": 0.5468012022327179, "grad_norm": 0.294921875, "learning_rate": 0.001093423478952371, "loss": 1.9091, "step": 20376 }, { "epoch": 0.5468280377844569, "grad_norm": 0.30859375, "learning_rate": 0.0010934074896741671, "loss": 1.991, "step": 20377 }, { "epoch": 0.5468548733361958, "grad_norm": 0.3125, "learning_rate": 0.0010933914993135727, "loss": 1.9918, "step": 20378 }, { "epoch": 0.5468817088879347, "grad_norm": 0.294921875, "learning_rate": 0.0010933755078706225, "loss": 1.8911, "step": 20379 }, { "epoch": 0.5469085444396736, "grad_norm": 0.298828125, "learning_rate": 0.0010933595153453516, "loss": 1.9611, "step": 20380 }, { "epoch": 0.5469353799914126, "grad_norm": 0.291015625, "learning_rate": 0.0010933435217377948, "loss": 1.8053, "step": 20381 }, { "epoch": 0.5469622155431516, "grad_norm": 0.287109375, "learning_rate": 0.0010933275270479876, "loss": 1.8645, "step": 20382 }, { "epoch": 0.5469890510948905, "grad_norm": 0.3046875, "learning_rate": 0.0010933115312759649, "loss": 1.8936, "step": 20383 }, { "epoch": 0.5470158866466295, "grad_norm": 0.29296875, "learning_rate": 0.001093295534421762, "loss": 1.7999, "step": 20384 }, { "epoch": 0.5470427221983684, "grad_norm": 0.310546875, "learning_rate": 0.0010932795364854136, "loss": 1.967, "step": 20385 }, { "epoch": 0.5470695577501073, "grad_norm": 0.30078125, "learning_rate": 0.0010932635374669552, "loss": 1.8812, "step": 20386 }, { "epoch": 0.5470963933018463, "grad_norm": 0.30859375, "learning_rate": 0.0010932475373664216, "loss": 1.8899, "step": 20387 }, { "epoch": 0.5471232288535852, "grad_norm": 0.30078125, "learning_rate": 0.001093231536183848, "loss": 1.8782, "step": 20388 }, { "epoch": 0.5471500644053242, "grad_norm": 0.302734375, "learning_rate": 0.0010932155339192697, "loss": 1.9056, "step": 20389 }, { "epoch": 0.5471768999570631, "grad_norm": 0.30859375, "learning_rate": 0.0010931995305727216, "loss": 1.8387, "step": 20390 }, { "epoch": 0.5472037355088021, "grad_norm": 0.3046875, "learning_rate": 0.0010931835261442386, "loss": 1.8662, "step": 20391 }, { "epoch": 0.547230571060541, "grad_norm": 0.31640625, "learning_rate": 0.0010931675206338563, "loss": 2.0587, "step": 20392 }, { "epoch": 0.54725740661228, "grad_norm": 0.306640625, "learning_rate": 0.0010931515140416093, "loss": 1.879, "step": 20393 }, { "epoch": 0.5472842421640189, "grad_norm": 0.30078125, "learning_rate": 0.0010931355063675333, "loss": 1.8322, "step": 20394 }, { "epoch": 0.5473110777157578, "grad_norm": 0.2890625, "learning_rate": 0.0010931194976116628, "loss": 1.7868, "step": 20395 }, { "epoch": 0.5473379132674968, "grad_norm": 0.322265625, "learning_rate": 0.0010931034877740332, "loss": 2.0017, "step": 20396 }, { "epoch": 0.5473647488192357, "grad_norm": 0.294921875, "learning_rate": 0.0010930874768546798, "loss": 1.8609, "step": 20397 }, { "epoch": 0.5473915843709747, "grad_norm": 0.3125, "learning_rate": 0.0010930714648536375, "loss": 1.8952, "step": 20398 }, { "epoch": 0.5474184199227136, "grad_norm": 0.29296875, "learning_rate": 0.0010930554517709413, "loss": 1.8196, "step": 20399 }, { "epoch": 0.5474452554744526, "grad_norm": 0.3359375, "learning_rate": 0.0010930394376066268, "loss": 1.9156, "step": 20400 }, { "epoch": 0.5474720910261915, "grad_norm": 0.37109375, "learning_rate": 0.0010930234223607285, "loss": 2.1149, "step": 20401 }, { "epoch": 0.5474989265779304, "grad_norm": 0.357421875, "learning_rate": 0.001093007406033282, "loss": 2.1192, "step": 20402 }, { "epoch": 0.5475257621296694, "grad_norm": 0.3203125, "learning_rate": 0.0010929913886243224, "loss": 2.0005, "step": 20403 }, { "epoch": 0.5475525976814083, "grad_norm": 0.333984375, "learning_rate": 0.0010929753701338846, "loss": 2.1177, "step": 20404 }, { "epoch": 0.5475794332331473, "grad_norm": 0.326171875, "learning_rate": 0.001092959350562004, "loss": 2.1599, "step": 20405 }, { "epoch": 0.5476062687848862, "grad_norm": 0.3046875, "learning_rate": 0.0010929433299087154, "loss": 1.9494, "step": 20406 }, { "epoch": 0.5476331043366252, "grad_norm": 0.306640625, "learning_rate": 0.0010929273081740542, "loss": 2.0527, "step": 20407 }, { "epoch": 0.5476599398883641, "grad_norm": 0.306640625, "learning_rate": 0.0010929112853580556, "loss": 2.0301, "step": 20408 }, { "epoch": 0.547686775440103, "grad_norm": 0.302734375, "learning_rate": 0.001092895261460755, "loss": 2.0337, "step": 20409 }, { "epoch": 0.547713610991842, "grad_norm": 0.3046875, "learning_rate": 0.0010928792364821866, "loss": 2.0601, "step": 20410 }, { "epoch": 0.5477404465435809, "grad_norm": 0.287109375, "learning_rate": 0.0010928632104223865, "loss": 1.949, "step": 20411 }, { "epoch": 0.5477672820953199, "grad_norm": 0.310546875, "learning_rate": 0.0010928471832813895, "loss": 2.0312, "step": 20412 }, { "epoch": 0.5477941176470589, "grad_norm": 0.30859375, "learning_rate": 0.0010928311550592305, "loss": 2.1441, "step": 20413 }, { "epoch": 0.5478209531987978, "grad_norm": 0.302734375, "learning_rate": 0.0010928151257559454, "loss": 2.0703, "step": 20414 }, { "epoch": 0.5478477887505367, "grad_norm": 0.30078125, "learning_rate": 0.0010927990953715687, "loss": 2.1205, "step": 20415 }, { "epoch": 0.5478746243022756, "grad_norm": 0.30078125, "learning_rate": 0.0010927830639061355, "loss": 2.0836, "step": 20416 }, { "epoch": 0.5479014598540146, "grad_norm": 0.29296875, "learning_rate": 0.0010927670313596815, "loss": 1.9793, "step": 20417 }, { "epoch": 0.5479282954057535, "grad_norm": 0.298828125, "learning_rate": 0.0010927509977322416, "loss": 2.0274, "step": 20418 }, { "epoch": 0.5479551309574925, "grad_norm": 0.298828125, "learning_rate": 0.001092734963023851, "loss": 2.0305, "step": 20419 }, { "epoch": 0.5479819665092315, "grad_norm": 0.29296875, "learning_rate": 0.001092718927234545, "loss": 1.9659, "step": 20420 }, { "epoch": 0.5480088020609704, "grad_norm": 0.3046875, "learning_rate": 0.0010927028903643583, "loss": 2.0198, "step": 20421 }, { "epoch": 0.5480356376127093, "grad_norm": 0.294921875, "learning_rate": 0.0010926868524133266, "loss": 1.9501, "step": 20422 }, { "epoch": 0.5480624731644482, "grad_norm": 0.294921875, "learning_rate": 0.0010926708133814848, "loss": 2.0206, "step": 20423 }, { "epoch": 0.5480893087161872, "grad_norm": 0.294921875, "learning_rate": 0.0010926547732688682, "loss": 2.0287, "step": 20424 }, { "epoch": 0.5481161442679261, "grad_norm": 0.294921875, "learning_rate": 0.0010926387320755121, "loss": 1.9819, "step": 20425 }, { "epoch": 0.5481429798196651, "grad_norm": 0.291015625, "learning_rate": 0.0010926226898014513, "loss": 1.9046, "step": 20426 }, { "epoch": 0.5481698153714041, "grad_norm": 0.2890625, "learning_rate": 0.0010926066464467215, "loss": 2.0617, "step": 20427 }, { "epoch": 0.548196650923143, "grad_norm": 0.291015625, "learning_rate": 0.0010925906020113575, "loss": 2.0353, "step": 20428 }, { "epoch": 0.548223486474882, "grad_norm": 0.291015625, "learning_rate": 0.0010925745564953947, "loss": 1.9735, "step": 20429 }, { "epoch": 0.5482503220266208, "grad_norm": 0.296875, "learning_rate": 0.0010925585098988681, "loss": 1.9606, "step": 20430 }, { "epoch": 0.5482771575783598, "grad_norm": 0.302734375, "learning_rate": 0.0010925424622218133, "loss": 2.0798, "step": 20431 }, { "epoch": 0.5483039931300988, "grad_norm": 0.30078125, "learning_rate": 0.0010925264134642653, "loss": 1.9973, "step": 20432 }, { "epoch": 0.5483308286818377, "grad_norm": 0.306640625, "learning_rate": 0.001092510363626259, "loss": 2.0392, "step": 20433 }, { "epoch": 0.5483576642335767, "grad_norm": 0.287109375, "learning_rate": 0.00109249431270783, "loss": 1.9406, "step": 20434 }, { "epoch": 0.5483844997853156, "grad_norm": 0.28515625, "learning_rate": 0.0010924782607090132, "loss": 1.8874, "step": 20435 }, { "epoch": 0.5484113353370546, "grad_norm": 0.302734375, "learning_rate": 0.001092462207629844, "loss": 2.0483, "step": 20436 }, { "epoch": 0.5484381708887934, "grad_norm": 0.294921875, "learning_rate": 0.001092446153470358, "loss": 2.0342, "step": 20437 }, { "epoch": 0.5484650064405324, "grad_norm": 0.298828125, "learning_rate": 0.0010924300982305896, "loss": 1.9377, "step": 20438 }, { "epoch": 0.5484918419922714, "grad_norm": 0.283203125, "learning_rate": 0.0010924140419105745, "loss": 1.8362, "step": 20439 }, { "epoch": 0.5485186775440103, "grad_norm": 0.29296875, "learning_rate": 0.001092397984510348, "loss": 1.916, "step": 20440 }, { "epoch": 0.5485455130957493, "grad_norm": 0.2890625, "learning_rate": 0.0010923819260299451, "loss": 1.9068, "step": 20441 }, { "epoch": 0.5485723486474882, "grad_norm": 0.29296875, "learning_rate": 0.001092365866469401, "loss": 1.9604, "step": 20442 }, { "epoch": 0.5485991841992272, "grad_norm": 0.287109375, "learning_rate": 0.0010923498058287512, "loss": 1.9173, "step": 20443 }, { "epoch": 0.548626019750966, "grad_norm": 0.291015625, "learning_rate": 0.0010923337441080309, "loss": 1.9139, "step": 20444 }, { "epoch": 0.548652855302705, "grad_norm": 0.30078125, "learning_rate": 0.0010923176813072751, "loss": 1.9766, "step": 20445 }, { "epoch": 0.548679690854444, "grad_norm": 0.29296875, "learning_rate": 0.001092301617426519, "loss": 1.9795, "step": 20446 }, { "epoch": 0.5487065264061829, "grad_norm": 0.291015625, "learning_rate": 0.001092285552465798, "loss": 2.0062, "step": 20447 }, { "epoch": 0.5487333619579219, "grad_norm": 0.28515625, "learning_rate": 0.0010922694864251474, "loss": 1.8874, "step": 20448 }, { "epoch": 0.5487601975096608, "grad_norm": 0.287109375, "learning_rate": 0.0010922534193046026, "loss": 1.949, "step": 20449 }, { "epoch": 0.5487870330613998, "grad_norm": 0.296875, "learning_rate": 0.0010922373511041985, "loss": 1.9341, "step": 20450 }, { "epoch": 0.5488138686131386, "grad_norm": 0.28515625, "learning_rate": 0.0010922212818239702, "loss": 1.9317, "step": 20451 }, { "epoch": 0.5488407041648776, "grad_norm": 0.287109375, "learning_rate": 0.0010922052114639535, "loss": 1.8819, "step": 20452 }, { "epoch": 0.5488675397166166, "grad_norm": 0.2890625, "learning_rate": 0.0010921891400241834, "loss": 1.913, "step": 20453 }, { "epoch": 0.5488943752683555, "grad_norm": 0.29296875, "learning_rate": 0.0010921730675046949, "loss": 1.9109, "step": 20454 }, { "epoch": 0.5489212108200945, "grad_norm": 0.287109375, "learning_rate": 0.0010921569939055236, "loss": 1.9433, "step": 20455 }, { "epoch": 0.5489480463718334, "grad_norm": 0.3046875, "learning_rate": 0.0010921409192267046, "loss": 1.9338, "step": 20456 }, { "epoch": 0.5489748819235724, "grad_norm": 0.29296875, "learning_rate": 0.0010921248434682734, "loss": 1.9648, "step": 20457 }, { "epoch": 0.5490017174753113, "grad_norm": 0.291015625, "learning_rate": 0.0010921087666302648, "loss": 1.8992, "step": 20458 }, { "epoch": 0.5490285530270502, "grad_norm": 0.29296875, "learning_rate": 0.0010920926887127145, "loss": 1.9047, "step": 20459 }, { "epoch": 0.5490553885787892, "grad_norm": 0.302734375, "learning_rate": 0.0010920766097156577, "loss": 2.0051, "step": 20460 }, { "epoch": 0.5490822241305281, "grad_norm": 0.298828125, "learning_rate": 0.0010920605296391296, "loss": 1.9501, "step": 20461 }, { "epoch": 0.5491090596822671, "grad_norm": 0.291015625, "learning_rate": 0.0010920444484831652, "loss": 1.9261, "step": 20462 }, { "epoch": 0.549135895234006, "grad_norm": 0.296875, "learning_rate": 0.0010920283662478004, "loss": 1.9404, "step": 20463 }, { "epoch": 0.549162730785745, "grad_norm": 0.30078125, "learning_rate": 0.0010920122829330698, "loss": 2.0271, "step": 20464 }, { "epoch": 0.549189566337484, "grad_norm": 0.287109375, "learning_rate": 0.001091996198539009, "loss": 1.8748, "step": 20465 }, { "epoch": 0.5492164018892228, "grad_norm": 0.287109375, "learning_rate": 0.0010919801130656536, "loss": 1.8532, "step": 20466 }, { "epoch": 0.5492432374409618, "grad_norm": 0.291015625, "learning_rate": 0.0010919640265130385, "loss": 1.9112, "step": 20467 }, { "epoch": 0.5492700729927007, "grad_norm": 0.310546875, "learning_rate": 0.0010919479388811987, "loss": 1.968, "step": 20468 }, { "epoch": 0.5492969085444397, "grad_norm": 0.298828125, "learning_rate": 0.0010919318501701704, "loss": 1.9344, "step": 20469 }, { "epoch": 0.5493237440961786, "grad_norm": 0.3046875, "learning_rate": 0.0010919157603799879, "loss": 2.0094, "step": 20470 }, { "epoch": 0.5493505796479176, "grad_norm": 0.296875, "learning_rate": 0.001091899669510687, "loss": 1.9707, "step": 20471 }, { "epoch": 0.5493774151996565, "grad_norm": 0.296875, "learning_rate": 0.0010918835775623032, "loss": 1.9078, "step": 20472 }, { "epoch": 0.5494042507513954, "grad_norm": 0.298828125, "learning_rate": 0.0010918674845348713, "loss": 1.9806, "step": 20473 }, { "epoch": 0.5494310863031344, "grad_norm": 0.302734375, "learning_rate": 0.001091851390428427, "loss": 1.8311, "step": 20474 }, { "epoch": 0.5494579218548733, "grad_norm": 0.296875, "learning_rate": 0.0010918352952430054, "loss": 1.926, "step": 20475 }, { "epoch": 0.5494847574066123, "grad_norm": 0.3046875, "learning_rate": 0.0010918191989786419, "loss": 1.9294, "step": 20476 }, { "epoch": 0.5495115929583512, "grad_norm": 0.294921875, "learning_rate": 0.0010918031016353717, "loss": 1.8624, "step": 20477 }, { "epoch": 0.5495384285100902, "grad_norm": 0.294921875, "learning_rate": 0.0010917870032132303, "loss": 1.9213, "step": 20478 }, { "epoch": 0.5495652640618292, "grad_norm": 0.291015625, "learning_rate": 0.0010917709037122528, "loss": 1.8585, "step": 20479 }, { "epoch": 0.549592099613568, "grad_norm": 0.298828125, "learning_rate": 0.0010917548031324747, "loss": 1.9325, "step": 20480 }, { "epoch": 0.549618935165307, "grad_norm": 0.302734375, "learning_rate": 0.0010917387014739311, "loss": 1.9921, "step": 20481 }, { "epoch": 0.5496457707170459, "grad_norm": 0.291015625, "learning_rate": 0.0010917225987366575, "loss": 1.8463, "step": 20482 }, { "epoch": 0.5496726062687849, "grad_norm": 0.28125, "learning_rate": 0.0010917064949206893, "loss": 1.7865, "step": 20483 }, { "epoch": 0.5496994418205239, "grad_norm": 0.306640625, "learning_rate": 0.0010916903900260616, "loss": 1.9517, "step": 20484 }, { "epoch": 0.5497262773722628, "grad_norm": 0.302734375, "learning_rate": 0.0010916742840528099, "loss": 1.9396, "step": 20485 }, { "epoch": 0.5497531129240018, "grad_norm": 0.3203125, "learning_rate": 0.0010916581770009693, "loss": 1.8668, "step": 20486 }, { "epoch": 0.5497799484757406, "grad_norm": 0.298828125, "learning_rate": 0.0010916420688705756, "loss": 1.9091, "step": 20487 }, { "epoch": 0.5498067840274796, "grad_norm": 0.30078125, "learning_rate": 0.0010916259596616637, "loss": 1.8863, "step": 20488 }, { "epoch": 0.5498336195792185, "grad_norm": 0.29296875, "learning_rate": 0.0010916098493742692, "loss": 1.8965, "step": 20489 }, { "epoch": 0.5498604551309575, "grad_norm": 0.287109375, "learning_rate": 0.001091593738008427, "loss": 1.8237, "step": 20490 }, { "epoch": 0.5498872906826965, "grad_norm": 0.30078125, "learning_rate": 0.0010915776255641732, "loss": 1.9122, "step": 20491 }, { "epoch": 0.5499141262344354, "grad_norm": 0.294921875, "learning_rate": 0.0010915615120415424, "loss": 1.901, "step": 20492 }, { "epoch": 0.5499409617861744, "grad_norm": 0.30078125, "learning_rate": 0.0010915453974405704, "loss": 1.8807, "step": 20493 }, { "epoch": 0.5499677973379132, "grad_norm": 0.28515625, "learning_rate": 0.0010915292817612924, "loss": 1.8736, "step": 20494 }, { "epoch": 0.5499946328896522, "grad_norm": 0.29296875, "learning_rate": 0.0010915131650037436, "loss": 1.8557, "step": 20495 }, { "epoch": 0.5500214684413911, "grad_norm": 0.294921875, "learning_rate": 0.0010914970471679597, "loss": 1.897, "step": 20496 }, { "epoch": 0.5500483039931301, "grad_norm": 0.3046875, "learning_rate": 0.001091480928253976, "loss": 1.7954, "step": 20497 }, { "epoch": 0.5500751395448691, "grad_norm": 0.29296875, "learning_rate": 0.0010914648082618276, "loss": 1.7874, "step": 20498 }, { "epoch": 0.550101975096608, "grad_norm": 0.29296875, "learning_rate": 0.0010914486871915498, "loss": 1.8145, "step": 20499 }, { "epoch": 0.550128810648347, "grad_norm": 0.302734375, "learning_rate": 0.0010914325650431785, "loss": 1.9159, "step": 20500 }, { "epoch": 0.5501556462000858, "grad_norm": 0.298828125, "learning_rate": 0.0010914164418167486, "loss": 1.8225, "step": 20501 }, { "epoch": 0.5501824817518248, "grad_norm": 0.30078125, "learning_rate": 0.0010914003175122955, "loss": 1.8417, "step": 20502 }, { "epoch": 0.5502093173035638, "grad_norm": 0.30859375, "learning_rate": 0.0010913841921298547, "loss": 1.9016, "step": 20503 }, { "epoch": 0.5502361528553027, "grad_norm": 0.306640625, "learning_rate": 0.0010913680656694617, "loss": 1.9024, "step": 20504 }, { "epoch": 0.5502629884070417, "grad_norm": 0.294921875, "learning_rate": 0.0010913519381311517, "loss": 1.8279, "step": 20505 }, { "epoch": 0.5502898239587806, "grad_norm": 0.333984375, "learning_rate": 0.00109133580951496, "loss": 1.8852, "step": 20506 }, { "epoch": 0.5503166595105196, "grad_norm": 0.34375, "learning_rate": 0.001091319679820922, "loss": 2.0468, "step": 20507 }, { "epoch": 0.5503434950622584, "grad_norm": 0.333984375, "learning_rate": 0.0010913035490490734, "loss": 2.0789, "step": 20508 }, { "epoch": 0.5503703306139974, "grad_norm": 0.3203125, "learning_rate": 0.0010912874171994493, "loss": 2.1032, "step": 20509 }, { "epoch": 0.5503971661657364, "grad_norm": 0.31640625, "learning_rate": 0.001091271284272085, "loss": 2.0763, "step": 20510 }, { "epoch": 0.5504240017174753, "grad_norm": 0.3203125, "learning_rate": 0.0010912551502670162, "loss": 2.132, "step": 20511 }, { "epoch": 0.5504508372692143, "grad_norm": 0.30078125, "learning_rate": 0.001091239015184278, "loss": 1.9027, "step": 20512 }, { "epoch": 0.5504776728209532, "grad_norm": 0.3125, "learning_rate": 0.001091222879023906, "loss": 2.0745, "step": 20513 }, { "epoch": 0.5505045083726922, "grad_norm": 0.31640625, "learning_rate": 0.0010912067417859356, "loss": 2.0936, "step": 20514 }, { "epoch": 0.550531343924431, "grad_norm": 0.302734375, "learning_rate": 0.001091190603470402, "loss": 2.0169, "step": 20515 }, { "epoch": 0.55055817947617, "grad_norm": 0.306640625, "learning_rate": 0.0010911744640773409, "loss": 2.1247, "step": 20516 }, { "epoch": 0.550585015027909, "grad_norm": 0.302734375, "learning_rate": 0.0010911583236067873, "loss": 1.9682, "step": 20517 }, { "epoch": 0.5506118505796479, "grad_norm": 0.306640625, "learning_rate": 0.001091142182058777, "loss": 2.0024, "step": 20518 }, { "epoch": 0.5506386861313869, "grad_norm": 0.29296875, "learning_rate": 0.001091126039433345, "loss": 2.0453, "step": 20519 }, { "epoch": 0.5506655216831258, "grad_norm": 0.296875, "learning_rate": 0.0010911098957305273, "loss": 2.0719, "step": 20520 }, { "epoch": 0.5506923572348648, "grad_norm": 0.30859375, "learning_rate": 0.0010910937509503588, "loss": 2.0312, "step": 20521 }, { "epoch": 0.5507191927866036, "grad_norm": 0.296875, "learning_rate": 0.001091077605092875, "loss": 2.0181, "step": 20522 }, { "epoch": 0.5507460283383426, "grad_norm": 0.296875, "learning_rate": 0.0010910614581581117, "loss": 2.0437, "step": 20523 }, { "epoch": 0.5507728638900816, "grad_norm": 0.29296875, "learning_rate": 0.0010910453101461038, "loss": 2.008, "step": 20524 }, { "epoch": 0.5507996994418205, "grad_norm": 0.296875, "learning_rate": 0.001091029161056887, "loss": 2.0639, "step": 20525 }, { "epoch": 0.5508265349935595, "grad_norm": 0.30859375, "learning_rate": 0.0010910130108904967, "loss": 2.092, "step": 20526 }, { "epoch": 0.5508533705452984, "grad_norm": 0.296875, "learning_rate": 0.0010909968596469683, "loss": 1.9424, "step": 20527 }, { "epoch": 0.5508802060970374, "grad_norm": 0.291015625, "learning_rate": 0.0010909807073263373, "loss": 2.0515, "step": 20528 }, { "epoch": 0.5509070416487764, "grad_norm": 0.298828125, "learning_rate": 0.0010909645539286389, "loss": 2.0216, "step": 20529 }, { "epoch": 0.5509338772005152, "grad_norm": 0.29296875, "learning_rate": 0.001090948399453909, "loss": 2.0111, "step": 20530 }, { "epoch": 0.5509607127522542, "grad_norm": 0.29296875, "learning_rate": 0.0010909322439021825, "loss": 2.0265, "step": 20531 }, { "epoch": 0.5509875483039931, "grad_norm": 0.287109375, "learning_rate": 0.0010909160872734952, "loss": 1.9648, "step": 20532 }, { "epoch": 0.5510143838557321, "grad_norm": 0.283203125, "learning_rate": 0.0010908999295678823, "loss": 2.0122, "step": 20533 }, { "epoch": 0.551041219407471, "grad_norm": 0.294921875, "learning_rate": 0.0010908837707853797, "loss": 1.9894, "step": 20534 }, { "epoch": 0.55106805495921, "grad_norm": 0.28125, "learning_rate": 0.0010908676109260224, "loss": 1.8752, "step": 20535 }, { "epoch": 0.551094890510949, "grad_norm": 0.28515625, "learning_rate": 0.0010908514499898458, "loss": 1.952, "step": 20536 }, { "epoch": 0.5511217260626878, "grad_norm": 0.296875, "learning_rate": 0.0010908352879768857, "loss": 2.0619, "step": 20537 }, { "epoch": 0.5511485616144268, "grad_norm": 0.28515625, "learning_rate": 0.0010908191248871772, "loss": 1.9127, "step": 20538 }, { "epoch": 0.5511753971661657, "grad_norm": 0.287109375, "learning_rate": 0.0010908029607207559, "loss": 1.8941, "step": 20539 }, { "epoch": 0.5512022327179047, "grad_norm": 0.28515625, "learning_rate": 0.0010907867954776578, "loss": 1.8591, "step": 20540 }, { "epoch": 0.5512290682696436, "grad_norm": 0.2890625, "learning_rate": 0.0010907706291579174, "loss": 1.9189, "step": 20541 }, { "epoch": 0.5512559038213826, "grad_norm": 0.30078125, "learning_rate": 0.0010907544617615708, "loss": 2.0534, "step": 20542 }, { "epoch": 0.5512827393731216, "grad_norm": 0.294921875, "learning_rate": 0.001090738293288653, "loss": 2.0044, "step": 20543 }, { "epoch": 0.5513095749248604, "grad_norm": 0.291015625, "learning_rate": 0.0010907221237392002, "loss": 1.9036, "step": 20544 }, { "epoch": 0.5513364104765994, "grad_norm": 0.298828125, "learning_rate": 0.0010907059531132472, "loss": 1.9273, "step": 20545 }, { "epoch": 0.5513632460283383, "grad_norm": 0.28125, "learning_rate": 0.0010906897814108299, "loss": 1.9235, "step": 20546 }, { "epoch": 0.5513900815800773, "grad_norm": 0.291015625, "learning_rate": 0.0010906736086319833, "loss": 1.8613, "step": 20547 }, { "epoch": 0.5514169171318162, "grad_norm": 0.291015625, "learning_rate": 0.0010906574347767433, "loss": 2.0036, "step": 20548 }, { "epoch": 0.5514437526835552, "grad_norm": 0.279296875, "learning_rate": 0.0010906412598451452, "loss": 1.8753, "step": 20549 }, { "epoch": 0.5514705882352942, "grad_norm": 0.30078125, "learning_rate": 0.0010906250838372245, "loss": 2.0464, "step": 20550 }, { "epoch": 0.551497423787033, "grad_norm": 0.294921875, "learning_rate": 0.001090608906753017, "loss": 1.9254, "step": 20551 }, { "epoch": 0.551524259338772, "grad_norm": 0.291015625, "learning_rate": 0.0010905927285925574, "loss": 1.9137, "step": 20552 }, { "epoch": 0.5515510948905109, "grad_norm": 0.294921875, "learning_rate": 0.001090576549355882, "loss": 1.935, "step": 20553 }, { "epoch": 0.5515779304422499, "grad_norm": 0.28515625, "learning_rate": 0.001090560369043026, "loss": 1.9267, "step": 20554 }, { "epoch": 0.5516047659939889, "grad_norm": 0.2890625, "learning_rate": 0.0010905441876540246, "loss": 1.9589, "step": 20555 }, { "epoch": 0.5516316015457278, "grad_norm": 0.287109375, "learning_rate": 0.001090528005188914, "loss": 1.8852, "step": 20556 }, { "epoch": 0.5516584370974668, "grad_norm": 0.294921875, "learning_rate": 0.001090511821647729, "loss": 2.0479, "step": 20557 }, { "epoch": 0.5516852726492056, "grad_norm": 0.287109375, "learning_rate": 0.0010904956370305053, "loss": 1.8969, "step": 20558 }, { "epoch": 0.5517121082009446, "grad_norm": 0.291015625, "learning_rate": 0.0010904794513372786, "loss": 1.928, "step": 20559 }, { "epoch": 0.5517389437526835, "grad_norm": 0.291015625, "learning_rate": 0.0010904632645680845, "loss": 1.9737, "step": 20560 }, { "epoch": 0.5517657793044225, "grad_norm": 0.30078125, "learning_rate": 0.001090447076722958, "loss": 1.9166, "step": 20561 }, { "epoch": 0.5517926148561615, "grad_norm": 0.3046875, "learning_rate": 0.0010904308878019351, "loss": 2.0474, "step": 20562 }, { "epoch": 0.5518194504079004, "grad_norm": 0.30078125, "learning_rate": 0.001090414697805051, "loss": 1.8965, "step": 20563 }, { "epoch": 0.5518462859596394, "grad_norm": 0.287109375, "learning_rate": 0.0010903985067323412, "loss": 1.88, "step": 20564 }, { "epoch": 0.5518731215113782, "grad_norm": 0.296875, "learning_rate": 0.0010903823145838416, "loss": 1.9548, "step": 20565 }, { "epoch": 0.5518999570631172, "grad_norm": 0.294921875, "learning_rate": 0.0010903661213595873, "loss": 1.8569, "step": 20566 }, { "epoch": 0.5519267926148561, "grad_norm": 0.298828125, "learning_rate": 0.001090349927059614, "loss": 1.9358, "step": 20567 }, { "epoch": 0.5519536281665951, "grad_norm": 0.294921875, "learning_rate": 0.0010903337316839573, "loss": 1.9224, "step": 20568 }, { "epoch": 0.5519804637183341, "grad_norm": 0.287109375, "learning_rate": 0.0010903175352326526, "loss": 1.8158, "step": 20569 }, { "epoch": 0.552007299270073, "grad_norm": 0.302734375, "learning_rate": 0.0010903013377057357, "loss": 1.9652, "step": 20570 }, { "epoch": 0.552034134821812, "grad_norm": 0.298828125, "learning_rate": 0.0010902851391032416, "loss": 1.9336, "step": 20571 }, { "epoch": 0.5520609703735508, "grad_norm": 0.30078125, "learning_rate": 0.0010902689394252064, "loss": 1.8888, "step": 20572 }, { "epoch": 0.5520878059252898, "grad_norm": 0.294921875, "learning_rate": 0.0010902527386716652, "loss": 1.911, "step": 20573 }, { "epoch": 0.5521146414770288, "grad_norm": 0.287109375, "learning_rate": 0.0010902365368426537, "loss": 1.9059, "step": 20574 }, { "epoch": 0.5521414770287677, "grad_norm": 0.283203125, "learning_rate": 0.0010902203339382076, "loss": 1.8745, "step": 20575 }, { "epoch": 0.5521683125805067, "grad_norm": 0.302734375, "learning_rate": 0.0010902041299583624, "loss": 1.8326, "step": 20576 }, { "epoch": 0.5521951481322456, "grad_norm": 0.30859375, "learning_rate": 0.0010901879249031533, "loss": 1.885, "step": 20577 }, { "epoch": 0.5522219836839846, "grad_norm": 0.298828125, "learning_rate": 0.0010901717187726163, "loss": 1.8435, "step": 20578 }, { "epoch": 0.5522488192357234, "grad_norm": 0.298828125, "learning_rate": 0.0010901555115667867, "loss": 1.917, "step": 20579 }, { "epoch": 0.5522756547874624, "grad_norm": 0.294921875, "learning_rate": 0.0010901393032857, "loss": 1.8951, "step": 20580 }, { "epoch": 0.5523024903392014, "grad_norm": 0.3125, "learning_rate": 0.0010901230939293922, "loss": 1.884, "step": 20581 }, { "epoch": 0.5523293258909403, "grad_norm": 0.302734375, "learning_rate": 0.0010901068834978983, "loss": 1.9231, "step": 20582 }, { "epoch": 0.5523561614426793, "grad_norm": 0.29296875, "learning_rate": 0.001090090671991254, "loss": 1.8716, "step": 20583 }, { "epoch": 0.5523829969944182, "grad_norm": 0.294921875, "learning_rate": 0.0010900744594094952, "loss": 1.8226, "step": 20584 }, { "epoch": 0.5524098325461572, "grad_norm": 0.30078125, "learning_rate": 0.001090058245752657, "loss": 1.9667, "step": 20585 }, { "epoch": 0.552436668097896, "grad_norm": 0.30859375, "learning_rate": 0.0010900420310207754, "loss": 1.9481, "step": 20586 }, { "epoch": 0.552463503649635, "grad_norm": 0.298828125, "learning_rate": 0.0010900258152138854, "loss": 1.9537, "step": 20587 }, { "epoch": 0.552490339201374, "grad_norm": 0.294921875, "learning_rate": 0.0010900095983320233, "loss": 1.9056, "step": 20588 }, { "epoch": 0.5525171747531129, "grad_norm": 0.294921875, "learning_rate": 0.0010899933803752242, "loss": 1.8549, "step": 20589 }, { "epoch": 0.5525440103048519, "grad_norm": 0.2890625, "learning_rate": 0.0010899771613435236, "loss": 1.8517, "step": 20590 }, { "epoch": 0.5525708458565908, "grad_norm": 0.298828125, "learning_rate": 0.0010899609412369576, "loss": 1.8383, "step": 20591 }, { "epoch": 0.5525976814083298, "grad_norm": 0.2890625, "learning_rate": 0.001089944720055561, "loss": 1.8894, "step": 20592 }, { "epoch": 0.5526245169600686, "grad_norm": 0.3046875, "learning_rate": 0.00108992849779937, "loss": 1.8995, "step": 20593 }, { "epoch": 0.5526513525118076, "grad_norm": 0.3046875, "learning_rate": 0.0010899122744684202, "loss": 1.8893, "step": 20594 }, { "epoch": 0.5526781880635466, "grad_norm": 0.294921875, "learning_rate": 0.0010898960500627468, "loss": 1.8297, "step": 20595 }, { "epoch": 0.5527050236152855, "grad_norm": 0.302734375, "learning_rate": 0.0010898798245823856, "loss": 1.8293, "step": 20596 }, { "epoch": 0.5527318591670245, "grad_norm": 0.30078125, "learning_rate": 0.0010898635980273726, "loss": 1.7894, "step": 20597 }, { "epoch": 0.5527586947187634, "grad_norm": 0.291015625, "learning_rate": 0.0010898473703977426, "loss": 1.8112, "step": 20598 }, { "epoch": 0.5527855302705024, "grad_norm": 0.302734375, "learning_rate": 0.0010898311416935316, "loss": 1.8462, "step": 20599 }, { "epoch": 0.5528123658222414, "grad_norm": 0.302734375, "learning_rate": 0.0010898149119147754, "loss": 1.8734, "step": 20600 }, { "epoch": 0.5528392013739802, "grad_norm": 0.294921875, "learning_rate": 0.0010897986810615092, "loss": 1.8376, "step": 20601 }, { "epoch": 0.5528660369257192, "grad_norm": 0.3046875, "learning_rate": 0.0010897824491337687, "loss": 1.8624, "step": 20602 }, { "epoch": 0.5528928724774581, "grad_norm": 0.302734375, "learning_rate": 0.0010897662161315897, "loss": 1.9081, "step": 20603 }, { "epoch": 0.5529197080291971, "grad_norm": 0.29296875, "learning_rate": 0.0010897499820550077, "loss": 1.8294, "step": 20604 }, { "epoch": 0.552946543580936, "grad_norm": 0.29296875, "learning_rate": 0.0010897337469040584, "loss": 1.8203, "step": 20605 }, { "epoch": 0.552973379132675, "grad_norm": 0.296875, "learning_rate": 0.0010897175106787773, "loss": 1.8218, "step": 20606 }, { "epoch": 0.553000214684414, "grad_norm": 0.302734375, "learning_rate": 0.0010897012733792, "loss": 1.8159, "step": 20607 }, { "epoch": 0.5530270502361528, "grad_norm": 0.302734375, "learning_rate": 0.0010896850350053623, "loss": 1.9011, "step": 20608 }, { "epoch": 0.5530538857878918, "grad_norm": 0.302734375, "learning_rate": 0.0010896687955572995, "loss": 1.8159, "step": 20609 }, { "epoch": 0.5530807213396307, "grad_norm": 0.302734375, "learning_rate": 0.0010896525550350476, "loss": 1.8291, "step": 20610 }, { "epoch": 0.5531075568913697, "grad_norm": 0.35546875, "learning_rate": 0.001089636313438642, "loss": 1.9936, "step": 20611 }, { "epoch": 0.5531343924431086, "grad_norm": 0.35546875, "learning_rate": 0.0010896200707681183, "loss": 2.1566, "step": 20612 }, { "epoch": 0.5531612279948476, "grad_norm": 0.341796875, "learning_rate": 0.0010896038270235124, "loss": 2.0382, "step": 20613 }, { "epoch": 0.5531880635465866, "grad_norm": 0.32421875, "learning_rate": 0.0010895875822048596, "loss": 2.116, "step": 20614 }, { "epoch": 0.5532148990983254, "grad_norm": 0.31640625, "learning_rate": 0.0010895713363121957, "loss": 2.0296, "step": 20615 }, { "epoch": 0.5532417346500644, "grad_norm": 0.314453125, "learning_rate": 0.0010895550893455564, "loss": 2.0167, "step": 20616 }, { "epoch": 0.5532685702018033, "grad_norm": 0.30859375, "learning_rate": 0.0010895388413049774, "loss": 2.0239, "step": 20617 }, { "epoch": 0.5532954057535423, "grad_norm": 0.29296875, "learning_rate": 0.001089522592190494, "loss": 1.969, "step": 20618 }, { "epoch": 0.5533222413052812, "grad_norm": 0.3046875, "learning_rate": 0.0010895063420021422, "loss": 1.9826, "step": 20619 }, { "epoch": 0.5533490768570202, "grad_norm": 0.29296875, "learning_rate": 0.0010894900907399572, "loss": 2.0423, "step": 20620 }, { "epoch": 0.5533759124087592, "grad_norm": 0.294921875, "learning_rate": 0.001089473838403975, "loss": 1.9946, "step": 20621 }, { "epoch": 0.553402747960498, "grad_norm": 0.30078125, "learning_rate": 0.0010894575849942315, "loss": 2.0423, "step": 20622 }, { "epoch": 0.553429583512237, "grad_norm": 0.29296875, "learning_rate": 0.0010894413305107618, "loss": 1.978, "step": 20623 }, { "epoch": 0.5534564190639759, "grad_norm": 0.30078125, "learning_rate": 0.001089425074953602, "loss": 2.0472, "step": 20624 }, { "epoch": 0.5534832546157149, "grad_norm": 0.287109375, "learning_rate": 0.0010894088183227874, "loss": 1.988, "step": 20625 }, { "epoch": 0.5535100901674539, "grad_norm": 0.2890625, "learning_rate": 0.0010893925606183538, "loss": 2.0858, "step": 20626 }, { "epoch": 0.5535369257191928, "grad_norm": 0.294921875, "learning_rate": 0.0010893763018403372, "loss": 1.9858, "step": 20627 }, { "epoch": 0.5535637612709318, "grad_norm": 0.294921875, "learning_rate": 0.0010893600419887729, "loss": 1.9071, "step": 20628 }, { "epoch": 0.5535905968226706, "grad_norm": 0.2890625, "learning_rate": 0.0010893437810636966, "loss": 1.9592, "step": 20629 }, { "epoch": 0.5536174323744096, "grad_norm": 0.326171875, "learning_rate": 0.0010893275190651437, "loss": 2.0135, "step": 20630 }, { "epoch": 0.5536442679261485, "grad_norm": 0.302734375, "learning_rate": 0.0010893112559931506, "loss": 2.0696, "step": 20631 }, { "epoch": 0.5536711034778875, "grad_norm": 0.296875, "learning_rate": 0.0010892949918477523, "loss": 2.1053, "step": 20632 }, { "epoch": 0.5536979390296265, "grad_norm": 0.28515625, "learning_rate": 0.0010892787266289848, "loss": 2.0471, "step": 20633 }, { "epoch": 0.5537247745813654, "grad_norm": 0.2890625, "learning_rate": 0.0010892624603368838, "loss": 1.9312, "step": 20634 }, { "epoch": 0.5537516101331044, "grad_norm": 0.28515625, "learning_rate": 0.0010892461929714848, "loss": 2.0003, "step": 20635 }, { "epoch": 0.5537784456848432, "grad_norm": 0.291015625, "learning_rate": 0.0010892299245328237, "loss": 1.9982, "step": 20636 }, { "epoch": 0.5538052812365822, "grad_norm": 0.306640625, "learning_rate": 0.001089213655020936, "loss": 2.0811, "step": 20637 }, { "epoch": 0.5538321167883211, "grad_norm": 0.2890625, "learning_rate": 0.0010891973844358576, "loss": 1.9597, "step": 20638 }, { "epoch": 0.5538589523400601, "grad_norm": 0.2890625, "learning_rate": 0.0010891811127776239, "loss": 2.0218, "step": 20639 }, { "epoch": 0.5538857878917991, "grad_norm": 0.28515625, "learning_rate": 0.001089164840046271, "loss": 1.9587, "step": 20640 }, { "epoch": 0.553912623443538, "grad_norm": 0.2890625, "learning_rate": 0.001089148566241834, "loss": 1.9294, "step": 20641 }, { "epoch": 0.553939458995277, "grad_norm": 0.27734375, "learning_rate": 0.0010891322913643491, "loss": 1.903, "step": 20642 }, { "epoch": 0.5539662945470158, "grad_norm": 0.287109375, "learning_rate": 0.0010891160154138518, "loss": 1.9052, "step": 20643 }, { "epoch": 0.5539931300987548, "grad_norm": 0.296875, "learning_rate": 0.0010890997383903782, "loss": 2.0062, "step": 20644 }, { "epoch": 0.5540199656504938, "grad_norm": 0.29296875, "learning_rate": 0.0010890834602939633, "loss": 1.9503, "step": 20645 }, { "epoch": 0.5540468012022327, "grad_norm": 0.287109375, "learning_rate": 0.0010890671811246435, "loss": 1.9719, "step": 20646 }, { "epoch": 0.5540736367539717, "grad_norm": 0.28515625, "learning_rate": 0.001089050900882454, "loss": 1.9328, "step": 20647 }, { "epoch": 0.5541004723057106, "grad_norm": 0.283203125, "learning_rate": 0.0010890346195674306, "loss": 1.9335, "step": 20648 }, { "epoch": 0.5541273078574496, "grad_norm": 0.287109375, "learning_rate": 0.0010890183371796093, "loss": 1.9495, "step": 20649 }, { "epoch": 0.5541541434091884, "grad_norm": 0.279296875, "learning_rate": 0.0010890020537190255, "loss": 1.8826, "step": 20650 }, { "epoch": 0.5541809789609274, "grad_norm": 0.30078125, "learning_rate": 0.0010889857691857151, "loss": 1.9742, "step": 20651 }, { "epoch": 0.5542078145126664, "grad_norm": 0.287109375, "learning_rate": 0.0010889694835797138, "loss": 1.934, "step": 20652 }, { "epoch": 0.5542346500644053, "grad_norm": 0.291015625, "learning_rate": 0.0010889531969010572, "loss": 1.9629, "step": 20653 }, { "epoch": 0.5542614856161443, "grad_norm": 0.296875, "learning_rate": 0.0010889369091497813, "loss": 1.9905, "step": 20654 }, { "epoch": 0.5542883211678832, "grad_norm": 0.279296875, "learning_rate": 0.0010889206203259217, "loss": 1.8832, "step": 20655 }, { "epoch": 0.5543151567196222, "grad_norm": 0.30859375, "learning_rate": 0.001088904330429514, "loss": 2.0729, "step": 20656 }, { "epoch": 0.554341992271361, "grad_norm": 0.296875, "learning_rate": 0.001088888039460594, "loss": 1.9631, "step": 20657 }, { "epoch": 0.5543688278231, "grad_norm": 0.287109375, "learning_rate": 0.0010888717474191976, "loss": 1.9144, "step": 20658 }, { "epoch": 0.554395663374839, "grad_norm": 0.296875, "learning_rate": 0.0010888554543053604, "loss": 1.9468, "step": 20659 }, { "epoch": 0.5544224989265779, "grad_norm": 0.291015625, "learning_rate": 0.001088839160119118, "loss": 1.941, "step": 20660 }, { "epoch": 0.5544493344783169, "grad_norm": 0.291015625, "learning_rate": 0.0010888228648605065, "loss": 1.9141, "step": 20661 }, { "epoch": 0.5544761700300558, "grad_norm": 0.29296875, "learning_rate": 0.0010888065685295612, "loss": 1.9517, "step": 20662 }, { "epoch": 0.5545030055817948, "grad_norm": 0.28515625, "learning_rate": 0.0010887902711263183, "loss": 1.8594, "step": 20663 }, { "epoch": 0.5545298411335337, "grad_norm": 0.30078125, "learning_rate": 0.0010887739726508133, "loss": 1.963, "step": 20664 }, { "epoch": 0.5545566766852726, "grad_norm": 0.3046875, "learning_rate": 0.001088757673103082, "loss": 2.0307, "step": 20665 }, { "epoch": 0.5545835122370116, "grad_norm": 0.296875, "learning_rate": 0.0010887413724831603, "loss": 1.9787, "step": 20666 }, { "epoch": 0.5546103477887505, "grad_norm": 0.3046875, "learning_rate": 0.0010887250707910837, "loss": 1.8972, "step": 20667 }, { "epoch": 0.5546371833404895, "grad_norm": 0.2890625, "learning_rate": 0.001088708768026888, "loss": 1.9999, "step": 20668 }, { "epoch": 0.5546640188922284, "grad_norm": 0.283203125, "learning_rate": 0.0010886924641906092, "loss": 1.8789, "step": 20669 }, { "epoch": 0.5546908544439674, "grad_norm": 0.29296875, "learning_rate": 0.0010886761592822827, "loss": 1.966, "step": 20670 }, { "epoch": 0.5547176899957064, "grad_norm": 0.29296875, "learning_rate": 0.0010886598533019446, "loss": 1.904, "step": 20671 }, { "epoch": 0.5547445255474452, "grad_norm": 0.28515625, "learning_rate": 0.0010886435462496306, "loss": 1.9309, "step": 20672 }, { "epoch": 0.5547713610991842, "grad_norm": 0.2890625, "learning_rate": 0.0010886272381253764, "loss": 1.904, "step": 20673 }, { "epoch": 0.5547981966509231, "grad_norm": 0.294921875, "learning_rate": 0.0010886109289292177, "loss": 1.9401, "step": 20674 }, { "epoch": 0.5548250322026621, "grad_norm": 0.3046875, "learning_rate": 0.0010885946186611904, "loss": 1.9903, "step": 20675 }, { "epoch": 0.554851867754401, "grad_norm": 0.302734375, "learning_rate": 0.0010885783073213303, "loss": 1.9701, "step": 20676 }, { "epoch": 0.55487870330614, "grad_norm": 0.28515625, "learning_rate": 0.0010885619949096731, "loss": 1.7339, "step": 20677 }, { "epoch": 0.554905538857879, "grad_norm": 0.296875, "learning_rate": 0.0010885456814262546, "loss": 1.9868, "step": 20678 }, { "epoch": 0.5549323744096178, "grad_norm": 0.302734375, "learning_rate": 0.0010885293668711107, "loss": 1.9687, "step": 20679 }, { "epoch": 0.5549592099613568, "grad_norm": 0.29296875, "learning_rate": 0.0010885130512442772, "loss": 1.8325, "step": 20680 }, { "epoch": 0.5549860455130957, "grad_norm": 0.291015625, "learning_rate": 0.0010884967345457896, "loss": 1.9176, "step": 20681 }, { "epoch": 0.5550128810648347, "grad_norm": 0.287109375, "learning_rate": 0.0010884804167756838, "loss": 1.8823, "step": 20682 }, { "epoch": 0.5550397166165736, "grad_norm": 0.2890625, "learning_rate": 0.0010884640979339957, "loss": 1.892, "step": 20683 }, { "epoch": 0.5550665521683126, "grad_norm": 0.294921875, "learning_rate": 0.0010884477780207613, "loss": 1.898, "step": 20684 }, { "epoch": 0.5550933877200516, "grad_norm": 0.298828125, "learning_rate": 0.001088431457036016, "loss": 1.9803, "step": 20685 }, { "epoch": 0.5551202232717904, "grad_norm": 0.296875, "learning_rate": 0.0010884151349797957, "loss": 1.875, "step": 20686 }, { "epoch": 0.5551470588235294, "grad_norm": 0.291015625, "learning_rate": 0.0010883988118521367, "loss": 1.8574, "step": 20687 }, { "epoch": 0.5551738943752683, "grad_norm": 0.302734375, "learning_rate": 0.001088382487653074, "loss": 2.0332, "step": 20688 }, { "epoch": 0.5552007299270073, "grad_norm": 0.29296875, "learning_rate": 0.001088366162382644, "loss": 1.9021, "step": 20689 }, { "epoch": 0.5552275654787462, "grad_norm": 0.3046875, "learning_rate": 0.001088349836040882, "loss": 1.9546, "step": 20690 }, { "epoch": 0.5552544010304852, "grad_norm": 0.294921875, "learning_rate": 0.0010883335086278245, "loss": 1.9186, "step": 20691 }, { "epoch": 0.5552812365822242, "grad_norm": 0.28125, "learning_rate": 0.0010883171801435068, "loss": 1.8248, "step": 20692 }, { "epoch": 0.555308072133963, "grad_norm": 0.298828125, "learning_rate": 0.001088300850587965, "loss": 1.9536, "step": 20693 }, { "epoch": 0.555334907685702, "grad_norm": 0.294921875, "learning_rate": 0.0010882845199612346, "loss": 1.8845, "step": 20694 }, { "epoch": 0.5553617432374409, "grad_norm": 0.2890625, "learning_rate": 0.0010882681882633516, "loss": 1.8498, "step": 20695 }, { "epoch": 0.5553885787891799, "grad_norm": 0.302734375, "learning_rate": 0.0010882518554943522, "loss": 1.9116, "step": 20696 }, { "epoch": 0.5554154143409189, "grad_norm": 0.30078125, "learning_rate": 0.0010882355216542715, "loss": 1.8932, "step": 20697 }, { "epoch": 0.5554422498926578, "grad_norm": 0.294921875, "learning_rate": 0.001088219186743146, "loss": 1.8857, "step": 20698 }, { "epoch": 0.5554690854443968, "grad_norm": 0.298828125, "learning_rate": 0.001088202850761011, "loss": 1.8617, "step": 20699 }, { "epoch": 0.5554959209961357, "grad_norm": 0.302734375, "learning_rate": 0.0010881865137079027, "loss": 1.9424, "step": 20700 }, { "epoch": 0.5555227565478746, "grad_norm": 0.3046875, "learning_rate": 0.0010881701755838568, "loss": 1.9173, "step": 20701 }, { "epoch": 0.5555495920996135, "grad_norm": 0.291015625, "learning_rate": 0.0010881538363889092, "loss": 1.896, "step": 20702 }, { "epoch": 0.5555764276513525, "grad_norm": 0.298828125, "learning_rate": 0.0010881374961230954, "loss": 1.9684, "step": 20703 }, { "epoch": 0.5556032632030915, "grad_norm": 0.306640625, "learning_rate": 0.0010881211547864517, "loss": 1.954, "step": 20704 }, { "epoch": 0.5556300987548304, "grad_norm": 0.291015625, "learning_rate": 0.001088104812379014, "loss": 1.8174, "step": 20705 }, { "epoch": 0.5556569343065694, "grad_norm": 0.296875, "learning_rate": 0.0010880884689008178, "loss": 1.8892, "step": 20706 }, { "epoch": 0.5556837698583083, "grad_norm": 0.306640625, "learning_rate": 0.001088072124351899, "loss": 1.9079, "step": 20707 }, { "epoch": 0.5557106054100472, "grad_norm": 0.302734375, "learning_rate": 0.0010880557787322938, "loss": 1.9063, "step": 20708 }, { "epoch": 0.5557374409617861, "grad_norm": 0.31640625, "learning_rate": 0.0010880394320420376, "loss": 1.847, "step": 20709 }, { "epoch": 0.5557642765135251, "grad_norm": 0.294921875, "learning_rate": 0.0010880230842811665, "loss": 1.9065, "step": 20710 }, { "epoch": 0.5557911120652641, "grad_norm": 0.30078125, "learning_rate": 0.0010880067354497164, "loss": 1.8041, "step": 20711 }, { "epoch": 0.555817947617003, "grad_norm": 0.30078125, "learning_rate": 0.0010879903855477231, "loss": 1.9063, "step": 20712 }, { "epoch": 0.555844783168742, "grad_norm": 0.37109375, "learning_rate": 0.0010879740345752225, "loss": 2.1903, "step": 20713 }, { "epoch": 0.5558716187204809, "grad_norm": 0.357421875, "learning_rate": 0.0010879576825322502, "loss": 2.0474, "step": 20714 }, { "epoch": 0.5558984542722198, "grad_norm": 0.3203125, "learning_rate": 0.0010879413294188425, "loss": 2.0127, "step": 20715 }, { "epoch": 0.5559252898239588, "grad_norm": 0.326171875, "learning_rate": 0.0010879249752350352, "loss": 2.0695, "step": 20716 }, { "epoch": 0.5559521253756977, "grad_norm": 0.310546875, "learning_rate": 0.0010879086199808638, "loss": 2.0567, "step": 20717 }, { "epoch": 0.5559789609274367, "grad_norm": 0.33203125, "learning_rate": 0.0010878922636563646, "loss": 2.1075, "step": 20718 }, { "epoch": 0.5560057964791756, "grad_norm": 0.30859375, "learning_rate": 0.0010878759062615732, "loss": 2.1064, "step": 20719 }, { "epoch": 0.5560326320309146, "grad_norm": 0.310546875, "learning_rate": 0.0010878595477965256, "loss": 2.1158, "step": 20720 }, { "epoch": 0.5560594675826535, "grad_norm": 0.30078125, "learning_rate": 0.0010878431882612577, "loss": 2.0733, "step": 20721 }, { "epoch": 0.5560863031343924, "grad_norm": 0.291015625, "learning_rate": 0.0010878268276558055, "loss": 2.0204, "step": 20722 }, { "epoch": 0.5561131386861314, "grad_norm": 0.3046875, "learning_rate": 0.0010878104659802045, "loss": 2.0957, "step": 20723 }, { "epoch": 0.5561399742378703, "grad_norm": 0.296875, "learning_rate": 0.001087794103234491, "loss": 2.0603, "step": 20724 }, { "epoch": 0.5561668097896093, "grad_norm": 0.298828125, "learning_rate": 0.001087777739418701, "loss": 2.0316, "step": 20725 }, { "epoch": 0.5561936453413482, "grad_norm": 0.294921875, "learning_rate": 0.0010877613745328698, "loss": 2.0719, "step": 20726 }, { "epoch": 0.5562204808930872, "grad_norm": 0.294921875, "learning_rate": 0.0010877450085770338, "loss": 2.036, "step": 20727 }, { "epoch": 0.5562473164448261, "grad_norm": 0.2890625, "learning_rate": 0.0010877286415512286, "loss": 1.9176, "step": 20728 }, { "epoch": 0.556274151996565, "grad_norm": 0.294921875, "learning_rate": 0.0010877122734554905, "loss": 2.0067, "step": 20729 }, { "epoch": 0.556300987548304, "grad_norm": 0.3046875, "learning_rate": 0.001087695904289855, "loss": 2.0477, "step": 20730 }, { "epoch": 0.5563278231000429, "grad_norm": 0.291015625, "learning_rate": 0.001087679534054358, "loss": 2.0495, "step": 20731 }, { "epoch": 0.5563546586517819, "grad_norm": 0.30078125, "learning_rate": 0.0010876631627490358, "loss": 2.0313, "step": 20732 }, { "epoch": 0.5563814942035208, "grad_norm": 0.298828125, "learning_rate": 0.0010876467903739241, "loss": 2.0462, "step": 20733 }, { "epoch": 0.5564083297552598, "grad_norm": 0.28515625, "learning_rate": 0.0010876304169290588, "loss": 1.9033, "step": 20734 }, { "epoch": 0.5564351653069987, "grad_norm": 0.2890625, "learning_rate": 0.0010876140424144758, "loss": 1.97, "step": 20735 }, { "epoch": 0.5564620008587376, "grad_norm": 0.298828125, "learning_rate": 0.0010875976668302109, "loss": 1.99, "step": 20736 }, { "epoch": 0.5564888364104766, "grad_norm": 0.287109375, "learning_rate": 0.0010875812901763003, "loss": 1.9473, "step": 20737 }, { "epoch": 0.5565156719622155, "grad_norm": 0.2890625, "learning_rate": 0.0010875649124527798, "loss": 2.0113, "step": 20738 }, { "epoch": 0.5565425075139545, "grad_norm": 0.28515625, "learning_rate": 0.0010875485336596855, "loss": 1.9736, "step": 20739 }, { "epoch": 0.5565693430656934, "grad_norm": 0.29296875, "learning_rate": 0.001087532153797053, "loss": 1.9353, "step": 20740 }, { "epoch": 0.5565961786174324, "grad_norm": 0.294921875, "learning_rate": 0.0010875157728649184, "loss": 2.038, "step": 20741 }, { "epoch": 0.5566230141691714, "grad_norm": 0.291015625, "learning_rate": 0.0010874993908633174, "loss": 2.0215, "step": 20742 }, { "epoch": 0.5566498497209103, "grad_norm": 0.29296875, "learning_rate": 0.0010874830077922865, "loss": 2.0419, "step": 20743 }, { "epoch": 0.5566766852726492, "grad_norm": 0.29296875, "learning_rate": 0.001087466623651861, "loss": 1.9605, "step": 20744 }, { "epoch": 0.5567035208243881, "grad_norm": 0.2890625, "learning_rate": 0.0010874502384420773, "loss": 1.9631, "step": 20745 }, { "epoch": 0.5567303563761271, "grad_norm": 0.291015625, "learning_rate": 0.0010874338521629713, "loss": 1.9296, "step": 20746 }, { "epoch": 0.556757191927866, "grad_norm": 0.298828125, "learning_rate": 0.0010874174648145788, "loss": 1.9953, "step": 20747 }, { "epoch": 0.556784027479605, "grad_norm": 0.291015625, "learning_rate": 0.0010874010763969356, "loss": 2.0174, "step": 20748 }, { "epoch": 0.556810863031344, "grad_norm": 0.2890625, "learning_rate": 0.0010873846869100781, "loss": 1.9086, "step": 20749 }, { "epoch": 0.5568376985830829, "grad_norm": 0.296875, "learning_rate": 0.0010873682963540416, "loss": 2.0027, "step": 20750 }, { "epoch": 0.5568645341348218, "grad_norm": 0.30078125, "learning_rate": 0.0010873519047288629, "loss": 1.9808, "step": 20751 }, { "epoch": 0.5568913696865607, "grad_norm": 0.291015625, "learning_rate": 0.001087335512034577, "loss": 1.9232, "step": 20752 }, { "epoch": 0.5569182052382997, "grad_norm": 0.298828125, "learning_rate": 0.001087319118271221, "loss": 2.056, "step": 20753 }, { "epoch": 0.5569450407900386, "grad_norm": 0.294921875, "learning_rate": 0.0010873027234388297, "loss": 1.9329, "step": 20754 }, { "epoch": 0.5569718763417776, "grad_norm": 0.283203125, "learning_rate": 0.00108728632753744, "loss": 1.849, "step": 20755 }, { "epoch": 0.5569987118935166, "grad_norm": 0.3046875, "learning_rate": 0.001087269930567087, "loss": 2.0365, "step": 20756 }, { "epoch": 0.5570255474452555, "grad_norm": 0.279296875, "learning_rate": 0.0010872535325278073, "loss": 1.856, "step": 20757 }, { "epoch": 0.5570523829969944, "grad_norm": 0.279296875, "learning_rate": 0.001087237133419637, "loss": 1.8937, "step": 20758 }, { "epoch": 0.5570792185487333, "grad_norm": 0.296875, "learning_rate": 0.0010872207332426114, "loss": 1.9302, "step": 20759 }, { "epoch": 0.5571060541004723, "grad_norm": 0.29296875, "learning_rate": 0.001087204331996767, "loss": 1.9711, "step": 20760 }, { "epoch": 0.5571328896522112, "grad_norm": 0.287109375, "learning_rate": 0.0010871879296821395, "loss": 1.9183, "step": 20761 }, { "epoch": 0.5571597252039502, "grad_norm": 0.291015625, "learning_rate": 0.001087171526298765, "loss": 1.9927, "step": 20762 }, { "epoch": 0.5571865607556892, "grad_norm": 0.28515625, "learning_rate": 0.0010871551218466797, "loss": 1.9726, "step": 20763 }, { "epoch": 0.557213396307428, "grad_norm": 0.302734375, "learning_rate": 0.0010871387163259192, "loss": 1.955, "step": 20764 }, { "epoch": 0.557240231859167, "grad_norm": 0.30078125, "learning_rate": 0.00108712230973652, "loss": 1.9746, "step": 20765 }, { "epoch": 0.5572670674109059, "grad_norm": 0.29296875, "learning_rate": 0.0010871059020785175, "loss": 1.9279, "step": 20766 }, { "epoch": 0.5572939029626449, "grad_norm": 0.302734375, "learning_rate": 0.0010870894933519481, "loss": 1.9863, "step": 20767 }, { "epoch": 0.5573207385143839, "grad_norm": 0.296875, "learning_rate": 0.0010870730835568475, "loss": 1.9478, "step": 20768 }, { "epoch": 0.5573475740661228, "grad_norm": 0.298828125, "learning_rate": 0.0010870566726932519, "loss": 1.9027, "step": 20769 }, { "epoch": 0.5573744096178618, "grad_norm": 0.30078125, "learning_rate": 0.0010870402607611972, "loss": 1.9678, "step": 20770 }, { "epoch": 0.5574012451696007, "grad_norm": 0.294921875, "learning_rate": 0.0010870238477607196, "loss": 1.9517, "step": 20771 }, { "epoch": 0.5574280807213396, "grad_norm": 0.28515625, "learning_rate": 0.001087007433691855, "loss": 1.883, "step": 20772 }, { "epoch": 0.5574549162730785, "grad_norm": 0.291015625, "learning_rate": 0.0010869910185546392, "loss": 1.9224, "step": 20773 }, { "epoch": 0.5574817518248175, "grad_norm": 0.306640625, "learning_rate": 0.0010869746023491084, "loss": 1.9979, "step": 20774 }, { "epoch": 0.5575085873765565, "grad_norm": 0.296875, "learning_rate": 0.0010869581850752987, "loss": 1.9752, "step": 20775 }, { "epoch": 0.5575354229282954, "grad_norm": 0.29296875, "learning_rate": 0.0010869417667332458, "loss": 1.9435, "step": 20776 }, { "epoch": 0.5575622584800344, "grad_norm": 0.29296875, "learning_rate": 0.0010869253473229862, "loss": 1.8901, "step": 20777 }, { "epoch": 0.5575890940317733, "grad_norm": 0.298828125, "learning_rate": 0.0010869089268445555, "loss": 1.8981, "step": 20778 }, { "epoch": 0.5576159295835122, "grad_norm": 0.302734375, "learning_rate": 0.0010868925052979898, "loss": 1.9684, "step": 20779 }, { "epoch": 0.5576427651352511, "grad_norm": 0.296875, "learning_rate": 0.0010868760826833253, "loss": 1.9161, "step": 20780 }, { "epoch": 0.5576696006869901, "grad_norm": 0.302734375, "learning_rate": 0.001086859659000598, "loss": 1.9516, "step": 20781 }, { "epoch": 0.5576964362387291, "grad_norm": 0.2890625, "learning_rate": 0.0010868432342498436, "loss": 1.8208, "step": 20782 }, { "epoch": 0.557723271790468, "grad_norm": 0.296875, "learning_rate": 0.0010868268084310987, "loss": 1.9575, "step": 20783 }, { "epoch": 0.557750107342207, "grad_norm": 0.298828125, "learning_rate": 0.0010868103815443986, "loss": 1.9969, "step": 20784 }, { "epoch": 0.5577769428939459, "grad_norm": 0.294921875, "learning_rate": 0.00108679395358978, "loss": 1.9006, "step": 20785 }, { "epoch": 0.5578037784456849, "grad_norm": 0.296875, "learning_rate": 0.0010867775245672786, "loss": 1.9344, "step": 20786 }, { "epoch": 0.5578306139974238, "grad_norm": 0.296875, "learning_rate": 0.0010867610944769305, "loss": 1.9269, "step": 20787 }, { "epoch": 0.5578574495491627, "grad_norm": 0.294921875, "learning_rate": 0.0010867446633187718, "loss": 1.918, "step": 20788 }, { "epoch": 0.5578842851009017, "grad_norm": 0.29296875, "learning_rate": 0.0010867282310928386, "loss": 1.9092, "step": 20789 }, { "epoch": 0.5579111206526406, "grad_norm": 0.296875, "learning_rate": 0.001086711797799167, "loss": 1.8862, "step": 20790 }, { "epoch": 0.5579379562043796, "grad_norm": 0.298828125, "learning_rate": 0.0010866953634377924, "loss": 1.9827, "step": 20791 }, { "epoch": 0.5579647917561185, "grad_norm": 0.296875, "learning_rate": 0.0010866789280087519, "loss": 1.9505, "step": 20792 }, { "epoch": 0.5579916273078575, "grad_norm": 0.28515625, "learning_rate": 0.0010866624915120806, "loss": 1.8505, "step": 20793 }, { "epoch": 0.5580184628595964, "grad_norm": 0.287109375, "learning_rate": 0.001086646053947815, "loss": 1.8105, "step": 20794 }, { "epoch": 0.5580452984113353, "grad_norm": 0.291015625, "learning_rate": 0.0010866296153159915, "loss": 1.807, "step": 20795 }, { "epoch": 0.5580721339630743, "grad_norm": 0.294921875, "learning_rate": 0.0010866131756166455, "loss": 1.8409, "step": 20796 }, { "epoch": 0.5580989695148132, "grad_norm": 0.291015625, "learning_rate": 0.0010865967348498136, "loss": 1.8697, "step": 20797 }, { "epoch": 0.5581258050665522, "grad_norm": 0.30078125, "learning_rate": 0.0010865802930155315, "loss": 1.9777, "step": 20798 }, { "epoch": 0.5581526406182911, "grad_norm": 0.30078125, "learning_rate": 0.0010865638501138353, "loss": 1.9428, "step": 20799 }, { "epoch": 0.55817947617003, "grad_norm": 0.30078125, "learning_rate": 0.0010865474061447613, "loss": 1.8961, "step": 20800 }, { "epoch": 0.558206311721769, "grad_norm": 0.28515625, "learning_rate": 0.0010865309611083453, "loss": 1.789, "step": 20801 }, { "epoch": 0.5582331472735079, "grad_norm": 0.298828125, "learning_rate": 0.0010865145150046236, "loss": 1.8594, "step": 20802 }, { "epoch": 0.5582599828252469, "grad_norm": 0.3828125, "learning_rate": 0.0010864980678336321, "loss": 1.8376, "step": 20803 }, { "epoch": 0.5582868183769858, "grad_norm": 0.29296875, "learning_rate": 0.001086481619595407, "loss": 1.8816, "step": 20804 }, { "epoch": 0.5583136539287248, "grad_norm": 0.298828125, "learning_rate": 0.0010864651702899844, "loss": 1.8564, "step": 20805 }, { "epoch": 0.5583404894804637, "grad_norm": 0.3046875, "learning_rate": 0.0010864487199174003, "loss": 1.821, "step": 20806 }, { "epoch": 0.5583673250322027, "grad_norm": 0.296875, "learning_rate": 0.0010864322684776906, "loss": 1.887, "step": 20807 }, { "epoch": 0.5583941605839416, "grad_norm": 0.298828125, "learning_rate": 0.001086415815970892, "loss": 1.7266, "step": 20808 }, { "epoch": 0.5584209961356805, "grad_norm": 0.291015625, "learning_rate": 0.0010863993623970402, "loss": 1.8292, "step": 20809 }, { "epoch": 0.5584478316874195, "grad_norm": 0.30078125, "learning_rate": 0.0010863829077561711, "loss": 1.8921, "step": 20810 }, { "epoch": 0.5584746672391584, "grad_norm": 0.30078125, "learning_rate": 0.001086366452048321, "loss": 1.8833, "step": 20811 }, { "epoch": 0.5585015027908974, "grad_norm": 0.302734375, "learning_rate": 0.001086349995273526, "loss": 1.8962, "step": 20812 }, { "epoch": 0.5585283383426364, "grad_norm": 0.294921875, "learning_rate": 0.001086333537431822, "loss": 1.7743, "step": 20813 }, { "epoch": 0.5585551738943753, "grad_norm": 0.302734375, "learning_rate": 0.0010863170785232457, "loss": 1.9022, "step": 20814 }, { "epoch": 0.5585820094461142, "grad_norm": 0.349609375, "learning_rate": 0.0010863006185478326, "loss": 2.0154, "step": 20815 }, { "epoch": 0.5586088449978531, "grad_norm": 0.337890625, "learning_rate": 0.001086284157505619, "loss": 2.0624, "step": 20816 }, { "epoch": 0.5586356805495921, "grad_norm": 0.322265625, "learning_rate": 0.0010862676953966412, "loss": 1.9729, "step": 20817 }, { "epoch": 0.558662516101331, "grad_norm": 0.322265625, "learning_rate": 0.001086251232220935, "loss": 1.9066, "step": 20818 }, { "epoch": 0.55868935165307, "grad_norm": 0.310546875, "learning_rate": 0.0010862347679785366, "loss": 2.0336, "step": 20819 }, { "epoch": 0.558716187204809, "grad_norm": 0.3203125, "learning_rate": 0.0010862183026694823, "loss": 2.0726, "step": 20820 }, { "epoch": 0.5587430227565479, "grad_norm": 0.310546875, "learning_rate": 0.001086201836293808, "loss": 2.1146, "step": 20821 }, { "epoch": 0.5587698583082868, "grad_norm": 0.3046875, "learning_rate": 0.0010861853688515501, "loss": 1.9981, "step": 20822 }, { "epoch": 0.5587966938600257, "grad_norm": 0.30859375, "learning_rate": 0.0010861689003427444, "loss": 2.059, "step": 20823 }, { "epoch": 0.5588235294117647, "grad_norm": 0.314453125, "learning_rate": 0.001086152430767427, "loss": 2.0658, "step": 20824 }, { "epoch": 0.5588503649635036, "grad_norm": 0.296875, "learning_rate": 0.0010861359601256344, "loss": 1.9656, "step": 20825 }, { "epoch": 0.5588772005152426, "grad_norm": 0.298828125, "learning_rate": 0.0010861194884174026, "loss": 2.0837, "step": 20826 }, { "epoch": 0.5589040360669816, "grad_norm": 0.294921875, "learning_rate": 0.0010861030156427674, "loss": 2.0433, "step": 20827 }, { "epoch": 0.5589308716187205, "grad_norm": 0.291015625, "learning_rate": 0.0010860865418017653, "loss": 1.9321, "step": 20828 }, { "epoch": 0.5589577071704595, "grad_norm": 0.291015625, "learning_rate": 0.0010860700668944324, "loss": 1.9917, "step": 20829 }, { "epoch": 0.5589845427221983, "grad_norm": 0.2890625, "learning_rate": 0.0010860535909208047, "loss": 1.9503, "step": 20830 }, { "epoch": 0.5590113782739373, "grad_norm": 0.3046875, "learning_rate": 0.0010860371138809186, "loss": 2.1358, "step": 20831 }, { "epoch": 0.5590382138256763, "grad_norm": 0.287109375, "learning_rate": 0.00108602063577481, "loss": 1.9519, "step": 20832 }, { "epoch": 0.5590650493774152, "grad_norm": 0.291015625, "learning_rate": 0.0010860041566025148, "loss": 1.9265, "step": 20833 }, { "epoch": 0.5590918849291542, "grad_norm": 0.287109375, "learning_rate": 0.0010859876763640698, "loss": 1.9657, "step": 20834 }, { "epoch": 0.5591187204808931, "grad_norm": 0.3046875, "learning_rate": 0.0010859711950595109, "loss": 2.0648, "step": 20835 }, { "epoch": 0.559145556032632, "grad_norm": 0.291015625, "learning_rate": 0.0010859547126888737, "loss": 1.9562, "step": 20836 }, { "epoch": 0.5591723915843709, "grad_norm": 0.29296875, "learning_rate": 0.0010859382292521952, "loss": 2.0341, "step": 20837 }, { "epoch": 0.5591992271361099, "grad_norm": 0.2890625, "learning_rate": 0.0010859217447495113, "loss": 2.0187, "step": 20838 }, { "epoch": 0.5592260626878489, "grad_norm": 0.283203125, "learning_rate": 0.0010859052591808578, "loss": 1.9051, "step": 20839 }, { "epoch": 0.5592528982395878, "grad_norm": 0.294921875, "learning_rate": 0.0010858887725462712, "loss": 2.0303, "step": 20840 }, { "epoch": 0.5592797337913268, "grad_norm": 0.2890625, "learning_rate": 0.0010858722848457875, "loss": 2.0415, "step": 20841 }, { "epoch": 0.5593065693430657, "grad_norm": 0.296875, "learning_rate": 0.0010858557960794432, "loss": 2.0172, "step": 20842 }, { "epoch": 0.5593334048948047, "grad_norm": 0.30078125, "learning_rate": 0.001085839306247274, "loss": 1.9933, "step": 20843 }, { "epoch": 0.5593602404465435, "grad_norm": 0.279296875, "learning_rate": 0.0010858228153493162, "loss": 1.8764, "step": 20844 }, { "epoch": 0.5593870759982825, "grad_norm": 0.3046875, "learning_rate": 0.0010858063233856062, "loss": 1.9929, "step": 20845 }, { "epoch": 0.5594139115500215, "grad_norm": 0.2890625, "learning_rate": 0.0010857898303561803, "loss": 1.951, "step": 20846 }, { "epoch": 0.5594407471017604, "grad_norm": 0.2890625, "learning_rate": 0.001085773336261074, "loss": 2.017, "step": 20847 }, { "epoch": 0.5594675826534994, "grad_norm": 0.283203125, "learning_rate": 0.0010857568411003242, "loss": 1.9489, "step": 20848 }, { "epoch": 0.5594944182052383, "grad_norm": 0.283203125, "learning_rate": 0.0010857403448739668, "loss": 1.9744, "step": 20849 }, { "epoch": 0.5595212537569773, "grad_norm": 0.291015625, "learning_rate": 0.0010857238475820378, "loss": 2.0148, "step": 20850 }, { "epoch": 0.5595480893087161, "grad_norm": 0.291015625, "learning_rate": 0.0010857073492245737, "loss": 1.972, "step": 20851 }, { "epoch": 0.5595749248604551, "grad_norm": 0.298828125, "learning_rate": 0.0010856908498016106, "loss": 1.998, "step": 20852 }, { "epoch": 0.5596017604121941, "grad_norm": 0.298828125, "learning_rate": 0.0010856743493131847, "loss": 1.9389, "step": 20853 }, { "epoch": 0.559628595963933, "grad_norm": 0.2890625, "learning_rate": 0.0010856578477593322, "loss": 1.9178, "step": 20854 }, { "epoch": 0.559655431515672, "grad_norm": 0.294921875, "learning_rate": 0.001085641345140089, "loss": 1.9674, "step": 20855 }, { "epoch": 0.5596822670674109, "grad_norm": 0.29296875, "learning_rate": 0.0010856248414554918, "loss": 1.9686, "step": 20856 }, { "epoch": 0.5597091026191499, "grad_norm": 0.291015625, "learning_rate": 0.0010856083367055765, "loss": 1.9603, "step": 20857 }, { "epoch": 0.5597359381708888, "grad_norm": 0.2890625, "learning_rate": 0.0010855918308903794, "loss": 1.9313, "step": 20858 }, { "epoch": 0.5597627737226277, "grad_norm": 0.291015625, "learning_rate": 0.0010855753240099368, "loss": 1.903, "step": 20859 }, { "epoch": 0.5597896092743667, "grad_norm": 0.2890625, "learning_rate": 0.0010855588160642846, "loss": 1.9148, "step": 20860 }, { "epoch": 0.5598164448261056, "grad_norm": 0.279296875, "learning_rate": 0.0010855423070534594, "loss": 1.9027, "step": 20861 }, { "epoch": 0.5598432803778446, "grad_norm": 0.294921875, "learning_rate": 0.001085525796977497, "loss": 1.9052, "step": 20862 }, { "epoch": 0.5598701159295835, "grad_norm": 0.28515625, "learning_rate": 0.001085509285836434, "loss": 1.9078, "step": 20863 }, { "epoch": 0.5598969514813225, "grad_norm": 0.3046875, "learning_rate": 0.0010854927736303065, "loss": 2.0549, "step": 20864 }, { "epoch": 0.5599237870330614, "grad_norm": 0.29296875, "learning_rate": 0.0010854762603591504, "loss": 2.0179, "step": 20865 }, { "epoch": 0.5599506225848003, "grad_norm": 0.291015625, "learning_rate": 0.0010854597460230025, "loss": 1.9633, "step": 20866 }, { "epoch": 0.5599774581365393, "grad_norm": 0.29296875, "learning_rate": 0.0010854432306218987, "loss": 1.9458, "step": 20867 }, { "epoch": 0.5600042936882782, "grad_norm": 0.29296875, "learning_rate": 0.001085426714155875, "loss": 1.9697, "step": 20868 }, { "epoch": 0.5600311292400172, "grad_norm": 0.287109375, "learning_rate": 0.0010854101966249685, "loss": 1.8912, "step": 20869 }, { "epoch": 0.5600579647917561, "grad_norm": 0.291015625, "learning_rate": 0.0010853936780292144, "loss": 1.9665, "step": 20870 }, { "epoch": 0.5600848003434951, "grad_norm": 0.2890625, "learning_rate": 0.0010853771583686494, "loss": 1.8904, "step": 20871 }, { "epoch": 0.560111635895234, "grad_norm": 0.294921875, "learning_rate": 0.0010853606376433097, "loss": 1.9579, "step": 20872 }, { "epoch": 0.5601384714469729, "grad_norm": 0.283203125, "learning_rate": 0.0010853441158532318, "loss": 1.8123, "step": 20873 }, { "epoch": 0.5601653069987119, "grad_norm": 0.298828125, "learning_rate": 0.0010853275929984514, "loss": 1.9351, "step": 20874 }, { "epoch": 0.5601921425504508, "grad_norm": 0.30078125, "learning_rate": 0.0010853110690790054, "loss": 1.999, "step": 20875 }, { "epoch": 0.5602189781021898, "grad_norm": 0.287109375, "learning_rate": 0.0010852945440949292, "loss": 1.8616, "step": 20876 }, { "epoch": 0.5602458136539287, "grad_norm": 0.302734375, "learning_rate": 0.0010852780180462599, "loss": 1.9588, "step": 20877 }, { "epoch": 0.5602726492056677, "grad_norm": 0.279296875, "learning_rate": 0.0010852614909330333, "loss": 1.8789, "step": 20878 }, { "epoch": 0.5602994847574067, "grad_norm": 0.28125, "learning_rate": 0.0010852449627552856, "loss": 1.8609, "step": 20879 }, { "epoch": 0.5603263203091455, "grad_norm": 0.298828125, "learning_rate": 0.0010852284335130537, "loss": 1.9252, "step": 20880 }, { "epoch": 0.5603531558608845, "grad_norm": 0.296875, "learning_rate": 0.001085211903206373, "loss": 1.9378, "step": 20881 }, { "epoch": 0.5603799914126234, "grad_norm": 0.294921875, "learning_rate": 0.00108519537183528, "loss": 1.9238, "step": 20882 }, { "epoch": 0.5604068269643624, "grad_norm": 0.302734375, "learning_rate": 0.0010851788393998114, "loss": 1.9172, "step": 20883 }, { "epoch": 0.5604336625161014, "grad_norm": 0.28515625, "learning_rate": 0.001085162305900003, "loss": 1.84, "step": 20884 }, { "epoch": 0.5604604980678403, "grad_norm": 0.2890625, "learning_rate": 0.0010851457713358915, "loss": 1.8823, "step": 20885 }, { "epoch": 0.5604873336195793, "grad_norm": 0.3046875, "learning_rate": 0.0010851292357075125, "loss": 1.9686, "step": 20886 }, { "epoch": 0.5605141691713181, "grad_norm": 0.291015625, "learning_rate": 0.001085112699014903, "loss": 1.8937, "step": 20887 }, { "epoch": 0.5605410047230571, "grad_norm": 0.3046875, "learning_rate": 0.0010850961612580987, "loss": 2.059, "step": 20888 }, { "epoch": 0.560567840274796, "grad_norm": 0.2890625, "learning_rate": 0.0010850796224371364, "loss": 1.9206, "step": 20889 }, { "epoch": 0.560594675826535, "grad_norm": 0.2890625, "learning_rate": 0.0010850630825520518, "loss": 1.9143, "step": 20890 }, { "epoch": 0.560621511378274, "grad_norm": 0.287109375, "learning_rate": 0.001085046541602882, "loss": 1.8624, "step": 20891 }, { "epoch": 0.5606483469300129, "grad_norm": 0.29296875, "learning_rate": 0.0010850299995896623, "loss": 1.8515, "step": 20892 }, { "epoch": 0.5606751824817519, "grad_norm": 0.302734375, "learning_rate": 0.0010850134565124298, "loss": 1.9883, "step": 20893 }, { "epoch": 0.5607020180334907, "grad_norm": 0.287109375, "learning_rate": 0.0010849969123712204, "loss": 1.8466, "step": 20894 }, { "epoch": 0.5607288535852297, "grad_norm": 0.296875, "learning_rate": 0.0010849803671660705, "loss": 1.9434, "step": 20895 }, { "epoch": 0.5607556891369686, "grad_norm": 0.296875, "learning_rate": 0.0010849638208970161, "loss": 1.8425, "step": 20896 }, { "epoch": 0.5607825246887076, "grad_norm": 0.291015625, "learning_rate": 0.0010849472735640939, "loss": 1.8709, "step": 20897 }, { "epoch": 0.5608093602404466, "grad_norm": 0.291015625, "learning_rate": 0.0010849307251673402, "loss": 1.9234, "step": 20898 }, { "epoch": 0.5608361957921855, "grad_norm": 0.291015625, "learning_rate": 0.001084914175706791, "loss": 1.8337, "step": 20899 }, { "epoch": 0.5608630313439245, "grad_norm": 0.29296875, "learning_rate": 0.0010848976251824829, "loss": 1.8755, "step": 20900 }, { "epoch": 0.5608898668956633, "grad_norm": 0.29296875, "learning_rate": 0.001084881073594452, "loss": 1.8479, "step": 20901 }, { "epoch": 0.5609167024474023, "grad_norm": 0.298828125, "learning_rate": 0.0010848645209427345, "loss": 1.8541, "step": 20902 }, { "epoch": 0.5609435379991413, "grad_norm": 0.30078125, "learning_rate": 0.001084847967227367, "loss": 1.9162, "step": 20903 }, { "epoch": 0.5609703735508802, "grad_norm": 0.298828125, "learning_rate": 0.0010848314124483859, "loss": 1.8935, "step": 20904 }, { "epoch": 0.5609972091026192, "grad_norm": 0.30078125, "learning_rate": 0.0010848148566058272, "loss": 1.8575, "step": 20905 }, { "epoch": 0.5610240446543581, "grad_norm": 0.298828125, "learning_rate": 0.0010847982996997272, "loss": 1.9345, "step": 20906 }, { "epoch": 0.5610508802060971, "grad_norm": 0.294921875, "learning_rate": 0.0010847817417301226, "loss": 1.8505, "step": 20907 }, { "epoch": 0.5610777157578359, "grad_norm": 0.296875, "learning_rate": 0.0010847651826970492, "loss": 1.8635, "step": 20908 }, { "epoch": 0.5611045513095749, "grad_norm": 0.298828125, "learning_rate": 0.001084748622600544, "loss": 1.9097, "step": 20909 }, { "epoch": 0.5611313868613139, "grad_norm": 0.314453125, "learning_rate": 0.0010847320614406426, "loss": 1.9187, "step": 20910 }, { "epoch": 0.5611582224130528, "grad_norm": 0.310546875, "learning_rate": 0.0010847154992173818, "loss": 1.8918, "step": 20911 }, { "epoch": 0.5611850579647918, "grad_norm": 0.310546875, "learning_rate": 0.0010846989359307975, "loss": 1.8862, "step": 20912 }, { "epoch": 0.5612118935165307, "grad_norm": 0.3515625, "learning_rate": 0.0010846823715809268, "loss": 2.1181, "step": 20913 }, { "epoch": 0.5612387290682697, "grad_norm": 0.3515625, "learning_rate": 0.0010846658061678053, "loss": 2.1007, "step": 20914 }, { "epoch": 0.5612655646200085, "grad_norm": 0.3203125, "learning_rate": 0.0010846492396914698, "loss": 2.0071, "step": 20915 }, { "epoch": 0.5612924001717475, "grad_norm": 0.326171875, "learning_rate": 0.0010846326721519564, "loss": 2.0231, "step": 20916 }, { "epoch": 0.5613192357234865, "grad_norm": 0.322265625, "learning_rate": 0.0010846161035493013, "loss": 2.1119, "step": 20917 }, { "epoch": 0.5613460712752254, "grad_norm": 0.30078125, "learning_rate": 0.0010845995338835412, "loss": 1.9872, "step": 20918 }, { "epoch": 0.5613729068269644, "grad_norm": 0.330078125, "learning_rate": 0.0010845829631547122, "loss": 2.1275, "step": 20919 }, { "epoch": 0.5613997423787033, "grad_norm": 0.294921875, "learning_rate": 0.001084566391362851, "loss": 1.9527, "step": 20920 }, { "epoch": 0.5614265779304423, "grad_norm": 0.294921875, "learning_rate": 0.0010845498185079933, "loss": 1.9945, "step": 20921 }, { "epoch": 0.5614534134821811, "grad_norm": 0.314453125, "learning_rate": 0.0010845332445901762, "loss": 2.0699, "step": 20922 }, { "epoch": 0.5614802490339201, "grad_norm": 0.314453125, "learning_rate": 0.0010845166696094356, "loss": 2.0532, "step": 20923 }, { "epoch": 0.5615070845856591, "grad_norm": 0.2890625, "learning_rate": 0.0010845000935658078, "loss": 1.9544, "step": 20924 }, { "epoch": 0.561533920137398, "grad_norm": 0.294921875, "learning_rate": 0.0010844835164593297, "loss": 1.9589, "step": 20925 }, { "epoch": 0.561560755689137, "grad_norm": 0.294921875, "learning_rate": 0.001084466938290037, "loss": 1.91, "step": 20926 }, { "epoch": 0.5615875912408759, "grad_norm": 0.294921875, "learning_rate": 0.0010844503590579666, "loss": 2.0439, "step": 20927 }, { "epoch": 0.5616144267926149, "grad_norm": 0.302734375, "learning_rate": 0.0010844337787631543, "loss": 2.0775, "step": 20928 }, { "epoch": 0.5616412623443539, "grad_norm": 0.296875, "learning_rate": 0.001084417197405637, "loss": 1.9775, "step": 20929 }, { "epoch": 0.5616680978960927, "grad_norm": 0.28515625, "learning_rate": 0.0010844006149854511, "loss": 2.0354, "step": 20930 }, { "epoch": 0.5616949334478317, "grad_norm": 0.291015625, "learning_rate": 0.0010843840315026324, "loss": 1.9247, "step": 20931 }, { "epoch": 0.5617217689995706, "grad_norm": 0.28515625, "learning_rate": 0.001084367446957218, "loss": 2.0383, "step": 20932 }, { "epoch": 0.5617486045513096, "grad_norm": 0.291015625, "learning_rate": 0.0010843508613492437, "loss": 2.0593, "step": 20933 }, { "epoch": 0.5617754401030485, "grad_norm": 0.294921875, "learning_rate": 0.001084334274678746, "loss": 2.0307, "step": 20934 }, { "epoch": 0.5618022756547875, "grad_norm": 0.291015625, "learning_rate": 0.0010843176869457618, "loss": 1.9347, "step": 20935 }, { "epoch": 0.5618291112065265, "grad_norm": 0.29296875, "learning_rate": 0.0010843010981503266, "loss": 1.9667, "step": 20936 }, { "epoch": 0.5618559467582653, "grad_norm": 0.2890625, "learning_rate": 0.0010842845082924776, "loss": 1.9819, "step": 20937 }, { "epoch": 0.5618827823100043, "grad_norm": 0.287109375, "learning_rate": 0.001084267917372251, "loss": 1.9809, "step": 20938 }, { "epoch": 0.5619096178617432, "grad_norm": 0.283203125, "learning_rate": 0.0010842513253896827, "loss": 2.0132, "step": 20939 }, { "epoch": 0.5619364534134822, "grad_norm": 0.291015625, "learning_rate": 0.0010842347323448095, "loss": 2.009, "step": 20940 }, { "epoch": 0.5619632889652211, "grad_norm": 0.287109375, "learning_rate": 0.001084218138237668, "loss": 2.0237, "step": 20941 }, { "epoch": 0.5619901245169601, "grad_norm": 0.294921875, "learning_rate": 0.001084201543068294, "loss": 2.0124, "step": 20942 }, { "epoch": 0.5620169600686991, "grad_norm": 0.2890625, "learning_rate": 0.0010841849468367244, "loss": 1.9673, "step": 20943 }, { "epoch": 0.5620437956204379, "grad_norm": 0.287109375, "learning_rate": 0.0010841683495429958, "loss": 1.9241, "step": 20944 }, { "epoch": 0.5620706311721769, "grad_norm": 0.2890625, "learning_rate": 0.001084151751187144, "loss": 1.9389, "step": 20945 }, { "epoch": 0.5620974667239158, "grad_norm": 0.283203125, "learning_rate": 0.0010841351517692057, "loss": 1.8719, "step": 20946 }, { "epoch": 0.5621243022756548, "grad_norm": 0.27734375, "learning_rate": 0.0010841185512892174, "loss": 1.911, "step": 20947 }, { "epoch": 0.5621511378273937, "grad_norm": 0.28125, "learning_rate": 0.0010841019497472154, "loss": 1.8358, "step": 20948 }, { "epoch": 0.5621779733791327, "grad_norm": 0.30078125, "learning_rate": 0.001084085347143236, "loss": 2.0623, "step": 20949 }, { "epoch": 0.5622048089308717, "grad_norm": 0.294921875, "learning_rate": 0.001084068743477316, "loss": 1.9993, "step": 20950 }, { "epoch": 0.5622316444826105, "grad_norm": 0.283203125, "learning_rate": 0.0010840521387494916, "loss": 1.8962, "step": 20951 }, { "epoch": 0.5622584800343495, "grad_norm": 0.296875, "learning_rate": 0.001084035532959799, "loss": 2.0458, "step": 20952 }, { "epoch": 0.5622853155860884, "grad_norm": 0.287109375, "learning_rate": 0.001084018926108275, "loss": 1.964, "step": 20953 }, { "epoch": 0.5623121511378274, "grad_norm": 0.28515625, "learning_rate": 0.001084002318194956, "loss": 1.9221, "step": 20954 }, { "epoch": 0.5623389866895664, "grad_norm": 0.30078125, "learning_rate": 0.001083985709219878, "loss": 2.0197, "step": 20955 }, { "epoch": 0.5623658222413053, "grad_norm": 0.294921875, "learning_rate": 0.001083969099183078, "loss": 1.9193, "step": 20956 }, { "epoch": 0.5623926577930443, "grad_norm": 0.29296875, "learning_rate": 0.0010839524880845918, "loss": 2.0329, "step": 20957 }, { "epoch": 0.5624194933447831, "grad_norm": 0.287109375, "learning_rate": 0.0010839358759244565, "loss": 1.9378, "step": 20958 }, { "epoch": 0.5624463288965221, "grad_norm": 0.287109375, "learning_rate": 0.0010839192627027084, "loss": 1.9345, "step": 20959 }, { "epoch": 0.562473164448261, "grad_norm": 0.296875, "learning_rate": 0.0010839026484193836, "loss": 2.0019, "step": 20960 }, { "epoch": 0.5625, "grad_norm": 0.29296875, "learning_rate": 0.0010838860330745189, "loss": 1.9862, "step": 20961 }, { "epoch": 0.562526835551739, "grad_norm": 0.28515625, "learning_rate": 0.0010838694166681504, "loss": 1.9442, "step": 20962 }, { "epoch": 0.5625536711034779, "grad_norm": 0.29296875, "learning_rate": 0.001083852799200315, "loss": 2.0002, "step": 20963 }, { "epoch": 0.5625805066552169, "grad_norm": 0.291015625, "learning_rate": 0.0010838361806710488, "loss": 1.9282, "step": 20964 }, { "epoch": 0.5626073422069557, "grad_norm": 0.29296875, "learning_rate": 0.0010838195610803883, "loss": 1.9892, "step": 20965 }, { "epoch": 0.5626341777586947, "grad_norm": 0.291015625, "learning_rate": 0.00108380294042837, "loss": 1.9813, "step": 20966 }, { "epoch": 0.5626610133104336, "grad_norm": 0.29296875, "learning_rate": 0.0010837863187150305, "loss": 1.9365, "step": 20967 }, { "epoch": 0.5626878488621726, "grad_norm": 0.291015625, "learning_rate": 0.0010837696959404063, "loss": 1.8925, "step": 20968 }, { "epoch": 0.5627146844139116, "grad_norm": 0.283203125, "learning_rate": 0.0010837530721045335, "loss": 1.8723, "step": 20969 }, { "epoch": 0.5627415199656505, "grad_norm": 0.291015625, "learning_rate": 0.0010837364472074487, "loss": 1.9753, "step": 20970 }, { "epoch": 0.5627683555173895, "grad_norm": 0.291015625, "learning_rate": 0.0010837198212491888, "loss": 1.8811, "step": 20971 }, { "epoch": 0.5627951910691283, "grad_norm": 0.287109375, "learning_rate": 0.0010837031942297895, "loss": 1.9206, "step": 20972 }, { "epoch": 0.5628220266208673, "grad_norm": 0.294921875, "learning_rate": 0.001083686566149288, "loss": 1.9845, "step": 20973 }, { "epoch": 0.5628488621726063, "grad_norm": 0.298828125, "learning_rate": 0.00108366993700772, "loss": 2.0163, "step": 20974 }, { "epoch": 0.5628756977243452, "grad_norm": 0.291015625, "learning_rate": 0.0010836533068051232, "loss": 1.8424, "step": 20975 }, { "epoch": 0.5629025332760842, "grad_norm": 0.28515625, "learning_rate": 0.0010836366755415327, "loss": 1.8809, "step": 20976 }, { "epoch": 0.5629293688278231, "grad_norm": 0.283203125, "learning_rate": 0.001083620043216986, "loss": 1.8664, "step": 20977 }, { "epoch": 0.5629562043795621, "grad_norm": 0.29296875, "learning_rate": 0.0010836034098315192, "loss": 1.8916, "step": 20978 }, { "epoch": 0.5629830399313009, "grad_norm": 0.291015625, "learning_rate": 0.0010835867753851687, "loss": 1.8877, "step": 20979 }, { "epoch": 0.5630098754830399, "grad_norm": 0.2890625, "learning_rate": 0.001083570139877971, "loss": 1.932, "step": 20980 }, { "epoch": 0.5630367110347789, "grad_norm": 0.296875, "learning_rate": 0.0010835535033099628, "loss": 1.9266, "step": 20981 }, { "epoch": 0.5630635465865178, "grad_norm": 0.283203125, "learning_rate": 0.0010835368656811803, "loss": 1.8587, "step": 20982 }, { "epoch": 0.5630903821382568, "grad_norm": 0.294921875, "learning_rate": 0.0010835202269916604, "loss": 1.9037, "step": 20983 }, { "epoch": 0.5631172176899957, "grad_norm": 0.29296875, "learning_rate": 0.0010835035872414393, "loss": 1.7946, "step": 20984 }, { "epoch": 0.5631440532417347, "grad_norm": 0.291015625, "learning_rate": 0.0010834869464305535, "loss": 1.878, "step": 20985 }, { "epoch": 0.5631708887934735, "grad_norm": 0.298828125, "learning_rate": 0.0010834703045590397, "loss": 1.8635, "step": 20986 }, { "epoch": 0.5631977243452125, "grad_norm": 0.29296875, "learning_rate": 0.0010834536616269343, "loss": 1.8866, "step": 20987 }, { "epoch": 0.5632245598969515, "grad_norm": 0.291015625, "learning_rate": 0.0010834370176342736, "loss": 1.8349, "step": 20988 }, { "epoch": 0.5632513954486904, "grad_norm": 0.310546875, "learning_rate": 0.0010834203725810945, "loss": 2.0337, "step": 20989 }, { "epoch": 0.5632782310004294, "grad_norm": 0.298828125, "learning_rate": 0.0010834037264674332, "loss": 1.8775, "step": 20990 }, { "epoch": 0.5633050665521683, "grad_norm": 0.3046875, "learning_rate": 0.0010833870792933266, "loss": 1.9379, "step": 20991 }, { "epoch": 0.5633319021039073, "grad_norm": 0.29296875, "learning_rate": 0.0010833704310588108, "loss": 1.8406, "step": 20992 }, { "epoch": 0.5633587376556461, "grad_norm": 0.291015625, "learning_rate": 0.0010833537817639225, "loss": 1.8284, "step": 20993 }, { "epoch": 0.5633855732073851, "grad_norm": 0.291015625, "learning_rate": 0.001083337131408698, "loss": 1.8677, "step": 20994 }, { "epoch": 0.5634124087591241, "grad_norm": 0.296875, "learning_rate": 0.0010833204799931741, "loss": 1.892, "step": 20995 }, { "epoch": 0.563439244310863, "grad_norm": 0.296875, "learning_rate": 0.0010833038275173876, "loss": 1.9341, "step": 20996 }, { "epoch": 0.563466079862602, "grad_norm": 0.298828125, "learning_rate": 0.0010832871739813745, "loss": 1.8238, "step": 20997 }, { "epoch": 0.5634929154143409, "grad_norm": 0.3046875, "learning_rate": 0.0010832705193851715, "loss": 1.9077, "step": 20998 }, { "epoch": 0.5635197509660799, "grad_norm": 0.2890625, "learning_rate": 0.001083253863728815, "loss": 1.8945, "step": 20999 }, { "epoch": 0.5635465865178189, "grad_norm": 0.291015625, "learning_rate": 0.001083237207012342, "loss": 1.7763, "step": 21000 }, { "epoch": 0.5635734220695577, "grad_norm": 0.306640625, "learning_rate": 0.0010832205492357883, "loss": 1.8506, "step": 21001 }, { "epoch": 0.5636002576212967, "grad_norm": 0.2890625, "learning_rate": 0.0010832038903991913, "loss": 1.8667, "step": 21002 }, { "epoch": 0.5636270931730356, "grad_norm": 0.298828125, "learning_rate": 0.0010831872305025873, "loss": 1.8116, "step": 21003 }, { "epoch": 0.5636539287247746, "grad_norm": 0.294921875, "learning_rate": 0.0010831705695460122, "loss": 1.8545, "step": 21004 }, { "epoch": 0.5636807642765135, "grad_norm": 0.29296875, "learning_rate": 0.001083153907529503, "loss": 1.793, "step": 21005 }, { "epoch": 0.5637075998282525, "grad_norm": 0.302734375, "learning_rate": 0.0010831372444530967, "loss": 1.8334, "step": 21006 }, { "epoch": 0.5637344353799915, "grad_norm": 0.294921875, "learning_rate": 0.001083120580316829, "loss": 1.855, "step": 21007 }, { "epoch": 0.5637612709317303, "grad_norm": 0.296875, "learning_rate": 0.0010831039151207372, "loss": 1.8396, "step": 21008 }, { "epoch": 0.5637881064834693, "grad_norm": 0.341796875, "learning_rate": 0.0010830872488648575, "loss": 1.9631, "step": 21009 }, { "epoch": 0.5638149420352082, "grad_norm": 0.34765625, "learning_rate": 0.0010830705815492263, "loss": 2.082, "step": 21010 }, { "epoch": 0.5638417775869472, "grad_norm": 0.318359375, "learning_rate": 0.0010830539131738805, "loss": 2.0804, "step": 21011 }, { "epoch": 0.5638686131386861, "grad_norm": 0.3125, "learning_rate": 0.0010830372437388564, "loss": 2.1161, "step": 21012 }, { "epoch": 0.5638954486904251, "grad_norm": 0.3046875, "learning_rate": 0.001083020573244191, "loss": 1.9782, "step": 21013 }, { "epoch": 0.5639222842421641, "grad_norm": 0.30859375, "learning_rate": 0.0010830039016899201, "loss": 2.0329, "step": 21014 }, { "epoch": 0.5639491197939029, "grad_norm": 0.310546875, "learning_rate": 0.0010829872290760811, "loss": 2.0989, "step": 21015 }, { "epoch": 0.5639759553456419, "grad_norm": 0.294921875, "learning_rate": 0.0010829705554027103, "loss": 1.9635, "step": 21016 }, { "epoch": 0.5640027908973808, "grad_norm": 0.298828125, "learning_rate": 0.0010829538806698441, "loss": 2.1025, "step": 21017 }, { "epoch": 0.5640296264491198, "grad_norm": 0.29296875, "learning_rate": 0.001082937204877519, "loss": 2.0937, "step": 21018 }, { "epoch": 0.5640564620008587, "grad_norm": 0.283203125, "learning_rate": 0.001082920528025772, "loss": 1.9667, "step": 21019 }, { "epoch": 0.5640832975525977, "grad_norm": 0.298828125, "learning_rate": 0.0010829038501146391, "loss": 1.9879, "step": 21020 }, { "epoch": 0.5641101331043367, "grad_norm": 0.310546875, "learning_rate": 0.0010828871711441575, "loss": 2.0153, "step": 21021 }, { "epoch": 0.5641369686560755, "grad_norm": 0.294921875, "learning_rate": 0.0010828704911143635, "loss": 2.0302, "step": 21022 }, { "epoch": 0.5641638042078145, "grad_norm": 0.294921875, "learning_rate": 0.0010828538100252935, "loss": 2.0042, "step": 21023 }, { "epoch": 0.5641906397595534, "grad_norm": 0.291015625, "learning_rate": 0.0010828371278769845, "loss": 1.9611, "step": 21024 }, { "epoch": 0.5642174753112924, "grad_norm": 0.283203125, "learning_rate": 0.0010828204446694729, "loss": 1.9677, "step": 21025 }, { "epoch": 0.5642443108630314, "grad_norm": 0.287109375, "learning_rate": 0.001082803760402795, "loss": 2.0298, "step": 21026 }, { "epoch": 0.5642711464147703, "grad_norm": 0.28515625, "learning_rate": 0.001082787075076988, "loss": 1.8973, "step": 21027 }, { "epoch": 0.5642979819665093, "grad_norm": 0.298828125, "learning_rate": 0.001082770388692088, "loss": 1.8989, "step": 21028 }, { "epoch": 0.5643248175182481, "grad_norm": 0.28125, "learning_rate": 0.0010827537012481321, "loss": 1.9177, "step": 21029 }, { "epoch": 0.5643516530699871, "grad_norm": 0.294921875, "learning_rate": 0.0010827370127451563, "loss": 2.0778, "step": 21030 }, { "epoch": 0.564378488621726, "grad_norm": 0.28125, "learning_rate": 0.0010827203231831978, "loss": 1.972, "step": 21031 }, { "epoch": 0.564405324173465, "grad_norm": 0.30078125, "learning_rate": 0.0010827036325622927, "loss": 1.9434, "step": 21032 }, { "epoch": 0.564432159725204, "grad_norm": 0.29296875, "learning_rate": 0.0010826869408824779, "loss": 1.9678, "step": 21033 }, { "epoch": 0.5644589952769429, "grad_norm": 0.291015625, "learning_rate": 0.00108267024814379, "loss": 1.9557, "step": 21034 }, { "epoch": 0.5644858308286819, "grad_norm": 0.28515625, "learning_rate": 0.0010826535543462655, "loss": 2.0125, "step": 21035 }, { "epoch": 0.5645126663804207, "grad_norm": 0.287109375, "learning_rate": 0.0010826368594899411, "loss": 1.9594, "step": 21036 }, { "epoch": 0.5645395019321597, "grad_norm": 0.30078125, "learning_rate": 0.0010826201635748536, "loss": 1.986, "step": 21037 }, { "epoch": 0.5645663374838986, "grad_norm": 0.287109375, "learning_rate": 0.0010826034666010394, "loss": 1.9999, "step": 21038 }, { "epoch": 0.5645931730356376, "grad_norm": 0.291015625, "learning_rate": 0.001082586768568535, "loss": 2.0192, "step": 21039 }, { "epoch": 0.5646200085873766, "grad_norm": 0.29296875, "learning_rate": 0.0010825700694773773, "loss": 1.9006, "step": 21040 }, { "epoch": 0.5646468441391155, "grad_norm": 0.287109375, "learning_rate": 0.001082553369327603, "loss": 1.9571, "step": 21041 }, { "epoch": 0.5646736796908545, "grad_norm": 0.28125, "learning_rate": 0.0010825366681192483, "loss": 1.9335, "step": 21042 }, { "epoch": 0.5647005152425933, "grad_norm": 0.2890625, "learning_rate": 0.0010825199658523502, "loss": 1.9335, "step": 21043 }, { "epoch": 0.5647273507943323, "grad_norm": 0.291015625, "learning_rate": 0.0010825032625269453, "loss": 1.8837, "step": 21044 }, { "epoch": 0.5647541863460713, "grad_norm": 0.287109375, "learning_rate": 0.0010824865581430703, "loss": 1.8937, "step": 21045 }, { "epoch": 0.5647810218978102, "grad_norm": 0.28125, "learning_rate": 0.0010824698527007615, "loss": 1.8913, "step": 21046 }, { "epoch": 0.5648078574495492, "grad_norm": 0.2890625, "learning_rate": 0.0010824531462000561, "loss": 1.9368, "step": 21047 }, { "epoch": 0.5648346930012881, "grad_norm": 0.291015625, "learning_rate": 0.0010824364386409903, "loss": 1.9841, "step": 21048 }, { "epoch": 0.5648615285530271, "grad_norm": 0.29296875, "learning_rate": 0.0010824197300236009, "loss": 1.8943, "step": 21049 }, { "epoch": 0.564888364104766, "grad_norm": 0.275390625, "learning_rate": 0.0010824030203479243, "loss": 1.8484, "step": 21050 }, { "epoch": 0.5649151996565049, "grad_norm": 0.2890625, "learning_rate": 0.0010823863096139977, "loss": 1.9205, "step": 21051 }, { "epoch": 0.5649420352082439, "grad_norm": 0.28515625, "learning_rate": 0.0010823695978218572, "loss": 1.8905, "step": 21052 }, { "epoch": 0.5649688707599828, "grad_norm": 0.2890625, "learning_rate": 0.00108235288497154, "loss": 1.9639, "step": 21053 }, { "epoch": 0.5649957063117218, "grad_norm": 0.302734375, "learning_rate": 0.0010823361710630823, "loss": 1.9213, "step": 21054 }, { "epoch": 0.5650225418634607, "grad_norm": 0.294921875, "learning_rate": 0.001082319456096521, "loss": 1.9948, "step": 21055 }, { "epoch": 0.5650493774151997, "grad_norm": 0.291015625, "learning_rate": 0.0010823027400718927, "loss": 1.9427, "step": 21056 }, { "epoch": 0.5650762129669386, "grad_norm": 0.287109375, "learning_rate": 0.001082286022989234, "loss": 1.8792, "step": 21057 }, { "epoch": 0.5651030485186775, "grad_norm": 0.296875, "learning_rate": 0.001082269304848582, "loss": 1.9666, "step": 21058 }, { "epoch": 0.5651298840704165, "grad_norm": 0.298828125, "learning_rate": 0.0010822525856499727, "loss": 1.9583, "step": 21059 }, { "epoch": 0.5651567196221554, "grad_norm": 0.283203125, "learning_rate": 0.0010822358653934433, "loss": 1.8844, "step": 21060 }, { "epoch": 0.5651835551738944, "grad_norm": 0.2890625, "learning_rate": 0.00108221914407903, "loss": 2.0161, "step": 21061 }, { "epoch": 0.5652103907256333, "grad_norm": 0.283203125, "learning_rate": 0.00108220242170677, "loss": 1.9126, "step": 21062 }, { "epoch": 0.5652372262773723, "grad_norm": 0.287109375, "learning_rate": 0.0010821856982766997, "loss": 1.963, "step": 21063 }, { "epoch": 0.5652640618291112, "grad_norm": 0.279296875, "learning_rate": 0.0010821689737888559, "loss": 1.8713, "step": 21064 }, { "epoch": 0.5652908973808501, "grad_norm": 0.29296875, "learning_rate": 0.001082152248243275, "loss": 1.8822, "step": 21065 }, { "epoch": 0.5653177329325891, "grad_norm": 0.2890625, "learning_rate": 0.0010821355216399941, "loss": 1.8871, "step": 21066 }, { "epoch": 0.565344568484328, "grad_norm": 0.291015625, "learning_rate": 0.0010821187939790498, "loss": 1.9255, "step": 21067 }, { "epoch": 0.565371404036067, "grad_norm": 0.30078125, "learning_rate": 0.0010821020652604786, "loss": 2.0288, "step": 21068 }, { "epoch": 0.5653982395878059, "grad_norm": 0.3046875, "learning_rate": 0.0010820853354843172, "loss": 1.9598, "step": 21069 }, { "epoch": 0.5654250751395449, "grad_norm": 0.294921875, "learning_rate": 0.0010820686046506026, "loss": 1.9376, "step": 21070 }, { "epoch": 0.5654519106912839, "grad_norm": 0.2890625, "learning_rate": 0.0010820518727593712, "loss": 1.9269, "step": 21071 }, { "epoch": 0.5654787462430227, "grad_norm": 0.291015625, "learning_rate": 0.0010820351398106599, "loss": 1.8562, "step": 21072 }, { "epoch": 0.5655055817947617, "grad_norm": 0.287109375, "learning_rate": 0.0010820184058045051, "loss": 1.8614, "step": 21073 }, { "epoch": 0.5655324173465006, "grad_norm": 0.29296875, "learning_rate": 0.0010820016707409439, "loss": 1.9316, "step": 21074 }, { "epoch": 0.5655592528982396, "grad_norm": 0.291015625, "learning_rate": 0.001081984934620013, "loss": 1.9262, "step": 21075 }, { "epoch": 0.5655860884499785, "grad_norm": 0.287109375, "learning_rate": 0.0010819681974417487, "loss": 1.7533, "step": 21076 }, { "epoch": 0.5656129240017175, "grad_norm": 0.29296875, "learning_rate": 0.001081951459206188, "loss": 1.9393, "step": 21077 }, { "epoch": 0.5656397595534565, "grad_norm": 0.283203125, "learning_rate": 0.0010819347199133677, "loss": 1.817, "step": 21078 }, { "epoch": 0.5656665951051953, "grad_norm": 0.291015625, "learning_rate": 0.0010819179795633243, "loss": 1.8544, "step": 21079 }, { "epoch": 0.5656934306569343, "grad_norm": 0.302734375, "learning_rate": 0.0010819012381560948, "loss": 2.0091, "step": 21080 }, { "epoch": 0.5657202662086732, "grad_norm": 0.29296875, "learning_rate": 0.0010818844956917155, "loss": 1.873, "step": 21081 }, { "epoch": 0.5657471017604122, "grad_norm": 0.29296875, "learning_rate": 0.0010818677521702237, "loss": 1.8453, "step": 21082 }, { "epoch": 0.5657739373121511, "grad_norm": 0.28125, "learning_rate": 0.0010818510075916556, "loss": 1.8524, "step": 21083 }, { "epoch": 0.5658007728638901, "grad_norm": 0.29296875, "learning_rate": 0.0010818342619560483, "loss": 1.8788, "step": 21084 }, { "epoch": 0.5658276084156291, "grad_norm": 0.298828125, "learning_rate": 0.0010818175152634381, "loss": 1.9041, "step": 21085 }, { "epoch": 0.565854443967368, "grad_norm": 0.291015625, "learning_rate": 0.0010818007675138623, "loss": 1.9146, "step": 21086 }, { "epoch": 0.5658812795191069, "grad_norm": 0.29296875, "learning_rate": 0.0010817840187073572, "loss": 1.8686, "step": 21087 }, { "epoch": 0.5659081150708458, "grad_norm": 0.298828125, "learning_rate": 0.0010817672688439599, "loss": 1.8378, "step": 21088 }, { "epoch": 0.5659349506225848, "grad_norm": 0.294921875, "learning_rate": 0.001081750517923707, "loss": 1.8931, "step": 21089 }, { "epoch": 0.5659617861743237, "grad_norm": 0.296875, "learning_rate": 0.001081733765946635, "loss": 1.9395, "step": 21090 }, { "epoch": 0.5659886217260627, "grad_norm": 0.3125, "learning_rate": 0.001081717012912781, "loss": 2.0041, "step": 21091 }, { "epoch": 0.5660154572778017, "grad_norm": 0.298828125, "learning_rate": 0.0010817002588221812, "loss": 1.9117, "step": 21092 }, { "epoch": 0.5660422928295405, "grad_norm": 0.296875, "learning_rate": 0.001081683503674873, "loss": 1.8669, "step": 21093 }, { "epoch": 0.5660691283812795, "grad_norm": 0.30078125, "learning_rate": 0.0010816667474708931, "loss": 1.8439, "step": 21094 }, { "epoch": 0.5660959639330184, "grad_norm": 0.291015625, "learning_rate": 0.0010816499902102778, "loss": 1.8091, "step": 21095 }, { "epoch": 0.5661227994847574, "grad_norm": 0.291015625, "learning_rate": 0.0010816332318930642, "loss": 1.7909, "step": 21096 }, { "epoch": 0.5661496350364964, "grad_norm": 0.28125, "learning_rate": 0.0010816164725192892, "loss": 1.7686, "step": 21097 }, { "epoch": 0.5661764705882353, "grad_norm": 0.294921875, "learning_rate": 0.001081599712088989, "loss": 1.8188, "step": 21098 }, { "epoch": 0.5662033061399743, "grad_norm": 0.2890625, "learning_rate": 0.0010815829506022009, "loss": 1.7998, "step": 21099 }, { "epoch": 0.5662301416917132, "grad_norm": 0.291015625, "learning_rate": 0.0010815661880589616, "loss": 1.8312, "step": 21100 }, { "epoch": 0.5662569772434521, "grad_norm": 0.30078125, "learning_rate": 0.0010815494244593076, "loss": 1.7868, "step": 21101 }, { "epoch": 0.566283812795191, "grad_norm": 0.294921875, "learning_rate": 0.0010815326598032759, "loss": 1.7688, "step": 21102 }, { "epoch": 0.56631064834693, "grad_norm": 0.322265625, "learning_rate": 0.0010815158940909032, "loss": 1.9666, "step": 21103 }, { "epoch": 0.566337483898669, "grad_norm": 0.36328125, "learning_rate": 0.0010814991273222262, "loss": 2.122, "step": 21104 }, { "epoch": 0.5663643194504079, "grad_norm": 0.328125, "learning_rate": 0.001081482359497282, "loss": 2.1152, "step": 21105 }, { "epoch": 0.5663911550021469, "grad_norm": 0.3203125, "learning_rate": 0.0010814655906161068, "loss": 2.0847, "step": 21106 }, { "epoch": 0.5664179905538858, "grad_norm": 0.314453125, "learning_rate": 0.0010814488206787381, "loss": 2.0917, "step": 21107 }, { "epoch": 0.5664448261056247, "grad_norm": 0.30859375, "learning_rate": 0.0010814320496852123, "loss": 2.0358, "step": 21108 }, { "epoch": 0.5664716616573636, "grad_norm": 0.314453125, "learning_rate": 0.001081415277635566, "loss": 1.9981, "step": 21109 }, { "epoch": 0.5664984972091026, "grad_norm": 0.287109375, "learning_rate": 0.0010813985045298364, "loss": 1.9998, "step": 21110 }, { "epoch": 0.5665253327608416, "grad_norm": 0.29296875, "learning_rate": 0.00108138173036806, "loss": 2.0678, "step": 21111 }, { "epoch": 0.5665521683125805, "grad_norm": 0.306640625, "learning_rate": 0.0010813649551502738, "loss": 2.0732, "step": 21112 }, { "epoch": 0.5665790038643195, "grad_norm": 0.306640625, "learning_rate": 0.0010813481788765145, "loss": 2.1636, "step": 21113 }, { "epoch": 0.5666058394160584, "grad_norm": 0.30078125, "learning_rate": 0.0010813314015468188, "loss": 2.0412, "step": 21114 }, { "epoch": 0.5666326749677973, "grad_norm": 0.287109375, "learning_rate": 0.0010813146231612237, "loss": 1.9436, "step": 21115 }, { "epoch": 0.5666595105195363, "grad_norm": 0.287109375, "learning_rate": 0.0010812978437197659, "loss": 2.0064, "step": 21116 }, { "epoch": 0.5666863460712752, "grad_norm": 0.296875, "learning_rate": 0.0010812810632224823, "loss": 2.0643, "step": 21117 }, { "epoch": 0.5667131816230142, "grad_norm": 0.294921875, "learning_rate": 0.0010812642816694096, "loss": 2.0667, "step": 21118 }, { "epoch": 0.5667400171747531, "grad_norm": 0.291015625, "learning_rate": 0.0010812474990605843, "loss": 2.0071, "step": 21119 }, { "epoch": 0.5667668527264921, "grad_norm": 0.291015625, "learning_rate": 0.0010812307153960441, "loss": 1.9775, "step": 21120 }, { "epoch": 0.566793688278231, "grad_norm": 0.29296875, "learning_rate": 0.0010812139306758252, "loss": 2.0285, "step": 21121 }, { "epoch": 0.56682052382997, "grad_norm": 0.291015625, "learning_rate": 0.0010811971448999643, "loss": 1.8793, "step": 21122 }, { "epoch": 0.5668473593817089, "grad_norm": 0.291015625, "learning_rate": 0.0010811803580684986, "loss": 2.0189, "step": 21123 }, { "epoch": 0.5668741949334478, "grad_norm": 0.29296875, "learning_rate": 0.0010811635701814648, "loss": 1.9952, "step": 21124 }, { "epoch": 0.5669010304851868, "grad_norm": 0.27734375, "learning_rate": 0.0010811467812388996, "loss": 1.9088, "step": 21125 }, { "epoch": 0.5669278660369257, "grad_norm": 0.287109375, "learning_rate": 0.0010811299912408397, "loss": 1.9247, "step": 21126 }, { "epoch": 0.5669547015886647, "grad_norm": 0.41015625, "learning_rate": 0.0010811132001873225, "loss": 2.0548, "step": 21127 }, { "epoch": 0.5669815371404036, "grad_norm": 0.294921875, "learning_rate": 0.0010810964080783842, "loss": 1.9004, "step": 21128 }, { "epoch": 0.5670083726921425, "grad_norm": 0.734375, "learning_rate": 0.0010810796149140621, "loss": 1.9167, "step": 21129 }, { "epoch": 0.5670352082438815, "grad_norm": 0.2890625, "learning_rate": 0.001081062820694393, "loss": 1.9495, "step": 21130 }, { "epoch": 0.5670620437956204, "grad_norm": 0.296875, "learning_rate": 0.0010810460254194133, "loss": 1.9855, "step": 21131 }, { "epoch": 0.5670888793473594, "grad_norm": 0.296875, "learning_rate": 0.0010810292290891602, "loss": 1.9595, "step": 21132 }, { "epoch": 0.5671157148990983, "grad_norm": 0.29296875, "learning_rate": 0.0010810124317036708, "loss": 1.9898, "step": 21133 }, { "epoch": 0.5671425504508373, "grad_norm": 0.296875, "learning_rate": 0.0010809956332629813, "loss": 1.9098, "step": 21134 }, { "epoch": 0.5671693860025762, "grad_norm": 0.30859375, "learning_rate": 0.001080978833767129, "loss": 1.9968, "step": 21135 }, { "epoch": 0.5671962215543151, "grad_norm": 0.294921875, "learning_rate": 0.0010809620332161505, "loss": 1.896, "step": 21136 }, { "epoch": 0.5672230571060541, "grad_norm": 0.294921875, "learning_rate": 0.0010809452316100831, "loss": 1.9401, "step": 21137 }, { "epoch": 0.567249892657793, "grad_norm": 0.28515625, "learning_rate": 0.001080928428948963, "loss": 1.9344, "step": 21138 }, { "epoch": 0.567276728209532, "grad_norm": 0.28515625, "learning_rate": 0.0010809116252328277, "loss": 1.8988, "step": 21139 }, { "epoch": 0.5673035637612709, "grad_norm": 0.294921875, "learning_rate": 0.0010808948204617138, "loss": 1.911, "step": 21140 }, { "epoch": 0.5673303993130099, "grad_norm": 0.291015625, "learning_rate": 0.0010808780146356579, "loss": 1.8902, "step": 21141 }, { "epoch": 0.5673572348647489, "grad_norm": 0.291015625, "learning_rate": 0.0010808612077546974, "loss": 1.9534, "step": 21142 }, { "epoch": 0.5673840704164878, "grad_norm": 0.29296875, "learning_rate": 0.0010808443998188686, "loss": 1.971, "step": 21143 }, { "epoch": 0.5674109059682267, "grad_norm": 0.28515625, "learning_rate": 0.0010808275908282088, "loss": 1.9837, "step": 21144 }, { "epoch": 0.5674377415199656, "grad_norm": 0.294921875, "learning_rate": 0.0010808107807827548, "loss": 1.9761, "step": 21145 }, { "epoch": 0.5674645770717046, "grad_norm": 0.287109375, "learning_rate": 0.0010807939696825435, "loss": 1.9226, "step": 21146 }, { "epoch": 0.5674914126234435, "grad_norm": 0.291015625, "learning_rate": 0.0010807771575276114, "loss": 2.0314, "step": 21147 }, { "epoch": 0.5675182481751825, "grad_norm": 0.287109375, "learning_rate": 0.0010807603443179956, "loss": 1.9423, "step": 21148 }, { "epoch": 0.5675450837269215, "grad_norm": 0.29296875, "learning_rate": 0.0010807435300537332, "loss": 1.9881, "step": 21149 }, { "epoch": 0.5675719192786604, "grad_norm": 0.2734375, "learning_rate": 0.001080726714734861, "loss": 1.8314, "step": 21150 }, { "epoch": 0.5675987548303993, "grad_norm": 0.294921875, "learning_rate": 0.0010807098983614159, "loss": 1.906, "step": 21151 }, { "epoch": 0.5676255903821382, "grad_norm": 0.296875, "learning_rate": 0.0010806930809334344, "loss": 1.9641, "step": 21152 }, { "epoch": 0.5676524259338772, "grad_norm": 0.29296875, "learning_rate": 0.001080676262450954, "loss": 1.9522, "step": 21153 }, { "epoch": 0.5676792614856161, "grad_norm": 0.2890625, "learning_rate": 0.0010806594429140111, "loss": 1.8915, "step": 21154 }, { "epoch": 0.5677060970373551, "grad_norm": 0.29296875, "learning_rate": 0.0010806426223226428, "loss": 1.8885, "step": 21155 }, { "epoch": 0.5677329325890941, "grad_norm": 0.296875, "learning_rate": 0.0010806258006768861, "loss": 1.963, "step": 21156 }, { "epoch": 0.567759768140833, "grad_norm": 0.287109375, "learning_rate": 0.0010806089779767777, "loss": 1.8542, "step": 21157 }, { "epoch": 0.567786603692572, "grad_norm": 0.291015625, "learning_rate": 0.0010805921542223547, "loss": 1.8994, "step": 21158 }, { "epoch": 0.5678134392443108, "grad_norm": 0.2890625, "learning_rate": 0.001080575329413654, "loss": 1.9968, "step": 21159 }, { "epoch": 0.5678402747960498, "grad_norm": 0.28515625, "learning_rate": 0.001080558503550712, "loss": 1.9049, "step": 21160 }, { "epoch": 0.5678671103477888, "grad_norm": 0.275390625, "learning_rate": 0.0010805416766335664, "loss": 1.8168, "step": 21161 }, { "epoch": 0.5678939458995277, "grad_norm": 0.287109375, "learning_rate": 0.0010805248486622538, "loss": 1.8143, "step": 21162 }, { "epoch": 0.5679207814512667, "grad_norm": 0.28125, "learning_rate": 0.0010805080196368108, "loss": 1.8631, "step": 21163 }, { "epoch": 0.5679476170030056, "grad_norm": 0.294921875, "learning_rate": 0.0010804911895572747, "loss": 1.9653, "step": 21164 }, { "epoch": 0.5679744525547445, "grad_norm": 0.291015625, "learning_rate": 0.0010804743584236823, "loss": 1.9654, "step": 21165 }, { "epoch": 0.5680012881064834, "grad_norm": 0.294921875, "learning_rate": 0.0010804575262360705, "loss": 1.9379, "step": 21166 }, { "epoch": 0.5680281236582224, "grad_norm": 0.3046875, "learning_rate": 0.0010804406929944763, "loss": 1.9891, "step": 21167 }, { "epoch": 0.5680549592099614, "grad_norm": 0.287109375, "learning_rate": 0.0010804238586989365, "loss": 1.8911, "step": 21168 }, { "epoch": 0.5680817947617003, "grad_norm": 0.283203125, "learning_rate": 0.001080407023349488, "loss": 1.897, "step": 21169 }, { "epoch": 0.5681086303134393, "grad_norm": 0.2890625, "learning_rate": 0.0010803901869461678, "loss": 1.9352, "step": 21170 }, { "epoch": 0.5681354658651782, "grad_norm": 0.298828125, "learning_rate": 0.001080373349489013, "loss": 1.9162, "step": 21171 }, { "epoch": 0.5681623014169171, "grad_norm": 0.287109375, "learning_rate": 0.0010803565109780604, "loss": 1.9, "step": 21172 }, { "epoch": 0.568189136968656, "grad_norm": 0.291015625, "learning_rate": 0.0010803396714133468, "loss": 1.968, "step": 21173 }, { "epoch": 0.568215972520395, "grad_norm": 0.291015625, "learning_rate": 0.0010803228307949095, "loss": 1.8244, "step": 21174 }, { "epoch": 0.568242808072134, "grad_norm": 0.298828125, "learning_rate": 0.0010803059891227851, "loss": 1.9274, "step": 21175 }, { "epoch": 0.5682696436238729, "grad_norm": 0.283203125, "learning_rate": 0.0010802891463970107, "loss": 1.7901, "step": 21176 }, { "epoch": 0.5682964791756119, "grad_norm": 0.298828125, "learning_rate": 0.0010802723026176232, "loss": 1.8396, "step": 21177 }, { "epoch": 0.5683233147273508, "grad_norm": 0.30078125, "learning_rate": 0.0010802554577846596, "loss": 1.8442, "step": 21178 }, { "epoch": 0.5683501502790897, "grad_norm": 0.298828125, "learning_rate": 0.0010802386118981568, "loss": 1.8477, "step": 21179 }, { "epoch": 0.5683769858308286, "grad_norm": 0.294921875, "learning_rate": 0.0010802217649581519, "loss": 1.8809, "step": 21180 }, { "epoch": 0.5684038213825676, "grad_norm": 0.29296875, "learning_rate": 0.0010802049169646812, "loss": 1.8435, "step": 21181 }, { "epoch": 0.5684306569343066, "grad_norm": 0.287109375, "learning_rate": 0.0010801880679177829, "loss": 1.8976, "step": 21182 }, { "epoch": 0.5684574924860455, "grad_norm": 0.298828125, "learning_rate": 0.0010801712178174928, "loss": 1.9191, "step": 21183 }, { "epoch": 0.5684843280377845, "grad_norm": 0.298828125, "learning_rate": 0.0010801543666638484, "loss": 1.883, "step": 21184 }, { "epoch": 0.5685111635895234, "grad_norm": 0.306640625, "learning_rate": 0.0010801375144568867, "loss": 1.9154, "step": 21185 }, { "epoch": 0.5685379991412624, "grad_norm": 0.29296875, "learning_rate": 0.0010801206611966445, "loss": 1.8174, "step": 21186 }, { "epoch": 0.5685648346930013, "grad_norm": 0.30078125, "learning_rate": 0.0010801038068831588, "loss": 1.7887, "step": 21187 }, { "epoch": 0.5685916702447402, "grad_norm": 0.291015625, "learning_rate": 0.0010800869515164666, "loss": 1.9559, "step": 21188 }, { "epoch": 0.5686185057964792, "grad_norm": 0.298828125, "learning_rate": 0.0010800700950966049, "loss": 1.9327, "step": 21189 }, { "epoch": 0.5686453413482181, "grad_norm": 0.29296875, "learning_rate": 0.0010800532376236105, "loss": 1.8921, "step": 21190 }, { "epoch": 0.5686721768999571, "grad_norm": 0.283203125, "learning_rate": 0.0010800363790975207, "loss": 1.789, "step": 21191 }, { "epoch": 0.568699012451696, "grad_norm": 0.30859375, "learning_rate": 0.0010800195195183723, "loss": 1.8962, "step": 21192 }, { "epoch": 0.568725848003435, "grad_norm": 0.296875, "learning_rate": 0.0010800026588862022, "loss": 1.8525, "step": 21193 }, { "epoch": 0.5687526835551739, "grad_norm": 0.306640625, "learning_rate": 0.0010799857972010477, "loss": 1.936, "step": 21194 }, { "epoch": 0.5687795191069128, "grad_norm": 0.296875, "learning_rate": 0.0010799689344629453, "loss": 1.8404, "step": 21195 }, { "epoch": 0.5688063546586518, "grad_norm": 0.298828125, "learning_rate": 0.0010799520706719324, "loss": 1.8906, "step": 21196 }, { "epoch": 0.5688331902103907, "grad_norm": 0.29296875, "learning_rate": 0.0010799352058280458, "loss": 1.8234, "step": 21197 }, { "epoch": 0.5688600257621297, "grad_norm": 0.357421875, "learning_rate": 0.0010799183399313227, "loss": 2.0891, "step": 21198 }, { "epoch": 0.5688868613138686, "grad_norm": 0.3359375, "learning_rate": 0.0010799014729817999, "loss": 2.0349, "step": 21199 }, { "epoch": 0.5689136968656076, "grad_norm": 0.32421875, "learning_rate": 0.0010798846049795143, "loss": 2.0605, "step": 21200 }, { "epoch": 0.5689405324173465, "grad_norm": 0.333984375, "learning_rate": 0.0010798677359245034, "loss": 2.1512, "step": 21201 }, { "epoch": 0.5689673679690854, "grad_norm": 0.318359375, "learning_rate": 0.0010798508658168035, "loss": 2.1669, "step": 21202 }, { "epoch": 0.5689942035208244, "grad_norm": 0.30859375, "learning_rate": 0.0010798339946564521, "loss": 2.0393, "step": 21203 }, { "epoch": 0.5690210390725633, "grad_norm": 0.302734375, "learning_rate": 0.0010798171224434862, "loss": 1.9903, "step": 21204 }, { "epoch": 0.5690478746243023, "grad_norm": 0.306640625, "learning_rate": 0.0010798002491779425, "loss": 2.1522, "step": 21205 }, { "epoch": 0.5690747101760412, "grad_norm": 0.287109375, "learning_rate": 0.0010797833748598584, "loss": 1.9844, "step": 21206 }, { "epoch": 0.5691015457277802, "grad_norm": 0.291015625, "learning_rate": 0.0010797664994892707, "loss": 2.0387, "step": 21207 }, { "epoch": 0.5691283812795191, "grad_norm": 0.298828125, "learning_rate": 0.0010797496230662163, "loss": 2.0356, "step": 21208 }, { "epoch": 0.569155216831258, "grad_norm": 0.291015625, "learning_rate": 0.0010797327455907325, "loss": 1.9631, "step": 21209 }, { "epoch": 0.569182052382997, "grad_norm": 0.29296875, "learning_rate": 0.0010797158670628562, "loss": 2.0325, "step": 21210 }, { "epoch": 0.5692088879347359, "grad_norm": 0.2890625, "learning_rate": 0.0010796989874826243, "loss": 1.9749, "step": 21211 }, { "epoch": 0.5692357234864749, "grad_norm": 0.2890625, "learning_rate": 0.001079682106850074, "loss": 1.9166, "step": 21212 }, { "epoch": 0.5692625590382139, "grad_norm": 0.29296875, "learning_rate": 0.0010796652251652424, "loss": 1.9954, "step": 21213 }, { "epoch": 0.5692893945899528, "grad_norm": 0.28125, "learning_rate": 0.0010796483424281662, "loss": 1.8986, "step": 21214 }, { "epoch": 0.5693162301416917, "grad_norm": 0.296875, "learning_rate": 0.001079631458638883, "loss": 1.9392, "step": 21215 }, { "epoch": 0.5693430656934306, "grad_norm": 0.291015625, "learning_rate": 0.0010796145737974293, "loss": 1.9811, "step": 21216 }, { "epoch": 0.5693699012451696, "grad_norm": 0.2890625, "learning_rate": 0.0010795976879038422, "loss": 2.001, "step": 21217 }, { "epoch": 0.5693967367969085, "grad_norm": 0.287109375, "learning_rate": 0.001079580800958159, "loss": 1.9725, "step": 21218 }, { "epoch": 0.5694235723486475, "grad_norm": 0.2890625, "learning_rate": 0.0010795639129604166, "loss": 1.9991, "step": 21219 }, { "epoch": 0.5694504079003865, "grad_norm": 0.30859375, "learning_rate": 0.001079547023910652, "loss": 2.1122, "step": 21220 }, { "epoch": 0.5694772434521254, "grad_norm": 0.29296875, "learning_rate": 0.0010795301338089025, "loss": 2.0595, "step": 21221 }, { "epoch": 0.5695040790038643, "grad_norm": 0.294921875, "learning_rate": 0.0010795132426552048, "loss": 2.0785, "step": 21222 }, { "epoch": 0.5695309145556032, "grad_norm": 0.291015625, "learning_rate": 0.0010794963504495963, "loss": 2.0251, "step": 21223 }, { "epoch": 0.5695577501073422, "grad_norm": 0.283203125, "learning_rate": 0.0010794794571921135, "loss": 1.9707, "step": 21224 }, { "epoch": 0.5695845856590811, "grad_norm": 0.28515625, "learning_rate": 0.0010794625628827942, "loss": 1.9405, "step": 21225 }, { "epoch": 0.5696114212108201, "grad_norm": 0.28125, "learning_rate": 0.001079445667521675, "loss": 1.9041, "step": 21226 }, { "epoch": 0.5696382567625591, "grad_norm": 0.275390625, "learning_rate": 0.001079428771108793, "loss": 1.9219, "step": 21227 }, { "epoch": 0.569665092314298, "grad_norm": 0.28125, "learning_rate": 0.0010794118736441854, "loss": 2.0036, "step": 21228 }, { "epoch": 0.569691927866037, "grad_norm": 0.279296875, "learning_rate": 0.001079394975127889, "loss": 1.8107, "step": 21229 }, { "epoch": 0.5697187634177758, "grad_norm": 0.287109375, "learning_rate": 0.0010793780755599413, "loss": 1.9815, "step": 21230 }, { "epoch": 0.5697455989695148, "grad_norm": 0.298828125, "learning_rate": 0.001079361174940379, "loss": 2.021, "step": 21231 }, { "epoch": 0.5697724345212538, "grad_norm": 0.28515625, "learning_rate": 0.0010793442732692396, "loss": 1.9637, "step": 21232 }, { "epoch": 0.5697992700729927, "grad_norm": 0.287109375, "learning_rate": 0.0010793273705465595, "loss": 2.0439, "step": 21233 }, { "epoch": 0.5698261056247317, "grad_norm": 0.287109375, "learning_rate": 0.0010793104667723764, "loss": 1.9556, "step": 21234 }, { "epoch": 0.5698529411764706, "grad_norm": 0.30078125, "learning_rate": 0.001079293561946727, "loss": 1.9396, "step": 21235 }, { "epoch": 0.5698797767282096, "grad_norm": 0.283203125, "learning_rate": 0.0010792766560696486, "loss": 1.8672, "step": 21236 }, { "epoch": 0.5699066122799484, "grad_norm": 0.28515625, "learning_rate": 0.0010792597491411783, "loss": 1.938, "step": 21237 }, { "epoch": 0.5699334478316874, "grad_norm": 0.28125, "learning_rate": 0.001079242841161353, "loss": 1.9795, "step": 21238 }, { "epoch": 0.5699602833834264, "grad_norm": 0.298828125, "learning_rate": 0.00107922593213021, "loss": 1.9965, "step": 21239 }, { "epoch": 0.5699871189351653, "grad_norm": 0.28125, "learning_rate": 0.0010792090220477862, "loss": 1.8474, "step": 21240 }, { "epoch": 0.5700139544869043, "grad_norm": 0.2890625, "learning_rate": 0.0010791921109141187, "loss": 1.9643, "step": 21241 }, { "epoch": 0.5700407900386432, "grad_norm": 0.283203125, "learning_rate": 0.001079175198729245, "loss": 1.9101, "step": 21242 }, { "epoch": 0.5700676255903822, "grad_norm": 0.28515625, "learning_rate": 0.0010791582854932018, "loss": 1.9388, "step": 21243 }, { "epoch": 0.570094461142121, "grad_norm": 0.2890625, "learning_rate": 0.001079141371206026, "loss": 1.9084, "step": 21244 }, { "epoch": 0.57012129669386, "grad_norm": 0.283203125, "learning_rate": 0.0010791244558677553, "loss": 1.9642, "step": 21245 }, { "epoch": 0.570148132245599, "grad_norm": 0.2890625, "learning_rate": 0.0010791075394784265, "loss": 1.9555, "step": 21246 }, { "epoch": 0.5701749677973379, "grad_norm": 0.287109375, "learning_rate": 0.0010790906220380764, "loss": 1.8953, "step": 21247 }, { "epoch": 0.5702018033490769, "grad_norm": 0.287109375, "learning_rate": 0.0010790737035467426, "loss": 1.9633, "step": 21248 }, { "epoch": 0.5702286389008158, "grad_norm": 0.287109375, "learning_rate": 0.0010790567840044622, "loss": 1.9106, "step": 21249 }, { "epoch": 0.5702554744525548, "grad_norm": 0.296875, "learning_rate": 0.0010790398634112721, "loss": 1.9361, "step": 21250 }, { "epoch": 0.5702823100042936, "grad_norm": 0.28515625, "learning_rate": 0.0010790229417672094, "loss": 1.9593, "step": 21251 }, { "epoch": 0.5703091455560326, "grad_norm": 0.296875, "learning_rate": 0.0010790060190723114, "loss": 2.0382, "step": 21252 }, { "epoch": 0.5703359811077716, "grad_norm": 0.287109375, "learning_rate": 0.001078989095326615, "loss": 1.8577, "step": 21253 }, { "epoch": 0.5703628166595105, "grad_norm": 0.287109375, "learning_rate": 0.0010789721705301574, "loss": 1.8879, "step": 21254 }, { "epoch": 0.5703896522112495, "grad_norm": 0.30078125, "learning_rate": 0.001078955244682976, "loss": 1.9191, "step": 21255 }, { "epoch": 0.5704164877629884, "grad_norm": 0.291015625, "learning_rate": 0.0010789383177851075, "loss": 1.9086, "step": 21256 }, { "epoch": 0.5704433233147274, "grad_norm": 0.29296875, "learning_rate": 0.0010789213898365893, "loss": 1.9825, "step": 21257 }, { "epoch": 0.5704701588664663, "grad_norm": 0.283203125, "learning_rate": 0.0010789044608374584, "loss": 1.9725, "step": 21258 }, { "epoch": 0.5704969944182052, "grad_norm": 0.283203125, "learning_rate": 0.001078887530787752, "loss": 1.8925, "step": 21259 }, { "epoch": 0.5705238299699442, "grad_norm": 0.28515625, "learning_rate": 0.0010788705996875076, "loss": 1.9936, "step": 21260 }, { "epoch": 0.5705506655216831, "grad_norm": 0.28515625, "learning_rate": 0.0010788536675367614, "loss": 1.9089, "step": 21261 }, { "epoch": 0.5705775010734221, "grad_norm": 0.2890625, "learning_rate": 0.0010788367343355517, "loss": 1.8784, "step": 21262 }, { "epoch": 0.570604336625161, "grad_norm": 0.302734375, "learning_rate": 0.0010788198000839149, "loss": 1.9615, "step": 21263 }, { "epoch": 0.5706311721769, "grad_norm": 0.287109375, "learning_rate": 0.0010788028647818883, "loss": 1.8395, "step": 21264 }, { "epoch": 0.570658007728639, "grad_norm": 0.294921875, "learning_rate": 0.001078785928429509, "loss": 1.937, "step": 21265 }, { "epoch": 0.5706848432803778, "grad_norm": 0.275390625, "learning_rate": 0.0010787689910268141, "loss": 1.7782, "step": 21266 }, { "epoch": 0.5707116788321168, "grad_norm": 0.294921875, "learning_rate": 0.001078752052573841, "loss": 1.9233, "step": 21267 }, { "epoch": 0.5707385143838557, "grad_norm": 0.287109375, "learning_rate": 0.0010787351130706268, "loss": 1.7863, "step": 21268 }, { "epoch": 0.5707653499355947, "grad_norm": 0.279296875, "learning_rate": 0.0010787181725172086, "loss": 1.8467, "step": 21269 }, { "epoch": 0.5707921854873336, "grad_norm": 0.26953125, "learning_rate": 0.0010787012309136236, "loss": 1.7449, "step": 21270 }, { "epoch": 0.5708190210390726, "grad_norm": 0.29296875, "learning_rate": 0.0010786842882599089, "loss": 1.8299, "step": 21271 }, { "epoch": 0.5708458565908116, "grad_norm": 0.287109375, "learning_rate": 0.0010786673445561017, "loss": 1.756, "step": 21272 }, { "epoch": 0.5708726921425504, "grad_norm": 0.29296875, "learning_rate": 0.0010786503998022393, "loss": 1.9271, "step": 21273 }, { "epoch": 0.5708995276942894, "grad_norm": 0.291015625, "learning_rate": 0.0010786334539983587, "loss": 1.9181, "step": 21274 }, { "epoch": 0.5709263632460283, "grad_norm": 0.291015625, "learning_rate": 0.001078616507144497, "loss": 1.8731, "step": 21275 }, { "epoch": 0.5709531987977673, "grad_norm": 0.291015625, "learning_rate": 0.0010785995592406917, "loss": 1.8761, "step": 21276 }, { "epoch": 0.5709800343495062, "grad_norm": 0.30078125, "learning_rate": 0.0010785826102869794, "loss": 1.8329, "step": 21277 }, { "epoch": 0.5710068699012452, "grad_norm": 0.30859375, "learning_rate": 0.001078565660283398, "loss": 1.955, "step": 21278 }, { "epoch": 0.5710337054529842, "grad_norm": 0.287109375, "learning_rate": 0.0010785487092299841, "loss": 1.8167, "step": 21279 }, { "epoch": 0.571060541004723, "grad_norm": 0.29296875, "learning_rate": 0.0010785317571267753, "loss": 2.0014, "step": 21280 }, { "epoch": 0.571087376556462, "grad_norm": 0.283203125, "learning_rate": 0.0010785148039738086, "loss": 1.7936, "step": 21281 }, { "epoch": 0.5711142121082009, "grad_norm": 0.296875, "learning_rate": 0.0010784978497711212, "loss": 1.9139, "step": 21282 }, { "epoch": 0.5711410476599399, "grad_norm": 0.291015625, "learning_rate": 0.0010784808945187502, "loss": 1.9102, "step": 21283 }, { "epoch": 0.5711678832116789, "grad_norm": 0.291015625, "learning_rate": 0.001078463938216733, "loss": 1.8567, "step": 21284 }, { "epoch": 0.5711947187634178, "grad_norm": 0.28515625, "learning_rate": 0.0010784469808651066, "loss": 1.8117, "step": 21285 }, { "epoch": 0.5712215543151568, "grad_norm": 0.30078125, "learning_rate": 0.0010784300224639081, "loss": 1.8709, "step": 21286 }, { "epoch": 0.5712483898668956, "grad_norm": 0.296875, "learning_rate": 0.0010784130630131753, "loss": 1.8743, "step": 21287 }, { "epoch": 0.5712752254186346, "grad_norm": 0.298828125, "learning_rate": 0.0010783961025129447, "loss": 1.8983, "step": 21288 }, { "epoch": 0.5713020609703735, "grad_norm": 0.287109375, "learning_rate": 0.001078379140963254, "loss": 1.7887, "step": 21289 }, { "epoch": 0.5713288965221125, "grad_norm": 0.3125, "learning_rate": 0.0010783621783641399, "loss": 1.8717, "step": 21290 }, { "epoch": 0.5713557320738515, "grad_norm": 0.37109375, "learning_rate": 0.0010783452147156403, "loss": 2.0437, "step": 21291 }, { "epoch": 0.5713825676255904, "grad_norm": 0.318359375, "learning_rate": 0.0010783282500177918, "loss": 2.0835, "step": 21292 }, { "epoch": 0.5714094031773294, "grad_norm": 0.322265625, "learning_rate": 0.001078311284270632, "loss": 2.0903, "step": 21293 }, { "epoch": 0.5714362387290682, "grad_norm": 0.33203125, "learning_rate": 0.0010782943174741976, "loss": 2.1246, "step": 21294 }, { "epoch": 0.5714630742808072, "grad_norm": 0.30078125, "learning_rate": 0.0010782773496285265, "loss": 2.0882, "step": 21295 }, { "epoch": 0.5714899098325461, "grad_norm": 0.298828125, "learning_rate": 0.0010782603807336557, "loss": 1.9886, "step": 21296 }, { "epoch": 0.5715167453842851, "grad_norm": 0.306640625, "learning_rate": 0.0010782434107896222, "loss": 1.9994, "step": 21297 }, { "epoch": 0.5715435809360241, "grad_norm": 0.294921875, "learning_rate": 0.0010782264397964632, "loss": 1.948, "step": 21298 }, { "epoch": 0.571570416487763, "grad_norm": 0.30078125, "learning_rate": 0.0010782094677542161, "loss": 2.0477, "step": 21299 }, { "epoch": 0.571597252039502, "grad_norm": 0.28515625, "learning_rate": 0.0010781924946629182, "loss": 1.9559, "step": 21300 }, { "epoch": 0.5716240875912408, "grad_norm": 0.28515625, "learning_rate": 0.0010781755205226068, "loss": 2.0151, "step": 21301 }, { "epoch": 0.5716509231429798, "grad_norm": 0.287109375, "learning_rate": 0.001078158545333319, "loss": 2.0078, "step": 21302 }, { "epoch": 0.5716777586947188, "grad_norm": 0.279296875, "learning_rate": 0.001078141569095092, "loss": 1.9269, "step": 21303 }, { "epoch": 0.5717045942464577, "grad_norm": 0.291015625, "learning_rate": 0.0010781245918079628, "loss": 2.0387, "step": 21304 }, { "epoch": 0.5717314297981967, "grad_norm": 0.3046875, "learning_rate": 0.001078107613471969, "loss": 2.081, "step": 21305 }, { "epoch": 0.5717582653499356, "grad_norm": 0.296875, "learning_rate": 0.0010780906340871479, "loss": 2.1008, "step": 21306 }, { "epoch": 0.5717851009016746, "grad_norm": 0.29296875, "learning_rate": 0.0010780736536535364, "loss": 2.0482, "step": 21307 }, { "epoch": 0.5718119364534134, "grad_norm": 0.2890625, "learning_rate": 0.0010780566721711722, "loss": 2.0905, "step": 21308 }, { "epoch": 0.5718387720051524, "grad_norm": 0.291015625, "learning_rate": 0.0010780396896400922, "loss": 1.948, "step": 21309 }, { "epoch": 0.5718656075568914, "grad_norm": 0.287109375, "learning_rate": 0.0010780227060603338, "loss": 1.9868, "step": 21310 }, { "epoch": 0.5718924431086303, "grad_norm": 0.2890625, "learning_rate": 0.001078005721431934, "loss": 1.9528, "step": 21311 }, { "epoch": 0.5719192786603693, "grad_norm": 0.27734375, "learning_rate": 0.0010779887357549307, "loss": 1.9385, "step": 21312 }, { "epoch": 0.5719461142121082, "grad_norm": 0.28125, "learning_rate": 0.0010779717490293605, "loss": 1.9421, "step": 21313 }, { "epoch": 0.5719729497638472, "grad_norm": 0.283203125, "learning_rate": 0.001077954761255261, "loss": 1.9306, "step": 21314 }, { "epoch": 0.571999785315586, "grad_norm": 0.298828125, "learning_rate": 0.0010779377724326692, "loss": 2.0534, "step": 21315 }, { "epoch": 0.572026620867325, "grad_norm": 0.28125, "learning_rate": 0.0010779207825616225, "loss": 1.8249, "step": 21316 }, { "epoch": 0.572053456419064, "grad_norm": 0.30078125, "learning_rate": 0.0010779037916421584, "loss": 1.9496, "step": 21317 }, { "epoch": 0.5720802919708029, "grad_norm": 0.287109375, "learning_rate": 0.001077886799674314, "loss": 1.9709, "step": 21318 }, { "epoch": 0.5721071275225419, "grad_norm": 0.28515625, "learning_rate": 0.0010778698066581265, "loss": 1.9888, "step": 21319 }, { "epoch": 0.5721339630742808, "grad_norm": 0.287109375, "learning_rate": 0.001077852812593633, "loss": 2.0284, "step": 21320 }, { "epoch": 0.5721607986260198, "grad_norm": 0.279296875, "learning_rate": 0.0010778358174808715, "loss": 1.8654, "step": 21321 }, { "epoch": 0.5721876341777586, "grad_norm": 0.283203125, "learning_rate": 0.0010778188213198783, "loss": 1.9051, "step": 21322 }, { "epoch": 0.5722144697294976, "grad_norm": 0.2890625, "learning_rate": 0.0010778018241106916, "loss": 1.9215, "step": 21323 }, { "epoch": 0.5722413052812366, "grad_norm": 0.287109375, "learning_rate": 0.001077784825853348, "loss": 2.0254, "step": 21324 }, { "epoch": 0.5722681408329755, "grad_norm": 0.29296875, "learning_rate": 0.0010777678265478851, "loss": 1.922, "step": 21325 }, { "epoch": 0.5722949763847145, "grad_norm": 0.291015625, "learning_rate": 0.00107775082619434, "loss": 2.0207, "step": 21326 }, { "epoch": 0.5723218119364534, "grad_norm": 0.294921875, "learning_rate": 0.0010777338247927506, "loss": 1.9674, "step": 21327 }, { "epoch": 0.5723486474881924, "grad_norm": 0.2890625, "learning_rate": 0.0010777168223431533, "loss": 1.9617, "step": 21328 }, { "epoch": 0.5723754830399314, "grad_norm": 0.2890625, "learning_rate": 0.001077699818845586, "loss": 1.9497, "step": 21329 }, { "epoch": 0.5724023185916702, "grad_norm": 0.3046875, "learning_rate": 0.0010776828143000859, "loss": 2.0468, "step": 21330 }, { "epoch": 0.5724291541434092, "grad_norm": 0.28125, "learning_rate": 0.0010776658087066902, "loss": 1.8528, "step": 21331 }, { "epoch": 0.5724559896951481, "grad_norm": 0.28515625, "learning_rate": 0.0010776488020654363, "loss": 1.9796, "step": 21332 }, { "epoch": 0.5724828252468871, "grad_norm": 0.28515625, "learning_rate": 0.0010776317943763612, "loss": 1.9424, "step": 21333 }, { "epoch": 0.572509660798626, "grad_norm": 0.287109375, "learning_rate": 0.0010776147856395026, "loss": 1.965, "step": 21334 }, { "epoch": 0.572536496350365, "grad_norm": 0.287109375, "learning_rate": 0.001077597775854898, "loss": 1.9592, "step": 21335 }, { "epoch": 0.572563331902104, "grad_norm": 0.28515625, "learning_rate": 0.001077580765022584, "loss": 1.9083, "step": 21336 }, { "epoch": 0.5725901674538428, "grad_norm": 0.2890625, "learning_rate": 0.0010775637531425983, "loss": 1.8946, "step": 21337 }, { "epoch": 0.5726170030055818, "grad_norm": 0.294921875, "learning_rate": 0.0010775467402149783, "loss": 1.8893, "step": 21338 }, { "epoch": 0.5726438385573207, "grad_norm": 0.296875, "learning_rate": 0.0010775297262397615, "loss": 1.952, "step": 21339 }, { "epoch": 0.5726706741090597, "grad_norm": 0.29296875, "learning_rate": 0.0010775127112169847, "loss": 2.0036, "step": 21340 }, { "epoch": 0.5726975096607986, "grad_norm": 0.294921875, "learning_rate": 0.0010774956951466856, "loss": 1.9922, "step": 21341 }, { "epoch": 0.5727243452125376, "grad_norm": 0.287109375, "learning_rate": 0.0010774786780289015, "loss": 1.8566, "step": 21342 }, { "epoch": 0.5727511807642766, "grad_norm": 0.28515625, "learning_rate": 0.0010774616598636694, "loss": 1.9179, "step": 21343 }, { "epoch": 0.5727780163160154, "grad_norm": 0.294921875, "learning_rate": 0.0010774446406510271, "loss": 1.9264, "step": 21344 }, { "epoch": 0.5728048518677544, "grad_norm": 0.2890625, "learning_rate": 0.0010774276203910117, "loss": 1.8914, "step": 21345 }, { "epoch": 0.5728316874194933, "grad_norm": 0.28515625, "learning_rate": 0.0010774105990836605, "loss": 1.903, "step": 21346 }, { "epoch": 0.5728585229712323, "grad_norm": 0.294921875, "learning_rate": 0.001077393576729011, "loss": 1.8945, "step": 21347 }, { "epoch": 0.5728853585229712, "grad_norm": 0.291015625, "learning_rate": 0.0010773765533271004, "loss": 1.9646, "step": 21348 }, { "epoch": 0.5729121940747102, "grad_norm": 0.294921875, "learning_rate": 0.001077359528877966, "loss": 1.9108, "step": 21349 }, { "epoch": 0.5729390296264492, "grad_norm": 0.2890625, "learning_rate": 0.0010773425033816452, "loss": 1.8775, "step": 21350 }, { "epoch": 0.572965865178188, "grad_norm": 0.296875, "learning_rate": 0.0010773254768381756, "loss": 1.8562, "step": 21351 }, { "epoch": 0.572992700729927, "grad_norm": 0.294921875, "learning_rate": 0.0010773084492475941, "loss": 1.9876, "step": 21352 }, { "epoch": 0.5730195362816659, "grad_norm": 0.2890625, "learning_rate": 0.0010772914206099383, "loss": 1.8803, "step": 21353 }, { "epoch": 0.5730463718334049, "grad_norm": 0.291015625, "learning_rate": 0.0010772743909252458, "loss": 1.8725, "step": 21354 }, { "epoch": 0.5730732073851439, "grad_norm": 0.28125, "learning_rate": 0.0010772573601935537, "loss": 1.8737, "step": 21355 }, { "epoch": 0.5731000429368828, "grad_norm": 0.296875, "learning_rate": 0.0010772403284148992, "loss": 1.9435, "step": 21356 }, { "epoch": 0.5731268784886218, "grad_norm": 0.310546875, "learning_rate": 0.0010772232955893197, "loss": 1.9548, "step": 21357 }, { "epoch": 0.5731537140403606, "grad_norm": 0.3125, "learning_rate": 0.0010772062617168529, "loss": 1.9664, "step": 21358 }, { "epoch": 0.5731805495920996, "grad_norm": 0.291015625, "learning_rate": 0.001077189226797536, "loss": 1.9184, "step": 21359 }, { "epoch": 0.5732073851438385, "grad_norm": 0.298828125, "learning_rate": 0.0010771721908314062, "loss": 1.8797, "step": 21360 }, { "epoch": 0.5732342206955775, "grad_norm": 0.287109375, "learning_rate": 0.001077155153818501, "loss": 1.8523, "step": 21361 }, { "epoch": 0.5732610562473165, "grad_norm": 0.287109375, "learning_rate": 0.0010771381157588576, "loss": 1.8668, "step": 21362 }, { "epoch": 0.5732878917990554, "grad_norm": 0.28125, "learning_rate": 0.0010771210766525139, "loss": 1.8684, "step": 21363 }, { "epoch": 0.5733147273507944, "grad_norm": 0.29296875, "learning_rate": 0.0010771040364995067, "loss": 1.8834, "step": 21364 }, { "epoch": 0.5733415629025332, "grad_norm": 0.283203125, "learning_rate": 0.0010770869952998737, "loss": 1.8006, "step": 21365 }, { "epoch": 0.5733683984542722, "grad_norm": 0.298828125, "learning_rate": 0.0010770699530536522, "loss": 1.8283, "step": 21366 }, { "epoch": 0.5733952340060111, "grad_norm": 0.306640625, "learning_rate": 0.0010770529097608795, "loss": 1.8993, "step": 21367 }, { "epoch": 0.5734220695577501, "grad_norm": 0.291015625, "learning_rate": 0.0010770358654215932, "loss": 1.8265, "step": 21368 }, { "epoch": 0.5734489051094891, "grad_norm": 0.28515625, "learning_rate": 0.0010770188200358304, "loss": 1.8829, "step": 21369 }, { "epoch": 0.573475740661228, "grad_norm": 0.287109375, "learning_rate": 0.0010770017736036286, "loss": 1.8309, "step": 21370 }, { "epoch": 0.573502576212967, "grad_norm": 0.28515625, "learning_rate": 0.0010769847261250256, "loss": 1.9351, "step": 21371 }, { "epoch": 0.5735294117647058, "grad_norm": 0.291015625, "learning_rate": 0.0010769676776000582, "loss": 1.9197, "step": 21372 }, { "epoch": 0.5735562473164448, "grad_norm": 0.30078125, "learning_rate": 0.0010769506280287639, "loss": 1.8475, "step": 21373 }, { "epoch": 0.5735830828681838, "grad_norm": 0.298828125, "learning_rate": 0.0010769335774111805, "loss": 1.8809, "step": 21374 }, { "epoch": 0.5736099184199227, "grad_norm": 0.3046875, "learning_rate": 0.001076916525747345, "loss": 1.8855, "step": 21375 }, { "epoch": 0.5736367539716617, "grad_norm": 0.298828125, "learning_rate": 0.001076899473037295, "loss": 1.8999, "step": 21376 }, { "epoch": 0.5736635895234006, "grad_norm": 0.287109375, "learning_rate": 0.001076882419281068, "loss": 1.8037, "step": 21377 }, { "epoch": 0.5736904250751396, "grad_norm": 0.29296875, "learning_rate": 0.001076865364478701, "loss": 1.864, "step": 21378 }, { "epoch": 0.5737172606268784, "grad_norm": 0.2890625, "learning_rate": 0.0010768483086302319, "loss": 1.8396, "step": 21379 }, { "epoch": 0.5737440961786174, "grad_norm": 0.3125, "learning_rate": 0.0010768312517356977, "loss": 1.917, "step": 21380 }, { "epoch": 0.5737709317303564, "grad_norm": 0.357421875, "learning_rate": 0.0010768141937951361, "loss": 2.0761, "step": 21381 }, { "epoch": 0.5737977672820953, "grad_norm": 0.33984375, "learning_rate": 0.0010767971348085846, "loss": 2.0782, "step": 21382 }, { "epoch": 0.5738246028338343, "grad_norm": 0.318359375, "learning_rate": 0.0010767800747760803, "loss": 2.035, "step": 21383 }, { "epoch": 0.5738514383855732, "grad_norm": 0.32421875, "learning_rate": 0.001076763013697661, "loss": 2.1399, "step": 21384 }, { "epoch": 0.5738782739373122, "grad_norm": 0.31640625, "learning_rate": 0.0010767459515733636, "loss": 2.0782, "step": 21385 }, { "epoch": 0.573905109489051, "grad_norm": 0.306640625, "learning_rate": 0.001076728888403226, "loss": 1.9594, "step": 21386 }, { "epoch": 0.57393194504079, "grad_norm": 0.291015625, "learning_rate": 0.0010767118241872854, "loss": 1.9934, "step": 21387 }, { "epoch": 0.573958780592529, "grad_norm": 0.28125, "learning_rate": 0.0010766947589255795, "loss": 1.9189, "step": 21388 }, { "epoch": 0.5739856161442679, "grad_norm": 0.298828125, "learning_rate": 0.0010766776926181452, "loss": 1.978, "step": 21389 }, { "epoch": 0.5740124516960069, "grad_norm": 0.298828125, "learning_rate": 0.0010766606252650206, "loss": 2.0278, "step": 21390 }, { "epoch": 0.5740392872477458, "grad_norm": 0.298828125, "learning_rate": 0.0010766435568662427, "loss": 2.0705, "step": 21391 }, { "epoch": 0.5740661227994848, "grad_norm": 0.291015625, "learning_rate": 0.001076626487421849, "loss": 2.0275, "step": 21392 }, { "epoch": 0.5740929583512236, "grad_norm": 0.294921875, "learning_rate": 0.0010766094169318773, "loss": 1.969, "step": 21393 }, { "epoch": 0.5741197939029626, "grad_norm": 0.287109375, "learning_rate": 0.0010765923453963644, "loss": 2.0337, "step": 21394 }, { "epoch": 0.5741466294547016, "grad_norm": 0.28125, "learning_rate": 0.0010765752728153484, "loss": 1.9896, "step": 21395 }, { "epoch": 0.5741734650064405, "grad_norm": 0.294921875, "learning_rate": 0.0010765581991888664, "loss": 2.0711, "step": 21396 }, { "epoch": 0.5742003005581795, "grad_norm": 0.298828125, "learning_rate": 0.0010765411245169556, "loss": 2.0126, "step": 21397 }, { "epoch": 0.5742271361099184, "grad_norm": 0.2890625, "learning_rate": 0.0010765240487996541, "loss": 1.9316, "step": 21398 }, { "epoch": 0.5742539716616574, "grad_norm": 0.287109375, "learning_rate": 0.001076506972036999, "loss": 1.9264, "step": 21399 }, { "epoch": 0.5742808072133964, "grad_norm": 0.2890625, "learning_rate": 0.0010764898942290278, "loss": 1.9566, "step": 21400 }, { "epoch": 0.5743076427651352, "grad_norm": 0.287109375, "learning_rate": 0.0010764728153757777, "loss": 2.0211, "step": 21401 }, { "epoch": 0.5743344783168742, "grad_norm": 0.279296875, "learning_rate": 0.0010764557354772867, "loss": 1.9915, "step": 21402 }, { "epoch": 0.5743613138686131, "grad_norm": 0.27734375, "learning_rate": 0.0010764386545335921, "loss": 1.8659, "step": 21403 }, { "epoch": 0.5743881494203521, "grad_norm": 0.28515625, "learning_rate": 0.001076421572544731, "loss": 1.8309, "step": 21404 }, { "epoch": 0.574414984972091, "grad_norm": 0.294921875, "learning_rate": 0.0010764044895107412, "loss": 2.0122, "step": 21405 }, { "epoch": 0.57444182052383, "grad_norm": 0.296875, "learning_rate": 0.0010763874054316603, "loss": 2.1025, "step": 21406 }, { "epoch": 0.574468656075569, "grad_norm": 0.283203125, "learning_rate": 0.0010763703203075254, "loss": 1.9145, "step": 21407 }, { "epoch": 0.5744954916273078, "grad_norm": 0.298828125, "learning_rate": 0.001076353234138374, "loss": 2.0457, "step": 21408 }, { "epoch": 0.5745223271790468, "grad_norm": 0.291015625, "learning_rate": 0.001076336146924244, "loss": 1.991, "step": 21409 }, { "epoch": 0.5745491627307857, "grad_norm": 0.29296875, "learning_rate": 0.0010763190586651726, "loss": 1.9751, "step": 21410 }, { "epoch": 0.5745759982825247, "grad_norm": 0.291015625, "learning_rate": 0.0010763019693611972, "loss": 1.9999, "step": 21411 }, { "epoch": 0.5746028338342636, "grad_norm": 0.28515625, "learning_rate": 0.0010762848790123554, "loss": 1.9391, "step": 21412 }, { "epoch": 0.5746296693860026, "grad_norm": 0.287109375, "learning_rate": 0.001076267787618685, "loss": 1.9429, "step": 21413 }, { "epoch": 0.5746565049377416, "grad_norm": 0.2890625, "learning_rate": 0.0010762506951802228, "loss": 1.9805, "step": 21414 }, { "epoch": 0.5746833404894804, "grad_norm": 0.287109375, "learning_rate": 0.001076233601697007, "loss": 1.9034, "step": 21415 }, { "epoch": 0.5747101760412194, "grad_norm": 0.291015625, "learning_rate": 0.0010762165071690746, "loss": 1.9835, "step": 21416 }, { "epoch": 0.5747370115929583, "grad_norm": 0.294921875, "learning_rate": 0.0010761994115964634, "loss": 1.9731, "step": 21417 }, { "epoch": 0.5747638471446973, "grad_norm": 0.287109375, "learning_rate": 0.0010761823149792109, "loss": 1.9245, "step": 21418 }, { "epoch": 0.5747906826964362, "grad_norm": 0.28515625, "learning_rate": 0.001076165217317354, "loss": 2.0093, "step": 21419 }, { "epoch": 0.5748175182481752, "grad_norm": 0.296875, "learning_rate": 0.0010761481186109313, "loss": 1.975, "step": 21420 }, { "epoch": 0.5748443537999142, "grad_norm": 0.294921875, "learning_rate": 0.0010761310188599793, "loss": 1.9814, "step": 21421 }, { "epoch": 0.574871189351653, "grad_norm": 0.287109375, "learning_rate": 0.0010761139180645361, "loss": 1.9768, "step": 21422 }, { "epoch": 0.574898024903392, "grad_norm": 0.28125, "learning_rate": 0.0010760968162246392, "loss": 1.9056, "step": 21423 }, { "epoch": 0.5749248604551309, "grad_norm": 0.28125, "learning_rate": 0.0010760797133403258, "loss": 1.9504, "step": 21424 }, { "epoch": 0.5749516960068699, "grad_norm": 0.287109375, "learning_rate": 0.0010760626094116334, "loss": 2.0032, "step": 21425 }, { "epoch": 0.5749785315586089, "grad_norm": 0.27734375, "learning_rate": 0.0010760455044386, "loss": 1.8393, "step": 21426 }, { "epoch": 0.5750053671103478, "grad_norm": 0.283203125, "learning_rate": 0.0010760283984212625, "loss": 2.0131, "step": 21427 }, { "epoch": 0.5750322026620868, "grad_norm": 0.28125, "learning_rate": 0.001076011291359659, "loss": 1.8951, "step": 21428 }, { "epoch": 0.5750590382138256, "grad_norm": 0.2890625, "learning_rate": 0.0010759941832538266, "loss": 1.9622, "step": 21429 }, { "epoch": 0.5750858737655646, "grad_norm": 0.29296875, "learning_rate": 0.001075977074103803, "loss": 1.949, "step": 21430 }, { "epoch": 0.5751127093173035, "grad_norm": 0.283203125, "learning_rate": 0.0010759599639096258, "loss": 1.9634, "step": 21431 }, { "epoch": 0.5751395448690425, "grad_norm": 0.28515625, "learning_rate": 0.0010759428526713325, "loss": 1.8282, "step": 21432 }, { "epoch": 0.5751663804207815, "grad_norm": 0.2890625, "learning_rate": 0.0010759257403889608, "loss": 1.9681, "step": 21433 }, { "epoch": 0.5751932159725204, "grad_norm": 0.287109375, "learning_rate": 0.0010759086270625477, "loss": 1.8786, "step": 21434 }, { "epoch": 0.5752200515242594, "grad_norm": 0.291015625, "learning_rate": 0.0010758915126921314, "loss": 1.9583, "step": 21435 }, { "epoch": 0.5752468870759982, "grad_norm": 0.2890625, "learning_rate": 0.0010758743972777488, "loss": 1.8565, "step": 21436 }, { "epoch": 0.5752737226277372, "grad_norm": 0.28125, "learning_rate": 0.001075857280819438, "loss": 1.8215, "step": 21437 }, { "epoch": 0.5753005581794761, "grad_norm": 0.283203125, "learning_rate": 0.0010758401633172363, "loss": 1.8801, "step": 21438 }, { "epoch": 0.5753273937312151, "grad_norm": 0.294921875, "learning_rate": 0.0010758230447711812, "loss": 1.9502, "step": 21439 }, { "epoch": 0.5753542292829541, "grad_norm": 0.296875, "learning_rate": 0.0010758059251813102, "loss": 1.9422, "step": 21440 }, { "epoch": 0.575381064834693, "grad_norm": 0.28515625, "learning_rate": 0.0010757888045476612, "loss": 1.8632, "step": 21441 }, { "epoch": 0.575407900386432, "grad_norm": 0.296875, "learning_rate": 0.0010757716828702715, "loss": 2.02, "step": 21442 }, { "epoch": 0.5754347359381708, "grad_norm": 0.291015625, "learning_rate": 0.0010757545601491787, "loss": 1.983, "step": 21443 }, { "epoch": 0.5754615714899098, "grad_norm": 0.283203125, "learning_rate": 0.0010757374363844204, "loss": 1.9046, "step": 21444 }, { "epoch": 0.5754884070416488, "grad_norm": 0.2890625, "learning_rate": 0.0010757203115760341, "loss": 1.8625, "step": 21445 }, { "epoch": 0.5755152425933877, "grad_norm": 0.29296875, "learning_rate": 0.0010757031857240571, "loss": 1.8705, "step": 21446 }, { "epoch": 0.5755420781451267, "grad_norm": 0.283203125, "learning_rate": 0.0010756860588285277, "loss": 1.8759, "step": 21447 }, { "epoch": 0.5755689136968656, "grad_norm": 0.291015625, "learning_rate": 0.0010756689308894828, "loss": 1.8286, "step": 21448 }, { "epoch": 0.5755957492486046, "grad_norm": 0.28515625, "learning_rate": 0.0010756518019069605, "loss": 1.8991, "step": 21449 }, { "epoch": 0.5756225848003435, "grad_norm": 0.2890625, "learning_rate": 0.0010756346718809977, "loss": 1.8303, "step": 21450 }, { "epoch": 0.5756494203520824, "grad_norm": 0.294921875, "learning_rate": 0.0010756175408116325, "loss": 1.9032, "step": 21451 }, { "epoch": 0.5756762559038214, "grad_norm": 0.28125, "learning_rate": 0.0010756004086989025, "loss": 1.8094, "step": 21452 }, { "epoch": 0.5757030914555603, "grad_norm": 0.291015625, "learning_rate": 0.001075583275542845, "loss": 1.8224, "step": 21453 }, { "epoch": 0.5757299270072993, "grad_norm": 0.29296875, "learning_rate": 0.0010755661413434977, "loss": 1.9593, "step": 21454 }, { "epoch": 0.5757567625590382, "grad_norm": 0.291015625, "learning_rate": 0.0010755490061008983, "loss": 1.8792, "step": 21455 }, { "epoch": 0.5757835981107772, "grad_norm": 0.291015625, "learning_rate": 0.0010755318698150841, "loss": 1.8019, "step": 21456 }, { "epoch": 0.575810433662516, "grad_norm": 0.283203125, "learning_rate": 0.001075514732486093, "loss": 1.8179, "step": 21457 }, { "epoch": 0.575837269214255, "grad_norm": 0.28515625, "learning_rate": 0.0010754975941139624, "loss": 1.8769, "step": 21458 }, { "epoch": 0.575864104765994, "grad_norm": 0.283203125, "learning_rate": 0.0010754804546987302, "loss": 1.7636, "step": 21459 }, { "epoch": 0.5758909403177329, "grad_norm": 0.291015625, "learning_rate": 0.0010754633142404334, "loss": 1.8477, "step": 21460 }, { "epoch": 0.5759177758694719, "grad_norm": 0.296875, "learning_rate": 0.0010754461727391102, "loss": 1.8163, "step": 21461 }, { "epoch": 0.5759446114212108, "grad_norm": 0.29296875, "learning_rate": 0.001075429030194798, "loss": 1.9242, "step": 21462 }, { "epoch": 0.5759714469729498, "grad_norm": 0.294921875, "learning_rate": 0.0010754118866075342, "loss": 1.9085, "step": 21463 }, { "epoch": 0.5759982825246887, "grad_norm": 0.3125, "learning_rate": 0.0010753947419773569, "loss": 1.8741, "step": 21464 }, { "epoch": 0.5760251180764276, "grad_norm": 0.298828125, "learning_rate": 0.0010753775963043032, "loss": 1.8742, "step": 21465 }, { "epoch": 0.5760519536281666, "grad_norm": 0.3046875, "learning_rate": 0.001075360449588411, "loss": 1.9249, "step": 21466 }, { "epoch": 0.5760787891799055, "grad_norm": 0.298828125, "learning_rate": 0.0010753433018297178, "loss": 1.9192, "step": 21467 }, { "epoch": 0.5761056247316445, "grad_norm": 0.294921875, "learning_rate": 0.001075326153028261, "loss": 1.9184, "step": 21468 }, { "epoch": 0.5761324602833834, "grad_norm": 0.341796875, "learning_rate": 0.0010753090031840786, "loss": 2.0236, "step": 21469 }, { "epoch": 0.5761592958351224, "grad_norm": 0.345703125, "learning_rate": 0.0010752918522972082, "loss": 2.1464, "step": 21470 }, { "epoch": 0.5761861313868614, "grad_norm": 0.31640625, "learning_rate": 0.0010752747003676872, "loss": 2.0013, "step": 21471 }, { "epoch": 0.5762129669386002, "grad_norm": 0.306640625, "learning_rate": 0.0010752575473955536, "loss": 1.9839, "step": 21472 }, { "epoch": 0.5762398024903392, "grad_norm": 0.31640625, "learning_rate": 0.0010752403933808445, "loss": 2.1447, "step": 21473 }, { "epoch": 0.5762666380420781, "grad_norm": 0.322265625, "learning_rate": 0.0010752232383235978, "loss": 2.0351, "step": 21474 }, { "epoch": 0.5762934735938171, "grad_norm": 0.302734375, "learning_rate": 0.0010752060822238512, "loss": 1.9131, "step": 21475 }, { "epoch": 0.576320309145556, "grad_norm": 0.30859375, "learning_rate": 0.0010751889250816423, "loss": 1.947, "step": 21476 }, { "epoch": 0.576347144697295, "grad_norm": 0.296875, "learning_rate": 0.0010751717668970088, "loss": 1.9526, "step": 21477 }, { "epoch": 0.576373980249034, "grad_norm": 0.298828125, "learning_rate": 0.001075154607669988, "loss": 2.0477, "step": 21478 }, { "epoch": 0.5764008158007728, "grad_norm": 0.294921875, "learning_rate": 0.001075137447400618, "loss": 2.0371, "step": 21479 }, { "epoch": 0.5764276513525118, "grad_norm": 0.29296875, "learning_rate": 0.001075120286088936, "loss": 1.9134, "step": 21480 }, { "epoch": 0.5764544869042507, "grad_norm": 0.287109375, "learning_rate": 0.00107510312373498, "loss": 2.0592, "step": 21481 }, { "epoch": 0.5764813224559897, "grad_norm": 0.291015625, "learning_rate": 0.0010750859603387874, "loss": 2.0915, "step": 21482 }, { "epoch": 0.5765081580077286, "grad_norm": 0.296875, "learning_rate": 0.001075068795900396, "loss": 1.9794, "step": 21483 }, { "epoch": 0.5765349935594676, "grad_norm": 0.291015625, "learning_rate": 0.0010750516304198435, "loss": 2.0589, "step": 21484 }, { "epoch": 0.5765618291112066, "grad_norm": 0.29296875, "learning_rate": 0.0010750344638971675, "loss": 2.016, "step": 21485 }, { "epoch": 0.5765886646629454, "grad_norm": 0.28515625, "learning_rate": 0.0010750172963324058, "loss": 1.9539, "step": 21486 }, { "epoch": 0.5766155002146844, "grad_norm": 0.28125, "learning_rate": 0.0010750001277255957, "loss": 1.9691, "step": 21487 }, { "epoch": 0.5766423357664233, "grad_norm": 0.294921875, "learning_rate": 0.001074982958076775, "loss": 2.0632, "step": 21488 }, { "epoch": 0.5766691713181623, "grad_norm": 0.279296875, "learning_rate": 0.0010749657873859815, "loss": 1.8382, "step": 21489 }, { "epoch": 0.5766960068699012, "grad_norm": 0.291015625, "learning_rate": 0.0010749486156532528, "loss": 2.0015, "step": 21490 }, { "epoch": 0.5767228424216402, "grad_norm": 0.287109375, "learning_rate": 0.0010749314428786267, "loss": 1.9985, "step": 21491 }, { "epoch": 0.5767496779733792, "grad_norm": 0.2890625, "learning_rate": 0.0010749142690621406, "loss": 1.9973, "step": 21492 }, { "epoch": 0.576776513525118, "grad_norm": 0.28125, "learning_rate": 0.0010748970942038325, "loss": 1.9672, "step": 21493 }, { "epoch": 0.576803349076857, "grad_norm": 0.27734375, "learning_rate": 0.0010748799183037396, "loss": 1.9369, "step": 21494 }, { "epoch": 0.5768301846285959, "grad_norm": 0.283203125, "learning_rate": 0.0010748627413619, "loss": 2.0146, "step": 21495 }, { "epoch": 0.5768570201803349, "grad_norm": 0.291015625, "learning_rate": 0.0010748455633783514, "loss": 1.9312, "step": 21496 }, { "epoch": 0.5768838557320739, "grad_norm": 0.287109375, "learning_rate": 0.0010748283843531312, "loss": 1.9016, "step": 21497 }, { "epoch": 0.5769106912838128, "grad_norm": 0.28515625, "learning_rate": 0.0010748112042862773, "loss": 2.0203, "step": 21498 }, { "epoch": 0.5769375268355518, "grad_norm": 0.27734375, "learning_rate": 0.0010747940231778273, "loss": 2.0377, "step": 21499 }, { "epoch": 0.5769643623872907, "grad_norm": 0.28515625, "learning_rate": 0.0010747768410278187, "loss": 2.0281, "step": 21500 }, { "epoch": 0.5769911979390296, "grad_norm": 0.28515625, "learning_rate": 0.0010747596578362896, "loss": 1.9172, "step": 21501 }, { "epoch": 0.5770180334907685, "grad_norm": 0.29296875, "learning_rate": 0.0010747424736032773, "loss": 1.9372, "step": 21502 }, { "epoch": 0.5770448690425075, "grad_norm": 0.27734375, "learning_rate": 0.0010747252883288197, "loss": 1.8903, "step": 21503 }, { "epoch": 0.5770717045942465, "grad_norm": 0.267578125, "learning_rate": 0.0010747081020129547, "loss": 1.7793, "step": 21504 }, { "epoch": 0.5770985401459854, "grad_norm": 0.29296875, "learning_rate": 0.0010746909146557195, "loss": 1.9675, "step": 21505 }, { "epoch": 0.5771253756977244, "grad_norm": 0.28125, "learning_rate": 0.0010746737262571525, "loss": 1.9119, "step": 21506 }, { "epoch": 0.5771522112494633, "grad_norm": 0.291015625, "learning_rate": 0.0010746565368172907, "loss": 2.0125, "step": 21507 }, { "epoch": 0.5771790468012022, "grad_norm": 0.294921875, "learning_rate": 0.0010746393463361723, "loss": 1.9801, "step": 21508 }, { "epoch": 0.5772058823529411, "grad_norm": 0.296875, "learning_rate": 0.0010746221548138346, "loss": 2.0041, "step": 21509 }, { "epoch": 0.5772327179046801, "grad_norm": 0.2890625, "learning_rate": 0.0010746049622503155, "loss": 2.0286, "step": 21510 }, { "epoch": 0.5772595534564191, "grad_norm": 0.28515625, "learning_rate": 0.0010745877686456529, "loss": 1.9774, "step": 21511 }, { "epoch": 0.577286389008158, "grad_norm": 0.28515625, "learning_rate": 0.0010745705739998843, "loss": 1.9476, "step": 21512 }, { "epoch": 0.577313224559897, "grad_norm": 0.28125, "learning_rate": 0.0010745533783130476, "loss": 1.8898, "step": 21513 }, { "epoch": 0.5773400601116359, "grad_norm": 0.27734375, "learning_rate": 0.0010745361815851801, "loss": 1.8555, "step": 21514 }, { "epoch": 0.5773668956633748, "grad_norm": 0.279296875, "learning_rate": 0.0010745189838163203, "loss": 1.9163, "step": 21515 }, { "epoch": 0.5773937312151138, "grad_norm": 0.2890625, "learning_rate": 0.001074501785006505, "loss": 1.9642, "step": 21516 }, { "epoch": 0.5774205667668527, "grad_norm": 0.291015625, "learning_rate": 0.0010744845851557725, "loss": 1.9955, "step": 21517 }, { "epoch": 0.5774474023185917, "grad_norm": 0.271484375, "learning_rate": 0.0010744673842641606, "loss": 1.8189, "step": 21518 }, { "epoch": 0.5774742378703306, "grad_norm": 0.2890625, "learning_rate": 0.0010744501823317068, "loss": 1.9431, "step": 21519 }, { "epoch": 0.5775010734220696, "grad_norm": 0.29296875, "learning_rate": 0.0010744329793584487, "loss": 1.9258, "step": 21520 }, { "epoch": 0.5775279089738085, "grad_norm": 0.287109375, "learning_rate": 0.0010744157753444243, "loss": 1.9456, "step": 21521 }, { "epoch": 0.5775547445255474, "grad_norm": 0.283203125, "learning_rate": 0.0010743985702896715, "loss": 1.8311, "step": 21522 }, { "epoch": 0.5775815800772864, "grad_norm": 0.291015625, "learning_rate": 0.0010743813641942274, "loss": 1.9196, "step": 21523 }, { "epoch": 0.5776084156290253, "grad_norm": 0.28515625, "learning_rate": 0.0010743641570581304, "loss": 1.766, "step": 21524 }, { "epoch": 0.5776352511807643, "grad_norm": 0.29296875, "learning_rate": 0.001074346948881418, "loss": 1.9931, "step": 21525 }, { "epoch": 0.5776620867325032, "grad_norm": 0.298828125, "learning_rate": 0.0010743297396641279, "loss": 1.9249, "step": 21526 }, { "epoch": 0.5776889222842422, "grad_norm": 0.283203125, "learning_rate": 0.0010743125294062979, "loss": 1.8725, "step": 21527 }, { "epoch": 0.5777157578359811, "grad_norm": 0.294921875, "learning_rate": 0.0010742953181079658, "loss": 1.9248, "step": 21528 }, { "epoch": 0.57774259338772, "grad_norm": 0.287109375, "learning_rate": 0.0010742781057691694, "loss": 1.876, "step": 21529 }, { "epoch": 0.577769428939459, "grad_norm": 0.283203125, "learning_rate": 0.001074260892389946, "loss": 1.8445, "step": 21530 }, { "epoch": 0.5777962644911979, "grad_norm": 0.291015625, "learning_rate": 0.001074243677970334, "loss": 1.8829, "step": 21531 }, { "epoch": 0.5778231000429369, "grad_norm": 0.2890625, "learning_rate": 0.001074226462510371, "loss": 1.8963, "step": 21532 }, { "epoch": 0.5778499355946758, "grad_norm": 0.2890625, "learning_rate": 0.0010742092460100945, "loss": 1.8164, "step": 21533 }, { "epoch": 0.5778767711464148, "grad_norm": 0.291015625, "learning_rate": 0.0010741920284695426, "loss": 1.8913, "step": 21534 }, { "epoch": 0.5779036066981537, "grad_norm": 0.30078125, "learning_rate": 0.0010741748098887526, "loss": 1.9483, "step": 21535 }, { "epoch": 0.5779304422498927, "grad_norm": 0.29296875, "learning_rate": 0.0010741575902677627, "loss": 1.9456, "step": 21536 }, { "epoch": 0.5779572778016316, "grad_norm": 0.2890625, "learning_rate": 0.0010741403696066106, "loss": 1.8619, "step": 21537 }, { "epoch": 0.5779841133533705, "grad_norm": 0.287109375, "learning_rate": 0.001074123147905334, "loss": 1.8467, "step": 21538 }, { "epoch": 0.5780109489051095, "grad_norm": 0.28125, "learning_rate": 0.0010741059251639707, "loss": 1.8267, "step": 21539 }, { "epoch": 0.5780377844568484, "grad_norm": 0.2890625, "learning_rate": 0.0010740887013825585, "loss": 1.8469, "step": 21540 }, { "epoch": 0.5780646200085874, "grad_norm": 0.298828125, "learning_rate": 0.0010740714765611352, "loss": 1.9464, "step": 21541 }, { "epoch": 0.5780914555603264, "grad_norm": 0.28515625, "learning_rate": 0.0010740542506997386, "loss": 1.9265, "step": 21542 }, { "epoch": 0.5781182911120653, "grad_norm": 0.28125, "learning_rate": 0.0010740370237984063, "loss": 1.815, "step": 21543 }, { "epoch": 0.5781451266638042, "grad_norm": 0.29296875, "learning_rate": 0.0010740197958571764, "loss": 1.8426, "step": 21544 }, { "epoch": 0.5781719622155431, "grad_norm": 0.28515625, "learning_rate": 0.0010740025668760863, "loss": 1.8014, "step": 21545 }, { "epoch": 0.5781987977672821, "grad_norm": 0.28515625, "learning_rate": 0.0010739853368551741, "loss": 1.8365, "step": 21546 }, { "epoch": 0.578225633319021, "grad_norm": 0.291015625, "learning_rate": 0.0010739681057944776, "loss": 1.9021, "step": 21547 }, { "epoch": 0.57825246887076, "grad_norm": 0.2890625, "learning_rate": 0.0010739508736940344, "loss": 1.9029, "step": 21548 }, { "epoch": 0.578279304422499, "grad_norm": 0.298828125, "learning_rate": 0.0010739336405538827, "loss": 1.8884, "step": 21549 }, { "epoch": 0.5783061399742379, "grad_norm": 0.291015625, "learning_rate": 0.0010739164063740597, "loss": 1.9383, "step": 21550 }, { "epoch": 0.5783329755259768, "grad_norm": 0.291015625, "learning_rate": 0.0010738991711546036, "loss": 1.8598, "step": 21551 }, { "epoch": 0.5783598110777157, "grad_norm": 0.296875, "learning_rate": 0.0010738819348955523, "loss": 1.8919, "step": 21552 }, { "epoch": 0.5783866466294547, "grad_norm": 0.294921875, "learning_rate": 0.0010738646975969432, "loss": 1.8666, "step": 21553 }, { "epoch": 0.5784134821811936, "grad_norm": 0.29296875, "learning_rate": 0.0010738474592588145, "loss": 1.8718, "step": 21554 }, { "epoch": 0.5784403177329326, "grad_norm": 0.306640625, "learning_rate": 0.001073830219881204, "loss": 1.8902, "step": 21555 }, { "epoch": 0.5784671532846716, "grad_norm": 0.326171875, "learning_rate": 0.0010738129794641493, "loss": 2.0656, "step": 21556 }, { "epoch": 0.5784939888364105, "grad_norm": 0.345703125, "learning_rate": 0.0010737957380076884, "loss": 2.1399, "step": 21557 }, { "epoch": 0.5785208243881494, "grad_norm": 0.32421875, "learning_rate": 0.0010737784955118588, "loss": 2.0892, "step": 21558 }, { "epoch": 0.5785476599398883, "grad_norm": 0.3359375, "learning_rate": 0.0010737612519766987, "loss": 2.1214, "step": 21559 }, { "epoch": 0.5785744954916273, "grad_norm": 0.291015625, "learning_rate": 0.001073744007402246, "loss": 1.9831, "step": 21560 }, { "epoch": 0.5786013310433663, "grad_norm": 0.287109375, "learning_rate": 0.0010737267617885382, "loss": 2.0335, "step": 21561 }, { "epoch": 0.5786281665951052, "grad_norm": 0.3125, "learning_rate": 0.0010737095151356132, "loss": 2.1144, "step": 21562 }, { "epoch": 0.5786550021468442, "grad_norm": 0.298828125, "learning_rate": 0.001073692267443509, "loss": 2.1048, "step": 21563 }, { "epoch": 0.5786818376985831, "grad_norm": 0.287109375, "learning_rate": 0.001073675018712263, "loss": 1.9756, "step": 21564 }, { "epoch": 0.578708673250322, "grad_norm": 0.29296875, "learning_rate": 0.0010736577689419136, "loss": 2.0529, "step": 21565 }, { "epoch": 0.5787355088020609, "grad_norm": 0.28125, "learning_rate": 0.0010736405181324985, "loss": 1.9851, "step": 21566 }, { "epoch": 0.5787623443537999, "grad_norm": 0.30078125, "learning_rate": 0.0010736232662840552, "loss": 2.0962, "step": 21567 }, { "epoch": 0.5787891799055389, "grad_norm": 0.302734375, "learning_rate": 0.0010736060133966223, "loss": 2.0118, "step": 21568 }, { "epoch": 0.5788160154572778, "grad_norm": 0.2890625, "learning_rate": 0.0010735887594702366, "loss": 2.0567, "step": 21569 }, { "epoch": 0.5788428510090168, "grad_norm": 0.287109375, "learning_rate": 0.0010735715045049367, "loss": 2.0198, "step": 21570 }, { "epoch": 0.5788696865607557, "grad_norm": 0.29296875, "learning_rate": 0.0010735542485007601, "loss": 2.0035, "step": 21571 }, { "epoch": 0.5788965221124946, "grad_norm": 0.287109375, "learning_rate": 0.001073536991457745, "loss": 1.8965, "step": 21572 }, { "epoch": 0.5789233576642335, "grad_norm": 0.291015625, "learning_rate": 0.0010735197333759292, "loss": 1.9854, "step": 21573 }, { "epoch": 0.5789501932159725, "grad_norm": 0.291015625, "learning_rate": 0.00107350247425535, "loss": 2.0159, "step": 21574 }, { "epoch": 0.5789770287677115, "grad_norm": 0.294921875, "learning_rate": 0.001073485214096046, "loss": 1.9934, "step": 21575 }, { "epoch": 0.5790038643194504, "grad_norm": 0.28515625, "learning_rate": 0.0010734679528980546, "loss": 1.9595, "step": 21576 }, { "epoch": 0.5790306998711894, "grad_norm": 0.291015625, "learning_rate": 0.0010734506906614138, "loss": 1.9761, "step": 21577 }, { "epoch": 0.5790575354229283, "grad_norm": 0.28125, "learning_rate": 0.0010734334273861616, "loss": 1.9189, "step": 21578 }, { "epoch": 0.5790843709746673, "grad_norm": 0.28515625, "learning_rate": 0.0010734161630723356, "loss": 2.0372, "step": 21579 }, { "epoch": 0.5791112065264061, "grad_norm": 0.2890625, "learning_rate": 0.0010733988977199738, "loss": 1.978, "step": 21580 }, { "epoch": 0.5791380420781451, "grad_norm": 0.27734375, "learning_rate": 0.0010733816313291144, "loss": 1.8576, "step": 21581 }, { "epoch": 0.5791648776298841, "grad_norm": 0.28515625, "learning_rate": 0.0010733643638997947, "loss": 2.0326, "step": 21582 }, { "epoch": 0.579191713181623, "grad_norm": 0.279296875, "learning_rate": 0.0010733470954320528, "loss": 2.0068, "step": 21583 }, { "epoch": 0.579218548733362, "grad_norm": 0.294921875, "learning_rate": 0.0010733298259259269, "loss": 2.028, "step": 21584 }, { "epoch": 0.5792453842851009, "grad_norm": 0.29296875, "learning_rate": 0.0010733125553814545, "loss": 1.9969, "step": 21585 }, { "epoch": 0.5792722198368399, "grad_norm": 0.294921875, "learning_rate": 0.0010732952837986733, "loss": 2.033, "step": 21586 }, { "epoch": 0.5792990553885788, "grad_norm": 0.298828125, "learning_rate": 0.0010732780111776218, "loss": 1.9903, "step": 21587 }, { "epoch": 0.5793258909403177, "grad_norm": 0.27734375, "learning_rate": 0.0010732607375183376, "loss": 1.9343, "step": 21588 }, { "epoch": 0.5793527264920567, "grad_norm": 0.283203125, "learning_rate": 0.0010732434628208582, "loss": 1.9337, "step": 21589 }, { "epoch": 0.5793795620437956, "grad_norm": 0.279296875, "learning_rate": 0.0010732261870852224, "loss": 2.008, "step": 21590 }, { "epoch": 0.5794063975955346, "grad_norm": 0.2734375, "learning_rate": 0.0010732089103114672, "loss": 1.8932, "step": 21591 }, { "epoch": 0.5794332331472735, "grad_norm": 0.2890625, "learning_rate": 0.0010731916324996313, "loss": 2.0505, "step": 21592 }, { "epoch": 0.5794600686990125, "grad_norm": 0.296875, "learning_rate": 0.0010731743536497517, "loss": 2.0252, "step": 21593 }, { "epoch": 0.5794869042507514, "grad_norm": 0.287109375, "learning_rate": 0.001073157073761867, "loss": 1.9034, "step": 21594 }, { "epoch": 0.5795137398024903, "grad_norm": 0.287109375, "learning_rate": 0.0010731397928360148, "loss": 1.9322, "step": 21595 }, { "epoch": 0.5795405753542293, "grad_norm": 0.2890625, "learning_rate": 0.0010731225108722333, "loss": 2.0029, "step": 21596 }, { "epoch": 0.5795674109059682, "grad_norm": 0.2890625, "learning_rate": 0.00107310522787056, "loss": 2.0151, "step": 21597 }, { "epoch": 0.5795942464577072, "grad_norm": 0.28515625, "learning_rate": 0.001073087943831033, "loss": 1.9168, "step": 21598 }, { "epoch": 0.5796210820094461, "grad_norm": 0.287109375, "learning_rate": 0.0010730706587536905, "loss": 1.8422, "step": 21599 }, { "epoch": 0.579647917561185, "grad_norm": 0.283203125, "learning_rate": 0.0010730533726385698, "loss": 1.8877, "step": 21600 }, { "epoch": 0.579674753112924, "grad_norm": 0.287109375, "learning_rate": 0.0010730360854857095, "loss": 1.952, "step": 21601 }, { "epoch": 0.5797015886646629, "grad_norm": 0.291015625, "learning_rate": 0.0010730187972951468, "loss": 2.044, "step": 21602 }, { "epoch": 0.5797284242164019, "grad_norm": 0.291015625, "learning_rate": 0.0010730015080669204, "loss": 1.9493, "step": 21603 }, { "epoch": 0.5797552597681408, "grad_norm": 0.271484375, "learning_rate": 0.0010729842178010677, "loss": 1.8373, "step": 21604 }, { "epoch": 0.5797820953198798, "grad_norm": 0.275390625, "learning_rate": 0.0010729669264976267, "loss": 1.9085, "step": 21605 }, { "epoch": 0.5798089308716187, "grad_norm": 0.283203125, "learning_rate": 0.0010729496341566355, "loss": 1.9049, "step": 21606 }, { "epoch": 0.5798357664233577, "grad_norm": 0.283203125, "learning_rate": 0.001072932340778132, "loss": 1.9052, "step": 21607 }, { "epoch": 0.5798626019750966, "grad_norm": 0.2890625, "learning_rate": 0.001072915046362154, "loss": 1.9063, "step": 21608 }, { "epoch": 0.5798894375268355, "grad_norm": 0.27734375, "learning_rate": 0.0010728977509087397, "loss": 1.8694, "step": 21609 }, { "epoch": 0.5799162730785745, "grad_norm": 0.28515625, "learning_rate": 0.0010728804544179267, "loss": 1.922, "step": 21610 }, { "epoch": 0.5799431086303134, "grad_norm": 0.291015625, "learning_rate": 0.001072863156889753, "loss": 1.9517, "step": 21611 }, { "epoch": 0.5799699441820524, "grad_norm": 0.283203125, "learning_rate": 0.001072845858324257, "loss": 1.8758, "step": 21612 }, { "epoch": 0.5799967797337914, "grad_norm": 0.287109375, "learning_rate": 0.001072828558721476, "loss": 1.9681, "step": 21613 }, { "epoch": 0.5800236152855303, "grad_norm": 0.283203125, "learning_rate": 0.0010728112580814483, "loss": 1.8736, "step": 21614 }, { "epoch": 0.5800504508372692, "grad_norm": 0.283203125, "learning_rate": 0.0010727939564042117, "loss": 1.8328, "step": 21615 }, { "epoch": 0.5800772863890081, "grad_norm": 0.279296875, "learning_rate": 0.0010727766536898047, "loss": 1.89, "step": 21616 }, { "epoch": 0.5801041219407471, "grad_norm": 0.27734375, "learning_rate": 0.0010727593499382643, "loss": 1.8293, "step": 21617 }, { "epoch": 0.580130957492486, "grad_norm": 0.283203125, "learning_rate": 0.0010727420451496293, "loss": 1.8757, "step": 21618 }, { "epoch": 0.580157793044225, "grad_norm": 0.29296875, "learning_rate": 0.001072724739323937, "loss": 1.9706, "step": 21619 }, { "epoch": 0.580184628595964, "grad_norm": 0.28515625, "learning_rate": 0.0010727074324612261, "loss": 1.825, "step": 21620 }, { "epoch": 0.5802114641477029, "grad_norm": 0.294921875, "learning_rate": 0.0010726901245615339, "loss": 1.8767, "step": 21621 }, { "epoch": 0.5802382996994419, "grad_norm": 0.29296875, "learning_rate": 0.0010726728156248988, "loss": 1.9435, "step": 21622 }, { "epoch": 0.5802651352511807, "grad_norm": 0.29296875, "learning_rate": 0.0010726555056513585, "loss": 1.9603, "step": 21623 }, { "epoch": 0.5802919708029197, "grad_norm": 0.294921875, "learning_rate": 0.001072638194640951, "loss": 1.9483, "step": 21624 }, { "epoch": 0.5803188063546586, "grad_norm": 0.298828125, "learning_rate": 0.0010726208825937145, "loss": 1.9668, "step": 21625 }, { "epoch": 0.5803456419063976, "grad_norm": 0.291015625, "learning_rate": 0.001072603569509687, "loss": 1.8118, "step": 21626 }, { "epoch": 0.5803724774581366, "grad_norm": 0.294921875, "learning_rate": 0.001072586255388906, "loss": 1.9055, "step": 21627 }, { "epoch": 0.5803993130098755, "grad_norm": 0.2890625, "learning_rate": 0.0010725689402314097, "loss": 1.8476, "step": 21628 }, { "epoch": 0.5804261485616145, "grad_norm": 0.279296875, "learning_rate": 0.0010725516240372365, "loss": 1.829, "step": 21629 }, { "epoch": 0.5804529841133533, "grad_norm": 0.291015625, "learning_rate": 0.001072534306806424, "loss": 1.8866, "step": 21630 }, { "epoch": 0.5804798196650923, "grad_norm": 0.28515625, "learning_rate": 0.00107251698853901, "loss": 1.8274, "step": 21631 }, { "epoch": 0.5805066552168313, "grad_norm": 0.29296875, "learning_rate": 0.0010724996692350328, "loss": 1.8235, "step": 21632 }, { "epoch": 0.5805334907685702, "grad_norm": 0.302734375, "learning_rate": 0.0010724823488945305, "loss": 1.9089, "step": 21633 }, { "epoch": 0.5805603263203092, "grad_norm": 0.29296875, "learning_rate": 0.0010724650275175409, "loss": 1.8872, "step": 21634 }, { "epoch": 0.5805871618720481, "grad_norm": 0.28515625, "learning_rate": 0.001072447705104102, "loss": 1.8421, "step": 21635 }, { "epoch": 0.580613997423787, "grad_norm": 0.294921875, "learning_rate": 0.001072430381654252, "loss": 1.8772, "step": 21636 }, { "epoch": 0.5806408329755259, "grad_norm": 0.28515625, "learning_rate": 0.0010724130571680285, "loss": 1.9111, "step": 21637 }, { "epoch": 0.5806676685272649, "grad_norm": 0.296875, "learning_rate": 0.0010723957316454698, "loss": 1.8887, "step": 21638 }, { "epoch": 0.5806945040790039, "grad_norm": 0.30078125, "learning_rate": 0.001072378405086614, "loss": 1.8465, "step": 21639 }, { "epoch": 0.5807213396307428, "grad_norm": 0.2890625, "learning_rate": 0.0010723610774914989, "loss": 1.8682, "step": 21640 }, { "epoch": 0.5807481751824818, "grad_norm": 0.34375, "learning_rate": 0.0010723437488601624, "loss": 2.0798, "step": 21641 }, { "epoch": 0.5807750107342207, "grad_norm": 0.32421875, "learning_rate": 0.0010723264191926426, "loss": 1.9581, "step": 21642 }, { "epoch": 0.5808018462859597, "grad_norm": 0.34765625, "learning_rate": 0.0010723090884889779, "loss": 2.0667, "step": 21643 }, { "epoch": 0.5808286818376985, "grad_norm": 0.326171875, "learning_rate": 0.0010722917567492058, "loss": 2.0409, "step": 21644 }, { "epoch": 0.5808555173894375, "grad_norm": 0.31640625, "learning_rate": 0.0010722744239733647, "loss": 2.1004, "step": 21645 }, { "epoch": 0.5808823529411765, "grad_norm": 0.296875, "learning_rate": 0.0010722570901614923, "loss": 1.9808, "step": 21646 }, { "epoch": 0.5809091884929154, "grad_norm": 0.3046875, "learning_rate": 0.001072239755313627, "loss": 2.0809, "step": 21647 }, { "epoch": 0.5809360240446544, "grad_norm": 0.302734375, "learning_rate": 0.0010722224194298063, "loss": 2.0525, "step": 21648 }, { "epoch": 0.5809628595963933, "grad_norm": 0.283203125, "learning_rate": 0.0010722050825100688, "loss": 1.9758, "step": 21649 }, { "epoch": 0.5809896951481323, "grad_norm": 0.294921875, "learning_rate": 0.0010721877445544522, "loss": 1.9948, "step": 21650 }, { "epoch": 0.5810165306998711, "grad_norm": 0.2890625, "learning_rate": 0.0010721704055629945, "loss": 1.9926, "step": 21651 }, { "epoch": 0.5810433662516101, "grad_norm": 0.2890625, "learning_rate": 0.0010721530655357341, "loss": 1.9993, "step": 21652 }, { "epoch": 0.5810702018033491, "grad_norm": 0.287109375, "learning_rate": 0.0010721357244727085, "loss": 1.885, "step": 21653 }, { "epoch": 0.581097037355088, "grad_norm": 0.294921875, "learning_rate": 0.0010721183823739564, "loss": 2.0132, "step": 21654 }, { "epoch": 0.581123872906827, "grad_norm": 0.291015625, "learning_rate": 0.001072101039239515, "loss": 2.0151, "step": 21655 }, { "epoch": 0.5811507084585659, "grad_norm": 0.298828125, "learning_rate": 0.0010720836950694232, "loss": 1.9365, "step": 21656 }, { "epoch": 0.5811775440103049, "grad_norm": 0.291015625, "learning_rate": 0.0010720663498637185, "loss": 1.9656, "step": 21657 }, { "epoch": 0.5812043795620438, "grad_norm": 0.28125, "learning_rate": 0.001072049003622439, "loss": 1.9214, "step": 21658 }, { "epoch": 0.5812312151137827, "grad_norm": 0.283203125, "learning_rate": 0.001072031656345623, "loss": 2.0354, "step": 21659 }, { "epoch": 0.5812580506655217, "grad_norm": 0.283203125, "learning_rate": 0.0010720143080333085, "loss": 2.0135, "step": 21660 }, { "epoch": 0.5812848862172606, "grad_norm": 0.296875, "learning_rate": 0.0010719969586855333, "loss": 1.9282, "step": 21661 }, { "epoch": 0.5813117217689996, "grad_norm": 0.306640625, "learning_rate": 0.0010719796083023356, "loss": 2.116, "step": 21662 }, { "epoch": 0.5813385573207385, "grad_norm": 0.287109375, "learning_rate": 0.0010719622568837537, "loss": 1.9063, "step": 21663 }, { "epoch": 0.5813653928724775, "grad_norm": 0.28125, "learning_rate": 0.0010719449044298253, "loss": 2.0816, "step": 21664 }, { "epoch": 0.5813922284242165, "grad_norm": 0.279296875, "learning_rate": 0.0010719275509405886, "loss": 1.9201, "step": 21665 }, { "epoch": 0.5814190639759553, "grad_norm": 0.2890625, "learning_rate": 0.0010719101964160819, "loss": 2.0359, "step": 21666 }, { "epoch": 0.5814458995276943, "grad_norm": 0.28125, "learning_rate": 0.001071892840856343, "loss": 1.9648, "step": 21667 }, { "epoch": 0.5814727350794332, "grad_norm": 0.279296875, "learning_rate": 0.00107187548426141, "loss": 1.9203, "step": 21668 }, { "epoch": 0.5814995706311722, "grad_norm": 0.2890625, "learning_rate": 0.001071858126631321, "loss": 1.955, "step": 21669 }, { "epoch": 0.5815264061829111, "grad_norm": 0.294921875, "learning_rate": 0.001071840767966114, "loss": 1.9929, "step": 21670 }, { "epoch": 0.5815532417346501, "grad_norm": 0.283203125, "learning_rate": 0.0010718234082658276, "loss": 1.8791, "step": 21671 }, { "epoch": 0.581580077286389, "grad_norm": 0.28515625, "learning_rate": 0.0010718060475304989, "loss": 1.8749, "step": 21672 }, { "epoch": 0.5816069128381279, "grad_norm": 0.28125, "learning_rate": 0.001071788685760167, "loss": 1.8808, "step": 21673 }, { "epoch": 0.5816337483898669, "grad_norm": 0.2734375, "learning_rate": 0.001071771322954869, "loss": 1.8812, "step": 21674 }, { "epoch": 0.5816605839416058, "grad_norm": 0.28125, "learning_rate": 0.001071753959114644, "loss": 1.8388, "step": 21675 }, { "epoch": 0.5816874194933448, "grad_norm": 0.28515625, "learning_rate": 0.0010717365942395293, "loss": 1.97, "step": 21676 }, { "epoch": 0.5817142550450837, "grad_norm": 0.279296875, "learning_rate": 0.0010717192283295636, "loss": 1.8185, "step": 21677 }, { "epoch": 0.5817410905968227, "grad_norm": 0.294921875, "learning_rate": 0.0010717018613847845, "loss": 1.999, "step": 21678 }, { "epoch": 0.5817679261485617, "grad_norm": 0.28515625, "learning_rate": 0.0010716844934052301, "loss": 1.8509, "step": 21679 }, { "epoch": 0.5817947617003005, "grad_norm": 0.2890625, "learning_rate": 0.001071667124390939, "loss": 1.9401, "step": 21680 }, { "epoch": 0.5818215972520395, "grad_norm": 0.28125, "learning_rate": 0.001071649754341949, "loss": 1.8981, "step": 21681 }, { "epoch": 0.5818484328037784, "grad_norm": 0.279296875, "learning_rate": 0.001071632383258298, "loss": 1.9762, "step": 21682 }, { "epoch": 0.5818752683555174, "grad_norm": 0.283203125, "learning_rate": 0.0010716150111400244, "loss": 1.9346, "step": 21683 }, { "epoch": 0.5819021039072564, "grad_norm": 0.2890625, "learning_rate": 0.0010715976379871662, "loss": 2.0527, "step": 21684 }, { "epoch": 0.5819289394589953, "grad_norm": 0.28125, "learning_rate": 0.0010715802637997615, "loss": 1.944, "step": 21685 }, { "epoch": 0.5819557750107343, "grad_norm": 0.275390625, "learning_rate": 0.0010715628885778485, "loss": 1.8545, "step": 21686 }, { "epoch": 0.5819826105624731, "grad_norm": 0.279296875, "learning_rate": 0.0010715455123214651, "loss": 1.8653, "step": 21687 }, { "epoch": 0.5820094461142121, "grad_norm": 0.28125, "learning_rate": 0.0010715281350306498, "loss": 1.7902, "step": 21688 }, { "epoch": 0.582036281665951, "grad_norm": 0.28515625, "learning_rate": 0.0010715107567054406, "loss": 1.9797, "step": 21689 }, { "epoch": 0.58206311721769, "grad_norm": 0.27734375, "learning_rate": 0.0010714933773458752, "loss": 1.9696, "step": 21690 }, { "epoch": 0.582089952769429, "grad_norm": 0.283203125, "learning_rate": 0.0010714759969519922, "loss": 1.9246, "step": 21691 }, { "epoch": 0.5821167883211679, "grad_norm": 0.279296875, "learning_rate": 0.0010714586155238296, "loss": 1.9145, "step": 21692 }, { "epoch": 0.5821436238729069, "grad_norm": 0.294921875, "learning_rate": 0.0010714412330614254, "loss": 1.9313, "step": 21693 }, { "epoch": 0.5821704594246457, "grad_norm": 0.291015625, "learning_rate": 0.001071423849564818, "loss": 1.8315, "step": 21694 }, { "epoch": 0.5821972949763847, "grad_norm": 0.28515625, "learning_rate": 0.0010714064650340454, "loss": 1.9063, "step": 21695 }, { "epoch": 0.5822241305281236, "grad_norm": 0.283203125, "learning_rate": 0.0010713890794691453, "loss": 1.797, "step": 21696 }, { "epoch": 0.5822509660798626, "grad_norm": 0.283203125, "learning_rate": 0.0010713716928701566, "loss": 1.9153, "step": 21697 }, { "epoch": 0.5822778016316016, "grad_norm": 0.279296875, "learning_rate": 0.001071354305237117, "loss": 1.8204, "step": 21698 }, { "epoch": 0.5823046371833405, "grad_norm": 0.296875, "learning_rate": 0.0010713369165700648, "loss": 1.824, "step": 21699 }, { "epoch": 0.5823314727350795, "grad_norm": 0.27734375, "learning_rate": 0.001071319526869038, "loss": 1.8436, "step": 21700 }, { "epoch": 0.5823583082868183, "grad_norm": 0.294921875, "learning_rate": 0.0010713021361340748, "loss": 1.8688, "step": 21701 }, { "epoch": 0.5823851438385573, "grad_norm": 0.287109375, "learning_rate": 0.0010712847443652134, "loss": 1.8622, "step": 21702 }, { "epoch": 0.5824119793902963, "grad_norm": 0.283203125, "learning_rate": 0.001071267351562492, "loss": 1.8989, "step": 21703 }, { "epoch": 0.5824388149420352, "grad_norm": 0.296875, "learning_rate": 0.0010712499577259485, "loss": 1.9084, "step": 21704 }, { "epoch": 0.5824656504937742, "grad_norm": 0.298828125, "learning_rate": 0.0010712325628556216, "loss": 1.8621, "step": 21705 }, { "epoch": 0.5824924860455131, "grad_norm": 0.294921875, "learning_rate": 0.0010712151669515489, "loss": 1.8696, "step": 21706 }, { "epoch": 0.5825193215972521, "grad_norm": 0.28515625, "learning_rate": 0.0010711977700137685, "loss": 1.8343, "step": 21707 }, { "epoch": 0.5825461571489909, "grad_norm": 0.291015625, "learning_rate": 0.001071180372042319, "loss": 1.8928, "step": 21708 }, { "epoch": 0.5825729927007299, "grad_norm": 0.28515625, "learning_rate": 0.0010711629730372383, "loss": 1.8434, "step": 21709 }, { "epoch": 0.5825998282524689, "grad_norm": 0.279296875, "learning_rate": 0.001071145572998565, "loss": 1.8586, "step": 21710 }, { "epoch": 0.5826266638042078, "grad_norm": 0.30078125, "learning_rate": 0.0010711281719263366, "loss": 1.9227, "step": 21711 }, { "epoch": 0.5826534993559468, "grad_norm": 0.29296875, "learning_rate": 0.0010711107698205917, "loss": 1.8686, "step": 21712 }, { "epoch": 0.5826803349076857, "grad_norm": 0.287109375, "learning_rate": 0.0010710933666813682, "loss": 1.9088, "step": 21713 }, { "epoch": 0.5827071704594247, "grad_norm": 0.287109375, "learning_rate": 0.0010710759625087048, "loss": 1.9137, "step": 21714 }, { "epoch": 0.5827340060111635, "grad_norm": 0.287109375, "learning_rate": 0.001071058557302639, "loss": 1.8889, "step": 21715 }, { "epoch": 0.5827608415629025, "grad_norm": 0.28515625, "learning_rate": 0.0010710411510632095, "loss": 1.8125, "step": 21716 }, { "epoch": 0.5827876771146415, "grad_norm": 0.283203125, "learning_rate": 0.0010710237437904543, "loss": 1.7518, "step": 21717 }, { "epoch": 0.5828145126663804, "grad_norm": 0.294921875, "learning_rate": 0.0010710063354844115, "loss": 1.853, "step": 21718 }, { "epoch": 0.5828413482181194, "grad_norm": 0.2890625, "learning_rate": 0.0010709889261451195, "loss": 1.8012, "step": 21719 }, { "epoch": 0.5828681837698583, "grad_norm": 0.296875, "learning_rate": 0.0010709715157726162, "loss": 1.8839, "step": 21720 }, { "epoch": 0.5828950193215973, "grad_norm": 0.2890625, "learning_rate": 0.00107095410436694, "loss": 1.859, "step": 21721 }, { "epoch": 0.5829218548733361, "grad_norm": 0.291015625, "learning_rate": 0.0010709366919281291, "loss": 1.8704, "step": 21722 }, { "epoch": 0.5829486904250751, "grad_norm": 0.291015625, "learning_rate": 0.0010709192784562217, "loss": 1.9162, "step": 21723 }, { "epoch": 0.5829755259768141, "grad_norm": 0.30078125, "learning_rate": 0.0010709018639512558, "loss": 1.916, "step": 21724 }, { "epoch": 0.583002361528553, "grad_norm": 0.31640625, "learning_rate": 0.0010708844484132697, "loss": 2.0089, "step": 21725 }, { "epoch": 0.583029197080292, "grad_norm": 0.33984375, "learning_rate": 0.0010708670318423019, "loss": 2.1231, "step": 21726 }, { "epoch": 0.5830560326320309, "grad_norm": 0.328125, "learning_rate": 0.0010708496142383902, "loss": 2.0105, "step": 21727 }, { "epoch": 0.5830828681837699, "grad_norm": 0.32421875, "learning_rate": 0.001070832195601573, "loss": 2.1232, "step": 21728 }, { "epoch": 0.5831097037355089, "grad_norm": 0.30078125, "learning_rate": 0.0010708147759318886, "loss": 2.0625, "step": 21729 }, { "epoch": 0.5831365392872477, "grad_norm": 0.30078125, "learning_rate": 0.001070797355229375, "loss": 2.0694, "step": 21730 }, { "epoch": 0.5831633748389867, "grad_norm": 0.306640625, "learning_rate": 0.0010707799334940707, "loss": 2.0699, "step": 21731 }, { "epoch": 0.5831902103907256, "grad_norm": 0.294921875, "learning_rate": 0.0010707625107260134, "loss": 2.0116, "step": 21732 }, { "epoch": 0.5832170459424646, "grad_norm": 0.306640625, "learning_rate": 0.0010707450869252416, "loss": 2.1377, "step": 21733 }, { "epoch": 0.5832438814942035, "grad_norm": 0.30078125, "learning_rate": 0.0010707276620917938, "loss": 1.9992, "step": 21734 }, { "epoch": 0.5832707170459425, "grad_norm": 0.279296875, "learning_rate": 0.001070710236225708, "loss": 1.9833, "step": 21735 }, { "epoch": 0.5832975525976815, "grad_norm": 0.291015625, "learning_rate": 0.0010706928093270225, "loss": 2.02, "step": 21736 }, { "epoch": 0.5833243881494203, "grad_norm": 0.291015625, "learning_rate": 0.0010706753813957753, "loss": 2.0061, "step": 21737 }, { "epoch": 0.5833512237011593, "grad_norm": 0.296875, "learning_rate": 0.0010706579524320048, "loss": 2.0421, "step": 21738 }, { "epoch": 0.5833780592528982, "grad_norm": 0.283203125, "learning_rate": 0.0010706405224357493, "loss": 1.9496, "step": 21739 }, { "epoch": 0.5834048948046372, "grad_norm": 0.287109375, "learning_rate": 0.001070623091407047, "loss": 1.9367, "step": 21740 }, { "epoch": 0.5834317303563761, "grad_norm": 0.294921875, "learning_rate": 0.001070605659345936, "loss": 1.9634, "step": 21741 }, { "epoch": 0.5834585659081151, "grad_norm": 0.28515625, "learning_rate": 0.0010705882262524546, "loss": 1.9066, "step": 21742 }, { "epoch": 0.5834854014598541, "grad_norm": 0.283203125, "learning_rate": 0.0010705707921266413, "loss": 1.9796, "step": 21743 }, { "epoch": 0.5835122370115929, "grad_norm": 0.279296875, "learning_rate": 0.0010705533569685339, "loss": 1.9037, "step": 21744 }, { "epoch": 0.5835390725633319, "grad_norm": 0.28515625, "learning_rate": 0.001070535920778171, "loss": 1.9592, "step": 21745 }, { "epoch": 0.5835659081150708, "grad_norm": 0.28125, "learning_rate": 0.0010705184835555904, "loss": 1.9588, "step": 21746 }, { "epoch": 0.5835927436668098, "grad_norm": 0.283203125, "learning_rate": 0.0010705010453008312, "loss": 1.9451, "step": 21747 }, { "epoch": 0.5836195792185487, "grad_norm": 0.287109375, "learning_rate": 0.0010704836060139308, "loss": 1.9678, "step": 21748 }, { "epoch": 0.5836464147702877, "grad_norm": 0.2890625, "learning_rate": 0.0010704661656949278, "loss": 1.8928, "step": 21749 }, { "epoch": 0.5836732503220267, "grad_norm": 0.283203125, "learning_rate": 0.0010704487243438604, "loss": 2.0234, "step": 21750 }, { "epoch": 0.5837000858737655, "grad_norm": 0.28125, "learning_rate": 0.0010704312819607672, "loss": 1.8865, "step": 21751 }, { "epoch": 0.5837269214255045, "grad_norm": 0.283203125, "learning_rate": 0.0010704138385456859, "loss": 1.9721, "step": 21752 }, { "epoch": 0.5837537569772434, "grad_norm": 0.2890625, "learning_rate": 0.0010703963940986551, "loss": 1.9434, "step": 21753 }, { "epoch": 0.5837805925289824, "grad_norm": 0.294921875, "learning_rate": 0.001070378948619713, "loss": 1.9037, "step": 21754 }, { "epoch": 0.5838074280807214, "grad_norm": 0.28125, "learning_rate": 0.0010703615021088978, "loss": 1.9334, "step": 21755 }, { "epoch": 0.5838342636324603, "grad_norm": 0.279296875, "learning_rate": 0.001070344054566248, "loss": 1.9169, "step": 21756 }, { "epoch": 0.5838610991841993, "grad_norm": 0.2734375, "learning_rate": 0.0010703266059918018, "loss": 1.8915, "step": 21757 }, { "epoch": 0.5838879347359381, "grad_norm": 0.29296875, "learning_rate": 0.0010703091563855972, "loss": 2.0734, "step": 21758 }, { "epoch": 0.5839147702876771, "grad_norm": 0.28515625, "learning_rate": 0.0010702917057476729, "loss": 1.858, "step": 21759 }, { "epoch": 0.583941605839416, "grad_norm": 0.27734375, "learning_rate": 0.0010702742540780667, "loss": 1.8629, "step": 21760 }, { "epoch": 0.583968441391155, "grad_norm": 0.28125, "learning_rate": 0.001070256801376817, "loss": 1.862, "step": 21761 }, { "epoch": 0.583995276942894, "grad_norm": 0.28515625, "learning_rate": 0.0010702393476439626, "loss": 1.8755, "step": 21762 }, { "epoch": 0.5840221124946329, "grad_norm": 0.291015625, "learning_rate": 0.0010702218928795412, "loss": 1.7985, "step": 21763 }, { "epoch": 0.5840489480463719, "grad_norm": 0.291015625, "learning_rate": 0.0010702044370835915, "loss": 1.9025, "step": 21764 }, { "epoch": 0.5840757835981107, "grad_norm": 0.279296875, "learning_rate": 0.0010701869802561515, "loss": 1.9611, "step": 21765 }, { "epoch": 0.5841026191498497, "grad_norm": 0.28125, "learning_rate": 0.0010701695223972595, "loss": 1.9299, "step": 21766 }, { "epoch": 0.5841294547015886, "grad_norm": 0.28125, "learning_rate": 0.001070152063506954, "loss": 1.9525, "step": 21767 }, { "epoch": 0.5841562902533276, "grad_norm": 0.298828125, "learning_rate": 0.0010701346035852733, "loss": 2.0114, "step": 21768 }, { "epoch": 0.5841831258050666, "grad_norm": 0.291015625, "learning_rate": 0.0010701171426322556, "loss": 1.8826, "step": 21769 }, { "epoch": 0.5842099613568055, "grad_norm": 0.287109375, "learning_rate": 0.001070099680647939, "loss": 1.8703, "step": 21770 }, { "epoch": 0.5842367969085445, "grad_norm": 0.291015625, "learning_rate": 0.0010700822176323622, "loss": 1.915, "step": 21771 }, { "epoch": 0.5842636324602833, "grad_norm": 0.3046875, "learning_rate": 0.001070064753585563, "loss": 1.9954, "step": 21772 }, { "epoch": 0.5842904680120223, "grad_norm": 0.28515625, "learning_rate": 0.0010700472885075802, "loss": 1.8756, "step": 21773 }, { "epoch": 0.5843173035637613, "grad_norm": 0.2890625, "learning_rate": 0.001070029822398452, "loss": 1.9881, "step": 21774 }, { "epoch": 0.5843441391155002, "grad_norm": 0.2890625, "learning_rate": 0.0010700123552582167, "loss": 1.9488, "step": 21775 }, { "epoch": 0.5843709746672392, "grad_norm": 0.27734375, "learning_rate": 0.0010699948870869123, "loss": 1.8855, "step": 21776 }, { "epoch": 0.5843978102189781, "grad_norm": 0.27734375, "learning_rate": 0.0010699774178845776, "loss": 1.8631, "step": 21777 }, { "epoch": 0.5844246457707171, "grad_norm": 0.287109375, "learning_rate": 0.0010699599476512508, "loss": 1.9096, "step": 21778 }, { "epoch": 0.584451481322456, "grad_norm": 0.283203125, "learning_rate": 0.00106994247638697, "loss": 1.9558, "step": 21779 }, { "epoch": 0.5844783168741949, "grad_norm": 0.29296875, "learning_rate": 0.0010699250040917738, "loss": 1.8968, "step": 21780 }, { "epoch": 0.5845051524259339, "grad_norm": 0.2890625, "learning_rate": 0.0010699075307657002, "loss": 1.8627, "step": 21781 }, { "epoch": 0.5845319879776728, "grad_norm": 0.291015625, "learning_rate": 0.0010698900564087878, "loss": 1.9204, "step": 21782 }, { "epoch": 0.5845588235294118, "grad_norm": 0.28515625, "learning_rate": 0.001069872581021075, "loss": 1.8633, "step": 21783 }, { "epoch": 0.5845856590811507, "grad_norm": 0.294921875, "learning_rate": 0.0010698551046025998, "loss": 1.9342, "step": 21784 }, { "epoch": 0.5846124946328897, "grad_norm": 0.287109375, "learning_rate": 0.0010698376271534007, "loss": 1.9221, "step": 21785 }, { "epoch": 0.5846393301846285, "grad_norm": 0.287109375, "learning_rate": 0.0010698201486735162, "loss": 1.9096, "step": 21786 }, { "epoch": 0.5846661657363675, "grad_norm": 0.28515625, "learning_rate": 0.0010698026691629847, "loss": 1.8442, "step": 21787 }, { "epoch": 0.5846930012881065, "grad_norm": 0.28515625, "learning_rate": 0.0010697851886218442, "loss": 1.8648, "step": 21788 }, { "epoch": 0.5847198368398454, "grad_norm": 0.287109375, "learning_rate": 0.001069767707050133, "loss": 1.875, "step": 21789 }, { "epoch": 0.5847466723915844, "grad_norm": 0.27734375, "learning_rate": 0.00106975022444789, "loss": 1.8532, "step": 21790 }, { "epoch": 0.5847735079433233, "grad_norm": 0.283203125, "learning_rate": 0.0010697327408151528, "loss": 1.7884, "step": 21791 }, { "epoch": 0.5848003434950623, "grad_norm": 0.28515625, "learning_rate": 0.0010697152561519605, "loss": 1.8432, "step": 21792 }, { "epoch": 0.5848271790468011, "grad_norm": 0.296875, "learning_rate": 0.001069697770458351, "loss": 1.851, "step": 21793 }, { "epoch": 0.5848540145985401, "grad_norm": 0.29296875, "learning_rate": 0.0010696802837343628, "loss": 1.8711, "step": 21794 }, { "epoch": 0.5848808501502791, "grad_norm": 0.2890625, "learning_rate": 0.0010696627959800344, "loss": 1.8784, "step": 21795 }, { "epoch": 0.584907685702018, "grad_norm": 0.287109375, "learning_rate": 0.0010696453071954038, "loss": 1.8582, "step": 21796 }, { "epoch": 0.584934521253757, "grad_norm": 0.283203125, "learning_rate": 0.0010696278173805095, "loss": 1.8174, "step": 21797 }, { "epoch": 0.5849613568054959, "grad_norm": 0.28515625, "learning_rate": 0.0010696103265353902, "loss": 1.9293, "step": 21798 }, { "epoch": 0.5849881923572349, "grad_norm": 0.296875, "learning_rate": 0.0010695928346600837, "loss": 1.8699, "step": 21799 }, { "epoch": 0.5850150279089739, "grad_norm": 0.2890625, "learning_rate": 0.0010695753417546288, "loss": 1.8269, "step": 21800 }, { "epoch": 0.5850418634607127, "grad_norm": 0.296875, "learning_rate": 0.001069557847819064, "loss": 1.7751, "step": 21801 }, { "epoch": 0.5850686990124517, "grad_norm": 0.287109375, "learning_rate": 0.001069540352853427, "loss": 1.8477, "step": 21802 }, { "epoch": 0.5850955345641906, "grad_norm": 0.28515625, "learning_rate": 0.0010695228568577569, "loss": 1.8184, "step": 21803 }, { "epoch": 0.5851223701159296, "grad_norm": 0.302734375, "learning_rate": 0.0010695053598320917, "loss": 1.9889, "step": 21804 }, { "epoch": 0.5851492056676685, "grad_norm": 0.2890625, "learning_rate": 0.0010694878617764699, "loss": 1.8143, "step": 21805 }, { "epoch": 0.5851760412194075, "grad_norm": 0.296875, "learning_rate": 0.0010694703626909297, "loss": 1.8825, "step": 21806 }, { "epoch": 0.5852028767711465, "grad_norm": 0.291015625, "learning_rate": 0.00106945286257551, "loss": 1.8978, "step": 21807 }, { "epoch": 0.5852297123228853, "grad_norm": 0.345703125, "learning_rate": 0.0010694353614302485, "loss": 2.1319, "step": 21808 }, { "epoch": 0.5852565478746243, "grad_norm": 0.322265625, "learning_rate": 0.001069417859255184, "loss": 2.0587, "step": 21809 }, { "epoch": 0.5852833834263632, "grad_norm": 0.3125, "learning_rate": 0.0010694003560503549, "loss": 1.9882, "step": 21810 }, { "epoch": 0.5853102189781022, "grad_norm": 0.322265625, "learning_rate": 0.0010693828518157996, "loss": 2.0934, "step": 21811 }, { "epoch": 0.5853370545298411, "grad_norm": 0.294921875, "learning_rate": 0.0010693653465515562, "loss": 2.0577, "step": 21812 }, { "epoch": 0.5853638900815801, "grad_norm": 0.30859375, "learning_rate": 0.0010693478402576634, "loss": 2.0225, "step": 21813 }, { "epoch": 0.5853907256333191, "grad_norm": 0.302734375, "learning_rate": 0.0010693303329341595, "loss": 2.041, "step": 21814 }, { "epoch": 0.5854175611850579, "grad_norm": 0.298828125, "learning_rate": 0.001069312824581083, "loss": 2.0316, "step": 21815 }, { "epoch": 0.5854443967367969, "grad_norm": 0.287109375, "learning_rate": 0.0010692953151984722, "loss": 1.932, "step": 21816 }, { "epoch": 0.5854712322885358, "grad_norm": 0.294921875, "learning_rate": 0.0010692778047863656, "loss": 1.9989, "step": 21817 }, { "epoch": 0.5854980678402748, "grad_norm": 0.298828125, "learning_rate": 0.0010692602933448014, "loss": 1.9794, "step": 21818 }, { "epoch": 0.5855249033920137, "grad_norm": 0.29296875, "learning_rate": 0.0010692427808738183, "loss": 1.9406, "step": 21819 }, { "epoch": 0.5855517389437527, "grad_norm": 0.283203125, "learning_rate": 0.0010692252673734546, "loss": 1.9968, "step": 21820 }, { "epoch": 0.5855785744954917, "grad_norm": 0.2890625, "learning_rate": 0.0010692077528437487, "loss": 2.0149, "step": 21821 }, { "epoch": 0.5856054100472305, "grad_norm": 0.28515625, "learning_rate": 0.001069190237284739, "loss": 2.0185, "step": 21822 }, { "epoch": 0.5856322455989695, "grad_norm": 0.28515625, "learning_rate": 0.001069172720696464, "loss": 1.9793, "step": 21823 }, { "epoch": 0.5856590811507084, "grad_norm": 0.283203125, "learning_rate": 0.001069155203078962, "loss": 1.93, "step": 21824 }, { "epoch": 0.5856859167024474, "grad_norm": 0.287109375, "learning_rate": 0.0010691376844322717, "loss": 1.9551, "step": 21825 }, { "epoch": 0.5857127522541864, "grad_norm": 0.279296875, "learning_rate": 0.0010691201647564312, "loss": 1.9687, "step": 21826 }, { "epoch": 0.5857395878059253, "grad_norm": 0.28125, "learning_rate": 0.0010691026440514792, "loss": 1.8998, "step": 21827 }, { "epoch": 0.5857664233576643, "grad_norm": 0.28125, "learning_rate": 0.001069085122317454, "loss": 1.9013, "step": 21828 }, { "epoch": 0.5857932589094031, "grad_norm": 0.28125, "learning_rate": 0.001069067599554394, "loss": 1.9886, "step": 21829 }, { "epoch": 0.5858200944611421, "grad_norm": 0.29296875, "learning_rate": 0.0010690500757623376, "loss": 2.0598, "step": 21830 }, { "epoch": 0.585846930012881, "grad_norm": 0.291015625, "learning_rate": 0.001069032550941323, "loss": 1.9581, "step": 21831 }, { "epoch": 0.58587376556462, "grad_norm": 0.279296875, "learning_rate": 0.0010690150250913895, "loss": 1.9807, "step": 21832 }, { "epoch": 0.585900601116359, "grad_norm": 0.28515625, "learning_rate": 0.0010689974982125748, "loss": 2.0305, "step": 21833 }, { "epoch": 0.5859274366680979, "grad_norm": 0.279296875, "learning_rate": 0.0010689799703049176, "loss": 1.9748, "step": 21834 }, { "epoch": 0.5859542722198369, "grad_norm": 0.27734375, "learning_rate": 0.0010689624413684564, "loss": 1.9361, "step": 21835 }, { "epoch": 0.5859811077715757, "grad_norm": 0.28125, "learning_rate": 0.0010689449114032296, "loss": 2.0127, "step": 21836 }, { "epoch": 0.5860079433233147, "grad_norm": 0.287109375, "learning_rate": 0.0010689273804092756, "loss": 1.898, "step": 21837 }, { "epoch": 0.5860347788750536, "grad_norm": 0.29296875, "learning_rate": 0.001068909848386633, "loss": 1.9715, "step": 21838 }, { "epoch": 0.5860616144267926, "grad_norm": 0.27734375, "learning_rate": 0.0010688923153353398, "loss": 1.8978, "step": 21839 }, { "epoch": 0.5860884499785316, "grad_norm": 0.28515625, "learning_rate": 0.0010688747812554352, "loss": 1.8999, "step": 21840 }, { "epoch": 0.5861152855302705, "grad_norm": 0.287109375, "learning_rate": 0.0010688572461469569, "loss": 1.8944, "step": 21841 }, { "epoch": 0.5861421210820095, "grad_norm": 0.298828125, "learning_rate": 0.001068839710009944, "loss": 2.0332, "step": 21842 }, { "epoch": 0.5861689566337484, "grad_norm": 0.2890625, "learning_rate": 0.0010688221728444346, "loss": 1.9353, "step": 21843 }, { "epoch": 0.5861957921854873, "grad_norm": 0.28125, "learning_rate": 0.0010688046346504674, "loss": 1.8767, "step": 21844 }, { "epoch": 0.5862226277372263, "grad_norm": 0.279296875, "learning_rate": 0.0010687870954280807, "loss": 1.9343, "step": 21845 }, { "epoch": 0.5862494632889652, "grad_norm": 0.279296875, "learning_rate": 0.0010687695551773128, "loss": 1.8703, "step": 21846 }, { "epoch": 0.5862762988407042, "grad_norm": 0.271484375, "learning_rate": 0.0010687520138982028, "loss": 1.8613, "step": 21847 }, { "epoch": 0.5863031343924431, "grad_norm": 0.2890625, "learning_rate": 0.0010687344715907886, "loss": 1.9525, "step": 21848 }, { "epoch": 0.5863299699441821, "grad_norm": 0.279296875, "learning_rate": 0.0010687169282551088, "loss": 1.9237, "step": 21849 }, { "epoch": 0.586356805495921, "grad_norm": 0.28125, "learning_rate": 0.001068699383891202, "loss": 1.9314, "step": 21850 }, { "epoch": 0.5863836410476599, "grad_norm": 0.291015625, "learning_rate": 0.0010686818384991069, "loss": 2.002, "step": 21851 }, { "epoch": 0.5864104765993989, "grad_norm": 0.2890625, "learning_rate": 0.0010686642920788613, "loss": 2.011, "step": 21852 }, { "epoch": 0.5864373121511378, "grad_norm": 0.283203125, "learning_rate": 0.0010686467446305043, "loss": 1.9363, "step": 21853 }, { "epoch": 0.5864641477028768, "grad_norm": 0.283203125, "learning_rate": 0.0010686291961540743, "loss": 1.9073, "step": 21854 }, { "epoch": 0.5864909832546157, "grad_norm": 0.283203125, "learning_rate": 0.0010686116466496099, "loss": 1.9339, "step": 21855 }, { "epoch": 0.5865178188063547, "grad_norm": 0.2734375, "learning_rate": 0.001068594096117149, "loss": 1.8779, "step": 21856 }, { "epoch": 0.5865446543580936, "grad_norm": 0.28125, "learning_rate": 0.0010685765445567308, "loss": 1.9178, "step": 21857 }, { "epoch": 0.5865714899098325, "grad_norm": 0.28515625, "learning_rate": 0.0010685589919683933, "loss": 1.9681, "step": 21858 }, { "epoch": 0.5865983254615715, "grad_norm": 0.29296875, "learning_rate": 0.0010685414383521754, "loss": 1.9364, "step": 21859 }, { "epoch": 0.5866251610133104, "grad_norm": 0.294921875, "learning_rate": 0.0010685238837081154, "loss": 1.916, "step": 21860 }, { "epoch": 0.5866519965650494, "grad_norm": 0.283203125, "learning_rate": 0.001068506328036252, "loss": 1.935, "step": 21861 }, { "epoch": 0.5866788321167883, "grad_norm": 0.275390625, "learning_rate": 0.001068488771336623, "loss": 1.8672, "step": 21862 }, { "epoch": 0.5867056676685273, "grad_norm": 0.28515625, "learning_rate": 0.0010684712136092682, "loss": 1.9573, "step": 21863 }, { "epoch": 0.5867325032202662, "grad_norm": 0.29296875, "learning_rate": 0.001068453654854225, "loss": 1.9231, "step": 21864 }, { "epoch": 0.5867593387720051, "grad_norm": 0.27734375, "learning_rate": 0.0010684360950715323, "loss": 1.8802, "step": 21865 }, { "epoch": 0.5867861743237441, "grad_norm": 0.283203125, "learning_rate": 0.0010684185342612286, "loss": 1.8546, "step": 21866 }, { "epoch": 0.586813009875483, "grad_norm": 0.291015625, "learning_rate": 0.0010684009724233525, "loss": 1.8733, "step": 21867 }, { "epoch": 0.586839845427222, "grad_norm": 0.29296875, "learning_rate": 0.0010683834095579425, "loss": 1.9653, "step": 21868 }, { "epoch": 0.5868666809789609, "grad_norm": 0.283203125, "learning_rate": 0.001068365845665037, "loss": 1.879, "step": 21869 }, { "epoch": 0.5868935165306999, "grad_norm": 0.294921875, "learning_rate": 0.0010683482807446747, "loss": 1.9093, "step": 21870 }, { "epoch": 0.5869203520824389, "grad_norm": 0.291015625, "learning_rate": 0.0010683307147968941, "loss": 1.9149, "step": 21871 }, { "epoch": 0.5869471876341777, "grad_norm": 0.27734375, "learning_rate": 0.0010683131478217336, "loss": 1.8494, "step": 21872 }, { "epoch": 0.5869740231859167, "grad_norm": 0.28125, "learning_rate": 0.001068295579819232, "loss": 1.8601, "step": 21873 }, { "epoch": 0.5870008587376556, "grad_norm": 0.28515625, "learning_rate": 0.0010682780107894275, "loss": 1.8931, "step": 21874 }, { "epoch": 0.5870276942893946, "grad_norm": 0.294921875, "learning_rate": 0.001068260440732359, "loss": 1.9029, "step": 21875 }, { "epoch": 0.5870545298411335, "grad_norm": 0.287109375, "learning_rate": 0.0010682428696480646, "loss": 1.8468, "step": 21876 }, { "epoch": 0.5870813653928725, "grad_norm": 0.296875, "learning_rate": 0.0010682252975365834, "loss": 1.9562, "step": 21877 }, { "epoch": 0.5871082009446115, "grad_norm": 0.283203125, "learning_rate": 0.0010682077243979534, "loss": 1.8556, "step": 21878 }, { "epoch": 0.5871350364963503, "grad_norm": 0.28515625, "learning_rate": 0.0010681901502322135, "loss": 1.8935, "step": 21879 }, { "epoch": 0.5871618720480893, "grad_norm": 0.29296875, "learning_rate": 0.001068172575039402, "loss": 1.9871, "step": 21880 }, { "epoch": 0.5871887075998282, "grad_norm": 0.294921875, "learning_rate": 0.001068154998819558, "loss": 2.0127, "step": 21881 }, { "epoch": 0.5872155431515672, "grad_norm": 0.283203125, "learning_rate": 0.0010681374215727195, "loss": 1.7956, "step": 21882 }, { "epoch": 0.5872423787033061, "grad_norm": 0.287109375, "learning_rate": 0.001068119843298925, "loss": 1.8347, "step": 21883 }, { "epoch": 0.5872692142550451, "grad_norm": 0.2890625, "learning_rate": 0.0010681022639982133, "loss": 1.8877, "step": 21884 }, { "epoch": 0.5872960498067841, "grad_norm": 0.2890625, "learning_rate": 0.0010680846836706233, "loss": 1.8508, "step": 21885 }, { "epoch": 0.587322885358523, "grad_norm": 0.318359375, "learning_rate": 0.0010680671023161929, "loss": 1.9575, "step": 21886 }, { "epoch": 0.5873497209102619, "grad_norm": 0.31640625, "learning_rate": 0.0010680495199349611, "loss": 2.0656, "step": 21887 }, { "epoch": 0.5873765564620008, "grad_norm": 0.318359375, "learning_rate": 0.0010680319365269663, "loss": 2.0137, "step": 21888 }, { "epoch": 0.5874033920137398, "grad_norm": 0.306640625, "learning_rate": 0.0010680143520922472, "loss": 2.0597, "step": 21889 }, { "epoch": 0.5874302275654787, "grad_norm": 0.302734375, "learning_rate": 0.0010679967666308422, "loss": 1.9935, "step": 21890 }, { "epoch": 0.5874570631172177, "grad_norm": 0.294921875, "learning_rate": 0.0010679791801427902, "loss": 1.9435, "step": 21891 }, { "epoch": 0.5874838986689567, "grad_norm": 0.306640625, "learning_rate": 0.0010679615926281294, "loss": 1.9571, "step": 21892 }, { "epoch": 0.5875107342206956, "grad_norm": 0.291015625, "learning_rate": 0.0010679440040868985, "loss": 2.0542, "step": 21893 }, { "epoch": 0.5875375697724345, "grad_norm": 0.318359375, "learning_rate": 0.001067926414519136, "loss": 2.0634, "step": 21894 }, { "epoch": 0.5875644053241734, "grad_norm": 0.291015625, "learning_rate": 0.001067908823924881, "loss": 2.0642, "step": 21895 }, { "epoch": 0.5875912408759124, "grad_norm": 0.283203125, "learning_rate": 0.0010678912323041714, "loss": 1.9834, "step": 21896 }, { "epoch": 0.5876180764276514, "grad_norm": 0.291015625, "learning_rate": 0.0010678736396570463, "loss": 2.0621, "step": 21897 }, { "epoch": 0.5876449119793903, "grad_norm": 0.2890625, "learning_rate": 0.0010678560459835439, "loss": 2.1132, "step": 21898 }, { "epoch": 0.5876717475311293, "grad_norm": 0.29296875, "learning_rate": 0.001067838451283703, "loss": 2.0076, "step": 21899 }, { "epoch": 0.5876985830828682, "grad_norm": 0.287109375, "learning_rate": 0.0010678208555575624, "loss": 2.074, "step": 21900 }, { "epoch": 0.5877254186346071, "grad_norm": 0.26953125, "learning_rate": 0.0010678032588051602, "loss": 1.9216, "step": 21901 }, { "epoch": 0.587752254186346, "grad_norm": 0.283203125, "learning_rate": 0.0010677856610265355, "loss": 1.9638, "step": 21902 }, { "epoch": 0.587779089738085, "grad_norm": 0.27734375, "learning_rate": 0.0010677680622217267, "loss": 1.897, "step": 21903 }, { "epoch": 0.587805925289824, "grad_norm": 0.26953125, "learning_rate": 0.001067750462390772, "loss": 1.9154, "step": 21904 }, { "epoch": 0.5878327608415629, "grad_norm": 0.2890625, "learning_rate": 0.0010677328615337107, "loss": 1.9212, "step": 21905 }, { "epoch": 0.5878595963933019, "grad_norm": 0.296875, "learning_rate": 0.0010677152596505813, "loss": 2.0815, "step": 21906 }, { "epoch": 0.5878864319450408, "grad_norm": 0.29296875, "learning_rate": 0.001067697656741422, "loss": 2.0236, "step": 21907 }, { "epoch": 0.5879132674967797, "grad_norm": 0.275390625, "learning_rate": 0.0010676800528062714, "loss": 1.8729, "step": 21908 }, { "epoch": 0.5879401030485186, "grad_norm": 0.283203125, "learning_rate": 0.0010676624478451688, "loss": 1.9312, "step": 21909 }, { "epoch": 0.5879669386002576, "grad_norm": 0.283203125, "learning_rate": 0.0010676448418581522, "loss": 1.9301, "step": 21910 }, { "epoch": 0.5879937741519966, "grad_norm": 0.279296875, "learning_rate": 0.0010676272348452602, "loss": 1.9195, "step": 21911 }, { "epoch": 0.5880206097037355, "grad_norm": 0.279296875, "learning_rate": 0.001067609626806532, "loss": 2.0563, "step": 21912 }, { "epoch": 0.5880474452554745, "grad_norm": 0.27734375, "learning_rate": 0.0010675920177420056, "loss": 1.8972, "step": 21913 }, { "epoch": 0.5880742808072134, "grad_norm": 0.283203125, "learning_rate": 0.0010675744076517198, "loss": 2.0545, "step": 21914 }, { "epoch": 0.5881011163589523, "grad_norm": 0.28125, "learning_rate": 0.0010675567965357134, "loss": 1.9575, "step": 21915 }, { "epoch": 0.5881279519106913, "grad_norm": 0.28515625, "learning_rate": 0.0010675391843940252, "loss": 1.9639, "step": 21916 }, { "epoch": 0.5881547874624302, "grad_norm": 0.279296875, "learning_rate": 0.0010675215712266932, "loss": 1.9553, "step": 21917 }, { "epoch": 0.5881816230141692, "grad_norm": 0.275390625, "learning_rate": 0.0010675039570337567, "loss": 1.966, "step": 21918 }, { "epoch": 0.5882084585659081, "grad_norm": 0.28515625, "learning_rate": 0.001067486341815254, "loss": 1.8907, "step": 21919 }, { "epoch": 0.5882352941176471, "grad_norm": 0.29296875, "learning_rate": 0.0010674687255712236, "loss": 1.9984, "step": 21920 }, { "epoch": 0.588262129669386, "grad_norm": 0.28125, "learning_rate": 0.0010674511083017045, "loss": 1.9194, "step": 21921 }, { "epoch": 0.588288965221125, "grad_norm": 0.279296875, "learning_rate": 0.0010674334900067353, "loss": 1.9064, "step": 21922 }, { "epoch": 0.5883158007728639, "grad_norm": 0.29296875, "learning_rate": 0.0010674158706863543, "loss": 2.0023, "step": 21923 }, { "epoch": 0.5883426363246028, "grad_norm": 0.28515625, "learning_rate": 0.0010673982503406007, "loss": 1.9718, "step": 21924 }, { "epoch": 0.5883694718763418, "grad_norm": 0.26953125, "learning_rate": 0.0010673806289695127, "loss": 1.8598, "step": 21925 }, { "epoch": 0.5883963074280807, "grad_norm": 0.296875, "learning_rate": 0.001067363006573129, "loss": 1.9333, "step": 21926 }, { "epoch": 0.5884231429798197, "grad_norm": 0.28515625, "learning_rate": 0.0010673453831514885, "loss": 2.0129, "step": 21927 }, { "epoch": 0.5884499785315586, "grad_norm": 0.27734375, "learning_rate": 0.0010673277587046298, "loss": 1.9465, "step": 21928 }, { "epoch": 0.5884768140832976, "grad_norm": 0.279296875, "learning_rate": 0.0010673101332325911, "loss": 1.9588, "step": 21929 }, { "epoch": 0.5885036496350365, "grad_norm": 0.28515625, "learning_rate": 0.001067292506735412, "loss": 1.9061, "step": 21930 }, { "epoch": 0.5885304851867754, "grad_norm": 0.28125, "learning_rate": 0.0010672748792131303, "loss": 1.9037, "step": 21931 }, { "epoch": 0.5885573207385144, "grad_norm": 0.28515625, "learning_rate": 0.001067257250665785, "loss": 1.9382, "step": 21932 }, { "epoch": 0.5885841562902533, "grad_norm": 0.28125, "learning_rate": 0.0010672396210934148, "loss": 1.9152, "step": 21933 }, { "epoch": 0.5886109918419923, "grad_norm": 0.275390625, "learning_rate": 0.0010672219904960584, "loss": 1.9086, "step": 21934 }, { "epoch": 0.5886378273937312, "grad_norm": 0.294921875, "learning_rate": 0.0010672043588737543, "loss": 1.9824, "step": 21935 }, { "epoch": 0.5886646629454702, "grad_norm": 0.283203125, "learning_rate": 0.0010671867262265413, "loss": 1.8342, "step": 21936 }, { "epoch": 0.5886914984972091, "grad_norm": 0.291015625, "learning_rate": 0.001067169092554458, "loss": 1.9869, "step": 21937 }, { "epoch": 0.588718334048948, "grad_norm": 0.287109375, "learning_rate": 0.0010671514578575432, "loss": 1.8543, "step": 21938 }, { "epoch": 0.588745169600687, "grad_norm": 0.287109375, "learning_rate": 0.0010671338221358358, "loss": 1.8785, "step": 21939 }, { "epoch": 0.5887720051524259, "grad_norm": 0.28125, "learning_rate": 0.0010671161853893739, "loss": 1.903, "step": 21940 }, { "epoch": 0.5887988407041649, "grad_norm": 0.2890625, "learning_rate": 0.0010670985476181967, "loss": 1.9478, "step": 21941 }, { "epoch": 0.5888256762559039, "grad_norm": 0.28125, "learning_rate": 0.0010670809088223427, "loss": 1.8845, "step": 21942 }, { "epoch": 0.5888525118076428, "grad_norm": 0.28515625, "learning_rate": 0.0010670632690018503, "loss": 1.9997, "step": 21943 }, { "epoch": 0.5888793473593817, "grad_norm": 0.28515625, "learning_rate": 0.001067045628156759, "loss": 1.9068, "step": 21944 }, { "epoch": 0.5889061829111206, "grad_norm": 0.28515625, "learning_rate": 0.0010670279862871066, "loss": 1.8394, "step": 21945 }, { "epoch": 0.5889330184628596, "grad_norm": 0.28125, "learning_rate": 0.0010670103433929325, "loss": 1.8839, "step": 21946 }, { "epoch": 0.5889598540145985, "grad_norm": 0.28515625, "learning_rate": 0.0010669926994742748, "loss": 1.8727, "step": 21947 }, { "epoch": 0.5889866895663375, "grad_norm": 0.287109375, "learning_rate": 0.0010669750545311727, "loss": 1.8751, "step": 21948 }, { "epoch": 0.5890135251180765, "grad_norm": 0.283203125, "learning_rate": 0.0010669574085636646, "loss": 1.9454, "step": 21949 }, { "epoch": 0.5890403606698154, "grad_norm": 0.275390625, "learning_rate": 0.0010669397615717896, "loss": 1.8106, "step": 21950 }, { "epoch": 0.5890671962215543, "grad_norm": 0.2890625, "learning_rate": 0.0010669221135555858, "loss": 1.8452, "step": 21951 }, { "epoch": 0.5890940317732932, "grad_norm": 0.29296875, "learning_rate": 0.0010669044645150924, "loss": 1.957, "step": 21952 }, { "epoch": 0.5891208673250322, "grad_norm": 0.283203125, "learning_rate": 0.0010668868144503478, "loss": 1.8167, "step": 21953 }, { "epoch": 0.5891477028767711, "grad_norm": 0.275390625, "learning_rate": 0.0010668691633613913, "loss": 1.8227, "step": 21954 }, { "epoch": 0.5891745384285101, "grad_norm": 0.29296875, "learning_rate": 0.001066851511248261, "loss": 1.8702, "step": 21955 }, { "epoch": 0.5892013739802491, "grad_norm": 0.283203125, "learning_rate": 0.0010668338581109958, "loss": 1.8098, "step": 21956 }, { "epoch": 0.589228209531988, "grad_norm": 0.294921875, "learning_rate": 0.0010668162039496343, "loss": 1.9136, "step": 21957 }, { "epoch": 0.589255045083727, "grad_norm": 0.283203125, "learning_rate": 0.0010667985487642156, "loss": 1.8263, "step": 21958 }, { "epoch": 0.5892818806354658, "grad_norm": 0.3046875, "learning_rate": 0.0010667808925547782, "loss": 1.9186, "step": 21959 }, { "epoch": 0.5893087161872048, "grad_norm": 0.294921875, "learning_rate": 0.0010667632353213608, "loss": 1.882, "step": 21960 }, { "epoch": 0.5893355517389438, "grad_norm": 0.283203125, "learning_rate": 0.001066745577064002, "loss": 1.8911, "step": 21961 }, { "epoch": 0.5893623872906827, "grad_norm": 0.287109375, "learning_rate": 0.001066727917782741, "loss": 1.8484, "step": 21962 }, { "epoch": 0.5893892228424217, "grad_norm": 0.287109375, "learning_rate": 0.0010667102574776163, "loss": 1.8825, "step": 21963 }, { "epoch": 0.5894160583941606, "grad_norm": 0.302734375, "learning_rate": 0.0010666925961486664, "loss": 1.907, "step": 21964 }, { "epoch": 0.5894428939458995, "grad_norm": 0.333984375, "learning_rate": 0.0010666749337959304, "loss": 2.079, "step": 21965 }, { "epoch": 0.5894697294976384, "grad_norm": 0.33203125, "learning_rate": 0.0010666572704194468, "loss": 2.0512, "step": 21966 }, { "epoch": 0.5894965650493774, "grad_norm": 0.3046875, "learning_rate": 0.0010666396060192546, "loss": 1.9364, "step": 21967 }, { "epoch": 0.5895234006011164, "grad_norm": 0.302734375, "learning_rate": 0.0010666219405953922, "loss": 2.0369, "step": 21968 }, { "epoch": 0.5895502361528553, "grad_norm": 0.294921875, "learning_rate": 0.0010666042741478987, "loss": 1.9408, "step": 21969 }, { "epoch": 0.5895770717045943, "grad_norm": 0.306640625, "learning_rate": 0.0010665866066768125, "loss": 2.0739, "step": 21970 }, { "epoch": 0.5896039072563332, "grad_norm": 0.302734375, "learning_rate": 0.0010665689381821725, "loss": 2.0572, "step": 21971 }, { "epoch": 0.5896307428080722, "grad_norm": 0.291015625, "learning_rate": 0.0010665512686640177, "loss": 1.9875, "step": 21972 }, { "epoch": 0.589657578359811, "grad_norm": 0.28515625, "learning_rate": 0.0010665335981223866, "loss": 2.1142, "step": 21973 }, { "epoch": 0.58968441391155, "grad_norm": 0.28515625, "learning_rate": 0.0010665159265573184, "loss": 2.0289, "step": 21974 }, { "epoch": 0.589711249463289, "grad_norm": 0.291015625, "learning_rate": 0.001066498253968851, "loss": 2.0654, "step": 21975 }, { "epoch": 0.5897380850150279, "grad_norm": 0.298828125, "learning_rate": 0.001066480580357024, "loss": 2.0787, "step": 21976 }, { "epoch": 0.5897649205667669, "grad_norm": 0.28125, "learning_rate": 0.0010664629057218758, "loss": 2.1282, "step": 21977 }, { "epoch": 0.5897917561185058, "grad_norm": 0.291015625, "learning_rate": 0.001066445230063445, "loss": 2.0764, "step": 21978 }, { "epoch": 0.5898185916702448, "grad_norm": 0.29296875, "learning_rate": 0.0010664275533817707, "loss": 2.0309, "step": 21979 }, { "epoch": 0.5898454272219836, "grad_norm": 0.279296875, "learning_rate": 0.0010664098756768916, "loss": 1.9454, "step": 21980 }, { "epoch": 0.5898722627737226, "grad_norm": 0.275390625, "learning_rate": 0.0010663921969488465, "loss": 1.8755, "step": 21981 }, { "epoch": 0.5898990983254616, "grad_norm": 0.275390625, "learning_rate": 0.0010663745171976742, "loss": 1.9943, "step": 21982 }, { "epoch": 0.5899259338772005, "grad_norm": 0.291015625, "learning_rate": 0.0010663568364234135, "loss": 2.025, "step": 21983 }, { "epoch": 0.5899527694289395, "grad_norm": 0.283203125, "learning_rate": 0.0010663391546261029, "loss": 1.999, "step": 21984 }, { "epoch": 0.5899796049806784, "grad_norm": 0.279296875, "learning_rate": 0.0010663214718057811, "loss": 1.9681, "step": 21985 }, { "epoch": 0.5900064405324174, "grad_norm": 0.28125, "learning_rate": 0.0010663037879624876, "loss": 2.0073, "step": 21986 }, { "epoch": 0.5900332760841563, "grad_norm": 0.271484375, "learning_rate": 0.001066286103096261, "loss": 1.9834, "step": 21987 }, { "epoch": 0.5900601116358952, "grad_norm": 0.275390625, "learning_rate": 0.0010662684172071395, "loss": 1.9891, "step": 21988 }, { "epoch": 0.5900869471876342, "grad_norm": 0.28125, "learning_rate": 0.0010662507302951622, "loss": 1.9449, "step": 21989 }, { "epoch": 0.5901137827393731, "grad_norm": 0.275390625, "learning_rate": 0.001066233042360368, "loss": 1.9499, "step": 21990 }, { "epoch": 0.5901406182911121, "grad_norm": 0.28515625, "learning_rate": 0.0010662153534027959, "loss": 1.9901, "step": 21991 }, { "epoch": 0.590167453842851, "grad_norm": 0.279296875, "learning_rate": 0.0010661976634224843, "loss": 1.9034, "step": 21992 }, { "epoch": 0.59019428939459, "grad_norm": 0.27734375, "learning_rate": 0.0010661799724194722, "loss": 1.8679, "step": 21993 }, { "epoch": 0.590221124946329, "grad_norm": 0.27734375, "learning_rate": 0.0010661622803937985, "loss": 1.9239, "step": 21994 }, { "epoch": 0.5902479604980678, "grad_norm": 0.287109375, "learning_rate": 0.0010661445873455017, "loss": 1.9356, "step": 21995 }, { "epoch": 0.5902747960498068, "grad_norm": 0.279296875, "learning_rate": 0.001066126893274621, "loss": 1.9651, "step": 21996 }, { "epoch": 0.5903016316015457, "grad_norm": 0.28515625, "learning_rate": 0.001066109198181195, "loss": 1.9924, "step": 21997 }, { "epoch": 0.5903284671532847, "grad_norm": 0.28125, "learning_rate": 0.0010660915020652625, "loss": 1.98, "step": 21998 }, { "epoch": 0.5903553027050236, "grad_norm": 0.28125, "learning_rate": 0.0010660738049268624, "loss": 1.9908, "step": 21999 }, { "epoch": 0.5903821382567626, "grad_norm": 0.28125, "learning_rate": 0.0010660561067660334, "loss": 1.8744, "step": 22000 }, { "epoch": 0.5904089738085015, "grad_norm": 0.275390625, "learning_rate": 0.0010660384075828148, "loss": 1.7954, "step": 22001 }, { "epoch": 0.5904358093602404, "grad_norm": 0.294921875, "learning_rate": 0.0010660207073772447, "loss": 1.962, "step": 22002 }, { "epoch": 0.5904626449119794, "grad_norm": 0.28515625, "learning_rate": 0.0010660030061493623, "loss": 2.0049, "step": 22003 }, { "epoch": 0.5904894804637183, "grad_norm": 0.275390625, "learning_rate": 0.0010659853038992063, "loss": 1.8826, "step": 22004 }, { "epoch": 0.5905163160154573, "grad_norm": 0.28515625, "learning_rate": 0.001065967600626816, "loss": 1.9272, "step": 22005 }, { "epoch": 0.5905431515671962, "grad_norm": 0.275390625, "learning_rate": 0.0010659498963322296, "loss": 1.87, "step": 22006 }, { "epoch": 0.5905699871189352, "grad_norm": 0.29296875, "learning_rate": 0.001065932191015486, "loss": 1.9606, "step": 22007 }, { "epoch": 0.5905968226706741, "grad_norm": 0.283203125, "learning_rate": 0.0010659144846766246, "loss": 2.0114, "step": 22008 }, { "epoch": 0.590623658222413, "grad_norm": 0.294921875, "learning_rate": 0.001065896777315684, "loss": 2.0027, "step": 22009 }, { "epoch": 0.590650493774152, "grad_norm": 0.283203125, "learning_rate": 0.0010658790689327027, "loss": 1.8686, "step": 22010 }, { "epoch": 0.5906773293258909, "grad_norm": 0.28515625, "learning_rate": 0.0010658613595277199, "loss": 1.939, "step": 22011 }, { "epoch": 0.5907041648776299, "grad_norm": 0.283203125, "learning_rate": 0.0010658436491007741, "loss": 1.8276, "step": 22012 }, { "epoch": 0.5907310004293689, "grad_norm": 0.291015625, "learning_rate": 0.0010658259376519045, "loss": 1.9186, "step": 22013 }, { "epoch": 0.5907578359811078, "grad_norm": 0.283203125, "learning_rate": 0.00106580822518115, "loss": 1.8973, "step": 22014 }, { "epoch": 0.5907846715328468, "grad_norm": 0.28515625, "learning_rate": 0.001065790511688549, "loss": 1.9197, "step": 22015 }, { "epoch": 0.5908115070845856, "grad_norm": 0.275390625, "learning_rate": 0.0010657727971741411, "loss": 1.9184, "step": 22016 }, { "epoch": 0.5908383426363246, "grad_norm": 0.2890625, "learning_rate": 0.0010657550816379644, "loss": 1.8802, "step": 22017 }, { "epoch": 0.5908651781880635, "grad_norm": 0.30078125, "learning_rate": 0.0010657373650800582, "loss": 1.9608, "step": 22018 }, { "epoch": 0.5908920137398025, "grad_norm": 0.298828125, "learning_rate": 0.0010657196475004611, "loss": 1.9945, "step": 22019 }, { "epoch": 0.5909188492915415, "grad_norm": 0.2890625, "learning_rate": 0.0010657019288992122, "loss": 1.8876, "step": 22020 }, { "epoch": 0.5909456848432804, "grad_norm": 0.287109375, "learning_rate": 0.0010656842092763503, "loss": 1.9431, "step": 22021 }, { "epoch": 0.5909725203950194, "grad_norm": 0.279296875, "learning_rate": 0.0010656664886319143, "loss": 1.8997, "step": 22022 }, { "epoch": 0.5909993559467582, "grad_norm": 0.2734375, "learning_rate": 0.0010656487669659428, "loss": 1.8081, "step": 22023 }, { "epoch": 0.5910261914984972, "grad_norm": 0.294921875, "learning_rate": 0.0010656310442784752, "loss": 1.9688, "step": 22024 }, { "epoch": 0.5910530270502361, "grad_norm": 0.2890625, "learning_rate": 0.0010656133205695498, "loss": 1.8565, "step": 22025 }, { "epoch": 0.5910798626019751, "grad_norm": 0.283203125, "learning_rate": 0.0010655955958392058, "loss": 1.8826, "step": 22026 }, { "epoch": 0.5911066981537141, "grad_norm": 0.2890625, "learning_rate": 0.0010655778700874824, "loss": 1.9122, "step": 22027 }, { "epoch": 0.591133533705453, "grad_norm": 0.29296875, "learning_rate": 0.0010655601433144177, "loss": 1.9833, "step": 22028 }, { "epoch": 0.591160369257192, "grad_norm": 0.287109375, "learning_rate": 0.0010655424155200511, "loss": 1.8786, "step": 22029 }, { "epoch": 0.5911872048089308, "grad_norm": 0.2734375, "learning_rate": 0.0010655246867044216, "loss": 1.8233, "step": 22030 }, { "epoch": 0.5912140403606698, "grad_norm": 0.2890625, "learning_rate": 0.0010655069568675677, "loss": 1.8529, "step": 22031 }, { "epoch": 0.5912408759124088, "grad_norm": 0.294921875, "learning_rate": 0.0010654892260095288, "loss": 1.9001, "step": 22032 }, { "epoch": 0.5912677114641477, "grad_norm": 0.28515625, "learning_rate": 0.001065471494130343, "loss": 1.854, "step": 22033 }, { "epoch": 0.5912945470158867, "grad_norm": 0.283203125, "learning_rate": 0.0010654537612300502, "loss": 1.8217, "step": 22034 }, { "epoch": 0.5913213825676256, "grad_norm": 0.29296875, "learning_rate": 0.0010654360273086884, "loss": 1.8735, "step": 22035 }, { "epoch": 0.5913482181193646, "grad_norm": 0.2890625, "learning_rate": 0.001065418292366297, "loss": 1.854, "step": 22036 }, { "epoch": 0.5913750536711034, "grad_norm": 0.287109375, "learning_rate": 0.0010654005564029148, "loss": 1.734, "step": 22037 }, { "epoch": 0.5914018892228424, "grad_norm": 0.296875, "learning_rate": 0.0010653828194185806, "loss": 1.9664, "step": 22038 }, { "epoch": 0.5914287247745814, "grad_norm": 0.3046875, "learning_rate": 0.0010653650814133337, "loss": 1.9584, "step": 22039 }, { "epoch": 0.5914555603263203, "grad_norm": 0.28125, "learning_rate": 0.0010653473423872127, "loss": 1.8636, "step": 22040 }, { "epoch": 0.5914823958780593, "grad_norm": 0.27734375, "learning_rate": 0.0010653296023402563, "loss": 1.782, "step": 22041 }, { "epoch": 0.5915092314297982, "grad_norm": 0.318359375, "learning_rate": 0.0010653118612725036, "loss": 2.028, "step": 22042 }, { "epoch": 0.5915360669815372, "grad_norm": 0.318359375, "learning_rate": 0.0010652941191839939, "loss": 2.006, "step": 22043 }, { "epoch": 0.591562902533276, "grad_norm": 0.3125, "learning_rate": 0.0010652763760747655, "loss": 2.0423, "step": 22044 }, { "epoch": 0.591589738085015, "grad_norm": 0.30078125, "learning_rate": 0.0010652586319448577, "loss": 2.0083, "step": 22045 }, { "epoch": 0.591616573636754, "grad_norm": 0.3046875, "learning_rate": 0.0010652408867943095, "loss": 2.0714, "step": 22046 }, { "epoch": 0.5916434091884929, "grad_norm": 0.30859375, "learning_rate": 0.0010652231406231593, "loss": 2.0284, "step": 22047 }, { "epoch": 0.5916702447402319, "grad_norm": 0.287109375, "learning_rate": 0.0010652053934314468, "loss": 1.9988, "step": 22048 }, { "epoch": 0.5916970802919708, "grad_norm": 0.287109375, "learning_rate": 0.0010651876452192103, "loss": 2.04, "step": 22049 }, { "epoch": 0.5917239158437098, "grad_norm": 0.28515625, "learning_rate": 0.0010651698959864891, "loss": 1.9245, "step": 22050 }, { "epoch": 0.5917507513954486, "grad_norm": 0.291015625, "learning_rate": 0.001065152145733322, "loss": 2.0343, "step": 22051 }, { "epoch": 0.5917775869471876, "grad_norm": 0.28515625, "learning_rate": 0.0010651343944597478, "loss": 2.068, "step": 22052 }, { "epoch": 0.5918044224989266, "grad_norm": 0.28515625, "learning_rate": 0.0010651166421658056, "loss": 2.0178, "step": 22053 }, { "epoch": 0.5918312580506655, "grad_norm": 0.28515625, "learning_rate": 0.0010650988888515345, "loss": 2.0572, "step": 22054 }, { "epoch": 0.5918580936024045, "grad_norm": 0.28125, "learning_rate": 0.0010650811345169731, "loss": 2.0141, "step": 22055 }, { "epoch": 0.5918849291541434, "grad_norm": 0.287109375, "learning_rate": 0.0010650633791621606, "loss": 2.0287, "step": 22056 }, { "epoch": 0.5919117647058824, "grad_norm": 0.28125, "learning_rate": 0.0010650456227871358, "loss": 2.0514, "step": 22057 }, { "epoch": 0.5919386002576214, "grad_norm": 0.28125, "learning_rate": 0.0010650278653919379, "loss": 1.9867, "step": 22058 }, { "epoch": 0.5919654358093602, "grad_norm": 0.279296875, "learning_rate": 0.0010650101069766057, "loss": 2.007, "step": 22059 }, { "epoch": 0.5919922713610992, "grad_norm": 0.28515625, "learning_rate": 0.0010649923475411778, "loss": 2.0893, "step": 22060 }, { "epoch": 0.5920191069128381, "grad_norm": 0.283203125, "learning_rate": 0.0010649745870856936, "loss": 1.922, "step": 22061 }, { "epoch": 0.5920459424645771, "grad_norm": 0.287109375, "learning_rate": 0.0010649568256101921, "loss": 1.9145, "step": 22062 }, { "epoch": 0.592072778016316, "grad_norm": 0.275390625, "learning_rate": 0.001064939063114712, "loss": 1.9685, "step": 22063 }, { "epoch": 0.592099613568055, "grad_norm": 0.28515625, "learning_rate": 0.0010649212995992926, "loss": 2.03, "step": 22064 }, { "epoch": 0.592126449119794, "grad_norm": 0.28125, "learning_rate": 0.0010649035350639726, "loss": 1.9724, "step": 22065 }, { "epoch": 0.5921532846715328, "grad_norm": 0.275390625, "learning_rate": 0.0010648857695087907, "loss": 1.9315, "step": 22066 }, { "epoch": 0.5921801202232718, "grad_norm": 0.287109375, "learning_rate": 0.0010648680029337865, "loss": 1.9916, "step": 22067 }, { "epoch": 0.5922069557750107, "grad_norm": 0.27734375, "learning_rate": 0.0010648502353389985, "loss": 1.9695, "step": 22068 }, { "epoch": 0.5922337913267497, "grad_norm": 0.28515625, "learning_rate": 0.0010648324667244659, "loss": 2.0728, "step": 22069 }, { "epoch": 0.5922606268784886, "grad_norm": 0.279296875, "learning_rate": 0.0010648146970902276, "loss": 1.9524, "step": 22070 }, { "epoch": 0.5922874624302276, "grad_norm": 0.27734375, "learning_rate": 0.0010647969264363226, "loss": 1.9575, "step": 22071 }, { "epoch": 0.5923142979819666, "grad_norm": 0.271484375, "learning_rate": 0.0010647791547627901, "loss": 1.9293, "step": 22072 }, { "epoch": 0.5923411335337054, "grad_norm": 0.27734375, "learning_rate": 0.0010647613820696687, "loss": 1.9711, "step": 22073 }, { "epoch": 0.5923679690854444, "grad_norm": 0.283203125, "learning_rate": 0.0010647436083569975, "loss": 2.0554, "step": 22074 }, { "epoch": 0.5923948046371833, "grad_norm": 0.283203125, "learning_rate": 0.0010647258336248155, "loss": 1.9569, "step": 22075 }, { "epoch": 0.5924216401889223, "grad_norm": 0.279296875, "learning_rate": 0.0010647080578731621, "loss": 1.9412, "step": 22076 }, { "epoch": 0.5924484757406612, "grad_norm": 0.279296875, "learning_rate": 0.0010646902811020755, "loss": 1.9334, "step": 22077 }, { "epoch": 0.5924753112924002, "grad_norm": 0.27734375, "learning_rate": 0.0010646725033115955, "loss": 1.9228, "step": 22078 }, { "epoch": 0.5925021468441392, "grad_norm": 0.283203125, "learning_rate": 0.0010646547245017604, "loss": 2.0188, "step": 22079 }, { "epoch": 0.592528982395878, "grad_norm": 0.291015625, "learning_rate": 0.0010646369446726098, "loss": 2.0964, "step": 22080 }, { "epoch": 0.592555817947617, "grad_norm": 0.28515625, "learning_rate": 0.0010646191638241825, "loss": 2.0399, "step": 22081 }, { "epoch": 0.5925826534993559, "grad_norm": 0.29296875, "learning_rate": 0.001064601381956517, "loss": 2.007, "step": 22082 }, { "epoch": 0.5926094890510949, "grad_norm": 0.27734375, "learning_rate": 0.0010645835990696533, "loss": 1.9677, "step": 22083 }, { "epoch": 0.5926363246028339, "grad_norm": 0.28515625, "learning_rate": 0.0010645658151636295, "loss": 1.918, "step": 22084 }, { "epoch": 0.5926631601545728, "grad_norm": 0.27734375, "learning_rate": 0.0010645480302384851, "loss": 1.9666, "step": 22085 }, { "epoch": 0.5926899957063118, "grad_norm": 0.291015625, "learning_rate": 0.001064530244294259, "loss": 1.999, "step": 22086 }, { "epoch": 0.5927168312580506, "grad_norm": 0.275390625, "learning_rate": 0.0010645124573309902, "loss": 1.8893, "step": 22087 }, { "epoch": 0.5927436668097896, "grad_norm": 0.27734375, "learning_rate": 0.0010644946693487178, "loss": 1.9216, "step": 22088 }, { "epoch": 0.5927705023615285, "grad_norm": 0.28125, "learning_rate": 0.0010644768803474806, "loss": 1.9349, "step": 22089 }, { "epoch": 0.5927973379132675, "grad_norm": 0.287109375, "learning_rate": 0.0010644590903273181, "loss": 1.8958, "step": 22090 }, { "epoch": 0.5928241734650065, "grad_norm": 0.27734375, "learning_rate": 0.0010644412992882686, "loss": 1.8673, "step": 22091 }, { "epoch": 0.5928510090167454, "grad_norm": 0.279296875, "learning_rate": 0.0010644235072303717, "loss": 1.887, "step": 22092 }, { "epoch": 0.5928778445684844, "grad_norm": 0.298828125, "learning_rate": 0.0010644057141536664, "loss": 2.0237, "step": 22093 }, { "epoch": 0.5929046801202232, "grad_norm": 0.28515625, "learning_rate": 0.0010643879200581915, "loss": 1.9615, "step": 22094 }, { "epoch": 0.5929315156719622, "grad_norm": 0.28515625, "learning_rate": 0.001064370124943986, "loss": 1.9899, "step": 22095 }, { "epoch": 0.5929583512237011, "grad_norm": 0.2734375, "learning_rate": 0.0010643523288110892, "loss": 1.7761, "step": 22096 }, { "epoch": 0.5929851867754401, "grad_norm": 0.27734375, "learning_rate": 0.0010643345316595399, "loss": 1.9173, "step": 22097 }, { "epoch": 0.5930120223271791, "grad_norm": 0.279296875, "learning_rate": 0.0010643167334893774, "loss": 1.9061, "step": 22098 }, { "epoch": 0.593038857878918, "grad_norm": 0.287109375, "learning_rate": 0.0010642989343006403, "loss": 1.9034, "step": 22099 }, { "epoch": 0.593065693430657, "grad_norm": 0.27734375, "learning_rate": 0.0010642811340933682, "loss": 1.8609, "step": 22100 }, { "epoch": 0.5930925289823958, "grad_norm": 0.275390625, "learning_rate": 0.0010642633328675998, "loss": 1.8349, "step": 22101 }, { "epoch": 0.5931193645341348, "grad_norm": 0.283203125, "learning_rate": 0.0010642455306233744, "loss": 1.8544, "step": 22102 }, { "epoch": 0.5931462000858738, "grad_norm": 0.28515625, "learning_rate": 0.0010642277273607305, "loss": 1.9056, "step": 22103 }, { "epoch": 0.5931730356376127, "grad_norm": 0.296875, "learning_rate": 0.001064209923079708, "loss": 1.8804, "step": 22104 }, { "epoch": 0.5931998711893517, "grad_norm": 0.28515625, "learning_rate": 0.0010641921177803452, "loss": 1.8718, "step": 22105 }, { "epoch": 0.5932267067410906, "grad_norm": 0.2890625, "learning_rate": 0.0010641743114626816, "loss": 1.9984, "step": 22106 }, { "epoch": 0.5932535422928296, "grad_norm": 0.2734375, "learning_rate": 0.001064156504126756, "loss": 1.752, "step": 22107 }, { "epoch": 0.5932803778445684, "grad_norm": 0.28125, "learning_rate": 0.0010641386957726076, "loss": 1.9057, "step": 22108 }, { "epoch": 0.5933072133963074, "grad_norm": 0.283203125, "learning_rate": 0.0010641208864002754, "loss": 1.8498, "step": 22109 }, { "epoch": 0.5933340489480464, "grad_norm": 0.283203125, "learning_rate": 0.0010641030760097988, "loss": 1.772, "step": 22110 }, { "epoch": 0.5933608844997853, "grad_norm": 0.2890625, "learning_rate": 0.0010640852646012164, "loss": 1.8849, "step": 22111 }, { "epoch": 0.5933877200515243, "grad_norm": 0.279296875, "learning_rate": 0.0010640674521745675, "loss": 1.7989, "step": 22112 }, { "epoch": 0.5934145556032632, "grad_norm": 0.291015625, "learning_rate": 0.001064049638729891, "loss": 1.9349, "step": 22113 }, { "epoch": 0.5934413911550022, "grad_norm": 0.294921875, "learning_rate": 0.0010640318242672263, "loss": 1.8901, "step": 22114 }, { "epoch": 0.593468226706741, "grad_norm": 0.287109375, "learning_rate": 0.0010640140087866123, "loss": 1.9084, "step": 22115 }, { "epoch": 0.59349506225848, "grad_norm": 0.283203125, "learning_rate": 0.001063996192288088, "loss": 1.9248, "step": 22116 }, { "epoch": 0.593521897810219, "grad_norm": 0.287109375, "learning_rate": 0.0010639783747716926, "loss": 1.8908, "step": 22117 }, { "epoch": 0.5935487333619579, "grad_norm": 0.326171875, "learning_rate": 0.001063960556237465, "loss": 2.0371, "step": 22118 }, { "epoch": 0.5935755689136969, "grad_norm": 0.322265625, "learning_rate": 0.0010639427366854449, "loss": 1.9678, "step": 22119 }, { "epoch": 0.5936024044654358, "grad_norm": 0.30859375, "learning_rate": 0.0010639249161156705, "loss": 1.9984, "step": 22120 }, { "epoch": 0.5936292400171748, "grad_norm": 0.3203125, "learning_rate": 0.0010639070945281813, "loss": 2.0606, "step": 22121 }, { "epoch": 0.5936560755689136, "grad_norm": 0.31640625, "learning_rate": 0.0010638892719230166, "loss": 1.9643, "step": 22122 }, { "epoch": 0.5936829111206526, "grad_norm": 0.296875, "learning_rate": 0.0010638714483002152, "loss": 1.9754, "step": 22123 }, { "epoch": 0.5937097466723916, "grad_norm": 0.3046875, "learning_rate": 0.0010638536236598163, "loss": 2.1097, "step": 22124 }, { "epoch": 0.5937365822241305, "grad_norm": 0.29296875, "learning_rate": 0.0010638357980018592, "loss": 2.0294, "step": 22125 }, { "epoch": 0.5937634177758695, "grad_norm": 0.294921875, "learning_rate": 0.0010638179713263827, "loss": 2.0279, "step": 22126 }, { "epoch": 0.5937902533276084, "grad_norm": 0.28125, "learning_rate": 0.001063800143633426, "loss": 2.0736, "step": 22127 }, { "epoch": 0.5938170888793474, "grad_norm": 0.28515625, "learning_rate": 0.0010637823149230283, "loss": 1.947, "step": 22128 }, { "epoch": 0.5938439244310864, "grad_norm": 0.29296875, "learning_rate": 0.0010637644851952286, "loss": 2.1019, "step": 22129 }, { "epoch": 0.5938707599828252, "grad_norm": 0.294921875, "learning_rate": 0.0010637466544500662, "loss": 2.0334, "step": 22130 }, { "epoch": 0.5938975955345642, "grad_norm": 0.29296875, "learning_rate": 0.00106372882268758, "loss": 2.0744, "step": 22131 }, { "epoch": 0.5939244310863031, "grad_norm": 0.279296875, "learning_rate": 0.0010637109899078092, "loss": 1.9179, "step": 22132 }, { "epoch": 0.5939512666380421, "grad_norm": 0.28125, "learning_rate": 0.0010636931561107928, "loss": 1.9236, "step": 22133 }, { "epoch": 0.593978102189781, "grad_norm": 0.291015625, "learning_rate": 0.0010636753212965701, "loss": 1.9913, "step": 22134 }, { "epoch": 0.59400493774152, "grad_norm": 0.283203125, "learning_rate": 0.0010636574854651802, "loss": 1.922, "step": 22135 }, { "epoch": 0.594031773293259, "grad_norm": 0.283203125, "learning_rate": 0.001063639648616662, "loss": 2.0024, "step": 22136 }, { "epoch": 0.5940586088449978, "grad_norm": 0.28125, "learning_rate": 0.001063621810751055, "loss": 1.9579, "step": 22137 }, { "epoch": 0.5940854443967368, "grad_norm": 0.28515625, "learning_rate": 0.0010636039718683982, "loss": 1.9368, "step": 22138 }, { "epoch": 0.5941122799484757, "grad_norm": 0.28125, "learning_rate": 0.0010635861319687305, "loss": 2.0232, "step": 22139 }, { "epoch": 0.5941391155002147, "grad_norm": 0.279296875, "learning_rate": 0.0010635682910520915, "loss": 1.9457, "step": 22140 }, { "epoch": 0.5941659510519536, "grad_norm": 0.27734375, "learning_rate": 0.0010635504491185198, "loss": 2.0255, "step": 22141 }, { "epoch": 0.5941927866036926, "grad_norm": 0.2734375, "learning_rate": 0.0010635326061680548, "loss": 1.9531, "step": 22142 }, { "epoch": 0.5942196221554316, "grad_norm": 0.275390625, "learning_rate": 0.001063514762200736, "loss": 1.9656, "step": 22143 }, { "epoch": 0.5942464577071704, "grad_norm": 0.2890625, "learning_rate": 0.0010634969172166016, "loss": 2.0202, "step": 22144 }, { "epoch": 0.5942732932589094, "grad_norm": 0.275390625, "learning_rate": 0.0010634790712156915, "loss": 2.0168, "step": 22145 }, { "epoch": 0.5943001288106483, "grad_norm": 0.27734375, "learning_rate": 0.001063461224198045, "loss": 1.9302, "step": 22146 }, { "epoch": 0.5943269643623873, "grad_norm": 0.275390625, "learning_rate": 0.0010634433761637007, "loss": 1.8914, "step": 22147 }, { "epoch": 0.5943537999141262, "grad_norm": 0.279296875, "learning_rate": 0.001063425527112698, "loss": 2.0056, "step": 22148 }, { "epoch": 0.5943806354658652, "grad_norm": 0.287109375, "learning_rate": 0.001063407677045076, "loss": 1.9485, "step": 22149 }, { "epoch": 0.5944074710176042, "grad_norm": 0.275390625, "learning_rate": 0.001063389825960874, "loss": 1.946, "step": 22150 }, { "epoch": 0.594434306569343, "grad_norm": 0.291015625, "learning_rate": 0.001063371973860131, "loss": 1.9648, "step": 22151 }, { "epoch": 0.594461142121082, "grad_norm": 0.287109375, "learning_rate": 0.001063354120742886, "loss": 2.0702, "step": 22152 }, { "epoch": 0.5944879776728209, "grad_norm": 0.265625, "learning_rate": 0.0010633362666091785, "loss": 1.8612, "step": 22153 }, { "epoch": 0.5945148132245599, "grad_norm": 0.283203125, "learning_rate": 0.0010633184114590476, "loss": 2.0116, "step": 22154 }, { "epoch": 0.5945416487762989, "grad_norm": 0.271484375, "learning_rate": 0.0010633005552925325, "loss": 1.967, "step": 22155 }, { "epoch": 0.5945684843280378, "grad_norm": 0.28125, "learning_rate": 0.0010632826981096723, "loss": 1.9992, "step": 22156 }, { "epoch": 0.5945953198797768, "grad_norm": 0.28125, "learning_rate": 0.001063264839910506, "loss": 1.996, "step": 22157 }, { "epoch": 0.5946221554315156, "grad_norm": 0.271484375, "learning_rate": 0.001063246980695073, "loss": 1.8595, "step": 22158 }, { "epoch": 0.5946489909832546, "grad_norm": 0.275390625, "learning_rate": 0.0010632291204634124, "loss": 1.9702, "step": 22159 }, { "epoch": 0.5946758265349935, "grad_norm": 0.28515625, "learning_rate": 0.0010632112592155634, "loss": 2.056, "step": 22160 }, { "epoch": 0.5947026620867325, "grad_norm": 0.27734375, "learning_rate": 0.0010631933969515652, "loss": 1.8511, "step": 22161 }, { "epoch": 0.5947294976384715, "grad_norm": 0.2734375, "learning_rate": 0.0010631755336714568, "loss": 1.8402, "step": 22162 }, { "epoch": 0.5947563331902104, "grad_norm": 0.275390625, "learning_rate": 0.001063157669375278, "loss": 1.827, "step": 22163 }, { "epoch": 0.5947831687419494, "grad_norm": 0.275390625, "learning_rate": 0.001063139804063067, "loss": 1.8592, "step": 22164 }, { "epoch": 0.5948100042936882, "grad_norm": 0.28515625, "learning_rate": 0.001063121937734864, "loss": 2.0664, "step": 22165 }, { "epoch": 0.5948368398454272, "grad_norm": 0.283203125, "learning_rate": 0.0010631040703907074, "loss": 1.8553, "step": 22166 }, { "epoch": 0.5948636753971661, "grad_norm": 0.2734375, "learning_rate": 0.0010630862020306368, "loss": 1.9362, "step": 22167 }, { "epoch": 0.5948905109489051, "grad_norm": 0.2890625, "learning_rate": 0.0010630683326546912, "loss": 1.9824, "step": 22168 }, { "epoch": 0.5949173465006441, "grad_norm": 0.28125, "learning_rate": 0.0010630504622629103, "loss": 1.9693, "step": 22169 }, { "epoch": 0.594944182052383, "grad_norm": 0.27734375, "learning_rate": 0.0010630325908553323, "loss": 1.9103, "step": 22170 }, { "epoch": 0.594971017604122, "grad_norm": 0.2734375, "learning_rate": 0.0010630147184319976, "loss": 1.8747, "step": 22171 }, { "epoch": 0.5949978531558608, "grad_norm": 0.28515625, "learning_rate": 0.0010629968449929446, "loss": 1.8448, "step": 22172 }, { "epoch": 0.5950246887075998, "grad_norm": 0.271484375, "learning_rate": 0.0010629789705382127, "loss": 1.8213, "step": 22173 }, { "epoch": 0.5950515242593388, "grad_norm": 0.283203125, "learning_rate": 0.0010629610950678413, "loss": 1.8248, "step": 22174 }, { "epoch": 0.5950783598110777, "grad_norm": 0.279296875, "learning_rate": 0.0010629432185818692, "loss": 1.8943, "step": 22175 }, { "epoch": 0.5951051953628167, "grad_norm": 0.283203125, "learning_rate": 0.001062925341080336, "loss": 1.8243, "step": 22176 }, { "epoch": 0.5951320309145556, "grad_norm": 0.28125, "learning_rate": 0.001062907462563281, "loss": 1.9003, "step": 22177 }, { "epoch": 0.5951588664662946, "grad_norm": 0.29296875, "learning_rate": 0.0010628895830307428, "loss": 1.8622, "step": 22178 }, { "epoch": 0.5951857020180334, "grad_norm": 0.287109375, "learning_rate": 0.0010628717024827614, "loss": 1.8598, "step": 22179 }, { "epoch": 0.5952125375697724, "grad_norm": 0.2890625, "learning_rate": 0.0010628538209193756, "loss": 1.9023, "step": 22180 }, { "epoch": 0.5952393731215114, "grad_norm": 0.28515625, "learning_rate": 0.0010628359383406246, "loss": 1.8643, "step": 22181 }, { "epoch": 0.5952662086732503, "grad_norm": 0.2890625, "learning_rate": 0.0010628180547465477, "loss": 1.8655, "step": 22182 }, { "epoch": 0.5952930442249893, "grad_norm": 0.287109375, "learning_rate": 0.001062800170137184, "loss": 1.8196, "step": 22183 }, { "epoch": 0.5953198797767282, "grad_norm": 0.279296875, "learning_rate": 0.0010627822845125732, "loss": 1.7136, "step": 22184 }, { "epoch": 0.5953467153284672, "grad_norm": 0.2890625, "learning_rate": 0.0010627643978727542, "loss": 1.8712, "step": 22185 }, { "epoch": 0.595373550880206, "grad_norm": 0.283203125, "learning_rate": 0.001062746510217766, "loss": 1.7681, "step": 22186 }, { "epoch": 0.595400386431945, "grad_norm": 0.291015625, "learning_rate": 0.0010627286215476484, "loss": 1.9787, "step": 22187 }, { "epoch": 0.595427221983684, "grad_norm": 0.2734375, "learning_rate": 0.0010627107318624404, "loss": 1.7426, "step": 22188 }, { "epoch": 0.5954540575354229, "grad_norm": 0.2890625, "learning_rate": 0.001062692841162181, "loss": 1.8294, "step": 22189 }, { "epoch": 0.5954808930871619, "grad_norm": 0.29296875, "learning_rate": 0.0010626749494469093, "loss": 1.822, "step": 22190 }, { "epoch": 0.5955077286389008, "grad_norm": 0.296875, "learning_rate": 0.0010626570567166653, "loss": 1.9293, "step": 22191 }, { "epoch": 0.5955345641906398, "grad_norm": 0.2890625, "learning_rate": 0.0010626391629714876, "loss": 1.8903, "step": 22192 }, { "epoch": 0.5955613997423786, "grad_norm": 0.328125, "learning_rate": 0.001062621268211416, "loss": 2.0269, "step": 22193 }, { "epoch": 0.5955882352941176, "grad_norm": 0.31640625, "learning_rate": 0.0010626033724364895, "loss": 1.9899, "step": 22194 }, { "epoch": 0.5956150708458566, "grad_norm": 0.326171875, "learning_rate": 0.001062585475646747, "loss": 1.9988, "step": 22195 }, { "epoch": 0.5956419063975955, "grad_norm": 0.3125, "learning_rate": 0.001062567577842228, "loss": 2.0776, "step": 22196 }, { "epoch": 0.5956687419493345, "grad_norm": 0.30078125, "learning_rate": 0.0010625496790229718, "loss": 2.1687, "step": 22197 }, { "epoch": 0.5956955775010734, "grad_norm": 0.30859375, "learning_rate": 0.001062531779189018, "loss": 2.0461, "step": 22198 }, { "epoch": 0.5957224130528124, "grad_norm": 0.296875, "learning_rate": 0.0010625138783404054, "loss": 2.099, "step": 22199 }, { "epoch": 0.5957492486045514, "grad_norm": 0.287109375, "learning_rate": 0.0010624959764771735, "loss": 2.0605, "step": 22200 }, { "epoch": 0.5957760841562902, "grad_norm": 0.283203125, "learning_rate": 0.0010624780735993614, "loss": 2.0089, "step": 22201 }, { "epoch": 0.5958029197080292, "grad_norm": 0.291015625, "learning_rate": 0.0010624601697070085, "loss": 2.0987, "step": 22202 }, { "epoch": 0.5958297552597681, "grad_norm": 0.275390625, "learning_rate": 0.001062442264800154, "loss": 1.9991, "step": 22203 }, { "epoch": 0.5958565908115071, "grad_norm": 0.283203125, "learning_rate": 0.0010624243588788374, "loss": 2.062, "step": 22204 }, { "epoch": 0.595883426363246, "grad_norm": 0.30078125, "learning_rate": 0.0010624064519430974, "loss": 1.9866, "step": 22205 }, { "epoch": 0.595910261914985, "grad_norm": 0.291015625, "learning_rate": 0.001062388543992974, "loss": 1.977, "step": 22206 }, { "epoch": 0.595937097466724, "grad_norm": 0.28515625, "learning_rate": 0.0010623706350285063, "loss": 2.0771, "step": 22207 }, { "epoch": 0.5959639330184628, "grad_norm": 0.275390625, "learning_rate": 0.001062352725049733, "loss": 1.9635, "step": 22208 }, { "epoch": 0.5959907685702018, "grad_norm": 0.279296875, "learning_rate": 0.0010623348140566943, "loss": 1.926, "step": 22209 }, { "epoch": 0.5960176041219407, "grad_norm": 0.275390625, "learning_rate": 0.0010623169020494289, "loss": 2.0279, "step": 22210 }, { "epoch": 0.5960444396736797, "grad_norm": 0.291015625, "learning_rate": 0.0010622989890279762, "loss": 1.9878, "step": 22211 }, { "epoch": 0.5960712752254186, "grad_norm": 0.27734375, "learning_rate": 0.0010622810749923756, "loss": 2.0203, "step": 22212 }, { "epoch": 0.5960981107771576, "grad_norm": 0.279296875, "learning_rate": 0.001062263159942666, "loss": 1.9675, "step": 22213 }, { "epoch": 0.5961249463288966, "grad_norm": 0.283203125, "learning_rate": 0.0010622452438788876, "loss": 2.0369, "step": 22214 }, { "epoch": 0.5961517818806354, "grad_norm": 0.265625, "learning_rate": 0.0010622273268010786, "loss": 1.9005, "step": 22215 }, { "epoch": 0.5961786174323744, "grad_norm": 0.27734375, "learning_rate": 0.001062209408709279, "loss": 1.9961, "step": 22216 }, { "epoch": 0.5962054529841133, "grad_norm": 0.275390625, "learning_rate": 0.0010621914896035282, "loss": 1.8815, "step": 22217 }, { "epoch": 0.5962322885358523, "grad_norm": 0.28125, "learning_rate": 0.001062173569483865, "loss": 1.9636, "step": 22218 }, { "epoch": 0.5962591240875912, "grad_norm": 0.279296875, "learning_rate": 0.001062155648350329, "loss": 1.9088, "step": 22219 }, { "epoch": 0.5962859596393302, "grad_norm": 0.291015625, "learning_rate": 0.0010621377262029592, "loss": 2.0026, "step": 22220 }, { "epoch": 0.5963127951910692, "grad_norm": 0.2734375, "learning_rate": 0.0010621198030417956, "loss": 1.8808, "step": 22221 }, { "epoch": 0.596339630742808, "grad_norm": 0.2890625, "learning_rate": 0.001062101878866877, "loss": 2.0171, "step": 22222 }, { "epoch": 0.596366466294547, "grad_norm": 0.275390625, "learning_rate": 0.0010620839536782428, "loss": 1.9009, "step": 22223 }, { "epoch": 0.5963933018462859, "grad_norm": 0.287109375, "learning_rate": 0.0010620660274759323, "loss": 1.968, "step": 22224 }, { "epoch": 0.5964201373980249, "grad_norm": 0.29296875, "learning_rate": 0.001062048100259985, "loss": 2.0414, "step": 22225 }, { "epoch": 0.5964469729497639, "grad_norm": 0.27734375, "learning_rate": 0.00106203017203044, "loss": 2.0052, "step": 22226 }, { "epoch": 0.5964738085015028, "grad_norm": 0.283203125, "learning_rate": 0.0010620122427873369, "loss": 1.9084, "step": 22227 }, { "epoch": 0.5965006440532418, "grad_norm": 0.28125, "learning_rate": 0.0010619943125307145, "loss": 1.9914, "step": 22228 }, { "epoch": 0.5965274796049806, "grad_norm": 0.283203125, "learning_rate": 0.001061976381260613, "loss": 1.9541, "step": 22229 }, { "epoch": 0.5965543151567196, "grad_norm": 0.28125, "learning_rate": 0.0010619584489770708, "loss": 1.9686, "step": 22230 }, { "epoch": 0.5965811507084585, "grad_norm": 0.28125, "learning_rate": 0.0010619405156801279, "loss": 1.9757, "step": 22231 }, { "epoch": 0.5966079862601975, "grad_norm": 0.2890625, "learning_rate": 0.0010619225813698235, "loss": 1.9632, "step": 22232 }, { "epoch": 0.5966348218119365, "grad_norm": 0.28125, "learning_rate": 0.0010619046460461966, "loss": 1.9635, "step": 22233 }, { "epoch": 0.5966616573636754, "grad_norm": 0.2734375, "learning_rate": 0.001061886709709287, "loss": 1.9351, "step": 22234 }, { "epoch": 0.5966884929154144, "grad_norm": 0.2734375, "learning_rate": 0.0010618687723591338, "loss": 1.8693, "step": 22235 }, { "epoch": 0.5967153284671532, "grad_norm": 0.2734375, "learning_rate": 0.0010618508339957761, "loss": 1.9762, "step": 22236 }, { "epoch": 0.5967421640188922, "grad_norm": 0.28125, "learning_rate": 0.0010618328946192539, "loss": 1.976, "step": 22237 }, { "epoch": 0.5967689995706311, "grad_norm": 0.2734375, "learning_rate": 0.0010618149542296063, "loss": 1.8948, "step": 22238 }, { "epoch": 0.5967958351223701, "grad_norm": 0.291015625, "learning_rate": 0.0010617970128268721, "loss": 2.0524, "step": 22239 }, { "epoch": 0.5968226706741091, "grad_norm": 0.283203125, "learning_rate": 0.0010617790704110916, "loss": 1.9788, "step": 22240 }, { "epoch": 0.596849506225848, "grad_norm": 0.29296875, "learning_rate": 0.0010617611269823034, "loss": 2.0117, "step": 22241 }, { "epoch": 0.596876341777587, "grad_norm": 0.287109375, "learning_rate": 0.001061743182540547, "loss": 1.9234, "step": 22242 }, { "epoch": 0.5969031773293259, "grad_norm": 0.2734375, "learning_rate": 0.0010617252370858624, "loss": 1.8954, "step": 22243 }, { "epoch": 0.5969300128810648, "grad_norm": 0.27734375, "learning_rate": 0.0010617072906182882, "loss": 1.8451, "step": 22244 }, { "epoch": 0.5969568484328038, "grad_norm": 0.283203125, "learning_rate": 0.0010616893431378638, "loss": 1.9494, "step": 22245 }, { "epoch": 0.5969836839845427, "grad_norm": 0.2890625, "learning_rate": 0.0010616713946446292, "loss": 1.9222, "step": 22246 }, { "epoch": 0.5970105195362817, "grad_norm": 0.287109375, "learning_rate": 0.0010616534451386233, "loss": 1.9476, "step": 22247 }, { "epoch": 0.5970373550880206, "grad_norm": 0.27734375, "learning_rate": 0.0010616354946198854, "loss": 1.8557, "step": 22248 }, { "epoch": 0.5970641906397596, "grad_norm": 0.283203125, "learning_rate": 0.001061617543088455, "loss": 1.8838, "step": 22249 }, { "epoch": 0.5970910261914985, "grad_norm": 0.279296875, "learning_rate": 0.0010615995905443715, "loss": 1.8495, "step": 22250 }, { "epoch": 0.5971178617432374, "grad_norm": 0.28515625, "learning_rate": 0.0010615816369876747, "loss": 1.9018, "step": 22251 }, { "epoch": 0.5971446972949764, "grad_norm": 0.287109375, "learning_rate": 0.0010615636824184032, "loss": 1.9376, "step": 22252 }, { "epoch": 0.5971715328467153, "grad_norm": 0.28125, "learning_rate": 0.001061545726836597, "loss": 1.8769, "step": 22253 }, { "epoch": 0.5971983683984543, "grad_norm": 0.283203125, "learning_rate": 0.0010615277702422951, "loss": 1.883, "step": 22254 }, { "epoch": 0.5972252039501932, "grad_norm": 0.291015625, "learning_rate": 0.0010615098126355372, "loss": 1.8766, "step": 22255 }, { "epoch": 0.5972520395019322, "grad_norm": 0.26953125, "learning_rate": 0.0010614918540163625, "loss": 1.8287, "step": 22256 }, { "epoch": 0.597278875053671, "grad_norm": 0.291015625, "learning_rate": 0.0010614738943848102, "loss": 1.827, "step": 22257 }, { "epoch": 0.59730571060541, "grad_norm": 0.287109375, "learning_rate": 0.0010614559337409203, "loss": 1.8351, "step": 22258 }, { "epoch": 0.597332546157149, "grad_norm": 0.3046875, "learning_rate": 0.0010614379720847316, "loss": 1.9662, "step": 22259 }, { "epoch": 0.5973593817088879, "grad_norm": 0.287109375, "learning_rate": 0.001061420009416284, "loss": 1.867, "step": 22260 }, { "epoch": 0.5973862172606269, "grad_norm": 0.30078125, "learning_rate": 0.0010614020457356166, "loss": 1.8734, "step": 22261 }, { "epoch": 0.5974130528123658, "grad_norm": 0.291015625, "learning_rate": 0.0010613840810427687, "loss": 1.9619, "step": 22262 }, { "epoch": 0.5974398883641048, "grad_norm": 0.29296875, "learning_rate": 0.00106136611533778, "loss": 1.8702, "step": 22263 }, { "epoch": 0.5974667239158437, "grad_norm": 0.279296875, "learning_rate": 0.0010613481486206895, "loss": 1.8124, "step": 22264 }, { "epoch": 0.5974935594675826, "grad_norm": 0.283203125, "learning_rate": 0.0010613301808915373, "loss": 1.8521, "step": 22265 }, { "epoch": 0.5975203950193216, "grad_norm": 0.30859375, "learning_rate": 0.001061312212150362, "loss": 2.0077, "step": 22266 }, { "epoch": 0.5975472305710605, "grad_norm": 0.337890625, "learning_rate": 0.0010612942423972038, "loss": 1.9569, "step": 22267 }, { "epoch": 0.5975740661227995, "grad_norm": 0.32421875, "learning_rate": 0.0010612762716321017, "loss": 2.0328, "step": 22268 }, { "epoch": 0.5976009016745384, "grad_norm": 0.30859375, "learning_rate": 0.0010612582998550948, "loss": 2.0535, "step": 22269 }, { "epoch": 0.5976277372262774, "grad_norm": 0.302734375, "learning_rate": 0.0010612403270662232, "loss": 2.0907, "step": 22270 }, { "epoch": 0.5976545727780164, "grad_norm": 0.291015625, "learning_rate": 0.0010612223532655261, "loss": 2.0698, "step": 22271 }, { "epoch": 0.5976814083297552, "grad_norm": 0.30078125, "learning_rate": 0.0010612043784530426, "loss": 2.0366, "step": 22272 }, { "epoch": 0.5977082438814942, "grad_norm": 0.298828125, "learning_rate": 0.0010611864026288127, "loss": 2.0534, "step": 22273 }, { "epoch": 0.5977350794332331, "grad_norm": 0.279296875, "learning_rate": 0.001061168425792875, "loss": 1.9784, "step": 22274 }, { "epoch": 0.5977619149849721, "grad_norm": 0.279296875, "learning_rate": 0.00106115044794527, "loss": 1.9123, "step": 22275 }, { "epoch": 0.597788750536711, "grad_norm": 0.2734375, "learning_rate": 0.0010611324690860363, "loss": 1.966, "step": 22276 }, { "epoch": 0.59781558608845, "grad_norm": 0.2734375, "learning_rate": 0.0010611144892152136, "loss": 1.9519, "step": 22277 }, { "epoch": 0.597842421640189, "grad_norm": 0.279296875, "learning_rate": 0.0010610965083328416, "loss": 2.035, "step": 22278 }, { "epoch": 0.5978692571919278, "grad_norm": 0.2890625, "learning_rate": 0.0010610785264389593, "loss": 2.069, "step": 22279 }, { "epoch": 0.5978960927436668, "grad_norm": 0.27734375, "learning_rate": 0.0010610605435336065, "loss": 1.9605, "step": 22280 }, { "epoch": 0.5979229282954057, "grad_norm": 0.2890625, "learning_rate": 0.0010610425596168226, "loss": 2.0497, "step": 22281 }, { "epoch": 0.5979497638471447, "grad_norm": 0.287109375, "learning_rate": 0.0010610245746886468, "loss": 2.0386, "step": 22282 }, { "epoch": 0.5979765993988836, "grad_norm": 0.28125, "learning_rate": 0.0010610065887491187, "loss": 2.0537, "step": 22283 }, { "epoch": 0.5980034349506226, "grad_norm": 0.279296875, "learning_rate": 0.0010609886017982778, "loss": 2.0059, "step": 22284 }, { "epoch": 0.5980302705023616, "grad_norm": 0.287109375, "learning_rate": 0.0010609706138361634, "loss": 2.1017, "step": 22285 }, { "epoch": 0.5980571060541005, "grad_norm": 0.279296875, "learning_rate": 0.0010609526248628153, "loss": 2.0717, "step": 22286 }, { "epoch": 0.5980839416058394, "grad_norm": 0.27734375, "learning_rate": 0.0010609346348782727, "loss": 2.0578, "step": 22287 }, { "epoch": 0.5981107771575783, "grad_norm": 0.291015625, "learning_rate": 0.0010609166438825752, "loss": 2.0754, "step": 22288 }, { "epoch": 0.5981376127093173, "grad_norm": 0.271484375, "learning_rate": 0.0010608986518757621, "loss": 1.9419, "step": 22289 }, { "epoch": 0.5981644482610562, "grad_norm": 0.283203125, "learning_rate": 0.001060880658857873, "loss": 1.9471, "step": 22290 }, { "epoch": 0.5981912838127952, "grad_norm": 0.28125, "learning_rate": 0.0010608626648289472, "loss": 1.9844, "step": 22291 }, { "epoch": 0.5982181193645342, "grad_norm": 0.291015625, "learning_rate": 0.0010608446697890244, "loss": 2.0307, "step": 22292 }, { "epoch": 0.598244954916273, "grad_norm": 0.27734375, "learning_rate": 0.0010608266737381438, "loss": 1.9299, "step": 22293 }, { "epoch": 0.598271790468012, "grad_norm": 0.27734375, "learning_rate": 0.0010608086766763453, "loss": 1.9694, "step": 22294 }, { "epoch": 0.5982986260197509, "grad_norm": 0.275390625, "learning_rate": 0.001060790678603668, "loss": 1.9263, "step": 22295 }, { "epoch": 0.5983254615714899, "grad_norm": 0.279296875, "learning_rate": 0.0010607726795201517, "loss": 1.963, "step": 22296 }, { "epoch": 0.5983522971232289, "grad_norm": 0.28125, "learning_rate": 0.0010607546794258356, "loss": 1.977, "step": 22297 }, { "epoch": 0.5983791326749678, "grad_norm": 0.271484375, "learning_rate": 0.0010607366783207592, "loss": 1.9112, "step": 22298 }, { "epoch": 0.5984059682267068, "grad_norm": 0.279296875, "learning_rate": 0.0010607186762049622, "loss": 1.9538, "step": 22299 }, { "epoch": 0.5984328037784457, "grad_norm": 0.287109375, "learning_rate": 0.0010607006730784837, "loss": 1.9347, "step": 22300 }, { "epoch": 0.5984596393301846, "grad_norm": 0.275390625, "learning_rate": 0.0010606826689413637, "loss": 1.9321, "step": 22301 }, { "epoch": 0.5984864748819235, "grad_norm": 0.291015625, "learning_rate": 0.0010606646637936416, "loss": 1.9908, "step": 22302 }, { "epoch": 0.5985133104336625, "grad_norm": 0.283203125, "learning_rate": 0.0010606466576353567, "loss": 1.9051, "step": 22303 }, { "epoch": 0.5985401459854015, "grad_norm": 0.287109375, "learning_rate": 0.0010606286504665483, "loss": 1.9743, "step": 22304 }, { "epoch": 0.5985669815371404, "grad_norm": 0.287109375, "learning_rate": 0.0010606106422872564, "loss": 1.8897, "step": 22305 }, { "epoch": 0.5985938170888794, "grad_norm": 0.28125, "learning_rate": 0.0010605926330975201, "loss": 1.9701, "step": 22306 }, { "epoch": 0.5986206526406183, "grad_norm": 0.28515625, "learning_rate": 0.0010605746228973792, "loss": 1.9147, "step": 22307 }, { "epoch": 0.5986474881923572, "grad_norm": 0.287109375, "learning_rate": 0.0010605566116868729, "loss": 1.9245, "step": 22308 }, { "epoch": 0.5986743237440961, "grad_norm": 0.28515625, "learning_rate": 0.0010605385994660412, "loss": 1.8819, "step": 22309 }, { "epoch": 0.5987011592958351, "grad_norm": 0.275390625, "learning_rate": 0.0010605205862349231, "loss": 1.9731, "step": 22310 }, { "epoch": 0.5987279948475741, "grad_norm": 0.279296875, "learning_rate": 0.0010605025719935583, "loss": 1.922, "step": 22311 }, { "epoch": 0.598754830399313, "grad_norm": 0.28125, "learning_rate": 0.0010604845567419863, "loss": 2.0568, "step": 22312 }, { "epoch": 0.598781665951052, "grad_norm": 0.283203125, "learning_rate": 0.0010604665404802469, "loss": 1.8912, "step": 22313 }, { "epoch": 0.5988085015027909, "grad_norm": 0.279296875, "learning_rate": 0.0010604485232083793, "loss": 1.8679, "step": 22314 }, { "epoch": 0.5988353370545298, "grad_norm": 0.27734375, "learning_rate": 0.0010604305049264227, "loss": 1.9105, "step": 22315 }, { "epoch": 0.5988621726062688, "grad_norm": 0.291015625, "learning_rate": 0.0010604124856344176, "loss": 1.888, "step": 22316 }, { "epoch": 0.5988890081580077, "grad_norm": 0.29296875, "learning_rate": 0.0010603944653324027, "loss": 1.9947, "step": 22317 }, { "epoch": 0.5989158437097467, "grad_norm": 0.279296875, "learning_rate": 0.0010603764440204177, "loss": 1.8723, "step": 22318 }, { "epoch": 0.5989426792614856, "grad_norm": 0.2890625, "learning_rate": 0.0010603584216985022, "loss": 1.9431, "step": 22319 }, { "epoch": 0.5989695148132246, "grad_norm": 0.283203125, "learning_rate": 0.001060340398366696, "loss": 1.8914, "step": 22320 }, { "epoch": 0.5989963503649635, "grad_norm": 0.27734375, "learning_rate": 0.0010603223740250384, "loss": 1.8642, "step": 22321 }, { "epoch": 0.5990231859167024, "grad_norm": 0.283203125, "learning_rate": 0.001060304348673569, "loss": 1.8404, "step": 22322 }, { "epoch": 0.5990500214684414, "grad_norm": 0.28515625, "learning_rate": 0.0010602863223123271, "loss": 1.9917, "step": 22323 }, { "epoch": 0.5990768570201803, "grad_norm": 0.2734375, "learning_rate": 0.0010602682949413524, "loss": 1.8164, "step": 22324 }, { "epoch": 0.5991036925719193, "grad_norm": 0.287109375, "learning_rate": 0.0010602502665606845, "loss": 1.9214, "step": 22325 }, { "epoch": 0.5991305281236582, "grad_norm": 0.2890625, "learning_rate": 0.001060232237170363, "loss": 1.985, "step": 22326 }, { "epoch": 0.5991573636753972, "grad_norm": 0.275390625, "learning_rate": 0.0010602142067704275, "loss": 1.8982, "step": 22327 }, { "epoch": 0.5991841992271361, "grad_norm": 0.287109375, "learning_rate": 0.001060196175360917, "loss": 1.9426, "step": 22328 }, { "epoch": 0.599211034778875, "grad_norm": 0.291015625, "learning_rate": 0.0010601781429418718, "loss": 1.9167, "step": 22329 }, { "epoch": 0.599237870330614, "grad_norm": 0.28515625, "learning_rate": 0.001060160109513331, "loss": 1.8807, "step": 22330 }, { "epoch": 0.5992647058823529, "grad_norm": 0.287109375, "learning_rate": 0.0010601420750753344, "loss": 1.9445, "step": 22331 }, { "epoch": 0.5992915414340919, "grad_norm": 0.283203125, "learning_rate": 0.0010601240396279216, "loss": 1.9243, "step": 22332 }, { "epoch": 0.5993183769858308, "grad_norm": 0.298828125, "learning_rate": 0.0010601060031711319, "loss": 1.9762, "step": 22333 }, { "epoch": 0.5993452125375698, "grad_norm": 0.291015625, "learning_rate": 0.0010600879657050048, "loss": 1.9437, "step": 22334 }, { "epoch": 0.5993720480893087, "grad_norm": 0.279296875, "learning_rate": 0.0010600699272295803, "loss": 1.8833, "step": 22335 }, { "epoch": 0.5993988836410477, "grad_norm": 0.291015625, "learning_rate": 0.0010600518877448976, "loss": 1.9905, "step": 22336 }, { "epoch": 0.5994257191927866, "grad_norm": 0.291015625, "learning_rate": 0.0010600338472509965, "loss": 1.9318, "step": 22337 }, { "epoch": 0.5994525547445255, "grad_norm": 0.318359375, "learning_rate": 0.0010600158057479164, "loss": 2.0133, "step": 22338 }, { "epoch": 0.5994793902962645, "grad_norm": 0.345703125, "learning_rate": 0.001059997763235697, "loss": 2.1187, "step": 22339 }, { "epoch": 0.5995062258480034, "grad_norm": 0.330078125, "learning_rate": 0.001059979719714378, "loss": 2.2349, "step": 22340 }, { "epoch": 0.5995330613997424, "grad_norm": 0.296875, "learning_rate": 0.0010599616751839987, "loss": 1.9898, "step": 22341 }, { "epoch": 0.5995598969514814, "grad_norm": 0.3125, "learning_rate": 0.0010599436296445987, "loss": 2.0538, "step": 22342 }, { "epoch": 0.5995867325032203, "grad_norm": 0.30078125, "learning_rate": 0.0010599255830962178, "loss": 2.0829, "step": 22343 }, { "epoch": 0.5996135680549592, "grad_norm": 0.2890625, "learning_rate": 0.0010599075355388955, "loss": 1.9957, "step": 22344 }, { "epoch": 0.5996404036066981, "grad_norm": 0.283203125, "learning_rate": 0.0010598894869726712, "loss": 2.0087, "step": 22345 }, { "epoch": 0.5996672391584371, "grad_norm": 0.294921875, "learning_rate": 0.0010598714373975847, "loss": 2.1402, "step": 22346 }, { "epoch": 0.599694074710176, "grad_norm": 0.29296875, "learning_rate": 0.0010598533868136758, "loss": 1.9545, "step": 22347 }, { "epoch": 0.599720910261915, "grad_norm": 0.279296875, "learning_rate": 0.0010598353352209839, "loss": 2.0447, "step": 22348 }, { "epoch": 0.599747745813654, "grad_norm": 0.283203125, "learning_rate": 0.0010598172826195482, "loss": 1.9584, "step": 22349 }, { "epoch": 0.5997745813653929, "grad_norm": 0.287109375, "learning_rate": 0.0010597992290094088, "loss": 1.996, "step": 22350 }, { "epoch": 0.5998014169171318, "grad_norm": 0.28125, "learning_rate": 0.0010597811743906053, "loss": 2.0114, "step": 22351 }, { "epoch": 0.5998282524688707, "grad_norm": 0.287109375, "learning_rate": 0.0010597631187631773, "loss": 2.0638, "step": 22352 }, { "epoch": 0.5998550880206097, "grad_norm": 0.283203125, "learning_rate": 0.0010597450621271638, "loss": 1.974, "step": 22353 }, { "epoch": 0.5998819235723486, "grad_norm": 0.27734375, "learning_rate": 0.0010597270044826054, "loss": 2.0864, "step": 22354 }, { "epoch": 0.5999087591240876, "grad_norm": 0.2734375, "learning_rate": 0.0010597089458295407, "loss": 1.9204, "step": 22355 }, { "epoch": 0.5999355946758266, "grad_norm": 0.27734375, "learning_rate": 0.0010596908861680101, "loss": 1.9786, "step": 22356 }, { "epoch": 0.5999624302275655, "grad_norm": 0.27734375, "learning_rate": 0.001059672825498053, "loss": 1.9426, "step": 22357 }, { "epoch": 0.5999892657793044, "grad_norm": 0.291015625, "learning_rate": 0.001059654763819709, "loss": 2.0186, "step": 22358 }, { "epoch": 0.6000161013310433, "grad_norm": 0.279296875, "learning_rate": 0.0010596367011330174, "loss": 1.9794, "step": 22359 }, { "epoch": 0.6000429368827823, "grad_norm": 0.283203125, "learning_rate": 0.0010596186374380184, "loss": 2.0146, "step": 22360 }, { "epoch": 0.6000697724345213, "grad_norm": 0.275390625, "learning_rate": 0.001059600572734751, "loss": 1.9512, "step": 22361 }, { "epoch": 0.6000966079862602, "grad_norm": 0.28125, "learning_rate": 0.0010595825070232555, "loss": 1.9979, "step": 22362 }, { "epoch": 0.6001234435379992, "grad_norm": 0.2890625, "learning_rate": 0.0010595644403035712, "loss": 1.995, "step": 22363 }, { "epoch": 0.6001502790897381, "grad_norm": 0.283203125, "learning_rate": 0.0010595463725757374, "loss": 2.0296, "step": 22364 }, { "epoch": 0.600177114641477, "grad_norm": 0.275390625, "learning_rate": 0.0010595283038397944, "loss": 1.9355, "step": 22365 }, { "epoch": 0.6002039501932159, "grad_norm": 0.28125, "learning_rate": 0.0010595102340957812, "loss": 1.9815, "step": 22366 }, { "epoch": 0.6002307857449549, "grad_norm": 0.275390625, "learning_rate": 0.0010594921633437378, "loss": 1.9705, "step": 22367 }, { "epoch": 0.6002576212966939, "grad_norm": 0.27734375, "learning_rate": 0.001059474091583704, "loss": 1.9228, "step": 22368 }, { "epoch": 0.6002844568484328, "grad_norm": 0.28125, "learning_rate": 0.001059456018815719, "loss": 1.9541, "step": 22369 }, { "epoch": 0.6003112924001718, "grad_norm": 0.283203125, "learning_rate": 0.0010594379450398228, "loss": 2.0319, "step": 22370 }, { "epoch": 0.6003381279519107, "grad_norm": 0.283203125, "learning_rate": 0.001059419870256055, "loss": 1.9678, "step": 22371 }, { "epoch": 0.6003649635036497, "grad_norm": 0.2890625, "learning_rate": 0.001059401794464455, "loss": 2.0555, "step": 22372 }, { "epoch": 0.6003917990553885, "grad_norm": 0.279296875, "learning_rate": 0.0010593837176650625, "loss": 1.861, "step": 22373 }, { "epoch": 0.6004186346071275, "grad_norm": 0.2890625, "learning_rate": 0.0010593656398579173, "loss": 2.027, "step": 22374 }, { "epoch": 0.6004454701588665, "grad_norm": 0.28515625, "learning_rate": 0.0010593475610430592, "loss": 1.9911, "step": 22375 }, { "epoch": 0.6004723057106054, "grad_norm": 0.275390625, "learning_rate": 0.0010593294812205277, "loss": 1.9183, "step": 22376 }, { "epoch": 0.6004991412623444, "grad_norm": 0.28125, "learning_rate": 0.0010593114003903624, "loss": 1.9937, "step": 22377 }, { "epoch": 0.6005259768140833, "grad_norm": 0.275390625, "learning_rate": 0.001059293318552603, "loss": 2.0213, "step": 22378 }, { "epoch": 0.6005528123658223, "grad_norm": 0.2734375, "learning_rate": 0.0010592752357072891, "loss": 1.8968, "step": 22379 }, { "epoch": 0.6005796479175611, "grad_norm": 0.279296875, "learning_rate": 0.0010592571518544607, "loss": 2.0306, "step": 22380 }, { "epoch": 0.6006064834693001, "grad_norm": 0.2734375, "learning_rate": 0.0010592390669941572, "loss": 1.9357, "step": 22381 }, { "epoch": 0.6006333190210391, "grad_norm": 0.279296875, "learning_rate": 0.001059220981126418, "loss": 1.9157, "step": 22382 }, { "epoch": 0.600660154572778, "grad_norm": 0.294921875, "learning_rate": 0.0010592028942512835, "loss": 2.0758, "step": 22383 }, { "epoch": 0.600686990124517, "grad_norm": 0.283203125, "learning_rate": 0.0010591848063687926, "loss": 1.9266, "step": 22384 }, { "epoch": 0.6007138256762559, "grad_norm": 0.29296875, "learning_rate": 0.0010591667174789854, "loss": 2.0024, "step": 22385 }, { "epoch": 0.6007406612279949, "grad_norm": 0.298828125, "learning_rate": 0.0010591486275819016, "loss": 1.9673, "step": 22386 }, { "epoch": 0.6007674967797338, "grad_norm": 0.291015625, "learning_rate": 0.0010591305366775808, "loss": 2.0509, "step": 22387 }, { "epoch": 0.6007943323314727, "grad_norm": 0.291015625, "learning_rate": 0.0010591124447660624, "loss": 2.0053, "step": 22388 }, { "epoch": 0.6008211678832117, "grad_norm": 0.279296875, "learning_rate": 0.0010590943518473867, "loss": 1.8943, "step": 22389 }, { "epoch": 0.6008480034349506, "grad_norm": 0.279296875, "learning_rate": 0.0010590762579215929, "loss": 1.9185, "step": 22390 }, { "epoch": 0.6008748389866896, "grad_norm": 0.2734375, "learning_rate": 0.001059058162988721, "loss": 1.7634, "step": 22391 }, { "epoch": 0.6009016745384285, "grad_norm": 0.287109375, "learning_rate": 0.0010590400670488103, "loss": 1.9026, "step": 22392 }, { "epoch": 0.6009285100901675, "grad_norm": 0.27734375, "learning_rate": 0.001059021970101901, "loss": 1.9451, "step": 22393 }, { "epoch": 0.6009553456419064, "grad_norm": 0.279296875, "learning_rate": 0.0010590038721480324, "loss": 1.9113, "step": 22394 }, { "epoch": 0.6009821811936453, "grad_norm": 0.275390625, "learning_rate": 0.0010589857731872444, "loss": 1.859, "step": 22395 }, { "epoch": 0.6010090167453843, "grad_norm": 0.2734375, "learning_rate": 0.0010589676732195767, "loss": 1.8716, "step": 22396 }, { "epoch": 0.6010358522971232, "grad_norm": 0.279296875, "learning_rate": 0.0010589495722450686, "loss": 1.8608, "step": 22397 }, { "epoch": 0.6010626878488622, "grad_norm": 0.29296875, "learning_rate": 0.0010589314702637606, "loss": 2.0541, "step": 22398 }, { "epoch": 0.6010895234006011, "grad_norm": 0.283203125, "learning_rate": 0.0010589133672756917, "loss": 1.8785, "step": 22399 }, { "epoch": 0.6011163589523401, "grad_norm": 0.271484375, "learning_rate": 0.001058895263280902, "loss": 1.8878, "step": 22400 }, { "epoch": 0.601143194504079, "grad_norm": 0.283203125, "learning_rate": 0.0010588771582794312, "loss": 1.8947, "step": 22401 }, { "epoch": 0.6011700300558179, "grad_norm": 0.291015625, "learning_rate": 0.001058859052271319, "loss": 1.8859, "step": 22402 }, { "epoch": 0.6011968656075569, "grad_norm": 0.291015625, "learning_rate": 0.0010588409452566047, "loss": 1.8909, "step": 22403 }, { "epoch": 0.6012237011592958, "grad_norm": 0.28515625, "learning_rate": 0.0010588228372353287, "loss": 1.8834, "step": 22404 }, { "epoch": 0.6012505367110348, "grad_norm": 0.3046875, "learning_rate": 0.00105880472820753, "loss": 1.9312, "step": 22405 }, { "epoch": 0.6012773722627737, "grad_norm": 0.291015625, "learning_rate": 0.001058786618173249, "loss": 1.841, "step": 22406 }, { "epoch": 0.6013042078145127, "grad_norm": 0.29296875, "learning_rate": 0.001058768507132525, "loss": 1.9256, "step": 22407 }, { "epoch": 0.6013310433662516, "grad_norm": 0.30078125, "learning_rate": 0.001058750395085398, "loss": 1.9559, "step": 22408 }, { "epoch": 0.6013578789179905, "grad_norm": 0.333984375, "learning_rate": 0.0010587322820319075, "loss": 2.0465, "step": 22409 }, { "epoch": 0.6013847144697295, "grad_norm": 0.31640625, "learning_rate": 0.0010587141679720934, "loss": 1.9857, "step": 22410 }, { "epoch": 0.6014115500214684, "grad_norm": 0.2890625, "learning_rate": 0.0010586960529059954, "loss": 1.987, "step": 22411 }, { "epoch": 0.6014383855732074, "grad_norm": 0.330078125, "learning_rate": 0.0010586779368336532, "loss": 2.1786, "step": 22412 }, { "epoch": 0.6014652211249464, "grad_norm": 0.302734375, "learning_rate": 0.0010586598197551064, "loss": 2.0543, "step": 22413 }, { "epoch": 0.6014920566766853, "grad_norm": 0.302734375, "learning_rate": 0.001058641701670395, "loss": 2.0858, "step": 22414 }, { "epoch": 0.6015188922284243, "grad_norm": 0.283203125, "learning_rate": 0.0010586235825795585, "loss": 1.9585, "step": 22415 }, { "epoch": 0.6015457277801631, "grad_norm": 0.287109375, "learning_rate": 0.0010586054624826368, "loss": 2.0804, "step": 22416 }, { "epoch": 0.6015725633319021, "grad_norm": 0.275390625, "learning_rate": 0.00105858734137967, "loss": 1.9609, "step": 22417 }, { "epoch": 0.601599398883641, "grad_norm": 0.287109375, "learning_rate": 0.0010585692192706974, "loss": 1.9794, "step": 22418 }, { "epoch": 0.60162623443538, "grad_norm": 0.287109375, "learning_rate": 0.0010585510961557583, "loss": 1.9748, "step": 22419 }, { "epoch": 0.601653069987119, "grad_norm": 0.283203125, "learning_rate": 0.0010585329720348936, "loss": 2.039, "step": 22420 }, { "epoch": 0.6016799055388579, "grad_norm": 0.2890625, "learning_rate": 0.0010585148469081423, "loss": 1.9598, "step": 22421 }, { "epoch": 0.6017067410905969, "grad_norm": 0.28125, "learning_rate": 0.0010584967207755442, "loss": 1.9853, "step": 22422 }, { "epoch": 0.6017335766423357, "grad_norm": 0.279296875, "learning_rate": 0.0010584785936371393, "loss": 2.0406, "step": 22423 }, { "epoch": 0.6017604121940747, "grad_norm": 0.283203125, "learning_rate": 0.0010584604654929673, "loss": 2.0119, "step": 22424 }, { "epoch": 0.6017872477458136, "grad_norm": 0.2890625, "learning_rate": 0.0010584423363430677, "loss": 2.1331, "step": 22425 }, { "epoch": 0.6018140832975526, "grad_norm": 0.279296875, "learning_rate": 0.0010584242061874806, "loss": 1.9633, "step": 22426 }, { "epoch": 0.6018409188492916, "grad_norm": 0.29296875, "learning_rate": 0.0010584060750262459, "loss": 2.1062, "step": 22427 }, { "epoch": 0.6018677544010305, "grad_norm": 0.2890625, "learning_rate": 0.0010583879428594028, "loss": 2.0134, "step": 22428 }, { "epoch": 0.6018945899527695, "grad_norm": 0.275390625, "learning_rate": 0.0010583698096869915, "loss": 1.981, "step": 22429 }, { "epoch": 0.6019214255045083, "grad_norm": 0.28515625, "learning_rate": 0.0010583516755090518, "loss": 2.0521, "step": 22430 }, { "epoch": 0.6019482610562473, "grad_norm": 0.275390625, "learning_rate": 0.0010583335403256233, "loss": 1.965, "step": 22431 }, { "epoch": 0.6019750966079863, "grad_norm": 0.27734375, "learning_rate": 0.0010583154041367458, "loss": 1.8247, "step": 22432 }, { "epoch": 0.6020019321597252, "grad_norm": 0.275390625, "learning_rate": 0.0010582972669424592, "loss": 2.0, "step": 22433 }, { "epoch": 0.6020287677114642, "grad_norm": 0.271484375, "learning_rate": 0.0010582791287428032, "loss": 2.0427, "step": 22434 }, { "epoch": 0.6020556032632031, "grad_norm": 0.279296875, "learning_rate": 0.0010582609895378177, "loss": 2.0366, "step": 22435 }, { "epoch": 0.6020824388149421, "grad_norm": 0.2734375, "learning_rate": 0.0010582428493275424, "loss": 1.9919, "step": 22436 }, { "epoch": 0.6021092743666809, "grad_norm": 0.275390625, "learning_rate": 0.001058224708112017, "loss": 1.977, "step": 22437 }, { "epoch": 0.6021361099184199, "grad_norm": 0.28125, "learning_rate": 0.0010582065658912814, "loss": 1.9756, "step": 22438 }, { "epoch": 0.6021629454701589, "grad_norm": 0.2734375, "learning_rate": 0.0010581884226653754, "loss": 1.9763, "step": 22439 }, { "epoch": 0.6021897810218978, "grad_norm": 0.279296875, "learning_rate": 0.001058170278434339, "loss": 1.9708, "step": 22440 }, { "epoch": 0.6022166165736368, "grad_norm": 0.287109375, "learning_rate": 0.0010581521331982117, "loss": 1.9334, "step": 22441 }, { "epoch": 0.6022434521253757, "grad_norm": 0.279296875, "learning_rate": 0.0010581339869570334, "loss": 1.9328, "step": 22442 }, { "epoch": 0.6022702876771147, "grad_norm": 0.283203125, "learning_rate": 0.001058115839710844, "loss": 1.9724, "step": 22443 }, { "epoch": 0.6022971232288535, "grad_norm": 0.28125, "learning_rate": 0.001058097691459683, "loss": 1.9748, "step": 22444 }, { "epoch": 0.6023239587805925, "grad_norm": 0.267578125, "learning_rate": 0.0010580795422035906, "loss": 1.9099, "step": 22445 }, { "epoch": 0.6023507943323315, "grad_norm": 0.279296875, "learning_rate": 0.0010580613919426065, "loss": 1.9638, "step": 22446 }, { "epoch": 0.6023776298840704, "grad_norm": 0.2734375, "learning_rate": 0.0010580432406767704, "loss": 1.9238, "step": 22447 }, { "epoch": 0.6024044654358094, "grad_norm": 0.291015625, "learning_rate": 0.001058025088406122, "loss": 1.8916, "step": 22448 }, { "epoch": 0.6024313009875483, "grad_norm": 0.279296875, "learning_rate": 0.0010580069351307016, "loss": 1.9797, "step": 22449 }, { "epoch": 0.6024581365392873, "grad_norm": 0.279296875, "learning_rate": 0.0010579887808505484, "loss": 2.0121, "step": 22450 }, { "epoch": 0.6024849720910261, "grad_norm": 0.294921875, "learning_rate": 0.001057970625565703, "loss": 1.9981, "step": 22451 }, { "epoch": 0.6025118076427651, "grad_norm": 0.296875, "learning_rate": 0.0010579524692762045, "loss": 2.0283, "step": 22452 }, { "epoch": 0.6025386431945041, "grad_norm": 0.279296875, "learning_rate": 0.0010579343119820931, "loss": 1.955, "step": 22453 }, { "epoch": 0.602565478746243, "grad_norm": 0.279296875, "learning_rate": 0.0010579161536834086, "loss": 1.9009, "step": 22454 }, { "epoch": 0.602592314297982, "grad_norm": 0.2734375, "learning_rate": 0.0010578979943801907, "loss": 1.9086, "step": 22455 }, { "epoch": 0.6026191498497209, "grad_norm": 0.279296875, "learning_rate": 0.0010578798340724793, "loss": 2.0159, "step": 22456 }, { "epoch": 0.6026459854014599, "grad_norm": 0.28125, "learning_rate": 0.0010578616727603143, "loss": 1.936, "step": 22457 }, { "epoch": 0.6026728209531989, "grad_norm": 0.279296875, "learning_rate": 0.0010578435104437354, "loss": 1.9355, "step": 22458 }, { "epoch": 0.6026996565049377, "grad_norm": 0.2890625, "learning_rate": 0.0010578253471227826, "loss": 2.0341, "step": 22459 }, { "epoch": 0.6027264920566767, "grad_norm": 0.2890625, "learning_rate": 0.001057807182797496, "loss": 1.9529, "step": 22460 }, { "epoch": 0.6027533276084156, "grad_norm": 0.28125, "learning_rate": 0.0010577890174679144, "loss": 1.9949, "step": 22461 }, { "epoch": 0.6027801631601546, "grad_norm": 0.27734375, "learning_rate": 0.0010577708511340788, "loss": 1.898, "step": 22462 }, { "epoch": 0.6028069987118935, "grad_norm": 0.27734375, "learning_rate": 0.0010577526837960288, "loss": 1.9431, "step": 22463 }, { "epoch": 0.6028338342636325, "grad_norm": 0.287109375, "learning_rate": 0.0010577345154538038, "loss": 1.9207, "step": 22464 }, { "epoch": 0.6028606698153715, "grad_norm": 0.2890625, "learning_rate": 0.0010577163461074441, "loss": 1.9024, "step": 22465 }, { "epoch": 0.6028875053671103, "grad_norm": 0.2890625, "learning_rate": 0.0010576981757569894, "loss": 1.9662, "step": 22466 }, { "epoch": 0.6029143409188493, "grad_norm": 0.275390625, "learning_rate": 0.0010576800044024795, "loss": 1.7903, "step": 22467 }, { "epoch": 0.6029411764705882, "grad_norm": 0.28515625, "learning_rate": 0.0010576618320439545, "loss": 1.8392, "step": 22468 }, { "epoch": 0.6029680120223272, "grad_norm": 0.283203125, "learning_rate": 0.0010576436586814538, "loss": 1.8739, "step": 22469 }, { "epoch": 0.6029948475740661, "grad_norm": 0.279296875, "learning_rate": 0.0010576254843150175, "loss": 1.9301, "step": 22470 }, { "epoch": 0.6030216831258051, "grad_norm": 0.28125, "learning_rate": 0.001057607308944686, "loss": 1.9433, "step": 22471 }, { "epoch": 0.603048518677544, "grad_norm": 0.283203125, "learning_rate": 0.0010575891325704983, "loss": 1.9188, "step": 22472 }, { "epoch": 0.6030753542292829, "grad_norm": 0.275390625, "learning_rate": 0.0010575709551924946, "loss": 1.8884, "step": 22473 }, { "epoch": 0.6031021897810219, "grad_norm": 0.3046875, "learning_rate": 0.001057552776810715, "loss": 1.9227, "step": 22474 }, { "epoch": 0.6031290253327608, "grad_norm": 0.283203125, "learning_rate": 0.0010575345974251993, "loss": 1.9321, "step": 22475 }, { "epoch": 0.6031558608844998, "grad_norm": 0.28515625, "learning_rate": 0.0010575164170359872, "loss": 1.9053, "step": 22476 }, { "epoch": 0.6031826964362387, "grad_norm": 0.2890625, "learning_rate": 0.0010574982356431188, "loss": 2.026, "step": 22477 }, { "epoch": 0.6032095319879777, "grad_norm": 0.28515625, "learning_rate": 0.0010574800532466338, "loss": 1.8883, "step": 22478 }, { "epoch": 0.6032363675397167, "grad_norm": 0.322265625, "learning_rate": 0.001057461869846572, "loss": 2.0805, "step": 22479 }, { "epoch": 0.6032632030914555, "grad_norm": 0.302734375, "learning_rate": 0.0010574436854429736, "loss": 2.0378, "step": 22480 }, { "epoch": 0.6032900386431945, "grad_norm": 0.306640625, "learning_rate": 0.0010574255000358784, "loss": 2.0178, "step": 22481 }, { "epoch": 0.6033168741949334, "grad_norm": 0.30078125, "learning_rate": 0.001057407313625326, "loss": 2.1064, "step": 22482 }, { "epoch": 0.6033437097466724, "grad_norm": 0.294921875, "learning_rate": 0.0010573891262113568, "loss": 2.0055, "step": 22483 }, { "epoch": 0.6033705452984114, "grad_norm": 0.28515625, "learning_rate": 0.0010573709377940104, "loss": 1.9862, "step": 22484 }, { "epoch": 0.6033973808501503, "grad_norm": 0.279296875, "learning_rate": 0.0010573527483733265, "loss": 2.008, "step": 22485 }, { "epoch": 0.6034242164018893, "grad_norm": 0.298828125, "learning_rate": 0.0010573345579493454, "loss": 2.1313, "step": 22486 }, { "epoch": 0.6034510519536281, "grad_norm": 0.287109375, "learning_rate": 0.0010573163665221067, "loss": 2.1043, "step": 22487 }, { "epoch": 0.6034778875053671, "grad_norm": 0.28515625, "learning_rate": 0.0010572981740916505, "loss": 1.989, "step": 22488 }, { "epoch": 0.603504723057106, "grad_norm": 0.2734375, "learning_rate": 0.0010572799806580168, "loss": 2.0695, "step": 22489 }, { "epoch": 0.603531558608845, "grad_norm": 0.2734375, "learning_rate": 0.001057261786221245, "loss": 1.9807, "step": 22490 }, { "epoch": 0.603558394160584, "grad_norm": 0.28515625, "learning_rate": 0.0010572435907813757, "loss": 2.0148, "step": 22491 }, { "epoch": 0.6035852297123229, "grad_norm": 0.287109375, "learning_rate": 0.0010572253943384484, "loss": 2.0149, "step": 22492 }, { "epoch": 0.6036120652640619, "grad_norm": 0.271484375, "learning_rate": 0.0010572071968925028, "loss": 1.9036, "step": 22493 }, { "epoch": 0.6036389008158007, "grad_norm": 0.283203125, "learning_rate": 0.0010571889984435797, "loss": 2.0589, "step": 22494 }, { "epoch": 0.6036657363675397, "grad_norm": 0.275390625, "learning_rate": 0.0010571707989917181, "loss": 2.0084, "step": 22495 }, { "epoch": 0.6036925719192786, "grad_norm": 0.2890625, "learning_rate": 0.0010571525985369583, "loss": 2.0428, "step": 22496 }, { "epoch": 0.6037194074710176, "grad_norm": 0.275390625, "learning_rate": 0.00105713439707934, "loss": 1.9641, "step": 22497 }, { "epoch": 0.6037462430227566, "grad_norm": 0.279296875, "learning_rate": 0.0010571161946189038, "loss": 2.0591, "step": 22498 }, { "epoch": 0.6037730785744955, "grad_norm": 0.287109375, "learning_rate": 0.0010570979911556887, "loss": 2.052, "step": 22499 }, { "epoch": 0.6037999141262345, "grad_norm": 0.27734375, "learning_rate": 0.0010570797866897353, "loss": 1.9536, "step": 22500 }, { "epoch": 0.6038267496779733, "grad_norm": 0.271484375, "learning_rate": 0.0010570615812210834, "loss": 1.9265, "step": 22501 }, { "epoch": 0.6038535852297123, "grad_norm": 0.27734375, "learning_rate": 0.0010570433747497726, "loss": 2.0002, "step": 22502 }, { "epoch": 0.6038804207814513, "grad_norm": 0.28515625, "learning_rate": 0.0010570251672758432, "loss": 2.0816, "step": 22503 }, { "epoch": 0.6039072563331902, "grad_norm": 0.27734375, "learning_rate": 0.0010570069587993352, "loss": 1.9581, "step": 22504 }, { "epoch": 0.6039340918849292, "grad_norm": 0.279296875, "learning_rate": 0.0010569887493202884, "loss": 2.0439, "step": 22505 }, { "epoch": 0.6039609274366681, "grad_norm": 0.279296875, "learning_rate": 0.0010569705388387425, "loss": 1.8995, "step": 22506 }, { "epoch": 0.6039877629884071, "grad_norm": 0.271484375, "learning_rate": 0.0010569523273547378, "loss": 1.9035, "step": 22507 }, { "epoch": 0.6040145985401459, "grad_norm": 0.287109375, "learning_rate": 0.0010569341148683141, "loss": 1.9271, "step": 22508 }, { "epoch": 0.6040414340918849, "grad_norm": 0.275390625, "learning_rate": 0.0010569159013795112, "loss": 1.9068, "step": 22509 }, { "epoch": 0.6040682696436239, "grad_norm": 0.28515625, "learning_rate": 0.0010568976868883695, "loss": 2.0672, "step": 22510 }, { "epoch": 0.6040951051953628, "grad_norm": 0.27734375, "learning_rate": 0.0010568794713949287, "loss": 1.9225, "step": 22511 }, { "epoch": 0.6041219407471018, "grad_norm": 0.287109375, "learning_rate": 0.0010568612548992288, "loss": 2.0181, "step": 22512 }, { "epoch": 0.6041487762988407, "grad_norm": 0.2890625, "learning_rate": 0.0010568430374013097, "loss": 2.091, "step": 22513 }, { "epoch": 0.6041756118505797, "grad_norm": 0.275390625, "learning_rate": 0.0010568248189012112, "loss": 1.8803, "step": 22514 }, { "epoch": 0.6042024474023185, "grad_norm": 0.279296875, "learning_rate": 0.0010568065993989736, "loss": 1.9382, "step": 22515 }, { "epoch": 0.6042292829540575, "grad_norm": 0.275390625, "learning_rate": 0.0010567883788946367, "loss": 1.9326, "step": 22516 }, { "epoch": 0.6042561185057965, "grad_norm": 0.279296875, "learning_rate": 0.0010567701573882403, "loss": 1.8702, "step": 22517 }, { "epoch": 0.6042829540575354, "grad_norm": 0.28125, "learning_rate": 0.0010567519348798247, "loss": 1.9932, "step": 22518 }, { "epoch": 0.6043097896092744, "grad_norm": 0.279296875, "learning_rate": 0.0010567337113694297, "loss": 1.9275, "step": 22519 }, { "epoch": 0.6043366251610133, "grad_norm": 0.2734375, "learning_rate": 0.0010567154868570954, "loss": 1.8751, "step": 22520 }, { "epoch": 0.6043634607127523, "grad_norm": 0.279296875, "learning_rate": 0.0010566972613428615, "loss": 1.8597, "step": 22521 }, { "epoch": 0.6043902962644911, "grad_norm": 0.291015625, "learning_rate": 0.0010566790348267685, "loss": 1.9236, "step": 22522 }, { "epoch": 0.6044171318162301, "grad_norm": 0.27734375, "learning_rate": 0.0010566608073088557, "loss": 1.9267, "step": 22523 }, { "epoch": 0.6044439673679691, "grad_norm": 0.306640625, "learning_rate": 0.0010566425787891637, "loss": 2.055, "step": 22524 }, { "epoch": 0.604470802919708, "grad_norm": 0.275390625, "learning_rate": 0.0010566243492677323, "loss": 1.9148, "step": 22525 }, { "epoch": 0.604497638471447, "grad_norm": 0.283203125, "learning_rate": 0.001056606118744601, "loss": 1.9463, "step": 22526 }, { "epoch": 0.6045244740231859, "grad_norm": 0.279296875, "learning_rate": 0.0010565878872198105, "loss": 1.9261, "step": 22527 }, { "epoch": 0.6045513095749249, "grad_norm": 0.279296875, "learning_rate": 0.0010565696546934004, "loss": 1.9538, "step": 22528 }, { "epoch": 0.6045781451266639, "grad_norm": 0.2734375, "learning_rate": 0.001056551421165411, "loss": 1.8785, "step": 22529 }, { "epoch": 0.6046049806784027, "grad_norm": 0.28125, "learning_rate": 0.0010565331866358818, "loss": 2.0296, "step": 22530 }, { "epoch": 0.6046318162301417, "grad_norm": 0.2890625, "learning_rate": 0.0010565149511048534, "loss": 1.9332, "step": 22531 }, { "epoch": 0.6046586517818806, "grad_norm": 0.28125, "learning_rate": 0.0010564967145723653, "loss": 1.8687, "step": 22532 }, { "epoch": 0.6046854873336196, "grad_norm": 0.267578125, "learning_rate": 0.0010564784770384578, "loss": 1.8604, "step": 22533 }, { "epoch": 0.6047123228853585, "grad_norm": 0.27734375, "learning_rate": 0.001056460238503171, "loss": 1.9048, "step": 22534 }, { "epoch": 0.6047391584370975, "grad_norm": 0.271484375, "learning_rate": 0.0010564419989665442, "loss": 1.8271, "step": 22535 }, { "epoch": 0.6047659939888365, "grad_norm": 0.28515625, "learning_rate": 0.0010564237584286184, "loss": 1.8924, "step": 22536 }, { "epoch": 0.6047928295405753, "grad_norm": 0.2734375, "learning_rate": 0.001056405516889433, "loss": 1.8452, "step": 22537 }, { "epoch": 0.6048196650923143, "grad_norm": 0.28515625, "learning_rate": 0.0010563872743490283, "loss": 1.9845, "step": 22538 }, { "epoch": 0.6048465006440532, "grad_norm": 0.28515625, "learning_rate": 0.001056369030807444, "loss": 1.8585, "step": 22539 }, { "epoch": 0.6048733361957922, "grad_norm": 0.291015625, "learning_rate": 0.0010563507862647204, "loss": 2.036, "step": 22540 }, { "epoch": 0.6049001717475311, "grad_norm": 0.28515625, "learning_rate": 0.0010563325407208974, "loss": 1.8047, "step": 22541 }, { "epoch": 0.6049270072992701, "grad_norm": 0.29296875, "learning_rate": 0.0010563142941760152, "loss": 1.935, "step": 22542 }, { "epoch": 0.6049538428510091, "grad_norm": 0.283203125, "learning_rate": 0.0010562960466301136, "loss": 1.8899, "step": 22543 }, { "epoch": 0.6049806784027479, "grad_norm": 0.287109375, "learning_rate": 0.0010562777980832327, "loss": 1.9514, "step": 22544 }, { "epoch": 0.6050075139544869, "grad_norm": 0.27734375, "learning_rate": 0.0010562595485354126, "loss": 1.7928, "step": 22545 }, { "epoch": 0.6050343495062258, "grad_norm": 0.275390625, "learning_rate": 0.0010562412979866931, "loss": 1.8706, "step": 22546 }, { "epoch": 0.6050611850579648, "grad_norm": 0.314453125, "learning_rate": 0.0010562230464371148, "loss": 2.0541, "step": 22547 }, { "epoch": 0.6050880206097037, "grad_norm": 0.326171875, "learning_rate": 0.001056204793886717, "loss": 2.085, "step": 22548 }, { "epoch": 0.6051148561614427, "grad_norm": 0.298828125, "learning_rate": 0.0010561865403355402, "loss": 2.0353, "step": 22549 }, { "epoch": 0.6051416917131817, "grad_norm": 0.32421875, "learning_rate": 0.0010561682857836245, "loss": 2.1075, "step": 22550 }, { "epoch": 0.6051685272649205, "grad_norm": 0.28515625, "learning_rate": 0.0010561500302310096, "loss": 1.9452, "step": 22551 }, { "epoch": 0.6051953628166595, "grad_norm": 0.31640625, "learning_rate": 0.0010561317736777357, "loss": 2.1094, "step": 22552 }, { "epoch": 0.6052221983683984, "grad_norm": 0.2890625, "learning_rate": 0.001056113516123843, "loss": 1.9432, "step": 22553 }, { "epoch": 0.6052490339201374, "grad_norm": 0.28515625, "learning_rate": 0.0010560952575693714, "loss": 1.9821, "step": 22554 }, { "epoch": 0.6052758694718764, "grad_norm": 0.296875, "learning_rate": 0.001056076998014361, "loss": 2.1498, "step": 22555 }, { "epoch": 0.6053027050236153, "grad_norm": 0.283203125, "learning_rate": 0.001056058737458852, "loss": 2.0156, "step": 22556 }, { "epoch": 0.6053295405753543, "grad_norm": 0.28125, "learning_rate": 0.001056040475902884, "loss": 1.931, "step": 22557 }, { "epoch": 0.6053563761270931, "grad_norm": 0.298828125, "learning_rate": 0.0010560222133464976, "loss": 2.0356, "step": 22558 }, { "epoch": 0.6053832116788321, "grad_norm": 0.28515625, "learning_rate": 0.0010560039497897326, "loss": 1.9801, "step": 22559 }, { "epoch": 0.605410047230571, "grad_norm": 0.29296875, "learning_rate": 0.0010559856852326288, "loss": 2.1142, "step": 22560 }, { "epoch": 0.60543688278231, "grad_norm": 0.287109375, "learning_rate": 0.001055967419675227, "loss": 2.0195, "step": 22561 }, { "epoch": 0.605463718334049, "grad_norm": 0.283203125, "learning_rate": 0.0010559491531175666, "loss": 1.9726, "step": 22562 }, { "epoch": 0.6054905538857879, "grad_norm": 0.28515625, "learning_rate": 0.001055930885559688, "loss": 2.0435, "step": 22563 }, { "epoch": 0.6055173894375269, "grad_norm": 0.279296875, "learning_rate": 0.001055912617001631, "loss": 2.0235, "step": 22564 }, { "epoch": 0.6055442249892657, "grad_norm": 0.279296875, "learning_rate": 0.0010558943474434361, "loss": 2.1017, "step": 22565 }, { "epoch": 0.6055710605410047, "grad_norm": 0.2734375, "learning_rate": 0.0010558760768851429, "loss": 1.984, "step": 22566 }, { "epoch": 0.6055978960927436, "grad_norm": 0.26953125, "learning_rate": 0.0010558578053267917, "loss": 2.0085, "step": 22567 }, { "epoch": 0.6056247316444826, "grad_norm": 0.271484375, "learning_rate": 0.0010558395327684227, "loss": 1.9518, "step": 22568 }, { "epoch": 0.6056515671962216, "grad_norm": 0.271484375, "learning_rate": 0.001055821259210076, "loss": 1.9427, "step": 22569 }, { "epoch": 0.6056784027479605, "grad_norm": 0.28125, "learning_rate": 0.0010558029846517913, "loss": 2.039, "step": 22570 }, { "epoch": 0.6057052382996995, "grad_norm": 0.26953125, "learning_rate": 0.001055784709093609, "loss": 1.9411, "step": 22571 }, { "epoch": 0.6057320738514383, "grad_norm": 0.28125, "learning_rate": 0.0010557664325355693, "loss": 2.011, "step": 22572 }, { "epoch": 0.6057589094031773, "grad_norm": 0.27734375, "learning_rate": 0.0010557481549777119, "loss": 2.0184, "step": 22573 }, { "epoch": 0.6057857449549163, "grad_norm": 0.271484375, "learning_rate": 0.0010557298764200774, "loss": 1.9424, "step": 22574 }, { "epoch": 0.6058125805066552, "grad_norm": 0.283203125, "learning_rate": 0.0010557115968627055, "loss": 1.9655, "step": 22575 }, { "epoch": 0.6058394160583942, "grad_norm": 0.28125, "learning_rate": 0.0010556933163056363, "loss": 1.9716, "step": 22576 }, { "epoch": 0.6058662516101331, "grad_norm": 0.27734375, "learning_rate": 0.0010556750347489102, "loss": 1.9677, "step": 22577 }, { "epoch": 0.6058930871618721, "grad_norm": 0.27734375, "learning_rate": 0.001055656752192567, "loss": 2.0208, "step": 22578 }, { "epoch": 0.605919922713611, "grad_norm": 0.263671875, "learning_rate": 0.001055638468636647, "loss": 1.8967, "step": 22579 }, { "epoch": 0.6059467582653499, "grad_norm": 0.287109375, "learning_rate": 0.0010556201840811906, "loss": 2.0581, "step": 22580 }, { "epoch": 0.6059735938170889, "grad_norm": 0.279296875, "learning_rate": 0.0010556018985262371, "loss": 1.9984, "step": 22581 }, { "epoch": 0.6060004293688278, "grad_norm": 0.27734375, "learning_rate": 0.0010555836119718273, "loss": 2.0036, "step": 22582 }, { "epoch": 0.6060272649205668, "grad_norm": 0.29296875, "learning_rate": 0.001055565324418001, "loss": 2.0207, "step": 22583 }, { "epoch": 0.6060541004723057, "grad_norm": 0.279296875, "learning_rate": 0.0010555470358647983, "loss": 1.9883, "step": 22584 }, { "epoch": 0.6060809360240447, "grad_norm": 0.279296875, "learning_rate": 0.0010555287463122596, "loss": 1.9705, "step": 22585 }, { "epoch": 0.6061077715757835, "grad_norm": 0.28515625, "learning_rate": 0.0010555104557604248, "loss": 2.007, "step": 22586 }, { "epoch": 0.6061346071275225, "grad_norm": 0.28125, "learning_rate": 0.001055492164209334, "loss": 2.0194, "step": 22587 }, { "epoch": 0.6061614426792615, "grad_norm": 0.27734375, "learning_rate": 0.0010554738716590276, "loss": 2.0376, "step": 22588 }, { "epoch": 0.6061882782310004, "grad_norm": 0.28125, "learning_rate": 0.0010554555781095453, "loss": 1.977, "step": 22589 }, { "epoch": 0.6062151137827394, "grad_norm": 0.283203125, "learning_rate": 0.0010554372835609276, "loss": 1.9891, "step": 22590 }, { "epoch": 0.6062419493344783, "grad_norm": 0.294921875, "learning_rate": 0.0010554189880132145, "loss": 1.9582, "step": 22591 }, { "epoch": 0.6062687848862173, "grad_norm": 0.27734375, "learning_rate": 0.0010554006914664461, "loss": 1.9657, "step": 22592 }, { "epoch": 0.6062956204379562, "grad_norm": 0.28125, "learning_rate": 0.0010553823939206625, "loss": 2.034, "step": 22593 }, { "epoch": 0.6063224559896951, "grad_norm": 0.283203125, "learning_rate": 0.0010553640953759038, "loss": 1.9683, "step": 22594 }, { "epoch": 0.6063492915414341, "grad_norm": 0.275390625, "learning_rate": 0.0010553457958322103, "loss": 1.9268, "step": 22595 }, { "epoch": 0.606376127093173, "grad_norm": 0.275390625, "learning_rate": 0.0010553274952896225, "loss": 1.8846, "step": 22596 }, { "epoch": 0.606402962644912, "grad_norm": 0.27734375, "learning_rate": 0.0010553091937481798, "loss": 1.9612, "step": 22597 }, { "epoch": 0.6064297981966509, "grad_norm": 0.26953125, "learning_rate": 0.0010552908912079225, "loss": 1.874, "step": 22598 }, { "epoch": 0.6064566337483899, "grad_norm": 0.287109375, "learning_rate": 0.0010552725876688913, "loss": 1.9031, "step": 22599 }, { "epoch": 0.6064834693001289, "grad_norm": 0.28125, "learning_rate": 0.0010552542831311258, "loss": 1.8981, "step": 22600 }, { "epoch": 0.6065103048518677, "grad_norm": 0.283203125, "learning_rate": 0.0010552359775946663, "loss": 1.8482, "step": 22601 }, { "epoch": 0.6065371404036067, "grad_norm": 0.279296875, "learning_rate": 0.0010552176710595532, "loss": 1.9105, "step": 22602 }, { "epoch": 0.6065639759553456, "grad_norm": 0.291015625, "learning_rate": 0.0010551993635258261, "loss": 1.8873, "step": 22603 }, { "epoch": 0.6065908115070846, "grad_norm": 0.2890625, "learning_rate": 0.0010551810549935257, "loss": 1.9271, "step": 22604 }, { "epoch": 0.6066176470588235, "grad_norm": 0.279296875, "learning_rate": 0.001055162745462692, "loss": 1.9586, "step": 22605 }, { "epoch": 0.6066444826105625, "grad_norm": 0.28125, "learning_rate": 0.0010551444349333652, "loss": 1.8601, "step": 22606 }, { "epoch": 0.6066713181623015, "grad_norm": 0.271484375, "learning_rate": 0.0010551261234055854, "loss": 1.8188, "step": 22607 }, { "epoch": 0.6066981537140403, "grad_norm": 0.2734375, "learning_rate": 0.0010551078108793927, "loss": 1.9334, "step": 22608 }, { "epoch": 0.6067249892657793, "grad_norm": 0.26953125, "learning_rate": 0.0010550894973548276, "loss": 1.8898, "step": 22609 }, { "epoch": 0.6067518248175182, "grad_norm": 0.2890625, "learning_rate": 0.0010550711828319296, "loss": 1.8866, "step": 22610 }, { "epoch": 0.6067786603692572, "grad_norm": 0.28125, "learning_rate": 0.0010550528673107396, "loss": 1.8883, "step": 22611 }, { "epoch": 0.6068054959209961, "grad_norm": 0.294921875, "learning_rate": 0.0010550345507912974, "loss": 1.9081, "step": 22612 }, { "epoch": 0.6068323314727351, "grad_norm": 0.283203125, "learning_rate": 0.0010550162332736432, "loss": 1.8689, "step": 22613 }, { "epoch": 0.6068591670244741, "grad_norm": 0.30859375, "learning_rate": 0.0010549979147578175, "loss": 2.0231, "step": 22614 }, { "epoch": 0.606886002576213, "grad_norm": 0.31640625, "learning_rate": 0.0010549795952438602, "loss": 1.9809, "step": 22615 }, { "epoch": 0.6069128381279519, "grad_norm": 0.30859375, "learning_rate": 0.0010549612747318114, "loss": 1.9504, "step": 22616 }, { "epoch": 0.6069396736796908, "grad_norm": 0.322265625, "learning_rate": 0.0010549429532217115, "loss": 2.1351, "step": 22617 }, { "epoch": 0.6069665092314298, "grad_norm": 0.296875, "learning_rate": 0.0010549246307136005, "loss": 2.0845, "step": 22618 }, { "epoch": 0.6069933447831687, "grad_norm": 0.298828125, "learning_rate": 0.0010549063072075187, "loss": 2.095, "step": 22619 }, { "epoch": 0.6070201803349077, "grad_norm": 0.302734375, "learning_rate": 0.0010548879827035065, "loss": 2.1361, "step": 22620 }, { "epoch": 0.6070470158866467, "grad_norm": 0.2734375, "learning_rate": 0.0010548696572016036, "loss": 1.9804, "step": 22621 }, { "epoch": 0.6070738514383855, "grad_norm": 0.291015625, "learning_rate": 0.0010548513307018508, "loss": 2.1274, "step": 22622 }, { "epoch": 0.6071006869901245, "grad_norm": 0.28515625, "learning_rate": 0.0010548330032042878, "loss": 2.1067, "step": 22623 }, { "epoch": 0.6071275225418634, "grad_norm": 0.28515625, "learning_rate": 0.001054814674708955, "loss": 1.9383, "step": 22624 }, { "epoch": 0.6071543580936024, "grad_norm": 0.283203125, "learning_rate": 0.0010547963452158925, "loss": 2.0947, "step": 22625 }, { "epoch": 0.6071811936453414, "grad_norm": 0.2890625, "learning_rate": 0.001054778014725141, "loss": 1.9982, "step": 22626 }, { "epoch": 0.6072080291970803, "grad_norm": 0.271484375, "learning_rate": 0.00105475968323674, "loss": 1.9055, "step": 22627 }, { "epoch": 0.6072348647488193, "grad_norm": 0.28125, "learning_rate": 0.0010547413507507302, "loss": 2.0899, "step": 22628 }, { "epoch": 0.6072617003005581, "grad_norm": 0.27734375, "learning_rate": 0.0010547230172671515, "loss": 2.1039, "step": 22629 }, { "epoch": 0.6072885358522971, "grad_norm": 0.29296875, "learning_rate": 0.0010547046827860446, "loss": 2.0257, "step": 22630 }, { "epoch": 0.607315371404036, "grad_norm": 0.279296875, "learning_rate": 0.001054686347307449, "loss": 1.9364, "step": 22631 }, { "epoch": 0.607342206955775, "grad_norm": 0.287109375, "learning_rate": 0.0010546680108314056, "loss": 2.0455, "step": 22632 }, { "epoch": 0.607369042507514, "grad_norm": 0.271484375, "learning_rate": 0.0010546496733579544, "loss": 2.077, "step": 22633 }, { "epoch": 0.6073958780592529, "grad_norm": 0.275390625, "learning_rate": 0.0010546313348871353, "loss": 2.0273, "step": 22634 }, { "epoch": 0.6074227136109919, "grad_norm": 0.28125, "learning_rate": 0.001054612995418989, "loss": 2.0237, "step": 22635 }, { "epoch": 0.6074495491627308, "grad_norm": 0.310546875, "learning_rate": 0.0010545946549535556, "loss": 2.0325, "step": 22636 }, { "epoch": 0.6074763847144697, "grad_norm": 0.275390625, "learning_rate": 0.001054576313490875, "loss": 1.9767, "step": 22637 }, { "epoch": 0.6075032202662086, "grad_norm": 0.279296875, "learning_rate": 0.0010545579710309878, "loss": 2.0729, "step": 22638 }, { "epoch": 0.6075300558179476, "grad_norm": 0.26953125, "learning_rate": 0.0010545396275739344, "loss": 1.9806, "step": 22639 }, { "epoch": 0.6075568913696866, "grad_norm": 0.26953125, "learning_rate": 0.0010545212831197545, "loss": 1.953, "step": 22640 }, { "epoch": 0.6075837269214255, "grad_norm": 0.287109375, "learning_rate": 0.0010545029376684888, "loss": 1.9461, "step": 22641 }, { "epoch": 0.6076105624731645, "grad_norm": 0.2734375, "learning_rate": 0.0010544845912201773, "loss": 2.0072, "step": 22642 }, { "epoch": 0.6076373980249034, "grad_norm": 0.279296875, "learning_rate": 0.0010544662437748603, "loss": 1.9477, "step": 22643 }, { "epoch": 0.6076642335766423, "grad_norm": 0.27734375, "learning_rate": 0.001054447895332578, "loss": 2.0275, "step": 22644 }, { "epoch": 0.6076910691283813, "grad_norm": 0.283203125, "learning_rate": 0.0010544295458933708, "loss": 1.9433, "step": 22645 }, { "epoch": 0.6077179046801202, "grad_norm": 0.271484375, "learning_rate": 0.0010544111954572792, "loss": 1.9074, "step": 22646 }, { "epoch": 0.6077447402318592, "grad_norm": 0.283203125, "learning_rate": 0.0010543928440243425, "loss": 2.0837, "step": 22647 }, { "epoch": 0.6077715757835981, "grad_norm": 0.287109375, "learning_rate": 0.001054374491594602, "loss": 2.0506, "step": 22648 }, { "epoch": 0.6077984113353371, "grad_norm": 0.279296875, "learning_rate": 0.0010543561381680974, "loss": 1.9957, "step": 22649 }, { "epoch": 0.607825246887076, "grad_norm": 0.27734375, "learning_rate": 0.0010543377837448692, "loss": 1.9639, "step": 22650 }, { "epoch": 0.607852082438815, "grad_norm": 0.28515625, "learning_rate": 0.0010543194283249575, "loss": 2.0093, "step": 22651 }, { "epoch": 0.6078789179905539, "grad_norm": 0.26953125, "learning_rate": 0.0010543010719084027, "loss": 1.9465, "step": 22652 }, { "epoch": 0.6079057535422928, "grad_norm": 0.2734375, "learning_rate": 0.0010542827144952448, "loss": 1.9485, "step": 22653 }, { "epoch": 0.6079325890940318, "grad_norm": 0.287109375, "learning_rate": 0.0010542643560855246, "loss": 2.0492, "step": 22654 }, { "epoch": 0.6079594246457707, "grad_norm": 0.271484375, "learning_rate": 0.0010542459966792817, "loss": 1.9621, "step": 22655 }, { "epoch": 0.6079862601975097, "grad_norm": 0.271484375, "learning_rate": 0.001054227636276557, "loss": 1.9433, "step": 22656 }, { "epoch": 0.6080130957492486, "grad_norm": 0.263671875, "learning_rate": 0.0010542092748773903, "loss": 1.9071, "step": 22657 }, { "epoch": 0.6080399313009875, "grad_norm": 0.27734375, "learning_rate": 0.0010541909124818223, "loss": 1.9407, "step": 22658 }, { "epoch": 0.6080667668527265, "grad_norm": 0.267578125, "learning_rate": 0.001054172549089893, "loss": 1.8101, "step": 22659 }, { "epoch": 0.6080936024044654, "grad_norm": 0.275390625, "learning_rate": 0.0010541541847016427, "loss": 1.9691, "step": 22660 }, { "epoch": 0.6081204379562044, "grad_norm": 0.275390625, "learning_rate": 0.0010541358193171116, "loss": 1.9798, "step": 22661 }, { "epoch": 0.6081472735079433, "grad_norm": 0.2734375, "learning_rate": 0.0010541174529363404, "loss": 1.9166, "step": 22662 }, { "epoch": 0.6081741090596823, "grad_norm": 0.283203125, "learning_rate": 0.0010540990855593688, "loss": 1.911, "step": 22663 }, { "epoch": 0.6082009446114212, "grad_norm": 0.271484375, "learning_rate": 0.0010540807171862377, "loss": 1.8158, "step": 22664 }, { "epoch": 0.6082277801631601, "grad_norm": 0.283203125, "learning_rate": 0.0010540623478169867, "loss": 1.9092, "step": 22665 }, { "epoch": 0.6082546157148991, "grad_norm": 0.26953125, "learning_rate": 0.0010540439774516569, "loss": 1.8942, "step": 22666 }, { "epoch": 0.608281451266638, "grad_norm": 0.283203125, "learning_rate": 0.001054025606090288, "loss": 1.9581, "step": 22667 }, { "epoch": 0.608308286818377, "grad_norm": 0.283203125, "learning_rate": 0.0010540072337329206, "loss": 1.9198, "step": 22668 }, { "epoch": 0.6083351223701159, "grad_norm": 0.283203125, "learning_rate": 0.0010539888603795949, "loss": 2.0004, "step": 22669 }, { "epoch": 0.6083619579218549, "grad_norm": 0.279296875, "learning_rate": 0.0010539704860303512, "loss": 1.9648, "step": 22670 }, { "epoch": 0.6083887934735939, "grad_norm": 0.28125, "learning_rate": 0.0010539521106852298, "loss": 2.0038, "step": 22671 }, { "epoch": 0.6084156290253327, "grad_norm": 0.2890625, "learning_rate": 0.001053933734344271, "loss": 1.9055, "step": 22672 }, { "epoch": 0.6084424645770717, "grad_norm": 0.287109375, "learning_rate": 0.0010539153570075152, "loss": 1.8545, "step": 22673 }, { "epoch": 0.6084693001288106, "grad_norm": 0.27734375, "learning_rate": 0.0010538969786750025, "loss": 1.8253, "step": 22674 }, { "epoch": 0.6084961356805496, "grad_norm": 0.2734375, "learning_rate": 0.0010538785993467734, "loss": 1.8696, "step": 22675 }, { "epoch": 0.6085229712322885, "grad_norm": 0.28515625, "learning_rate": 0.0010538602190228682, "loss": 1.9054, "step": 22676 }, { "epoch": 0.6085498067840275, "grad_norm": 0.287109375, "learning_rate": 0.0010538418377033273, "loss": 1.9011, "step": 22677 }, { "epoch": 0.6085766423357665, "grad_norm": 0.279296875, "learning_rate": 0.0010538234553881909, "loss": 1.8557, "step": 22678 }, { "epoch": 0.6086034778875054, "grad_norm": 0.2890625, "learning_rate": 0.0010538050720774995, "loss": 1.9676, "step": 22679 }, { "epoch": 0.6086303134392443, "grad_norm": 0.30859375, "learning_rate": 0.001053786687771293, "loss": 2.0596, "step": 22680 }, { "epoch": 0.6086571489909832, "grad_norm": 0.326171875, "learning_rate": 0.0010537683024696121, "loss": 2.0417, "step": 22681 }, { "epoch": 0.6086839845427222, "grad_norm": 0.30859375, "learning_rate": 0.0010537499161724971, "loss": 2.0824, "step": 22682 }, { "epoch": 0.6087108200944611, "grad_norm": 0.298828125, "learning_rate": 0.0010537315288799882, "loss": 2.1957, "step": 22683 }, { "epoch": 0.6087376556462001, "grad_norm": 0.30078125, "learning_rate": 0.0010537131405921258, "loss": 2.1232, "step": 22684 }, { "epoch": 0.6087644911979391, "grad_norm": 0.298828125, "learning_rate": 0.0010536947513089504, "loss": 2.1398, "step": 22685 }, { "epoch": 0.608791326749678, "grad_norm": 0.29296875, "learning_rate": 0.001053676361030502, "loss": 2.0754, "step": 22686 }, { "epoch": 0.6088181623014169, "grad_norm": 0.275390625, "learning_rate": 0.0010536579697568214, "loss": 2.0084, "step": 22687 }, { "epoch": 0.6088449978531558, "grad_norm": 0.279296875, "learning_rate": 0.0010536395774879486, "loss": 1.951, "step": 22688 }, { "epoch": 0.6088718334048948, "grad_norm": 0.279296875, "learning_rate": 0.001053621184223924, "loss": 2.0795, "step": 22689 }, { "epoch": 0.6088986689566337, "grad_norm": 0.279296875, "learning_rate": 0.0010536027899647879, "loss": 2.0048, "step": 22690 }, { "epoch": 0.6089255045083727, "grad_norm": 0.29296875, "learning_rate": 0.0010535843947105806, "loss": 2.0867, "step": 22691 }, { "epoch": 0.6089523400601117, "grad_norm": 0.2734375, "learning_rate": 0.001053565998461343, "loss": 2.045, "step": 22692 }, { "epoch": 0.6089791756118506, "grad_norm": 0.275390625, "learning_rate": 0.0010535476012171147, "loss": 2.0947, "step": 22693 }, { "epoch": 0.6090060111635895, "grad_norm": 0.3046875, "learning_rate": 0.0010535292029779364, "loss": 2.125, "step": 22694 }, { "epoch": 0.6090328467153284, "grad_norm": 0.279296875, "learning_rate": 0.0010535108037438485, "loss": 1.9756, "step": 22695 }, { "epoch": 0.6090596822670674, "grad_norm": 0.2734375, "learning_rate": 0.0010534924035148915, "loss": 2.0672, "step": 22696 }, { "epoch": 0.6090865178188064, "grad_norm": 0.28125, "learning_rate": 0.0010534740022911054, "loss": 2.0694, "step": 22697 }, { "epoch": 0.6091133533705453, "grad_norm": 0.271484375, "learning_rate": 0.001053455600072531, "loss": 2.0024, "step": 22698 }, { "epoch": 0.6091401889222843, "grad_norm": 0.28125, "learning_rate": 0.0010534371968592081, "loss": 2.0765, "step": 22699 }, { "epoch": 0.6091670244740232, "grad_norm": 0.275390625, "learning_rate": 0.0010534187926511773, "loss": 2.0276, "step": 22700 }, { "epoch": 0.6091938600257621, "grad_norm": 0.279296875, "learning_rate": 0.0010534003874484795, "loss": 1.952, "step": 22701 }, { "epoch": 0.609220695577501, "grad_norm": 0.283203125, "learning_rate": 0.0010533819812511545, "loss": 2.0152, "step": 22702 }, { "epoch": 0.60924753112924, "grad_norm": 0.279296875, "learning_rate": 0.0010533635740592427, "loss": 2.0482, "step": 22703 }, { "epoch": 0.609274366680979, "grad_norm": 0.271484375, "learning_rate": 0.0010533451658727847, "loss": 2.0205, "step": 22704 }, { "epoch": 0.6093012022327179, "grad_norm": 0.26171875, "learning_rate": 0.0010533267566918208, "loss": 1.8257, "step": 22705 }, { "epoch": 0.6093280377844569, "grad_norm": 0.271484375, "learning_rate": 0.0010533083465163913, "loss": 2.0041, "step": 22706 }, { "epoch": 0.6093548733361958, "grad_norm": 0.271484375, "learning_rate": 0.0010532899353465366, "loss": 1.9989, "step": 22707 }, { "epoch": 0.6093817088879347, "grad_norm": 0.267578125, "learning_rate": 0.0010532715231822973, "loss": 1.935, "step": 22708 }, { "epoch": 0.6094085444396736, "grad_norm": 0.26953125, "learning_rate": 0.0010532531100237133, "loss": 1.9168, "step": 22709 }, { "epoch": 0.6094353799914126, "grad_norm": 0.27734375, "learning_rate": 0.0010532346958708257, "loss": 1.9657, "step": 22710 }, { "epoch": 0.6094622155431516, "grad_norm": 0.27734375, "learning_rate": 0.0010532162807236744, "loss": 1.9987, "step": 22711 }, { "epoch": 0.6094890510948905, "grad_norm": 0.28125, "learning_rate": 0.0010531978645823, "loss": 1.9474, "step": 22712 }, { "epoch": 0.6095158866466295, "grad_norm": 0.28515625, "learning_rate": 0.0010531794474467426, "loss": 2.0765, "step": 22713 }, { "epoch": 0.6095427221983684, "grad_norm": 0.2890625, "learning_rate": 0.0010531610293170429, "loss": 1.9065, "step": 22714 }, { "epoch": 0.6095695577501073, "grad_norm": 0.279296875, "learning_rate": 0.0010531426101932412, "loss": 2.0106, "step": 22715 }, { "epoch": 0.6095963933018463, "grad_norm": 0.275390625, "learning_rate": 0.0010531241900753783, "loss": 1.923, "step": 22716 }, { "epoch": 0.6096232288535852, "grad_norm": 0.28515625, "learning_rate": 0.0010531057689634937, "loss": 2.0395, "step": 22717 }, { "epoch": 0.6096500644053242, "grad_norm": 0.2734375, "learning_rate": 0.0010530873468576286, "loss": 2.0265, "step": 22718 }, { "epoch": 0.6096768999570631, "grad_norm": 0.265625, "learning_rate": 0.0010530689237578232, "loss": 1.886, "step": 22719 }, { "epoch": 0.6097037355088021, "grad_norm": 0.28125, "learning_rate": 0.0010530504996641176, "loss": 1.9643, "step": 22720 }, { "epoch": 0.609730571060541, "grad_norm": 0.279296875, "learning_rate": 0.0010530320745765528, "loss": 1.9886, "step": 22721 }, { "epoch": 0.60975740661228, "grad_norm": 0.279296875, "learning_rate": 0.001053013648495169, "loss": 1.8796, "step": 22722 }, { "epoch": 0.6097842421640189, "grad_norm": 0.275390625, "learning_rate": 0.0010529952214200062, "loss": 1.9869, "step": 22723 }, { "epoch": 0.6098110777157578, "grad_norm": 0.283203125, "learning_rate": 0.0010529767933511055, "loss": 1.9993, "step": 22724 }, { "epoch": 0.6098379132674968, "grad_norm": 0.279296875, "learning_rate": 0.0010529583642885066, "loss": 1.9627, "step": 22725 }, { "epoch": 0.6098647488192357, "grad_norm": 0.2734375, "learning_rate": 0.0010529399342322505, "loss": 1.9293, "step": 22726 }, { "epoch": 0.6098915843709747, "grad_norm": 0.291015625, "learning_rate": 0.0010529215031823776, "loss": 2.0122, "step": 22727 }, { "epoch": 0.6099184199227136, "grad_norm": 0.28125, "learning_rate": 0.0010529030711389279, "loss": 1.9517, "step": 22728 }, { "epoch": 0.6099452554744526, "grad_norm": 0.279296875, "learning_rate": 0.0010528846381019424, "loss": 1.9182, "step": 22729 }, { "epoch": 0.6099720910261915, "grad_norm": 0.2734375, "learning_rate": 0.001052866204071461, "loss": 1.9186, "step": 22730 }, { "epoch": 0.6099989265779304, "grad_norm": 0.2734375, "learning_rate": 0.0010528477690475244, "loss": 1.9383, "step": 22731 }, { "epoch": 0.6100257621296694, "grad_norm": 0.283203125, "learning_rate": 0.001052829333030173, "loss": 1.9102, "step": 22732 }, { "epoch": 0.6100525976814083, "grad_norm": 0.26953125, "learning_rate": 0.0010528108960194476, "loss": 1.9372, "step": 22733 }, { "epoch": 0.6100794332331473, "grad_norm": 0.275390625, "learning_rate": 0.0010527924580153879, "loss": 1.8598, "step": 22734 }, { "epoch": 0.6101062687848862, "grad_norm": 0.283203125, "learning_rate": 0.001052774019018035, "loss": 2.0449, "step": 22735 }, { "epoch": 0.6101331043366252, "grad_norm": 0.275390625, "learning_rate": 0.0010527555790274291, "loss": 1.8505, "step": 22736 }, { "epoch": 0.6101599398883641, "grad_norm": 0.279296875, "learning_rate": 0.0010527371380436106, "loss": 1.9244, "step": 22737 }, { "epoch": 0.610186775440103, "grad_norm": 0.279296875, "learning_rate": 0.00105271869606662, "loss": 1.8689, "step": 22738 }, { "epoch": 0.610213610991842, "grad_norm": 0.28125, "learning_rate": 0.0010527002530964977, "loss": 1.9972, "step": 22739 }, { "epoch": 0.6102404465435809, "grad_norm": 0.28125, "learning_rate": 0.0010526818091332844, "loss": 1.8668, "step": 22740 }, { "epoch": 0.6102672820953199, "grad_norm": 0.287109375, "learning_rate": 0.0010526633641770203, "loss": 1.9529, "step": 22741 }, { "epoch": 0.6102941176470589, "grad_norm": 0.29296875, "learning_rate": 0.001052644918227746, "loss": 2.029, "step": 22742 }, { "epoch": 0.6103209531987978, "grad_norm": 0.28125, "learning_rate": 0.0010526264712855018, "loss": 1.9343, "step": 22743 }, { "epoch": 0.6103477887505367, "grad_norm": 0.291015625, "learning_rate": 0.0010526080233503284, "loss": 2.0438, "step": 22744 }, { "epoch": 0.6103746243022756, "grad_norm": 0.30078125, "learning_rate": 0.0010525895744222663, "loss": 1.9303, "step": 22745 }, { "epoch": 0.6104014598540146, "grad_norm": 0.3203125, "learning_rate": 0.0010525711245013554, "loss": 2.1095, "step": 22746 }, { "epoch": 0.6104282954057535, "grad_norm": 0.3046875, "learning_rate": 0.001052552673587637, "loss": 2.0394, "step": 22747 }, { "epoch": 0.6104551309574925, "grad_norm": 0.2890625, "learning_rate": 0.001052534221681151, "loss": 2.1081, "step": 22748 }, { "epoch": 0.6104819665092315, "grad_norm": 0.29296875, "learning_rate": 0.001052515768781938, "loss": 2.0426, "step": 22749 }, { "epoch": 0.6105088020609704, "grad_norm": 0.30078125, "learning_rate": 0.0010524973148900386, "loss": 2.0691, "step": 22750 }, { "epoch": 0.6105356376127093, "grad_norm": 0.275390625, "learning_rate": 0.0010524788600054934, "loss": 2.0031, "step": 22751 }, { "epoch": 0.6105624731644482, "grad_norm": 0.2890625, "learning_rate": 0.0010524604041283424, "loss": 2.17, "step": 22752 }, { "epoch": 0.6105893087161872, "grad_norm": 0.27734375, "learning_rate": 0.0010524419472586265, "loss": 2.0018, "step": 22753 }, { "epoch": 0.6106161442679261, "grad_norm": 0.271484375, "learning_rate": 0.0010524234893963863, "loss": 2.029, "step": 22754 }, { "epoch": 0.6106429798196651, "grad_norm": 0.26953125, "learning_rate": 0.0010524050305416618, "loss": 1.9723, "step": 22755 }, { "epoch": 0.6106698153714041, "grad_norm": 0.283203125, "learning_rate": 0.0010523865706944938, "loss": 2.0514, "step": 22756 }, { "epoch": 0.610696650923143, "grad_norm": 0.279296875, "learning_rate": 0.0010523681098549228, "loss": 2.0584, "step": 22757 }, { "epoch": 0.610723486474882, "grad_norm": 0.28125, "learning_rate": 0.0010523496480229894, "loss": 1.9975, "step": 22758 }, { "epoch": 0.6107503220266208, "grad_norm": 0.291015625, "learning_rate": 0.0010523311851987336, "loss": 2.1234, "step": 22759 }, { "epoch": 0.6107771575783598, "grad_norm": 0.28515625, "learning_rate": 0.0010523127213821965, "loss": 2.0505, "step": 22760 }, { "epoch": 0.6108039931300988, "grad_norm": 0.283203125, "learning_rate": 0.0010522942565734184, "loss": 2.0031, "step": 22761 }, { "epoch": 0.6108308286818377, "grad_norm": 0.294921875, "learning_rate": 0.0010522757907724398, "loss": 2.1464, "step": 22762 }, { "epoch": 0.6108576642335767, "grad_norm": 0.27734375, "learning_rate": 0.0010522573239793009, "loss": 2.0291, "step": 22763 }, { "epoch": 0.6108844997853156, "grad_norm": 0.283203125, "learning_rate": 0.0010522388561940428, "loss": 2.1082, "step": 22764 }, { "epoch": 0.6109113353370546, "grad_norm": 0.283203125, "learning_rate": 0.0010522203874167055, "loss": 2.0088, "step": 22765 }, { "epoch": 0.6109381708887934, "grad_norm": 0.26953125, "learning_rate": 0.0010522019176473299, "loss": 1.9587, "step": 22766 }, { "epoch": 0.6109650064405324, "grad_norm": 0.2734375, "learning_rate": 0.0010521834468859561, "loss": 2.0304, "step": 22767 }, { "epoch": 0.6109918419922714, "grad_norm": 0.263671875, "learning_rate": 0.001052164975132625, "loss": 1.8791, "step": 22768 }, { "epoch": 0.6110186775440103, "grad_norm": 0.279296875, "learning_rate": 0.0010521465023873768, "loss": 1.9626, "step": 22769 }, { "epoch": 0.6110455130957493, "grad_norm": 0.279296875, "learning_rate": 0.0010521280286502524, "loss": 1.9855, "step": 22770 }, { "epoch": 0.6110723486474882, "grad_norm": 0.283203125, "learning_rate": 0.0010521095539212922, "loss": 1.8702, "step": 22771 }, { "epoch": 0.6110991841992272, "grad_norm": 0.267578125, "learning_rate": 0.0010520910782005363, "loss": 1.9336, "step": 22772 }, { "epoch": 0.611126019750966, "grad_norm": 0.279296875, "learning_rate": 0.0010520726014880259, "loss": 2.0053, "step": 22773 }, { "epoch": 0.611152855302705, "grad_norm": 0.2734375, "learning_rate": 0.001052054123783801, "loss": 1.9514, "step": 22774 }, { "epoch": 0.611179690854444, "grad_norm": 0.275390625, "learning_rate": 0.0010520356450879026, "loss": 2.0217, "step": 22775 }, { "epoch": 0.6112065264061829, "grad_norm": 0.287109375, "learning_rate": 0.0010520171654003708, "loss": 2.0082, "step": 22776 }, { "epoch": 0.6112333619579219, "grad_norm": 0.287109375, "learning_rate": 0.0010519986847212462, "loss": 1.9813, "step": 22777 }, { "epoch": 0.6112601975096608, "grad_norm": 0.27734375, "learning_rate": 0.0010519802030505697, "loss": 2.0078, "step": 22778 }, { "epoch": 0.6112870330613998, "grad_norm": 0.2734375, "learning_rate": 0.0010519617203883817, "loss": 1.9869, "step": 22779 }, { "epoch": 0.6113138686131386, "grad_norm": 0.27734375, "learning_rate": 0.0010519432367347224, "loss": 1.9491, "step": 22780 }, { "epoch": 0.6113407041648776, "grad_norm": 0.27734375, "learning_rate": 0.0010519247520896326, "loss": 1.9961, "step": 22781 }, { "epoch": 0.6113675397166166, "grad_norm": 0.275390625, "learning_rate": 0.001051906266453153, "loss": 1.9472, "step": 22782 }, { "epoch": 0.6113943752683555, "grad_norm": 0.27734375, "learning_rate": 0.001051887779825324, "loss": 1.9692, "step": 22783 }, { "epoch": 0.6114212108200945, "grad_norm": 0.283203125, "learning_rate": 0.0010518692922061863, "loss": 1.985, "step": 22784 }, { "epoch": 0.6114480463718334, "grad_norm": 0.275390625, "learning_rate": 0.00105185080359578, "loss": 2.0165, "step": 22785 }, { "epoch": 0.6114748819235724, "grad_norm": 0.2890625, "learning_rate": 0.0010518323139941462, "loss": 2.0507, "step": 22786 }, { "epoch": 0.6115017174753113, "grad_norm": 0.28515625, "learning_rate": 0.001051813823401325, "loss": 2.0566, "step": 22787 }, { "epoch": 0.6115285530270502, "grad_norm": 0.27734375, "learning_rate": 0.0010517953318173573, "loss": 1.961, "step": 22788 }, { "epoch": 0.6115553885787892, "grad_norm": 0.275390625, "learning_rate": 0.0010517768392422837, "loss": 2.0034, "step": 22789 }, { "epoch": 0.6115822241305281, "grad_norm": 0.283203125, "learning_rate": 0.0010517583456761445, "loss": 2.0479, "step": 22790 }, { "epoch": 0.6116090596822671, "grad_norm": 0.26953125, "learning_rate": 0.0010517398511189803, "loss": 1.8558, "step": 22791 }, { "epoch": 0.611635895234006, "grad_norm": 0.265625, "learning_rate": 0.001051721355570832, "loss": 1.9557, "step": 22792 }, { "epoch": 0.611662730785745, "grad_norm": 0.2734375, "learning_rate": 0.0010517028590317397, "loss": 1.9688, "step": 22793 }, { "epoch": 0.611689566337484, "grad_norm": 0.275390625, "learning_rate": 0.0010516843615017443, "loss": 1.95, "step": 22794 }, { "epoch": 0.6117164018892228, "grad_norm": 0.279296875, "learning_rate": 0.0010516658629808864, "loss": 1.9032, "step": 22795 }, { "epoch": 0.6117432374409618, "grad_norm": 0.283203125, "learning_rate": 0.0010516473634692065, "loss": 1.9067, "step": 22796 }, { "epoch": 0.6117700729927007, "grad_norm": 0.29296875, "learning_rate": 0.0010516288629667451, "loss": 2.0406, "step": 22797 }, { "epoch": 0.6117969085444397, "grad_norm": 0.27734375, "learning_rate": 0.0010516103614735429, "loss": 1.8998, "step": 22798 }, { "epoch": 0.6118237440961786, "grad_norm": 0.27734375, "learning_rate": 0.0010515918589896402, "loss": 2.015, "step": 22799 }, { "epoch": 0.6118505796479176, "grad_norm": 0.283203125, "learning_rate": 0.0010515733555150782, "loss": 2.0095, "step": 22800 }, { "epoch": 0.6118774151996565, "grad_norm": 0.275390625, "learning_rate": 0.001051554851049897, "loss": 1.8916, "step": 22801 }, { "epoch": 0.6119042507513954, "grad_norm": 0.2734375, "learning_rate": 0.0010515363455941371, "loss": 1.8245, "step": 22802 }, { "epoch": 0.6119310863031344, "grad_norm": 0.27734375, "learning_rate": 0.0010515178391478394, "loss": 1.8366, "step": 22803 }, { "epoch": 0.6119579218548733, "grad_norm": 0.283203125, "learning_rate": 0.0010514993317110443, "loss": 1.9041, "step": 22804 }, { "epoch": 0.6119847574066123, "grad_norm": 0.28515625, "learning_rate": 0.0010514808232837927, "loss": 2.003, "step": 22805 }, { "epoch": 0.6120115929583512, "grad_norm": 0.2890625, "learning_rate": 0.001051462313866125, "loss": 2.0045, "step": 22806 }, { "epoch": 0.6120384285100902, "grad_norm": 0.28125, "learning_rate": 0.0010514438034580815, "loss": 1.9449, "step": 22807 }, { "epoch": 0.6120652640618292, "grad_norm": 0.31640625, "learning_rate": 0.0010514252920597034, "loss": 2.1862, "step": 22808 }, { "epoch": 0.612092099613568, "grad_norm": 0.31640625, "learning_rate": 0.001051406779671031, "loss": 2.1659, "step": 22809 }, { "epoch": 0.612118935165307, "grad_norm": 0.298828125, "learning_rate": 0.0010513882662921048, "loss": 2.0396, "step": 22810 }, { "epoch": 0.6121457707170459, "grad_norm": 0.314453125, "learning_rate": 0.0010513697519229656, "loss": 2.1702, "step": 22811 }, { "epoch": 0.6121726062687849, "grad_norm": 0.29296875, "learning_rate": 0.0010513512365636541, "loss": 2.0236, "step": 22812 }, { "epoch": 0.6121994418205239, "grad_norm": 0.296875, "learning_rate": 0.0010513327202142108, "loss": 2.0432, "step": 22813 }, { "epoch": 0.6122262773722628, "grad_norm": 0.287109375, "learning_rate": 0.001051314202874676, "loss": 2.1103, "step": 22814 }, { "epoch": 0.6122531129240018, "grad_norm": 0.279296875, "learning_rate": 0.0010512956845450908, "loss": 2.0609, "step": 22815 }, { "epoch": 0.6122799484757406, "grad_norm": 0.29296875, "learning_rate": 0.0010512771652254956, "loss": 2.0477, "step": 22816 }, { "epoch": 0.6123067840274796, "grad_norm": 0.275390625, "learning_rate": 0.0010512586449159312, "loss": 2.017, "step": 22817 }, { "epoch": 0.6123336195792185, "grad_norm": 0.279296875, "learning_rate": 0.001051240123616438, "loss": 2.0379, "step": 22818 }, { "epoch": 0.6123604551309575, "grad_norm": 0.287109375, "learning_rate": 0.0010512216013270567, "loss": 2.0941, "step": 22819 }, { "epoch": 0.6123872906826965, "grad_norm": 0.279296875, "learning_rate": 0.0010512030780478279, "loss": 2.0106, "step": 22820 }, { "epoch": 0.6124141262344354, "grad_norm": 0.27734375, "learning_rate": 0.0010511845537787923, "loss": 2.0513, "step": 22821 }, { "epoch": 0.6124409617861744, "grad_norm": 0.283203125, "learning_rate": 0.0010511660285199907, "loss": 2.0415, "step": 22822 }, { "epoch": 0.6124677973379132, "grad_norm": 0.279296875, "learning_rate": 0.0010511475022714635, "loss": 2.0245, "step": 22823 }, { "epoch": 0.6124946328896522, "grad_norm": 0.27734375, "learning_rate": 0.0010511289750332513, "loss": 2.0071, "step": 22824 }, { "epoch": 0.6125214684413911, "grad_norm": 0.28125, "learning_rate": 0.0010511104468053948, "loss": 2.0556, "step": 22825 }, { "epoch": 0.6125483039931301, "grad_norm": 0.2734375, "learning_rate": 0.001051091917587935, "loss": 1.9217, "step": 22826 }, { "epoch": 0.6125751395448691, "grad_norm": 0.275390625, "learning_rate": 0.001051073387380912, "loss": 2.1, "step": 22827 }, { "epoch": 0.612601975096608, "grad_norm": 0.275390625, "learning_rate": 0.0010510548561843667, "loss": 2.0206, "step": 22828 }, { "epoch": 0.612628810648347, "grad_norm": 0.287109375, "learning_rate": 0.0010510363239983399, "loss": 2.0166, "step": 22829 }, { "epoch": 0.6126556462000858, "grad_norm": 0.26953125, "learning_rate": 0.0010510177908228718, "loss": 1.9889, "step": 22830 }, { "epoch": 0.6126824817518248, "grad_norm": 0.28515625, "learning_rate": 0.0010509992566580036, "loss": 2.0873, "step": 22831 }, { "epoch": 0.6127093173035638, "grad_norm": 0.279296875, "learning_rate": 0.001050980721503776, "loss": 2.0167, "step": 22832 }, { "epoch": 0.6127361528553027, "grad_norm": 0.2734375, "learning_rate": 0.0010509621853602288, "loss": 2.0401, "step": 22833 }, { "epoch": 0.6127629884070417, "grad_norm": 0.275390625, "learning_rate": 0.0010509436482274035, "loss": 2.0068, "step": 22834 }, { "epoch": 0.6127898239587806, "grad_norm": 0.271484375, "learning_rate": 0.0010509251101053405, "loss": 1.9346, "step": 22835 }, { "epoch": 0.6128166595105196, "grad_norm": 0.27734375, "learning_rate": 0.0010509065709940805, "loss": 2.0326, "step": 22836 }, { "epoch": 0.6128434950622584, "grad_norm": 0.28125, "learning_rate": 0.0010508880308936642, "loss": 2.1123, "step": 22837 }, { "epoch": 0.6128703306139974, "grad_norm": 0.275390625, "learning_rate": 0.001050869489804132, "loss": 1.969, "step": 22838 }, { "epoch": 0.6128971661657364, "grad_norm": 0.271484375, "learning_rate": 0.001050850947725525, "loss": 1.9364, "step": 22839 }, { "epoch": 0.6129240017174753, "grad_norm": 0.275390625, "learning_rate": 0.0010508324046578834, "loss": 1.9093, "step": 22840 }, { "epoch": 0.6129508372692143, "grad_norm": 0.275390625, "learning_rate": 0.0010508138606012485, "loss": 1.9449, "step": 22841 }, { "epoch": 0.6129776728209532, "grad_norm": 0.279296875, "learning_rate": 0.0010507953155556604, "loss": 1.9498, "step": 22842 }, { "epoch": 0.6130045083726922, "grad_norm": 0.2734375, "learning_rate": 0.0010507767695211601, "loss": 1.9644, "step": 22843 }, { "epoch": 0.613031343924431, "grad_norm": 0.279296875, "learning_rate": 0.0010507582224977881, "loss": 1.9737, "step": 22844 }, { "epoch": 0.61305817947617, "grad_norm": 0.275390625, "learning_rate": 0.0010507396744855854, "loss": 1.8499, "step": 22845 }, { "epoch": 0.613085015027909, "grad_norm": 0.28125, "learning_rate": 0.0010507211254845922, "loss": 2.0112, "step": 22846 }, { "epoch": 0.6131118505796479, "grad_norm": 0.27734375, "learning_rate": 0.0010507025754948494, "loss": 1.9773, "step": 22847 }, { "epoch": 0.6131386861313869, "grad_norm": 0.267578125, "learning_rate": 0.0010506840245163979, "loss": 1.9695, "step": 22848 }, { "epoch": 0.6131655216831258, "grad_norm": 0.28125, "learning_rate": 0.0010506654725492782, "loss": 2.0686, "step": 22849 }, { "epoch": 0.6131923572348648, "grad_norm": 0.2734375, "learning_rate": 0.001050646919593531, "loss": 1.8719, "step": 22850 }, { "epoch": 0.6132191927866036, "grad_norm": 0.279296875, "learning_rate": 0.0010506283656491974, "loss": 2.0401, "step": 22851 }, { "epoch": 0.6132460283383426, "grad_norm": 0.267578125, "learning_rate": 0.0010506098107163173, "loss": 1.9207, "step": 22852 }, { "epoch": 0.6132728638900816, "grad_norm": 0.271484375, "learning_rate": 0.001050591254794932, "loss": 1.9121, "step": 22853 }, { "epoch": 0.6132996994418205, "grad_norm": 0.267578125, "learning_rate": 0.001050572697885082, "loss": 1.8276, "step": 22854 }, { "epoch": 0.6133265349935595, "grad_norm": 0.28125, "learning_rate": 0.0010505541399868082, "loss": 2.0002, "step": 22855 }, { "epoch": 0.6133533705452984, "grad_norm": 0.2578125, "learning_rate": 0.001050535581100151, "loss": 1.799, "step": 22856 }, { "epoch": 0.6133802060970374, "grad_norm": 0.27734375, "learning_rate": 0.0010505170212251516, "loss": 1.8706, "step": 22857 }, { "epoch": 0.6134070416487764, "grad_norm": 0.287109375, "learning_rate": 0.00105049846036185, "loss": 1.982, "step": 22858 }, { "epoch": 0.6134338772005152, "grad_norm": 0.28515625, "learning_rate": 0.0010504798985102874, "loss": 1.9504, "step": 22859 }, { "epoch": 0.6134607127522542, "grad_norm": 0.27734375, "learning_rate": 0.0010504613356705046, "loss": 1.8778, "step": 22860 }, { "epoch": 0.6134875483039931, "grad_norm": 0.283203125, "learning_rate": 0.001050442771842542, "loss": 1.9474, "step": 22861 }, { "epoch": 0.6135143838557321, "grad_norm": 0.279296875, "learning_rate": 0.0010504242070264406, "loss": 1.9856, "step": 22862 }, { "epoch": 0.613541219407471, "grad_norm": 0.2890625, "learning_rate": 0.001050405641222241, "loss": 2.031, "step": 22863 }, { "epoch": 0.61356805495921, "grad_norm": 0.271484375, "learning_rate": 0.001050387074429984, "loss": 1.9073, "step": 22864 }, { "epoch": 0.613594890510949, "grad_norm": 0.287109375, "learning_rate": 0.0010503685066497101, "loss": 1.8898, "step": 22865 }, { "epoch": 0.6136217260626878, "grad_norm": 0.28125, "learning_rate": 0.0010503499378814601, "loss": 1.9761, "step": 22866 }, { "epoch": 0.6136485616144268, "grad_norm": 0.28125, "learning_rate": 0.001050331368125275, "loss": 1.9192, "step": 22867 }, { "epoch": 0.6136753971661657, "grad_norm": 0.27734375, "learning_rate": 0.0010503127973811954, "loss": 1.8078, "step": 22868 }, { "epoch": 0.6137022327179047, "grad_norm": 0.283203125, "learning_rate": 0.001050294225649262, "loss": 1.822, "step": 22869 }, { "epoch": 0.6137290682696436, "grad_norm": 0.28515625, "learning_rate": 0.0010502756529295154, "loss": 1.8584, "step": 22870 }, { "epoch": 0.6137559038213826, "grad_norm": 0.318359375, "learning_rate": 0.0010502570792219968, "loss": 2.1286, "step": 22871 }, { "epoch": 0.6137827393731216, "grad_norm": 0.30859375, "learning_rate": 0.0010502385045267463, "loss": 2.0795, "step": 22872 }, { "epoch": 0.6138095749248604, "grad_norm": 0.30078125, "learning_rate": 0.0010502199288438053, "loss": 1.9936, "step": 22873 }, { "epoch": 0.6138364104765994, "grad_norm": 0.306640625, "learning_rate": 0.0010502013521732137, "loss": 2.1189, "step": 22874 }, { "epoch": 0.6138632460283383, "grad_norm": 0.2890625, "learning_rate": 0.0010501827745150133, "loss": 2.0117, "step": 22875 }, { "epoch": 0.6138900815800773, "grad_norm": 0.302734375, "learning_rate": 0.0010501641958692441, "loss": 2.0233, "step": 22876 }, { "epoch": 0.6139169171318162, "grad_norm": 0.28125, "learning_rate": 0.0010501456162359472, "loss": 2.0054, "step": 22877 }, { "epoch": 0.6139437526835552, "grad_norm": 0.279296875, "learning_rate": 0.0010501270356151635, "loss": 2.0391, "step": 22878 }, { "epoch": 0.6139705882352942, "grad_norm": 0.287109375, "learning_rate": 0.001050108454006933, "loss": 2.1285, "step": 22879 }, { "epoch": 0.613997423787033, "grad_norm": 0.29296875, "learning_rate": 0.0010500898714112974, "loss": 2.1472, "step": 22880 }, { "epoch": 0.614024259338772, "grad_norm": 0.27734375, "learning_rate": 0.0010500712878282966, "loss": 2.0005, "step": 22881 }, { "epoch": 0.6140510948905109, "grad_norm": 0.28125, "learning_rate": 0.0010500527032579721, "loss": 1.9859, "step": 22882 }, { "epoch": 0.6140779304422499, "grad_norm": 0.287109375, "learning_rate": 0.0010500341177003645, "loss": 2.1048, "step": 22883 }, { "epoch": 0.6141047659939889, "grad_norm": 0.279296875, "learning_rate": 0.0010500155311555142, "loss": 2.0078, "step": 22884 }, { "epoch": 0.6141316015457278, "grad_norm": 0.29296875, "learning_rate": 0.0010499969436234622, "loss": 2.0548, "step": 22885 }, { "epoch": 0.6141584370974668, "grad_norm": 0.283203125, "learning_rate": 0.0010499783551042494, "loss": 1.9683, "step": 22886 }, { "epoch": 0.6141852726492056, "grad_norm": 0.28125, "learning_rate": 0.0010499597655979165, "loss": 2.0475, "step": 22887 }, { "epoch": 0.6142121082009446, "grad_norm": 0.2734375, "learning_rate": 0.0010499411751045044, "loss": 2.0051, "step": 22888 }, { "epoch": 0.6142389437526835, "grad_norm": 0.279296875, "learning_rate": 0.0010499225836240536, "loss": 2.0525, "step": 22889 }, { "epoch": 0.6142657793044225, "grad_norm": 0.291015625, "learning_rate": 0.001049903991156605, "loss": 2.0468, "step": 22890 }, { "epoch": 0.6142926148561615, "grad_norm": 0.27734375, "learning_rate": 0.0010498853977021993, "loss": 2.0098, "step": 22891 }, { "epoch": 0.6143194504079004, "grad_norm": 0.271484375, "learning_rate": 0.0010498668032608775, "loss": 2.0012, "step": 22892 }, { "epoch": 0.6143462859596394, "grad_norm": 0.283203125, "learning_rate": 0.0010498482078326802, "loss": 1.9876, "step": 22893 }, { "epoch": 0.6143731215113782, "grad_norm": 0.275390625, "learning_rate": 0.0010498296114176485, "loss": 1.894, "step": 22894 }, { "epoch": 0.6143999570631172, "grad_norm": 0.27734375, "learning_rate": 0.0010498110140158227, "loss": 1.9166, "step": 22895 }, { "epoch": 0.6144267926148561, "grad_norm": 0.275390625, "learning_rate": 0.0010497924156272441, "loss": 2.0446, "step": 22896 }, { "epoch": 0.6144536281665951, "grad_norm": 0.283203125, "learning_rate": 0.0010497738162519531, "loss": 1.9755, "step": 22897 }, { "epoch": 0.6144804637183341, "grad_norm": 0.27734375, "learning_rate": 0.0010497552158899909, "loss": 1.9578, "step": 22898 }, { "epoch": 0.614507299270073, "grad_norm": 0.28125, "learning_rate": 0.001049736614541398, "loss": 1.996, "step": 22899 }, { "epoch": 0.614534134821812, "grad_norm": 0.275390625, "learning_rate": 0.001049718012206215, "loss": 1.9841, "step": 22900 }, { "epoch": 0.6145609703735508, "grad_norm": 0.29296875, "learning_rate": 0.0010496994088844832, "loss": 2.123, "step": 22901 }, { "epoch": 0.6145878059252898, "grad_norm": 0.28125, "learning_rate": 0.0010496808045762432, "loss": 2.0021, "step": 22902 }, { "epoch": 0.6146146414770288, "grad_norm": 0.275390625, "learning_rate": 0.001049662199281536, "loss": 1.905, "step": 22903 }, { "epoch": 0.6146414770287677, "grad_norm": 0.279296875, "learning_rate": 0.0010496435930004019, "loss": 2.034, "step": 22904 }, { "epoch": 0.6146683125805067, "grad_norm": 0.279296875, "learning_rate": 0.0010496249857328821, "loss": 1.9977, "step": 22905 }, { "epoch": 0.6146951481322456, "grad_norm": 0.2734375, "learning_rate": 0.0010496063774790175, "loss": 2.0989, "step": 22906 }, { "epoch": 0.6147219836839846, "grad_norm": 0.271484375, "learning_rate": 0.0010495877682388489, "loss": 1.929, "step": 22907 }, { "epoch": 0.6147488192357234, "grad_norm": 0.28125, "learning_rate": 0.0010495691580124167, "loss": 1.9205, "step": 22908 }, { "epoch": 0.6147756547874624, "grad_norm": 0.287109375, "learning_rate": 0.0010495505467997622, "loss": 2.0328, "step": 22909 }, { "epoch": 0.6148024903392014, "grad_norm": 0.275390625, "learning_rate": 0.001049531934600926, "loss": 1.9515, "step": 22910 }, { "epoch": 0.6148293258909403, "grad_norm": 0.28125, "learning_rate": 0.0010495133214159492, "loss": 2.047, "step": 22911 }, { "epoch": 0.6148561614426793, "grad_norm": 0.27734375, "learning_rate": 0.001049494707244872, "loss": 2.0245, "step": 22912 }, { "epoch": 0.6148829969944182, "grad_norm": 0.27734375, "learning_rate": 0.001049476092087736, "loss": 1.9752, "step": 22913 }, { "epoch": 0.6149098325461572, "grad_norm": 0.275390625, "learning_rate": 0.0010494574759445817, "loss": 1.896, "step": 22914 }, { "epoch": 0.614936668097896, "grad_norm": 0.27734375, "learning_rate": 0.0010494388588154497, "loss": 1.9499, "step": 22915 }, { "epoch": 0.614963503649635, "grad_norm": 0.275390625, "learning_rate": 0.001049420240700381, "loss": 1.9522, "step": 22916 }, { "epoch": 0.614990339201374, "grad_norm": 0.271484375, "learning_rate": 0.0010494016215994168, "loss": 1.9028, "step": 22917 }, { "epoch": 0.6150171747531129, "grad_norm": 0.271484375, "learning_rate": 0.0010493830015125974, "loss": 1.9699, "step": 22918 }, { "epoch": 0.6150440103048519, "grad_norm": 0.267578125, "learning_rate": 0.0010493643804399641, "loss": 1.9071, "step": 22919 }, { "epoch": 0.6150708458565908, "grad_norm": 0.28125, "learning_rate": 0.0010493457583815576, "loss": 2.0316, "step": 22920 }, { "epoch": 0.6150976814083298, "grad_norm": 0.28125, "learning_rate": 0.0010493271353374186, "loss": 1.8567, "step": 22921 }, { "epoch": 0.6151245169600686, "grad_norm": 0.279296875, "learning_rate": 0.0010493085113075882, "loss": 1.9279, "step": 22922 }, { "epoch": 0.6151513525118076, "grad_norm": 0.28125, "learning_rate": 0.0010492898862921069, "loss": 1.9581, "step": 22923 }, { "epoch": 0.6151781880635466, "grad_norm": 0.263671875, "learning_rate": 0.001049271260291016, "loss": 1.7941, "step": 22924 }, { "epoch": 0.6152050236152855, "grad_norm": 0.27734375, "learning_rate": 0.0010492526333043559, "loss": 1.8548, "step": 22925 }, { "epoch": 0.6152318591670245, "grad_norm": 0.28515625, "learning_rate": 0.0010492340053321679, "loss": 1.9401, "step": 22926 }, { "epoch": 0.6152586947187634, "grad_norm": 0.275390625, "learning_rate": 0.0010492153763744924, "loss": 1.8872, "step": 22927 }, { "epoch": 0.6152855302705024, "grad_norm": 0.271484375, "learning_rate": 0.0010491967464313708, "loss": 1.9499, "step": 22928 }, { "epoch": 0.6153123658222414, "grad_norm": 0.275390625, "learning_rate": 0.0010491781155028436, "loss": 1.9273, "step": 22929 }, { "epoch": 0.6153392013739802, "grad_norm": 0.279296875, "learning_rate": 0.0010491594835889517, "loss": 1.8378, "step": 22930 }, { "epoch": 0.6153660369257192, "grad_norm": 0.27734375, "learning_rate": 0.0010491408506897362, "loss": 1.8754, "step": 22931 }, { "epoch": 0.6153928724774581, "grad_norm": 0.2734375, "learning_rate": 0.0010491222168052376, "loss": 1.9179, "step": 22932 }, { "epoch": 0.6154197080291971, "grad_norm": 0.314453125, "learning_rate": 0.001049103581935497, "loss": 2.1337, "step": 22933 }, { "epoch": 0.615446543580936, "grad_norm": 0.3203125, "learning_rate": 0.0010490849460805553, "loss": 2.0997, "step": 22934 }, { "epoch": 0.615473379132675, "grad_norm": 0.310546875, "learning_rate": 0.0010490663092404535, "loss": 2.0225, "step": 22935 }, { "epoch": 0.615500214684414, "grad_norm": 0.298828125, "learning_rate": 0.0010490476714152322, "loss": 2.0753, "step": 22936 }, { "epoch": 0.6155270502361528, "grad_norm": 0.306640625, "learning_rate": 0.0010490290326049324, "loss": 2.0747, "step": 22937 }, { "epoch": 0.6155538857878918, "grad_norm": 0.2890625, "learning_rate": 0.0010490103928095951, "loss": 2.0174, "step": 22938 }, { "epoch": 0.6155807213396307, "grad_norm": 0.2890625, "learning_rate": 0.0010489917520292611, "loss": 2.1436, "step": 22939 }, { "epoch": 0.6156075568913697, "grad_norm": 0.302734375, "learning_rate": 0.0010489731102639713, "loss": 2.0927, "step": 22940 }, { "epoch": 0.6156343924431086, "grad_norm": 0.287109375, "learning_rate": 0.0010489544675137666, "loss": 2.1249, "step": 22941 }, { "epoch": 0.6156612279948476, "grad_norm": 0.28125, "learning_rate": 0.0010489358237786876, "loss": 2.0548, "step": 22942 }, { "epoch": 0.6156880635465866, "grad_norm": 0.275390625, "learning_rate": 0.0010489171790587757, "loss": 2.1107, "step": 22943 }, { "epoch": 0.6157148990983254, "grad_norm": 0.26171875, "learning_rate": 0.0010488985333540718, "loss": 1.8976, "step": 22944 }, { "epoch": 0.6157417346500644, "grad_norm": 0.279296875, "learning_rate": 0.0010488798866646163, "loss": 2.0663, "step": 22945 }, { "epoch": 0.6157685702018033, "grad_norm": 0.291015625, "learning_rate": 0.0010488612389904504, "loss": 2.0261, "step": 22946 }, { "epoch": 0.6157954057535423, "grad_norm": 0.27734375, "learning_rate": 0.001048842590331615, "loss": 2.038, "step": 22947 }, { "epoch": 0.6158222413052812, "grad_norm": 0.294921875, "learning_rate": 0.0010488239406881512, "loss": 2.0029, "step": 22948 }, { "epoch": 0.6158490768570202, "grad_norm": 0.27734375, "learning_rate": 0.0010488052900600995, "loss": 2.1047, "step": 22949 }, { "epoch": 0.6158759124087592, "grad_norm": 0.28125, "learning_rate": 0.0010487866384475011, "loss": 2.0752, "step": 22950 }, { "epoch": 0.615902747960498, "grad_norm": 0.267578125, "learning_rate": 0.001048767985850397, "loss": 1.9257, "step": 22951 }, { "epoch": 0.615929583512237, "grad_norm": 0.26953125, "learning_rate": 0.0010487493322688278, "loss": 2.0498, "step": 22952 }, { "epoch": 0.6159564190639759, "grad_norm": 0.275390625, "learning_rate": 0.0010487306777028347, "loss": 2.0033, "step": 22953 }, { "epoch": 0.6159832546157149, "grad_norm": 0.265625, "learning_rate": 0.0010487120221524585, "loss": 2.0342, "step": 22954 }, { "epoch": 0.6160100901674539, "grad_norm": 0.2734375, "learning_rate": 0.0010486933656177401, "loss": 2.0261, "step": 22955 }, { "epoch": 0.6160369257191928, "grad_norm": 0.27734375, "learning_rate": 0.0010486747080987204, "loss": 2.0492, "step": 22956 }, { "epoch": 0.6160637612709318, "grad_norm": 0.271484375, "learning_rate": 0.0010486560495954404, "loss": 2.0316, "step": 22957 }, { "epoch": 0.6160905968226706, "grad_norm": 0.271484375, "learning_rate": 0.001048637390107941, "loss": 1.9612, "step": 22958 }, { "epoch": 0.6161174323744096, "grad_norm": 0.28125, "learning_rate": 0.0010486187296362634, "loss": 2.0512, "step": 22959 }, { "epoch": 0.6161442679261485, "grad_norm": 0.27734375, "learning_rate": 0.001048600068180448, "loss": 2.0035, "step": 22960 }, { "epoch": 0.6161711034778875, "grad_norm": 0.2734375, "learning_rate": 0.0010485814057405363, "loss": 1.9895, "step": 22961 }, { "epoch": 0.6161979390296265, "grad_norm": 0.28125, "learning_rate": 0.0010485627423165687, "loss": 2.0756, "step": 22962 }, { "epoch": 0.6162247745813654, "grad_norm": 0.271484375, "learning_rate": 0.0010485440779085865, "loss": 1.9081, "step": 22963 }, { "epoch": 0.6162516101331044, "grad_norm": 0.28125, "learning_rate": 0.0010485254125166307, "loss": 2.0077, "step": 22964 }, { "epoch": 0.6162784456848432, "grad_norm": 0.283203125, "learning_rate": 0.001048506746140742, "loss": 2.0352, "step": 22965 }, { "epoch": 0.6163052812365822, "grad_norm": 0.2890625, "learning_rate": 0.0010484880787809613, "loss": 2.0388, "step": 22966 }, { "epoch": 0.6163321167883211, "grad_norm": 0.27734375, "learning_rate": 0.00104846941043733, "loss": 1.9925, "step": 22967 }, { "epoch": 0.6163589523400601, "grad_norm": 0.271484375, "learning_rate": 0.0010484507411098886, "loss": 1.8512, "step": 22968 }, { "epoch": 0.6163857878917991, "grad_norm": 0.279296875, "learning_rate": 0.001048432070798678, "loss": 1.9457, "step": 22969 }, { "epoch": 0.616412623443538, "grad_norm": 0.275390625, "learning_rate": 0.0010484133995037395, "loss": 1.9501, "step": 22970 }, { "epoch": 0.616439458995277, "grad_norm": 0.2734375, "learning_rate": 0.0010483947272251141, "loss": 1.9967, "step": 22971 }, { "epoch": 0.6164662945470158, "grad_norm": 0.279296875, "learning_rate": 0.0010483760539628426, "loss": 1.943, "step": 22972 }, { "epoch": 0.6164931300987548, "grad_norm": 0.275390625, "learning_rate": 0.0010483573797169657, "loss": 1.983, "step": 22973 }, { "epoch": 0.6165199656504938, "grad_norm": 0.27734375, "learning_rate": 0.0010483387044875247, "loss": 1.9422, "step": 22974 }, { "epoch": 0.6165468012022327, "grad_norm": 0.27734375, "learning_rate": 0.0010483200282745606, "loss": 1.9086, "step": 22975 }, { "epoch": 0.6165736367539717, "grad_norm": 0.271484375, "learning_rate": 0.0010483013510781142, "loss": 1.9092, "step": 22976 }, { "epoch": 0.6166004723057106, "grad_norm": 0.283203125, "learning_rate": 0.0010482826728982265, "loss": 1.9154, "step": 22977 }, { "epoch": 0.6166273078574496, "grad_norm": 0.275390625, "learning_rate": 0.0010482639937349385, "loss": 1.8689, "step": 22978 }, { "epoch": 0.6166541434091884, "grad_norm": 0.279296875, "learning_rate": 0.0010482453135882914, "loss": 1.9019, "step": 22979 }, { "epoch": 0.6166809789609274, "grad_norm": 0.31640625, "learning_rate": 0.0010482266324583257, "loss": 2.0178, "step": 22980 }, { "epoch": 0.6167078145126664, "grad_norm": 0.283203125, "learning_rate": 0.0010482079503450826, "loss": 1.9398, "step": 22981 }, { "epoch": 0.6167346500644053, "grad_norm": 0.279296875, "learning_rate": 0.0010481892672486032, "loss": 1.8333, "step": 22982 }, { "epoch": 0.6167614856161443, "grad_norm": 0.275390625, "learning_rate": 0.0010481705831689285, "loss": 1.9106, "step": 22983 }, { "epoch": 0.6167883211678832, "grad_norm": 0.271484375, "learning_rate": 0.0010481518981060992, "loss": 1.8927, "step": 22984 }, { "epoch": 0.6168151567196222, "grad_norm": 0.2890625, "learning_rate": 0.0010481332120601567, "loss": 2.0366, "step": 22985 }, { "epoch": 0.616841992271361, "grad_norm": 0.291015625, "learning_rate": 0.0010481145250311418, "loss": 1.9607, "step": 22986 }, { "epoch": 0.6168688278231, "grad_norm": 0.275390625, "learning_rate": 0.0010480958370190953, "loss": 2.024, "step": 22987 }, { "epoch": 0.616895663374839, "grad_norm": 0.287109375, "learning_rate": 0.0010480771480240584, "loss": 1.9489, "step": 22988 }, { "epoch": 0.6169224989265779, "grad_norm": 0.279296875, "learning_rate": 0.0010480584580460721, "loss": 1.9408, "step": 22989 }, { "epoch": 0.6169493344783169, "grad_norm": 0.275390625, "learning_rate": 0.0010480397670851775, "loss": 1.9739, "step": 22990 }, { "epoch": 0.6169761700300558, "grad_norm": 0.275390625, "learning_rate": 0.0010480210751414151, "loss": 1.961, "step": 22991 }, { "epoch": 0.6170030055817948, "grad_norm": 0.2734375, "learning_rate": 0.0010480023822148265, "loss": 1.8931, "step": 22992 }, { "epoch": 0.6170298411335337, "grad_norm": 0.28515625, "learning_rate": 0.0010479836883054525, "loss": 2.1033, "step": 22993 }, { "epoch": 0.6170566766852726, "grad_norm": 0.298828125, "learning_rate": 0.001047964993413334, "loss": 2.0328, "step": 22994 }, { "epoch": 0.6170835122370116, "grad_norm": 0.310546875, "learning_rate": 0.0010479462975385122, "loss": 2.0417, "step": 22995 }, { "epoch": 0.6171103477887505, "grad_norm": 0.310546875, "learning_rate": 0.0010479276006810279, "loss": 2.1207, "step": 22996 }, { "epoch": 0.6171371833404895, "grad_norm": 0.302734375, "learning_rate": 0.0010479089028409224, "loss": 2.1446, "step": 22997 }, { "epoch": 0.6171640188922284, "grad_norm": 0.30078125, "learning_rate": 0.0010478902040182364, "loss": 2.1211, "step": 22998 }, { "epoch": 0.6171908544439674, "grad_norm": 0.291015625, "learning_rate": 0.001047871504213011, "loss": 2.0329, "step": 22999 }, { "epoch": 0.6172176899957064, "grad_norm": 0.287109375, "learning_rate": 0.0010478528034252874, "loss": 2.0181, "step": 23000 }, { "epoch": 0.6172445255474452, "grad_norm": 0.306640625, "learning_rate": 0.0010478341016551065, "loss": 2.1471, "step": 23001 }, { "epoch": 0.6172713610991842, "grad_norm": 0.287109375, "learning_rate": 0.0010478153989025094, "loss": 2.1118, "step": 23002 }, { "epoch": 0.6172981966509231, "grad_norm": 0.26953125, "learning_rate": 0.001047796695167537, "loss": 2.0396, "step": 23003 }, { "epoch": 0.6173250322026621, "grad_norm": 0.2890625, "learning_rate": 0.0010477779904502305, "loss": 2.1382, "step": 23004 }, { "epoch": 0.617351867754401, "grad_norm": 0.279296875, "learning_rate": 0.0010477592847506305, "loss": 2.0004, "step": 23005 }, { "epoch": 0.61737870330614, "grad_norm": 0.28125, "learning_rate": 0.0010477405780687787, "loss": 2.1283, "step": 23006 }, { "epoch": 0.617405538857879, "grad_norm": 0.275390625, "learning_rate": 0.0010477218704047155, "loss": 2.1088, "step": 23007 }, { "epoch": 0.6174323744096178, "grad_norm": 0.2734375, "learning_rate": 0.0010477031617584826, "loss": 2.1195, "step": 23008 }, { "epoch": 0.6174592099613568, "grad_norm": 0.291015625, "learning_rate": 0.0010476844521301206, "loss": 2.1058, "step": 23009 }, { "epoch": 0.6174860455130957, "grad_norm": 0.28125, "learning_rate": 0.0010476657415196705, "loss": 2.0375, "step": 23010 }, { "epoch": 0.6175128810648347, "grad_norm": 0.27734375, "learning_rate": 0.0010476470299271734, "loss": 1.9724, "step": 23011 }, { "epoch": 0.6175397166165736, "grad_norm": 0.275390625, "learning_rate": 0.0010476283173526703, "loss": 1.9699, "step": 23012 }, { "epoch": 0.6175665521683126, "grad_norm": 0.271484375, "learning_rate": 0.0010476096037962028, "loss": 2.005, "step": 23013 }, { "epoch": 0.6175933877200516, "grad_norm": 0.283203125, "learning_rate": 0.001047590889257811, "loss": 2.064, "step": 23014 }, { "epoch": 0.6176202232717904, "grad_norm": 0.28515625, "learning_rate": 0.0010475721737375368, "loss": 2.0915, "step": 23015 }, { "epoch": 0.6176470588235294, "grad_norm": 0.26953125, "learning_rate": 0.001047553457235421, "loss": 2.0559, "step": 23016 }, { "epoch": 0.6176738943752683, "grad_norm": 0.2734375, "learning_rate": 0.0010475347397515043, "loss": 2.0152, "step": 23017 }, { "epoch": 0.6177007299270073, "grad_norm": 0.26953125, "learning_rate": 0.0010475160212858284, "loss": 2.0156, "step": 23018 }, { "epoch": 0.6177275654787462, "grad_norm": 0.27734375, "learning_rate": 0.0010474973018384337, "loss": 2.0756, "step": 23019 }, { "epoch": 0.6177544010304852, "grad_norm": 0.28515625, "learning_rate": 0.0010474785814093618, "loss": 2.0956, "step": 23020 }, { "epoch": 0.6177812365822242, "grad_norm": 0.275390625, "learning_rate": 0.0010474598599986534, "loss": 1.9838, "step": 23021 }, { "epoch": 0.617808072133963, "grad_norm": 0.2734375, "learning_rate": 0.0010474411376063498, "loss": 1.9801, "step": 23022 }, { "epoch": 0.617834907685702, "grad_norm": 0.271484375, "learning_rate": 0.001047422414232492, "loss": 2.0338, "step": 23023 }, { "epoch": 0.6178617432374409, "grad_norm": 0.279296875, "learning_rate": 0.0010474036898771208, "loss": 2.0337, "step": 23024 }, { "epoch": 0.6178885787891799, "grad_norm": 0.279296875, "learning_rate": 0.0010473849645402779, "loss": 2.0536, "step": 23025 }, { "epoch": 0.6179154143409189, "grad_norm": 0.28125, "learning_rate": 0.0010473662382220036, "loss": 2.0828, "step": 23026 }, { "epoch": 0.6179422498926578, "grad_norm": 0.271484375, "learning_rate": 0.0010473475109223397, "loss": 1.9902, "step": 23027 }, { "epoch": 0.6179690854443968, "grad_norm": 0.283203125, "learning_rate": 0.001047328782641327, "loss": 2.1008, "step": 23028 }, { "epoch": 0.6179959209961357, "grad_norm": 0.28125, "learning_rate": 0.0010473100533790064, "loss": 2.0502, "step": 23029 }, { "epoch": 0.6180227565478746, "grad_norm": 0.2734375, "learning_rate": 0.0010472913231354191, "loss": 2.0353, "step": 23030 }, { "epoch": 0.6180495920996135, "grad_norm": 0.28125, "learning_rate": 0.0010472725919106066, "loss": 2.0176, "step": 23031 }, { "epoch": 0.6180764276513525, "grad_norm": 0.28125, "learning_rate": 0.0010472538597046093, "loss": 1.9551, "step": 23032 }, { "epoch": 0.6181032632030915, "grad_norm": 0.28515625, "learning_rate": 0.0010472351265174685, "loss": 2.0513, "step": 23033 }, { "epoch": 0.6181300987548304, "grad_norm": 0.2734375, "learning_rate": 0.0010472163923492257, "loss": 1.9299, "step": 23034 }, { "epoch": 0.6181569343065694, "grad_norm": 0.294921875, "learning_rate": 0.0010471976571999216, "loss": 2.1026, "step": 23035 }, { "epoch": 0.6181837698583083, "grad_norm": 0.271484375, "learning_rate": 0.0010471789210695973, "loss": 1.8866, "step": 23036 }, { "epoch": 0.6182106054100472, "grad_norm": 0.2734375, "learning_rate": 0.0010471601839582942, "loss": 2.0068, "step": 23037 }, { "epoch": 0.6182374409617861, "grad_norm": 0.2734375, "learning_rate": 0.0010471414458660532, "loss": 1.9082, "step": 23038 }, { "epoch": 0.6182642765135251, "grad_norm": 0.2734375, "learning_rate": 0.0010471227067929152, "loss": 1.8795, "step": 23039 }, { "epoch": 0.6182911120652641, "grad_norm": 0.283203125, "learning_rate": 0.0010471039667389217, "loss": 1.988, "step": 23040 }, { "epoch": 0.618317947617003, "grad_norm": 0.28125, "learning_rate": 0.0010470852257041137, "loss": 1.9915, "step": 23041 }, { "epoch": 0.618344783168742, "grad_norm": 0.29296875, "learning_rate": 0.001047066483688532, "loss": 1.9681, "step": 23042 }, { "epoch": 0.6183716187204809, "grad_norm": 0.28125, "learning_rate": 0.001047047740692218, "loss": 2.0865, "step": 23043 }, { "epoch": 0.6183984542722198, "grad_norm": 0.265625, "learning_rate": 0.0010470289967152132, "loss": 1.812, "step": 23044 }, { "epoch": 0.6184252898239588, "grad_norm": 0.279296875, "learning_rate": 0.0010470102517575579, "loss": 2.01, "step": 23045 }, { "epoch": 0.6184521253756977, "grad_norm": 0.26953125, "learning_rate": 0.0010469915058192937, "loss": 1.9376, "step": 23046 }, { "epoch": 0.6184789609274367, "grad_norm": 0.2734375, "learning_rate": 0.0010469727589004618, "loss": 1.9485, "step": 23047 }, { "epoch": 0.6185057964791756, "grad_norm": 0.2734375, "learning_rate": 0.001046954011001103, "loss": 1.8663, "step": 23048 }, { "epoch": 0.6185326320309146, "grad_norm": 0.302734375, "learning_rate": 0.0010469352621212584, "loss": 1.9716, "step": 23049 }, { "epoch": 0.6185594675826535, "grad_norm": 0.27734375, "learning_rate": 0.0010469165122609697, "loss": 1.9917, "step": 23050 }, { "epoch": 0.6185863031343924, "grad_norm": 0.275390625, "learning_rate": 0.0010468977614202775, "loss": 1.9307, "step": 23051 }, { "epoch": 0.6186131386861314, "grad_norm": 0.28515625, "learning_rate": 0.0010468790095992232, "loss": 1.9628, "step": 23052 }, { "epoch": 0.6186399742378703, "grad_norm": 0.291015625, "learning_rate": 0.0010468602567978477, "loss": 1.8921, "step": 23053 }, { "epoch": 0.6186668097896093, "grad_norm": 0.302734375, "learning_rate": 0.001046841503016192, "loss": 2.0936, "step": 23054 }, { "epoch": 0.6186936453413482, "grad_norm": 0.3125, "learning_rate": 0.001046822748254298, "loss": 1.9992, "step": 23055 }, { "epoch": 0.6187204808930872, "grad_norm": 0.322265625, "learning_rate": 0.0010468039925122061, "loss": 2.071, "step": 23056 }, { "epoch": 0.6187473164448261, "grad_norm": 0.30078125, "learning_rate": 0.0010467852357899578, "loss": 2.2054, "step": 23057 }, { "epoch": 0.618774151996565, "grad_norm": 0.296875, "learning_rate": 0.001046766478087594, "loss": 2.1078, "step": 23058 }, { "epoch": 0.618800987548304, "grad_norm": 0.287109375, "learning_rate": 0.001046747719405156, "loss": 2.0978, "step": 23059 }, { "epoch": 0.6188278231000429, "grad_norm": 0.283203125, "learning_rate": 0.001046728959742685, "loss": 2.0487, "step": 23060 }, { "epoch": 0.6188546586517819, "grad_norm": 0.2734375, "learning_rate": 0.0010467101991002222, "loss": 2.09, "step": 23061 }, { "epoch": 0.6188814942035208, "grad_norm": 0.26953125, "learning_rate": 0.0010466914374778085, "loss": 1.984, "step": 23062 }, { "epoch": 0.6189083297552598, "grad_norm": 0.2890625, "learning_rate": 0.0010466726748754852, "loss": 2.0731, "step": 23063 }, { "epoch": 0.6189351653069987, "grad_norm": 0.291015625, "learning_rate": 0.0010466539112932937, "loss": 2.0053, "step": 23064 }, { "epoch": 0.6189620008587376, "grad_norm": 0.283203125, "learning_rate": 0.0010466351467312747, "loss": 2.1041, "step": 23065 }, { "epoch": 0.6189888364104766, "grad_norm": 0.28515625, "learning_rate": 0.0010466163811894695, "loss": 2.0682, "step": 23066 }, { "epoch": 0.6190156719622155, "grad_norm": 0.2890625, "learning_rate": 0.0010465976146679194, "loss": 2.1036, "step": 23067 }, { "epoch": 0.6190425075139545, "grad_norm": 0.271484375, "learning_rate": 0.0010465788471666656, "loss": 1.9455, "step": 23068 }, { "epoch": 0.6190693430656934, "grad_norm": 0.28125, "learning_rate": 0.001046560078685749, "loss": 2.0433, "step": 23069 }, { "epoch": 0.6190961786174324, "grad_norm": 0.275390625, "learning_rate": 0.0010465413092252114, "loss": 1.9785, "step": 23070 }, { "epoch": 0.6191230141691714, "grad_norm": 0.26953125, "learning_rate": 0.0010465225387850934, "loss": 1.9505, "step": 23071 }, { "epoch": 0.6191498497209103, "grad_norm": 0.265625, "learning_rate": 0.0010465037673654362, "loss": 1.9283, "step": 23072 }, { "epoch": 0.6191766852726492, "grad_norm": 0.275390625, "learning_rate": 0.0010464849949662811, "loss": 2.011, "step": 23073 }, { "epoch": 0.6192035208243881, "grad_norm": 0.271484375, "learning_rate": 0.0010464662215876692, "loss": 1.9403, "step": 23074 }, { "epoch": 0.6192303563761271, "grad_norm": 0.271484375, "learning_rate": 0.001046447447229642, "loss": 2.0211, "step": 23075 }, { "epoch": 0.619257191927866, "grad_norm": 0.267578125, "learning_rate": 0.0010464286718922403, "loss": 1.9194, "step": 23076 }, { "epoch": 0.619284027479605, "grad_norm": 0.2734375, "learning_rate": 0.0010464098955755053, "loss": 2.0694, "step": 23077 }, { "epoch": 0.619310863031344, "grad_norm": 0.2734375, "learning_rate": 0.0010463911182794787, "loss": 1.9886, "step": 23078 }, { "epoch": 0.6193376985830829, "grad_norm": 0.283203125, "learning_rate": 0.0010463723400042008, "loss": 2.0427, "step": 23079 }, { "epoch": 0.6193645341348218, "grad_norm": 0.2734375, "learning_rate": 0.0010463535607497137, "loss": 2.0082, "step": 23080 }, { "epoch": 0.6193913696865607, "grad_norm": 0.287109375, "learning_rate": 0.0010463347805160581, "loss": 2.0629, "step": 23081 }, { "epoch": 0.6194182052382997, "grad_norm": 0.271484375, "learning_rate": 0.0010463159993032752, "loss": 1.9315, "step": 23082 }, { "epoch": 0.6194450407900386, "grad_norm": 0.27734375, "learning_rate": 0.0010462972171114066, "loss": 2.0584, "step": 23083 }, { "epoch": 0.6194718763417776, "grad_norm": 0.279296875, "learning_rate": 0.001046278433940493, "loss": 2.0279, "step": 23084 }, { "epoch": 0.6194987118935166, "grad_norm": 0.271484375, "learning_rate": 0.0010462596497905759, "loss": 2.0661, "step": 23085 }, { "epoch": 0.6195255474452555, "grad_norm": 0.275390625, "learning_rate": 0.0010462408646616963, "loss": 1.9654, "step": 23086 }, { "epoch": 0.6195523829969944, "grad_norm": 0.283203125, "learning_rate": 0.0010462220785538957, "loss": 2.1568, "step": 23087 }, { "epoch": 0.6195792185487333, "grad_norm": 0.267578125, "learning_rate": 0.0010462032914672148, "loss": 1.9391, "step": 23088 }, { "epoch": 0.6196060541004723, "grad_norm": 0.28515625, "learning_rate": 0.0010461845034016955, "loss": 1.9471, "step": 23089 }, { "epoch": 0.6196328896522112, "grad_norm": 0.283203125, "learning_rate": 0.0010461657143573785, "loss": 2.1035, "step": 23090 }, { "epoch": 0.6196597252039502, "grad_norm": 0.26953125, "learning_rate": 0.0010461469243343052, "loss": 2.0109, "step": 23091 }, { "epoch": 0.6196865607556892, "grad_norm": 0.27734375, "learning_rate": 0.0010461281333325168, "loss": 1.9997, "step": 23092 }, { "epoch": 0.619713396307428, "grad_norm": 0.28515625, "learning_rate": 0.0010461093413520548, "loss": 2.0392, "step": 23093 }, { "epoch": 0.619740231859167, "grad_norm": 0.28125, "learning_rate": 0.00104609054839296, "loss": 1.9988, "step": 23094 }, { "epoch": 0.6197670674109059, "grad_norm": 0.27734375, "learning_rate": 0.0010460717544552736, "loss": 2.0335, "step": 23095 }, { "epoch": 0.6197939029626449, "grad_norm": 0.26953125, "learning_rate": 0.001046052959539037, "loss": 1.991, "step": 23096 }, { "epoch": 0.6198207385143839, "grad_norm": 0.27734375, "learning_rate": 0.0010460341636442916, "loss": 1.9538, "step": 23097 }, { "epoch": 0.6198475740661228, "grad_norm": 0.27734375, "learning_rate": 0.0010460153667710783, "loss": 2.1019, "step": 23098 }, { "epoch": 0.6198744096178618, "grad_norm": 0.279296875, "learning_rate": 0.0010459965689194387, "loss": 1.9437, "step": 23099 }, { "epoch": 0.6199012451696007, "grad_norm": 0.2890625, "learning_rate": 0.0010459777700894137, "loss": 2.013, "step": 23100 }, { "epoch": 0.6199280807213396, "grad_norm": 0.2734375, "learning_rate": 0.001045958970281045, "loss": 1.9839, "step": 23101 }, { "epoch": 0.6199549162730785, "grad_norm": 0.26953125, "learning_rate": 0.0010459401694943732, "loss": 1.9607, "step": 23102 }, { "epoch": 0.6199817518248175, "grad_norm": 0.26953125, "learning_rate": 0.0010459213677294398, "loss": 2.0443, "step": 23103 }, { "epoch": 0.6200085873765565, "grad_norm": 0.267578125, "learning_rate": 0.0010459025649862863, "loss": 1.8919, "step": 23104 }, { "epoch": 0.6200354229282954, "grad_norm": 0.279296875, "learning_rate": 0.0010458837612649537, "loss": 1.9644, "step": 23105 }, { "epoch": 0.6200622584800344, "grad_norm": 0.265625, "learning_rate": 0.0010458649565654834, "loss": 1.8322, "step": 23106 }, { "epoch": 0.6200890940317733, "grad_norm": 0.28125, "learning_rate": 0.0010458461508879163, "loss": 1.9384, "step": 23107 }, { "epoch": 0.6201159295835122, "grad_norm": 0.28125, "learning_rate": 0.0010458273442322942, "loss": 1.9779, "step": 23108 }, { "epoch": 0.6201427651352511, "grad_norm": 0.2734375, "learning_rate": 0.001045808536598658, "loss": 1.8689, "step": 23109 }, { "epoch": 0.6201696006869901, "grad_norm": 0.294921875, "learning_rate": 0.001045789727987049, "loss": 1.997, "step": 23110 }, { "epoch": 0.6201964362387291, "grad_norm": 0.2734375, "learning_rate": 0.0010457709183975083, "loss": 1.8591, "step": 23111 }, { "epoch": 0.620223271790468, "grad_norm": 0.291015625, "learning_rate": 0.0010457521078300776, "loss": 1.9662, "step": 23112 }, { "epoch": 0.620250107342207, "grad_norm": 0.310546875, "learning_rate": 0.0010457332962847978, "loss": 2.0319, "step": 23113 }, { "epoch": 0.6202769428939459, "grad_norm": 0.31640625, "learning_rate": 0.0010457144837617103, "loss": 2.0302, "step": 23114 }, { "epoch": 0.6203037784456849, "grad_norm": 0.30078125, "learning_rate": 0.0010456956702608565, "loss": 2.1792, "step": 23115 }, { "epoch": 0.6203306139974238, "grad_norm": 0.296875, "learning_rate": 0.0010456768557822774, "loss": 2.0136, "step": 23116 }, { "epoch": 0.6203574495491627, "grad_norm": 0.30078125, "learning_rate": 0.0010456580403260143, "loss": 2.0503, "step": 23117 }, { "epoch": 0.6203842851009017, "grad_norm": 0.291015625, "learning_rate": 0.0010456392238921087, "loss": 1.9987, "step": 23118 }, { "epoch": 0.6204111206526406, "grad_norm": 0.283203125, "learning_rate": 0.001045620406480602, "loss": 2.0077, "step": 23119 }, { "epoch": 0.6204379562043796, "grad_norm": 0.294921875, "learning_rate": 0.0010456015880915348, "loss": 2.0865, "step": 23120 }, { "epoch": 0.6204647917561185, "grad_norm": 0.29296875, "learning_rate": 0.0010455827687249491, "loss": 2.0944, "step": 23121 }, { "epoch": 0.6204916273078575, "grad_norm": 0.279296875, "learning_rate": 0.0010455639483808855, "loss": 2.0352, "step": 23122 }, { "epoch": 0.6205184628595964, "grad_norm": 0.283203125, "learning_rate": 0.0010455451270593862, "loss": 2.1395, "step": 23123 }, { "epoch": 0.6205452984113353, "grad_norm": 0.283203125, "learning_rate": 0.0010455263047604918, "loss": 2.0024, "step": 23124 }, { "epoch": 0.6205721339630743, "grad_norm": 0.283203125, "learning_rate": 0.0010455074814842435, "loss": 2.1234, "step": 23125 }, { "epoch": 0.6205989695148132, "grad_norm": 0.28125, "learning_rate": 0.001045488657230683, "loss": 2.1663, "step": 23126 }, { "epoch": 0.6206258050665522, "grad_norm": 0.279296875, "learning_rate": 0.0010454698319998517, "loss": 2.0205, "step": 23127 }, { "epoch": 0.6206526406182911, "grad_norm": 0.279296875, "learning_rate": 0.0010454510057917904, "loss": 2.0779, "step": 23128 }, { "epoch": 0.62067947617003, "grad_norm": 0.26953125, "learning_rate": 0.0010454321786065406, "loss": 1.9557, "step": 23129 }, { "epoch": 0.620706311721769, "grad_norm": 0.26953125, "learning_rate": 0.0010454133504441438, "loss": 1.9507, "step": 23130 }, { "epoch": 0.6207331472735079, "grad_norm": 0.271484375, "learning_rate": 0.0010453945213046412, "loss": 2.0033, "step": 23131 }, { "epoch": 0.6207599828252469, "grad_norm": 0.279296875, "learning_rate": 0.0010453756911880739, "loss": 2.1322, "step": 23132 }, { "epoch": 0.6207868183769858, "grad_norm": 0.26953125, "learning_rate": 0.0010453568600944836, "loss": 1.9774, "step": 23133 }, { "epoch": 0.6208136539287248, "grad_norm": 0.28125, "learning_rate": 0.001045338028023911, "loss": 2.0893, "step": 23134 }, { "epoch": 0.6208404894804637, "grad_norm": 0.28125, "learning_rate": 0.001045319194976398, "loss": 2.0058, "step": 23135 }, { "epoch": 0.6208673250322027, "grad_norm": 0.2734375, "learning_rate": 0.0010453003609519856, "loss": 1.8646, "step": 23136 }, { "epoch": 0.6208941605839416, "grad_norm": 0.26953125, "learning_rate": 0.0010452815259507154, "loss": 1.952, "step": 23137 }, { "epoch": 0.6209209961356805, "grad_norm": 0.28125, "learning_rate": 0.0010452626899726284, "loss": 1.9024, "step": 23138 }, { "epoch": 0.6209478316874195, "grad_norm": 0.283203125, "learning_rate": 0.001045243853017766, "loss": 2.0956, "step": 23139 }, { "epoch": 0.6209746672391584, "grad_norm": 0.28125, "learning_rate": 0.0010452250150861698, "loss": 1.9836, "step": 23140 }, { "epoch": 0.6210015027908974, "grad_norm": 0.2734375, "learning_rate": 0.0010452061761778808, "loss": 1.9528, "step": 23141 }, { "epoch": 0.6210283383426364, "grad_norm": 0.27734375, "learning_rate": 0.0010451873362929405, "loss": 2.0304, "step": 23142 }, { "epoch": 0.6210551738943753, "grad_norm": 0.275390625, "learning_rate": 0.00104516849543139, "loss": 2.0249, "step": 23143 }, { "epoch": 0.6210820094461142, "grad_norm": 0.279296875, "learning_rate": 0.0010451496535932709, "loss": 2.0292, "step": 23144 }, { "epoch": 0.6211088449978531, "grad_norm": 0.271484375, "learning_rate": 0.0010451308107786242, "loss": 1.9128, "step": 23145 }, { "epoch": 0.6211356805495921, "grad_norm": 0.2734375, "learning_rate": 0.0010451119669874916, "loss": 2.0026, "step": 23146 }, { "epoch": 0.621162516101331, "grad_norm": 0.2734375, "learning_rate": 0.0010450931222199145, "loss": 1.9593, "step": 23147 }, { "epoch": 0.62118935165307, "grad_norm": 0.287109375, "learning_rate": 0.0010450742764759337, "loss": 2.0164, "step": 23148 }, { "epoch": 0.621216187204809, "grad_norm": 0.283203125, "learning_rate": 0.0010450554297555912, "loss": 1.984, "step": 23149 }, { "epoch": 0.6212430227565479, "grad_norm": 0.2734375, "learning_rate": 0.0010450365820589278, "loss": 2.0238, "step": 23150 }, { "epoch": 0.6212698583082868, "grad_norm": 0.275390625, "learning_rate": 0.0010450177333859853, "loss": 2.0679, "step": 23151 }, { "epoch": 0.6212966938600257, "grad_norm": 0.26953125, "learning_rate": 0.0010449988837368048, "loss": 1.9088, "step": 23152 }, { "epoch": 0.6213235294117647, "grad_norm": 0.27734375, "learning_rate": 0.0010449800331114275, "loss": 1.9475, "step": 23153 }, { "epoch": 0.6213503649635036, "grad_norm": 0.267578125, "learning_rate": 0.0010449611815098949, "loss": 1.9578, "step": 23154 }, { "epoch": 0.6213772005152426, "grad_norm": 0.2734375, "learning_rate": 0.0010449423289322485, "loss": 1.9409, "step": 23155 }, { "epoch": 0.6214040360669816, "grad_norm": 0.263671875, "learning_rate": 0.0010449234753785294, "loss": 1.887, "step": 23156 }, { "epoch": 0.6214308716187205, "grad_norm": 0.27734375, "learning_rate": 0.0010449046208487792, "loss": 1.9491, "step": 23157 }, { "epoch": 0.6214577071704595, "grad_norm": 0.271484375, "learning_rate": 0.0010448857653430391, "loss": 1.8891, "step": 23158 }, { "epoch": 0.6214845427221983, "grad_norm": 0.287109375, "learning_rate": 0.0010448669088613505, "loss": 2.0435, "step": 23159 }, { "epoch": 0.6215113782739373, "grad_norm": 0.283203125, "learning_rate": 0.0010448480514037549, "loss": 1.9738, "step": 23160 }, { "epoch": 0.6215382138256763, "grad_norm": 0.28515625, "learning_rate": 0.0010448291929702935, "loss": 1.91, "step": 23161 }, { "epoch": 0.6215650493774152, "grad_norm": 0.28515625, "learning_rate": 0.0010448103335610076, "loss": 1.9972, "step": 23162 }, { "epoch": 0.6215918849291542, "grad_norm": 0.296875, "learning_rate": 0.001044791473175939, "loss": 2.0142, "step": 23163 }, { "epoch": 0.6216187204808931, "grad_norm": 0.283203125, "learning_rate": 0.0010447726118151285, "loss": 2.0301, "step": 23164 }, { "epoch": 0.621645556032632, "grad_norm": 0.271484375, "learning_rate": 0.0010447537494786177, "loss": 1.8855, "step": 23165 }, { "epoch": 0.6216723915843709, "grad_norm": 0.271484375, "learning_rate": 0.0010447348861664482, "loss": 1.9041, "step": 23166 }, { "epoch": 0.6216992271361099, "grad_norm": 0.26953125, "learning_rate": 0.0010447160218786612, "loss": 1.8802, "step": 23167 }, { "epoch": 0.6217260626878489, "grad_norm": 0.26953125, "learning_rate": 0.001044697156615298, "loss": 1.8741, "step": 23168 }, { "epoch": 0.6217528982395878, "grad_norm": 0.279296875, "learning_rate": 0.0010446782903763998, "loss": 1.9012, "step": 23169 }, { "epoch": 0.6217797337913268, "grad_norm": 0.302734375, "learning_rate": 0.0010446594231620088, "loss": 1.9104, "step": 23170 }, { "epoch": 0.6218065693430657, "grad_norm": 0.310546875, "learning_rate": 0.0010446405549721656, "loss": 2.063, "step": 23171 }, { "epoch": 0.6218334048948047, "grad_norm": 0.302734375, "learning_rate": 0.0010446216858069118, "loss": 2.0814, "step": 23172 }, { "epoch": 0.6218602404465435, "grad_norm": 0.296875, "learning_rate": 0.0010446028156662888, "loss": 2.0232, "step": 23173 }, { "epoch": 0.6218870759982825, "grad_norm": 0.298828125, "learning_rate": 0.0010445839445503383, "loss": 2.0576, "step": 23174 }, { "epoch": 0.6219139115500215, "grad_norm": 0.2890625, "learning_rate": 0.001044565072459101, "loss": 2.0951, "step": 23175 }, { "epoch": 0.6219407471017604, "grad_norm": 0.29296875, "learning_rate": 0.0010445461993926189, "loss": 2.0839, "step": 23176 }, { "epoch": 0.6219675826534994, "grad_norm": 0.28515625, "learning_rate": 0.0010445273253509334, "loss": 2.0505, "step": 23177 }, { "epoch": 0.6219944182052383, "grad_norm": 0.28515625, "learning_rate": 0.0010445084503340855, "loss": 2.0155, "step": 23178 }, { "epoch": 0.6220212537569773, "grad_norm": 0.27734375, "learning_rate": 0.001044489574342117, "loss": 2.0171, "step": 23179 }, { "epoch": 0.6220480893087161, "grad_norm": 0.275390625, "learning_rate": 0.0010444706973750692, "loss": 2.0179, "step": 23180 }, { "epoch": 0.6220749248604551, "grad_norm": 0.28125, "learning_rate": 0.0010444518194329833, "loss": 2.0736, "step": 23181 }, { "epoch": 0.6221017604121941, "grad_norm": 0.283203125, "learning_rate": 0.001044432940515901, "loss": 2.1156, "step": 23182 }, { "epoch": 0.622128595963933, "grad_norm": 0.283203125, "learning_rate": 0.0010444140606238634, "loss": 2.1062, "step": 23183 }, { "epoch": 0.622155431515672, "grad_norm": 0.287109375, "learning_rate": 0.001044395179756912, "loss": 2.0705, "step": 23184 }, { "epoch": 0.6221822670674109, "grad_norm": 0.28125, "learning_rate": 0.0010443762979150885, "loss": 2.0929, "step": 23185 }, { "epoch": 0.6222091026191499, "grad_norm": 0.28125, "learning_rate": 0.0010443574150984342, "loss": 1.9572, "step": 23186 }, { "epoch": 0.6222359381708888, "grad_norm": 0.283203125, "learning_rate": 0.0010443385313069903, "loss": 2.0695, "step": 23187 }, { "epoch": 0.6222627737226277, "grad_norm": 0.2890625, "learning_rate": 0.0010443196465407987, "loss": 2.1112, "step": 23188 }, { "epoch": 0.6222896092743667, "grad_norm": 0.275390625, "learning_rate": 0.0010443007607999003, "loss": 1.9849, "step": 23189 }, { "epoch": 0.6223164448261056, "grad_norm": 0.279296875, "learning_rate": 0.0010442818740843366, "loss": 2.0925, "step": 23190 }, { "epoch": 0.6223432803778446, "grad_norm": 0.271484375, "learning_rate": 0.0010442629863941495, "loss": 1.943, "step": 23191 }, { "epoch": 0.6223701159295835, "grad_norm": 0.27734375, "learning_rate": 0.0010442440977293799, "loss": 2.0194, "step": 23192 }, { "epoch": 0.6223969514813225, "grad_norm": 0.275390625, "learning_rate": 0.0010442252080900695, "loss": 2.0463, "step": 23193 }, { "epoch": 0.6224237870330614, "grad_norm": 0.27734375, "learning_rate": 0.0010442063174762595, "loss": 1.9918, "step": 23194 }, { "epoch": 0.6224506225848003, "grad_norm": 0.2734375, "learning_rate": 0.0010441874258879918, "loss": 2.0211, "step": 23195 }, { "epoch": 0.6224774581365393, "grad_norm": 0.2890625, "learning_rate": 0.0010441685333253074, "loss": 2.149, "step": 23196 }, { "epoch": 0.6225042936882782, "grad_norm": 0.28515625, "learning_rate": 0.001044149639788248, "loss": 2.0895, "step": 23197 }, { "epoch": 0.6225311292400172, "grad_norm": 0.279296875, "learning_rate": 0.001044130745276855, "loss": 1.9539, "step": 23198 }, { "epoch": 0.6225579647917561, "grad_norm": 0.279296875, "learning_rate": 0.0010441118497911699, "loss": 2.0318, "step": 23199 }, { "epoch": 0.6225848003434951, "grad_norm": 0.2734375, "learning_rate": 0.0010440929533312338, "loss": 1.9651, "step": 23200 }, { "epoch": 0.622611635895234, "grad_norm": 0.28125, "learning_rate": 0.0010440740558970885, "loss": 1.9946, "step": 23201 }, { "epoch": 0.6226384714469729, "grad_norm": 0.287109375, "learning_rate": 0.0010440551574887754, "loss": 2.0931, "step": 23202 }, { "epoch": 0.6226653069987119, "grad_norm": 0.2890625, "learning_rate": 0.001044036258106336, "loss": 2.0476, "step": 23203 }, { "epoch": 0.6226921425504508, "grad_norm": 0.279296875, "learning_rate": 0.0010440173577498115, "loss": 2.0426, "step": 23204 }, { "epoch": 0.6227189781021898, "grad_norm": 0.267578125, "learning_rate": 0.0010439984564192437, "loss": 1.9311, "step": 23205 }, { "epoch": 0.6227458136539287, "grad_norm": 0.26953125, "learning_rate": 0.0010439795541146739, "loss": 1.9326, "step": 23206 }, { "epoch": 0.6227726492056677, "grad_norm": 0.28125, "learning_rate": 0.0010439606508361435, "loss": 2.0105, "step": 23207 }, { "epoch": 0.6227994847574067, "grad_norm": 0.2734375, "learning_rate": 0.001043941746583694, "loss": 1.9726, "step": 23208 }, { "epoch": 0.6228263203091455, "grad_norm": 0.267578125, "learning_rate": 0.001043922841357367, "loss": 1.9162, "step": 23209 }, { "epoch": 0.6228531558608845, "grad_norm": 0.279296875, "learning_rate": 0.001043903935157204, "loss": 2.0968, "step": 23210 }, { "epoch": 0.6228799914126234, "grad_norm": 0.2890625, "learning_rate": 0.0010438850279832464, "loss": 1.995, "step": 23211 }, { "epoch": 0.6229068269643624, "grad_norm": 0.28125, "learning_rate": 0.0010438661198355357, "loss": 1.9401, "step": 23212 }, { "epoch": 0.6229336625161014, "grad_norm": 0.283203125, "learning_rate": 0.0010438472107141131, "loss": 2.0309, "step": 23213 }, { "epoch": 0.6229604980678403, "grad_norm": 0.27734375, "learning_rate": 0.0010438283006190202, "loss": 2.0177, "step": 23214 }, { "epoch": 0.6229873336195793, "grad_norm": 0.283203125, "learning_rate": 0.0010438093895502986, "loss": 2.0089, "step": 23215 }, { "epoch": 0.6230141691713181, "grad_norm": 0.279296875, "learning_rate": 0.00104379047750799, "loss": 2.03, "step": 23216 }, { "epoch": 0.6230410047230571, "grad_norm": 0.28515625, "learning_rate": 0.0010437715644921355, "loss": 1.9585, "step": 23217 }, { "epoch": 0.623067840274796, "grad_norm": 0.283203125, "learning_rate": 0.0010437526505027769, "loss": 2.0666, "step": 23218 }, { "epoch": 0.623094675826535, "grad_norm": 0.28125, "learning_rate": 0.0010437337355399554, "loss": 1.9139, "step": 23219 }, { "epoch": 0.623121511378274, "grad_norm": 0.28515625, "learning_rate": 0.0010437148196037126, "loss": 1.938, "step": 23220 }, { "epoch": 0.6231483469300129, "grad_norm": 0.2890625, "learning_rate": 0.0010436959026940901, "loss": 2.0004, "step": 23221 }, { "epoch": 0.6231751824817519, "grad_norm": 0.28125, "learning_rate": 0.0010436769848111295, "loss": 2.023, "step": 23222 }, { "epoch": 0.6232020180334907, "grad_norm": 0.275390625, "learning_rate": 0.0010436580659548718, "loss": 1.9983, "step": 23223 }, { "epoch": 0.6232288535852297, "grad_norm": 0.279296875, "learning_rate": 0.001043639146125359, "loss": 2.0313, "step": 23224 }, { "epoch": 0.6232556891369686, "grad_norm": 0.27734375, "learning_rate": 0.0010436202253226324, "loss": 2.1068, "step": 23225 }, { "epoch": 0.6232825246887076, "grad_norm": 0.318359375, "learning_rate": 0.0010436013035467337, "loss": 2.1405, "step": 23226 }, { "epoch": 0.6233093602404466, "grad_norm": 0.306640625, "learning_rate": 0.001043582380797704, "loss": 2.1562, "step": 23227 }, { "epoch": 0.6233361957921855, "grad_norm": 0.330078125, "learning_rate": 0.0010435634570755852, "loss": 2.1577, "step": 23228 }, { "epoch": 0.6233630313439245, "grad_norm": 0.306640625, "learning_rate": 0.0010435445323804186, "loss": 2.1028, "step": 23229 }, { "epoch": 0.6233898668956633, "grad_norm": 0.3046875, "learning_rate": 0.001043525606712246, "loss": 2.1162, "step": 23230 }, { "epoch": 0.6234167024474023, "grad_norm": 0.28515625, "learning_rate": 0.0010435066800711084, "loss": 1.9572, "step": 23231 }, { "epoch": 0.6234435379991413, "grad_norm": 0.271484375, "learning_rate": 0.0010434877524570477, "loss": 2.0031, "step": 23232 }, { "epoch": 0.6234703735508802, "grad_norm": 0.279296875, "learning_rate": 0.0010434688238701055, "loss": 2.098, "step": 23233 }, { "epoch": 0.6234972091026192, "grad_norm": 0.29296875, "learning_rate": 0.0010434498943103232, "loss": 2.1683, "step": 23234 }, { "epoch": 0.6235240446543581, "grad_norm": 0.283203125, "learning_rate": 0.001043430963777742, "loss": 1.9818, "step": 23235 }, { "epoch": 0.6235508802060971, "grad_norm": 0.287109375, "learning_rate": 0.0010434120322724038, "loss": 2.1082, "step": 23236 }, { "epoch": 0.6235777157578359, "grad_norm": 0.28515625, "learning_rate": 0.0010433930997943504, "loss": 2.1707, "step": 23237 }, { "epoch": 0.6236045513095749, "grad_norm": 0.271484375, "learning_rate": 0.0010433741663436226, "loss": 2.0228, "step": 23238 }, { "epoch": 0.6236313868613139, "grad_norm": 0.271484375, "learning_rate": 0.0010433552319202624, "loss": 2.0674, "step": 23239 }, { "epoch": 0.6236582224130528, "grad_norm": 0.2734375, "learning_rate": 0.0010433362965243114, "loss": 2.1064, "step": 23240 }, { "epoch": 0.6236850579647918, "grad_norm": 0.27734375, "learning_rate": 0.001043317360155811, "loss": 1.994, "step": 23241 }, { "epoch": 0.6237118935165307, "grad_norm": 0.283203125, "learning_rate": 0.0010432984228148025, "loss": 2.0885, "step": 23242 }, { "epoch": 0.6237387290682697, "grad_norm": 0.2734375, "learning_rate": 0.001043279484501328, "loss": 2.0119, "step": 23243 }, { "epoch": 0.6237655646200085, "grad_norm": 0.279296875, "learning_rate": 0.0010432605452154284, "loss": 2.0851, "step": 23244 }, { "epoch": 0.6237924001717475, "grad_norm": 0.28515625, "learning_rate": 0.0010432416049571459, "loss": 2.0338, "step": 23245 }, { "epoch": 0.6238192357234865, "grad_norm": 0.2734375, "learning_rate": 0.0010432226637265216, "loss": 1.9635, "step": 23246 }, { "epoch": 0.6238460712752254, "grad_norm": 0.271484375, "learning_rate": 0.001043203721523597, "loss": 2.04, "step": 23247 }, { "epoch": 0.6238729068269644, "grad_norm": 0.26953125, "learning_rate": 0.001043184778348414, "loss": 1.9934, "step": 23248 }, { "epoch": 0.6238997423787033, "grad_norm": 0.267578125, "learning_rate": 0.0010431658342010138, "loss": 2.012, "step": 23249 }, { "epoch": 0.6239265779304423, "grad_norm": 0.267578125, "learning_rate": 0.0010431468890814385, "loss": 2.081, "step": 23250 }, { "epoch": 0.6239534134821811, "grad_norm": 0.279296875, "learning_rate": 0.001043127942989729, "loss": 2.066, "step": 23251 }, { "epoch": 0.6239802490339201, "grad_norm": 0.275390625, "learning_rate": 0.0010431089959259273, "loss": 2.0551, "step": 23252 }, { "epoch": 0.6240070845856591, "grad_norm": 0.275390625, "learning_rate": 0.0010430900478900747, "loss": 2.0041, "step": 23253 }, { "epoch": 0.624033920137398, "grad_norm": 0.2734375, "learning_rate": 0.001043071098882213, "loss": 2.0045, "step": 23254 }, { "epoch": 0.624060755689137, "grad_norm": 0.283203125, "learning_rate": 0.0010430521489023837, "loss": 2.011, "step": 23255 }, { "epoch": 0.6240875912408759, "grad_norm": 0.265625, "learning_rate": 0.0010430331979506282, "loss": 1.943, "step": 23256 }, { "epoch": 0.6241144267926149, "grad_norm": 0.275390625, "learning_rate": 0.0010430142460269884, "loss": 2.0494, "step": 23257 }, { "epoch": 0.6241412623443539, "grad_norm": 0.26953125, "learning_rate": 0.0010429952931315055, "loss": 1.9669, "step": 23258 }, { "epoch": 0.6241680978960927, "grad_norm": 0.28515625, "learning_rate": 0.0010429763392642212, "loss": 2.0724, "step": 23259 }, { "epoch": 0.6241949334478317, "grad_norm": 0.283203125, "learning_rate": 0.0010429573844251772, "loss": 2.1391, "step": 23260 }, { "epoch": 0.6242217689995706, "grad_norm": 0.2734375, "learning_rate": 0.0010429384286144152, "loss": 1.9666, "step": 23261 }, { "epoch": 0.6242486045513096, "grad_norm": 0.26953125, "learning_rate": 0.0010429194718319765, "loss": 1.9345, "step": 23262 }, { "epoch": 0.6242754401030485, "grad_norm": 0.27734375, "learning_rate": 0.0010429005140779027, "loss": 1.9963, "step": 23263 }, { "epoch": 0.6243022756547875, "grad_norm": 0.275390625, "learning_rate": 0.0010428815553522356, "loss": 2.0054, "step": 23264 }, { "epoch": 0.6243291112065265, "grad_norm": 0.279296875, "learning_rate": 0.0010428625956550165, "loss": 1.9423, "step": 23265 }, { "epoch": 0.6243559467582653, "grad_norm": 0.26953125, "learning_rate": 0.0010428436349862874, "loss": 1.9637, "step": 23266 }, { "epoch": 0.6243827823100043, "grad_norm": 0.28515625, "learning_rate": 0.0010428246733460895, "loss": 2.0287, "step": 23267 }, { "epoch": 0.6244096178617432, "grad_norm": 0.263671875, "learning_rate": 0.0010428057107344645, "loss": 1.9134, "step": 23268 }, { "epoch": 0.6244364534134822, "grad_norm": 0.263671875, "learning_rate": 0.0010427867471514542, "loss": 1.9502, "step": 23269 }, { "epoch": 0.6244632889652211, "grad_norm": 0.27734375, "learning_rate": 0.0010427677825971, "loss": 2.0699, "step": 23270 }, { "epoch": 0.6244901245169601, "grad_norm": 0.26171875, "learning_rate": 0.0010427488170714433, "loss": 1.8871, "step": 23271 }, { "epoch": 0.6245169600686991, "grad_norm": 0.291015625, "learning_rate": 0.0010427298505745262, "loss": 1.9771, "step": 23272 }, { "epoch": 0.6245437956204379, "grad_norm": 0.28125, "learning_rate": 0.00104271088310639, "loss": 1.9273, "step": 23273 }, { "epoch": 0.6245706311721769, "grad_norm": 0.28125, "learning_rate": 0.0010426919146670763, "loss": 2.0565, "step": 23274 }, { "epoch": 0.6245974667239158, "grad_norm": 0.28125, "learning_rate": 0.001042672945256627, "loss": 1.9332, "step": 23275 }, { "epoch": 0.6246243022756548, "grad_norm": 0.27734375, "learning_rate": 0.0010426539748750834, "loss": 2.012, "step": 23276 }, { "epoch": 0.6246511378273937, "grad_norm": 0.265625, "learning_rate": 0.001042635003522487, "loss": 1.9747, "step": 23277 }, { "epoch": 0.6246779733791327, "grad_norm": 0.283203125, "learning_rate": 0.00104261603119888, "loss": 1.9765, "step": 23278 }, { "epoch": 0.6247048089308717, "grad_norm": 0.28125, "learning_rate": 0.0010425970579043035, "loss": 1.9431, "step": 23279 }, { "epoch": 0.6247316444826105, "grad_norm": 0.275390625, "learning_rate": 0.0010425780836387992, "loss": 1.925, "step": 23280 }, { "epoch": 0.6247584800343495, "grad_norm": 0.287109375, "learning_rate": 0.0010425591084024088, "loss": 2.0151, "step": 23281 }, { "epoch": 0.6247853155860884, "grad_norm": 0.3046875, "learning_rate": 0.0010425401321951739, "loss": 2.0802, "step": 23282 }, { "epoch": 0.6248121511378274, "grad_norm": 0.302734375, "learning_rate": 0.001042521155017136, "loss": 2.0803, "step": 23283 }, { "epoch": 0.6248389866895664, "grad_norm": 0.296875, "learning_rate": 0.001042502176868337, "loss": 2.1502, "step": 23284 }, { "epoch": 0.6248658222413053, "grad_norm": 0.30859375, "learning_rate": 0.0010424831977488185, "loss": 2.134, "step": 23285 }, { "epoch": 0.6248926577930443, "grad_norm": 0.279296875, "learning_rate": 0.0010424642176586221, "loss": 2.0477, "step": 23286 }, { "epoch": 0.6249194933447831, "grad_norm": 0.2890625, "learning_rate": 0.0010424452365977892, "loss": 2.0473, "step": 23287 }, { "epoch": 0.6249463288965221, "grad_norm": 0.28515625, "learning_rate": 0.0010424262545663616, "loss": 2.1322, "step": 23288 }, { "epoch": 0.624973164448261, "grad_norm": 0.28515625, "learning_rate": 0.0010424072715643812, "loss": 2.1743, "step": 23289 }, { "epoch": 0.625, "grad_norm": 0.28515625, "learning_rate": 0.0010423882875918893, "loss": 2.1836, "step": 23290 }, { "epoch": 0.625026835551739, "grad_norm": 0.27734375, "learning_rate": 0.0010423693026489276, "loss": 2.0384, "step": 23291 }, { "epoch": 0.6250536711034779, "grad_norm": 0.28125, "learning_rate": 0.0010423503167355378, "loss": 2.0714, "step": 23292 }, { "epoch": 0.6250805066552169, "grad_norm": 0.283203125, "learning_rate": 0.0010423313298517616, "loss": 2.1357, "step": 23293 }, { "epoch": 0.6251073422069557, "grad_norm": 0.283203125, "learning_rate": 0.0010423123419976406, "loss": 2.0698, "step": 23294 }, { "epoch": 0.6251341777586947, "grad_norm": 0.283203125, "learning_rate": 0.0010422933531732165, "loss": 2.0757, "step": 23295 }, { "epoch": 0.6251610133104336, "grad_norm": 0.275390625, "learning_rate": 0.0010422743633785308, "loss": 2.0989, "step": 23296 }, { "epoch": 0.6251878488621726, "grad_norm": 0.28125, "learning_rate": 0.0010422553726136252, "loss": 2.1152, "step": 23297 }, { "epoch": 0.6252146844139116, "grad_norm": 0.28515625, "learning_rate": 0.0010422363808785414, "loss": 2.1025, "step": 23298 }, { "epoch": 0.6252415199656505, "grad_norm": 0.265625, "learning_rate": 0.0010422173881733214, "loss": 1.9371, "step": 23299 }, { "epoch": 0.6252683555173895, "grad_norm": 0.2734375, "learning_rate": 0.0010421983944980061, "loss": 2.0289, "step": 23300 }, { "epoch": 0.6252951910691283, "grad_norm": 0.267578125, "learning_rate": 0.001042179399852638, "loss": 1.9649, "step": 23301 }, { "epoch": 0.6253220266208673, "grad_norm": 0.279296875, "learning_rate": 0.0010421604042372583, "loss": 2.1058, "step": 23302 }, { "epoch": 0.6253488621726063, "grad_norm": 0.275390625, "learning_rate": 0.0010421414076519087, "loss": 1.9953, "step": 23303 }, { "epoch": 0.6253756977243452, "grad_norm": 0.279296875, "learning_rate": 0.001042122410096631, "loss": 2.0014, "step": 23304 }, { "epoch": 0.6254025332760842, "grad_norm": 0.26953125, "learning_rate": 0.0010421034115714667, "loss": 2.0352, "step": 23305 }, { "epoch": 0.6254293688278231, "grad_norm": 0.275390625, "learning_rate": 0.0010420844120764578, "loss": 1.9847, "step": 23306 }, { "epoch": 0.6254562043795621, "grad_norm": 0.26953125, "learning_rate": 0.0010420654116116454, "loss": 2.0608, "step": 23307 }, { "epoch": 0.6254830399313009, "grad_norm": 0.271484375, "learning_rate": 0.0010420464101770718, "loss": 2.1442, "step": 23308 }, { "epoch": 0.6255098754830399, "grad_norm": 0.279296875, "learning_rate": 0.0010420274077727783, "loss": 2.1374, "step": 23309 }, { "epoch": 0.6255367110347789, "grad_norm": 0.2734375, "learning_rate": 0.001042008404398807, "loss": 1.9582, "step": 23310 }, { "epoch": 0.6255635465865178, "grad_norm": 0.271484375, "learning_rate": 0.001041989400055199, "loss": 1.9782, "step": 23311 }, { "epoch": 0.6255903821382568, "grad_norm": 0.27734375, "learning_rate": 0.0010419703947419964, "loss": 2.0674, "step": 23312 }, { "epoch": 0.6256172176899957, "grad_norm": 0.279296875, "learning_rate": 0.0010419513884592408, "loss": 1.9728, "step": 23313 }, { "epoch": 0.6256440532417347, "grad_norm": 0.2734375, "learning_rate": 0.0010419323812069737, "loss": 1.9844, "step": 23314 }, { "epoch": 0.6256708887934735, "grad_norm": 0.279296875, "learning_rate": 0.001041913372985237, "loss": 2.0164, "step": 23315 }, { "epoch": 0.6256977243452125, "grad_norm": 0.283203125, "learning_rate": 0.0010418943637940728, "loss": 2.067, "step": 23316 }, { "epoch": 0.6257245598969515, "grad_norm": 0.28515625, "learning_rate": 0.001041875353633522, "loss": 2.0586, "step": 23317 }, { "epoch": 0.6257513954486904, "grad_norm": 0.287109375, "learning_rate": 0.0010418563425036267, "loss": 2.023, "step": 23318 }, { "epoch": 0.6257782310004294, "grad_norm": 0.283203125, "learning_rate": 0.0010418373304044285, "loss": 1.9152, "step": 23319 }, { "epoch": 0.6258050665521683, "grad_norm": 0.27734375, "learning_rate": 0.0010418183173359693, "loss": 1.9747, "step": 23320 }, { "epoch": 0.6258319021039073, "grad_norm": 0.28125, "learning_rate": 0.001041799303298291, "loss": 1.9598, "step": 23321 }, { "epoch": 0.6258587376556461, "grad_norm": 0.279296875, "learning_rate": 0.0010417802882914345, "loss": 2.0541, "step": 23322 }, { "epoch": 0.6258855732073851, "grad_norm": 0.27734375, "learning_rate": 0.0010417612723154423, "loss": 2.0294, "step": 23323 }, { "epoch": 0.6259124087591241, "grad_norm": 0.267578125, "learning_rate": 0.0010417422553703557, "loss": 1.8946, "step": 23324 }, { "epoch": 0.625939244310863, "grad_norm": 0.275390625, "learning_rate": 0.0010417232374562166, "loss": 2.0224, "step": 23325 }, { "epoch": 0.625966079862602, "grad_norm": 0.2890625, "learning_rate": 0.0010417042185730668, "loss": 2.0745, "step": 23326 }, { "epoch": 0.6259929154143409, "grad_norm": 0.2734375, "learning_rate": 0.0010416851987209478, "loss": 2.0327, "step": 23327 }, { "epoch": 0.6260197509660799, "grad_norm": 0.283203125, "learning_rate": 0.0010416661778999013, "loss": 2.1122, "step": 23328 }, { "epoch": 0.6260465865178189, "grad_norm": 0.283203125, "learning_rate": 0.0010416471561099695, "loss": 1.9941, "step": 23329 }, { "epoch": 0.6260734220695577, "grad_norm": 0.28125, "learning_rate": 0.0010416281333511933, "loss": 2.0714, "step": 23330 }, { "epoch": 0.6261002576212967, "grad_norm": 0.2890625, "learning_rate": 0.0010416091096236153, "loss": 2.0318, "step": 23331 }, { "epoch": 0.6261270931730356, "grad_norm": 0.314453125, "learning_rate": 0.0010415900849272766, "loss": 2.0783, "step": 23332 }, { "epoch": 0.6261539287247746, "grad_norm": 0.2734375, "learning_rate": 0.0010415710592622193, "loss": 1.9343, "step": 23333 }, { "epoch": 0.6261807642765135, "grad_norm": 0.2734375, "learning_rate": 0.001041552032628485, "loss": 1.8681, "step": 23334 }, { "epoch": 0.6262075998282525, "grad_norm": 0.283203125, "learning_rate": 0.0010415330050261155, "loss": 2.0653, "step": 23335 }, { "epoch": 0.6262344353799915, "grad_norm": 0.30078125, "learning_rate": 0.0010415139764551522, "loss": 2.09, "step": 23336 }, { "epoch": 0.6262612709317303, "grad_norm": 0.345703125, "learning_rate": 0.0010414949469156373, "loss": 2.1595, "step": 23337 }, { "epoch": 0.6262881064834693, "grad_norm": 0.302734375, "learning_rate": 0.0010414759164076124, "loss": 2.0602, "step": 23338 }, { "epoch": 0.6263149420352082, "grad_norm": 0.306640625, "learning_rate": 0.0010414568849311191, "loss": 2.1198, "step": 23339 }, { "epoch": 0.6263417775869472, "grad_norm": 0.30859375, "learning_rate": 0.0010414378524861996, "loss": 2.2347, "step": 23340 }, { "epoch": 0.6263686131386861, "grad_norm": 0.30078125, "learning_rate": 0.001041418819072895, "loss": 2.153, "step": 23341 }, { "epoch": 0.6263954486904251, "grad_norm": 0.29296875, "learning_rate": 0.0010413997846912477, "loss": 2.0907, "step": 23342 }, { "epoch": 0.6264222842421641, "grad_norm": 0.28125, "learning_rate": 0.0010413807493412988, "loss": 2.0427, "step": 23343 }, { "epoch": 0.6264491197939029, "grad_norm": 0.271484375, "learning_rate": 0.0010413617130230904, "loss": 2.0525, "step": 23344 }, { "epoch": 0.6264759553456419, "grad_norm": 0.26953125, "learning_rate": 0.0010413426757366643, "loss": 2.0815, "step": 23345 }, { "epoch": 0.6265027908973808, "grad_norm": 0.267578125, "learning_rate": 0.0010413236374820626, "loss": 2.0445, "step": 23346 }, { "epoch": 0.6265296264491198, "grad_norm": 0.287109375, "learning_rate": 0.0010413045982593263, "loss": 2.074, "step": 23347 }, { "epoch": 0.6265564620008587, "grad_norm": 0.279296875, "learning_rate": 0.0010412855580684975, "loss": 2.0433, "step": 23348 }, { "epoch": 0.6265832975525977, "grad_norm": 0.28125, "learning_rate": 0.0010412665169096183, "loss": 1.9877, "step": 23349 }, { "epoch": 0.6266101331043367, "grad_norm": 0.279296875, "learning_rate": 0.00104124747478273, "loss": 2.0629, "step": 23350 }, { "epoch": 0.6266369686560755, "grad_norm": 0.2734375, "learning_rate": 0.0010412284316878744, "loss": 2.0492, "step": 23351 }, { "epoch": 0.6266638042078145, "grad_norm": 0.279296875, "learning_rate": 0.0010412093876250937, "loss": 2.0433, "step": 23352 }, { "epoch": 0.6266906397595534, "grad_norm": 0.2734375, "learning_rate": 0.0010411903425944294, "loss": 2.0604, "step": 23353 }, { "epoch": 0.6267174753112924, "grad_norm": 0.279296875, "learning_rate": 0.001041171296595923, "loss": 2.0315, "step": 23354 }, { "epoch": 0.6267443108630314, "grad_norm": 0.2734375, "learning_rate": 0.001041152249629617, "loss": 1.9777, "step": 23355 }, { "epoch": 0.6267711464147703, "grad_norm": 0.28125, "learning_rate": 0.0010411332016955525, "loss": 2.1059, "step": 23356 }, { "epoch": 0.6267979819665093, "grad_norm": 0.2734375, "learning_rate": 0.0010411141527937714, "loss": 2.082, "step": 23357 }, { "epoch": 0.6268248175182481, "grad_norm": 0.27734375, "learning_rate": 0.001041095102924316, "loss": 2.0694, "step": 23358 }, { "epoch": 0.6268516530699871, "grad_norm": 0.283203125, "learning_rate": 0.0010410760520872275, "loss": 2.0753, "step": 23359 }, { "epoch": 0.626878488621726, "grad_norm": 0.2734375, "learning_rate": 0.0010410570002825476, "loss": 2.0845, "step": 23360 }, { "epoch": 0.626905324173465, "grad_norm": 0.27734375, "learning_rate": 0.001041037947510319, "loss": 1.9389, "step": 23361 }, { "epoch": 0.626932159725204, "grad_norm": 0.279296875, "learning_rate": 0.0010410188937705826, "loss": 2.0419, "step": 23362 }, { "epoch": 0.6269589952769429, "grad_norm": 0.283203125, "learning_rate": 0.0010409998390633803, "loss": 2.0587, "step": 23363 }, { "epoch": 0.6269858308286819, "grad_norm": 0.287109375, "learning_rate": 0.0010409807833887544, "loss": 2.0108, "step": 23364 }, { "epoch": 0.6270126663804207, "grad_norm": 0.279296875, "learning_rate": 0.0010409617267467463, "loss": 1.9612, "step": 23365 }, { "epoch": 0.6270395019321597, "grad_norm": 0.2734375, "learning_rate": 0.001040942669137398, "loss": 2.0664, "step": 23366 }, { "epoch": 0.6270663374838986, "grad_norm": 0.28125, "learning_rate": 0.001040923610560751, "loss": 2.0633, "step": 23367 }, { "epoch": 0.6270931730356376, "grad_norm": 0.275390625, "learning_rate": 0.0010409045510168474, "loss": 2.0276, "step": 23368 }, { "epoch": 0.6271200085873766, "grad_norm": 0.2734375, "learning_rate": 0.0010408854905057292, "loss": 2.0079, "step": 23369 }, { "epoch": 0.6271468441391155, "grad_norm": 0.267578125, "learning_rate": 0.0010408664290274376, "loss": 1.864, "step": 23370 }, { "epoch": 0.6271736796908545, "grad_norm": 0.2734375, "learning_rate": 0.0010408473665820149, "loss": 2.0675, "step": 23371 }, { "epoch": 0.6272005152425933, "grad_norm": 0.27734375, "learning_rate": 0.0010408283031695027, "loss": 1.9189, "step": 23372 }, { "epoch": 0.6272273507943323, "grad_norm": 0.271484375, "learning_rate": 0.0010408092387899429, "loss": 1.9395, "step": 23373 }, { "epoch": 0.6272541863460713, "grad_norm": 0.275390625, "learning_rate": 0.0010407901734433775, "loss": 2.0156, "step": 23374 }, { "epoch": 0.6272810218978102, "grad_norm": 0.26953125, "learning_rate": 0.001040771107129848, "loss": 1.9472, "step": 23375 }, { "epoch": 0.6273078574495492, "grad_norm": 0.279296875, "learning_rate": 0.0010407520398493964, "loss": 1.9753, "step": 23376 }, { "epoch": 0.6273346930012881, "grad_norm": 0.283203125, "learning_rate": 0.0010407329716020644, "loss": 1.9084, "step": 23377 }, { "epoch": 0.6273615285530271, "grad_norm": 0.279296875, "learning_rate": 0.001040713902387894, "loss": 2.062, "step": 23378 }, { "epoch": 0.627388364104766, "grad_norm": 0.275390625, "learning_rate": 0.0010406948322069268, "loss": 1.9931, "step": 23379 }, { "epoch": 0.6274151996565049, "grad_norm": 0.279296875, "learning_rate": 0.001040675761059205, "loss": 1.9989, "step": 23380 }, { "epoch": 0.6274420352082439, "grad_norm": 0.287109375, "learning_rate": 0.0010406566889447702, "loss": 1.9757, "step": 23381 }, { "epoch": 0.6274688707599828, "grad_norm": 0.26953125, "learning_rate": 0.0010406376158636644, "loss": 1.9509, "step": 23382 }, { "epoch": 0.6274957063117218, "grad_norm": 0.291015625, "learning_rate": 0.001040618541815929, "loss": 2.1088, "step": 23383 }, { "epoch": 0.6275225418634607, "grad_norm": 0.28515625, "learning_rate": 0.0010405994668016064, "loss": 2.0903, "step": 23384 }, { "epoch": 0.6275493774151997, "grad_norm": 0.28515625, "learning_rate": 0.0010405803908207381, "loss": 1.9955, "step": 23385 }, { "epoch": 0.6275762129669386, "grad_norm": 0.27734375, "learning_rate": 0.001040561313873366, "loss": 1.9548, "step": 23386 }, { "epoch": 0.6276030485186775, "grad_norm": 0.27734375, "learning_rate": 0.001040542235959532, "loss": 1.9861, "step": 23387 }, { "epoch": 0.6276298840704165, "grad_norm": 0.283203125, "learning_rate": 0.0010405231570792782, "loss": 1.9947, "step": 23388 }, { "epoch": 0.6276567196221554, "grad_norm": 0.28515625, "learning_rate": 0.0010405040772326459, "loss": 2.0573, "step": 23389 }, { "epoch": 0.6276835551738944, "grad_norm": 0.275390625, "learning_rate": 0.0010404849964196774, "loss": 1.9261, "step": 23390 }, { "epoch": 0.6277103907256333, "grad_norm": 0.3125, "learning_rate": 0.0010404659146404144, "loss": 2.0517, "step": 23391 }, { "epoch": 0.6277372262773723, "grad_norm": 0.318359375, "learning_rate": 0.0010404468318948984, "loss": 2.1816, "step": 23392 }, { "epoch": 0.6277640618291112, "grad_norm": 0.296875, "learning_rate": 0.0010404277481831721, "loss": 2.0568, "step": 23393 }, { "epoch": 0.6277908973808501, "grad_norm": 0.291015625, "learning_rate": 0.0010404086635052768, "loss": 2.0451, "step": 23394 }, { "epoch": 0.6278177329325891, "grad_norm": 0.302734375, "learning_rate": 0.0010403895778612544, "loss": 2.155, "step": 23395 }, { "epoch": 0.627844568484328, "grad_norm": 0.283203125, "learning_rate": 0.0010403704912511468, "loss": 2.0299, "step": 23396 }, { "epoch": 0.627871404036067, "grad_norm": 0.279296875, "learning_rate": 0.001040351403674996, "loss": 2.0395, "step": 23397 }, { "epoch": 0.6278982395878059, "grad_norm": 0.283203125, "learning_rate": 0.0010403323151328436, "loss": 2.0763, "step": 23398 }, { "epoch": 0.6279250751395449, "grad_norm": 0.287109375, "learning_rate": 0.0010403132256247317, "loss": 2.1564, "step": 23399 }, { "epoch": 0.6279519106912839, "grad_norm": 0.271484375, "learning_rate": 0.0010402941351507022, "loss": 1.9392, "step": 23400 }, { "epoch": 0.6279787462430227, "grad_norm": 0.2734375, "learning_rate": 0.001040275043710797, "loss": 2.0814, "step": 23401 }, { "epoch": 0.6280055817947617, "grad_norm": 0.283203125, "learning_rate": 0.0010402559513050579, "loss": 2.0862, "step": 23402 }, { "epoch": 0.6280324173465006, "grad_norm": 0.271484375, "learning_rate": 0.0010402368579335265, "loss": 2.0307, "step": 23403 }, { "epoch": 0.6280592528982396, "grad_norm": 0.283203125, "learning_rate": 0.001040217763596245, "loss": 2.1068, "step": 23404 }, { "epoch": 0.6280860884499785, "grad_norm": 0.279296875, "learning_rate": 0.0010401986682932554, "loss": 2.1249, "step": 23405 }, { "epoch": 0.6281129240017175, "grad_norm": 0.2734375, "learning_rate": 0.0010401795720245994, "loss": 2.0484, "step": 23406 }, { "epoch": 0.6281397595534565, "grad_norm": 0.26953125, "learning_rate": 0.0010401604747903189, "loss": 2.072, "step": 23407 }, { "epoch": 0.6281665951051953, "grad_norm": 0.26953125, "learning_rate": 0.0010401413765904557, "loss": 1.9985, "step": 23408 }, { "epoch": 0.6281934306569343, "grad_norm": 0.279296875, "learning_rate": 0.001040122277425052, "loss": 2.0717, "step": 23409 }, { "epoch": 0.6282202662086732, "grad_norm": 0.271484375, "learning_rate": 0.0010401031772941495, "loss": 2.108, "step": 23410 }, { "epoch": 0.6282471017604122, "grad_norm": 0.263671875, "learning_rate": 0.00104008407619779, "loss": 2.0664, "step": 23411 }, { "epoch": 0.6282739373121511, "grad_norm": 0.26953125, "learning_rate": 0.0010400649741360154, "loss": 2.1047, "step": 23412 }, { "epoch": 0.6283007728638901, "grad_norm": 0.263671875, "learning_rate": 0.0010400458711088678, "loss": 1.9553, "step": 23413 }, { "epoch": 0.6283276084156291, "grad_norm": 0.28125, "learning_rate": 0.0010400267671163892, "loss": 2.0986, "step": 23414 }, { "epoch": 0.628354443967368, "grad_norm": 0.267578125, "learning_rate": 0.0010400076621586212, "loss": 2.0918, "step": 23415 }, { "epoch": 0.6283812795191069, "grad_norm": 0.2734375, "learning_rate": 0.001039988556235606, "loss": 2.1278, "step": 23416 }, { "epoch": 0.6284081150708458, "grad_norm": 0.267578125, "learning_rate": 0.001039969449347385, "loss": 2.0332, "step": 23417 }, { "epoch": 0.6284349506225848, "grad_norm": 0.265625, "learning_rate": 0.0010399503414940006, "loss": 2.0267, "step": 23418 }, { "epoch": 0.6284617861743237, "grad_norm": 0.2734375, "learning_rate": 0.0010399312326754948, "loss": 2.0712, "step": 23419 }, { "epoch": 0.6284886217260627, "grad_norm": 0.265625, "learning_rate": 0.0010399121228919091, "loss": 1.8888, "step": 23420 }, { "epoch": 0.6285154572778017, "grad_norm": 0.27734375, "learning_rate": 0.0010398930121432858, "loss": 2.1122, "step": 23421 }, { "epoch": 0.6285422928295405, "grad_norm": 0.26953125, "learning_rate": 0.0010398739004296666, "loss": 1.9984, "step": 23422 }, { "epoch": 0.6285691283812795, "grad_norm": 0.271484375, "learning_rate": 0.0010398547877510933, "loss": 1.934, "step": 23423 }, { "epoch": 0.6285959639330184, "grad_norm": 0.2734375, "learning_rate": 0.0010398356741076083, "loss": 2.0351, "step": 23424 }, { "epoch": 0.6286227994847574, "grad_norm": 0.2734375, "learning_rate": 0.001039816559499253, "loss": 2.0663, "step": 23425 }, { "epoch": 0.6286496350364964, "grad_norm": 0.26953125, "learning_rate": 0.0010397974439260696, "loss": 2.0287, "step": 23426 }, { "epoch": 0.6286764705882353, "grad_norm": 0.27734375, "learning_rate": 0.0010397783273881, "loss": 2.0233, "step": 23427 }, { "epoch": 0.6287033061399743, "grad_norm": 0.27734375, "learning_rate": 0.001039759209885386, "loss": 2.038, "step": 23428 }, { "epoch": 0.6287301416917132, "grad_norm": 0.26953125, "learning_rate": 0.00103974009141797, "loss": 2.0362, "step": 23429 }, { "epoch": 0.6287569772434521, "grad_norm": 0.265625, "learning_rate": 0.0010397209719858933, "loss": 1.9631, "step": 23430 }, { "epoch": 0.628783812795191, "grad_norm": 0.279296875, "learning_rate": 0.0010397018515891983, "loss": 2.0381, "step": 23431 }, { "epoch": 0.62881064834693, "grad_norm": 0.2734375, "learning_rate": 0.0010396827302279268, "loss": 2.0262, "step": 23432 }, { "epoch": 0.628837483898669, "grad_norm": 0.275390625, "learning_rate": 0.0010396636079021207, "loss": 2.0066, "step": 23433 }, { "epoch": 0.6288643194504079, "grad_norm": 0.26953125, "learning_rate": 0.0010396444846118222, "loss": 1.9862, "step": 23434 }, { "epoch": 0.6288911550021469, "grad_norm": 0.2734375, "learning_rate": 0.0010396253603570727, "loss": 2.0448, "step": 23435 }, { "epoch": 0.6289179905538858, "grad_norm": 0.27734375, "learning_rate": 0.0010396062351379148, "loss": 1.9493, "step": 23436 }, { "epoch": 0.6289448261056247, "grad_norm": 0.283203125, "learning_rate": 0.00103958710895439, "loss": 2.0332, "step": 23437 }, { "epoch": 0.6289716616573636, "grad_norm": 0.2734375, "learning_rate": 0.0010395679818065405, "loss": 1.9929, "step": 23438 }, { "epoch": 0.6289984972091026, "grad_norm": 0.283203125, "learning_rate": 0.001039548853694408, "loss": 2.0349, "step": 23439 }, { "epoch": 0.6290253327608416, "grad_norm": 0.279296875, "learning_rate": 0.0010395297246180348, "loss": 1.9533, "step": 23440 }, { "epoch": 0.6290521683125805, "grad_norm": 0.2578125, "learning_rate": 0.0010395105945774626, "loss": 1.8637, "step": 23441 }, { "epoch": 0.6290790038643195, "grad_norm": 0.279296875, "learning_rate": 0.0010394914635727336, "loss": 1.9349, "step": 23442 }, { "epoch": 0.6291058394160584, "grad_norm": 0.287109375, "learning_rate": 0.0010394723316038896, "loss": 2.0814, "step": 23443 }, { "epoch": 0.6291326749677973, "grad_norm": 0.31640625, "learning_rate": 0.0010394531986709723, "loss": 2.1295, "step": 23444 }, { "epoch": 0.6291595105195363, "grad_norm": 0.314453125, "learning_rate": 0.0010394340647740244, "loss": 2.1117, "step": 23445 }, { "epoch": 0.6291863460712752, "grad_norm": 0.302734375, "learning_rate": 0.001039414929913087, "loss": 2.1695, "step": 23446 }, { "epoch": 0.6292131816230142, "grad_norm": 0.30078125, "learning_rate": 0.001039395794088203, "loss": 2.1297, "step": 23447 }, { "epoch": 0.6292400171747531, "grad_norm": 0.29296875, "learning_rate": 0.0010393766572994137, "loss": 2.1439, "step": 23448 }, { "epoch": 0.6292668527264921, "grad_norm": 0.298828125, "learning_rate": 0.0010393575195467612, "loss": 2.2291, "step": 23449 }, { "epoch": 0.629293688278231, "grad_norm": 0.27734375, "learning_rate": 0.0010393383808302878, "loss": 2.1241, "step": 23450 }, { "epoch": 0.62932052382997, "grad_norm": 0.28515625, "learning_rate": 0.001039319241150035, "loss": 2.0721, "step": 23451 }, { "epoch": 0.6293473593817089, "grad_norm": 0.279296875, "learning_rate": 0.001039300100506045, "loss": 2.0607, "step": 23452 }, { "epoch": 0.6293741949334478, "grad_norm": 0.27734375, "learning_rate": 0.0010392809588983599, "loss": 2.0895, "step": 23453 }, { "epoch": 0.6294010304851868, "grad_norm": 0.28125, "learning_rate": 0.0010392618163270217, "loss": 2.1062, "step": 23454 }, { "epoch": 0.6294278660369257, "grad_norm": 0.291015625, "learning_rate": 0.0010392426727920722, "loss": 2.0297, "step": 23455 }, { "epoch": 0.6294547015886647, "grad_norm": 0.28515625, "learning_rate": 0.0010392235282935536, "loss": 2.068, "step": 23456 }, { "epoch": 0.6294815371404036, "grad_norm": 0.28125, "learning_rate": 0.0010392043828315075, "loss": 2.1089, "step": 23457 }, { "epoch": 0.6295083726921425, "grad_norm": 0.28515625, "learning_rate": 0.0010391852364059766, "loss": 2.1628, "step": 23458 }, { "epoch": 0.6295352082438815, "grad_norm": 0.28125, "learning_rate": 0.0010391660890170023, "loss": 2.1313, "step": 23459 }, { "epoch": 0.6295620437956204, "grad_norm": 0.279296875, "learning_rate": 0.001039146940664627, "loss": 2.1022, "step": 23460 }, { "epoch": 0.6295888793473594, "grad_norm": 0.265625, "learning_rate": 0.0010391277913488922, "loss": 1.9576, "step": 23461 }, { "epoch": 0.6296157148990983, "grad_norm": 0.283203125, "learning_rate": 0.0010391086410698404, "loss": 2.1798, "step": 23462 }, { "epoch": 0.6296425504508373, "grad_norm": 0.267578125, "learning_rate": 0.0010390894898275134, "loss": 2.0895, "step": 23463 }, { "epoch": 0.6296693860025762, "grad_norm": 0.265625, "learning_rate": 0.0010390703376219534, "loss": 2.0027, "step": 23464 }, { "epoch": 0.6296962215543151, "grad_norm": 0.271484375, "learning_rate": 0.001039051184453202, "loss": 2.1323, "step": 23465 }, { "epoch": 0.6297230571060541, "grad_norm": 0.279296875, "learning_rate": 0.0010390320303213017, "loss": 2.0978, "step": 23466 }, { "epoch": 0.629749892657793, "grad_norm": 0.267578125, "learning_rate": 0.0010390128752262942, "loss": 1.9927, "step": 23467 }, { "epoch": 0.629776728209532, "grad_norm": 0.275390625, "learning_rate": 0.0010389937191682215, "loss": 2.0712, "step": 23468 }, { "epoch": 0.6298035637612709, "grad_norm": 0.271484375, "learning_rate": 0.0010389745621471257, "loss": 2.0189, "step": 23469 }, { "epoch": 0.6298303993130099, "grad_norm": 0.27734375, "learning_rate": 0.001038955404163049, "loss": 2.0002, "step": 23470 }, { "epoch": 0.6298572348647489, "grad_norm": 0.2890625, "learning_rate": 0.0010389362452160333, "loss": 2.1083, "step": 23471 }, { "epoch": 0.6298840704164878, "grad_norm": 0.283203125, "learning_rate": 0.0010389170853061208, "loss": 2.0047, "step": 23472 }, { "epoch": 0.6299109059682267, "grad_norm": 0.275390625, "learning_rate": 0.001038897924433353, "loss": 2.0403, "step": 23473 }, { "epoch": 0.6299377415199656, "grad_norm": 0.275390625, "learning_rate": 0.0010388787625977723, "loss": 2.0247, "step": 23474 }, { "epoch": 0.6299645770717046, "grad_norm": 0.2890625, "learning_rate": 0.001038859599799421, "loss": 2.0405, "step": 23475 }, { "epoch": 0.6299914126234435, "grad_norm": 0.26953125, "learning_rate": 0.0010388404360383405, "loss": 2.0484, "step": 23476 }, { "epoch": 0.6300182481751825, "grad_norm": 0.27734375, "learning_rate": 0.0010388212713145733, "loss": 2.0142, "step": 23477 }, { "epoch": 0.6300450837269215, "grad_norm": 0.271484375, "learning_rate": 0.0010388021056281615, "loss": 2.1217, "step": 23478 }, { "epoch": 0.6300719192786604, "grad_norm": 0.275390625, "learning_rate": 0.0010387829389791469, "loss": 2.0312, "step": 23479 }, { "epoch": 0.6300987548303993, "grad_norm": 0.283203125, "learning_rate": 0.0010387637713675717, "loss": 2.0893, "step": 23480 }, { "epoch": 0.6301255903821382, "grad_norm": 0.27734375, "learning_rate": 0.0010387446027934775, "loss": 2.0059, "step": 23481 }, { "epoch": 0.6301524259338772, "grad_norm": 0.2734375, "learning_rate": 0.0010387254332569072, "loss": 1.9847, "step": 23482 }, { "epoch": 0.6301792614856161, "grad_norm": 0.283203125, "learning_rate": 0.0010387062627579022, "loss": 1.9787, "step": 23483 }, { "epoch": 0.6302060970373551, "grad_norm": 0.28515625, "learning_rate": 0.0010386870912965046, "loss": 2.0652, "step": 23484 }, { "epoch": 0.6302329325890941, "grad_norm": 0.294921875, "learning_rate": 0.0010386679188727566, "loss": 2.1324, "step": 23485 }, { "epoch": 0.630259768140833, "grad_norm": 0.2734375, "learning_rate": 0.0010386487454867005, "loss": 1.9909, "step": 23486 }, { "epoch": 0.630286603692572, "grad_norm": 0.271484375, "learning_rate": 0.0010386295711383778, "loss": 1.9194, "step": 23487 }, { "epoch": 0.6303134392443108, "grad_norm": 0.26953125, "learning_rate": 0.0010386103958278312, "loss": 1.9382, "step": 23488 }, { "epoch": 0.6303402747960498, "grad_norm": 0.2734375, "learning_rate": 0.001038591219555102, "loss": 1.9175, "step": 23489 }, { "epoch": 0.6303671103477888, "grad_norm": 0.2734375, "learning_rate": 0.001038572042320233, "loss": 2.0084, "step": 23490 }, { "epoch": 0.6303939458995277, "grad_norm": 0.275390625, "learning_rate": 0.0010385528641232659, "loss": 2.0886, "step": 23491 }, { "epoch": 0.6304207814512667, "grad_norm": 0.279296875, "learning_rate": 0.0010385336849642428, "loss": 1.915, "step": 23492 }, { "epoch": 0.6304476170030056, "grad_norm": 0.28125, "learning_rate": 0.001038514504843206, "loss": 2.0151, "step": 23493 }, { "epoch": 0.6304744525547445, "grad_norm": 0.279296875, "learning_rate": 0.0010384953237601972, "loss": 1.984, "step": 23494 }, { "epoch": 0.6305012881064834, "grad_norm": 0.283203125, "learning_rate": 0.0010384761417152587, "loss": 2.0369, "step": 23495 }, { "epoch": 0.6305281236582224, "grad_norm": 0.33203125, "learning_rate": 0.0010384569587084326, "loss": 2.1486, "step": 23496 }, { "epoch": 0.6305549592099614, "grad_norm": 0.30078125, "learning_rate": 0.0010384377747397608, "loss": 2.098, "step": 23497 }, { "epoch": 0.6305817947617003, "grad_norm": 0.29296875, "learning_rate": 0.0010384185898092855, "loss": 2.1293, "step": 23498 }, { "epoch": 0.6306086303134393, "grad_norm": 0.2890625, "learning_rate": 0.001038399403917049, "loss": 2.079, "step": 23499 }, { "epoch": 0.6306354658651782, "grad_norm": 0.294921875, "learning_rate": 0.0010383802170630932, "loss": 2.0678, "step": 23500 }, { "epoch": 0.6306623014169171, "grad_norm": 0.291015625, "learning_rate": 0.00103836102924746, "loss": 2.1066, "step": 23501 }, { "epoch": 0.630689136968656, "grad_norm": 0.287109375, "learning_rate": 0.0010383418404701916, "loss": 2.0497, "step": 23502 }, { "epoch": 0.630715972520395, "grad_norm": 0.287109375, "learning_rate": 0.0010383226507313303, "loss": 2.0297, "step": 23503 }, { "epoch": 0.630742808072134, "grad_norm": 0.287109375, "learning_rate": 0.001038303460030918, "loss": 2.1981, "step": 23504 }, { "epoch": 0.6307696436238729, "grad_norm": 0.275390625, "learning_rate": 0.0010382842683689972, "loss": 2.0805, "step": 23505 }, { "epoch": 0.6307964791756119, "grad_norm": 0.28125, "learning_rate": 0.0010382650757456092, "loss": 2.0917, "step": 23506 }, { "epoch": 0.6308233147273508, "grad_norm": 0.2890625, "learning_rate": 0.0010382458821607968, "loss": 2.0312, "step": 23507 }, { "epoch": 0.6308501502790897, "grad_norm": 0.279296875, "learning_rate": 0.001038226687614602, "loss": 2.0383, "step": 23508 }, { "epoch": 0.6308769858308286, "grad_norm": 0.279296875, "learning_rate": 0.0010382074921070664, "loss": 2.1287, "step": 23509 }, { "epoch": 0.6309038213825676, "grad_norm": 0.2734375, "learning_rate": 0.0010381882956382326, "loss": 2.0723, "step": 23510 }, { "epoch": 0.6309306569343066, "grad_norm": 0.279296875, "learning_rate": 0.0010381690982081426, "loss": 2.111, "step": 23511 }, { "epoch": 0.6309574924860455, "grad_norm": 0.271484375, "learning_rate": 0.0010381498998168386, "loss": 2.0606, "step": 23512 }, { "epoch": 0.6309843280377845, "grad_norm": 0.27734375, "learning_rate": 0.0010381307004643627, "loss": 2.0665, "step": 23513 }, { "epoch": 0.6310111635895234, "grad_norm": 0.279296875, "learning_rate": 0.0010381115001507567, "loss": 2.1011, "step": 23514 }, { "epoch": 0.6310379991412624, "grad_norm": 0.275390625, "learning_rate": 0.0010380922988760632, "loss": 2.1456, "step": 23515 }, { "epoch": 0.6310648346930013, "grad_norm": 0.279296875, "learning_rate": 0.001038073096640324, "loss": 2.1009, "step": 23516 }, { "epoch": 0.6310916702447402, "grad_norm": 0.26953125, "learning_rate": 0.0010380538934435814, "loss": 1.9438, "step": 23517 }, { "epoch": 0.6311185057964792, "grad_norm": 0.26953125, "learning_rate": 0.0010380346892858773, "loss": 2.0523, "step": 23518 }, { "epoch": 0.6311453413482181, "grad_norm": 0.2734375, "learning_rate": 0.0010380154841672538, "loss": 2.0593, "step": 23519 }, { "epoch": 0.6311721768999571, "grad_norm": 0.2890625, "learning_rate": 0.0010379962780877536, "loss": 2.1563, "step": 23520 }, { "epoch": 0.631199012451696, "grad_norm": 0.28125, "learning_rate": 0.001037977071047418, "loss": 2.0566, "step": 23521 }, { "epoch": 0.631225848003435, "grad_norm": 0.283203125, "learning_rate": 0.00103795786304629, "loss": 2.122, "step": 23522 }, { "epoch": 0.6312526835551739, "grad_norm": 0.26953125, "learning_rate": 0.001037938654084411, "loss": 2.0568, "step": 23523 }, { "epoch": 0.6312795191069128, "grad_norm": 0.27734375, "learning_rate": 0.0010379194441618234, "loss": 2.1147, "step": 23524 }, { "epoch": 0.6313063546586518, "grad_norm": 0.263671875, "learning_rate": 0.0010379002332785697, "loss": 1.9755, "step": 23525 }, { "epoch": 0.6313331902103907, "grad_norm": 0.26953125, "learning_rate": 0.0010378810214346913, "loss": 2.0863, "step": 23526 }, { "epoch": 0.6313600257621297, "grad_norm": 0.275390625, "learning_rate": 0.0010378618086302309, "loss": 2.0187, "step": 23527 }, { "epoch": 0.6313868613138686, "grad_norm": 0.28125, "learning_rate": 0.0010378425948652308, "loss": 2.0645, "step": 23528 }, { "epoch": 0.6314136968656076, "grad_norm": 0.267578125, "learning_rate": 0.0010378233801397326, "loss": 2.0059, "step": 23529 }, { "epoch": 0.6314405324173465, "grad_norm": 0.26171875, "learning_rate": 0.0010378041644537787, "loss": 1.9673, "step": 23530 }, { "epoch": 0.6314673679690854, "grad_norm": 0.283203125, "learning_rate": 0.0010377849478074113, "loss": 2.1242, "step": 23531 }, { "epoch": 0.6314942035208244, "grad_norm": 0.275390625, "learning_rate": 0.0010377657302006725, "loss": 2.0589, "step": 23532 }, { "epoch": 0.6315210390725633, "grad_norm": 0.28125, "learning_rate": 0.0010377465116336046, "loss": 2.0756, "step": 23533 }, { "epoch": 0.6315478746243023, "grad_norm": 0.2734375, "learning_rate": 0.0010377272921062498, "loss": 1.9174, "step": 23534 }, { "epoch": 0.6315747101760412, "grad_norm": 0.28125, "learning_rate": 0.0010377080716186498, "loss": 2.035, "step": 23535 }, { "epoch": 0.6316015457277802, "grad_norm": 0.283203125, "learning_rate": 0.001037688850170847, "loss": 2.0677, "step": 23536 }, { "epoch": 0.6316283812795191, "grad_norm": 0.283203125, "learning_rate": 0.0010376696277628837, "loss": 2.0051, "step": 23537 }, { "epoch": 0.631655216831258, "grad_norm": 0.28125, "learning_rate": 0.0010376504043948021, "loss": 2.0073, "step": 23538 }, { "epoch": 0.631682052382997, "grad_norm": 0.2734375, "learning_rate": 0.0010376311800666443, "loss": 2.0769, "step": 23539 }, { "epoch": 0.6317088879347359, "grad_norm": 0.283203125, "learning_rate": 0.0010376119547784524, "loss": 2.0529, "step": 23540 }, { "epoch": 0.6317357234864749, "grad_norm": 0.30078125, "learning_rate": 0.0010375927285302686, "loss": 1.9883, "step": 23541 }, { "epoch": 0.6317625590382139, "grad_norm": 0.28515625, "learning_rate": 0.0010375735013221352, "loss": 1.9506, "step": 23542 }, { "epoch": 0.6317893945899528, "grad_norm": 0.27734375, "learning_rate": 0.0010375542731540942, "loss": 2.0004, "step": 23543 }, { "epoch": 0.6318162301416917, "grad_norm": 0.27734375, "learning_rate": 0.0010375350440261877, "loss": 1.9638, "step": 23544 }, { "epoch": 0.6318430656934306, "grad_norm": 0.28125, "learning_rate": 0.0010375158139384582, "loss": 1.9849, "step": 23545 }, { "epoch": 0.6318699012451696, "grad_norm": 0.30078125, "learning_rate": 0.001037496582890948, "loss": 2.0996, "step": 23546 }, { "epoch": 0.6318967367969085, "grad_norm": 0.328125, "learning_rate": 0.0010374773508836984, "loss": 2.2088, "step": 23547 }, { "epoch": 0.6319235723486475, "grad_norm": 0.3046875, "learning_rate": 0.0010374581179167525, "loss": 2.1832, "step": 23548 }, { "epoch": 0.6319504079003865, "grad_norm": 0.28515625, "learning_rate": 0.0010374388839901522, "loss": 1.9911, "step": 23549 }, { "epoch": 0.6319772434521254, "grad_norm": 0.2890625, "learning_rate": 0.0010374196491039398, "loss": 2.1329, "step": 23550 }, { "epoch": 0.6320040790038643, "grad_norm": 0.29296875, "learning_rate": 0.001037400413258157, "loss": 2.1055, "step": 23551 }, { "epoch": 0.6320309145556032, "grad_norm": 0.27734375, "learning_rate": 0.0010373811764528469, "loss": 2.1086, "step": 23552 }, { "epoch": 0.6320577501073422, "grad_norm": 0.279296875, "learning_rate": 0.0010373619386880508, "loss": 2.1452, "step": 23553 }, { "epoch": 0.6320845856590811, "grad_norm": 0.275390625, "learning_rate": 0.0010373426999638114, "loss": 2.0244, "step": 23554 }, { "epoch": 0.6321114212108201, "grad_norm": 0.28515625, "learning_rate": 0.001037323460280171, "loss": 2.1892, "step": 23555 }, { "epoch": 0.6321382567625591, "grad_norm": 0.283203125, "learning_rate": 0.0010373042196371713, "loss": 2.1457, "step": 23556 }, { "epoch": 0.632165092314298, "grad_norm": 0.2734375, "learning_rate": 0.0010372849780348548, "loss": 2.1584, "step": 23557 }, { "epoch": 0.632191927866037, "grad_norm": 0.291015625, "learning_rate": 0.0010372657354732638, "loss": 2.1161, "step": 23558 }, { "epoch": 0.6322187634177758, "grad_norm": 0.275390625, "learning_rate": 0.0010372464919524405, "loss": 2.0768, "step": 23559 }, { "epoch": 0.6322455989695148, "grad_norm": 0.28125, "learning_rate": 0.0010372272474724268, "loss": 2.0964, "step": 23560 }, { "epoch": 0.6322724345212538, "grad_norm": 0.28515625, "learning_rate": 0.0010372080020332652, "loss": 2.216, "step": 23561 }, { "epoch": 0.6322992700729927, "grad_norm": 0.2734375, "learning_rate": 0.001037188755634998, "loss": 2.1751, "step": 23562 }, { "epoch": 0.6323261056247317, "grad_norm": 0.271484375, "learning_rate": 0.0010371695082776672, "loss": 2.0824, "step": 23563 }, { "epoch": 0.6323529411764706, "grad_norm": 0.275390625, "learning_rate": 0.0010371502599613154, "loss": 2.1029, "step": 23564 }, { "epoch": 0.6323797767282096, "grad_norm": 0.267578125, "learning_rate": 0.0010371310106859844, "loss": 2.1072, "step": 23565 }, { "epoch": 0.6324066122799484, "grad_norm": 0.271484375, "learning_rate": 0.0010371117604517164, "loss": 2.0341, "step": 23566 }, { "epoch": 0.6324334478316874, "grad_norm": 0.28125, "learning_rate": 0.0010370925092585538, "loss": 2.1277, "step": 23567 }, { "epoch": 0.6324602833834264, "grad_norm": 0.287109375, "learning_rate": 0.0010370732571065389, "loss": 2.0861, "step": 23568 }, { "epoch": 0.6324871189351653, "grad_norm": 0.275390625, "learning_rate": 0.001037054003995714, "loss": 2.0885, "step": 23569 }, { "epoch": 0.6325139544869043, "grad_norm": 0.283203125, "learning_rate": 0.001037034749926121, "loss": 2.1073, "step": 23570 }, { "epoch": 0.6325407900386432, "grad_norm": 0.26953125, "learning_rate": 0.0010370154948978025, "loss": 2.1113, "step": 23571 }, { "epoch": 0.6325676255903822, "grad_norm": 0.2734375, "learning_rate": 0.0010369962389108005, "loss": 2.154, "step": 23572 }, { "epoch": 0.632594461142121, "grad_norm": 0.265625, "learning_rate": 0.0010369769819651572, "loss": 2.0863, "step": 23573 }, { "epoch": 0.63262129669386, "grad_norm": 0.291015625, "learning_rate": 0.001036957724060915, "loss": 2.1373, "step": 23574 }, { "epoch": 0.632648132245599, "grad_norm": 0.275390625, "learning_rate": 0.0010369384651981162, "loss": 2.0808, "step": 23575 }, { "epoch": 0.6326749677973379, "grad_norm": 0.26953125, "learning_rate": 0.0010369192053768029, "loss": 1.9721, "step": 23576 }, { "epoch": 0.6327018033490769, "grad_norm": 0.26953125, "learning_rate": 0.0010368999445970176, "loss": 2.0321, "step": 23577 }, { "epoch": 0.6327286389008158, "grad_norm": 0.26953125, "learning_rate": 0.001036880682858802, "loss": 2.0697, "step": 23578 }, { "epoch": 0.6327554744525548, "grad_norm": 0.275390625, "learning_rate": 0.001036861420162199, "loss": 2.0036, "step": 23579 }, { "epoch": 0.6327823100042936, "grad_norm": 0.275390625, "learning_rate": 0.0010368421565072506, "loss": 2.1415, "step": 23580 }, { "epoch": 0.6328091455560326, "grad_norm": 0.271484375, "learning_rate": 0.001036822891893999, "loss": 2.174, "step": 23581 }, { "epoch": 0.6328359811077716, "grad_norm": 0.275390625, "learning_rate": 0.0010368036263224863, "loss": 2.0474, "step": 23582 }, { "epoch": 0.6328628166595105, "grad_norm": 0.2734375, "learning_rate": 0.001036784359792755, "loss": 2.0094, "step": 23583 }, { "epoch": 0.6328896522112495, "grad_norm": 0.26953125, "learning_rate": 0.0010367650923048474, "loss": 2.0188, "step": 23584 }, { "epoch": 0.6329164877629884, "grad_norm": 0.26953125, "learning_rate": 0.0010367458238588056, "loss": 2.0013, "step": 23585 }, { "epoch": 0.6329433233147274, "grad_norm": 0.279296875, "learning_rate": 0.001036726554454672, "loss": 2.041, "step": 23586 }, { "epoch": 0.6329701588664663, "grad_norm": 0.275390625, "learning_rate": 0.001036707284092489, "loss": 1.9819, "step": 23587 }, { "epoch": 0.6329969944182052, "grad_norm": 0.26953125, "learning_rate": 0.0010366880127722984, "loss": 1.9641, "step": 23588 }, { "epoch": 0.6330238299699442, "grad_norm": 0.28515625, "learning_rate": 0.001036668740494143, "loss": 1.9837, "step": 23589 }, { "epoch": 0.6330506655216831, "grad_norm": 0.265625, "learning_rate": 0.0010366494672580647, "loss": 1.9565, "step": 23590 }, { "epoch": 0.6330775010734221, "grad_norm": 0.271484375, "learning_rate": 0.0010366301930641061, "loss": 2.0072, "step": 23591 }, { "epoch": 0.633104336625161, "grad_norm": 0.27734375, "learning_rate": 0.0010366109179123092, "loss": 2.0127, "step": 23592 }, { "epoch": 0.6331311721769, "grad_norm": 0.28125, "learning_rate": 0.0010365916418027163, "loss": 2.1053, "step": 23593 }, { "epoch": 0.633158007728639, "grad_norm": 0.275390625, "learning_rate": 0.00103657236473537, "loss": 1.9767, "step": 23594 }, { "epoch": 0.6331848432803778, "grad_norm": 0.28125, "learning_rate": 0.0010365530867103123, "loss": 2.0212, "step": 23595 }, { "epoch": 0.6332116788321168, "grad_norm": 0.271484375, "learning_rate": 0.0010365338077275854, "loss": 1.9929, "step": 23596 }, { "epoch": 0.6332385143838557, "grad_norm": 0.283203125, "learning_rate": 0.001036514527787232, "loss": 2.054, "step": 23597 }, { "epoch": 0.6332653499355947, "grad_norm": 0.31640625, "learning_rate": 0.001036495246889294, "loss": 2.17, "step": 23598 }, { "epoch": 0.6332921854873336, "grad_norm": 0.30078125, "learning_rate": 0.0010364759650338137, "loss": 2.2589, "step": 23599 }, { "epoch": 0.6333190210390726, "grad_norm": 0.291015625, "learning_rate": 0.001036456682220834, "loss": 2.1462, "step": 23600 }, { "epoch": 0.6333458565908116, "grad_norm": 0.30859375, "learning_rate": 0.0010364373984503965, "loss": 2.1089, "step": 23601 }, { "epoch": 0.6333726921425504, "grad_norm": 0.2890625, "learning_rate": 0.0010364181137225436, "loss": 2.1137, "step": 23602 }, { "epoch": 0.6333995276942894, "grad_norm": 0.2890625, "learning_rate": 0.001036398828037318, "loss": 2.1927, "step": 23603 }, { "epoch": 0.6334263632460283, "grad_norm": 0.287109375, "learning_rate": 0.0010363795413947615, "loss": 2.1364, "step": 23604 }, { "epoch": 0.6334531987977673, "grad_norm": 0.283203125, "learning_rate": 0.001036360253794917, "loss": 2.219, "step": 23605 }, { "epoch": 0.6334800343495062, "grad_norm": 0.296875, "learning_rate": 0.001036340965237826, "loss": 2.1502, "step": 23606 }, { "epoch": 0.6335068699012452, "grad_norm": 0.28125, "learning_rate": 0.0010363216757235318, "loss": 2.0459, "step": 23607 }, { "epoch": 0.6335337054529842, "grad_norm": 0.28515625, "learning_rate": 0.0010363023852520761, "loss": 1.998, "step": 23608 }, { "epoch": 0.633560541004723, "grad_norm": 0.2890625, "learning_rate": 0.0010362830938235013, "loss": 2.12, "step": 23609 }, { "epoch": 0.633587376556462, "grad_norm": 0.271484375, "learning_rate": 0.0010362638014378496, "loss": 2.1066, "step": 23610 }, { "epoch": 0.6336142121082009, "grad_norm": 0.28515625, "learning_rate": 0.0010362445080951636, "loss": 2.011, "step": 23611 }, { "epoch": 0.6336410476599399, "grad_norm": 0.291015625, "learning_rate": 0.0010362252137954854, "loss": 2.1925, "step": 23612 }, { "epoch": 0.6336678832116789, "grad_norm": 0.283203125, "learning_rate": 0.0010362059185388575, "loss": 2.1255, "step": 23613 }, { "epoch": 0.6336947187634178, "grad_norm": 0.267578125, "learning_rate": 0.001036186622325322, "loss": 2.0142, "step": 23614 }, { "epoch": 0.6337215543151568, "grad_norm": 0.271484375, "learning_rate": 0.0010361673251549216, "loss": 2.0085, "step": 23615 }, { "epoch": 0.6337483898668956, "grad_norm": 0.275390625, "learning_rate": 0.0010361480270276982, "loss": 2.0759, "step": 23616 }, { "epoch": 0.6337752254186346, "grad_norm": 0.28515625, "learning_rate": 0.0010361287279436946, "loss": 2.0752, "step": 23617 }, { "epoch": 0.6338020609703735, "grad_norm": 0.267578125, "learning_rate": 0.0010361094279029528, "loss": 2.1022, "step": 23618 }, { "epoch": 0.6338288965221125, "grad_norm": 0.275390625, "learning_rate": 0.001036090126905515, "loss": 2.1072, "step": 23619 }, { "epoch": 0.6338557320738515, "grad_norm": 0.267578125, "learning_rate": 0.001036070824951424, "loss": 2.0563, "step": 23620 }, { "epoch": 0.6338825676255904, "grad_norm": 0.275390625, "learning_rate": 0.0010360515220407217, "loss": 2.1325, "step": 23621 }, { "epoch": 0.6339094031773294, "grad_norm": 0.28125, "learning_rate": 0.0010360322181734508, "loss": 2.1153, "step": 23622 }, { "epoch": 0.6339362387290682, "grad_norm": 0.2734375, "learning_rate": 0.0010360129133496535, "loss": 2.0855, "step": 23623 }, { "epoch": 0.6339630742808072, "grad_norm": 0.26953125, "learning_rate": 0.001035993607569372, "loss": 2.0026, "step": 23624 }, { "epoch": 0.6339899098325461, "grad_norm": 0.27734375, "learning_rate": 0.0010359743008326492, "loss": 2.0775, "step": 23625 }, { "epoch": 0.6340167453842851, "grad_norm": 0.267578125, "learning_rate": 0.0010359549931395267, "loss": 2.0039, "step": 23626 }, { "epoch": 0.6340435809360241, "grad_norm": 0.279296875, "learning_rate": 0.001035935684490047, "loss": 2.0703, "step": 23627 }, { "epoch": 0.634070416487763, "grad_norm": 0.279296875, "learning_rate": 0.001035916374884253, "loss": 1.995, "step": 23628 }, { "epoch": 0.634097252039502, "grad_norm": 0.27734375, "learning_rate": 0.001035897064322187, "loss": 2.1784, "step": 23629 }, { "epoch": 0.6341240875912408, "grad_norm": 0.28515625, "learning_rate": 0.0010358777528038906, "loss": 2.0847, "step": 23630 }, { "epoch": 0.6341509231429798, "grad_norm": 0.275390625, "learning_rate": 0.0010358584403294066, "loss": 2.0434, "step": 23631 }, { "epoch": 0.6341777586947188, "grad_norm": 0.283203125, "learning_rate": 0.0010358391268987777, "loss": 2.0809, "step": 23632 }, { "epoch": 0.6342045942464577, "grad_norm": 0.2734375, "learning_rate": 0.0010358198125120458, "loss": 2.006, "step": 23633 }, { "epoch": 0.6342314297981967, "grad_norm": 0.275390625, "learning_rate": 0.0010358004971692536, "loss": 2.1201, "step": 23634 }, { "epoch": 0.6342582653499356, "grad_norm": 0.27734375, "learning_rate": 0.0010357811808704433, "loss": 2.1377, "step": 23635 }, { "epoch": 0.6342851009016746, "grad_norm": 0.279296875, "learning_rate": 0.0010357618636156571, "loss": 2.0204, "step": 23636 }, { "epoch": 0.6343119364534134, "grad_norm": 0.283203125, "learning_rate": 0.0010357425454049379, "loss": 2.026, "step": 23637 }, { "epoch": 0.6343387720051524, "grad_norm": 0.28515625, "learning_rate": 0.0010357232262383277, "loss": 2.1009, "step": 23638 }, { "epoch": 0.6343656075568914, "grad_norm": 0.267578125, "learning_rate": 0.0010357039061158685, "loss": 1.939, "step": 23639 }, { "epoch": 0.6343924431086303, "grad_norm": 0.275390625, "learning_rate": 0.0010356845850376037, "loss": 2.0773, "step": 23640 }, { "epoch": 0.6344192786603693, "grad_norm": 0.287109375, "learning_rate": 0.0010356652630035745, "loss": 2.1119, "step": 23641 }, { "epoch": 0.6344461142121082, "grad_norm": 0.271484375, "learning_rate": 0.0010356459400138243, "loss": 1.956, "step": 23642 }, { "epoch": 0.6344729497638472, "grad_norm": 0.279296875, "learning_rate": 0.001035626616068395, "loss": 2.0196, "step": 23643 }, { "epoch": 0.634499785315586, "grad_norm": 0.275390625, "learning_rate": 0.0010356072911673292, "loss": 2.0156, "step": 23644 }, { "epoch": 0.634526620867325, "grad_norm": 0.2734375, "learning_rate": 0.001035587965310669, "loss": 1.9632, "step": 23645 }, { "epoch": 0.634553456419064, "grad_norm": 0.310546875, "learning_rate": 0.001035568638498457, "loss": 2.1519, "step": 23646 }, { "epoch": 0.6345802919708029, "grad_norm": 0.326171875, "learning_rate": 0.0010355493107307356, "loss": 2.17, "step": 23647 }, { "epoch": 0.6346071275225419, "grad_norm": 0.296875, "learning_rate": 0.001035529982007547, "loss": 2.1157, "step": 23648 }, { "epoch": 0.6346339630742808, "grad_norm": 0.29296875, "learning_rate": 0.0010355106523289338, "loss": 2.0586, "step": 23649 }, { "epoch": 0.6346607986260198, "grad_norm": 0.302734375, "learning_rate": 0.0010354913216949384, "loss": 2.1262, "step": 23650 }, { "epoch": 0.6346876341777586, "grad_norm": 0.302734375, "learning_rate": 0.001035471990105603, "loss": 2.2332, "step": 23651 }, { "epoch": 0.6347144697294976, "grad_norm": 0.2734375, "learning_rate": 0.0010354526575609704, "loss": 2.0556, "step": 23652 }, { "epoch": 0.6347413052812366, "grad_norm": 0.265625, "learning_rate": 0.0010354333240610828, "loss": 1.9576, "step": 23653 }, { "epoch": 0.6347681408329755, "grad_norm": 0.28125, "learning_rate": 0.0010354139896059824, "loss": 2.1154, "step": 23654 }, { "epoch": 0.6347949763847145, "grad_norm": 0.275390625, "learning_rate": 0.001035394654195712, "loss": 2.1236, "step": 23655 }, { "epoch": 0.6348218119364534, "grad_norm": 0.2890625, "learning_rate": 0.0010353753178303138, "loss": 2.0887, "step": 23656 }, { "epoch": 0.6348486474881924, "grad_norm": 0.291015625, "learning_rate": 0.0010353559805098302, "loss": 2.185, "step": 23657 }, { "epoch": 0.6348754830399314, "grad_norm": 0.296875, "learning_rate": 0.0010353366422343035, "loss": 2.1803, "step": 23658 }, { "epoch": 0.6349023185916702, "grad_norm": 0.271484375, "learning_rate": 0.0010353173030037766, "loss": 1.9996, "step": 23659 }, { "epoch": 0.6349291541434092, "grad_norm": 0.283203125, "learning_rate": 0.0010352979628182914, "loss": 2.2321, "step": 23660 }, { "epoch": 0.6349559896951481, "grad_norm": 0.279296875, "learning_rate": 0.0010352786216778905, "loss": 2.089, "step": 23661 }, { "epoch": 0.6349828252468871, "grad_norm": 0.287109375, "learning_rate": 0.0010352592795826164, "loss": 2.164, "step": 23662 }, { "epoch": 0.635009660798626, "grad_norm": 0.26953125, "learning_rate": 0.0010352399365325116, "loss": 2.005, "step": 23663 }, { "epoch": 0.635036496350365, "grad_norm": 0.27734375, "learning_rate": 0.0010352205925276182, "loss": 2.09, "step": 23664 }, { "epoch": 0.635063331902104, "grad_norm": 0.28125, "learning_rate": 0.001035201247567979, "loss": 2.0997, "step": 23665 }, { "epoch": 0.6350901674538428, "grad_norm": 0.275390625, "learning_rate": 0.0010351819016536366, "loss": 2.0648, "step": 23666 }, { "epoch": 0.6351170030055818, "grad_norm": 0.27734375, "learning_rate": 0.0010351625547846328, "loss": 2.1243, "step": 23667 }, { "epoch": 0.6351438385573207, "grad_norm": 0.279296875, "learning_rate": 0.0010351432069610105, "loss": 2.1256, "step": 23668 }, { "epoch": 0.6351706741090597, "grad_norm": 0.27734375, "learning_rate": 0.001035123858182812, "loss": 2.1043, "step": 23669 }, { "epoch": 0.6351975096607986, "grad_norm": 0.27734375, "learning_rate": 0.0010351045084500796, "loss": 1.9995, "step": 23670 }, { "epoch": 0.6352243452125376, "grad_norm": 0.29296875, "learning_rate": 0.001035085157762856, "loss": 2.1466, "step": 23671 }, { "epoch": 0.6352511807642766, "grad_norm": 0.275390625, "learning_rate": 0.0010350658061211836, "loss": 2.0633, "step": 23672 }, { "epoch": 0.6352780163160154, "grad_norm": 0.2734375, "learning_rate": 0.001035046453525105, "loss": 2.0199, "step": 23673 }, { "epoch": 0.6353048518677544, "grad_norm": 0.291015625, "learning_rate": 0.0010350270999746622, "loss": 2.103, "step": 23674 }, { "epoch": 0.6353316874194933, "grad_norm": 0.26953125, "learning_rate": 0.001035007745469898, "loss": 2.0083, "step": 23675 }, { "epoch": 0.6353585229712323, "grad_norm": 0.27734375, "learning_rate": 0.001034988390010855, "loss": 2.102, "step": 23676 }, { "epoch": 0.6353853585229712, "grad_norm": 0.28125, "learning_rate": 0.0010349690335975752, "loss": 2.0208, "step": 23677 }, { "epoch": 0.6354121940747102, "grad_norm": 0.2734375, "learning_rate": 0.0010349496762301015, "loss": 1.9553, "step": 23678 }, { "epoch": 0.6354390296264492, "grad_norm": 0.27734375, "learning_rate": 0.001034930317908476, "loss": 2.0895, "step": 23679 }, { "epoch": 0.635465865178188, "grad_norm": 0.26953125, "learning_rate": 0.0010349109586327413, "loss": 1.9661, "step": 23680 }, { "epoch": 0.635492700729927, "grad_norm": 0.279296875, "learning_rate": 0.0010348915984029403, "loss": 2.145, "step": 23681 }, { "epoch": 0.6355195362816659, "grad_norm": 0.275390625, "learning_rate": 0.0010348722372191146, "loss": 1.9819, "step": 23682 }, { "epoch": 0.6355463718334049, "grad_norm": 0.287109375, "learning_rate": 0.0010348528750813073, "loss": 2.0807, "step": 23683 }, { "epoch": 0.6355732073851439, "grad_norm": 0.2890625, "learning_rate": 0.0010348335119895608, "loss": 2.0811, "step": 23684 }, { "epoch": 0.6356000429368828, "grad_norm": 0.275390625, "learning_rate": 0.0010348141479439175, "loss": 2.0593, "step": 23685 }, { "epoch": 0.6356268784886218, "grad_norm": 0.283203125, "learning_rate": 0.00103479478294442, "loss": 2.0834, "step": 23686 }, { "epoch": 0.6356537140403606, "grad_norm": 0.283203125, "learning_rate": 0.0010347754169911104, "loss": 2.0506, "step": 23687 }, { "epoch": 0.6356805495920996, "grad_norm": 0.28515625, "learning_rate": 0.0010347560500840315, "loss": 2.0364, "step": 23688 }, { "epoch": 0.6357073851438385, "grad_norm": 0.28515625, "learning_rate": 0.001034736682223226, "loss": 2.0113, "step": 23689 }, { "epoch": 0.6357342206955775, "grad_norm": 0.26953125, "learning_rate": 0.001034717313408736, "loss": 1.9509, "step": 23690 }, { "epoch": 0.6357610562473165, "grad_norm": 0.2734375, "learning_rate": 0.0010346979436406038, "loss": 2.0666, "step": 23691 }, { "epoch": 0.6357878917990554, "grad_norm": 0.283203125, "learning_rate": 0.0010346785729188725, "loss": 2.079, "step": 23692 }, { "epoch": 0.6358147273507944, "grad_norm": 0.279296875, "learning_rate": 0.0010346592012435841, "loss": 1.9126, "step": 23693 }, { "epoch": 0.6358415629025332, "grad_norm": 0.275390625, "learning_rate": 0.0010346398286147816, "loss": 2.0403, "step": 23694 }, { "epoch": 0.6358683984542722, "grad_norm": 0.30859375, "learning_rate": 0.001034620455032507, "loss": 2.0879, "step": 23695 }, { "epoch": 0.6358952340060111, "grad_norm": 0.326171875, "learning_rate": 0.001034601080496803, "loss": 2.2207, "step": 23696 }, { "epoch": 0.6359220695577501, "grad_norm": 0.2890625, "learning_rate": 0.0010345817050077123, "loss": 2.1852, "step": 23697 }, { "epoch": 0.6359489051094891, "grad_norm": 0.291015625, "learning_rate": 0.001034562328565277, "loss": 2.2264, "step": 23698 }, { "epoch": 0.635975740661228, "grad_norm": 0.294921875, "learning_rate": 0.0010345429511695397, "loss": 2.1556, "step": 23699 }, { "epoch": 0.636002576212967, "grad_norm": 0.296875, "learning_rate": 0.001034523572820543, "loss": 2.2278, "step": 23700 }, { "epoch": 0.6360294117647058, "grad_norm": 0.287109375, "learning_rate": 0.0010345041935183295, "loss": 2.1092, "step": 23701 }, { "epoch": 0.6360562473164448, "grad_norm": 0.28125, "learning_rate": 0.0010344848132629418, "loss": 2.1659, "step": 23702 }, { "epoch": 0.6360830828681838, "grad_norm": 0.271484375, "learning_rate": 0.0010344654320544222, "loss": 2.1054, "step": 23703 }, { "epoch": 0.6361099184199227, "grad_norm": 0.2734375, "learning_rate": 0.001034446049892813, "loss": 2.0978, "step": 23704 }, { "epoch": 0.6361367539716617, "grad_norm": 0.287109375, "learning_rate": 0.0010344266667781572, "loss": 2.2177, "step": 23705 }, { "epoch": 0.6361635895234006, "grad_norm": 0.28515625, "learning_rate": 0.0010344072827104971, "loss": 2.2256, "step": 23706 }, { "epoch": 0.6361904250751396, "grad_norm": 0.275390625, "learning_rate": 0.001034387897689875, "loss": 2.1295, "step": 23707 }, { "epoch": 0.6362172606268784, "grad_norm": 0.2734375, "learning_rate": 0.001034368511716334, "loss": 2.0574, "step": 23708 }, { "epoch": 0.6362440961786174, "grad_norm": 0.2890625, "learning_rate": 0.0010343491247899162, "loss": 2.0961, "step": 23709 }, { "epoch": 0.6362709317303564, "grad_norm": 0.28125, "learning_rate": 0.0010343297369106638, "loss": 2.1712, "step": 23710 }, { "epoch": 0.6362977672820953, "grad_norm": 0.279296875, "learning_rate": 0.0010343103480786203, "loss": 2.0835, "step": 23711 }, { "epoch": 0.6363246028338343, "grad_norm": 0.271484375, "learning_rate": 0.0010342909582938273, "loss": 2.0901, "step": 23712 }, { "epoch": 0.6363514383855732, "grad_norm": 0.267578125, "learning_rate": 0.001034271567556328, "loss": 2.0634, "step": 23713 }, { "epoch": 0.6363782739373122, "grad_norm": 0.275390625, "learning_rate": 0.0010342521758661643, "loss": 2.0522, "step": 23714 }, { "epoch": 0.636405109489051, "grad_norm": 0.275390625, "learning_rate": 0.0010342327832233795, "loss": 2.1281, "step": 23715 }, { "epoch": 0.63643194504079, "grad_norm": 0.2734375, "learning_rate": 0.0010342133896280154, "loss": 2.0338, "step": 23716 }, { "epoch": 0.636458780592529, "grad_norm": 0.2734375, "learning_rate": 0.001034193995080115, "loss": 2.1786, "step": 23717 }, { "epoch": 0.6364856161442679, "grad_norm": 0.283203125, "learning_rate": 0.0010341745995797206, "loss": 2.1062, "step": 23718 }, { "epoch": 0.6365124516960069, "grad_norm": 0.28515625, "learning_rate": 0.0010341552031268748, "loss": 2.0699, "step": 23719 }, { "epoch": 0.6365392872477458, "grad_norm": 0.27734375, "learning_rate": 0.0010341358057216205, "loss": 2.0909, "step": 23720 }, { "epoch": 0.6365661227994848, "grad_norm": 0.28125, "learning_rate": 0.0010341164073639998, "loss": 2.0551, "step": 23721 }, { "epoch": 0.6365929583512236, "grad_norm": 0.28515625, "learning_rate": 0.0010340970080540556, "loss": 2.2156, "step": 23722 }, { "epoch": 0.6366197939029626, "grad_norm": 0.275390625, "learning_rate": 0.00103407760779183, "loss": 2.0913, "step": 23723 }, { "epoch": 0.6366466294547016, "grad_norm": 0.267578125, "learning_rate": 0.0010340582065773661, "loss": 2.0797, "step": 23724 }, { "epoch": 0.6366734650064405, "grad_norm": 0.267578125, "learning_rate": 0.001034038804410706, "loss": 2.0056, "step": 23725 }, { "epoch": 0.6367003005581795, "grad_norm": 0.28125, "learning_rate": 0.0010340194012918927, "loss": 2.1189, "step": 23726 }, { "epoch": 0.6367271361099184, "grad_norm": 0.283203125, "learning_rate": 0.0010339999972209682, "loss": 2.0557, "step": 23727 }, { "epoch": 0.6367539716616574, "grad_norm": 0.259765625, "learning_rate": 0.0010339805921979754, "loss": 1.9821, "step": 23728 }, { "epoch": 0.6367808072133964, "grad_norm": 0.26953125, "learning_rate": 0.001033961186222957, "loss": 2.053, "step": 23729 }, { "epoch": 0.6368076427651352, "grad_norm": 0.275390625, "learning_rate": 0.0010339417792959555, "loss": 2.108, "step": 23730 }, { "epoch": 0.6368344783168742, "grad_norm": 0.279296875, "learning_rate": 0.0010339223714170133, "loss": 2.0155, "step": 23731 }, { "epoch": 0.6368613138686131, "grad_norm": 0.279296875, "learning_rate": 0.001033902962586173, "loss": 2.043, "step": 23732 }, { "epoch": 0.6368881494203521, "grad_norm": 0.2734375, "learning_rate": 0.0010338835528034775, "loss": 2.0329, "step": 23733 }, { "epoch": 0.636914984972091, "grad_norm": 0.279296875, "learning_rate": 0.001033864142068969, "loss": 2.063, "step": 23734 }, { "epoch": 0.63694182052383, "grad_norm": 0.2734375, "learning_rate": 0.0010338447303826902, "loss": 2.0958, "step": 23735 }, { "epoch": 0.636968656075569, "grad_norm": 0.271484375, "learning_rate": 0.0010338253177446838, "loss": 1.9941, "step": 23736 }, { "epoch": 0.6369954916273078, "grad_norm": 0.279296875, "learning_rate": 0.0010338059041549922, "loss": 2.0082, "step": 23737 }, { "epoch": 0.6370223271790468, "grad_norm": 0.271484375, "learning_rate": 0.001033786489613658, "loss": 1.9824, "step": 23738 }, { "epoch": 0.6370491627307857, "grad_norm": 0.2734375, "learning_rate": 0.001033767074120724, "loss": 2.0484, "step": 23739 }, { "epoch": 0.6370759982825247, "grad_norm": 0.271484375, "learning_rate": 0.0010337476576762325, "loss": 2.0163, "step": 23740 }, { "epoch": 0.6371028338342636, "grad_norm": 0.26953125, "learning_rate": 0.0010337282402802265, "loss": 1.976, "step": 23741 }, { "epoch": 0.6371296693860026, "grad_norm": 0.291015625, "learning_rate": 0.0010337088219327481, "loss": 2.0567, "step": 23742 }, { "epoch": 0.6371565049377416, "grad_norm": 0.310546875, "learning_rate": 0.0010336894026338403, "loss": 2.1622, "step": 23743 }, { "epoch": 0.6371833404894804, "grad_norm": 0.3125, "learning_rate": 0.0010336699823835455, "loss": 2.1625, "step": 23744 }, { "epoch": 0.6372101760412194, "grad_norm": 0.302734375, "learning_rate": 0.0010336505611819064, "loss": 2.198, "step": 23745 }, { "epoch": 0.6372370115929583, "grad_norm": 0.28515625, "learning_rate": 0.0010336311390289653, "loss": 2.2366, "step": 23746 }, { "epoch": 0.6372638471446973, "grad_norm": 0.294921875, "learning_rate": 0.0010336117159247653, "loss": 2.1994, "step": 23747 }, { "epoch": 0.6372906826964362, "grad_norm": 0.298828125, "learning_rate": 0.0010335922918693488, "loss": 2.2655, "step": 23748 }, { "epoch": 0.6373175182481752, "grad_norm": 0.291015625, "learning_rate": 0.0010335728668627582, "loss": 2.1675, "step": 23749 }, { "epoch": 0.6373443537999142, "grad_norm": 0.28125, "learning_rate": 0.0010335534409050364, "loss": 2.0747, "step": 23750 }, { "epoch": 0.637371189351653, "grad_norm": 0.271484375, "learning_rate": 0.0010335340139962256, "loss": 2.177, "step": 23751 }, { "epoch": 0.637398024903392, "grad_norm": 0.2734375, "learning_rate": 0.0010335145861363692, "loss": 2.0745, "step": 23752 }, { "epoch": 0.6374248604551309, "grad_norm": 0.28125, "learning_rate": 0.0010334951573255091, "loss": 2.1144, "step": 23753 }, { "epoch": 0.6374516960068699, "grad_norm": 0.275390625, "learning_rate": 0.0010334757275636882, "loss": 2.1, "step": 23754 }, { "epoch": 0.6374785315586089, "grad_norm": 0.294921875, "learning_rate": 0.001033456296850949, "loss": 2.1389, "step": 23755 }, { "epoch": 0.6375053671103478, "grad_norm": 0.275390625, "learning_rate": 0.0010334368651873343, "loss": 2.1401, "step": 23756 }, { "epoch": 0.6375322026620868, "grad_norm": 0.27734375, "learning_rate": 0.0010334174325728866, "loss": 2.1157, "step": 23757 }, { "epoch": 0.6375590382138256, "grad_norm": 0.265625, "learning_rate": 0.0010333979990076487, "loss": 2.0158, "step": 23758 }, { "epoch": 0.6375858737655646, "grad_norm": 0.271484375, "learning_rate": 0.001033378564491663, "loss": 2.0915, "step": 23759 }, { "epoch": 0.6376127093173035, "grad_norm": 0.279296875, "learning_rate": 0.001033359129024972, "loss": 2.1508, "step": 23760 }, { "epoch": 0.6376395448690425, "grad_norm": 0.275390625, "learning_rate": 0.001033339692607619, "loss": 2.064, "step": 23761 }, { "epoch": 0.6376663804207815, "grad_norm": 0.265625, "learning_rate": 0.001033320255239646, "loss": 2.1197, "step": 23762 }, { "epoch": 0.6376932159725204, "grad_norm": 0.275390625, "learning_rate": 0.0010333008169210959, "loss": 2.0912, "step": 23763 }, { "epoch": 0.6377200515242594, "grad_norm": 0.265625, "learning_rate": 0.001033281377652011, "loss": 2.0328, "step": 23764 }, { "epoch": 0.6377468870759982, "grad_norm": 0.26953125, "learning_rate": 0.0010332619374324346, "loss": 2.1618, "step": 23765 }, { "epoch": 0.6377737226277372, "grad_norm": 0.271484375, "learning_rate": 0.0010332424962624087, "loss": 2.1502, "step": 23766 }, { "epoch": 0.6378005581794761, "grad_norm": 0.271484375, "learning_rate": 0.0010332230541419765, "loss": 2.0567, "step": 23767 }, { "epoch": 0.6378273937312151, "grad_norm": 0.26953125, "learning_rate": 0.0010332036110711803, "loss": 2.1, "step": 23768 }, { "epoch": 0.6378542292829541, "grad_norm": 0.28125, "learning_rate": 0.0010331841670500627, "loss": 2.1213, "step": 23769 }, { "epoch": 0.637881064834693, "grad_norm": 0.265625, "learning_rate": 0.0010331647220786664, "loss": 1.9856, "step": 23770 }, { "epoch": 0.637907900386432, "grad_norm": 0.2734375, "learning_rate": 0.0010331452761570344, "loss": 1.9665, "step": 23771 }, { "epoch": 0.6379347359381708, "grad_norm": 0.28125, "learning_rate": 0.001033125829285209, "loss": 2.1131, "step": 23772 }, { "epoch": 0.6379615714899098, "grad_norm": 0.26953125, "learning_rate": 0.0010331063814632327, "loss": 2.0753, "step": 23773 }, { "epoch": 0.6379884070416488, "grad_norm": 0.271484375, "learning_rate": 0.0010330869326911488, "loss": 2.1258, "step": 23774 }, { "epoch": 0.6380152425933877, "grad_norm": 0.271484375, "learning_rate": 0.0010330674829689994, "loss": 1.9974, "step": 23775 }, { "epoch": 0.6380420781451267, "grad_norm": 0.2734375, "learning_rate": 0.0010330480322968275, "loss": 2.051, "step": 23776 }, { "epoch": 0.6380689136968656, "grad_norm": 0.265625, "learning_rate": 0.0010330285806746755, "loss": 2.0032, "step": 23777 }, { "epoch": 0.6380957492486046, "grad_norm": 0.271484375, "learning_rate": 0.0010330091281025861, "loss": 2.0068, "step": 23778 }, { "epoch": 0.6381225848003435, "grad_norm": 0.2734375, "learning_rate": 0.0010329896745806023, "loss": 2.0337, "step": 23779 }, { "epoch": 0.6381494203520824, "grad_norm": 0.267578125, "learning_rate": 0.0010329702201087663, "loss": 1.9764, "step": 23780 }, { "epoch": 0.6381762559038214, "grad_norm": 0.2734375, "learning_rate": 0.0010329507646871213, "loss": 1.9194, "step": 23781 }, { "epoch": 0.6382030914555603, "grad_norm": 0.2734375, "learning_rate": 0.0010329313083157093, "loss": 2.0344, "step": 23782 }, { "epoch": 0.6382299270072993, "grad_norm": 0.27734375, "learning_rate": 0.0010329118509945737, "loss": 2.0742, "step": 23783 }, { "epoch": 0.6382567625590382, "grad_norm": 0.283203125, "learning_rate": 0.0010328923927237568, "loss": 2.0324, "step": 23784 }, { "epoch": 0.6382835981107772, "grad_norm": 0.287109375, "learning_rate": 0.0010328729335033016, "loss": 2.1403, "step": 23785 }, { "epoch": 0.638310433662516, "grad_norm": 0.2734375, "learning_rate": 0.00103285347333325, "loss": 1.9516, "step": 23786 }, { "epoch": 0.638337269214255, "grad_norm": 0.287109375, "learning_rate": 0.0010328340122136455, "loss": 2.0091, "step": 23787 }, { "epoch": 0.638364104765994, "grad_norm": 0.283203125, "learning_rate": 0.0010328145501445306, "loss": 2.0069, "step": 23788 }, { "epoch": 0.6383909403177329, "grad_norm": 0.279296875, "learning_rate": 0.001032795087125948, "loss": 1.959, "step": 23789 }, { "epoch": 0.6384177758694719, "grad_norm": 0.302734375, "learning_rate": 0.00103277562315794, "loss": 2.0953, "step": 23790 }, { "epoch": 0.6384446114212108, "grad_norm": 0.30078125, "learning_rate": 0.0010327561582405497, "loss": 1.9804, "step": 23791 }, { "epoch": 0.6384714469729498, "grad_norm": 0.30078125, "learning_rate": 0.0010327366923738198, "loss": 2.1031, "step": 23792 }, { "epoch": 0.6384982825246887, "grad_norm": 0.3046875, "learning_rate": 0.0010327172255577928, "loss": 2.249, "step": 23793 }, { "epoch": 0.6385251180764276, "grad_norm": 0.294921875, "learning_rate": 0.0010326977577925116, "loss": 2.1789, "step": 23794 }, { "epoch": 0.6385519536281666, "grad_norm": 0.296875, "learning_rate": 0.0010326782890780187, "loss": 2.2527, "step": 23795 }, { "epoch": 0.6385787891799055, "grad_norm": 0.279296875, "learning_rate": 0.001032658819414357, "loss": 2.1581, "step": 23796 }, { "epoch": 0.6386056247316445, "grad_norm": 0.279296875, "learning_rate": 0.0010326393488015691, "loss": 2.0729, "step": 23797 }, { "epoch": 0.6386324602833834, "grad_norm": 0.2734375, "learning_rate": 0.0010326198772396978, "loss": 2.0311, "step": 23798 }, { "epoch": 0.6386592958351224, "grad_norm": 0.279296875, "learning_rate": 0.001032600404728786, "loss": 2.1311, "step": 23799 }, { "epoch": 0.6386861313868614, "grad_norm": 0.279296875, "learning_rate": 0.0010325809312688758, "loss": 2.0289, "step": 23800 }, { "epoch": 0.6387129669386002, "grad_norm": 0.26953125, "learning_rate": 0.0010325614568600105, "loss": 2.142, "step": 23801 }, { "epoch": 0.6387398024903392, "grad_norm": 0.27734375, "learning_rate": 0.0010325419815022325, "loss": 2.1367, "step": 23802 }, { "epoch": 0.6387666380420781, "grad_norm": 0.279296875, "learning_rate": 0.0010325225051955847, "loss": 2.1288, "step": 23803 }, { "epoch": 0.6387934735938171, "grad_norm": 0.2734375, "learning_rate": 0.00103250302794011, "loss": 2.089, "step": 23804 }, { "epoch": 0.638820309145556, "grad_norm": 0.298828125, "learning_rate": 0.0010324835497358506, "loss": 2.1346, "step": 23805 }, { "epoch": 0.638847144697295, "grad_norm": 0.265625, "learning_rate": 0.0010324640705828498, "loss": 2.059, "step": 23806 }, { "epoch": 0.638873980249034, "grad_norm": 0.265625, "learning_rate": 0.0010324445904811499, "loss": 2.0913, "step": 23807 }, { "epoch": 0.6389008158007728, "grad_norm": 0.28125, "learning_rate": 0.0010324251094307939, "loss": 2.1798, "step": 23808 }, { "epoch": 0.6389276513525118, "grad_norm": 0.27734375, "learning_rate": 0.0010324056274318243, "loss": 2.1164, "step": 23809 }, { "epoch": 0.6389544869042507, "grad_norm": 0.26953125, "learning_rate": 0.0010323861444842838, "loss": 2.0845, "step": 23810 }, { "epoch": 0.6389813224559897, "grad_norm": 0.2734375, "learning_rate": 0.0010323666605882157, "loss": 2.1811, "step": 23811 }, { "epoch": 0.6390081580077286, "grad_norm": 0.2578125, "learning_rate": 0.0010323471757436621, "loss": 1.9575, "step": 23812 }, { "epoch": 0.6390349935594676, "grad_norm": 0.275390625, "learning_rate": 0.001032327689950666, "loss": 2.0714, "step": 23813 }, { "epoch": 0.6390618291112066, "grad_norm": 0.271484375, "learning_rate": 0.0010323082032092702, "loss": 2.1, "step": 23814 }, { "epoch": 0.6390886646629454, "grad_norm": 0.2734375, "learning_rate": 0.0010322887155195175, "loss": 2.058, "step": 23815 }, { "epoch": 0.6391155002146844, "grad_norm": 0.28125, "learning_rate": 0.0010322692268814504, "loss": 2.1899, "step": 23816 }, { "epoch": 0.6391423357664233, "grad_norm": 0.2734375, "learning_rate": 0.0010322497372951117, "loss": 2.0644, "step": 23817 }, { "epoch": 0.6391691713181623, "grad_norm": 0.267578125, "learning_rate": 0.0010322302467605444, "loss": 2.0211, "step": 23818 }, { "epoch": 0.6391960068699012, "grad_norm": 0.275390625, "learning_rate": 0.0010322107552777912, "loss": 2.1432, "step": 23819 }, { "epoch": 0.6392228424216402, "grad_norm": 0.27734375, "learning_rate": 0.0010321912628468944, "loss": 2.1126, "step": 23820 }, { "epoch": 0.6392496779733792, "grad_norm": 0.267578125, "learning_rate": 0.0010321717694678975, "loss": 2.1235, "step": 23821 }, { "epoch": 0.639276513525118, "grad_norm": 0.27734375, "learning_rate": 0.0010321522751408428, "loss": 2.0446, "step": 23822 }, { "epoch": 0.639303349076857, "grad_norm": 0.2734375, "learning_rate": 0.0010321327798657731, "loss": 2.0923, "step": 23823 }, { "epoch": 0.6393301846285959, "grad_norm": 0.279296875, "learning_rate": 0.0010321132836427311, "loss": 2.0449, "step": 23824 }, { "epoch": 0.6393570201803349, "grad_norm": 0.267578125, "learning_rate": 0.0010320937864717599, "loss": 2.025, "step": 23825 }, { "epoch": 0.6393838557320739, "grad_norm": 0.26953125, "learning_rate": 0.001032074288352902, "loss": 2.0314, "step": 23826 }, { "epoch": 0.6394106912838128, "grad_norm": 0.26953125, "learning_rate": 0.0010320547892862, "loss": 2.0122, "step": 23827 }, { "epoch": 0.6394375268355518, "grad_norm": 0.294921875, "learning_rate": 0.001032035289271697, "loss": 2.161, "step": 23828 }, { "epoch": 0.6394643623872907, "grad_norm": 0.279296875, "learning_rate": 0.0010320157883094358, "loss": 2.0566, "step": 23829 }, { "epoch": 0.6394911979390296, "grad_norm": 0.26953125, "learning_rate": 0.0010319962863994591, "loss": 2.0455, "step": 23830 }, { "epoch": 0.6395180334907685, "grad_norm": 0.27734375, "learning_rate": 0.0010319767835418095, "loss": 1.9669, "step": 23831 }, { "epoch": 0.6395448690425075, "grad_norm": 0.279296875, "learning_rate": 0.0010319572797365302, "loss": 2.0098, "step": 23832 }, { "epoch": 0.6395717045942465, "grad_norm": 0.2734375, "learning_rate": 0.0010319377749836633, "loss": 1.984, "step": 23833 }, { "epoch": 0.6395985401459854, "grad_norm": 0.2734375, "learning_rate": 0.0010319182692832523, "loss": 2.0705, "step": 23834 }, { "epoch": 0.6396253756977244, "grad_norm": 0.279296875, "learning_rate": 0.0010318987626353395, "loss": 2.0766, "step": 23835 }, { "epoch": 0.6396522112494633, "grad_norm": 0.279296875, "learning_rate": 0.0010318792550399682, "loss": 1.9922, "step": 23836 }, { "epoch": 0.6396790468012022, "grad_norm": 0.310546875, "learning_rate": 0.0010318597464971807, "loss": 2.1199, "step": 23837 }, { "epoch": 0.6397058823529411, "grad_norm": 0.302734375, "learning_rate": 0.0010318402370070197, "loss": 2.0751, "step": 23838 }, { "epoch": 0.6397327179046801, "grad_norm": 0.294921875, "learning_rate": 0.0010318207265695286, "loss": 2.2557, "step": 23839 }, { "epoch": 0.6397595534564191, "grad_norm": 0.2890625, "learning_rate": 0.0010318012151847497, "loss": 2.1473, "step": 23840 }, { "epoch": 0.639786389008158, "grad_norm": 0.32421875, "learning_rate": 0.001031781702852726, "loss": 2.1399, "step": 23841 }, { "epoch": 0.639813224559897, "grad_norm": 0.3125, "learning_rate": 0.0010317621895735004, "loss": 2.2242, "step": 23842 }, { "epoch": 0.6398400601116359, "grad_norm": 0.294921875, "learning_rate": 0.0010317426753471155, "loss": 2.0283, "step": 23843 }, { "epoch": 0.6398668956633748, "grad_norm": 0.28125, "learning_rate": 0.001031723160173614, "loss": 2.0924, "step": 23844 }, { "epoch": 0.6398937312151138, "grad_norm": 0.291015625, "learning_rate": 0.0010317036440530393, "loss": 2.1501, "step": 23845 }, { "epoch": 0.6399205667668527, "grad_norm": 0.287109375, "learning_rate": 0.0010316841269854334, "loss": 2.2178, "step": 23846 }, { "epoch": 0.6399474023185917, "grad_norm": 0.294921875, "learning_rate": 0.0010316646089708397, "loss": 2.1364, "step": 23847 }, { "epoch": 0.6399742378703306, "grad_norm": 0.302734375, "learning_rate": 0.001031645090009301, "loss": 2.1867, "step": 23848 }, { "epoch": 0.6400010734220696, "grad_norm": 0.28125, "learning_rate": 0.00103162557010086, "loss": 2.144, "step": 23849 }, { "epoch": 0.6400279089738085, "grad_norm": 0.28125, "learning_rate": 0.0010316060492455593, "loss": 2.1478, "step": 23850 }, { "epoch": 0.6400547445255474, "grad_norm": 0.287109375, "learning_rate": 0.0010315865274434417, "loss": 2.0781, "step": 23851 }, { "epoch": 0.6400815800772864, "grad_norm": 0.28515625, "learning_rate": 0.0010315670046945507, "loss": 2.1276, "step": 23852 }, { "epoch": 0.6400815800772864, "eval_loss": 3.0481598377227783, "eval_runtime": 582.7401, "eval_samples_per_second": 82.263, "eval_steps_per_second": 20.567, "step": 23852 }, { "epoch": 0.6401084156290253, "grad_norm": 0.283203125, "learning_rate": 0.0010315474809989283, "loss": 2.0428, "step": 23853 }, { "epoch": 0.6401352511807643, "grad_norm": 0.26953125, "learning_rate": 0.001031527956356618, "loss": 2.0744, "step": 23854 }, { "epoch": 0.6401620867325032, "grad_norm": 0.265625, "learning_rate": 0.0010315084307676618, "loss": 2.1177, "step": 23855 }, { "epoch": 0.6401889222842422, "grad_norm": 0.26953125, "learning_rate": 0.0010314889042321034, "loss": 2.1119, "step": 23856 }, { "epoch": 0.6402157578359811, "grad_norm": 0.27734375, "learning_rate": 0.0010314693767499853, "loss": 2.1004, "step": 23857 }, { "epoch": 0.64024259338772, "grad_norm": 0.283203125, "learning_rate": 0.0010314498483213505, "loss": 2.2266, "step": 23858 }, { "epoch": 0.640269428939459, "grad_norm": 0.2734375, "learning_rate": 0.0010314303189462414, "loss": 1.9781, "step": 23859 }, { "epoch": 0.6402962644911979, "grad_norm": 0.26953125, "learning_rate": 0.001031410788624701, "loss": 2.0793, "step": 23860 }, { "epoch": 0.6403231000429369, "grad_norm": 0.2734375, "learning_rate": 0.0010313912573567726, "loss": 2.0931, "step": 23861 }, { "epoch": 0.6403499355946758, "grad_norm": 0.26171875, "learning_rate": 0.0010313717251424985, "loss": 2.0255, "step": 23862 }, { "epoch": 0.6403767711464148, "grad_norm": 0.263671875, "learning_rate": 0.0010313521919819218, "loss": 2.0116, "step": 23863 }, { "epoch": 0.6404036066981537, "grad_norm": 0.283203125, "learning_rate": 0.0010313326578750852, "loss": 2.0625, "step": 23864 }, { "epoch": 0.6404304422498927, "grad_norm": 0.279296875, "learning_rate": 0.001031313122822032, "loss": 2.1927, "step": 23865 }, { "epoch": 0.6404572778016316, "grad_norm": 0.28515625, "learning_rate": 0.0010312935868228043, "loss": 2.1532, "step": 23866 }, { "epoch": 0.6404841133533705, "grad_norm": 0.28515625, "learning_rate": 0.0010312740498774455, "loss": 2.0049, "step": 23867 }, { "epoch": 0.6405109489051095, "grad_norm": 0.263671875, "learning_rate": 0.0010312545119859984, "loss": 1.9447, "step": 23868 }, { "epoch": 0.6405377844568484, "grad_norm": 0.271484375, "learning_rate": 0.0010312349731485056, "loss": 1.9591, "step": 23869 }, { "epoch": 0.6405646200085874, "grad_norm": 0.26953125, "learning_rate": 0.0010312154333650101, "loss": 2.0131, "step": 23870 }, { "epoch": 0.6405914555603264, "grad_norm": 0.26953125, "learning_rate": 0.001031195892635555, "loss": 2.1509, "step": 23871 }, { "epoch": 0.6406182911120653, "grad_norm": 0.275390625, "learning_rate": 0.001031176350960183, "loss": 2.1323, "step": 23872 }, { "epoch": 0.6406451266638042, "grad_norm": 0.28125, "learning_rate": 0.0010311568083389367, "loss": 2.1978, "step": 23873 }, { "epoch": 0.6406719622155431, "grad_norm": 0.26953125, "learning_rate": 0.0010311372647718595, "loss": 1.985, "step": 23874 }, { "epoch": 0.6406987977672821, "grad_norm": 0.279296875, "learning_rate": 0.0010311177202589937, "loss": 2.0531, "step": 23875 }, { "epoch": 0.640725633319021, "grad_norm": 0.275390625, "learning_rate": 0.0010310981748003828, "loss": 1.9938, "step": 23876 }, { "epoch": 0.64075246887076, "grad_norm": 0.267578125, "learning_rate": 0.0010310786283960688, "loss": 1.92, "step": 23877 }, { "epoch": 0.640779304422499, "grad_norm": 0.2734375, "learning_rate": 0.0010310590810460956, "loss": 2.0005, "step": 23878 }, { "epoch": 0.6408061399742379, "grad_norm": 0.275390625, "learning_rate": 0.0010310395327505055, "loss": 1.9929, "step": 23879 }, { "epoch": 0.6408329755259768, "grad_norm": 0.287109375, "learning_rate": 0.0010310199835093412, "loss": 2.0795, "step": 23880 }, { "epoch": 0.6408598110777157, "grad_norm": 0.2734375, "learning_rate": 0.0010310004333226462, "loss": 1.9374, "step": 23881 }, { "epoch": 0.6408866466294547, "grad_norm": 0.283203125, "learning_rate": 0.001030980882190463, "loss": 2.0475, "step": 23882 }, { "epoch": 0.6409134821811936, "grad_norm": 0.29296875, "learning_rate": 0.0010309613301128344, "loss": 2.0901, "step": 23883 }, { "epoch": 0.6409403177329326, "grad_norm": 0.306640625, "learning_rate": 0.0010309417770898036, "loss": 2.1415, "step": 23884 }, { "epoch": 0.6409671532846716, "grad_norm": 0.3125, "learning_rate": 0.0010309222231214131, "loss": 2.2004, "step": 23885 }, { "epoch": 0.6409939888364105, "grad_norm": 0.294921875, "learning_rate": 0.0010309026682077062, "loss": 2.1639, "step": 23886 }, { "epoch": 0.6410208243881494, "grad_norm": 0.298828125, "learning_rate": 0.0010308831123487256, "loss": 2.2284, "step": 23887 }, { "epoch": 0.6410476599398883, "grad_norm": 0.306640625, "learning_rate": 0.0010308635555445142, "loss": 2.162, "step": 23888 }, { "epoch": 0.6410744954916273, "grad_norm": 0.2890625, "learning_rate": 0.0010308439977951148, "loss": 2.1503, "step": 23889 }, { "epoch": 0.6411013310433663, "grad_norm": 0.28515625, "learning_rate": 0.0010308244391005706, "loss": 2.2122, "step": 23890 }, { "epoch": 0.6411281665951052, "grad_norm": 0.2890625, "learning_rate": 0.0010308048794609242, "loss": 2.1333, "step": 23891 }, { "epoch": 0.6411550021468442, "grad_norm": 0.265625, "learning_rate": 0.0010307853188762188, "loss": 2.1198, "step": 23892 }, { "epoch": 0.6411818376985831, "grad_norm": 0.271484375, "learning_rate": 0.001030765757346497, "loss": 2.0481, "step": 23893 }, { "epoch": 0.641208673250322, "grad_norm": 0.28515625, "learning_rate": 0.001030746194871802, "loss": 2.1381, "step": 23894 }, { "epoch": 0.6412355088020609, "grad_norm": 0.275390625, "learning_rate": 0.0010307266314521764, "loss": 2.0279, "step": 23895 }, { "epoch": 0.6412623443537999, "grad_norm": 0.279296875, "learning_rate": 0.0010307070670876634, "loss": 2.1311, "step": 23896 }, { "epoch": 0.6412891799055389, "grad_norm": 0.28515625, "learning_rate": 0.001030687501778306, "loss": 2.1177, "step": 23897 }, { "epoch": 0.6413160154572778, "grad_norm": 0.283203125, "learning_rate": 0.0010306679355241467, "loss": 2.1613, "step": 23898 }, { "epoch": 0.6413428510090168, "grad_norm": 0.283203125, "learning_rate": 0.0010306483683252287, "loss": 2.1664, "step": 23899 }, { "epoch": 0.6413696865607557, "grad_norm": 0.271484375, "learning_rate": 0.0010306288001815946, "loss": 1.9972, "step": 23900 }, { "epoch": 0.6413965221124946, "grad_norm": 0.279296875, "learning_rate": 0.001030609231093288, "loss": 2.1398, "step": 23901 }, { "epoch": 0.6414233576642335, "grad_norm": 0.28125, "learning_rate": 0.0010305896610603511, "loss": 2.1728, "step": 23902 }, { "epoch": 0.6414501932159725, "grad_norm": 0.27734375, "learning_rate": 0.0010305700900828277, "loss": 2.0798, "step": 23903 }, { "epoch": 0.6414770287677115, "grad_norm": 0.275390625, "learning_rate": 0.0010305505181607598, "loss": 2.0903, "step": 23904 }, { "epoch": 0.6415038643194504, "grad_norm": 0.26171875, "learning_rate": 0.0010305309452941906, "loss": 1.9765, "step": 23905 }, { "epoch": 0.6415306998711894, "grad_norm": 0.296875, "learning_rate": 0.0010305113714831635, "loss": 2.1997, "step": 23906 }, { "epoch": 0.6415575354229283, "grad_norm": 0.2734375, "learning_rate": 0.001030491796727721, "loss": 2.1495, "step": 23907 }, { "epoch": 0.6415843709746673, "grad_norm": 0.28515625, "learning_rate": 0.0010304722210279062, "loss": 2.0964, "step": 23908 }, { "epoch": 0.6416112065264061, "grad_norm": 0.28515625, "learning_rate": 0.001030452644383762, "loss": 2.1101, "step": 23909 }, { "epoch": 0.6416380420781451, "grad_norm": 0.275390625, "learning_rate": 0.001030433066795331, "loss": 2.2031, "step": 23910 }, { "epoch": 0.6416648776298841, "grad_norm": 0.271484375, "learning_rate": 0.0010304134882626569, "loss": 2.0979, "step": 23911 }, { "epoch": 0.641691713181623, "grad_norm": 0.2734375, "learning_rate": 0.001030393908785782, "loss": 2.123, "step": 23912 }, { "epoch": 0.641718548733362, "grad_norm": 0.275390625, "learning_rate": 0.0010303743283647496, "loss": 2.1099, "step": 23913 }, { "epoch": 0.6417453842851009, "grad_norm": 0.265625, "learning_rate": 0.0010303547469996024, "loss": 2.0656, "step": 23914 }, { "epoch": 0.6417722198368399, "grad_norm": 0.2734375, "learning_rate": 0.0010303351646903836, "loss": 2.1039, "step": 23915 }, { "epoch": 0.6417990553885788, "grad_norm": 0.2734375, "learning_rate": 0.0010303155814371362, "loss": 2.1326, "step": 23916 }, { "epoch": 0.6418258909403177, "grad_norm": 0.2734375, "learning_rate": 0.001030295997239903, "loss": 2.0534, "step": 23917 }, { "epoch": 0.6418527264920567, "grad_norm": 0.275390625, "learning_rate": 0.0010302764120987266, "loss": 2.0489, "step": 23918 }, { "epoch": 0.6418795620437956, "grad_norm": 0.271484375, "learning_rate": 0.0010302568260136505, "loss": 1.9419, "step": 23919 }, { "epoch": 0.6419063975955346, "grad_norm": 0.28125, "learning_rate": 0.001030237238984718, "loss": 2.097, "step": 23920 }, { "epoch": 0.6419332331472735, "grad_norm": 0.271484375, "learning_rate": 0.0010302176510119708, "loss": 2.0205, "step": 23921 }, { "epoch": 0.6419600686990125, "grad_norm": 0.28125, "learning_rate": 0.0010301980620954533, "loss": 2.097, "step": 23922 }, { "epoch": 0.6419869042507514, "grad_norm": 0.2734375, "learning_rate": 0.0010301784722352076, "loss": 2.1033, "step": 23923 }, { "epoch": 0.6420137398024903, "grad_norm": 0.27734375, "learning_rate": 0.0010301588814312767, "loss": 1.9985, "step": 23924 }, { "epoch": 0.6420405753542293, "grad_norm": 0.275390625, "learning_rate": 0.001030139289683704, "loss": 2.0774, "step": 23925 }, { "epoch": 0.6420674109059682, "grad_norm": 0.275390625, "learning_rate": 0.001030119696992532, "loss": 2.002, "step": 23926 }, { "epoch": 0.6420942464577072, "grad_norm": 0.27734375, "learning_rate": 0.0010301001033578042, "loss": 2.1441, "step": 23927 }, { "epoch": 0.6421210820094461, "grad_norm": 0.279296875, "learning_rate": 0.001030080508779563, "loss": 2.1251, "step": 23928 }, { "epoch": 0.642147917561185, "grad_norm": 0.310546875, "learning_rate": 0.001030060913257852, "loss": 2.065, "step": 23929 }, { "epoch": 0.642174753112924, "grad_norm": 0.3203125, "learning_rate": 0.0010300413167927138, "loss": 2.2001, "step": 23930 }, { "epoch": 0.6422015886646629, "grad_norm": 0.298828125, "learning_rate": 0.0010300217193841914, "loss": 2.199, "step": 23931 }, { "epoch": 0.6422284242164019, "grad_norm": 0.30078125, "learning_rate": 0.001030002121032328, "loss": 2.1889, "step": 23932 }, { "epoch": 0.6422552597681408, "grad_norm": 0.3046875, "learning_rate": 0.0010299825217371663, "loss": 2.0145, "step": 23933 }, { "epoch": 0.6422820953198798, "grad_norm": 0.298828125, "learning_rate": 0.0010299629214987496, "loss": 2.162, "step": 23934 }, { "epoch": 0.6423089308716187, "grad_norm": 0.287109375, "learning_rate": 0.0010299433203171207, "loss": 2.1538, "step": 23935 }, { "epoch": 0.6423357664233577, "grad_norm": 0.28125, "learning_rate": 0.0010299237181923223, "loss": 2.1305, "step": 23936 }, { "epoch": 0.6423626019750966, "grad_norm": 0.28515625, "learning_rate": 0.0010299041151243982, "loss": 2.2032, "step": 23937 }, { "epoch": 0.6423894375268355, "grad_norm": 0.27734375, "learning_rate": 0.0010298845111133908, "loss": 2.1687, "step": 23938 }, { "epoch": 0.6424162730785745, "grad_norm": 0.283203125, "learning_rate": 0.0010298649061593432, "loss": 2.072, "step": 23939 }, { "epoch": 0.6424431086303134, "grad_norm": 0.279296875, "learning_rate": 0.0010298453002622985, "loss": 2.205, "step": 23940 }, { "epoch": 0.6424699441820524, "grad_norm": 0.28125, "learning_rate": 0.0010298256934222999, "loss": 2.1556, "step": 23941 }, { "epoch": 0.6424967797337914, "grad_norm": 0.28125, "learning_rate": 0.0010298060856393898, "loss": 2.1851, "step": 23942 }, { "epoch": 0.6425236152855303, "grad_norm": 0.28515625, "learning_rate": 0.0010297864769136119, "loss": 2.1785, "step": 23943 }, { "epoch": 0.6425504508372692, "grad_norm": 0.275390625, "learning_rate": 0.0010297668672450085, "loss": 2.1396, "step": 23944 }, { "epoch": 0.6425772863890081, "grad_norm": 0.27734375, "learning_rate": 0.0010297472566336236, "loss": 2.0894, "step": 23945 }, { "epoch": 0.6426041219407471, "grad_norm": 0.275390625, "learning_rate": 0.0010297276450794994, "loss": 2.0846, "step": 23946 }, { "epoch": 0.642630957492486, "grad_norm": 0.28515625, "learning_rate": 0.001029708032582679, "loss": 2.1733, "step": 23947 }, { "epoch": 0.642657793044225, "grad_norm": 0.271484375, "learning_rate": 0.0010296884191432058, "loss": 2.0424, "step": 23948 }, { "epoch": 0.642684628595964, "grad_norm": 0.26171875, "learning_rate": 0.0010296688047611226, "loss": 2.0355, "step": 23949 }, { "epoch": 0.6427114641477029, "grad_norm": 0.26171875, "learning_rate": 0.0010296491894364725, "loss": 2.0135, "step": 23950 }, { "epoch": 0.6427382996994419, "grad_norm": 0.265625, "learning_rate": 0.0010296295731692983, "loss": 2.0966, "step": 23951 }, { "epoch": 0.6427651352511807, "grad_norm": 0.27734375, "learning_rate": 0.0010296099559596434, "loss": 2.1041, "step": 23952 }, { "epoch": 0.6427919708029197, "grad_norm": 0.283203125, "learning_rate": 0.0010295903378075505, "loss": 2.1684, "step": 23953 }, { "epoch": 0.6428188063546586, "grad_norm": 0.26953125, "learning_rate": 0.0010295707187130629, "loss": 2.0657, "step": 23954 }, { "epoch": 0.6428456419063976, "grad_norm": 0.287109375, "learning_rate": 0.0010295510986762235, "loss": 2.2088, "step": 23955 }, { "epoch": 0.6428724774581366, "grad_norm": 0.28125, "learning_rate": 0.0010295314776970752, "loss": 2.0669, "step": 23956 }, { "epoch": 0.6428993130098755, "grad_norm": 0.275390625, "learning_rate": 0.0010295118557756616, "loss": 2.0849, "step": 23957 }, { "epoch": 0.6429261485616145, "grad_norm": 0.265625, "learning_rate": 0.001029492232912025, "loss": 2.108, "step": 23958 }, { "epoch": 0.6429529841133533, "grad_norm": 0.27734375, "learning_rate": 0.0010294726091062089, "loss": 2.0612, "step": 23959 }, { "epoch": 0.6429798196650923, "grad_norm": 0.265625, "learning_rate": 0.0010294529843582563, "loss": 2.0317, "step": 23960 }, { "epoch": 0.6430066552168313, "grad_norm": 0.275390625, "learning_rate": 0.00102943335866821, "loss": 2.0364, "step": 23961 }, { "epoch": 0.6430334907685702, "grad_norm": 0.28125, "learning_rate": 0.0010294137320361137, "loss": 2.1563, "step": 23962 }, { "epoch": 0.6430603263203092, "grad_norm": 0.271484375, "learning_rate": 0.0010293941044620097, "loss": 2.1213, "step": 23963 }, { "epoch": 0.6430871618720481, "grad_norm": 0.283203125, "learning_rate": 0.0010293744759459413, "loss": 2.1567, "step": 23964 }, { "epoch": 0.643113997423787, "grad_norm": 0.28125, "learning_rate": 0.0010293548464879517, "loss": 1.9917, "step": 23965 }, { "epoch": 0.6431408329755259, "grad_norm": 0.279296875, "learning_rate": 0.0010293352160880838, "loss": 2.0065, "step": 23966 }, { "epoch": 0.6431676685272649, "grad_norm": 0.27734375, "learning_rate": 0.0010293155847463807, "loss": 2.0115, "step": 23967 }, { "epoch": 0.6431945040790039, "grad_norm": 0.2734375, "learning_rate": 0.0010292959524628859, "loss": 1.9977, "step": 23968 }, { "epoch": 0.6432213396307428, "grad_norm": 0.265625, "learning_rate": 0.0010292763192376417, "loss": 2.0277, "step": 23969 }, { "epoch": 0.6432481751824818, "grad_norm": 0.28515625, "learning_rate": 0.001029256685070692, "loss": 2.052, "step": 23970 }, { "epoch": 0.6432750107342207, "grad_norm": 0.26953125, "learning_rate": 0.0010292370499620789, "loss": 1.9878, "step": 23971 }, { "epoch": 0.6433018462859597, "grad_norm": 0.265625, "learning_rate": 0.0010292174139118462, "loss": 2.0372, "step": 23972 }, { "epoch": 0.6433286818376985, "grad_norm": 0.27734375, "learning_rate": 0.0010291977769200367, "loss": 1.9939, "step": 23973 }, { "epoch": 0.6433555173894375, "grad_norm": 0.296875, "learning_rate": 0.0010291781389866936, "loss": 2.1382, "step": 23974 }, { "epoch": 0.6433823529411765, "grad_norm": 0.33203125, "learning_rate": 0.00102915850011186, "loss": 2.2252, "step": 23975 }, { "epoch": 0.6434091884929154, "grad_norm": 0.31640625, "learning_rate": 0.0010291388602955789, "loss": 2.2309, "step": 23976 }, { "epoch": 0.6434360240446544, "grad_norm": 0.322265625, "learning_rate": 0.0010291192195378935, "loss": 2.1945, "step": 23977 }, { "epoch": 0.6434628595963933, "grad_norm": 0.294921875, "learning_rate": 0.0010290995778388465, "loss": 2.1884, "step": 23978 }, { "epoch": 0.6434896951481323, "grad_norm": 0.30078125, "learning_rate": 0.0010290799351984814, "loss": 2.2287, "step": 23979 }, { "epoch": 0.6435165306998711, "grad_norm": 0.28125, "learning_rate": 0.0010290602916168414, "loss": 2.239, "step": 23980 }, { "epoch": 0.6435433662516101, "grad_norm": 0.28515625, "learning_rate": 0.0010290406470939688, "loss": 2.117, "step": 23981 }, { "epoch": 0.6435702018033491, "grad_norm": 0.28515625, "learning_rate": 0.0010290210016299076, "loss": 2.1625, "step": 23982 }, { "epoch": 0.643597037355088, "grad_norm": 0.32421875, "learning_rate": 0.0010290013552247007, "loss": 2.0396, "step": 23983 }, { "epoch": 0.643623872906827, "grad_norm": 0.275390625, "learning_rate": 0.0010289817078783907, "loss": 2.1417, "step": 23984 }, { "epoch": 0.6436507084585659, "grad_norm": 0.26953125, "learning_rate": 0.0010289620595910214, "loss": 1.9804, "step": 23985 }, { "epoch": 0.6436775440103049, "grad_norm": 0.28125, "learning_rate": 0.0010289424103626353, "loss": 2.1804, "step": 23986 }, { "epoch": 0.6437043795620438, "grad_norm": 0.283203125, "learning_rate": 0.0010289227601932756, "loss": 2.1984, "step": 23987 }, { "epoch": 0.6437312151137827, "grad_norm": 0.283203125, "learning_rate": 0.0010289031090829858, "loss": 2.196, "step": 23988 }, { "epoch": 0.6437580506655217, "grad_norm": 0.2734375, "learning_rate": 0.0010288834570318086, "loss": 2.1596, "step": 23989 }, { "epoch": 0.6437848862172606, "grad_norm": 0.283203125, "learning_rate": 0.0010288638040397873, "loss": 2.1232, "step": 23990 }, { "epoch": 0.6438117217689996, "grad_norm": 0.271484375, "learning_rate": 0.0010288441501069652, "loss": 2.1845, "step": 23991 }, { "epoch": 0.6438385573207385, "grad_norm": 0.267578125, "learning_rate": 0.0010288244952333851, "loss": 2.0522, "step": 23992 }, { "epoch": 0.6438653928724775, "grad_norm": 0.271484375, "learning_rate": 0.0010288048394190901, "loss": 2.0419, "step": 23993 }, { "epoch": 0.6438922284242165, "grad_norm": 0.283203125, "learning_rate": 0.0010287851826641238, "loss": 2.0419, "step": 23994 }, { "epoch": 0.6439190639759553, "grad_norm": 0.283203125, "learning_rate": 0.0010287655249685283, "loss": 2.0661, "step": 23995 }, { "epoch": 0.6439458995276943, "grad_norm": 0.2734375, "learning_rate": 0.0010287458663323478, "loss": 2.0958, "step": 23996 }, { "epoch": 0.6439727350794332, "grad_norm": 0.28125, "learning_rate": 0.001028726206755625, "loss": 2.1577, "step": 23997 }, { "epoch": 0.6439995706311722, "grad_norm": 0.2734375, "learning_rate": 0.0010287065462384031, "loss": 2.0334, "step": 23998 }, { "epoch": 0.6440264061829111, "grad_norm": 0.275390625, "learning_rate": 0.001028686884780725, "loss": 2.0269, "step": 23999 }, { "epoch": 0.6440532417346501, "grad_norm": 0.27734375, "learning_rate": 0.0010286672223826343, "loss": 2.1395, "step": 24000 }, { "epoch": 0.644080077286389, "grad_norm": 0.283203125, "learning_rate": 0.0010286475590441735, "loss": 2.1086, "step": 24001 }, { "epoch": 0.6441069128381279, "grad_norm": 0.2734375, "learning_rate": 0.0010286278947653862, "loss": 2.1441, "step": 24002 }, { "epoch": 0.6441337483898669, "grad_norm": 0.2734375, "learning_rate": 0.0010286082295463154, "loss": 2.021, "step": 24003 }, { "epoch": 0.6441605839416058, "grad_norm": 0.283203125, "learning_rate": 0.0010285885633870041, "loss": 2.1569, "step": 24004 }, { "epoch": 0.6441874194933448, "grad_norm": 0.28125, "learning_rate": 0.001028568896287496, "loss": 2.0684, "step": 24005 }, { "epoch": 0.6442142550450837, "grad_norm": 0.271484375, "learning_rate": 0.0010285492282478333, "loss": 1.9995, "step": 24006 }, { "epoch": 0.6442410905968227, "grad_norm": 0.271484375, "learning_rate": 0.0010285295592680598, "loss": 1.998, "step": 24007 }, { "epoch": 0.6442679261485617, "grad_norm": 0.27734375, "learning_rate": 0.0010285098893482187, "loss": 2.1076, "step": 24008 }, { "epoch": 0.6442947617003005, "grad_norm": 0.283203125, "learning_rate": 0.001028490218488353, "loss": 2.0212, "step": 24009 }, { "epoch": 0.6443215972520395, "grad_norm": 0.28125, "learning_rate": 0.0010284705466885055, "loss": 2.056, "step": 24010 }, { "epoch": 0.6443484328037784, "grad_norm": 0.2734375, "learning_rate": 0.00102845087394872, "loss": 1.9422, "step": 24011 }, { "epoch": 0.6443752683555174, "grad_norm": 0.26953125, "learning_rate": 0.0010284312002690392, "loss": 2.0487, "step": 24012 }, { "epoch": 0.6444021039072564, "grad_norm": 0.275390625, "learning_rate": 0.0010284115256495065, "loss": 1.9856, "step": 24013 }, { "epoch": 0.6444289394589953, "grad_norm": 0.2734375, "learning_rate": 0.001028391850090165, "loss": 1.9668, "step": 24014 }, { "epoch": 0.6444557750107343, "grad_norm": 0.275390625, "learning_rate": 0.0010283721735910576, "loss": 2.0, "step": 24015 }, { "epoch": 0.6444826105624731, "grad_norm": 0.27734375, "learning_rate": 0.0010283524961522278, "loss": 2.0941, "step": 24016 }, { "epoch": 0.6445094461142121, "grad_norm": 0.27734375, "learning_rate": 0.0010283328177737186, "loss": 2.0104, "step": 24017 }, { "epoch": 0.644536281665951, "grad_norm": 0.27734375, "learning_rate": 0.0010283131384555733, "loss": 2.0005, "step": 24018 }, { "epoch": 0.64456311721769, "grad_norm": 0.287109375, "learning_rate": 0.001028293458197835, "loss": 2.079, "step": 24019 }, { "epoch": 0.644589952769429, "grad_norm": 0.30859375, "learning_rate": 0.0010282737770005465, "loss": 2.3102, "step": 24020 }, { "epoch": 0.6446167883211679, "grad_norm": 0.3125, "learning_rate": 0.0010282540948637518, "loss": 2.2423, "step": 24021 }, { "epoch": 0.6446436238729069, "grad_norm": 0.314453125, "learning_rate": 0.0010282344117874933, "loss": 2.1467, "step": 24022 }, { "epoch": 0.6446704594246457, "grad_norm": 0.3046875, "learning_rate": 0.0010282147277718148, "loss": 2.22, "step": 24023 }, { "epoch": 0.6446972949763847, "grad_norm": 0.28125, "learning_rate": 0.0010281950428167588, "loss": 2.1574, "step": 24024 }, { "epoch": 0.6447241305281236, "grad_norm": 0.30078125, "learning_rate": 0.0010281753569223691, "loss": 2.1758, "step": 24025 }, { "epoch": 0.6447509660798626, "grad_norm": 0.283203125, "learning_rate": 0.0010281556700886885, "loss": 2.1837, "step": 24026 }, { "epoch": 0.6447778016316016, "grad_norm": 0.279296875, "learning_rate": 0.0010281359823157605, "loss": 2.1225, "step": 24027 }, { "epoch": 0.6448046371833405, "grad_norm": 0.283203125, "learning_rate": 0.0010281162936036279, "loss": 2.1547, "step": 24028 }, { "epoch": 0.6448314727350795, "grad_norm": 0.29296875, "learning_rate": 0.0010280966039523344, "loss": 2.2193, "step": 24029 }, { "epoch": 0.6448583082868183, "grad_norm": 0.2734375, "learning_rate": 0.0010280769133619227, "loss": 2.0562, "step": 24030 }, { "epoch": 0.6448851438385573, "grad_norm": 0.28515625, "learning_rate": 0.001028057221832436, "loss": 2.1885, "step": 24031 }, { "epoch": 0.6449119793902963, "grad_norm": 0.26953125, "learning_rate": 0.001028037529363918, "loss": 2.1433, "step": 24032 }, { "epoch": 0.6449388149420352, "grad_norm": 0.275390625, "learning_rate": 0.0010280178359564115, "loss": 2.087, "step": 24033 }, { "epoch": 0.6449656504937742, "grad_norm": 0.28125, "learning_rate": 0.0010279981416099599, "loss": 2.0958, "step": 24034 }, { "epoch": 0.6449924860455131, "grad_norm": 0.271484375, "learning_rate": 0.001027978446324606, "loss": 2.1347, "step": 24035 }, { "epoch": 0.6450193215972521, "grad_norm": 0.275390625, "learning_rate": 0.0010279587501003935, "loss": 2.0439, "step": 24036 }, { "epoch": 0.6450461571489909, "grad_norm": 0.267578125, "learning_rate": 0.0010279390529373655, "loss": 2.1446, "step": 24037 }, { "epoch": 0.6450729927007299, "grad_norm": 0.2734375, "learning_rate": 0.001027919354835565, "loss": 2.203, "step": 24038 }, { "epoch": 0.6450998282524689, "grad_norm": 0.279296875, "learning_rate": 0.0010278996557950353, "loss": 2.2266, "step": 24039 }, { "epoch": 0.6451266638042078, "grad_norm": 0.271484375, "learning_rate": 0.0010278799558158197, "loss": 2.1115, "step": 24040 }, { "epoch": 0.6451534993559468, "grad_norm": 0.265625, "learning_rate": 0.0010278602548979615, "loss": 2.0197, "step": 24041 }, { "epoch": 0.6451803349076857, "grad_norm": 0.2734375, "learning_rate": 0.0010278405530415036, "loss": 2.0884, "step": 24042 }, { "epoch": 0.6452071704594247, "grad_norm": 0.271484375, "learning_rate": 0.0010278208502464894, "loss": 2.1435, "step": 24043 }, { "epoch": 0.6452340060111635, "grad_norm": 0.26953125, "learning_rate": 0.0010278011465129622, "loss": 2.1388, "step": 24044 }, { "epoch": 0.6452608415629025, "grad_norm": 0.28125, "learning_rate": 0.001027781441840965, "loss": 2.158, "step": 24045 }, { "epoch": 0.6452876771146415, "grad_norm": 0.279296875, "learning_rate": 0.0010277617362305412, "loss": 2.0692, "step": 24046 }, { "epoch": 0.6453145126663804, "grad_norm": 0.28125, "learning_rate": 0.0010277420296817342, "loss": 2.1542, "step": 24047 }, { "epoch": 0.6453413482181194, "grad_norm": 0.287109375, "learning_rate": 0.0010277223221945868, "loss": 2.1509, "step": 24048 }, { "epoch": 0.6453681837698583, "grad_norm": 0.271484375, "learning_rate": 0.0010277026137691426, "loss": 1.9592, "step": 24049 }, { "epoch": 0.6453950193215973, "grad_norm": 0.28515625, "learning_rate": 0.0010276829044054447, "loss": 2.1004, "step": 24050 }, { "epoch": 0.6454218548733361, "grad_norm": 0.279296875, "learning_rate": 0.0010276631941035363, "loss": 2.1955, "step": 24051 }, { "epoch": 0.6454486904250751, "grad_norm": 0.275390625, "learning_rate": 0.0010276434828634608, "loss": 2.0323, "step": 24052 }, { "epoch": 0.6454755259768141, "grad_norm": 0.26953125, "learning_rate": 0.0010276237706852611, "loss": 2.1203, "step": 24053 }, { "epoch": 0.645502361528553, "grad_norm": 0.283203125, "learning_rate": 0.0010276040575689807, "loss": 2.1569, "step": 24054 }, { "epoch": 0.645529197080292, "grad_norm": 0.275390625, "learning_rate": 0.0010275843435146627, "loss": 2.0146, "step": 24055 }, { "epoch": 0.6455560326320309, "grad_norm": 0.28515625, "learning_rate": 0.0010275646285223506, "loss": 2.0516, "step": 24056 }, { "epoch": 0.6455828681837699, "grad_norm": 0.291015625, "learning_rate": 0.0010275449125920876, "loss": 2.0653, "step": 24057 }, { "epoch": 0.6456097037355089, "grad_norm": 0.28515625, "learning_rate": 0.0010275251957239167, "loss": 2.0629, "step": 24058 }, { "epoch": 0.6456365392872477, "grad_norm": 0.287109375, "learning_rate": 0.0010275054779178812, "loss": 2.1129, "step": 24059 }, { "epoch": 0.6456633748389867, "grad_norm": 0.275390625, "learning_rate": 0.0010274857591740247, "loss": 2.0518, "step": 24060 }, { "epoch": 0.6456902103907256, "grad_norm": 0.291015625, "learning_rate": 0.0010274660394923901, "loss": 2.0194, "step": 24061 }, { "epoch": 0.6457170459424646, "grad_norm": 0.275390625, "learning_rate": 0.0010274463188730209, "loss": 1.9978, "step": 24062 }, { "epoch": 0.6457438814942035, "grad_norm": 0.29296875, "learning_rate": 0.00102742659731596, "loss": 2.0793, "step": 24063 }, { "epoch": 0.6457707170459425, "grad_norm": 0.314453125, "learning_rate": 0.001027406874821251, "loss": 2.2185, "step": 24064 }, { "epoch": 0.6457975525976815, "grad_norm": 0.3125, "learning_rate": 0.001027387151388937, "loss": 2.2811, "step": 24065 }, { "epoch": 0.6458243881494203, "grad_norm": 0.298828125, "learning_rate": 0.0010273674270190613, "loss": 2.2542, "step": 24066 }, { "epoch": 0.6458512237011593, "grad_norm": 0.330078125, "learning_rate": 0.0010273477017116676, "loss": 2.3238, "step": 24067 }, { "epoch": 0.6458780592528982, "grad_norm": 0.318359375, "learning_rate": 0.0010273279754667982, "loss": 2.2516, "step": 24068 }, { "epoch": 0.6459048948046372, "grad_norm": 0.291015625, "learning_rate": 0.0010273082482844973, "loss": 2.1689, "step": 24069 }, { "epoch": 0.6459317303563761, "grad_norm": 0.28515625, "learning_rate": 0.0010272885201648076, "loss": 2.1914, "step": 24070 }, { "epoch": 0.6459585659081151, "grad_norm": 0.28125, "learning_rate": 0.0010272687911077728, "loss": 2.2741, "step": 24071 }, { "epoch": 0.6459854014598541, "grad_norm": 0.279296875, "learning_rate": 0.0010272490611134358, "loss": 2.1797, "step": 24072 }, { "epoch": 0.6460122370115929, "grad_norm": 0.2890625, "learning_rate": 0.0010272293301818401, "loss": 2.1407, "step": 24073 }, { "epoch": 0.6460390725633319, "grad_norm": 0.28515625, "learning_rate": 0.0010272095983130292, "loss": 2.19, "step": 24074 }, { "epoch": 0.6460659081150708, "grad_norm": 0.275390625, "learning_rate": 0.0010271898655070458, "loss": 2.2204, "step": 24075 }, { "epoch": 0.6460927436668098, "grad_norm": 0.267578125, "learning_rate": 0.0010271701317639337, "loss": 2.0853, "step": 24076 }, { "epoch": 0.6461195792185487, "grad_norm": 0.2890625, "learning_rate": 0.001027150397083736, "loss": 2.2305, "step": 24077 }, { "epoch": 0.6461464147702877, "grad_norm": 0.283203125, "learning_rate": 0.001027130661466496, "loss": 2.1641, "step": 24078 }, { "epoch": 0.6461732503220267, "grad_norm": 0.26953125, "learning_rate": 0.001027110924912257, "loss": 2.1242, "step": 24079 }, { "epoch": 0.6462000858737655, "grad_norm": 0.27734375, "learning_rate": 0.0010270911874210621, "loss": 2.1092, "step": 24080 }, { "epoch": 0.6462269214255045, "grad_norm": 0.2734375, "learning_rate": 0.0010270714489929549, "loss": 2.1803, "step": 24081 }, { "epoch": 0.6462537569772434, "grad_norm": 0.271484375, "learning_rate": 0.0010270517096279785, "loss": 2.0924, "step": 24082 }, { "epoch": 0.6462805925289824, "grad_norm": 0.279296875, "learning_rate": 0.0010270319693261765, "loss": 2.0739, "step": 24083 }, { "epoch": 0.6463074280807214, "grad_norm": 0.267578125, "learning_rate": 0.0010270122280875919, "loss": 2.1595, "step": 24084 }, { "epoch": 0.6463342636324603, "grad_norm": 0.267578125, "learning_rate": 0.0010269924859122681, "loss": 2.0931, "step": 24085 }, { "epoch": 0.6463610991841993, "grad_norm": 0.279296875, "learning_rate": 0.0010269727428002483, "loss": 2.0941, "step": 24086 }, { "epoch": 0.6463879347359381, "grad_norm": 0.26953125, "learning_rate": 0.001026952998751576, "loss": 2.0436, "step": 24087 }, { "epoch": 0.6464147702876771, "grad_norm": 0.271484375, "learning_rate": 0.0010269332537662945, "loss": 2.0492, "step": 24088 }, { "epoch": 0.646441605839416, "grad_norm": 0.283203125, "learning_rate": 0.001026913507844447, "loss": 2.1332, "step": 24089 }, { "epoch": 0.646468441391155, "grad_norm": 0.27734375, "learning_rate": 0.0010268937609860767, "loss": 2.1926, "step": 24090 }, { "epoch": 0.646495276942894, "grad_norm": 0.28515625, "learning_rate": 0.0010268740131912273, "loss": 2.1319, "step": 24091 }, { "epoch": 0.6465221124946329, "grad_norm": 0.2734375, "learning_rate": 0.001026854264459942, "loss": 2.0737, "step": 24092 }, { "epoch": 0.6465489480463719, "grad_norm": 0.27734375, "learning_rate": 0.0010268345147922638, "loss": 2.0282, "step": 24093 }, { "epoch": 0.6465757835981107, "grad_norm": 0.2734375, "learning_rate": 0.0010268147641882364, "loss": 2.1132, "step": 24094 }, { "epoch": 0.6466026191498497, "grad_norm": 0.287109375, "learning_rate": 0.0010267950126479027, "loss": 2.1751, "step": 24095 }, { "epoch": 0.6466294547015886, "grad_norm": 0.283203125, "learning_rate": 0.0010267752601713068, "loss": 2.1251, "step": 24096 }, { "epoch": 0.6466562902533276, "grad_norm": 0.271484375, "learning_rate": 0.0010267555067584911, "loss": 2.0758, "step": 24097 }, { "epoch": 0.6466831258050666, "grad_norm": 0.267578125, "learning_rate": 0.0010267357524094995, "loss": 2.0724, "step": 24098 }, { "epoch": 0.6467099613568055, "grad_norm": 0.279296875, "learning_rate": 0.0010267159971243753, "loss": 1.9774, "step": 24099 }, { "epoch": 0.6467367969085445, "grad_norm": 0.26953125, "learning_rate": 0.0010266962409031617, "loss": 2.0408, "step": 24100 }, { "epoch": 0.6467636324602833, "grad_norm": 0.28515625, "learning_rate": 0.0010266764837459019, "loss": 2.1631, "step": 24101 }, { "epoch": 0.6467904680120223, "grad_norm": 0.283203125, "learning_rate": 0.0010266567256526395, "loss": 2.1871, "step": 24102 }, { "epoch": 0.6468173035637613, "grad_norm": 0.2734375, "learning_rate": 0.0010266369666234179, "loss": 2.0578, "step": 24103 }, { "epoch": 0.6468441391155002, "grad_norm": 0.27734375, "learning_rate": 0.0010266172066582802, "loss": 2.0693, "step": 24104 }, { "epoch": 0.6468709746672392, "grad_norm": 0.283203125, "learning_rate": 0.00102659744575727, "loss": 2.0152, "step": 24105 }, { "epoch": 0.6468978102189781, "grad_norm": 0.306640625, "learning_rate": 0.0010265776839204304, "loss": 2.2256, "step": 24106 }, { "epoch": 0.6469246457707171, "grad_norm": 0.337890625, "learning_rate": 0.001026557921147805, "loss": 2.3026, "step": 24107 }, { "epoch": 0.646951481322456, "grad_norm": 0.3046875, "learning_rate": 0.0010265381574394367, "loss": 2.1875, "step": 24108 }, { "epoch": 0.6469783168741949, "grad_norm": 0.3125, "learning_rate": 0.0010265183927953693, "loss": 2.1754, "step": 24109 }, { "epoch": 0.6470051524259339, "grad_norm": 0.296875, "learning_rate": 0.0010264986272156462, "loss": 2.2959, "step": 24110 }, { "epoch": 0.6470319879776728, "grad_norm": 0.29296875, "learning_rate": 0.0010264788607003107, "loss": 2.2045, "step": 24111 }, { "epoch": 0.6470588235294118, "grad_norm": 0.279296875, "learning_rate": 0.0010264590932494058, "loss": 2.2121, "step": 24112 }, { "epoch": 0.6470856590811507, "grad_norm": 0.283203125, "learning_rate": 0.001026439324862975, "loss": 2.2808, "step": 24113 }, { "epoch": 0.6471124946328897, "grad_norm": 0.279296875, "learning_rate": 0.0010264195555410621, "loss": 2.1427, "step": 24114 }, { "epoch": 0.6471393301846285, "grad_norm": 0.275390625, "learning_rate": 0.0010263997852837097, "loss": 2.2978, "step": 24115 }, { "epoch": 0.6471661657363675, "grad_norm": 0.28515625, "learning_rate": 0.001026380014090962, "loss": 2.1021, "step": 24116 }, { "epoch": 0.6471930012881065, "grad_norm": 0.294921875, "learning_rate": 0.0010263602419628619, "loss": 2.1659, "step": 24117 }, { "epoch": 0.6472198368398454, "grad_norm": 0.287109375, "learning_rate": 0.001026340468899453, "loss": 2.2568, "step": 24118 }, { "epoch": 0.6472466723915844, "grad_norm": 0.28125, "learning_rate": 0.0010263206949007785, "loss": 2.2665, "step": 24119 }, { "epoch": 0.6472735079433233, "grad_norm": 0.271484375, "learning_rate": 0.0010263009199668815, "loss": 2.1242, "step": 24120 }, { "epoch": 0.6473003434950623, "grad_norm": 0.28515625, "learning_rate": 0.0010262811440978062, "loss": 2.1284, "step": 24121 }, { "epoch": 0.6473271790468011, "grad_norm": 0.275390625, "learning_rate": 0.0010262613672935952, "loss": 2.1806, "step": 24122 }, { "epoch": 0.6473540145985401, "grad_norm": 0.283203125, "learning_rate": 0.001026241589554292, "loss": 2.1611, "step": 24123 }, { "epoch": 0.6473808501502791, "grad_norm": 0.263671875, "learning_rate": 0.0010262218108799404, "loss": 2.0074, "step": 24124 }, { "epoch": 0.647407685702018, "grad_norm": 0.283203125, "learning_rate": 0.0010262020312705837, "loss": 2.206, "step": 24125 }, { "epoch": 0.647434521253757, "grad_norm": 0.2734375, "learning_rate": 0.0010261822507262647, "loss": 2.1098, "step": 24126 }, { "epoch": 0.6474613568054959, "grad_norm": 0.271484375, "learning_rate": 0.0010261624692470275, "loss": 2.0852, "step": 24127 }, { "epoch": 0.6474881923572349, "grad_norm": 0.27734375, "learning_rate": 0.001026142686832915, "loss": 2.1383, "step": 24128 }, { "epoch": 0.6475150279089739, "grad_norm": 0.2734375, "learning_rate": 0.0010261229034839712, "loss": 2.109, "step": 24129 }, { "epoch": 0.6475418634607127, "grad_norm": 0.2734375, "learning_rate": 0.0010261031192002389, "loss": 2.1252, "step": 24130 }, { "epoch": 0.6475686990124517, "grad_norm": 0.275390625, "learning_rate": 0.0010260833339817619, "loss": 2.1628, "step": 24131 }, { "epoch": 0.6475955345641906, "grad_norm": 0.2734375, "learning_rate": 0.0010260635478285829, "loss": 2.0915, "step": 24132 }, { "epoch": 0.6476223701159296, "grad_norm": 0.2734375, "learning_rate": 0.0010260437607407464, "loss": 2.1503, "step": 24133 }, { "epoch": 0.6476492056676685, "grad_norm": 0.2734375, "learning_rate": 0.0010260239727182948, "loss": 2.0651, "step": 24134 }, { "epoch": 0.6476760412194075, "grad_norm": 0.28125, "learning_rate": 0.0010260041837612723, "loss": 2.0753, "step": 24135 }, { "epoch": 0.6477028767711465, "grad_norm": 0.279296875, "learning_rate": 0.0010259843938697217, "loss": 2.0758, "step": 24136 }, { "epoch": 0.6477297123228853, "grad_norm": 0.27734375, "learning_rate": 0.0010259646030436867, "loss": 2.1375, "step": 24137 }, { "epoch": 0.6477565478746243, "grad_norm": 0.271484375, "learning_rate": 0.0010259448112832107, "loss": 2.1452, "step": 24138 }, { "epoch": 0.6477833834263632, "grad_norm": 0.2734375, "learning_rate": 0.0010259250185883373, "loss": 2.0968, "step": 24139 }, { "epoch": 0.6478102189781022, "grad_norm": 0.29296875, "learning_rate": 0.0010259052249591096, "loss": 2.1194, "step": 24140 }, { "epoch": 0.6478370545298411, "grad_norm": 0.26953125, "learning_rate": 0.001025885430395571, "loss": 2.0414, "step": 24141 }, { "epoch": 0.6478638900815801, "grad_norm": 0.27734375, "learning_rate": 0.0010258656348977652, "loss": 2.1319, "step": 24142 }, { "epoch": 0.6478907256333191, "grad_norm": 0.275390625, "learning_rate": 0.0010258458384657355, "loss": 2.0803, "step": 24143 }, { "epoch": 0.6479175611850579, "grad_norm": 0.26953125, "learning_rate": 0.0010258260410995251, "loss": 2.0472, "step": 24144 }, { "epoch": 0.6479443967367969, "grad_norm": 0.271484375, "learning_rate": 0.0010258062427991779, "loss": 2.0993, "step": 24145 }, { "epoch": 0.6479712322885358, "grad_norm": 0.267578125, "learning_rate": 0.0010257864435647371, "loss": 2.0283, "step": 24146 }, { "epoch": 0.6479980678402748, "grad_norm": 0.28125, "learning_rate": 0.0010257666433962458, "loss": 2.0564, "step": 24147 }, { "epoch": 0.6480249033920137, "grad_norm": 0.279296875, "learning_rate": 0.0010257468422937481, "loss": 2.0722, "step": 24148 }, { "epoch": 0.6480517389437527, "grad_norm": 0.30859375, "learning_rate": 0.001025727040257287, "loss": 2.3317, "step": 24149 }, { "epoch": 0.6480785744954917, "grad_norm": 0.328125, "learning_rate": 0.0010257072372869059, "loss": 2.2707, "step": 24150 }, { "epoch": 0.6481054100472305, "grad_norm": 0.30078125, "learning_rate": 0.0010256874333826483, "loss": 2.2013, "step": 24151 }, { "epoch": 0.6481322455989695, "grad_norm": 0.30078125, "learning_rate": 0.001025667628544558, "loss": 2.1612, "step": 24152 }, { "epoch": 0.6481590811507084, "grad_norm": 0.310546875, "learning_rate": 0.0010256478227726779, "loss": 2.2631, "step": 24153 }, { "epoch": 0.6481859167024474, "grad_norm": 0.291015625, "learning_rate": 0.0010256280160670518, "loss": 2.1884, "step": 24154 }, { "epoch": 0.6482127522541864, "grad_norm": 0.291015625, "learning_rate": 0.0010256082084277231, "loss": 2.1202, "step": 24155 }, { "epoch": 0.6482395878059253, "grad_norm": 0.283203125, "learning_rate": 0.001025588399854735, "loss": 2.2709, "step": 24156 }, { "epoch": 0.6482664233576643, "grad_norm": 0.26953125, "learning_rate": 0.0010255685903481314, "loss": 2.122, "step": 24157 }, { "epoch": 0.6482932589094031, "grad_norm": 0.271484375, "learning_rate": 0.0010255487799079553, "loss": 2.1418, "step": 24158 }, { "epoch": 0.6483200944611421, "grad_norm": 0.279296875, "learning_rate": 0.0010255289685342506, "loss": 2.0478, "step": 24159 }, { "epoch": 0.648346930012881, "grad_norm": 0.283203125, "learning_rate": 0.0010255091562270606, "loss": 2.2063, "step": 24160 }, { "epoch": 0.64837376556462, "grad_norm": 0.275390625, "learning_rate": 0.0010254893429864284, "loss": 2.1274, "step": 24161 }, { "epoch": 0.648400601116359, "grad_norm": 0.283203125, "learning_rate": 0.001025469528812398, "loss": 2.2138, "step": 24162 }, { "epoch": 0.6484274366680979, "grad_norm": 0.275390625, "learning_rate": 0.0010254497137050125, "loss": 2.0674, "step": 24163 }, { "epoch": 0.6484542722198369, "grad_norm": 0.27734375, "learning_rate": 0.0010254298976643155, "loss": 2.1579, "step": 24164 }, { "epoch": 0.6484811077715757, "grad_norm": 0.287109375, "learning_rate": 0.0010254100806903505, "loss": 2.1814, "step": 24165 }, { "epoch": 0.6485079433233147, "grad_norm": 0.28515625, "learning_rate": 0.001025390262783161, "loss": 2.2359, "step": 24166 }, { "epoch": 0.6485347788750536, "grad_norm": 0.263671875, "learning_rate": 0.0010253704439427904, "loss": 2.0844, "step": 24167 }, { "epoch": 0.6485616144267926, "grad_norm": 0.275390625, "learning_rate": 0.001025350624169282, "loss": 2.2057, "step": 24168 }, { "epoch": 0.6485884499785316, "grad_norm": 0.2734375, "learning_rate": 0.0010253308034626797, "loss": 2.1787, "step": 24169 }, { "epoch": 0.6486152855302705, "grad_norm": 0.26171875, "learning_rate": 0.0010253109818230267, "loss": 2.1096, "step": 24170 }, { "epoch": 0.6486421210820095, "grad_norm": 0.279296875, "learning_rate": 0.0010252911592503664, "loss": 2.2057, "step": 24171 }, { "epoch": 0.6486689566337484, "grad_norm": 0.267578125, "learning_rate": 0.0010252713357447427, "loss": 2.0662, "step": 24172 }, { "epoch": 0.6486957921854873, "grad_norm": 0.2578125, "learning_rate": 0.0010252515113061986, "loss": 2.0014, "step": 24173 }, { "epoch": 0.6487226277372263, "grad_norm": 0.265625, "learning_rate": 0.0010252316859347777, "loss": 2.0611, "step": 24174 }, { "epoch": 0.6487494632889652, "grad_norm": 0.28125, "learning_rate": 0.0010252118596305237, "loss": 2.1924, "step": 24175 }, { "epoch": 0.6487762988407042, "grad_norm": 0.28125, "learning_rate": 0.0010251920323934801, "loss": 2.0378, "step": 24176 }, { "epoch": 0.6488031343924431, "grad_norm": 0.26953125, "learning_rate": 0.00102517220422369, "loss": 2.129, "step": 24177 }, { "epoch": 0.6488299699441821, "grad_norm": 0.271484375, "learning_rate": 0.0010251523751211974, "loss": 2.0577, "step": 24178 }, { "epoch": 0.648856805495921, "grad_norm": 0.279296875, "learning_rate": 0.0010251325450860455, "loss": 2.1421, "step": 24179 }, { "epoch": 0.6488836410476599, "grad_norm": 0.2734375, "learning_rate": 0.0010251127141182779, "loss": 1.9665, "step": 24180 }, { "epoch": 0.6489104765993989, "grad_norm": 0.271484375, "learning_rate": 0.001025092882217938, "loss": 2.1146, "step": 24181 }, { "epoch": 0.6489373121511378, "grad_norm": 0.28125, "learning_rate": 0.0010250730493850694, "loss": 2.1813, "step": 24182 }, { "epoch": 0.6489641477028768, "grad_norm": 0.279296875, "learning_rate": 0.0010250532156197156, "loss": 2.1653, "step": 24183 }, { "epoch": 0.6489909832546157, "grad_norm": 0.26171875, "learning_rate": 0.0010250333809219202, "loss": 1.9472, "step": 24184 }, { "epoch": 0.6490178188063547, "grad_norm": 0.265625, "learning_rate": 0.0010250135452917265, "loss": 2.0488, "step": 24185 }, { "epoch": 0.6490446543580936, "grad_norm": 0.271484375, "learning_rate": 0.001024993708729178, "loss": 2.1012, "step": 24186 }, { "epoch": 0.6490714899098325, "grad_norm": 0.27734375, "learning_rate": 0.0010249738712343186, "loss": 2.0346, "step": 24187 }, { "epoch": 0.6490983254615715, "grad_norm": 0.271484375, "learning_rate": 0.0010249540328071914, "loss": 2.0526, "step": 24188 }, { "epoch": 0.6491251610133104, "grad_norm": 0.2734375, "learning_rate": 0.0010249341934478403, "loss": 2.0615, "step": 24189 }, { "epoch": 0.6491519965650494, "grad_norm": 0.265625, "learning_rate": 0.0010249143531563083, "loss": 2.075, "step": 24190 }, { "epoch": 0.6491788321167883, "grad_norm": 0.27734375, "learning_rate": 0.0010248945119326392, "loss": 2.0978, "step": 24191 }, { "epoch": 0.6492056676685273, "grad_norm": 0.361328125, "learning_rate": 0.0010248746697768768, "loss": 2.3418, "step": 24192 }, { "epoch": 0.6492325032202662, "grad_norm": 0.3046875, "learning_rate": 0.0010248548266890642, "loss": 2.1275, "step": 24193 }, { "epoch": 0.6492593387720051, "grad_norm": 0.296875, "learning_rate": 0.0010248349826692453, "loss": 2.2352, "step": 24194 }, { "epoch": 0.6492861743237441, "grad_norm": 0.291015625, "learning_rate": 0.0010248151377174632, "loss": 2.2387, "step": 24195 }, { "epoch": 0.649313009875483, "grad_norm": 0.283203125, "learning_rate": 0.0010247952918337618, "loss": 2.112, "step": 24196 }, { "epoch": 0.649339845427222, "grad_norm": 0.275390625, "learning_rate": 0.0010247754450181845, "loss": 2.1898, "step": 24197 }, { "epoch": 0.6493666809789609, "grad_norm": 0.267578125, "learning_rate": 0.0010247555972707747, "loss": 2.1077, "step": 24198 }, { "epoch": 0.6493935165306999, "grad_norm": 0.279296875, "learning_rate": 0.0010247357485915762, "loss": 2.1935, "step": 24199 }, { "epoch": 0.6494203520824389, "grad_norm": 0.28125, "learning_rate": 0.0010247158989806326, "loss": 2.2062, "step": 24200 }, { "epoch": 0.6494471876341777, "grad_norm": 0.27734375, "learning_rate": 0.0010246960484379872, "loss": 2.1639, "step": 24201 }, { "epoch": 0.6494740231859167, "grad_norm": 0.28515625, "learning_rate": 0.0010246761969636834, "loss": 2.2495, "step": 24202 }, { "epoch": 0.6495008587376556, "grad_norm": 0.26953125, "learning_rate": 0.0010246563445577652, "loss": 2.1151, "step": 24203 }, { "epoch": 0.6495276942893946, "grad_norm": 0.2734375, "learning_rate": 0.0010246364912202756, "loss": 2.0794, "step": 24204 }, { "epoch": 0.6495545298411335, "grad_norm": 0.2890625, "learning_rate": 0.0010246166369512588, "loss": 2.2548, "step": 24205 }, { "epoch": 0.6495813653928725, "grad_norm": 0.279296875, "learning_rate": 0.0010245967817507577, "loss": 2.1637, "step": 24206 }, { "epoch": 0.6496082009446115, "grad_norm": 0.27734375, "learning_rate": 0.0010245769256188165, "loss": 2.1225, "step": 24207 }, { "epoch": 0.6496350364963503, "grad_norm": 0.271484375, "learning_rate": 0.0010245570685554783, "loss": 2.0478, "step": 24208 }, { "epoch": 0.6496618720480893, "grad_norm": 0.28125, "learning_rate": 0.0010245372105607867, "loss": 2.1833, "step": 24209 }, { "epoch": 0.6496887075998282, "grad_norm": 0.26953125, "learning_rate": 0.0010245173516347855, "loss": 2.1239, "step": 24210 }, { "epoch": 0.6497155431515672, "grad_norm": 0.279296875, "learning_rate": 0.0010244974917775183, "loss": 2.1105, "step": 24211 }, { "epoch": 0.6497423787033061, "grad_norm": 0.283203125, "learning_rate": 0.0010244776309890282, "loss": 2.1531, "step": 24212 }, { "epoch": 0.6497692142550451, "grad_norm": 0.271484375, "learning_rate": 0.001024457769269359, "loss": 2.1822, "step": 24213 }, { "epoch": 0.6497960498067841, "grad_norm": 0.267578125, "learning_rate": 0.0010244379066185545, "loss": 1.995, "step": 24214 }, { "epoch": 0.649822885358523, "grad_norm": 0.265625, "learning_rate": 0.0010244180430366583, "loss": 2.1128, "step": 24215 }, { "epoch": 0.6498497209102619, "grad_norm": 0.26953125, "learning_rate": 0.0010243981785237135, "loss": 2.0855, "step": 24216 }, { "epoch": 0.6498765564620008, "grad_norm": 0.265625, "learning_rate": 0.001024378313079764, "loss": 2.1191, "step": 24217 }, { "epoch": 0.6499033920137398, "grad_norm": 0.275390625, "learning_rate": 0.0010243584467048534, "loss": 2.112, "step": 24218 }, { "epoch": 0.6499302275654787, "grad_norm": 0.2734375, "learning_rate": 0.001024338579399025, "loss": 2.1298, "step": 24219 }, { "epoch": 0.6499570631172177, "grad_norm": 0.28125, "learning_rate": 0.0010243187111623228, "loss": 2.1331, "step": 24220 }, { "epoch": 0.6499838986689567, "grad_norm": 0.28125, "learning_rate": 0.0010242988419947902, "loss": 2.1017, "step": 24221 }, { "epoch": 0.6500107342206956, "grad_norm": 0.26953125, "learning_rate": 0.0010242789718964706, "loss": 2.1776, "step": 24222 }, { "epoch": 0.6500375697724345, "grad_norm": 0.275390625, "learning_rate": 0.001024259100867408, "loss": 2.1164, "step": 24223 }, { "epoch": 0.6500644053241734, "grad_norm": 0.28125, "learning_rate": 0.0010242392289076456, "loss": 2.1534, "step": 24224 }, { "epoch": 0.6500912408759124, "grad_norm": 0.26953125, "learning_rate": 0.0010242193560172272, "loss": 2.0588, "step": 24225 }, { "epoch": 0.6501180764276514, "grad_norm": 0.265625, "learning_rate": 0.0010241994821961963, "loss": 2.0317, "step": 24226 }, { "epoch": 0.6501449119793903, "grad_norm": 0.279296875, "learning_rate": 0.0010241796074445966, "loss": 2.1099, "step": 24227 }, { "epoch": 0.6501717475311293, "grad_norm": 0.279296875, "learning_rate": 0.0010241597317624715, "loss": 2.2198, "step": 24228 }, { "epoch": 0.6501985830828682, "grad_norm": 0.27734375, "learning_rate": 0.0010241398551498648, "loss": 2.0622, "step": 24229 }, { "epoch": 0.6502254186346071, "grad_norm": 0.275390625, "learning_rate": 0.0010241199776068203, "loss": 2.1169, "step": 24230 }, { "epoch": 0.650252254186346, "grad_norm": 0.2734375, "learning_rate": 0.001024100099133381, "loss": 2.0132, "step": 24231 }, { "epoch": 0.650279089738085, "grad_norm": 0.283203125, "learning_rate": 0.001024080219729591, "loss": 2.1453, "step": 24232 }, { "epoch": 0.650305925289824, "grad_norm": 0.328125, "learning_rate": 0.0010240603393954937, "loss": 2.3076, "step": 24233 }, { "epoch": 0.6503327608415629, "grad_norm": 0.306640625, "learning_rate": 0.0010240404581311328, "loss": 2.1938, "step": 24234 }, { "epoch": 0.6503595963933019, "grad_norm": 0.29296875, "learning_rate": 0.001024020575936552, "loss": 2.244, "step": 24235 }, { "epoch": 0.6503864319450408, "grad_norm": 0.30078125, "learning_rate": 0.0010240006928117946, "loss": 2.2011, "step": 24236 }, { "epoch": 0.6504132674967797, "grad_norm": 0.291015625, "learning_rate": 0.0010239808087569044, "loss": 2.0994, "step": 24237 }, { "epoch": 0.6504401030485186, "grad_norm": 0.283203125, "learning_rate": 0.0010239609237719252, "loss": 2.1284, "step": 24238 }, { "epoch": 0.6504669386002576, "grad_norm": 0.283203125, "learning_rate": 0.0010239410378569006, "loss": 2.2141, "step": 24239 }, { "epoch": 0.6504937741519966, "grad_norm": 0.2734375, "learning_rate": 0.001023921151011874, "loss": 2.1776, "step": 24240 }, { "epoch": 0.6505206097037355, "grad_norm": 0.27734375, "learning_rate": 0.001023901263236889, "loss": 2.2091, "step": 24241 }, { "epoch": 0.6505474452554745, "grad_norm": 0.283203125, "learning_rate": 0.0010238813745319893, "loss": 2.2638, "step": 24242 }, { "epoch": 0.6505742808072134, "grad_norm": 0.27734375, "learning_rate": 0.0010238614848972186, "loss": 2.194, "step": 24243 }, { "epoch": 0.6506011163589523, "grad_norm": 0.283203125, "learning_rate": 0.0010238415943326208, "loss": 2.2407, "step": 24244 }, { "epoch": 0.6506279519106913, "grad_norm": 0.2734375, "learning_rate": 0.0010238217028382388, "loss": 2.128, "step": 24245 }, { "epoch": 0.6506547874624302, "grad_norm": 0.2734375, "learning_rate": 0.0010238018104141169, "loss": 2.1179, "step": 24246 }, { "epoch": 0.6506816230141692, "grad_norm": 0.271484375, "learning_rate": 0.0010237819170602984, "loss": 2.1669, "step": 24247 }, { "epoch": 0.6507084585659081, "grad_norm": 0.27734375, "learning_rate": 0.0010237620227768272, "loss": 2.1395, "step": 24248 }, { "epoch": 0.6507352941176471, "grad_norm": 0.267578125, "learning_rate": 0.0010237421275637467, "loss": 2.1144, "step": 24249 }, { "epoch": 0.650762129669386, "grad_norm": 0.279296875, "learning_rate": 0.0010237222314211007, "loss": 2.1802, "step": 24250 }, { "epoch": 0.650788965221125, "grad_norm": 0.279296875, "learning_rate": 0.001023702334348933, "loss": 2.1758, "step": 24251 }, { "epoch": 0.6508158007728639, "grad_norm": 0.271484375, "learning_rate": 0.0010236824363472864, "loss": 2.1554, "step": 24252 }, { "epoch": 0.6508426363246028, "grad_norm": 0.263671875, "learning_rate": 0.0010236625374162058, "loss": 2.0331, "step": 24253 }, { "epoch": 0.6508694718763418, "grad_norm": 0.26953125, "learning_rate": 0.001023642637555734, "loss": 2.1535, "step": 24254 }, { "epoch": 0.6508963074280807, "grad_norm": 0.287109375, "learning_rate": 0.0010236227367659148, "loss": 2.1711, "step": 24255 }, { "epoch": 0.6509231429798197, "grad_norm": 0.271484375, "learning_rate": 0.001023602835046792, "loss": 2.1329, "step": 24256 }, { "epoch": 0.6509499785315586, "grad_norm": 0.287109375, "learning_rate": 0.0010235829323984092, "loss": 2.1629, "step": 24257 }, { "epoch": 0.6509768140832976, "grad_norm": 0.271484375, "learning_rate": 0.0010235630288208102, "loss": 2.1039, "step": 24258 }, { "epoch": 0.6510036496350365, "grad_norm": 0.26171875, "learning_rate": 0.0010235431243140385, "loss": 2.088, "step": 24259 }, { "epoch": 0.6510304851867754, "grad_norm": 0.275390625, "learning_rate": 0.0010235232188781378, "loss": 2.0829, "step": 24260 }, { "epoch": 0.6510573207385144, "grad_norm": 0.267578125, "learning_rate": 0.0010235033125131515, "loss": 2.1463, "step": 24261 }, { "epoch": 0.6510841562902533, "grad_norm": 0.2734375, "learning_rate": 0.0010234834052191238, "loss": 2.1674, "step": 24262 }, { "epoch": 0.6511109918419923, "grad_norm": 0.27734375, "learning_rate": 0.001023463496996098, "loss": 2.1409, "step": 24263 }, { "epoch": 0.6511378273937312, "grad_norm": 0.275390625, "learning_rate": 0.001023443587844118, "loss": 2.1324, "step": 24264 }, { "epoch": 0.6511646629454702, "grad_norm": 0.28125, "learning_rate": 0.0010234236777632271, "loss": 2.0892, "step": 24265 }, { "epoch": 0.6511914984972091, "grad_norm": 0.26953125, "learning_rate": 0.0010234037667534695, "loss": 2.0923, "step": 24266 }, { "epoch": 0.651218334048948, "grad_norm": 0.267578125, "learning_rate": 0.0010233838548148885, "loss": 1.9896, "step": 24267 }, { "epoch": 0.651245169600687, "grad_norm": 0.271484375, "learning_rate": 0.0010233639419475278, "loss": 1.9953, "step": 24268 }, { "epoch": 0.6512720051524259, "grad_norm": 0.28125, "learning_rate": 0.0010233440281514313, "loss": 2.0752, "step": 24269 }, { "epoch": 0.6512988407041649, "grad_norm": 0.2734375, "learning_rate": 0.0010233241134266424, "loss": 2.0639, "step": 24270 }, { "epoch": 0.6513256762559039, "grad_norm": 0.275390625, "learning_rate": 0.001023304197773205, "loss": 2.077, "step": 24271 }, { "epoch": 0.6513525118076428, "grad_norm": 0.283203125, "learning_rate": 0.0010232842811911628, "loss": 2.0481, "step": 24272 }, { "epoch": 0.6513793473593817, "grad_norm": 0.28125, "learning_rate": 0.0010232643636805593, "loss": 2.1753, "step": 24273 }, { "epoch": 0.6514061829111206, "grad_norm": 0.298828125, "learning_rate": 0.0010232444452414385, "loss": 2.2079, "step": 24274 }, { "epoch": 0.6514330184628596, "grad_norm": 0.287109375, "learning_rate": 0.0010232245258738437, "loss": 2.1096, "step": 24275 }, { "epoch": 0.6514598540145985, "grad_norm": 0.291015625, "learning_rate": 0.001023204605577819, "loss": 2.1071, "step": 24276 }, { "epoch": 0.6514866895663375, "grad_norm": 0.283203125, "learning_rate": 0.0010231846843534076, "loss": 2.2282, "step": 24277 }, { "epoch": 0.6515135251180765, "grad_norm": 0.29296875, "learning_rate": 0.001023164762200654, "loss": 2.2109, "step": 24278 }, { "epoch": 0.6515403606698154, "grad_norm": 0.294921875, "learning_rate": 0.0010231448391196008, "loss": 2.1867, "step": 24279 }, { "epoch": 0.6515671962215543, "grad_norm": 0.275390625, "learning_rate": 0.001023124915110293, "loss": 2.2907, "step": 24280 }, { "epoch": 0.6515940317732932, "grad_norm": 0.30859375, "learning_rate": 0.001023104990172773, "loss": 2.2089, "step": 24281 }, { "epoch": 0.6516208673250322, "grad_norm": 0.27734375, "learning_rate": 0.0010230850643070854, "loss": 2.2308, "step": 24282 }, { "epoch": 0.6516477028767711, "grad_norm": 0.2734375, "learning_rate": 0.0010230651375132736, "loss": 2.2315, "step": 24283 }, { "epoch": 0.6516745384285101, "grad_norm": 0.27734375, "learning_rate": 0.0010230452097913813, "loss": 2.1915, "step": 24284 }, { "epoch": 0.6517013739802491, "grad_norm": 0.26953125, "learning_rate": 0.0010230252811414525, "loss": 2.1484, "step": 24285 }, { "epoch": 0.651728209531988, "grad_norm": 0.279296875, "learning_rate": 0.0010230053515635305, "loss": 2.0982, "step": 24286 }, { "epoch": 0.651755045083727, "grad_norm": 0.279296875, "learning_rate": 0.0010229854210576591, "loss": 2.1904, "step": 24287 }, { "epoch": 0.6517818806354658, "grad_norm": 0.2734375, "learning_rate": 0.0010229654896238823, "loss": 2.1455, "step": 24288 }, { "epoch": 0.6518087161872048, "grad_norm": 0.275390625, "learning_rate": 0.0010229455572622439, "loss": 2.1057, "step": 24289 }, { "epoch": 0.6518355517389438, "grad_norm": 0.291015625, "learning_rate": 0.001022925623972787, "loss": 2.3178, "step": 24290 }, { "epoch": 0.6518623872906827, "grad_norm": 0.267578125, "learning_rate": 0.0010229056897555558, "loss": 2.1712, "step": 24291 }, { "epoch": 0.6518892228424217, "grad_norm": 0.2734375, "learning_rate": 0.001022885754610594, "loss": 2.146, "step": 24292 }, { "epoch": 0.6519160583941606, "grad_norm": 0.2734375, "learning_rate": 0.001022865818537945, "loss": 2.1954, "step": 24293 }, { "epoch": 0.6519428939458995, "grad_norm": 0.2734375, "learning_rate": 0.001022845881537653, "loss": 2.1492, "step": 24294 }, { "epoch": 0.6519697294976384, "grad_norm": 0.265625, "learning_rate": 0.0010228259436097615, "loss": 2.1074, "step": 24295 }, { "epoch": 0.6519965650493774, "grad_norm": 0.265625, "learning_rate": 0.0010228060047543146, "loss": 2.0734, "step": 24296 }, { "epoch": 0.6520234006011164, "grad_norm": 0.28515625, "learning_rate": 0.0010227860649713555, "loss": 2.1576, "step": 24297 }, { "epoch": 0.6520502361528553, "grad_norm": 0.283203125, "learning_rate": 0.001022766124260928, "loss": 2.2014, "step": 24298 }, { "epoch": 0.6520770717045943, "grad_norm": 0.275390625, "learning_rate": 0.001022746182623076, "loss": 2.1327, "step": 24299 }, { "epoch": 0.6521039072563332, "grad_norm": 0.2734375, "learning_rate": 0.0010227262400578435, "loss": 2.0825, "step": 24300 }, { "epoch": 0.6521307428080722, "grad_norm": 0.27734375, "learning_rate": 0.0010227062965652738, "loss": 2.1797, "step": 24301 }, { "epoch": 0.652157578359811, "grad_norm": 0.2734375, "learning_rate": 0.001022686352145411, "loss": 2.0914, "step": 24302 }, { "epoch": 0.65218441391155, "grad_norm": 0.2734375, "learning_rate": 0.0010226664067982986, "loss": 2.0931, "step": 24303 }, { "epoch": 0.652211249463289, "grad_norm": 0.271484375, "learning_rate": 0.0010226464605239804, "loss": 1.9977, "step": 24304 }, { "epoch": 0.6522380850150279, "grad_norm": 0.263671875, "learning_rate": 0.0010226265133225002, "loss": 1.9993, "step": 24305 }, { "epoch": 0.6522649205667669, "grad_norm": 0.275390625, "learning_rate": 0.001022606565193902, "loss": 2.1004, "step": 24306 }, { "epoch": 0.6522917561185058, "grad_norm": 0.275390625, "learning_rate": 0.0010225866161382292, "loss": 2.0833, "step": 24307 }, { "epoch": 0.6523185916702448, "grad_norm": 0.28125, "learning_rate": 0.0010225666661555256, "loss": 2.0974, "step": 24308 }, { "epoch": 0.6523454272219836, "grad_norm": 0.26953125, "learning_rate": 0.001022546715245835, "loss": 2.1342, "step": 24309 }, { "epoch": 0.6523722627737226, "grad_norm": 0.27734375, "learning_rate": 0.0010225267634092015, "loss": 2.0974, "step": 24310 }, { "epoch": 0.6523990983254616, "grad_norm": 0.27734375, "learning_rate": 0.0010225068106456683, "loss": 2.1723, "step": 24311 }, { "epoch": 0.6524259338772005, "grad_norm": 0.265625, "learning_rate": 0.0010224868569552795, "loss": 2.0004, "step": 24312 }, { "epoch": 0.6524527694289395, "grad_norm": 0.2734375, "learning_rate": 0.0010224669023380791, "loss": 2.0635, "step": 24313 }, { "epoch": 0.6524796049806784, "grad_norm": 0.279296875, "learning_rate": 0.0010224469467941103, "loss": 2.0396, "step": 24314 }, { "epoch": 0.6525064405324174, "grad_norm": 0.298828125, "learning_rate": 0.0010224269903234173, "loss": 2.1497, "step": 24315 }, { "epoch": 0.6525332760841563, "grad_norm": 0.294921875, "learning_rate": 0.001022407032926044, "loss": 2.2149, "step": 24316 }, { "epoch": 0.6525601116358952, "grad_norm": 0.28515625, "learning_rate": 0.0010223870746020338, "loss": 2.1472, "step": 24317 }, { "epoch": 0.6525869471876342, "grad_norm": 0.32421875, "learning_rate": 0.0010223671153514304, "loss": 2.3109, "step": 24318 }, { "epoch": 0.6526137827393731, "grad_norm": 0.296875, "learning_rate": 0.001022347155174278, "loss": 2.18, "step": 24319 }, { "epoch": 0.6526406182911121, "grad_norm": 0.28515625, "learning_rate": 0.00102232719407062, "loss": 2.3224, "step": 24320 }, { "epoch": 0.652667453842851, "grad_norm": 0.28125, "learning_rate": 0.0010223072320405006, "loss": 2.1332, "step": 24321 }, { "epoch": 0.65269428939459, "grad_norm": 0.271484375, "learning_rate": 0.0010222872690839634, "loss": 2.0653, "step": 24322 }, { "epoch": 0.652721124946329, "grad_norm": 0.2734375, "learning_rate": 0.001022267305201052, "loss": 2.1958, "step": 24323 }, { "epoch": 0.6527479604980678, "grad_norm": 0.2890625, "learning_rate": 0.0010222473403918104, "loss": 2.2675, "step": 24324 }, { "epoch": 0.6527747960498068, "grad_norm": 0.28125, "learning_rate": 0.0010222273746562827, "loss": 2.1962, "step": 24325 }, { "epoch": 0.6528016316015457, "grad_norm": 0.27734375, "learning_rate": 0.001022207407994512, "loss": 2.2538, "step": 24326 }, { "epoch": 0.6528284671532847, "grad_norm": 0.275390625, "learning_rate": 0.0010221874404065425, "loss": 2.1623, "step": 24327 }, { "epoch": 0.6528553027050236, "grad_norm": 0.271484375, "learning_rate": 0.001022167471892418, "loss": 2.0815, "step": 24328 }, { "epoch": 0.6528821382567626, "grad_norm": 0.275390625, "learning_rate": 0.0010221475024521822, "loss": 2.2223, "step": 24329 }, { "epoch": 0.6529089738085015, "grad_norm": 0.2734375, "learning_rate": 0.001022127532085879, "loss": 2.1449, "step": 24330 }, { "epoch": 0.6529358093602404, "grad_norm": 0.275390625, "learning_rate": 0.0010221075607935524, "loss": 2.1825, "step": 24331 }, { "epoch": 0.6529626449119794, "grad_norm": 0.267578125, "learning_rate": 0.001022087588575246, "loss": 2.0688, "step": 24332 }, { "epoch": 0.6529894804637183, "grad_norm": 0.265625, "learning_rate": 0.0010220676154310032, "loss": 2.0401, "step": 24333 }, { "epoch": 0.6530163160154573, "grad_norm": 0.265625, "learning_rate": 0.0010220476413608686, "loss": 2.0706, "step": 24334 }, { "epoch": 0.6530431515671962, "grad_norm": 0.28515625, "learning_rate": 0.0010220276663648856, "loss": 2.2288, "step": 24335 }, { "epoch": 0.6530699871189352, "grad_norm": 0.271484375, "learning_rate": 0.0010220076904430978, "loss": 2.1304, "step": 24336 }, { "epoch": 0.6530968226706741, "grad_norm": 0.265625, "learning_rate": 0.0010219877135955494, "loss": 2.0742, "step": 24337 }, { "epoch": 0.653123658222413, "grad_norm": 0.267578125, "learning_rate": 0.0010219677358222843, "loss": 2.1408, "step": 24338 }, { "epoch": 0.653150493774152, "grad_norm": 0.275390625, "learning_rate": 0.001021947757123346, "loss": 2.0138, "step": 24339 }, { "epoch": 0.6531773293258909, "grad_norm": 0.275390625, "learning_rate": 0.0010219277774987784, "loss": 2.0814, "step": 24340 }, { "epoch": 0.6532041648776299, "grad_norm": 0.263671875, "learning_rate": 0.0010219077969486254, "loss": 2.1079, "step": 24341 }, { "epoch": 0.6532310004293689, "grad_norm": 0.26953125, "learning_rate": 0.0010218878154729311, "loss": 2.0839, "step": 24342 }, { "epoch": 0.6532578359811078, "grad_norm": 0.28125, "learning_rate": 0.0010218678330717386, "loss": 2.2092, "step": 24343 }, { "epoch": 0.6532846715328468, "grad_norm": 0.26953125, "learning_rate": 0.0010218478497450926, "loss": 2.0086, "step": 24344 }, { "epoch": 0.6533115070845856, "grad_norm": 0.26953125, "learning_rate": 0.0010218278654930363, "loss": 2.0736, "step": 24345 }, { "epoch": 0.6533383426363246, "grad_norm": 0.263671875, "learning_rate": 0.0010218078803156138, "loss": 2.0992, "step": 24346 }, { "epoch": 0.6533651781880635, "grad_norm": 0.267578125, "learning_rate": 0.001021787894212869, "loss": 2.1175, "step": 24347 }, { "epoch": 0.6533920137398025, "grad_norm": 0.26953125, "learning_rate": 0.0010217679071848455, "loss": 2.075, "step": 24348 }, { "epoch": 0.6534188492915415, "grad_norm": 0.2734375, "learning_rate": 0.0010217479192315875, "loss": 2.1009, "step": 24349 }, { "epoch": 0.6534456848432804, "grad_norm": 0.267578125, "learning_rate": 0.0010217279303531383, "loss": 2.1372, "step": 24350 }, { "epoch": 0.6534725203950194, "grad_norm": 0.27734375, "learning_rate": 0.0010217079405495425, "loss": 2.0678, "step": 24351 }, { "epoch": 0.6534993559467582, "grad_norm": 0.271484375, "learning_rate": 0.0010216879498208434, "loss": 2.0577, "step": 24352 }, { "epoch": 0.6535261914984972, "grad_norm": 0.283203125, "learning_rate": 0.0010216679581670847, "loss": 2.0729, "step": 24353 }, { "epoch": 0.6535530270502361, "grad_norm": 0.271484375, "learning_rate": 0.001021647965588311, "loss": 2.0624, "step": 24354 }, { "epoch": 0.6535798626019751, "grad_norm": 0.302734375, "learning_rate": 0.0010216279720845656, "loss": 2.2528, "step": 24355 }, { "epoch": 0.6536066981537141, "grad_norm": 0.298828125, "learning_rate": 0.0010216079776558923, "loss": 2.2393, "step": 24356 }, { "epoch": 0.653633533705453, "grad_norm": 0.2890625, "learning_rate": 0.0010215879823023352, "loss": 2.2418, "step": 24357 }, { "epoch": 0.653660369257192, "grad_norm": 0.287109375, "learning_rate": 0.0010215679860239382, "loss": 2.1674, "step": 24358 }, { "epoch": 0.6536872048089308, "grad_norm": 0.302734375, "learning_rate": 0.001021547988820745, "loss": 2.2257, "step": 24359 }, { "epoch": 0.6537140403606698, "grad_norm": 0.294921875, "learning_rate": 0.0010215279906927997, "loss": 2.2476, "step": 24360 }, { "epoch": 0.6537408759124088, "grad_norm": 0.2734375, "learning_rate": 0.0010215079916401457, "loss": 2.1146, "step": 24361 }, { "epoch": 0.6537677114641477, "grad_norm": 0.271484375, "learning_rate": 0.0010214879916628273, "loss": 2.2434, "step": 24362 }, { "epoch": 0.6537945470158867, "grad_norm": 0.29296875, "learning_rate": 0.0010214679907608881, "loss": 2.2596, "step": 24363 }, { "epoch": 0.6538213825676256, "grad_norm": 0.279296875, "learning_rate": 0.0010214479889343725, "loss": 2.1838, "step": 24364 }, { "epoch": 0.6538482181193646, "grad_norm": 0.27734375, "learning_rate": 0.0010214279861833237, "loss": 2.1352, "step": 24365 }, { "epoch": 0.6538750536711034, "grad_norm": 0.271484375, "learning_rate": 0.001021407982507786, "loss": 2.2359, "step": 24366 }, { "epoch": 0.6539018892228424, "grad_norm": 0.2734375, "learning_rate": 0.0010213879779078032, "loss": 2.19, "step": 24367 }, { "epoch": 0.6539287247745814, "grad_norm": 0.2734375, "learning_rate": 0.001021367972383419, "loss": 2.0796, "step": 24368 }, { "epoch": 0.6539555603263203, "grad_norm": 0.2890625, "learning_rate": 0.0010213479659346777, "loss": 2.2201, "step": 24369 }, { "epoch": 0.6539823958780593, "grad_norm": 0.275390625, "learning_rate": 0.0010213279585616227, "loss": 2.1321, "step": 24370 }, { "epoch": 0.6540092314297982, "grad_norm": 0.275390625, "learning_rate": 0.0010213079502642982, "loss": 2.1988, "step": 24371 }, { "epoch": 0.6540360669815372, "grad_norm": 0.28125, "learning_rate": 0.001021287941042748, "loss": 2.2104, "step": 24372 }, { "epoch": 0.654062902533276, "grad_norm": 0.275390625, "learning_rate": 0.001021267930897016, "loss": 2.178, "step": 24373 }, { "epoch": 0.654089738085015, "grad_norm": 0.27734375, "learning_rate": 0.001021247919827146, "loss": 2.1999, "step": 24374 }, { "epoch": 0.654116573636754, "grad_norm": 0.26953125, "learning_rate": 0.001021227907833182, "loss": 2.1331, "step": 24375 }, { "epoch": 0.6541434091884929, "grad_norm": 0.26953125, "learning_rate": 0.0010212078949151681, "loss": 2.1348, "step": 24376 }, { "epoch": 0.6541702447402319, "grad_norm": 0.267578125, "learning_rate": 0.0010211878810731478, "loss": 2.1255, "step": 24377 }, { "epoch": 0.6541970802919708, "grad_norm": 0.271484375, "learning_rate": 0.0010211678663071652, "loss": 2.0893, "step": 24378 }, { "epoch": 0.6542239158437098, "grad_norm": 0.2890625, "learning_rate": 0.0010211478506172644, "loss": 2.1186, "step": 24379 }, { "epoch": 0.6542507513954486, "grad_norm": 0.279296875, "learning_rate": 0.001021127834003489, "loss": 2.1728, "step": 24380 }, { "epoch": 0.6542775869471876, "grad_norm": 0.263671875, "learning_rate": 0.001021107816465883, "loss": 1.9894, "step": 24381 }, { "epoch": 0.6543044224989266, "grad_norm": 0.2734375, "learning_rate": 0.0010210877980044903, "loss": 2.1329, "step": 24382 }, { "epoch": 0.6543312580506655, "grad_norm": 0.271484375, "learning_rate": 0.001021067778619355, "loss": 2.1562, "step": 24383 }, { "epoch": 0.6543580936024045, "grad_norm": 0.26953125, "learning_rate": 0.001021047758310521, "loss": 2.0686, "step": 24384 }, { "epoch": 0.6543849291541434, "grad_norm": 0.267578125, "learning_rate": 0.001021027737078032, "loss": 2.1352, "step": 24385 }, { "epoch": 0.6544117647058824, "grad_norm": 0.26953125, "learning_rate": 0.0010210077149219318, "loss": 2.0975, "step": 24386 }, { "epoch": 0.6544386002576214, "grad_norm": 0.265625, "learning_rate": 0.0010209876918422648, "loss": 2.0826, "step": 24387 }, { "epoch": 0.6544654358093602, "grad_norm": 0.2734375, "learning_rate": 0.0010209676678390742, "loss": 2.0839, "step": 24388 }, { "epoch": 0.6544922713610992, "grad_norm": 0.27734375, "learning_rate": 0.0010209476429124049, "loss": 2.0864, "step": 24389 }, { "epoch": 0.6545191069128381, "grad_norm": 0.27734375, "learning_rate": 0.0010209276170623, "loss": 2.0769, "step": 24390 }, { "epoch": 0.6545459424645771, "grad_norm": 0.283203125, "learning_rate": 0.0010209075902888039, "loss": 2.1436, "step": 24391 }, { "epoch": 0.654572778016316, "grad_norm": 0.279296875, "learning_rate": 0.0010208875625919603, "loss": 2.0923, "step": 24392 }, { "epoch": 0.654599613568055, "grad_norm": 0.27734375, "learning_rate": 0.0010208675339718133, "loss": 2.097, "step": 24393 }, { "epoch": 0.654626449119794, "grad_norm": 0.26953125, "learning_rate": 0.0010208475044284069, "loss": 2.1384, "step": 24394 }, { "epoch": 0.6546532846715328, "grad_norm": 0.298828125, "learning_rate": 0.0010208274739617845, "loss": 2.1298, "step": 24395 }, { "epoch": 0.6546801202232718, "grad_norm": 0.302734375, "learning_rate": 0.0010208074425719907, "loss": 2.1889, "step": 24396 }, { "epoch": 0.6547069557750107, "grad_norm": 0.291015625, "learning_rate": 0.0010207874102590691, "loss": 2.2094, "step": 24397 }, { "epoch": 0.6547337913267497, "grad_norm": 0.32421875, "learning_rate": 0.0010207673770230637, "loss": 2.1822, "step": 24398 }, { "epoch": 0.6547606268784886, "grad_norm": 0.291015625, "learning_rate": 0.0010207473428640184, "loss": 2.1421, "step": 24399 }, { "epoch": 0.6547874624302276, "grad_norm": 0.294921875, "learning_rate": 0.0010207273077819776, "loss": 2.1515, "step": 24400 }, { "epoch": 0.6548142979819666, "grad_norm": 0.287109375, "learning_rate": 0.0010207072717769844, "loss": 2.2062, "step": 24401 }, { "epoch": 0.6548411335337054, "grad_norm": 0.2890625, "learning_rate": 0.0010206872348490835, "loss": 2.182, "step": 24402 }, { "epoch": 0.6548679690854444, "grad_norm": 0.2734375, "learning_rate": 0.0010206671969983185, "loss": 2.0898, "step": 24403 }, { "epoch": 0.6548948046371833, "grad_norm": 0.275390625, "learning_rate": 0.0010206471582247334, "loss": 2.2488, "step": 24404 }, { "epoch": 0.6549216401889223, "grad_norm": 0.2734375, "learning_rate": 0.001020627118528372, "loss": 2.1565, "step": 24405 }, { "epoch": 0.6549484757406612, "grad_norm": 0.2890625, "learning_rate": 0.0010206070779092788, "loss": 2.1618, "step": 24406 }, { "epoch": 0.6549753112924002, "grad_norm": 0.275390625, "learning_rate": 0.0010205870363674972, "loss": 2.093, "step": 24407 }, { "epoch": 0.6550021468441392, "grad_norm": 0.283203125, "learning_rate": 0.0010205669939030715, "loss": 2.2284, "step": 24408 }, { "epoch": 0.655028982395878, "grad_norm": 0.26953125, "learning_rate": 0.0010205469505160454, "loss": 2.152, "step": 24409 }, { "epoch": 0.655055817947617, "grad_norm": 0.267578125, "learning_rate": 0.0010205269062064633, "loss": 2.1374, "step": 24410 }, { "epoch": 0.6550826534993559, "grad_norm": 0.263671875, "learning_rate": 0.0010205068609743686, "loss": 2.127, "step": 24411 }, { "epoch": 0.6551094890510949, "grad_norm": 0.26953125, "learning_rate": 0.0010204868148198057, "loss": 2.1563, "step": 24412 }, { "epoch": 0.6551363246028339, "grad_norm": 0.275390625, "learning_rate": 0.0010204667677428184, "loss": 2.1787, "step": 24413 }, { "epoch": 0.6551631601545728, "grad_norm": 0.265625, "learning_rate": 0.0010204467197434508, "loss": 2.0666, "step": 24414 }, { "epoch": 0.6551899957063118, "grad_norm": 0.267578125, "learning_rate": 0.0010204266708217465, "loss": 2.1319, "step": 24415 }, { "epoch": 0.6552168312580506, "grad_norm": 0.265625, "learning_rate": 0.00102040662097775, "loss": 2.1183, "step": 24416 }, { "epoch": 0.6552436668097896, "grad_norm": 0.27734375, "learning_rate": 0.001020386570211505, "loss": 2.1392, "step": 24417 }, { "epoch": 0.6552705023615285, "grad_norm": 0.275390625, "learning_rate": 0.0010203665185230556, "loss": 2.0724, "step": 24418 }, { "epoch": 0.6552973379132675, "grad_norm": 0.26953125, "learning_rate": 0.0010203464659124459, "loss": 2.0801, "step": 24419 }, { "epoch": 0.6553241734650065, "grad_norm": 0.26953125, "learning_rate": 0.0010203264123797196, "loss": 2.1135, "step": 24420 }, { "epoch": 0.6553510090167454, "grad_norm": 0.27734375, "learning_rate": 0.0010203063579249205, "loss": 2.1819, "step": 24421 }, { "epoch": 0.6553778445684844, "grad_norm": 0.279296875, "learning_rate": 0.0010202863025480934, "loss": 2.1201, "step": 24422 }, { "epoch": 0.6554046801202232, "grad_norm": 0.26171875, "learning_rate": 0.0010202662462492813, "loss": 2.1382, "step": 24423 }, { "epoch": 0.6554315156719622, "grad_norm": 0.2734375, "learning_rate": 0.0010202461890285292, "loss": 2.1389, "step": 24424 }, { "epoch": 0.6554583512237011, "grad_norm": 0.2734375, "learning_rate": 0.0010202261308858802, "loss": 2.0875, "step": 24425 }, { "epoch": 0.6554851867754401, "grad_norm": 0.26953125, "learning_rate": 0.001020206071821379, "loss": 2.0582, "step": 24426 }, { "epoch": 0.6555120223271791, "grad_norm": 0.26953125, "learning_rate": 0.0010201860118350691, "loss": 2.0733, "step": 24427 }, { "epoch": 0.655538857878918, "grad_norm": 0.271484375, "learning_rate": 0.0010201659509269949, "loss": 2.1338, "step": 24428 }, { "epoch": 0.655565693430657, "grad_norm": 0.271484375, "learning_rate": 0.0010201458890972, "loss": 2.0887, "step": 24429 }, { "epoch": 0.6555925289823958, "grad_norm": 0.28125, "learning_rate": 0.0010201258263457286, "loss": 2.123, "step": 24430 }, { "epoch": 0.6556193645341348, "grad_norm": 0.263671875, "learning_rate": 0.001020105762672625, "loss": 1.988, "step": 24431 }, { "epoch": 0.6556462000858738, "grad_norm": 0.267578125, "learning_rate": 0.001020085698077933, "loss": 1.9876, "step": 24432 }, { "epoch": 0.6556730356376127, "grad_norm": 0.259765625, "learning_rate": 0.001020065632561696, "loss": 2.0625, "step": 24433 }, { "epoch": 0.6556998711893517, "grad_norm": 0.275390625, "learning_rate": 0.0010200455661239591, "loss": 2.0264, "step": 24434 }, { "epoch": 0.6557267067410906, "grad_norm": 0.310546875, "learning_rate": 0.0010200254987647657, "loss": 2.2516, "step": 24435 }, { "epoch": 0.6557535422928296, "grad_norm": 0.328125, "learning_rate": 0.0010200054304841599, "loss": 2.1951, "step": 24436 }, { "epoch": 0.6557803778445684, "grad_norm": 0.3046875, "learning_rate": 0.0010199853612821858, "loss": 2.1723, "step": 24437 }, { "epoch": 0.6558072133963074, "grad_norm": 0.291015625, "learning_rate": 0.0010199652911588874, "loss": 2.1595, "step": 24438 }, { "epoch": 0.6558340489480464, "grad_norm": 0.2890625, "learning_rate": 0.0010199452201143087, "loss": 2.227, "step": 24439 }, { "epoch": 0.6558608844997853, "grad_norm": 0.29296875, "learning_rate": 0.0010199251481484936, "loss": 2.1539, "step": 24440 }, { "epoch": 0.6558877200515243, "grad_norm": 0.26953125, "learning_rate": 0.0010199050752614865, "loss": 2.1171, "step": 24441 }, { "epoch": 0.6559145556032632, "grad_norm": 0.263671875, "learning_rate": 0.001019885001453331, "loss": 2.0499, "step": 24442 }, { "epoch": 0.6559413911550022, "grad_norm": 0.2734375, "learning_rate": 0.0010198649267240713, "loss": 2.1664, "step": 24443 }, { "epoch": 0.655968226706741, "grad_norm": 0.27734375, "learning_rate": 0.0010198448510737516, "loss": 2.1521, "step": 24444 }, { "epoch": 0.65599506225848, "grad_norm": 0.2734375, "learning_rate": 0.0010198247745024159, "loss": 2.0794, "step": 24445 }, { "epoch": 0.656021897810219, "grad_norm": 0.275390625, "learning_rate": 0.001019804697010108, "loss": 2.1842, "step": 24446 }, { "epoch": 0.6560487333619579, "grad_norm": 0.265625, "learning_rate": 0.001019784618596872, "loss": 2.1003, "step": 24447 }, { "epoch": 0.6560755689136969, "grad_norm": 0.28125, "learning_rate": 0.0010197645392627523, "loss": 2.1134, "step": 24448 }, { "epoch": 0.6561024044654358, "grad_norm": 0.27734375, "learning_rate": 0.0010197444590077924, "loss": 2.0484, "step": 24449 }, { "epoch": 0.6561292400171748, "grad_norm": 0.27734375, "learning_rate": 0.001019724377832037, "loss": 2.1401, "step": 24450 }, { "epoch": 0.6561560755689136, "grad_norm": 0.2734375, "learning_rate": 0.0010197042957355298, "loss": 2.1272, "step": 24451 }, { "epoch": 0.6561829111206526, "grad_norm": 0.2734375, "learning_rate": 0.0010196842127183146, "loss": 2.2319, "step": 24452 }, { "epoch": 0.6562097466723916, "grad_norm": 0.275390625, "learning_rate": 0.001019664128780436, "loss": 2.179, "step": 24453 }, { "epoch": 0.6562365822241305, "grad_norm": 0.2890625, "learning_rate": 0.0010196440439219374, "loss": 2.291, "step": 24454 }, { "epoch": 0.6562634177758695, "grad_norm": 0.263671875, "learning_rate": 0.0010196239581428633, "loss": 2.0691, "step": 24455 }, { "epoch": 0.6562902533276084, "grad_norm": 0.271484375, "learning_rate": 0.001019603871443258, "loss": 2.1423, "step": 24456 }, { "epoch": 0.6563170888793474, "grad_norm": 0.275390625, "learning_rate": 0.001019583783823165, "loss": 2.1625, "step": 24457 }, { "epoch": 0.6563439244310864, "grad_norm": 0.28515625, "learning_rate": 0.0010195636952826286, "loss": 2.2517, "step": 24458 }, { "epoch": 0.6563707599828252, "grad_norm": 0.271484375, "learning_rate": 0.001019543605821693, "loss": 2.1071, "step": 24459 }, { "epoch": 0.6563975955345642, "grad_norm": 0.271484375, "learning_rate": 0.0010195235154404022, "loss": 1.987, "step": 24460 }, { "epoch": 0.6564244310863031, "grad_norm": 0.259765625, "learning_rate": 0.0010195034241388002, "loss": 2.0161, "step": 24461 }, { "epoch": 0.6564512666380421, "grad_norm": 0.26953125, "learning_rate": 0.001019483331916931, "loss": 2.0915, "step": 24462 }, { "epoch": 0.656478102189781, "grad_norm": 0.26953125, "learning_rate": 0.0010194632387748388, "loss": 2.0644, "step": 24463 }, { "epoch": 0.65650493774152, "grad_norm": 0.26953125, "learning_rate": 0.0010194431447125678, "loss": 2.0841, "step": 24464 }, { "epoch": 0.656531773293259, "grad_norm": 0.26953125, "learning_rate": 0.0010194230497301618, "loss": 2.1121, "step": 24465 }, { "epoch": 0.6565586088449978, "grad_norm": 0.26953125, "learning_rate": 0.001019402953827665, "loss": 2.1058, "step": 24466 }, { "epoch": 0.6565854443967368, "grad_norm": 0.279296875, "learning_rate": 0.0010193828570051217, "loss": 2.1634, "step": 24467 }, { "epoch": 0.6566122799484757, "grad_norm": 0.267578125, "learning_rate": 0.0010193627592625758, "loss": 2.0749, "step": 24468 }, { "epoch": 0.6566391155002147, "grad_norm": 0.283203125, "learning_rate": 0.001019342660600071, "loss": 2.1078, "step": 24469 }, { "epoch": 0.6566659510519536, "grad_norm": 0.275390625, "learning_rate": 0.0010193225610176523, "loss": 2.1623, "step": 24470 }, { "epoch": 0.6566927866036926, "grad_norm": 0.271484375, "learning_rate": 0.001019302460515363, "loss": 2.0884, "step": 24471 }, { "epoch": 0.6567196221554316, "grad_norm": 0.271484375, "learning_rate": 0.0010192823590932474, "loss": 2.0242, "step": 24472 }, { "epoch": 0.6567464577071704, "grad_norm": 0.265625, "learning_rate": 0.0010192622567513497, "loss": 2.1602, "step": 24473 }, { "epoch": 0.6567732932589094, "grad_norm": 0.2734375, "learning_rate": 0.001019242153489714, "loss": 2.0926, "step": 24474 }, { "epoch": 0.6568001288106483, "grad_norm": 0.318359375, "learning_rate": 0.0010192220493083844, "loss": 2.1248, "step": 24475 }, { "epoch": 0.6568269643623873, "grad_norm": 0.3046875, "learning_rate": 0.001019201944207405, "loss": 2.1849, "step": 24476 }, { "epoch": 0.6568537999141262, "grad_norm": 0.28515625, "learning_rate": 0.0010191818381868195, "loss": 2.1534, "step": 24477 }, { "epoch": 0.6568806354658652, "grad_norm": 0.30078125, "learning_rate": 0.0010191617312466728, "loss": 2.3084, "step": 24478 }, { "epoch": 0.6569074710176042, "grad_norm": 0.287109375, "learning_rate": 0.0010191416233870086, "loss": 2.1048, "step": 24479 }, { "epoch": 0.656934306569343, "grad_norm": 0.271484375, "learning_rate": 0.0010191215146078706, "loss": 2.134, "step": 24480 }, { "epoch": 0.656961142121082, "grad_norm": 0.28125, "learning_rate": 0.0010191014049093035, "loss": 2.2576, "step": 24481 }, { "epoch": 0.6569879776728209, "grad_norm": 0.26953125, "learning_rate": 0.0010190812942913513, "loss": 2.2113, "step": 24482 }, { "epoch": 0.6570148132245599, "grad_norm": 0.279296875, "learning_rate": 0.0010190611827540581, "loss": 2.1996, "step": 24483 }, { "epoch": 0.6570416487762989, "grad_norm": 0.275390625, "learning_rate": 0.0010190410702974678, "loss": 2.0933, "step": 24484 }, { "epoch": 0.6570684843280378, "grad_norm": 0.283203125, "learning_rate": 0.0010190209569216247, "loss": 2.2629, "step": 24485 }, { "epoch": 0.6570953198797768, "grad_norm": 0.271484375, "learning_rate": 0.0010190008426265728, "loss": 2.1573, "step": 24486 }, { "epoch": 0.6571221554315156, "grad_norm": 0.2734375, "learning_rate": 0.0010189807274123565, "loss": 2.0925, "step": 24487 }, { "epoch": 0.6571489909832546, "grad_norm": 0.2734375, "learning_rate": 0.0010189606112790198, "loss": 2.2298, "step": 24488 }, { "epoch": 0.6571758265349935, "grad_norm": 0.2734375, "learning_rate": 0.0010189404942266066, "loss": 2.1855, "step": 24489 }, { "epoch": 0.6572026620867325, "grad_norm": 0.271484375, "learning_rate": 0.0010189203762551613, "loss": 2.1755, "step": 24490 }, { "epoch": 0.6572294976384715, "grad_norm": 0.275390625, "learning_rate": 0.0010189002573647277, "loss": 2.1762, "step": 24491 }, { "epoch": 0.6572563331902104, "grad_norm": 0.26953125, "learning_rate": 0.0010188801375553504, "loss": 2.1777, "step": 24492 }, { "epoch": 0.6572831687419494, "grad_norm": 0.263671875, "learning_rate": 0.0010188600168270735, "loss": 2.1441, "step": 24493 }, { "epoch": 0.6573100042936882, "grad_norm": 0.2578125, "learning_rate": 0.0010188398951799407, "loss": 2.0596, "step": 24494 }, { "epoch": 0.6573368398454272, "grad_norm": 0.267578125, "learning_rate": 0.0010188197726139965, "loss": 2.1436, "step": 24495 }, { "epoch": 0.6573636753971661, "grad_norm": 0.26953125, "learning_rate": 0.001018799649129285, "loss": 2.2036, "step": 24496 }, { "epoch": 0.6573905109489051, "grad_norm": 0.26953125, "learning_rate": 0.00101877952472585, "loss": 2.0264, "step": 24497 }, { "epoch": 0.6574173465006441, "grad_norm": 0.267578125, "learning_rate": 0.0010187593994037362, "loss": 2.136, "step": 24498 }, { "epoch": 0.657444182052383, "grad_norm": 0.279296875, "learning_rate": 0.0010187392731629873, "loss": 2.1971, "step": 24499 }, { "epoch": 0.657471017604122, "grad_norm": 0.29296875, "learning_rate": 0.0010187191460036477, "loss": 2.1601, "step": 24500 }, { "epoch": 0.6574978531558608, "grad_norm": 0.26953125, "learning_rate": 0.0010186990179257616, "loss": 2.1395, "step": 24501 }, { "epoch": 0.6575246887075998, "grad_norm": 0.267578125, "learning_rate": 0.001018678888929373, "loss": 2.1109, "step": 24502 }, { "epoch": 0.6575515242593388, "grad_norm": 0.263671875, "learning_rate": 0.001018658759014526, "loss": 2.052, "step": 24503 }, { "epoch": 0.6575783598110777, "grad_norm": 0.267578125, "learning_rate": 0.0010186386281812647, "loss": 2.061, "step": 24504 }, { "epoch": 0.6576051953628167, "grad_norm": 0.275390625, "learning_rate": 0.0010186184964296337, "loss": 2.1817, "step": 24505 }, { "epoch": 0.6576320309145556, "grad_norm": 0.26953125, "learning_rate": 0.0010185983637596766, "loss": 2.0971, "step": 24506 }, { "epoch": 0.6576588664662946, "grad_norm": 0.267578125, "learning_rate": 0.0010185782301714381, "loss": 2.1049, "step": 24507 }, { "epoch": 0.6576857020180334, "grad_norm": 0.26953125, "learning_rate": 0.001018558095664962, "loss": 2.1229, "step": 24508 }, { "epoch": 0.6577125375697724, "grad_norm": 0.267578125, "learning_rate": 0.0010185379602402928, "loss": 2.149, "step": 24509 }, { "epoch": 0.6577393731215114, "grad_norm": 0.267578125, "learning_rate": 0.0010185178238974744, "loss": 1.9687, "step": 24510 }, { "epoch": 0.6577662086732503, "grad_norm": 0.275390625, "learning_rate": 0.0010184976866365507, "loss": 2.0121, "step": 24511 }, { "epoch": 0.6577930442249893, "grad_norm": 0.2734375, "learning_rate": 0.0010184775484575663, "loss": 2.063, "step": 24512 }, { "epoch": 0.6578198797767282, "grad_norm": 0.28125, "learning_rate": 0.0010184574093605654, "loss": 2.1349, "step": 24513 }, { "epoch": 0.6578467153284672, "grad_norm": 0.279296875, "learning_rate": 0.001018437269345592, "loss": 2.1007, "step": 24514 }, { "epoch": 0.657873550880206, "grad_norm": 0.314453125, "learning_rate": 0.0010184171284126904, "loss": 2.238, "step": 24515 }, { "epoch": 0.657900386431945, "grad_norm": 0.30078125, "learning_rate": 0.0010183969865619046, "loss": 2.1484, "step": 24516 }, { "epoch": 0.657927221983684, "grad_norm": 0.2890625, "learning_rate": 0.001018376843793279, "loss": 2.1109, "step": 24517 }, { "epoch": 0.6579540575354229, "grad_norm": 0.2734375, "learning_rate": 0.0010183567001068577, "loss": 2.1095, "step": 24518 }, { "epoch": 0.6579808930871619, "grad_norm": 0.28515625, "learning_rate": 0.0010183365555026848, "loss": 2.2562, "step": 24519 }, { "epoch": 0.6580077286389008, "grad_norm": 0.28125, "learning_rate": 0.0010183164099808047, "loss": 2.2075, "step": 24520 }, { "epoch": 0.6580345641906398, "grad_norm": 0.27734375, "learning_rate": 0.0010182962635412614, "loss": 2.1957, "step": 24521 }, { "epoch": 0.6580613997423786, "grad_norm": 0.287109375, "learning_rate": 0.001018276116184099, "loss": 2.1337, "step": 24522 }, { "epoch": 0.6580882352941176, "grad_norm": 0.2734375, "learning_rate": 0.001018255967909362, "loss": 2.0868, "step": 24523 }, { "epoch": 0.6581150708458566, "grad_norm": 0.275390625, "learning_rate": 0.0010182358187170947, "loss": 2.2472, "step": 24524 }, { "epoch": 0.6581419063975955, "grad_norm": 0.275390625, "learning_rate": 0.0010182156686073408, "loss": 2.1999, "step": 24525 }, { "epoch": 0.6581687419493345, "grad_norm": 0.283203125, "learning_rate": 0.0010181955175801447, "loss": 2.1589, "step": 24526 }, { "epoch": 0.6581955775010734, "grad_norm": 0.275390625, "learning_rate": 0.0010181753656355508, "loss": 2.2185, "step": 24527 }, { "epoch": 0.6582224130528124, "grad_norm": 0.26171875, "learning_rate": 0.001018155212773603, "loss": 2.1825, "step": 24528 }, { "epoch": 0.6582492486045514, "grad_norm": 0.28125, "learning_rate": 0.0010181350589943462, "loss": 2.1604, "step": 24529 }, { "epoch": 0.6582760841562902, "grad_norm": 0.26171875, "learning_rate": 0.0010181149042978236, "loss": 1.9612, "step": 24530 }, { "epoch": 0.6583029197080292, "grad_norm": 0.265625, "learning_rate": 0.00101809474868408, "loss": 2.0828, "step": 24531 }, { "epoch": 0.6583297552597681, "grad_norm": 0.263671875, "learning_rate": 0.0010180745921531595, "loss": 2.0233, "step": 24532 }, { "epoch": 0.6583565908115071, "grad_norm": 0.265625, "learning_rate": 0.0010180544347051063, "loss": 2.1828, "step": 24533 }, { "epoch": 0.658383426363246, "grad_norm": 0.265625, "learning_rate": 0.001018034276339965, "loss": 2.0924, "step": 24534 }, { "epoch": 0.658410261914985, "grad_norm": 0.26171875, "learning_rate": 0.0010180141170577793, "loss": 2.1112, "step": 24535 }, { "epoch": 0.658437097466724, "grad_norm": 0.26171875, "learning_rate": 0.0010179939568585935, "loss": 2.0353, "step": 24536 }, { "epoch": 0.6584639330184628, "grad_norm": 0.275390625, "learning_rate": 0.0010179737957424519, "loss": 2.1371, "step": 24537 }, { "epoch": 0.6584907685702018, "grad_norm": 0.279296875, "learning_rate": 0.001017953633709399, "loss": 2.1163, "step": 24538 }, { "epoch": 0.6585176041219407, "grad_norm": 0.26953125, "learning_rate": 0.0010179334707594784, "loss": 2.0498, "step": 24539 }, { "epoch": 0.6585444396736797, "grad_norm": 0.267578125, "learning_rate": 0.0010179133068927352, "loss": 1.9952, "step": 24540 }, { "epoch": 0.6585712752254186, "grad_norm": 0.271484375, "learning_rate": 0.0010178931421092128, "loss": 2.099, "step": 24541 }, { "epoch": 0.6585981107771576, "grad_norm": 0.263671875, "learning_rate": 0.001017872976408956, "loss": 2.1468, "step": 24542 }, { "epoch": 0.6586249463288966, "grad_norm": 0.283203125, "learning_rate": 0.0010178528097920087, "loss": 2.1514, "step": 24543 }, { "epoch": 0.6586517818806354, "grad_norm": 0.279296875, "learning_rate": 0.0010178326422584155, "loss": 2.0835, "step": 24544 }, { "epoch": 0.6586786174323744, "grad_norm": 0.26953125, "learning_rate": 0.0010178124738082202, "loss": 2.0332, "step": 24545 }, { "epoch": 0.6587054529841133, "grad_norm": 0.26953125, "learning_rate": 0.0010177923044414673, "loss": 2.0526, "step": 24546 }, { "epoch": 0.6587322885358523, "grad_norm": 0.271484375, "learning_rate": 0.001017772134158201, "loss": 2.0063, "step": 24547 }, { "epoch": 0.6587591240875912, "grad_norm": 0.27734375, "learning_rate": 0.0010177519629584655, "loss": 2.0701, "step": 24548 }, { "epoch": 0.6587859596393302, "grad_norm": 0.265625, "learning_rate": 0.0010177317908423053, "loss": 2.0802, "step": 24549 }, { "epoch": 0.6588127951910692, "grad_norm": 0.26171875, "learning_rate": 0.001017711617809764, "loss": 1.9328, "step": 24550 }, { "epoch": 0.658839630742808, "grad_norm": 0.26953125, "learning_rate": 0.0010176914438608869, "loss": 2.0202, "step": 24551 }, { "epoch": 0.658866466294547, "grad_norm": 0.279296875, "learning_rate": 0.0010176712689957174, "loss": 2.0525, "step": 24552 }, { "epoch": 0.6588933018462859, "grad_norm": 0.265625, "learning_rate": 0.0010176510932142996, "loss": 2.1063, "step": 24553 }, { "epoch": 0.6589201373980249, "grad_norm": 0.291015625, "learning_rate": 0.0010176309165166788, "loss": 2.094, "step": 24554 }, { "epoch": 0.6589469729497639, "grad_norm": 0.30078125, "learning_rate": 0.0010176107389028985, "loss": 2.1498, "step": 24555 }, { "epoch": 0.6589738085015028, "grad_norm": 0.302734375, "learning_rate": 0.0010175905603730028, "loss": 2.2153, "step": 24556 }, { "epoch": 0.6590006440532418, "grad_norm": 0.29296875, "learning_rate": 0.0010175703809270364, "loss": 2.1527, "step": 24557 }, { "epoch": 0.6590274796049806, "grad_norm": 0.28515625, "learning_rate": 0.0010175502005650434, "loss": 2.1586, "step": 24558 }, { "epoch": 0.6590543151567196, "grad_norm": 0.28125, "learning_rate": 0.0010175300192870683, "loss": 2.1713, "step": 24559 }, { "epoch": 0.6590811507084585, "grad_norm": 0.287109375, "learning_rate": 0.0010175098370931552, "loss": 2.1968, "step": 24560 }, { "epoch": 0.6591079862601975, "grad_norm": 0.28515625, "learning_rate": 0.001017489653983348, "loss": 2.1079, "step": 24561 }, { "epoch": 0.6591348218119365, "grad_norm": 0.265625, "learning_rate": 0.0010174694699576917, "loss": 2.0648, "step": 24562 }, { "epoch": 0.6591616573636754, "grad_norm": 0.271484375, "learning_rate": 0.0010174492850162301, "loss": 2.2671, "step": 24563 }, { "epoch": 0.6591884929154144, "grad_norm": 0.279296875, "learning_rate": 0.0010174290991590074, "loss": 2.237, "step": 24564 }, { "epoch": 0.6592153284671532, "grad_norm": 0.2734375, "learning_rate": 0.0010174089123860684, "loss": 2.2045, "step": 24565 }, { "epoch": 0.6592421640188922, "grad_norm": 0.2734375, "learning_rate": 0.0010173887246974567, "loss": 2.1045, "step": 24566 }, { "epoch": 0.6592689995706311, "grad_norm": 0.279296875, "learning_rate": 0.0010173685360932172, "loss": 2.1834, "step": 24567 }, { "epoch": 0.6592958351223701, "grad_norm": 0.255859375, "learning_rate": 0.0010173483465733936, "loss": 2.0609, "step": 24568 }, { "epoch": 0.6593226706741091, "grad_norm": 0.271484375, "learning_rate": 0.001017328156138031, "loss": 2.0619, "step": 24569 }, { "epoch": 0.659349506225848, "grad_norm": 0.26953125, "learning_rate": 0.0010173079647871727, "loss": 2.0764, "step": 24570 }, { "epoch": 0.659376341777587, "grad_norm": 0.263671875, "learning_rate": 0.0010172877725208639, "loss": 2.0666, "step": 24571 }, { "epoch": 0.6594031773293259, "grad_norm": 0.271484375, "learning_rate": 0.0010172675793391481, "loss": 2.0807, "step": 24572 }, { "epoch": 0.6594300128810648, "grad_norm": 0.26953125, "learning_rate": 0.0010172473852420704, "loss": 2.1494, "step": 24573 }, { "epoch": 0.6594568484328038, "grad_norm": 0.259765625, "learning_rate": 0.0010172271902296746, "loss": 2.0749, "step": 24574 }, { "epoch": 0.6594836839845427, "grad_norm": 0.26171875, "learning_rate": 0.0010172069943020048, "loss": 2.045, "step": 24575 }, { "epoch": 0.6595105195362817, "grad_norm": 0.265625, "learning_rate": 0.001017186797459106, "loss": 2.021, "step": 24576 }, { "epoch": 0.6595373550880206, "grad_norm": 0.267578125, "learning_rate": 0.0010171665997010218, "loss": 2.0811, "step": 24577 }, { "epoch": 0.6595641906397596, "grad_norm": 0.275390625, "learning_rate": 0.0010171464010277967, "loss": 2.1277, "step": 24578 }, { "epoch": 0.6595910261914985, "grad_norm": 0.263671875, "learning_rate": 0.0010171262014394757, "loss": 2.0011, "step": 24579 }, { "epoch": 0.6596178617432374, "grad_norm": 0.265625, "learning_rate": 0.0010171060009361022, "loss": 2.0837, "step": 24580 }, { "epoch": 0.6596446972949764, "grad_norm": 0.26953125, "learning_rate": 0.0010170857995177205, "loss": 1.9577, "step": 24581 }, { "epoch": 0.6596715328467153, "grad_norm": 0.26953125, "learning_rate": 0.0010170655971843757, "loss": 1.9809, "step": 24582 }, { "epoch": 0.6596983683984543, "grad_norm": 0.267578125, "learning_rate": 0.0010170453939361117, "loss": 2.0795, "step": 24583 }, { "epoch": 0.6597252039501932, "grad_norm": 0.26171875, "learning_rate": 0.0010170251897729727, "loss": 2.0532, "step": 24584 }, { "epoch": 0.6597520395019322, "grad_norm": 0.2734375, "learning_rate": 0.001017004984695003, "loss": 2.0424, "step": 24585 }, { "epoch": 0.659778875053671, "grad_norm": 0.265625, "learning_rate": 0.001016984778702247, "loss": 2.0779, "step": 24586 }, { "epoch": 0.65980571060541, "grad_norm": 0.26171875, "learning_rate": 0.0010169645717947493, "loss": 2.0538, "step": 24587 }, { "epoch": 0.659832546157149, "grad_norm": 0.263671875, "learning_rate": 0.001016944363972554, "loss": 1.9177, "step": 24588 }, { "epoch": 0.6598593817088879, "grad_norm": 0.259765625, "learning_rate": 0.0010169241552357051, "loss": 2.0881, "step": 24589 }, { "epoch": 0.6598862172606269, "grad_norm": 0.26953125, "learning_rate": 0.0010169039455842478, "loss": 2.0149, "step": 24590 }, { "epoch": 0.6599130528123658, "grad_norm": 0.271484375, "learning_rate": 0.0010168837350182256, "loss": 1.9675, "step": 24591 }, { "epoch": 0.6599398883641048, "grad_norm": 0.26171875, "learning_rate": 0.0010168635235376833, "loss": 2.0408, "step": 24592 }, { "epoch": 0.6599667239158437, "grad_norm": 0.2734375, "learning_rate": 0.0010168433111426647, "loss": 2.071, "step": 24593 }, { "epoch": 0.6599935594675826, "grad_norm": 0.28515625, "learning_rate": 0.0010168230978332147, "loss": 2.121, "step": 24594 }, { "epoch": 0.6600203950193216, "grad_norm": 0.32421875, "learning_rate": 0.0010168028836093775, "loss": 2.1073, "step": 24595 }, { "epoch": 0.6600472305710605, "grad_norm": 0.294921875, "learning_rate": 0.0010167826684711974, "loss": 2.1033, "step": 24596 }, { "epoch": 0.6600740661227995, "grad_norm": 0.28515625, "learning_rate": 0.0010167624524187189, "loss": 2.1165, "step": 24597 }, { "epoch": 0.6601009016745384, "grad_norm": 0.28515625, "learning_rate": 0.001016742235451986, "loss": 2.0814, "step": 24598 }, { "epoch": 0.6601277372262774, "grad_norm": 0.275390625, "learning_rate": 0.0010167220175710432, "loss": 2.068, "step": 24599 }, { "epoch": 0.6601545727780164, "grad_norm": 0.275390625, "learning_rate": 0.0010167017987759348, "loss": 2.164, "step": 24600 }, { "epoch": 0.6601814083297552, "grad_norm": 0.26953125, "learning_rate": 0.0010166815790667056, "loss": 2.0235, "step": 24601 }, { "epoch": 0.6602082438814942, "grad_norm": 0.263671875, "learning_rate": 0.0010166613584433994, "loss": 2.0921, "step": 24602 }, { "epoch": 0.6602350794332331, "grad_norm": 0.267578125, "learning_rate": 0.0010166411369060607, "loss": 2.153, "step": 24603 }, { "epoch": 0.6602619149849721, "grad_norm": 0.26953125, "learning_rate": 0.0010166209144547341, "loss": 2.1105, "step": 24604 }, { "epoch": 0.660288750536711, "grad_norm": 0.263671875, "learning_rate": 0.0010166006910894637, "loss": 1.9698, "step": 24605 }, { "epoch": 0.66031558608845, "grad_norm": 0.267578125, "learning_rate": 0.001016580466810294, "loss": 2.0681, "step": 24606 }, { "epoch": 0.660342421640189, "grad_norm": 0.259765625, "learning_rate": 0.0010165602416172693, "loss": 2.0518, "step": 24607 }, { "epoch": 0.6603692571919278, "grad_norm": 0.267578125, "learning_rate": 0.0010165400155104337, "loss": 2.0947, "step": 24608 }, { "epoch": 0.6603960927436668, "grad_norm": 0.271484375, "learning_rate": 0.0010165197884898323, "loss": 2.0989, "step": 24609 }, { "epoch": 0.6604229282954057, "grad_norm": 0.2578125, "learning_rate": 0.0010164995605555088, "loss": 2.0527, "step": 24610 }, { "epoch": 0.6604497638471447, "grad_norm": 0.26171875, "learning_rate": 0.0010164793317075079, "loss": 1.9719, "step": 24611 }, { "epoch": 0.6604765993988836, "grad_norm": 0.267578125, "learning_rate": 0.0010164591019458736, "loss": 2.0517, "step": 24612 }, { "epoch": 0.6605034349506226, "grad_norm": 0.2734375, "learning_rate": 0.0010164388712706508, "loss": 2.0147, "step": 24613 }, { "epoch": 0.6605302705023616, "grad_norm": 0.26171875, "learning_rate": 0.0010164186396818835, "loss": 2.0323, "step": 24614 }, { "epoch": 0.6605571060541005, "grad_norm": 0.259765625, "learning_rate": 0.0010163984071796164, "loss": 1.9933, "step": 24615 }, { "epoch": 0.6605839416058394, "grad_norm": 0.26953125, "learning_rate": 0.0010163781737638938, "loss": 2.0545, "step": 24616 }, { "epoch": 0.6606107771575783, "grad_norm": 0.279296875, "learning_rate": 0.0010163579394347597, "loss": 2.0445, "step": 24617 }, { "epoch": 0.6606376127093173, "grad_norm": 0.255859375, "learning_rate": 0.0010163377041922587, "loss": 2.044, "step": 24618 }, { "epoch": 0.6606644482610562, "grad_norm": 0.265625, "learning_rate": 0.0010163174680364354, "loss": 2.1362, "step": 24619 }, { "epoch": 0.6606912838127952, "grad_norm": 0.267578125, "learning_rate": 0.0010162972309673342, "loss": 2.0139, "step": 24620 }, { "epoch": 0.6607181193645342, "grad_norm": 0.271484375, "learning_rate": 0.0010162769929849991, "loss": 2.0803, "step": 24621 }, { "epoch": 0.660744954916273, "grad_norm": 0.255859375, "learning_rate": 0.001016256754089475, "loss": 2.0032, "step": 24622 }, { "epoch": 0.660771790468012, "grad_norm": 0.255859375, "learning_rate": 0.0010162365142808059, "loss": 2.0264, "step": 24623 }, { "epoch": 0.6607986260197509, "grad_norm": 0.265625, "learning_rate": 0.0010162162735590364, "loss": 1.9911, "step": 24624 }, { "epoch": 0.6608254615714899, "grad_norm": 0.26953125, "learning_rate": 0.001016196031924211, "loss": 2.0823, "step": 24625 }, { "epoch": 0.6608522971232289, "grad_norm": 0.259765625, "learning_rate": 0.0010161757893763735, "loss": 2.0016, "step": 24626 }, { "epoch": 0.6608791326749678, "grad_norm": 0.255859375, "learning_rate": 0.0010161555459155692, "loss": 1.9668, "step": 24627 }, { "epoch": 0.6609059682267068, "grad_norm": 0.26953125, "learning_rate": 0.0010161353015418417, "loss": 1.9869, "step": 24628 }, { "epoch": 0.6609328037784457, "grad_norm": 0.271484375, "learning_rate": 0.0010161150562552363, "loss": 1.9981, "step": 24629 }, { "epoch": 0.6609596393301846, "grad_norm": 0.2578125, "learning_rate": 0.0010160948100557965, "loss": 1.9242, "step": 24630 }, { "epoch": 0.6609864748819235, "grad_norm": 0.25390625, "learning_rate": 0.001016074562943567, "loss": 1.8967, "step": 24631 }, { "epoch": 0.6610133104336625, "grad_norm": 0.265625, "learning_rate": 0.0010160543149185925, "loss": 2.0212, "step": 24632 }, { "epoch": 0.6610401459854015, "grad_norm": 0.267578125, "learning_rate": 0.0010160340659809174, "loss": 1.9962, "step": 24633 }, { "epoch": 0.6610669815371404, "grad_norm": 0.283203125, "learning_rate": 0.0010160138161305857, "loss": 2.0972, "step": 24634 }, { "epoch": 0.6610938170888794, "grad_norm": 0.28125, "learning_rate": 0.0010159935653676422, "loss": 2.1848, "step": 24635 }, { "epoch": 0.6611206526406183, "grad_norm": 0.283203125, "learning_rate": 0.0010159733136921312, "loss": 2.1364, "step": 24636 }, { "epoch": 0.6611474881923572, "grad_norm": 0.2734375, "learning_rate": 0.001015953061104097, "loss": 2.1701, "step": 24637 }, { "epoch": 0.6611743237440961, "grad_norm": 0.28515625, "learning_rate": 0.0010159328076035845, "loss": 2.1228, "step": 24638 }, { "epoch": 0.6612011592958351, "grad_norm": 0.2890625, "learning_rate": 0.0010159125531906375, "loss": 2.2363, "step": 24639 }, { "epoch": 0.6612279948475741, "grad_norm": 0.275390625, "learning_rate": 0.0010158922978653008, "loss": 2.1173, "step": 24640 }, { "epoch": 0.661254830399313, "grad_norm": 0.265625, "learning_rate": 0.0010158720416276186, "loss": 2.1148, "step": 24641 }, { "epoch": 0.661281665951052, "grad_norm": 0.255859375, "learning_rate": 0.0010158517844776358, "loss": 2.0619, "step": 24642 }, { "epoch": 0.6613085015027909, "grad_norm": 0.26171875, "learning_rate": 0.0010158315264153963, "loss": 2.0306, "step": 24643 }, { "epoch": 0.6613353370545298, "grad_norm": 0.265625, "learning_rate": 0.0010158112674409448, "loss": 2.0277, "step": 24644 }, { "epoch": 0.6613621726062688, "grad_norm": 0.267578125, "learning_rate": 0.0010157910075543257, "loss": 2.05, "step": 24645 }, { "epoch": 0.6613890081580077, "grad_norm": 0.28125, "learning_rate": 0.0010157707467555836, "loss": 2.1059, "step": 24646 }, { "epoch": 0.6614158437097467, "grad_norm": 0.259765625, "learning_rate": 0.0010157504850447625, "loss": 1.954, "step": 24647 }, { "epoch": 0.6614426792614856, "grad_norm": 0.26953125, "learning_rate": 0.0010157302224219073, "loss": 2.054, "step": 24648 }, { "epoch": 0.6614695148132246, "grad_norm": 0.259765625, "learning_rate": 0.0010157099588870625, "loss": 2.0676, "step": 24649 }, { "epoch": 0.6614963503649635, "grad_norm": 0.275390625, "learning_rate": 0.0010156896944402722, "loss": 2.065, "step": 24650 }, { "epoch": 0.6615231859167024, "grad_norm": 0.26171875, "learning_rate": 0.0010156694290815809, "loss": 2.087, "step": 24651 }, { "epoch": 0.6615500214684414, "grad_norm": 0.259765625, "learning_rate": 0.0010156491628110333, "loss": 2.0088, "step": 24652 }, { "epoch": 0.6615768570201803, "grad_norm": 0.25, "learning_rate": 0.0010156288956286738, "loss": 1.996, "step": 24653 }, { "epoch": 0.6616036925719193, "grad_norm": 0.279296875, "learning_rate": 0.0010156086275345468, "loss": 2.0354, "step": 24654 }, { "epoch": 0.6616305281236582, "grad_norm": 0.26171875, "learning_rate": 0.0010155883585286965, "loss": 2.1038, "step": 24655 }, { "epoch": 0.6616573636753972, "grad_norm": 0.265625, "learning_rate": 0.0010155680886111678, "loss": 2.0501, "step": 24656 }, { "epoch": 0.6616841992271361, "grad_norm": 0.26953125, "learning_rate": 0.0010155478177820048, "loss": 2.029, "step": 24657 }, { "epoch": 0.661711034778875, "grad_norm": 0.26953125, "learning_rate": 0.0010155275460412524, "loss": 2.1705, "step": 24658 }, { "epoch": 0.661737870330614, "grad_norm": 0.263671875, "learning_rate": 0.0010155072733889546, "loss": 2.1014, "step": 24659 }, { "epoch": 0.6617647058823529, "grad_norm": 0.271484375, "learning_rate": 0.0010154869998251564, "loss": 2.1089, "step": 24660 }, { "epoch": 0.6617915414340919, "grad_norm": 0.2734375, "learning_rate": 0.0010154667253499017, "loss": 2.0767, "step": 24661 }, { "epoch": 0.6618183769858308, "grad_norm": 0.263671875, "learning_rate": 0.001015446449963235, "loss": 2.1334, "step": 24662 }, { "epoch": 0.6618452125375698, "grad_norm": 0.26171875, "learning_rate": 0.0010154261736652014, "loss": 2.0312, "step": 24663 }, { "epoch": 0.6618720480893087, "grad_norm": 0.263671875, "learning_rate": 0.001015405896455845, "loss": 1.9349, "step": 24664 }, { "epoch": 0.6618988836410477, "grad_norm": 0.2578125, "learning_rate": 0.00101538561833521, "loss": 1.9446, "step": 24665 }, { "epoch": 0.6619257191927866, "grad_norm": 0.26171875, "learning_rate": 0.0010153653393033413, "loss": 2.0857, "step": 24666 }, { "epoch": 0.6619525547445255, "grad_norm": 0.265625, "learning_rate": 0.0010153450593602831, "loss": 2.0778, "step": 24667 }, { "epoch": 0.6619793902962645, "grad_norm": 0.2490234375, "learning_rate": 0.0010153247785060803, "loss": 1.9087, "step": 24668 }, { "epoch": 0.6620062258480034, "grad_norm": 0.263671875, "learning_rate": 0.001015304496740777, "loss": 1.8989, "step": 24669 }, { "epoch": 0.6620330613997424, "grad_norm": 0.267578125, "learning_rate": 0.0010152842140644179, "loss": 2.0423, "step": 24670 }, { "epoch": 0.6620598969514814, "grad_norm": 0.275390625, "learning_rate": 0.0010152639304770474, "loss": 2.1325, "step": 24671 }, { "epoch": 0.6620867325032203, "grad_norm": 0.267578125, "learning_rate": 0.0010152436459787099, "loss": 2.0071, "step": 24672 }, { "epoch": 0.6621135680549592, "grad_norm": 0.275390625, "learning_rate": 0.0010152233605694502, "loss": 2.0216, "step": 24673 }, { "epoch": 0.6621404036066981, "grad_norm": 0.31640625, "learning_rate": 0.0010152030742493123, "loss": 2.0637, "step": 24674 }, { "epoch": 0.6621672391584371, "grad_norm": 0.28125, "learning_rate": 0.0010151827870183412, "loss": 2.0623, "step": 24675 }, { "epoch": 0.662194074710176, "grad_norm": 0.27734375, "learning_rate": 0.0010151624988765811, "loss": 1.9879, "step": 24676 }, { "epoch": 0.662220910261915, "grad_norm": 0.263671875, "learning_rate": 0.0010151422098240769, "loss": 2.0576, "step": 24677 }, { "epoch": 0.662247745813654, "grad_norm": 0.27734375, "learning_rate": 0.0010151219198608727, "loss": 2.0294, "step": 24678 }, { "epoch": 0.6622745813653929, "grad_norm": 0.265625, "learning_rate": 0.0010151016289870129, "loss": 2.0504, "step": 24679 }, { "epoch": 0.6623014169171318, "grad_norm": 0.265625, "learning_rate": 0.0010150813372025425, "loss": 2.074, "step": 24680 }, { "epoch": 0.6623282524688707, "grad_norm": 0.267578125, "learning_rate": 0.0010150610445075058, "loss": 2.1134, "step": 24681 }, { "epoch": 0.6623550880206097, "grad_norm": 0.26171875, "learning_rate": 0.001015040750901947, "loss": 1.936, "step": 24682 }, { "epoch": 0.6623819235723486, "grad_norm": 0.27734375, "learning_rate": 0.001015020456385911, "loss": 2.0414, "step": 24683 }, { "epoch": 0.6624087591240876, "grad_norm": 0.255859375, "learning_rate": 0.0010150001609594424, "loss": 1.9991, "step": 24684 }, { "epoch": 0.6624355946758266, "grad_norm": 0.259765625, "learning_rate": 0.0010149798646225856, "loss": 1.9575, "step": 24685 }, { "epoch": 0.6624624302275655, "grad_norm": 0.271484375, "learning_rate": 0.001014959567375385, "loss": 2.0355, "step": 24686 }, { "epoch": 0.6624892657793044, "grad_norm": 0.263671875, "learning_rate": 0.0010149392692178848, "loss": 1.9537, "step": 24687 }, { "epoch": 0.6625161013310433, "grad_norm": 0.26171875, "learning_rate": 0.0010149189701501304, "loss": 2.0393, "step": 24688 }, { "epoch": 0.6625429368827823, "grad_norm": 0.26171875, "learning_rate": 0.0010148986701721656, "loss": 1.941, "step": 24689 }, { "epoch": 0.6625697724345213, "grad_norm": 0.265625, "learning_rate": 0.001014878369284035, "loss": 2.0512, "step": 24690 }, { "epoch": 0.6625966079862602, "grad_norm": 0.2578125, "learning_rate": 0.0010148580674857839, "loss": 1.8605, "step": 24691 }, { "epoch": 0.6626234435379992, "grad_norm": 0.251953125, "learning_rate": 0.001014837764777456, "loss": 1.9909, "step": 24692 }, { "epoch": 0.6626502790897381, "grad_norm": 0.275390625, "learning_rate": 0.0010148174611590958, "loss": 2.116, "step": 24693 }, { "epoch": 0.662677114641477, "grad_norm": 0.251953125, "learning_rate": 0.0010147971566307483, "loss": 2.0185, "step": 24694 }, { "epoch": 0.6627039501932159, "grad_norm": 0.259765625, "learning_rate": 0.001014776851192458, "loss": 1.9972, "step": 24695 }, { "epoch": 0.6627307857449549, "grad_norm": 0.25390625, "learning_rate": 0.0010147565448442692, "loss": 1.9202, "step": 24696 }, { "epoch": 0.6627576212966939, "grad_norm": 0.25390625, "learning_rate": 0.0010147362375862265, "loss": 1.9199, "step": 24697 }, { "epoch": 0.6627844568484328, "grad_norm": 0.255859375, "learning_rate": 0.0010147159294183747, "loss": 1.9451, "step": 24698 }, { "epoch": 0.6628112924001718, "grad_norm": 0.26171875, "learning_rate": 0.001014695620340758, "loss": 2.0324, "step": 24699 }, { "epoch": 0.6628381279519107, "grad_norm": 0.263671875, "learning_rate": 0.0010146753103534212, "loss": 2.0511, "step": 24700 }, { "epoch": 0.6628649635036497, "grad_norm": 0.267578125, "learning_rate": 0.0010146549994564087, "loss": 1.9738, "step": 24701 }, { "epoch": 0.6628917990553885, "grad_norm": 0.26171875, "learning_rate": 0.0010146346876497654, "loss": 1.8921, "step": 24702 }, { "epoch": 0.6629186346071275, "grad_norm": 0.265625, "learning_rate": 0.0010146143749335352, "loss": 2.0479, "step": 24703 }, { "epoch": 0.6629454701588665, "grad_norm": 0.255859375, "learning_rate": 0.0010145940613077633, "loss": 1.9022, "step": 24704 }, { "epoch": 0.6629723057106054, "grad_norm": 0.25390625, "learning_rate": 0.001014573746772494, "loss": 1.9415, "step": 24705 }, { "epoch": 0.6629991412623444, "grad_norm": 0.265625, "learning_rate": 0.0010145534313277717, "loss": 1.9881, "step": 24706 }, { "epoch": 0.6630259768140833, "grad_norm": 0.2578125, "learning_rate": 0.0010145331149736413, "loss": 1.8977, "step": 24707 }, { "epoch": 0.6630528123658223, "grad_norm": 0.259765625, "learning_rate": 0.001014512797710147, "loss": 1.9823, "step": 24708 }, { "epoch": 0.6630796479175611, "grad_norm": 0.251953125, "learning_rate": 0.0010144924795373338, "loss": 1.9852, "step": 24709 }, { "epoch": 0.6631064834693001, "grad_norm": 0.2578125, "learning_rate": 0.0010144721604552459, "loss": 1.9968, "step": 24710 }, { "epoch": 0.6631333190210391, "grad_norm": 0.2578125, "learning_rate": 0.001014451840463928, "loss": 1.9424, "step": 24711 }, { "epoch": 0.663160154572778, "grad_norm": 0.25390625, "learning_rate": 0.0010144315195634247, "loss": 1.9639, "step": 24712 }, { "epoch": 0.663186990124517, "grad_norm": 0.2578125, "learning_rate": 0.0010144111977537808, "loss": 1.961, "step": 24713 }, { "epoch": 0.6632138256762559, "grad_norm": 0.3125, "learning_rate": 0.0010143908750350405, "loss": 2.121, "step": 24714 }, { "epoch": 0.6632406612279949, "grad_norm": 0.291015625, "learning_rate": 0.0010143705514072483, "loss": 2.0928, "step": 24715 }, { "epoch": 0.6632674967797338, "grad_norm": 0.2734375, "learning_rate": 0.0010143502268704493, "loss": 2.0336, "step": 24716 }, { "epoch": 0.6632943323314727, "grad_norm": 0.28515625, "learning_rate": 0.0010143299014246877, "loss": 2.1648, "step": 24717 }, { "epoch": 0.6633211678832117, "grad_norm": 0.265625, "learning_rate": 0.0010143095750700082, "loss": 2.0603, "step": 24718 }, { "epoch": 0.6633480034349506, "grad_norm": 0.2734375, "learning_rate": 0.0010142892478064555, "loss": 2.1354, "step": 24719 }, { "epoch": 0.6633748389866896, "grad_norm": 0.28125, "learning_rate": 0.001014268919634074, "loss": 2.0224, "step": 24720 }, { "epoch": 0.6634016745384285, "grad_norm": 0.26171875, "learning_rate": 0.0010142485905529084, "loss": 2.0083, "step": 24721 }, { "epoch": 0.6634285100901675, "grad_norm": 0.267578125, "learning_rate": 0.0010142282605630033, "loss": 2.0839, "step": 24722 }, { "epoch": 0.6634553456419064, "grad_norm": 0.267578125, "learning_rate": 0.001014207929664403, "loss": 2.0321, "step": 24723 }, { "epoch": 0.6634821811936453, "grad_norm": 0.265625, "learning_rate": 0.0010141875978571528, "loss": 1.9902, "step": 24724 }, { "epoch": 0.6635090167453843, "grad_norm": 0.287109375, "learning_rate": 0.0010141672651412964, "loss": 2.0593, "step": 24725 }, { "epoch": 0.6635358522971232, "grad_norm": 0.275390625, "learning_rate": 0.001014146931516879, "loss": 2.1468, "step": 24726 }, { "epoch": 0.6635626878488622, "grad_norm": 0.271484375, "learning_rate": 0.0010141265969839453, "loss": 1.9413, "step": 24727 }, { "epoch": 0.6635895234006011, "grad_norm": 0.2734375, "learning_rate": 0.0010141062615425397, "loss": 2.0693, "step": 24728 }, { "epoch": 0.6636163589523401, "grad_norm": 0.26953125, "learning_rate": 0.0010140859251927066, "loss": 2.0316, "step": 24729 }, { "epoch": 0.663643194504079, "grad_norm": 0.267578125, "learning_rate": 0.0010140655879344908, "loss": 2.164, "step": 24730 }, { "epoch": 0.6636700300558179, "grad_norm": 0.265625, "learning_rate": 0.0010140452497679371, "loss": 2.0519, "step": 24731 }, { "epoch": 0.6636968656075569, "grad_norm": 0.26171875, "learning_rate": 0.0010140249106930898, "loss": 1.949, "step": 24732 }, { "epoch": 0.6637237011592958, "grad_norm": 0.263671875, "learning_rate": 0.0010140045707099936, "loss": 2.039, "step": 24733 }, { "epoch": 0.6637505367110348, "grad_norm": 0.25390625, "learning_rate": 0.0010139842298186933, "loss": 2.0881, "step": 24734 }, { "epoch": 0.6637773722627737, "grad_norm": 0.265625, "learning_rate": 0.0010139638880192333, "loss": 1.8784, "step": 24735 }, { "epoch": 0.6638042078145127, "grad_norm": 0.263671875, "learning_rate": 0.0010139435453116585, "loss": 1.9487, "step": 24736 }, { "epoch": 0.6638310433662516, "grad_norm": 0.267578125, "learning_rate": 0.001013923201696013, "loss": 1.9767, "step": 24737 }, { "epoch": 0.6638578789179905, "grad_norm": 0.275390625, "learning_rate": 0.001013902857172342, "loss": 2.0638, "step": 24738 }, { "epoch": 0.6638847144697295, "grad_norm": 0.265625, "learning_rate": 0.00101388251174069, "loss": 2.0435, "step": 24739 }, { "epoch": 0.6639115500214684, "grad_norm": 0.2578125, "learning_rate": 0.0010138621654011012, "loss": 1.9345, "step": 24740 }, { "epoch": 0.6639383855732074, "grad_norm": 0.255859375, "learning_rate": 0.001013841818153621, "loss": 2.0062, "step": 24741 }, { "epoch": 0.6639652211249464, "grad_norm": 0.2578125, "learning_rate": 0.0010138214699982934, "loss": 1.8653, "step": 24742 }, { "epoch": 0.6639920566766853, "grad_norm": 0.26171875, "learning_rate": 0.001013801120935163, "loss": 1.9531, "step": 24743 }, { "epoch": 0.6640188922284243, "grad_norm": 0.275390625, "learning_rate": 0.001013780770964275, "loss": 2.086, "step": 24744 }, { "epoch": 0.6640457277801631, "grad_norm": 0.26953125, "learning_rate": 0.0010137604200856735, "loss": 1.9741, "step": 24745 }, { "epoch": 0.6640725633319021, "grad_norm": 0.265625, "learning_rate": 0.0010137400682994036, "loss": 2.0619, "step": 24746 }, { "epoch": 0.664099398883641, "grad_norm": 0.271484375, "learning_rate": 0.0010137197156055097, "loss": 2.0245, "step": 24747 }, { "epoch": 0.66412623443538, "grad_norm": 0.263671875, "learning_rate": 0.0010136993620040364, "loss": 1.969, "step": 24748 }, { "epoch": 0.664153069987119, "grad_norm": 0.25390625, "learning_rate": 0.0010136790074950284, "loss": 1.9601, "step": 24749 }, { "epoch": 0.6641799055388579, "grad_norm": 0.283203125, "learning_rate": 0.0010136586520785305, "loss": 1.9741, "step": 24750 }, { "epoch": 0.6642067410905969, "grad_norm": 0.271484375, "learning_rate": 0.001013638295754587, "loss": 1.9536, "step": 24751 }, { "epoch": 0.6642335766423357, "grad_norm": 0.267578125, "learning_rate": 0.0010136179385232427, "loss": 2.0006, "step": 24752 }, { "epoch": 0.6642604121940747, "grad_norm": 0.26171875, "learning_rate": 0.0010135975803845425, "loss": 1.8922, "step": 24753 }, { "epoch": 0.6642872477458136, "grad_norm": 0.279296875, "learning_rate": 0.001013577221338531, "loss": 2.0042, "step": 24754 }, { "epoch": 0.6643140832975526, "grad_norm": 0.302734375, "learning_rate": 0.0010135568613852525, "loss": 2.0337, "step": 24755 }, { "epoch": 0.6643409188492916, "grad_norm": 0.27734375, "learning_rate": 0.001013536500524752, "loss": 2.0684, "step": 24756 }, { "epoch": 0.6643677544010305, "grad_norm": 0.287109375, "learning_rate": 0.001013516138757074, "loss": 2.0287, "step": 24757 }, { "epoch": 0.6643945899527695, "grad_norm": 0.287109375, "learning_rate": 0.0010134957760822634, "loss": 2.0319, "step": 24758 }, { "epoch": 0.6644214255045083, "grad_norm": 0.27734375, "learning_rate": 0.0010134754125003648, "loss": 2.0466, "step": 24759 }, { "epoch": 0.6644482610562473, "grad_norm": 0.265625, "learning_rate": 0.0010134550480114227, "loss": 2.1122, "step": 24760 }, { "epoch": 0.6644750966079863, "grad_norm": 0.26171875, "learning_rate": 0.0010134346826154818, "loss": 1.9564, "step": 24761 }, { "epoch": 0.6645019321597252, "grad_norm": 0.2734375, "learning_rate": 0.001013414316312587, "loss": 1.9834, "step": 24762 }, { "epoch": 0.6645287677114642, "grad_norm": 0.2578125, "learning_rate": 0.0010133939491027825, "loss": 1.9229, "step": 24763 }, { "epoch": 0.6645556032632031, "grad_norm": 0.2578125, "learning_rate": 0.0010133735809861137, "loss": 2.0334, "step": 24764 }, { "epoch": 0.6645824388149421, "grad_norm": 0.255859375, "learning_rate": 0.0010133532119626248, "loss": 1.9276, "step": 24765 }, { "epoch": 0.6646092743666809, "grad_norm": 0.26953125, "learning_rate": 0.0010133328420323603, "loss": 2.1245, "step": 24766 }, { "epoch": 0.6646361099184199, "grad_norm": 0.26171875, "learning_rate": 0.0010133124711953652, "loss": 2.0502, "step": 24767 }, { "epoch": 0.6646629454701589, "grad_norm": 0.2578125, "learning_rate": 0.0010132920994516844, "loss": 2.0639, "step": 24768 }, { "epoch": 0.6646897810218978, "grad_norm": 0.25390625, "learning_rate": 0.001013271726801362, "loss": 2.0396, "step": 24769 }, { "epoch": 0.6647166165736368, "grad_norm": 0.255859375, "learning_rate": 0.0010132513532444432, "loss": 2.0025, "step": 24770 }, { "epoch": 0.6647434521253757, "grad_norm": 0.259765625, "learning_rate": 0.0010132309787809725, "loss": 1.8956, "step": 24771 }, { "epoch": 0.6647702876771147, "grad_norm": 0.251953125, "learning_rate": 0.0010132106034109946, "loss": 2.0598, "step": 24772 }, { "epoch": 0.6647971232288535, "grad_norm": 0.251953125, "learning_rate": 0.0010131902271345542, "loss": 1.8907, "step": 24773 }, { "epoch": 0.6648239587805925, "grad_norm": 0.267578125, "learning_rate": 0.0010131698499516962, "loss": 2.079, "step": 24774 }, { "epoch": 0.6648507943323315, "grad_norm": 0.263671875, "learning_rate": 0.0010131494718624648, "loss": 2.0276, "step": 24775 }, { "epoch": 0.6648776298840704, "grad_norm": 0.259765625, "learning_rate": 0.0010131290928669051, "loss": 2.0363, "step": 24776 }, { "epoch": 0.6649044654358094, "grad_norm": 0.265625, "learning_rate": 0.0010131087129650617, "loss": 2.0437, "step": 24777 }, { "epoch": 0.6649313009875483, "grad_norm": 0.271484375, "learning_rate": 0.0010130883321569793, "loss": 2.0251, "step": 24778 }, { "epoch": 0.6649581365392873, "grad_norm": 0.255859375, "learning_rate": 0.0010130679504427027, "loss": 1.9275, "step": 24779 }, { "epoch": 0.6649849720910261, "grad_norm": 0.25, "learning_rate": 0.0010130475678222766, "loss": 1.8929, "step": 24780 }, { "epoch": 0.6650118076427651, "grad_norm": 0.251953125, "learning_rate": 0.0010130271842957457, "loss": 2.0683, "step": 24781 }, { "epoch": 0.6650386431945041, "grad_norm": 0.26953125, "learning_rate": 0.0010130067998631543, "loss": 1.967, "step": 24782 }, { "epoch": 0.665065478746243, "grad_norm": 0.263671875, "learning_rate": 0.001012986414524548, "loss": 1.9899, "step": 24783 }, { "epoch": 0.665092314297982, "grad_norm": 0.255859375, "learning_rate": 0.0010129660282799707, "loss": 1.9381, "step": 24784 }, { "epoch": 0.6651191498497209, "grad_norm": 0.2578125, "learning_rate": 0.0010129456411294673, "loss": 1.9563, "step": 24785 }, { "epoch": 0.6651459854014599, "grad_norm": 0.26171875, "learning_rate": 0.001012925253073083, "loss": 1.9468, "step": 24786 }, { "epoch": 0.6651728209531989, "grad_norm": 0.263671875, "learning_rate": 0.0010129048641108619, "loss": 2.0168, "step": 24787 }, { "epoch": 0.6651996565049377, "grad_norm": 0.248046875, "learning_rate": 0.0010128844742428493, "loss": 1.8467, "step": 24788 }, { "epoch": 0.6652264920566767, "grad_norm": 0.267578125, "learning_rate": 0.0010128640834690893, "loss": 1.9105, "step": 24789 }, { "epoch": 0.6652533276084156, "grad_norm": 0.263671875, "learning_rate": 0.0010128436917896274, "loss": 2.0612, "step": 24790 }, { "epoch": 0.6652801631601546, "grad_norm": 0.26953125, "learning_rate": 0.0010128232992045075, "loss": 1.9937, "step": 24791 }, { "epoch": 0.6653069987118935, "grad_norm": 0.275390625, "learning_rate": 0.0010128029057137747, "loss": 1.9963, "step": 24792 }, { "epoch": 0.6653338342636325, "grad_norm": 0.259765625, "learning_rate": 0.001012782511317474, "loss": 1.9453, "step": 24793 }, { "epoch": 0.6653606698153715, "grad_norm": 0.26171875, "learning_rate": 0.00101276211601565, "loss": 2.0722, "step": 24794 }, { "epoch": 0.6653875053671103, "grad_norm": 0.2734375, "learning_rate": 0.001012741719808347, "loss": 2.0842, "step": 24795 }, { "epoch": 0.6654143409188493, "grad_norm": 0.265625, "learning_rate": 0.0010127213226956103, "loss": 1.9565, "step": 24796 }, { "epoch": 0.6654411764705882, "grad_norm": 0.251953125, "learning_rate": 0.0010127009246774846, "loss": 1.9369, "step": 24797 }, { "epoch": 0.6654680120223272, "grad_norm": 0.27734375, "learning_rate": 0.0010126805257540143, "loss": 2.1137, "step": 24798 }, { "epoch": 0.6654948475740661, "grad_norm": 0.26171875, "learning_rate": 0.0010126601259252443, "loss": 2.0641, "step": 24799 }, { "epoch": 0.6655216831258051, "grad_norm": 0.271484375, "learning_rate": 0.0010126397251912194, "loss": 2.15, "step": 24800 }, { "epoch": 0.665548518677544, "grad_norm": 0.271484375, "learning_rate": 0.0010126193235519847, "loss": 2.1427, "step": 24801 }, { "epoch": 0.6655753542292829, "grad_norm": 0.259765625, "learning_rate": 0.0010125989210075842, "loss": 1.8925, "step": 24802 }, { "epoch": 0.6656021897810219, "grad_norm": 0.259765625, "learning_rate": 0.0010125785175580631, "loss": 2.0445, "step": 24803 }, { "epoch": 0.6656290253327608, "grad_norm": 0.263671875, "learning_rate": 0.0010125581132034661, "loss": 2.034, "step": 24804 }, { "epoch": 0.6656558608844998, "grad_norm": 0.265625, "learning_rate": 0.0010125377079438383, "loss": 2.0252, "step": 24805 }, { "epoch": 0.6656826964362387, "grad_norm": 0.255859375, "learning_rate": 0.0010125173017792237, "loss": 2.0543, "step": 24806 }, { "epoch": 0.6657095319879777, "grad_norm": 0.259765625, "learning_rate": 0.0010124968947096678, "loss": 2.0488, "step": 24807 }, { "epoch": 0.6657363675397167, "grad_norm": 0.255859375, "learning_rate": 0.0010124764867352149, "loss": 2.0634, "step": 24808 }, { "epoch": 0.6657632030914555, "grad_norm": 0.259765625, "learning_rate": 0.0010124560778559102, "loss": 2.0545, "step": 24809 }, { "epoch": 0.6657900386431945, "grad_norm": 0.271484375, "learning_rate": 0.0010124356680717979, "loss": 2.0102, "step": 24810 }, { "epoch": 0.6658168741949334, "grad_norm": 0.26171875, "learning_rate": 0.0010124152573829232, "loss": 2.0159, "step": 24811 }, { "epoch": 0.6658437097466724, "grad_norm": 0.25390625, "learning_rate": 0.0010123948457893307, "loss": 1.9265, "step": 24812 }, { "epoch": 0.6658705452984114, "grad_norm": 0.2578125, "learning_rate": 0.0010123744332910653, "loss": 2.0312, "step": 24813 }, { "epoch": 0.6658973808501503, "grad_norm": 0.255859375, "learning_rate": 0.0010123540198881717, "loss": 1.9548, "step": 24814 }, { "epoch": 0.6659242164018893, "grad_norm": 0.25, "learning_rate": 0.0010123336055806948, "loss": 1.9657, "step": 24815 }, { "epoch": 0.6659510519536281, "grad_norm": 0.2734375, "learning_rate": 0.001012313190368679, "loss": 1.9515, "step": 24816 }, { "epoch": 0.6659778875053671, "grad_norm": 0.25390625, "learning_rate": 0.0010122927742521697, "loss": 1.9052, "step": 24817 }, { "epoch": 0.666004723057106, "grad_norm": 0.259765625, "learning_rate": 0.0010122723572312112, "loss": 1.9892, "step": 24818 }, { "epoch": 0.666031558608845, "grad_norm": 0.271484375, "learning_rate": 0.0010122519393058484, "loss": 2.0155, "step": 24819 }, { "epoch": 0.666058394160584, "grad_norm": 0.24609375, "learning_rate": 0.0010122315204761263, "loss": 1.9309, "step": 24820 }, { "epoch": 0.6660852297123229, "grad_norm": 0.2734375, "learning_rate": 0.0010122111007420894, "loss": 2.0237, "step": 24821 }, { "epoch": 0.6661120652640619, "grad_norm": 0.267578125, "learning_rate": 0.0010121906801037826, "loss": 1.9615, "step": 24822 }, { "epoch": 0.6661389008158007, "grad_norm": 0.259765625, "learning_rate": 0.0010121702585612507, "loss": 1.9812, "step": 24823 }, { "epoch": 0.6661657363675397, "grad_norm": 0.26953125, "learning_rate": 0.0010121498361145384, "loss": 1.9872, "step": 24824 }, { "epoch": 0.6661925719192786, "grad_norm": 0.27734375, "learning_rate": 0.001012129412763691, "loss": 2.075, "step": 24825 }, { "epoch": 0.6662194074710176, "grad_norm": 0.259765625, "learning_rate": 0.0010121089885087528, "loss": 1.9503, "step": 24826 }, { "epoch": 0.6662462430227566, "grad_norm": 0.26953125, "learning_rate": 0.0010120885633497685, "loss": 2.0181, "step": 24827 }, { "epoch": 0.6662730785744955, "grad_norm": 0.255859375, "learning_rate": 0.0010120681372867834, "loss": 1.9909, "step": 24828 }, { "epoch": 0.6662999141262345, "grad_norm": 0.271484375, "learning_rate": 0.0010120477103198417, "loss": 2.0114, "step": 24829 }, { "epoch": 0.6663267496779733, "grad_norm": 0.267578125, "learning_rate": 0.0010120272824489887, "loss": 2.124, "step": 24830 }, { "epoch": 0.6663535852297123, "grad_norm": 0.2578125, "learning_rate": 0.001012006853674269, "loss": 1.9305, "step": 24831 }, { "epoch": 0.6663804207814513, "grad_norm": 0.26171875, "learning_rate": 0.0010119864239957277, "loss": 1.9858, "step": 24832 }, { "epoch": 0.6664072563331902, "grad_norm": 0.2578125, "learning_rate": 0.0010119659934134093, "loss": 1.9569, "step": 24833 }, { "epoch": 0.6664340918849292, "grad_norm": 0.283203125, "learning_rate": 0.0010119455619273586, "loss": 2.0547, "step": 24834 }, { "epoch": 0.6664609274366681, "grad_norm": 0.279296875, "learning_rate": 0.0010119251295376205, "loss": 1.9901, "step": 24835 }, { "epoch": 0.6664877629884071, "grad_norm": 0.267578125, "learning_rate": 0.00101190469624424, "loss": 1.9336, "step": 24836 }, { "epoch": 0.6665145985401459, "grad_norm": 0.263671875, "learning_rate": 0.001011884262047262, "loss": 1.9153, "step": 24837 }, { "epoch": 0.6665414340918849, "grad_norm": 0.28125, "learning_rate": 0.0010118638269467307, "loss": 1.9722, "step": 24838 }, { "epoch": 0.6665682696436239, "grad_norm": 0.267578125, "learning_rate": 0.0010118433909426915, "loss": 2.0379, "step": 24839 }, { "epoch": 0.6665951051953628, "grad_norm": 0.28125, "learning_rate": 0.001011822954035189, "loss": 2.0222, "step": 24840 }, { "epoch": 0.6666219407471018, "grad_norm": 0.28125, "learning_rate": 0.0010118025162242682, "loss": 2.0653, "step": 24841 }, { "epoch": 0.6666487762988407, "grad_norm": 0.263671875, "learning_rate": 0.0010117820775099738, "loss": 2.0565, "step": 24842 }, { "epoch": 0.6666756118505797, "grad_norm": 0.259765625, "learning_rate": 0.0010117616378923506, "loss": 2.0535, "step": 24843 }, { "epoch": 0.6667024474023185, "grad_norm": 0.26171875, "learning_rate": 0.0010117411973714437, "loss": 1.9528, "step": 24844 }, { "epoch": 0.6667292829540575, "grad_norm": 0.26171875, "learning_rate": 0.0010117207559472976, "loss": 1.9147, "step": 24845 }, { "epoch": 0.6667561185057965, "grad_norm": 0.2578125, "learning_rate": 0.0010117003136199574, "loss": 2.0067, "step": 24846 }, { "epoch": 0.6667829540575354, "grad_norm": 0.255859375, "learning_rate": 0.0010116798703894677, "loss": 1.9111, "step": 24847 }, { "epoch": 0.6668097896092744, "grad_norm": 0.2578125, "learning_rate": 0.0010116594262558735, "loss": 1.8454, "step": 24848 }, { "epoch": 0.6668366251610133, "grad_norm": 0.263671875, "learning_rate": 0.0010116389812192196, "loss": 1.9566, "step": 24849 }, { "epoch": 0.6668634607127523, "grad_norm": 0.24609375, "learning_rate": 0.0010116185352795509, "loss": 1.818, "step": 24850 }, { "epoch": 0.6668902962644911, "grad_norm": 0.2578125, "learning_rate": 0.0010115980884369124, "loss": 1.7391, "step": 24851 }, { "epoch": 0.6669171318162301, "grad_norm": 0.255859375, "learning_rate": 0.0010115776406913485, "loss": 1.923, "step": 24852 }, { "epoch": 0.6669439673679691, "grad_norm": 0.26953125, "learning_rate": 0.0010115571920429045, "loss": 2.1169, "step": 24853 }, { "epoch": 0.666970802919708, "grad_norm": 0.26171875, "learning_rate": 0.0010115367424916252, "loss": 1.9584, "step": 24854 }, { "epoch": 0.666997638471447, "grad_norm": 0.265625, "learning_rate": 0.0010115162920375552, "loss": 2.0127, "step": 24855 }, { "epoch": 0.6670244740231859, "grad_norm": 0.263671875, "learning_rate": 0.0010114958406807397, "loss": 2.0211, "step": 24856 }, { "epoch": 0.6670513095749249, "grad_norm": 0.26171875, "learning_rate": 0.0010114753884212235, "loss": 2.0274, "step": 24857 }, { "epoch": 0.6670781451266639, "grad_norm": 0.244140625, "learning_rate": 0.001011454935259051, "loss": 1.7971, "step": 24858 }, { "epoch": 0.6671049806784027, "grad_norm": 0.236328125, "learning_rate": 0.0010114344811942674, "loss": 1.8439, "step": 24859 }, { "epoch": 0.6671318162301417, "grad_norm": 0.255859375, "learning_rate": 0.001011414026226918, "loss": 1.9367, "step": 24860 }, { "epoch": 0.6671586517818806, "grad_norm": 0.25390625, "learning_rate": 0.001011393570357047, "loss": 1.8477, "step": 24861 }, { "epoch": 0.6671854873336196, "grad_norm": 0.265625, "learning_rate": 0.0010113731135846995, "loss": 2.033, "step": 24862 }, { "epoch": 0.6672123228853585, "grad_norm": 0.251953125, "learning_rate": 0.0010113526559099204, "loss": 1.8644, "step": 24863 }, { "epoch": 0.6672391584370975, "grad_norm": 0.2490234375, "learning_rate": 0.0010113321973327547, "loss": 1.8134, "step": 24864 }, { "epoch": 0.6672659939888365, "grad_norm": 0.26171875, "learning_rate": 0.0010113117378532472, "loss": 2.0094, "step": 24865 }, { "epoch": 0.6672928295405753, "grad_norm": 0.25390625, "learning_rate": 0.0010112912774714428, "loss": 1.8684, "step": 24866 }, { "epoch": 0.6673196650923143, "grad_norm": 0.25390625, "learning_rate": 0.0010112708161873862, "loss": 1.9119, "step": 24867 }, { "epoch": 0.6673465006440532, "grad_norm": 0.26171875, "learning_rate": 0.0010112503540011225, "loss": 1.9532, "step": 24868 }, { "epoch": 0.6673733361957922, "grad_norm": 0.25390625, "learning_rate": 0.0010112298909126965, "loss": 1.8632, "step": 24869 }, { "epoch": 0.6674001717475311, "grad_norm": 0.2578125, "learning_rate": 0.001011209426922153, "loss": 2.0112, "step": 24870 }, { "epoch": 0.6674270072992701, "grad_norm": 0.25, "learning_rate": 0.0010111889620295373, "loss": 1.8863, "step": 24871 }, { "epoch": 0.6674538428510091, "grad_norm": 0.25390625, "learning_rate": 0.0010111684962348935, "loss": 1.8629, "step": 24872 }, { "epoch": 0.6674806784027479, "grad_norm": 0.265625, "learning_rate": 0.0010111480295382674, "loss": 1.9265, "step": 24873 }, { "epoch": 0.6675075139544869, "grad_norm": 0.28515625, "learning_rate": 0.0010111275619397034, "loss": 2.0871, "step": 24874 }, { "epoch": 0.6675343495062258, "grad_norm": 0.28515625, "learning_rate": 0.0010111070934392464, "loss": 2.0856, "step": 24875 }, { "epoch": 0.6675611850579648, "grad_norm": 0.2578125, "learning_rate": 0.0010110866240369415, "loss": 1.9488, "step": 24876 }, { "epoch": 0.6675880206097037, "grad_norm": 0.255859375, "learning_rate": 0.0010110661537328335, "loss": 2.0052, "step": 24877 }, { "epoch": 0.6676148561614427, "grad_norm": 0.26171875, "learning_rate": 0.0010110456825269671, "loss": 2.0102, "step": 24878 }, { "epoch": 0.6676416917131817, "grad_norm": 0.267578125, "learning_rate": 0.0010110252104193875, "loss": 2.1031, "step": 24879 }, { "epoch": 0.6676685272649205, "grad_norm": 0.24609375, "learning_rate": 0.0010110047374101395, "loss": 1.8355, "step": 24880 }, { "epoch": 0.6676953628166595, "grad_norm": 0.271484375, "learning_rate": 0.0010109842634992681, "loss": 2.0259, "step": 24881 }, { "epoch": 0.6677221983683984, "grad_norm": 0.26953125, "learning_rate": 0.001010963788686818, "loss": 2.0436, "step": 24882 }, { "epoch": 0.6677490339201374, "grad_norm": 0.265625, "learning_rate": 0.0010109433129728343, "loss": 1.9919, "step": 24883 }, { "epoch": 0.6677758694718764, "grad_norm": 0.265625, "learning_rate": 0.001010922836357362, "loss": 2.0198, "step": 24884 }, { "epoch": 0.6678027050236153, "grad_norm": 0.259765625, "learning_rate": 0.001010902358840446, "loss": 2.0376, "step": 24885 }, { "epoch": 0.6678295405753543, "grad_norm": 0.2734375, "learning_rate": 0.0010108818804221307, "loss": 2.0788, "step": 24886 }, { "epoch": 0.6678563761270931, "grad_norm": 0.25, "learning_rate": 0.0010108614011024618, "loss": 1.9487, "step": 24887 }, { "epoch": 0.6678832116788321, "grad_norm": 0.26953125, "learning_rate": 0.0010108409208814837, "loss": 2.0598, "step": 24888 }, { "epoch": 0.667910047230571, "grad_norm": 0.255859375, "learning_rate": 0.0010108204397592416, "loss": 1.9631, "step": 24889 }, { "epoch": 0.66793688278231, "grad_norm": 0.265625, "learning_rate": 0.0010107999577357802, "loss": 1.9497, "step": 24890 }, { "epoch": 0.667963718334049, "grad_norm": 0.259765625, "learning_rate": 0.0010107794748111447, "loss": 2.0095, "step": 24891 }, { "epoch": 0.6679905538857879, "grad_norm": 0.26171875, "learning_rate": 0.0010107589909853795, "loss": 2.069, "step": 24892 }, { "epoch": 0.6680173894375269, "grad_norm": 0.271484375, "learning_rate": 0.0010107385062585305, "loss": 2.1597, "step": 24893 }, { "epoch": 0.6680442249892657, "grad_norm": 0.251953125, "learning_rate": 0.0010107180206306416, "loss": 2.0605, "step": 24894 }, { "epoch": 0.6680710605410047, "grad_norm": 0.25, "learning_rate": 0.0010106975341017583, "loss": 1.9893, "step": 24895 }, { "epoch": 0.6680978960927436, "grad_norm": 0.267578125, "learning_rate": 0.0010106770466719256, "loss": 2.1356, "step": 24896 }, { "epoch": 0.6681247316444826, "grad_norm": 0.25, "learning_rate": 0.0010106565583411881, "loss": 1.9387, "step": 24897 }, { "epoch": 0.6681515671962216, "grad_norm": 0.251953125, "learning_rate": 0.001010636069109591, "loss": 2.0931, "step": 24898 }, { "epoch": 0.6681784027479605, "grad_norm": 0.267578125, "learning_rate": 0.0010106155789771792, "loss": 2.0402, "step": 24899 }, { "epoch": 0.6682052382996995, "grad_norm": 0.275390625, "learning_rate": 0.0010105950879439976, "loss": 2.0427, "step": 24900 }, { "epoch": 0.6682320738514383, "grad_norm": 0.26171875, "learning_rate": 0.0010105745960100913, "loss": 2.0559, "step": 24901 }, { "epoch": 0.6682589094031773, "grad_norm": 0.263671875, "learning_rate": 0.0010105541031755051, "loss": 2.0226, "step": 24902 }, { "epoch": 0.6682857449549163, "grad_norm": 0.255859375, "learning_rate": 0.0010105336094402837, "loss": 1.9421, "step": 24903 }, { "epoch": 0.6683125805066552, "grad_norm": 0.255859375, "learning_rate": 0.0010105131148044728, "loss": 2.0176, "step": 24904 }, { "epoch": 0.6683394160583942, "grad_norm": 0.259765625, "learning_rate": 0.0010104926192681164, "loss": 1.9271, "step": 24905 }, { "epoch": 0.6683662516101331, "grad_norm": 0.251953125, "learning_rate": 0.0010104721228312602, "loss": 1.872, "step": 24906 }, { "epoch": 0.6683930871618721, "grad_norm": 0.26171875, "learning_rate": 0.001010451625493949, "loss": 2.0071, "step": 24907 }, { "epoch": 0.668419922713611, "grad_norm": 0.255859375, "learning_rate": 0.0010104311272562275, "loss": 1.9516, "step": 24908 }, { "epoch": 0.6684467582653499, "grad_norm": 0.275390625, "learning_rate": 0.001010410628118141, "loss": 2.0702, "step": 24909 }, { "epoch": 0.6684735938170889, "grad_norm": 0.25390625, "learning_rate": 0.0010103901280797344, "loss": 1.9841, "step": 24910 }, { "epoch": 0.6685004293688278, "grad_norm": 0.25390625, "learning_rate": 0.0010103696271410525, "loss": 1.9959, "step": 24911 }, { "epoch": 0.6685272649205668, "grad_norm": 0.3046875, "learning_rate": 0.0010103491253021404, "loss": 2.1996, "step": 24912 }, { "epoch": 0.6685541004723057, "grad_norm": 0.26171875, "learning_rate": 0.001010328622563043, "loss": 1.9274, "step": 24913 }, { "epoch": 0.6685809360240447, "grad_norm": 0.26171875, "learning_rate": 0.0010103081189238052, "loss": 2.0266, "step": 24914 }, { "epoch": 0.6686077715757835, "grad_norm": 0.26953125, "learning_rate": 0.0010102876143844722, "loss": 2.0957, "step": 24915 }, { "epoch": 0.6686346071275225, "grad_norm": 0.265625, "learning_rate": 0.001010267108945089, "loss": 2.0349, "step": 24916 }, { "epoch": 0.6686614426792615, "grad_norm": 0.2578125, "learning_rate": 0.0010102466026057004, "loss": 1.9876, "step": 24917 }, { "epoch": 0.6686882782310004, "grad_norm": 0.251953125, "learning_rate": 0.0010102260953663515, "loss": 2.0494, "step": 24918 }, { "epoch": 0.6687151137827394, "grad_norm": 0.263671875, "learning_rate": 0.0010102055872270872, "loss": 1.9874, "step": 24919 }, { "epoch": 0.6687419493344783, "grad_norm": 0.26171875, "learning_rate": 0.0010101850781879525, "loss": 2.0776, "step": 24920 }, { "epoch": 0.6687687848862173, "grad_norm": 0.255859375, "learning_rate": 0.0010101645682489924, "loss": 1.8903, "step": 24921 }, { "epoch": 0.6687956204379562, "grad_norm": 0.263671875, "learning_rate": 0.001010144057410252, "loss": 2.0272, "step": 24922 }, { "epoch": 0.6688224559896951, "grad_norm": 0.2412109375, "learning_rate": 0.001010123545671776, "loss": 1.9677, "step": 24923 }, { "epoch": 0.6688492915414341, "grad_norm": 0.25, "learning_rate": 0.0010101030330336097, "loss": 1.9569, "step": 24924 }, { "epoch": 0.668876127093173, "grad_norm": 0.2578125, "learning_rate": 0.0010100825194957982, "loss": 1.9717, "step": 24925 }, { "epoch": 0.668902962644912, "grad_norm": 0.251953125, "learning_rate": 0.001010062005058386, "loss": 2.0496, "step": 24926 }, { "epoch": 0.6689297981966509, "grad_norm": 0.2490234375, "learning_rate": 0.0010100414897214185, "loss": 1.9266, "step": 24927 }, { "epoch": 0.6689566337483899, "grad_norm": 0.2578125, "learning_rate": 0.0010100209734849408, "loss": 2.0199, "step": 24928 }, { "epoch": 0.6689834693001289, "grad_norm": 0.2578125, "learning_rate": 0.0010100004563489975, "loss": 1.9384, "step": 24929 }, { "epoch": 0.6690103048518677, "grad_norm": 0.27734375, "learning_rate": 0.0010099799383136339, "loss": 1.959, "step": 24930 }, { "epoch": 0.6690371404036067, "grad_norm": 0.248046875, "learning_rate": 0.0010099594193788948, "loss": 2.0224, "step": 24931 }, { "epoch": 0.6690639759553456, "grad_norm": 0.259765625, "learning_rate": 0.0010099388995448255, "loss": 1.9438, "step": 24932 }, { "epoch": 0.6690908115070846, "grad_norm": 0.244140625, "learning_rate": 0.001009918378811471, "loss": 1.9081, "step": 24933 }, { "epoch": 0.6691176470588235, "grad_norm": 0.259765625, "learning_rate": 0.0010098978571788759, "loss": 1.9672, "step": 24934 }, { "epoch": 0.6691444826105625, "grad_norm": 0.255859375, "learning_rate": 0.0010098773346470856, "loss": 1.9478, "step": 24935 }, { "epoch": 0.6691713181623015, "grad_norm": 0.2578125, "learning_rate": 0.001009856811216145, "loss": 1.946, "step": 24936 }, { "epoch": 0.6691981537140403, "grad_norm": 0.25390625, "learning_rate": 0.0010098362868860991, "loss": 1.9222, "step": 24937 }, { "epoch": 0.6692249892657793, "grad_norm": 0.259765625, "learning_rate": 0.001009815761656993, "loss": 2.0408, "step": 24938 }, { "epoch": 0.6692518248175182, "grad_norm": 0.265625, "learning_rate": 0.0010097952355288715, "loss": 2.0361, "step": 24939 }, { "epoch": 0.6692786603692572, "grad_norm": 0.25390625, "learning_rate": 0.0010097747085017802, "loss": 1.8519, "step": 24940 }, { "epoch": 0.6693054959209961, "grad_norm": 0.259765625, "learning_rate": 0.0010097541805757636, "loss": 2.0268, "step": 24941 }, { "epoch": 0.6693323314727351, "grad_norm": 0.259765625, "learning_rate": 0.0010097336517508667, "loss": 2.0921, "step": 24942 }, { "epoch": 0.6693591670244741, "grad_norm": 0.25390625, "learning_rate": 0.001009713122027135, "loss": 1.9802, "step": 24943 }, { "epoch": 0.669386002576213, "grad_norm": 0.255859375, "learning_rate": 0.0010096925914046132, "loss": 1.9623, "step": 24944 }, { "epoch": 0.6694128381279519, "grad_norm": 0.25390625, "learning_rate": 0.001009672059883346, "loss": 1.8999, "step": 24945 }, { "epoch": 0.6694396736796908, "grad_norm": 0.2578125, "learning_rate": 0.0010096515274633793, "loss": 1.989, "step": 24946 }, { "epoch": 0.6694665092314298, "grad_norm": 0.255859375, "learning_rate": 0.0010096309941447575, "loss": 1.8694, "step": 24947 }, { "epoch": 0.6694933447831687, "grad_norm": 0.255859375, "learning_rate": 0.0010096104599275257, "loss": 1.9132, "step": 24948 }, { "epoch": 0.6695201803349077, "grad_norm": 0.259765625, "learning_rate": 0.0010095899248117291, "loss": 1.8767, "step": 24949 }, { "epoch": 0.6695470158866467, "grad_norm": 0.271484375, "learning_rate": 0.0010095693887974128, "loss": 2.099, "step": 24950 }, { "epoch": 0.6695738514383855, "grad_norm": 0.291015625, "learning_rate": 0.0010095488518846217, "loss": 2.125, "step": 24951 }, { "epoch": 0.6696006869901245, "grad_norm": 0.28515625, "learning_rate": 0.001009528314073401, "loss": 1.9888, "step": 24952 }, { "epoch": 0.6696275225418634, "grad_norm": 0.265625, "learning_rate": 0.0010095077753637957, "loss": 2.0176, "step": 24953 }, { "epoch": 0.6696543580936024, "grad_norm": 0.27734375, "learning_rate": 0.0010094872357558507, "loss": 2.1043, "step": 24954 }, { "epoch": 0.6696811936453414, "grad_norm": 0.267578125, "learning_rate": 0.0010094666952496112, "loss": 1.9917, "step": 24955 }, { "epoch": 0.6697080291970803, "grad_norm": 0.26171875, "learning_rate": 0.0010094461538451222, "loss": 1.9749, "step": 24956 }, { "epoch": 0.6697348647488193, "grad_norm": 0.2578125, "learning_rate": 0.0010094256115424289, "loss": 2.0553, "step": 24957 }, { "epoch": 0.6697617003005581, "grad_norm": 0.25390625, "learning_rate": 0.0010094050683415762, "loss": 1.9785, "step": 24958 }, { "epoch": 0.6697885358522971, "grad_norm": 0.2578125, "learning_rate": 0.0010093845242426093, "loss": 2.0523, "step": 24959 }, { "epoch": 0.669815371404036, "grad_norm": 0.259765625, "learning_rate": 0.001009363979245573, "loss": 1.9595, "step": 24960 }, { "epoch": 0.669842206955775, "grad_norm": 0.2578125, "learning_rate": 0.0010093434333505126, "loss": 2.0391, "step": 24961 }, { "epoch": 0.669869042507514, "grad_norm": 0.259765625, "learning_rate": 0.0010093228865574735, "loss": 2.0754, "step": 24962 }, { "epoch": 0.6698958780592529, "grad_norm": 0.251953125, "learning_rate": 0.0010093023388664999, "loss": 1.9237, "step": 24963 }, { "epoch": 0.6699227136109919, "grad_norm": 0.2578125, "learning_rate": 0.0010092817902776377, "loss": 2.0106, "step": 24964 }, { "epoch": 0.6699495491627308, "grad_norm": 0.26171875, "learning_rate": 0.0010092612407909317, "loss": 2.1072, "step": 24965 }, { "epoch": 0.6699763847144697, "grad_norm": 0.2578125, "learning_rate": 0.001009240690406427, "loss": 1.9965, "step": 24966 }, { "epoch": 0.6700032202662086, "grad_norm": 0.263671875, "learning_rate": 0.0010092201391241683, "loss": 2.135, "step": 24967 }, { "epoch": 0.6700300558179476, "grad_norm": 0.26171875, "learning_rate": 0.001009199586944201, "loss": 2.0549, "step": 24968 }, { "epoch": 0.6700568913696866, "grad_norm": 0.240234375, "learning_rate": 0.0010091790338665704, "loss": 1.9412, "step": 24969 }, { "epoch": 0.6700837269214255, "grad_norm": 0.2470703125, "learning_rate": 0.0010091584798913214, "loss": 1.9225, "step": 24970 }, { "epoch": 0.6701105624731645, "grad_norm": 0.255859375, "learning_rate": 0.001009137925018499, "loss": 2.0848, "step": 24971 }, { "epoch": 0.6701373980249034, "grad_norm": 0.263671875, "learning_rate": 0.0010091173692481482, "loss": 2.0128, "step": 24972 }, { "epoch": 0.6701642335766423, "grad_norm": 0.255859375, "learning_rate": 0.0010090968125803146, "loss": 2.0263, "step": 24973 }, { "epoch": 0.6701910691283813, "grad_norm": 0.251953125, "learning_rate": 0.0010090762550150427, "loss": 2.0008, "step": 24974 }, { "epoch": 0.6702179046801202, "grad_norm": 0.263671875, "learning_rate": 0.001009055696552378, "loss": 2.1062, "step": 24975 }, { "epoch": 0.6702447402318592, "grad_norm": 0.271484375, "learning_rate": 0.0010090351371923655, "loss": 2.0031, "step": 24976 }, { "epoch": 0.6702715757835981, "grad_norm": 0.259765625, "learning_rate": 0.0010090145769350502, "loss": 1.9744, "step": 24977 }, { "epoch": 0.6702984113353371, "grad_norm": 0.248046875, "learning_rate": 0.001008994015780477, "loss": 1.9703, "step": 24978 }, { "epoch": 0.670325246887076, "grad_norm": 0.265625, "learning_rate": 0.0010089734537286916, "loss": 2.0415, "step": 24979 }, { "epoch": 0.670352082438815, "grad_norm": 0.2470703125, "learning_rate": 0.0010089528907797386, "loss": 1.9679, "step": 24980 }, { "epoch": 0.6703789179905539, "grad_norm": 0.267578125, "learning_rate": 0.001008932326933663, "loss": 1.9914, "step": 24981 }, { "epoch": 0.6704057535422928, "grad_norm": 0.263671875, "learning_rate": 0.0010089117621905107, "loss": 1.971, "step": 24982 }, { "epoch": 0.6704325890940318, "grad_norm": 0.25390625, "learning_rate": 0.001008891196550326, "loss": 2.0176, "step": 24983 }, { "epoch": 0.6704594246457707, "grad_norm": 0.259765625, "learning_rate": 0.0010088706300131543, "loss": 1.9362, "step": 24984 }, { "epoch": 0.6704862601975097, "grad_norm": 0.26171875, "learning_rate": 0.0010088500625790408, "loss": 2.0217, "step": 24985 }, { "epoch": 0.6705130957492486, "grad_norm": 0.251953125, "learning_rate": 0.0010088294942480307, "loss": 1.9633, "step": 24986 }, { "epoch": 0.6705399313009875, "grad_norm": 0.2578125, "learning_rate": 0.0010088089250201687, "loss": 2.0354, "step": 24987 }, { "epoch": 0.6705667668527265, "grad_norm": 0.244140625, "learning_rate": 0.0010087883548955004, "loss": 1.9262, "step": 24988 }, { "epoch": 0.6705936024044654, "grad_norm": 0.275390625, "learning_rate": 0.0010087677838740705, "loss": 2.0255, "step": 24989 }, { "epoch": 0.6706204379562044, "grad_norm": 0.28515625, "learning_rate": 0.0010087472119559245, "loss": 2.0798, "step": 24990 }, { "epoch": 0.6706472735079433, "grad_norm": 0.271484375, "learning_rate": 0.0010087266391411074, "loss": 1.8659, "step": 24991 }, { "epoch": 0.6706741090596823, "grad_norm": 0.263671875, "learning_rate": 0.0010087060654296641, "loss": 2.0049, "step": 24992 }, { "epoch": 0.6707009446114212, "grad_norm": 0.271484375, "learning_rate": 0.0010086854908216403, "loss": 2.1012, "step": 24993 }, { "epoch": 0.6707277801631601, "grad_norm": 0.3046875, "learning_rate": 0.0010086649153170806, "loss": 2.0763, "step": 24994 }, { "epoch": 0.6707546157148991, "grad_norm": 0.2578125, "learning_rate": 0.0010086443389160302, "loss": 1.9665, "step": 24995 }, { "epoch": 0.670781451266638, "grad_norm": 0.267578125, "learning_rate": 0.0010086237616185342, "loss": 1.9747, "step": 24996 }, { "epoch": 0.670808286818377, "grad_norm": 0.251953125, "learning_rate": 0.0010086031834246382, "loss": 1.9469, "step": 24997 }, { "epoch": 0.6708351223701159, "grad_norm": 0.26171875, "learning_rate": 0.001008582604334387, "loss": 2.0491, "step": 24998 }, { "epoch": 0.6708619579218549, "grad_norm": 0.267578125, "learning_rate": 0.0010085620243478258, "loss": 2.0956, "step": 24999 }, { "epoch": 0.6708887934735939, "grad_norm": 0.2734375, "learning_rate": 0.0010085414434649997, "loss": 2.145, "step": 25000 }, { "epoch": 0.6709156290253327, "grad_norm": 0.259765625, "learning_rate": 0.0010085208616859537, "loss": 2.0716, "step": 25001 }, { "epoch": 0.6709424645770717, "grad_norm": 0.2578125, "learning_rate": 0.0010085002790107332, "loss": 2.1073, "step": 25002 }, { "epoch": 0.6709693001288106, "grad_norm": 0.26171875, "learning_rate": 0.0010084796954393832, "loss": 2.1063, "step": 25003 }, { "epoch": 0.6709961356805496, "grad_norm": 0.251953125, "learning_rate": 0.0010084591109719488, "loss": 1.9336, "step": 25004 }, { "epoch": 0.6710229712322885, "grad_norm": 0.25390625, "learning_rate": 0.0010084385256084755, "loss": 1.9716, "step": 25005 }, { "epoch": 0.6710498067840275, "grad_norm": 0.25390625, "learning_rate": 0.0010084179393490085, "loss": 1.9921, "step": 25006 }, { "epoch": 0.6710766423357665, "grad_norm": 0.251953125, "learning_rate": 0.0010083973521935923, "loss": 2.0517, "step": 25007 }, { "epoch": 0.6711034778875054, "grad_norm": 0.26171875, "learning_rate": 0.0010083767641422725, "loss": 1.9874, "step": 25008 }, { "epoch": 0.6711303134392443, "grad_norm": 0.26171875, "learning_rate": 0.0010083561751950943, "loss": 2.0681, "step": 25009 }, { "epoch": 0.6711571489909832, "grad_norm": 0.25390625, "learning_rate": 0.001008335585352103, "loss": 1.9865, "step": 25010 }, { "epoch": 0.6711839845427222, "grad_norm": 0.2578125, "learning_rate": 0.0010083149946133432, "loss": 2.0236, "step": 25011 }, { "epoch": 0.6712108200944611, "grad_norm": 0.25390625, "learning_rate": 0.0010082944029788604, "loss": 1.9114, "step": 25012 }, { "epoch": 0.6712376556462001, "grad_norm": 0.26171875, "learning_rate": 0.0010082738104487002, "loss": 2.0231, "step": 25013 }, { "epoch": 0.6712644911979391, "grad_norm": 0.25390625, "learning_rate": 0.0010082532170229072, "loss": 2.0064, "step": 25014 }, { "epoch": 0.671291326749678, "grad_norm": 0.255859375, "learning_rate": 0.0010082326227015266, "loss": 1.9166, "step": 25015 }, { "epoch": 0.6713181623014169, "grad_norm": 0.263671875, "learning_rate": 0.0010082120274846039, "loss": 1.9274, "step": 25016 }, { "epoch": 0.6713449978531558, "grad_norm": 0.259765625, "learning_rate": 0.0010081914313721841, "loss": 1.9662, "step": 25017 }, { "epoch": 0.6713718334048948, "grad_norm": 0.259765625, "learning_rate": 0.0010081708343643123, "loss": 1.9331, "step": 25018 }, { "epoch": 0.6713986689566337, "grad_norm": 0.251953125, "learning_rate": 0.0010081502364610339, "loss": 2.0317, "step": 25019 }, { "epoch": 0.6714255045083727, "grad_norm": 0.25390625, "learning_rate": 0.0010081296376623938, "loss": 1.9173, "step": 25020 }, { "epoch": 0.6714523400601117, "grad_norm": 0.2578125, "learning_rate": 0.0010081090379684373, "loss": 1.9694, "step": 25021 }, { "epoch": 0.6714791756118506, "grad_norm": 0.255859375, "learning_rate": 0.00100808843737921, "loss": 1.9695, "step": 25022 }, { "epoch": 0.6715060111635895, "grad_norm": 0.2470703125, "learning_rate": 0.0010080678358947565, "loss": 1.8532, "step": 25023 }, { "epoch": 0.6715328467153284, "grad_norm": 0.2578125, "learning_rate": 0.0010080472335151221, "loss": 2.012, "step": 25024 }, { "epoch": 0.6715596822670674, "grad_norm": 0.26171875, "learning_rate": 0.0010080266302403523, "loss": 1.9605, "step": 25025 }, { "epoch": 0.6715865178188064, "grad_norm": 0.259765625, "learning_rate": 0.001008006026070492, "loss": 1.9476, "step": 25026 }, { "epoch": 0.6716133533705453, "grad_norm": 0.27734375, "learning_rate": 0.0010079854210055864, "loss": 2.0313, "step": 25027 }, { "epoch": 0.6716401889222843, "grad_norm": 0.275390625, "learning_rate": 0.0010079648150456812, "loss": 2.2024, "step": 25028 }, { "epoch": 0.6716670244740232, "grad_norm": 0.27734375, "learning_rate": 0.0010079442081908211, "loss": 2.1097, "step": 25029 }, { "epoch": 0.6716938600257621, "grad_norm": 0.275390625, "learning_rate": 0.0010079236004410512, "loss": 2.1059, "step": 25030 }, { "epoch": 0.671720695577501, "grad_norm": 0.2734375, "learning_rate": 0.001007902991796417, "loss": 1.9994, "step": 25031 }, { "epoch": 0.67174753112924, "grad_norm": 0.26953125, "learning_rate": 0.0010078823822569638, "loss": 2.0645, "step": 25032 }, { "epoch": 0.671774366680979, "grad_norm": 0.26953125, "learning_rate": 0.0010078617718227365, "loss": 2.0803, "step": 25033 }, { "epoch": 0.6718012022327179, "grad_norm": 0.265625, "learning_rate": 0.0010078411604937806, "loss": 2.1778, "step": 25034 }, { "epoch": 0.6718280377844569, "grad_norm": 0.265625, "learning_rate": 0.001007820548270141, "loss": 2.0943, "step": 25035 }, { "epoch": 0.6718548733361958, "grad_norm": 0.267578125, "learning_rate": 0.0010077999351518633, "loss": 2.0566, "step": 25036 }, { "epoch": 0.6718817088879347, "grad_norm": 0.259765625, "learning_rate": 0.0010077793211389924, "loss": 2.0497, "step": 25037 }, { "epoch": 0.6719085444396736, "grad_norm": 0.263671875, "learning_rate": 0.0010077587062315737, "loss": 2.0479, "step": 25038 }, { "epoch": 0.6719353799914126, "grad_norm": 0.265625, "learning_rate": 0.0010077380904296523, "loss": 1.9891, "step": 25039 }, { "epoch": 0.6719622155431516, "grad_norm": 0.265625, "learning_rate": 0.0010077174737332735, "loss": 2.0165, "step": 25040 }, { "epoch": 0.6719890510948905, "grad_norm": 0.26953125, "learning_rate": 0.0010076968561424826, "loss": 2.0855, "step": 25041 }, { "epoch": 0.6720158866466295, "grad_norm": 0.26953125, "learning_rate": 0.0010076762376573247, "loss": 2.0536, "step": 25042 }, { "epoch": 0.6720427221983684, "grad_norm": 0.25390625, "learning_rate": 0.001007655618277845, "loss": 2.1131, "step": 25043 }, { "epoch": 0.6720695577501073, "grad_norm": 0.267578125, "learning_rate": 0.001007634998004089, "loss": 2.0112, "step": 25044 }, { "epoch": 0.6720963933018463, "grad_norm": 0.2734375, "learning_rate": 0.0010076143768361016, "loss": 2.0233, "step": 25045 }, { "epoch": 0.6721232288535852, "grad_norm": 0.263671875, "learning_rate": 0.001007593754773928, "loss": 2.0224, "step": 25046 }, { "epoch": 0.6721500644053242, "grad_norm": 0.275390625, "learning_rate": 0.001007573131817614, "loss": 2.0763, "step": 25047 }, { "epoch": 0.6721768999570631, "grad_norm": 0.263671875, "learning_rate": 0.001007552507967204, "loss": 1.9975, "step": 25048 }, { "epoch": 0.6722037355088021, "grad_norm": 0.2578125, "learning_rate": 0.0010075318832227442, "loss": 2.0287, "step": 25049 }, { "epoch": 0.672230571060541, "grad_norm": 0.2578125, "learning_rate": 0.0010075112575842792, "loss": 2.0046, "step": 25050 }, { "epoch": 0.67225740661228, "grad_norm": 0.25390625, "learning_rate": 0.0010074906310518542, "loss": 2.0475, "step": 25051 }, { "epoch": 0.6722842421640189, "grad_norm": 0.267578125, "learning_rate": 0.0010074700036255147, "loss": 2.1154, "step": 25052 }, { "epoch": 0.6723110777157578, "grad_norm": 0.259765625, "learning_rate": 0.001007449375305306, "loss": 1.9972, "step": 25053 }, { "epoch": 0.6723379132674968, "grad_norm": 0.26953125, "learning_rate": 0.001007428746091273, "loss": 2.075, "step": 25054 }, { "epoch": 0.6723647488192357, "grad_norm": 0.259765625, "learning_rate": 0.0010074081159834616, "loss": 2.0227, "step": 25055 }, { "epoch": 0.6723915843709747, "grad_norm": 0.259765625, "learning_rate": 0.0010073874849819164, "loss": 2.0451, "step": 25056 }, { "epoch": 0.6724184199227136, "grad_norm": 0.267578125, "learning_rate": 0.0010073668530866827, "loss": 2.0749, "step": 25057 }, { "epoch": 0.6724452554744526, "grad_norm": 0.2578125, "learning_rate": 0.0010073462202978064, "loss": 1.9337, "step": 25058 }, { "epoch": 0.6724720910261915, "grad_norm": 0.263671875, "learning_rate": 0.0010073255866153322, "loss": 2.0639, "step": 25059 }, { "epoch": 0.6724989265779304, "grad_norm": 0.267578125, "learning_rate": 0.0010073049520393053, "loss": 2.1737, "step": 25060 }, { "epoch": 0.6725257621296694, "grad_norm": 0.265625, "learning_rate": 0.0010072843165697715, "loss": 1.9933, "step": 25061 }, { "epoch": 0.6725525976814083, "grad_norm": 0.26171875, "learning_rate": 0.0010072636802067754, "loss": 2.0039, "step": 25062 }, { "epoch": 0.6725794332331473, "grad_norm": 0.2734375, "learning_rate": 0.0010072430429503628, "loss": 2.0895, "step": 25063 }, { "epoch": 0.6726062687848862, "grad_norm": 0.287109375, "learning_rate": 0.0010072224048005788, "loss": 2.1088, "step": 25064 }, { "epoch": 0.6726331043366252, "grad_norm": 0.2734375, "learning_rate": 0.0010072017657574684, "loss": 2.0935, "step": 25065 }, { "epoch": 0.6726599398883641, "grad_norm": 0.279296875, "learning_rate": 0.0010071811258210773, "loss": 2.0222, "step": 25066 }, { "epoch": 0.672686775440103, "grad_norm": 0.259765625, "learning_rate": 0.0010071604849914509, "loss": 1.9819, "step": 25067 }, { "epoch": 0.672713610991842, "grad_norm": 0.275390625, "learning_rate": 0.0010071398432686337, "loss": 2.1247, "step": 25068 }, { "epoch": 0.6727404465435809, "grad_norm": 0.265625, "learning_rate": 0.0010071192006526717, "loss": 1.9747, "step": 25069 }, { "epoch": 0.6727672820953199, "grad_norm": 0.2734375, "learning_rate": 0.00100709855714361, "loss": 2.1607, "step": 25070 }, { "epoch": 0.6727941176470589, "grad_norm": 0.2578125, "learning_rate": 0.0010070779127414936, "loss": 2.022, "step": 25071 }, { "epoch": 0.6728209531987978, "grad_norm": 0.251953125, "learning_rate": 0.0010070572674463684, "loss": 1.9445, "step": 25072 }, { "epoch": 0.6728477887505367, "grad_norm": 0.259765625, "learning_rate": 0.001007036621258279, "loss": 2.0103, "step": 25073 }, { "epoch": 0.6728746243022756, "grad_norm": 0.251953125, "learning_rate": 0.001007015974177271, "loss": 1.9804, "step": 25074 }, { "epoch": 0.6729014598540146, "grad_norm": 0.271484375, "learning_rate": 0.00100699532620339, "loss": 2.177, "step": 25075 }, { "epoch": 0.6729282954057535, "grad_norm": 0.267578125, "learning_rate": 0.0010069746773366806, "loss": 2.1588, "step": 25076 }, { "epoch": 0.6729551309574925, "grad_norm": 0.25390625, "learning_rate": 0.0010069540275771888, "loss": 2.0967, "step": 25077 }, { "epoch": 0.6729819665092315, "grad_norm": 0.251953125, "learning_rate": 0.0010069333769249593, "loss": 2.0201, "step": 25078 }, { "epoch": 0.6730088020609704, "grad_norm": 0.26953125, "learning_rate": 0.001006912725380038, "loss": 2.045, "step": 25079 }, { "epoch": 0.6730356376127093, "grad_norm": 0.255859375, "learning_rate": 0.0010068920729424697, "loss": 2.0569, "step": 25080 }, { "epoch": 0.6730624731644482, "grad_norm": 0.255859375, "learning_rate": 0.0010068714196123, "loss": 1.9893, "step": 25081 }, { "epoch": 0.6730893087161872, "grad_norm": 0.267578125, "learning_rate": 0.001006850765389574, "loss": 2.0202, "step": 25082 }, { "epoch": 0.6731161442679261, "grad_norm": 0.26953125, "learning_rate": 0.0010068301102743372, "loss": 2.1206, "step": 25083 }, { "epoch": 0.6731429798196651, "grad_norm": 0.25390625, "learning_rate": 0.001006809454266635, "loss": 1.9021, "step": 25084 }, { "epoch": 0.6731698153714041, "grad_norm": 0.26171875, "learning_rate": 0.0010067887973665122, "loss": 2.0542, "step": 25085 }, { "epoch": 0.673196650923143, "grad_norm": 0.248046875, "learning_rate": 0.0010067681395740146, "loss": 1.9, "step": 25086 }, { "epoch": 0.673223486474882, "grad_norm": 0.259765625, "learning_rate": 0.0010067474808891875, "loss": 2.017, "step": 25087 }, { "epoch": 0.6732503220266208, "grad_norm": 0.263671875, "learning_rate": 0.001006726821312076, "loss": 1.9454, "step": 25088 }, { "epoch": 0.6732771575783598, "grad_norm": 0.271484375, "learning_rate": 0.0010067061608427254, "loss": 2.0971, "step": 25089 }, { "epoch": 0.6733039931300988, "grad_norm": 0.26171875, "learning_rate": 0.0010066854994811812, "loss": 1.9814, "step": 25090 }, { "epoch": 0.6733308286818377, "grad_norm": 0.26171875, "learning_rate": 0.0010066648372274887, "loss": 2.0621, "step": 25091 }, { "epoch": 0.6733576642335767, "grad_norm": 0.263671875, "learning_rate": 0.0010066441740816932, "loss": 1.9352, "step": 25092 }, { "epoch": 0.6733844997853156, "grad_norm": 0.251953125, "learning_rate": 0.0010066235100438402, "loss": 1.9707, "step": 25093 }, { "epoch": 0.6734113353370546, "grad_norm": 0.263671875, "learning_rate": 0.0010066028451139743, "loss": 1.9602, "step": 25094 }, { "epoch": 0.6734381708887934, "grad_norm": 0.2578125, "learning_rate": 0.001006582179292142, "loss": 2.0082, "step": 25095 }, { "epoch": 0.6734650064405324, "grad_norm": 0.26171875, "learning_rate": 0.0010065615125783877, "loss": 2.0202, "step": 25096 }, { "epoch": 0.6734918419922714, "grad_norm": 0.2490234375, "learning_rate": 0.0010065408449727571, "loss": 1.9361, "step": 25097 }, { "epoch": 0.6735186775440103, "grad_norm": 0.27734375, "learning_rate": 0.0010065201764752956, "loss": 2.0496, "step": 25098 }, { "epoch": 0.6735455130957493, "grad_norm": 0.26171875, "learning_rate": 0.0010064995070860482, "loss": 1.9856, "step": 25099 }, { "epoch": 0.6735723486474882, "grad_norm": 0.28125, "learning_rate": 0.0010064788368050607, "loss": 2.1758, "step": 25100 }, { "epoch": 0.6735991841992272, "grad_norm": 0.27734375, "learning_rate": 0.0010064581656323783, "loss": 2.1327, "step": 25101 }, { "epoch": 0.673626019750966, "grad_norm": 0.28515625, "learning_rate": 0.001006437493568046, "loss": 2.2096, "step": 25102 }, { "epoch": 0.673652855302705, "grad_norm": 0.27734375, "learning_rate": 0.0010064168206121095, "loss": 2.2143, "step": 25103 }, { "epoch": 0.673679690854444, "grad_norm": 0.267578125, "learning_rate": 0.001006396146764614, "loss": 2.1317, "step": 25104 }, { "epoch": 0.6737065264061829, "grad_norm": 0.26171875, "learning_rate": 0.001006375472025605, "loss": 2.1751, "step": 25105 }, { "epoch": 0.6737333619579219, "grad_norm": 0.2578125, "learning_rate": 0.001006354796395128, "loss": 2.1165, "step": 25106 }, { "epoch": 0.6737601975096608, "grad_norm": 0.26953125, "learning_rate": 0.0010063341198732277, "loss": 2.2029, "step": 25107 }, { "epoch": 0.6737870330613998, "grad_norm": 0.25390625, "learning_rate": 0.00100631344245995, "loss": 2.1601, "step": 25108 }, { "epoch": 0.6738138686131386, "grad_norm": 0.26953125, "learning_rate": 0.0010062927641553403, "loss": 2.1215, "step": 25109 }, { "epoch": 0.6738407041648776, "grad_norm": 0.251953125, "learning_rate": 0.0010062720849594437, "loss": 2.1188, "step": 25110 }, { "epoch": 0.6738675397166166, "grad_norm": 0.26171875, "learning_rate": 0.0010062514048723057, "loss": 2.0553, "step": 25111 }, { "epoch": 0.6738943752683555, "grad_norm": 0.263671875, "learning_rate": 0.0010062307238939718, "loss": 2.0803, "step": 25112 }, { "epoch": 0.6739212108200945, "grad_norm": 0.26171875, "learning_rate": 0.0010062100420244868, "loss": 2.1542, "step": 25113 }, { "epoch": 0.6739480463718334, "grad_norm": 0.265625, "learning_rate": 0.0010061893592638968, "loss": 2.0387, "step": 25114 }, { "epoch": 0.6739748819235724, "grad_norm": 0.255859375, "learning_rate": 0.001006168675612247, "loss": 2.1665, "step": 25115 }, { "epoch": 0.6740017174753113, "grad_norm": 0.255859375, "learning_rate": 0.001006147991069582, "loss": 2.0962, "step": 25116 }, { "epoch": 0.6740285530270502, "grad_norm": 0.25, "learning_rate": 0.0010061273056359485, "loss": 2.094, "step": 25117 }, { "epoch": 0.6740553885787892, "grad_norm": 0.2578125, "learning_rate": 0.001006106619311391, "loss": 1.9782, "step": 25118 }, { "epoch": 0.6740822241305281, "grad_norm": 0.255859375, "learning_rate": 0.0010060859320959548, "loss": 2.1993, "step": 25119 }, { "epoch": 0.6741090596822671, "grad_norm": 0.25390625, "learning_rate": 0.0010060652439896857, "loss": 2.0639, "step": 25120 }, { "epoch": 0.674135895234006, "grad_norm": 0.26171875, "learning_rate": 0.0010060445549926287, "loss": 2.1793, "step": 25121 }, { "epoch": 0.674162730785745, "grad_norm": 0.255859375, "learning_rate": 0.0010060238651048298, "loss": 2.0987, "step": 25122 }, { "epoch": 0.674189566337484, "grad_norm": 0.265625, "learning_rate": 0.0010060031743263339, "loss": 2.1462, "step": 25123 }, { "epoch": 0.6742164018892228, "grad_norm": 0.255859375, "learning_rate": 0.0010059824826571862, "loss": 2.0679, "step": 25124 }, { "epoch": 0.6742432374409618, "grad_norm": 0.2578125, "learning_rate": 0.0010059617900974325, "loss": 2.0533, "step": 25125 }, { "epoch": 0.6742700729927007, "grad_norm": 0.259765625, "learning_rate": 0.0010059410966471183, "loss": 2.1245, "step": 25126 }, { "epoch": 0.6742969085444397, "grad_norm": 0.26171875, "learning_rate": 0.0010059204023062883, "loss": 2.0898, "step": 25127 }, { "epoch": 0.6743237440961786, "grad_norm": 0.263671875, "learning_rate": 0.0010058997070749888, "loss": 2.0167, "step": 25128 }, { "epoch": 0.6743505796479176, "grad_norm": 0.263671875, "learning_rate": 0.0010058790109532644, "loss": 2.0481, "step": 25129 }, { "epoch": 0.6743774151996565, "grad_norm": 0.267578125, "learning_rate": 0.0010058583139411611, "loss": 2.0902, "step": 25130 }, { "epoch": 0.6744042507513954, "grad_norm": 0.267578125, "learning_rate": 0.001005837616038724, "loss": 2.1485, "step": 25131 }, { "epoch": 0.6744310863031344, "grad_norm": 0.265625, "learning_rate": 0.0010058169172459986, "loss": 2.1802, "step": 25132 }, { "epoch": 0.6744579218548733, "grad_norm": 0.255859375, "learning_rate": 0.0010057962175630305, "loss": 2.071, "step": 25133 }, { "epoch": 0.6744847574066123, "grad_norm": 0.26171875, "learning_rate": 0.0010057755169898645, "loss": 1.9582, "step": 25134 }, { "epoch": 0.6745115929583512, "grad_norm": 0.28125, "learning_rate": 0.0010057548155265467, "loss": 2.1163, "step": 25135 }, { "epoch": 0.6745384285100902, "grad_norm": 0.279296875, "learning_rate": 0.001005734113173122, "loss": 2.247, "step": 25136 }, { "epoch": 0.6745652640618292, "grad_norm": 0.271484375, "learning_rate": 0.0010057134099296361, "loss": 2.0467, "step": 25137 }, { "epoch": 0.674592099613568, "grad_norm": 0.26171875, "learning_rate": 0.0010056927057961345, "loss": 2.0217, "step": 25138 }, { "epoch": 0.674618935165307, "grad_norm": 0.265625, "learning_rate": 0.001005672000772662, "loss": 2.2027, "step": 25139 }, { "epoch": 0.6746457707170459, "grad_norm": 0.28125, "learning_rate": 0.001005651294859265, "loss": 2.2333, "step": 25140 }, { "epoch": 0.6746726062687849, "grad_norm": 0.263671875, "learning_rate": 0.0010056305880559882, "loss": 2.1296, "step": 25141 }, { "epoch": 0.6746994418205239, "grad_norm": 0.2578125, "learning_rate": 0.0010056098803628772, "loss": 2.024, "step": 25142 }, { "epoch": 0.6747262773722628, "grad_norm": 0.259765625, "learning_rate": 0.0010055891717799773, "loss": 2.0478, "step": 25143 }, { "epoch": 0.6747531129240018, "grad_norm": 0.267578125, "learning_rate": 0.0010055684623073344, "loss": 2.1497, "step": 25144 }, { "epoch": 0.6747799484757406, "grad_norm": 0.25, "learning_rate": 0.0010055477519449933, "loss": 2.0555, "step": 25145 }, { "epoch": 0.6748067840274796, "grad_norm": 0.271484375, "learning_rate": 0.0010055270406929997, "loss": 2.1932, "step": 25146 }, { "epoch": 0.6748336195792185, "grad_norm": 0.259765625, "learning_rate": 0.0010055063285513994, "loss": 2.0594, "step": 25147 }, { "epoch": 0.6748604551309575, "grad_norm": 0.271484375, "learning_rate": 0.0010054856155202376, "loss": 2.0469, "step": 25148 }, { "epoch": 0.6748872906826965, "grad_norm": 0.265625, "learning_rate": 0.0010054649015995595, "loss": 2.0386, "step": 25149 }, { "epoch": 0.6749141262344354, "grad_norm": 0.25, "learning_rate": 0.0010054441867894104, "loss": 2.0423, "step": 25150 }, { "epoch": 0.6749409617861744, "grad_norm": 0.265625, "learning_rate": 0.0010054234710898365, "loss": 2.1005, "step": 25151 }, { "epoch": 0.6749677973379132, "grad_norm": 0.25, "learning_rate": 0.0010054027545008824, "loss": 2.0642, "step": 25152 }, { "epoch": 0.6749946328896522, "grad_norm": 0.251953125, "learning_rate": 0.0010053820370225942, "loss": 2.0329, "step": 25153 }, { "epoch": 0.6750214684413911, "grad_norm": 0.263671875, "learning_rate": 0.0010053613186550168, "loss": 2.0088, "step": 25154 }, { "epoch": 0.6750483039931301, "grad_norm": 0.255859375, "learning_rate": 0.0010053405993981964, "loss": 2.1915, "step": 25155 }, { "epoch": 0.6750751395448691, "grad_norm": 0.259765625, "learning_rate": 0.0010053198792521775, "loss": 2.0061, "step": 25156 }, { "epoch": 0.675101975096608, "grad_norm": 0.267578125, "learning_rate": 0.0010052991582170064, "loss": 2.0348, "step": 25157 }, { "epoch": 0.675128810648347, "grad_norm": 0.263671875, "learning_rate": 0.001005278436292728, "loss": 2.1292, "step": 25158 }, { "epoch": 0.6751556462000858, "grad_norm": 0.255859375, "learning_rate": 0.0010052577134793878, "loss": 2.1185, "step": 25159 }, { "epoch": 0.6751824817518248, "grad_norm": 0.255859375, "learning_rate": 0.0010052369897770314, "loss": 2.0462, "step": 25160 }, { "epoch": 0.6752093173035638, "grad_norm": 0.259765625, "learning_rate": 0.0010052162651857045, "loss": 2.0684, "step": 25161 }, { "epoch": 0.6752361528553027, "grad_norm": 0.259765625, "learning_rate": 0.0010051955397054524, "loss": 2.0762, "step": 25162 }, { "epoch": 0.6752629884070417, "grad_norm": 0.26171875, "learning_rate": 0.0010051748133363201, "loss": 2.0079, "step": 25163 }, { "epoch": 0.6752898239587806, "grad_norm": 0.2578125, "learning_rate": 0.0010051540860783537, "loss": 1.9849, "step": 25164 }, { "epoch": 0.6753166595105196, "grad_norm": 0.26171875, "learning_rate": 0.0010051333579315985, "loss": 2.0454, "step": 25165 }, { "epoch": 0.6753434950622584, "grad_norm": 0.2578125, "learning_rate": 0.0010051126288961, "loss": 1.9443, "step": 25166 }, { "epoch": 0.6753703306139974, "grad_norm": 0.263671875, "learning_rate": 0.0010050918989719033, "loss": 2.0147, "step": 25167 }, { "epoch": 0.6753971661657364, "grad_norm": 0.259765625, "learning_rate": 0.0010050711681590543, "loss": 2.0576, "step": 25168 }, { "epoch": 0.6754240017174753, "grad_norm": 0.28515625, "learning_rate": 0.0010050504364575983, "loss": 2.1056, "step": 25169 }, { "epoch": 0.6754508372692143, "grad_norm": 0.27734375, "learning_rate": 0.001005029703867581, "loss": 2.0973, "step": 25170 }, { "epoch": 0.6754776728209532, "grad_norm": 0.296875, "learning_rate": 0.0010050089703890473, "loss": 2.1518, "step": 25171 }, { "epoch": 0.6755045083726922, "grad_norm": 0.27734375, "learning_rate": 0.0010049882360220433, "loss": 2.188, "step": 25172 }, { "epoch": 0.675531343924431, "grad_norm": 0.263671875, "learning_rate": 0.0010049675007666142, "loss": 2.2094, "step": 25173 }, { "epoch": 0.67555817947617, "grad_norm": 0.25390625, "learning_rate": 0.0010049467646228057, "loss": 2.1034, "step": 25174 }, { "epoch": 0.675585015027909, "grad_norm": 0.279296875, "learning_rate": 0.001004926027590663, "loss": 2.2245, "step": 25175 }, { "epoch": 0.6756118505796479, "grad_norm": 0.263671875, "learning_rate": 0.0010049052896702315, "loss": 2.0983, "step": 25176 }, { "epoch": 0.6756386861313869, "grad_norm": 0.259765625, "learning_rate": 0.0010048845508615573, "loss": 2.1485, "step": 25177 }, { "epoch": 0.6756655216831258, "grad_norm": 0.265625, "learning_rate": 0.0010048638111646855, "loss": 2.1125, "step": 25178 }, { "epoch": 0.6756923572348648, "grad_norm": 0.255859375, "learning_rate": 0.0010048430705796615, "loss": 2.1325, "step": 25179 }, { "epoch": 0.6757191927866036, "grad_norm": 0.283203125, "learning_rate": 0.0010048223291065307, "loss": 2.1572, "step": 25180 }, { "epoch": 0.6757460283383426, "grad_norm": 0.2734375, "learning_rate": 0.0010048015867453392, "loss": 2.1035, "step": 25181 }, { "epoch": 0.6757728638900816, "grad_norm": 0.259765625, "learning_rate": 0.001004780843496132, "loss": 2.1032, "step": 25182 }, { "epoch": 0.6757996994418205, "grad_norm": 0.291015625, "learning_rate": 0.0010047600993589546, "loss": 2.1741, "step": 25183 }, { "epoch": 0.6758265349935595, "grad_norm": 0.26171875, "learning_rate": 0.0010047393543338528, "loss": 2.0954, "step": 25184 }, { "epoch": 0.6758533705452984, "grad_norm": 0.259765625, "learning_rate": 0.0010047186084208716, "loss": 2.0996, "step": 25185 }, { "epoch": 0.6758802060970374, "grad_norm": 0.2734375, "learning_rate": 0.001004697861620057, "loss": 2.2149, "step": 25186 }, { "epoch": 0.6759070416487764, "grad_norm": 0.263671875, "learning_rate": 0.0010046771139314546, "loss": 2.1165, "step": 25187 }, { "epoch": 0.6759338772005152, "grad_norm": 0.267578125, "learning_rate": 0.0010046563653551093, "loss": 2.0948, "step": 25188 }, { "epoch": 0.6759607127522542, "grad_norm": 0.271484375, "learning_rate": 0.0010046356158910671, "loss": 2.1868, "step": 25189 }, { "epoch": 0.6759875483039931, "grad_norm": 0.26171875, "learning_rate": 0.0010046148655393735, "loss": 2.052, "step": 25190 }, { "epoch": 0.6760143838557321, "grad_norm": 0.263671875, "learning_rate": 0.0010045941143000741, "loss": 2.1169, "step": 25191 }, { "epoch": 0.676041219407471, "grad_norm": 0.26953125, "learning_rate": 0.0010045733621732139, "loss": 2.2557, "step": 25192 }, { "epoch": 0.67606805495921, "grad_norm": 0.2578125, "learning_rate": 0.001004552609158839, "loss": 2.0821, "step": 25193 }, { "epoch": 0.676094890510949, "grad_norm": 0.265625, "learning_rate": 0.0010045318552569943, "loss": 2.1502, "step": 25194 }, { "epoch": 0.6761217260626878, "grad_norm": 0.265625, "learning_rate": 0.0010045111004677262, "loss": 2.1194, "step": 25195 }, { "epoch": 0.6761485616144268, "grad_norm": 0.255859375, "learning_rate": 0.0010044903447910796, "loss": 2.0283, "step": 25196 }, { "epoch": 0.6761753971661657, "grad_norm": 0.255859375, "learning_rate": 0.0010044695882271, "loss": 2.2087, "step": 25197 }, { "epoch": 0.6762022327179047, "grad_norm": 0.26171875, "learning_rate": 0.0010044488307758333, "loss": 2.1051, "step": 25198 }, { "epoch": 0.6762290682696436, "grad_norm": 0.259765625, "learning_rate": 0.001004428072437325, "loss": 2.0992, "step": 25199 }, { "epoch": 0.6762559038213826, "grad_norm": 0.265625, "learning_rate": 0.0010044073132116204, "loss": 1.9991, "step": 25200 }, { "epoch": 0.6762827393731216, "grad_norm": 0.27734375, "learning_rate": 0.001004386553098765, "loss": 2.1983, "step": 25201 }, { "epoch": 0.6763095749248604, "grad_norm": 0.2578125, "learning_rate": 0.0010043657920988044, "loss": 2.1267, "step": 25202 }, { "epoch": 0.6763364104765994, "grad_norm": 0.27734375, "learning_rate": 0.0010043450302117845, "loss": 2.1128, "step": 25203 }, { "epoch": 0.6763632460283383, "grad_norm": 0.279296875, "learning_rate": 0.0010043242674377506, "loss": 2.2114, "step": 25204 }, { "epoch": 0.6763900815800773, "grad_norm": 0.267578125, "learning_rate": 0.0010043035037767478, "loss": 2.112, "step": 25205 }, { "epoch": 0.6764169171318162, "grad_norm": 0.27734375, "learning_rate": 0.0010042827392288223, "loss": 2.1555, "step": 25206 }, { "epoch": 0.6764437526835552, "grad_norm": 0.26953125, "learning_rate": 0.0010042619737940194, "loss": 2.1874, "step": 25207 }, { "epoch": 0.6764705882352942, "grad_norm": 0.267578125, "learning_rate": 0.0010042412074723847, "loss": 2.1459, "step": 25208 }, { "epoch": 0.676497423787033, "grad_norm": 0.26171875, "learning_rate": 0.0010042204402639636, "loss": 2.1477, "step": 25209 }, { "epoch": 0.676524259338772, "grad_norm": 0.287109375, "learning_rate": 0.001004199672168802, "loss": 2.191, "step": 25210 }, { "epoch": 0.6765510948905109, "grad_norm": 0.271484375, "learning_rate": 0.001004178903186945, "loss": 2.2019, "step": 25211 }, { "epoch": 0.6765779304422499, "grad_norm": 0.267578125, "learning_rate": 0.0010041581333184385, "loss": 2.1942, "step": 25212 }, { "epoch": 0.6766047659939889, "grad_norm": 0.28125, "learning_rate": 0.001004137362563328, "loss": 2.2261, "step": 25213 }, { "epoch": 0.6766316015457278, "grad_norm": 0.259765625, "learning_rate": 0.001004116590921659, "loss": 2.0932, "step": 25214 }, { "epoch": 0.6766584370974668, "grad_norm": 0.265625, "learning_rate": 0.001004095818393477, "loss": 2.1186, "step": 25215 }, { "epoch": 0.6766852726492056, "grad_norm": 0.2578125, "learning_rate": 0.0010040750449788278, "loss": 2.1783, "step": 25216 }, { "epoch": 0.6767121082009446, "grad_norm": 0.259765625, "learning_rate": 0.0010040542706777568, "loss": 2.2485, "step": 25217 }, { "epoch": 0.6767389437526835, "grad_norm": 0.26171875, "learning_rate": 0.0010040334954903097, "loss": 2.1175, "step": 25218 }, { "epoch": 0.6767657793044225, "grad_norm": 0.255859375, "learning_rate": 0.0010040127194165316, "loss": 2.2428, "step": 25219 }, { "epoch": 0.6767926148561615, "grad_norm": 0.25390625, "learning_rate": 0.0010039919424564689, "loss": 2.1368, "step": 25220 }, { "epoch": 0.6768194504079004, "grad_norm": 0.26953125, "learning_rate": 0.0010039711646101666, "loss": 2.1117, "step": 25221 }, { "epoch": 0.6768462859596394, "grad_norm": 0.259765625, "learning_rate": 0.0010039503858776703, "loss": 2.1711, "step": 25222 }, { "epoch": 0.6768731215113782, "grad_norm": 0.255859375, "learning_rate": 0.0010039296062590258, "loss": 2.02, "step": 25223 }, { "epoch": 0.6768999570631172, "grad_norm": 0.265625, "learning_rate": 0.0010039088257542787, "loss": 2.1542, "step": 25224 }, { "epoch": 0.6769267926148561, "grad_norm": 0.259765625, "learning_rate": 0.0010038880443634741, "loss": 2.1648, "step": 25225 }, { "epoch": 0.6769536281665951, "grad_norm": 0.265625, "learning_rate": 0.0010038672620866582, "loss": 2.1581, "step": 25226 }, { "epoch": 0.6769804637183341, "grad_norm": 0.271484375, "learning_rate": 0.0010038464789238763, "loss": 2.1164, "step": 25227 }, { "epoch": 0.677007299270073, "grad_norm": 0.259765625, "learning_rate": 0.0010038256948751742, "loss": 2.0302, "step": 25228 }, { "epoch": 0.677034134821812, "grad_norm": 0.271484375, "learning_rate": 0.0010038049099405971, "loss": 2.1153, "step": 25229 }, { "epoch": 0.6770609703735508, "grad_norm": 0.27734375, "learning_rate": 0.0010037841241201907, "loss": 2.165, "step": 25230 }, { "epoch": 0.6770878059252898, "grad_norm": 0.2578125, "learning_rate": 0.001003763337414001, "loss": 2.0847, "step": 25231 }, { "epoch": 0.6771146414770288, "grad_norm": 0.26953125, "learning_rate": 0.0010037425498220735, "loss": 2.061, "step": 25232 }, { "epoch": 0.6771414770287677, "grad_norm": 0.2578125, "learning_rate": 0.0010037217613444534, "loss": 2.0336, "step": 25233 }, { "epoch": 0.6771683125805067, "grad_norm": 0.263671875, "learning_rate": 0.0010037009719811865, "loss": 2.1231, "step": 25234 }, { "epoch": 0.6771951481322456, "grad_norm": 0.26953125, "learning_rate": 0.0010036801817323185, "loss": 2.1331, "step": 25235 }, { "epoch": 0.6772219836839846, "grad_norm": 0.259765625, "learning_rate": 0.0010036593905978948, "loss": 2.0343, "step": 25236 }, { "epoch": 0.6772488192357234, "grad_norm": 0.265625, "learning_rate": 0.0010036385985779613, "loss": 2.0291, "step": 25237 }, { "epoch": 0.6772756547874624, "grad_norm": 0.2890625, "learning_rate": 0.0010036178056725634, "loss": 2.1537, "step": 25238 }, { "epoch": 0.6773024903392014, "grad_norm": 0.275390625, "learning_rate": 0.0010035970118817468, "loss": 2.1689, "step": 25239 }, { "epoch": 0.6773293258909403, "grad_norm": 0.28125, "learning_rate": 0.001003576217205557, "loss": 2.2727, "step": 25240 }, { "epoch": 0.6773561614426793, "grad_norm": 0.259765625, "learning_rate": 0.00100355542164404, "loss": 2.1015, "step": 25241 }, { "epoch": 0.6773829969944182, "grad_norm": 0.267578125, "learning_rate": 0.001003534625197241, "loss": 2.2022, "step": 25242 }, { "epoch": 0.6774098325461572, "grad_norm": 0.27734375, "learning_rate": 0.0010035138278652056, "loss": 2.2045, "step": 25243 }, { "epoch": 0.677436668097896, "grad_norm": 0.26953125, "learning_rate": 0.0010034930296479797, "loss": 2.1484, "step": 25244 }, { "epoch": 0.677463503649635, "grad_norm": 0.2578125, "learning_rate": 0.0010034722305456087, "loss": 2.1572, "step": 25245 }, { "epoch": 0.677490339201374, "grad_norm": 0.26171875, "learning_rate": 0.0010034514305581387, "loss": 2.0621, "step": 25246 }, { "epoch": 0.6775171747531129, "grad_norm": 0.265625, "learning_rate": 0.0010034306296856144, "loss": 2.1353, "step": 25247 }, { "epoch": 0.6775440103048519, "grad_norm": 0.263671875, "learning_rate": 0.0010034098279280825, "loss": 2.1621, "step": 25248 }, { "epoch": 0.6775708458565908, "grad_norm": 0.267578125, "learning_rate": 0.0010033890252855878, "loss": 2.157, "step": 25249 }, { "epoch": 0.6775976814083298, "grad_norm": 0.26953125, "learning_rate": 0.0010033682217581764, "loss": 2.1674, "step": 25250 }, { "epoch": 0.6776245169600686, "grad_norm": 0.263671875, "learning_rate": 0.0010033474173458937, "loss": 2.2253, "step": 25251 }, { "epoch": 0.6776513525118076, "grad_norm": 0.267578125, "learning_rate": 0.0010033266120487855, "loss": 2.1963, "step": 25252 }, { "epoch": 0.6776781880635466, "grad_norm": 0.251953125, "learning_rate": 0.0010033058058668972, "loss": 2.127, "step": 25253 }, { "epoch": 0.6777050236152855, "grad_norm": 0.2578125, "learning_rate": 0.0010032849988002748, "loss": 2.1189, "step": 25254 }, { "epoch": 0.6777318591670245, "grad_norm": 0.255859375, "learning_rate": 0.001003264190848964, "loss": 2.0804, "step": 25255 }, { "epoch": 0.6777586947187634, "grad_norm": 0.255859375, "learning_rate": 0.0010032433820130096, "loss": 2.044, "step": 25256 }, { "epoch": 0.6777855302705024, "grad_norm": 0.26171875, "learning_rate": 0.0010032225722924584, "loss": 2.2602, "step": 25257 }, { "epoch": 0.6778123658222414, "grad_norm": 0.263671875, "learning_rate": 0.0010032017616873551, "loss": 2.1446, "step": 25258 }, { "epoch": 0.6778392013739802, "grad_norm": 0.263671875, "learning_rate": 0.001003180950197746, "loss": 2.0116, "step": 25259 }, { "epoch": 0.6778660369257192, "grad_norm": 0.259765625, "learning_rate": 0.0010031601378236766, "loss": 2.1632, "step": 25260 }, { "epoch": 0.6778928724774581, "grad_norm": 0.2578125, "learning_rate": 0.0010031393245651923, "loss": 2.188, "step": 25261 }, { "epoch": 0.6779197080291971, "grad_norm": 0.2734375, "learning_rate": 0.001003118510422339, "loss": 2.2383, "step": 25262 }, { "epoch": 0.677946543580936, "grad_norm": 0.2578125, "learning_rate": 0.0010030976953951623, "loss": 2.0549, "step": 25263 }, { "epoch": 0.677973379132675, "grad_norm": 0.2578125, "learning_rate": 0.0010030768794837077, "loss": 2.1723, "step": 25264 }, { "epoch": 0.678000214684414, "grad_norm": 0.259765625, "learning_rate": 0.0010030560626880212, "loss": 2.081, "step": 25265 }, { "epoch": 0.6780270502361528, "grad_norm": 0.263671875, "learning_rate": 0.0010030352450081483, "loss": 2.1628, "step": 25266 }, { "epoch": 0.6780538857878918, "grad_norm": 0.265625, "learning_rate": 0.0010030144264441343, "loss": 2.1597, "step": 25267 }, { "epoch": 0.6780807213396307, "grad_norm": 0.271484375, "learning_rate": 0.0010029936069960255, "loss": 2.1021, "step": 25268 }, { "epoch": 0.6781075568913697, "grad_norm": 0.255859375, "learning_rate": 0.0010029727866638673, "loss": 1.9459, "step": 25269 }, { "epoch": 0.6781343924431086, "grad_norm": 0.26171875, "learning_rate": 0.0010029519654477053, "loss": 2.1497, "step": 25270 }, { "epoch": 0.6781612279948476, "grad_norm": 0.265625, "learning_rate": 0.0010029311433475852, "loss": 2.1828, "step": 25271 }, { "epoch": 0.6781880635465866, "grad_norm": 0.283203125, "learning_rate": 0.0010029103203635528, "loss": 2.1943, "step": 25272 }, { "epoch": 0.6782148990983254, "grad_norm": 0.275390625, "learning_rate": 0.0010028894964956537, "loss": 2.092, "step": 25273 }, { "epoch": 0.6782417346500644, "grad_norm": 0.27734375, "learning_rate": 0.0010028686717439336, "loss": 2.2757, "step": 25274 }, { "epoch": 0.6782685702018033, "grad_norm": 0.26171875, "learning_rate": 0.001002847846108438, "loss": 2.1277, "step": 25275 }, { "epoch": 0.6782954057535423, "grad_norm": 0.265625, "learning_rate": 0.0010028270195892128, "loss": 2.1617, "step": 25276 }, { "epoch": 0.6783222413052812, "grad_norm": 0.26171875, "learning_rate": 0.0010028061921863037, "loss": 2.1735, "step": 25277 }, { "epoch": 0.6783490768570202, "grad_norm": 0.265625, "learning_rate": 0.0010027853638997562, "loss": 2.1643, "step": 25278 }, { "epoch": 0.6783759124087592, "grad_norm": 0.26953125, "learning_rate": 0.0010027645347296164, "loss": 2.2408, "step": 25279 }, { "epoch": 0.678402747960498, "grad_norm": 0.2578125, "learning_rate": 0.0010027437046759293, "loss": 2.0863, "step": 25280 }, { "epoch": 0.678429583512237, "grad_norm": 0.26171875, "learning_rate": 0.0010027228737387415, "loss": 2.164, "step": 25281 }, { "epoch": 0.6784564190639759, "grad_norm": 0.259765625, "learning_rate": 0.0010027020419180977, "loss": 2.1604, "step": 25282 }, { "epoch": 0.6784832546157149, "grad_norm": 0.26171875, "learning_rate": 0.0010026812092140444, "loss": 2.1938, "step": 25283 }, { "epoch": 0.6785100901674539, "grad_norm": 0.255859375, "learning_rate": 0.0010026603756266269, "loss": 2.1254, "step": 25284 }, { "epoch": 0.6785369257191928, "grad_norm": 0.25, "learning_rate": 0.001002639541155891, "loss": 1.9706, "step": 25285 }, { "epoch": 0.6785637612709318, "grad_norm": 0.2578125, "learning_rate": 0.0010026187058018825, "loss": 2.115, "step": 25286 }, { "epoch": 0.6785905968226706, "grad_norm": 0.2578125, "learning_rate": 0.0010025978695646469, "loss": 2.1711, "step": 25287 }, { "epoch": 0.6786174323744096, "grad_norm": 0.265625, "learning_rate": 0.00100257703244423, "loss": 2.101, "step": 25288 }, { "epoch": 0.6786442679261485, "grad_norm": 0.259765625, "learning_rate": 0.0010025561944406775, "loss": 2.1894, "step": 25289 }, { "epoch": 0.6786711034778875, "grad_norm": 0.267578125, "learning_rate": 0.0010025353555540354, "loss": 2.1174, "step": 25290 }, { "epoch": 0.6786979390296265, "grad_norm": 0.25390625, "learning_rate": 0.001002514515784349, "loss": 2.027, "step": 25291 }, { "epoch": 0.6787247745813654, "grad_norm": 0.25390625, "learning_rate": 0.001002493675131664, "loss": 1.9976, "step": 25292 }, { "epoch": 0.6787516101331044, "grad_norm": 0.26953125, "learning_rate": 0.0010024728335960263, "loss": 2.1435, "step": 25293 }, { "epoch": 0.6787784456848432, "grad_norm": 0.26171875, "learning_rate": 0.001002451991177482, "loss": 2.125, "step": 25294 }, { "epoch": 0.6788052812365822, "grad_norm": 0.275390625, "learning_rate": 0.001002431147876076, "loss": 2.1564, "step": 25295 }, { "epoch": 0.6788321167883211, "grad_norm": 0.251953125, "learning_rate": 0.0010024103036918547, "loss": 2.0555, "step": 25296 }, { "epoch": 0.6788589523400601, "grad_norm": 0.2578125, "learning_rate": 0.0010023894586248636, "loss": 2.1449, "step": 25297 }, { "epoch": 0.6788857878917991, "grad_norm": 0.2734375, "learning_rate": 0.0010023686126751483, "loss": 2.1974, "step": 25298 }, { "epoch": 0.678912623443538, "grad_norm": 0.259765625, "learning_rate": 0.0010023477658427547, "loss": 2.1793, "step": 25299 }, { "epoch": 0.678939458995277, "grad_norm": 0.259765625, "learning_rate": 0.0010023269181277284, "loss": 2.0427, "step": 25300 }, { "epoch": 0.6789662945470158, "grad_norm": 0.267578125, "learning_rate": 0.0010023060695301153, "loss": 2.2057, "step": 25301 }, { "epoch": 0.6789931300987548, "grad_norm": 0.26953125, "learning_rate": 0.001002285220049961, "loss": 2.0618, "step": 25302 }, { "epoch": 0.6790199656504938, "grad_norm": 0.263671875, "learning_rate": 0.0010022643696873113, "loss": 2.1735, "step": 25303 }, { "epoch": 0.6790468012022327, "grad_norm": 0.259765625, "learning_rate": 0.001002243518442212, "loss": 2.1562, "step": 25304 }, { "epoch": 0.6790736367539717, "grad_norm": 0.271484375, "learning_rate": 0.0010022226663147086, "loss": 2.0628, "step": 25305 }, { "epoch": 0.6791004723057106, "grad_norm": 0.287109375, "learning_rate": 0.001002201813304847, "loss": 2.2764, "step": 25306 }, { "epoch": 0.6791273078574496, "grad_norm": 0.26953125, "learning_rate": 0.001002180959412673, "loss": 2.1956, "step": 25307 }, { "epoch": 0.6791541434091884, "grad_norm": 0.27734375, "learning_rate": 0.0010021601046382325, "loss": 2.1715, "step": 25308 }, { "epoch": 0.6791809789609274, "grad_norm": 0.279296875, "learning_rate": 0.001002139248981571, "loss": 2.1408, "step": 25309 }, { "epoch": 0.6792078145126664, "grad_norm": 0.28515625, "learning_rate": 0.001002118392442734, "loss": 2.1626, "step": 25310 }, { "epoch": 0.6792346500644053, "grad_norm": 0.275390625, "learning_rate": 0.0010020975350217677, "loss": 2.2313, "step": 25311 }, { "epoch": 0.6792614856161443, "grad_norm": 0.279296875, "learning_rate": 0.0010020766767187177, "loss": 2.1557, "step": 25312 }, { "epoch": 0.6792883211678832, "grad_norm": 0.265625, "learning_rate": 0.00100205581753363, "loss": 2.1395, "step": 25313 }, { "epoch": 0.6793151567196222, "grad_norm": 0.25390625, "learning_rate": 0.0010020349574665498, "loss": 2.1138, "step": 25314 }, { "epoch": 0.679341992271361, "grad_norm": 0.25390625, "learning_rate": 0.0010020140965175232, "loss": 2.0612, "step": 25315 }, { "epoch": 0.6793688278231, "grad_norm": 0.27734375, "learning_rate": 0.0010019932346865962, "loss": 2.2297, "step": 25316 }, { "epoch": 0.679395663374839, "grad_norm": 0.259765625, "learning_rate": 0.001001972371973814, "loss": 2.1496, "step": 25317 }, { "epoch": 0.6794224989265779, "grad_norm": 0.259765625, "learning_rate": 0.0010019515083792229, "loss": 2.1756, "step": 25318 }, { "epoch": 0.6794493344783169, "grad_norm": 0.259765625, "learning_rate": 0.0010019306439028686, "loss": 2.1053, "step": 25319 }, { "epoch": 0.6794761700300558, "grad_norm": 0.26171875, "learning_rate": 0.0010019097785447964, "loss": 2.1913, "step": 25320 }, { "epoch": 0.6795030055817948, "grad_norm": 0.2734375, "learning_rate": 0.0010018889123050525, "loss": 2.1912, "step": 25321 }, { "epoch": 0.6795298411335337, "grad_norm": 0.263671875, "learning_rate": 0.0010018680451836825, "loss": 2.1274, "step": 25322 }, { "epoch": 0.6795566766852726, "grad_norm": 0.259765625, "learning_rate": 0.0010018471771807323, "loss": 2.2265, "step": 25323 }, { "epoch": 0.6795835122370116, "grad_norm": 0.26171875, "learning_rate": 0.0010018263082962478, "loss": 2.078, "step": 25324 }, { "epoch": 0.6796103477887505, "grad_norm": 0.255859375, "learning_rate": 0.0010018054385302743, "loss": 2.1015, "step": 25325 }, { "epoch": 0.6796371833404895, "grad_norm": 0.25390625, "learning_rate": 0.001001784567882858, "loss": 2.0135, "step": 25326 }, { "epoch": 0.6796640188922284, "grad_norm": 0.2890625, "learning_rate": 0.0010017636963540447, "loss": 2.2021, "step": 25327 }, { "epoch": 0.6796908544439674, "grad_norm": 0.259765625, "learning_rate": 0.0010017428239438796, "loss": 2.0711, "step": 25328 }, { "epoch": 0.6797176899957064, "grad_norm": 0.259765625, "learning_rate": 0.0010017219506524093, "loss": 2.0783, "step": 25329 }, { "epoch": 0.6797445255474452, "grad_norm": 0.26171875, "learning_rate": 0.0010017010764796794, "loss": 2.0812, "step": 25330 }, { "epoch": 0.6797713610991842, "grad_norm": 0.2734375, "learning_rate": 0.0010016802014257353, "loss": 2.1136, "step": 25331 }, { "epoch": 0.6797981966509231, "grad_norm": 0.265625, "learning_rate": 0.001001659325490623, "loss": 2.0866, "step": 25332 }, { "epoch": 0.6798250322026621, "grad_norm": 0.265625, "learning_rate": 0.0010016384486743882, "loss": 2.1319, "step": 25333 }, { "epoch": 0.679851867754401, "grad_norm": 0.2578125, "learning_rate": 0.001001617570977077, "loss": 2.0006, "step": 25334 }, { "epoch": 0.67987870330614, "grad_norm": 0.26953125, "learning_rate": 0.0010015966923987351, "loss": 2.1179, "step": 25335 }, { "epoch": 0.679905538857879, "grad_norm": 0.267578125, "learning_rate": 0.0010015758129394078, "loss": 2.1973, "step": 25336 }, { "epoch": 0.6799323744096178, "grad_norm": 0.25390625, "learning_rate": 0.0010015549325991416, "loss": 2.1083, "step": 25337 }, { "epoch": 0.6799592099613568, "grad_norm": 0.259765625, "learning_rate": 0.001001534051377982, "loss": 2.0852, "step": 25338 }, { "epoch": 0.6799860455130957, "grad_norm": 0.279296875, "learning_rate": 0.0010015131692759748, "loss": 2.1813, "step": 25339 }, { "epoch": 0.6800128810648347, "grad_norm": 0.267578125, "learning_rate": 0.001001492286293166, "loss": 2.2219, "step": 25340 }, { "epoch": 0.6800397166165736, "grad_norm": 0.267578125, "learning_rate": 0.001001471402429601, "loss": 2.1987, "step": 25341 }, { "epoch": 0.6800665521683126, "grad_norm": 0.255859375, "learning_rate": 0.001001450517685326, "loss": 2.1712, "step": 25342 }, { "epoch": 0.6800933877200516, "grad_norm": 0.265625, "learning_rate": 0.0010014296320603865, "loss": 2.0558, "step": 25343 }, { "epoch": 0.6801202232717904, "grad_norm": 0.275390625, "learning_rate": 0.0010014087455548287, "loss": 2.1757, "step": 25344 }, { "epoch": 0.6801470588235294, "grad_norm": 0.263671875, "learning_rate": 0.0010013878581686978, "loss": 2.07, "step": 25345 }, { "epoch": 0.6801738943752683, "grad_norm": 0.263671875, "learning_rate": 0.0010013669699020405, "loss": 2.1915, "step": 25346 }, { "epoch": 0.6802007299270073, "grad_norm": 0.267578125, "learning_rate": 0.001001346080754902, "loss": 2.2595, "step": 25347 }, { "epoch": 0.6802275654787462, "grad_norm": 0.263671875, "learning_rate": 0.0010013251907273282, "loss": 2.1687, "step": 25348 }, { "epoch": 0.6802544010304852, "grad_norm": 0.259765625, "learning_rate": 0.0010013042998193653, "loss": 2.0616, "step": 25349 }, { "epoch": 0.6802812365822242, "grad_norm": 0.251953125, "learning_rate": 0.0010012834080310585, "loss": 2.1001, "step": 25350 }, { "epoch": 0.680308072133963, "grad_norm": 0.259765625, "learning_rate": 0.001001262515362454, "loss": 2.1243, "step": 25351 }, { "epoch": 0.680334907685702, "grad_norm": 0.265625, "learning_rate": 0.0010012416218135977, "loss": 2.1783, "step": 25352 }, { "epoch": 0.6803617432374409, "grad_norm": 0.251953125, "learning_rate": 0.0010012207273845353, "loss": 2.1404, "step": 25353 }, { "epoch": 0.6803885787891799, "grad_norm": 0.263671875, "learning_rate": 0.0010011998320753127, "loss": 2.1611, "step": 25354 }, { "epoch": 0.6804154143409189, "grad_norm": 0.259765625, "learning_rate": 0.0010011789358859756, "loss": 2.133, "step": 25355 }, { "epoch": 0.6804422498926578, "grad_norm": 0.255859375, "learning_rate": 0.00100115803881657, "loss": 2.119, "step": 25356 }, { "epoch": 0.6804690854443968, "grad_norm": 0.255859375, "learning_rate": 0.0010011371408671416, "loss": 2.0654, "step": 25357 }, { "epoch": 0.6804959209961357, "grad_norm": 0.255859375, "learning_rate": 0.0010011162420377365, "loss": 2.1059, "step": 25358 }, { "epoch": 0.6805227565478746, "grad_norm": 0.255859375, "learning_rate": 0.0010010953423284002, "loss": 2.1621, "step": 25359 }, { "epoch": 0.6805495920996135, "grad_norm": 0.25390625, "learning_rate": 0.0010010744417391787, "loss": 2.1315, "step": 25360 }, { "epoch": 0.6805764276513525, "grad_norm": 0.267578125, "learning_rate": 0.0010010535402701182, "loss": 2.1527, "step": 25361 }, { "epoch": 0.6806032632030915, "grad_norm": 0.25390625, "learning_rate": 0.001001032637921264, "loss": 2.0165, "step": 25362 }, { "epoch": 0.6806300987548304, "grad_norm": 0.259765625, "learning_rate": 0.0010010117346926622, "loss": 2.0973, "step": 25363 }, { "epoch": 0.6806569343065694, "grad_norm": 0.255859375, "learning_rate": 0.0010009908305843584, "loss": 2.1637, "step": 25364 }, { "epoch": 0.6806837698583083, "grad_norm": 0.267578125, "learning_rate": 0.001000969925596399, "loss": 2.1411, "step": 25365 }, { "epoch": 0.6807106054100472, "grad_norm": 0.2578125, "learning_rate": 0.0010009490197288296, "loss": 2.0625, "step": 25366 }, { "epoch": 0.6807374409617861, "grad_norm": 0.255859375, "learning_rate": 0.0010009281129816957, "loss": 2.0934, "step": 25367 }, { "epoch": 0.6807642765135251, "grad_norm": 0.2578125, "learning_rate": 0.0010009072053550438, "loss": 2.2051, "step": 25368 }, { "epoch": 0.6807911120652641, "grad_norm": 0.26171875, "learning_rate": 0.0010008862968489192, "loss": 2.0463, "step": 25369 }, { "epoch": 0.680817947617003, "grad_norm": 0.25390625, "learning_rate": 0.0010008653874633681, "loss": 2.072, "step": 25370 }, { "epoch": 0.680844783168742, "grad_norm": 0.28125, "learning_rate": 0.0010008444771984364, "loss": 2.1802, "step": 25371 }, { "epoch": 0.6808716187204809, "grad_norm": 0.26953125, "learning_rate": 0.0010008235660541698, "loss": 2.1076, "step": 25372 }, { "epoch": 0.6808984542722198, "grad_norm": 0.259765625, "learning_rate": 0.001000802654030614, "loss": 2.2273, "step": 25373 }, { "epoch": 0.6809252898239588, "grad_norm": 0.265625, "learning_rate": 0.0010007817411278152, "loss": 2.0716, "step": 25374 }, { "epoch": 0.6809521253756977, "grad_norm": 0.279296875, "learning_rate": 0.0010007608273458193, "loss": 2.1208, "step": 25375 }, { "epoch": 0.6809789609274367, "grad_norm": 0.259765625, "learning_rate": 0.0010007399126846718, "loss": 2.1045, "step": 25376 }, { "epoch": 0.6810057964791756, "grad_norm": 0.27734375, "learning_rate": 0.001000718997144419, "loss": 2.2559, "step": 25377 }, { "epoch": 0.6810326320309146, "grad_norm": 0.26171875, "learning_rate": 0.0010006980807251067, "loss": 2.2093, "step": 25378 }, { "epoch": 0.6810594675826535, "grad_norm": 0.267578125, "learning_rate": 0.0010006771634267805, "loss": 2.2151, "step": 25379 }, { "epoch": 0.6810863031343924, "grad_norm": 0.2578125, "learning_rate": 0.0010006562452494867, "loss": 2.1705, "step": 25380 }, { "epoch": 0.6811131386861314, "grad_norm": 0.259765625, "learning_rate": 0.001000635326193271, "loss": 2.21, "step": 25381 }, { "epoch": 0.6811399742378703, "grad_norm": 0.267578125, "learning_rate": 0.001000614406258179, "loss": 2.1718, "step": 25382 }, { "epoch": 0.6811668097896093, "grad_norm": 0.255859375, "learning_rate": 0.001000593485444257, "loss": 2.1644, "step": 25383 }, { "epoch": 0.6811936453413482, "grad_norm": 0.248046875, "learning_rate": 0.0010005725637515506, "loss": 2.0604, "step": 25384 }, { "epoch": 0.6812204808930872, "grad_norm": 0.251953125, "learning_rate": 0.0010005516411801059, "loss": 2.0465, "step": 25385 }, { "epoch": 0.6812473164448261, "grad_norm": 0.2578125, "learning_rate": 0.001000530717729969, "loss": 2.1405, "step": 25386 }, { "epoch": 0.681274151996565, "grad_norm": 0.265625, "learning_rate": 0.0010005097934011851, "loss": 2.1517, "step": 25387 }, { "epoch": 0.681300987548304, "grad_norm": 0.267578125, "learning_rate": 0.001000488868193801, "loss": 2.0958, "step": 25388 }, { "epoch": 0.6813278231000429, "grad_norm": 0.265625, "learning_rate": 0.0010004679421078619, "loss": 2.1024, "step": 25389 }, { "epoch": 0.6813546586517819, "grad_norm": 0.2578125, "learning_rate": 0.0010004470151434139, "loss": 2.1901, "step": 25390 }, { "epoch": 0.6813814942035208, "grad_norm": 0.267578125, "learning_rate": 0.0010004260873005032, "loss": 2.1745, "step": 25391 }, { "epoch": 0.6814083297552598, "grad_norm": 0.263671875, "learning_rate": 0.0010004051585791754, "loss": 2.1817, "step": 25392 }, { "epoch": 0.6814351653069987, "grad_norm": 0.2578125, "learning_rate": 0.0010003842289794764, "loss": 2.1608, "step": 25393 }, { "epoch": 0.6814620008587376, "grad_norm": 0.25390625, "learning_rate": 0.001000363298501452, "loss": 2.1713, "step": 25394 }, { "epoch": 0.6814888364104766, "grad_norm": 0.255859375, "learning_rate": 0.0010003423671451487, "loss": 2.0679, "step": 25395 }, { "epoch": 0.6815156719622155, "grad_norm": 0.265625, "learning_rate": 0.0010003214349106118, "loss": 2.2492, "step": 25396 }, { "epoch": 0.6815425075139545, "grad_norm": 0.26171875, "learning_rate": 0.0010003005017978875, "loss": 2.1624, "step": 25397 }, { "epoch": 0.6815693430656934, "grad_norm": 0.259765625, "learning_rate": 0.0010002795678070216, "loss": 2.2207, "step": 25398 }, { "epoch": 0.6815961786174324, "grad_norm": 0.263671875, "learning_rate": 0.0010002586329380602, "loss": 2.1097, "step": 25399 }, { "epoch": 0.6816230141691714, "grad_norm": 0.25390625, "learning_rate": 0.0010002376971910492, "loss": 2.0373, "step": 25400 }, { "epoch": 0.6816498497209103, "grad_norm": 0.2578125, "learning_rate": 0.0010002167605660344, "loss": 2.1594, "step": 25401 }, { "epoch": 0.6816766852726492, "grad_norm": 0.265625, "learning_rate": 0.0010001958230630617, "loss": 2.0436, "step": 25402 }, { "epoch": 0.6817035208243881, "grad_norm": 0.267578125, "learning_rate": 0.001000174884682177, "loss": 2.2027, "step": 25403 }, { "epoch": 0.6817303563761271, "grad_norm": 0.271484375, "learning_rate": 0.0010001539454234263, "loss": 2.2411, "step": 25404 }, { "epoch": 0.681757191927866, "grad_norm": 0.259765625, "learning_rate": 0.0010001330052868559, "loss": 2.0852, "step": 25405 }, { "epoch": 0.681784027479605, "grad_norm": 0.265625, "learning_rate": 0.0010001120642725114, "loss": 2.2791, "step": 25406 }, { "epoch": 0.681810863031344, "grad_norm": 0.265625, "learning_rate": 0.0010000911223804383, "loss": 2.2323, "step": 25407 }, { "epoch": 0.6818376985830829, "grad_norm": 0.265625, "learning_rate": 0.0010000701796106834, "loss": 2.2829, "step": 25408 }, { "epoch": 0.6818645341348218, "grad_norm": 0.267578125, "learning_rate": 0.0010000492359632921, "loss": 2.2301, "step": 25409 }, { "epoch": 0.6818913696865607, "grad_norm": 0.25, "learning_rate": 0.0010000282914383104, "loss": 2.1356, "step": 25410 }, { "epoch": 0.6819182052382997, "grad_norm": 0.25390625, "learning_rate": 0.0010000073460357843, "loss": 2.1094, "step": 25411 }, { "epoch": 0.6819450407900386, "grad_norm": 0.2578125, "learning_rate": 0.0009999863997557598, "loss": 2.1422, "step": 25412 }, { "epoch": 0.6819718763417776, "grad_norm": 0.2734375, "learning_rate": 0.0009999654525982829, "loss": 2.2113, "step": 25413 }, { "epoch": 0.6819987118935166, "grad_norm": 0.248046875, "learning_rate": 0.0009999445045633995, "loss": 2.2535, "step": 25414 }, { "epoch": 0.6820255474452555, "grad_norm": 0.265625, "learning_rate": 0.0009999235556511553, "loss": 2.3296, "step": 25415 }, { "epoch": 0.6820523829969944, "grad_norm": 0.263671875, "learning_rate": 0.0009999026058615966, "loss": 2.1646, "step": 25416 }, { "epoch": 0.6820792185487333, "grad_norm": 0.25390625, "learning_rate": 0.0009998816551947693, "loss": 2.0329, "step": 25417 }, { "epoch": 0.6821060541004723, "grad_norm": 0.265625, "learning_rate": 0.0009998607036507193, "loss": 2.0573, "step": 25418 }, { "epoch": 0.6821328896522112, "grad_norm": 0.263671875, "learning_rate": 0.0009998397512294924, "loss": 2.2188, "step": 25419 }, { "epoch": 0.6821597252039502, "grad_norm": 0.265625, "learning_rate": 0.000999818797931135, "loss": 2.1672, "step": 25420 }, { "epoch": 0.6821865607556892, "grad_norm": 0.25, "learning_rate": 0.0009997978437556925, "loss": 2.1221, "step": 25421 }, { "epoch": 0.682213396307428, "grad_norm": 0.263671875, "learning_rate": 0.0009997768887032112, "loss": 2.1797, "step": 25422 }, { "epoch": 0.682240231859167, "grad_norm": 0.265625, "learning_rate": 0.0009997559327737373, "loss": 2.1557, "step": 25423 }, { "epoch": 0.6822670674109059, "grad_norm": 0.26171875, "learning_rate": 0.0009997349759673162, "loss": 2.0705, "step": 25424 }, { "epoch": 0.6822939029626449, "grad_norm": 0.255859375, "learning_rate": 0.0009997140182839942, "loss": 2.1374, "step": 25425 }, { "epoch": 0.6823207385143839, "grad_norm": 0.265625, "learning_rate": 0.0009996930597238175, "loss": 2.2223, "step": 25426 }, { "epoch": 0.6823475740661228, "grad_norm": 0.259765625, "learning_rate": 0.0009996721002868314, "loss": 2.1013, "step": 25427 }, { "epoch": 0.6823744096178618, "grad_norm": 0.26171875, "learning_rate": 0.0009996511399730827, "loss": 2.09, "step": 25428 }, { "epoch": 0.6824012451696007, "grad_norm": 0.265625, "learning_rate": 0.0009996301787826168, "loss": 2.186, "step": 25429 }, { "epoch": 0.6824280807213396, "grad_norm": 0.265625, "learning_rate": 0.0009996092167154798, "loss": 2.1658, "step": 25430 }, { "epoch": 0.6824549162730785, "grad_norm": 0.26171875, "learning_rate": 0.0009995882537717179, "loss": 2.0534, "step": 25431 }, { "epoch": 0.6824817518248175, "grad_norm": 0.259765625, "learning_rate": 0.0009995672899513765, "loss": 2.0435, "step": 25432 }, { "epoch": 0.6825085873765565, "grad_norm": 0.251953125, "learning_rate": 0.0009995463252545026, "loss": 2.1415, "step": 25433 }, { "epoch": 0.6825354229282954, "grad_norm": 0.259765625, "learning_rate": 0.0009995253596811412, "loss": 2.1424, "step": 25434 }, { "epoch": 0.6825622584800344, "grad_norm": 0.26953125, "learning_rate": 0.000999504393231339, "loss": 2.0806, "step": 25435 }, { "epoch": 0.6825890940317733, "grad_norm": 0.287109375, "learning_rate": 0.0009994834259051415, "loss": 2.203, "step": 25436 }, { "epoch": 0.6826159295835122, "grad_norm": 0.287109375, "learning_rate": 0.000999462457702595, "loss": 2.2211, "step": 25437 }, { "epoch": 0.6826427651352511, "grad_norm": 0.26953125, "learning_rate": 0.0009994414886237452, "loss": 2.1847, "step": 25438 }, { "epoch": 0.6826696006869901, "grad_norm": 0.271484375, "learning_rate": 0.0009994205186686384, "loss": 2.3076, "step": 25439 }, { "epoch": 0.6826964362387291, "grad_norm": 0.26171875, "learning_rate": 0.0009993995478373206, "loss": 2.1664, "step": 25440 }, { "epoch": 0.682723271790468, "grad_norm": 0.2578125, "learning_rate": 0.0009993785761298376, "loss": 2.1911, "step": 25441 }, { "epoch": 0.682750107342207, "grad_norm": 0.263671875, "learning_rate": 0.0009993576035462358, "loss": 2.1777, "step": 25442 }, { "epoch": 0.6827769428939459, "grad_norm": 0.26953125, "learning_rate": 0.0009993366300865604, "loss": 2.287, "step": 25443 }, { "epoch": 0.6828037784456849, "grad_norm": 0.25390625, "learning_rate": 0.000999315655750858, "loss": 2.2059, "step": 25444 }, { "epoch": 0.6828306139974238, "grad_norm": 0.259765625, "learning_rate": 0.0009992946805391749, "loss": 2.154, "step": 25445 }, { "epoch": 0.6828574495491627, "grad_norm": 0.267578125, "learning_rate": 0.0009992737044515565, "loss": 2.2225, "step": 25446 }, { "epoch": 0.6828842851009017, "grad_norm": 0.27734375, "learning_rate": 0.000999252727488049, "loss": 2.2766, "step": 25447 }, { "epoch": 0.6829111206526406, "grad_norm": 0.263671875, "learning_rate": 0.0009992317496486987, "loss": 2.1362, "step": 25448 }, { "epoch": 0.6829379562043796, "grad_norm": 0.255859375, "learning_rate": 0.000999210770933551, "loss": 2.1841, "step": 25449 }, { "epoch": 0.6829647917561185, "grad_norm": 0.263671875, "learning_rate": 0.0009991897913426528, "loss": 2.2757, "step": 25450 }, { "epoch": 0.6829916273078575, "grad_norm": 0.26171875, "learning_rate": 0.0009991688108760494, "loss": 2.1639, "step": 25451 }, { "epoch": 0.6830184628595964, "grad_norm": 0.259765625, "learning_rate": 0.0009991478295337873, "loss": 2.1767, "step": 25452 }, { "epoch": 0.6830452984113353, "grad_norm": 0.2578125, "learning_rate": 0.000999126847315912, "loss": 2.2071, "step": 25453 }, { "epoch": 0.6830721339630743, "grad_norm": 0.267578125, "learning_rate": 0.0009991058642224698, "loss": 2.2046, "step": 25454 }, { "epoch": 0.6830989695148132, "grad_norm": 0.271484375, "learning_rate": 0.000999084880253507, "loss": 2.0978, "step": 25455 }, { "epoch": 0.6831258050665522, "grad_norm": 0.25, "learning_rate": 0.0009990638954090693, "loss": 2.1694, "step": 25456 }, { "epoch": 0.6831526406182911, "grad_norm": 0.2734375, "learning_rate": 0.0009990429096892026, "loss": 2.2655, "step": 25457 }, { "epoch": 0.68317947617003, "grad_norm": 0.265625, "learning_rate": 0.0009990219230939534, "loss": 2.1713, "step": 25458 }, { "epoch": 0.683206311721769, "grad_norm": 0.25, "learning_rate": 0.0009990009356233673, "loss": 2.1698, "step": 25459 }, { "epoch": 0.6832331472735079, "grad_norm": 0.26171875, "learning_rate": 0.0009989799472774907, "loss": 2.2292, "step": 25460 }, { "epoch": 0.6832599828252469, "grad_norm": 0.259765625, "learning_rate": 0.0009989589580563694, "loss": 2.0734, "step": 25461 }, { "epoch": 0.6832868183769858, "grad_norm": 0.2734375, "learning_rate": 0.0009989379679600497, "loss": 2.1405, "step": 25462 }, { "epoch": 0.6833136539287248, "grad_norm": 0.265625, "learning_rate": 0.0009989169769885771, "loss": 2.1693, "step": 25463 }, { "epoch": 0.6833404894804637, "grad_norm": 0.265625, "learning_rate": 0.0009988959851419984, "loss": 2.1399, "step": 25464 }, { "epoch": 0.6833673250322027, "grad_norm": 0.26171875, "learning_rate": 0.000998874992420359, "loss": 2.0503, "step": 25465 }, { "epoch": 0.6833941605839416, "grad_norm": 0.27734375, "learning_rate": 0.0009988539988237055, "loss": 2.2682, "step": 25466 }, { "epoch": 0.6834209961356805, "grad_norm": 0.283203125, "learning_rate": 0.0009988330043520833, "loss": 2.3113, "step": 25467 }, { "epoch": 0.6834478316874195, "grad_norm": 0.271484375, "learning_rate": 0.0009988120090055391, "loss": 2.1713, "step": 25468 }, { "epoch": 0.6834746672391584, "grad_norm": 0.2578125, "learning_rate": 0.0009987910127841186, "loss": 2.2176, "step": 25469 }, { "epoch": 0.6835015027908974, "grad_norm": 0.263671875, "learning_rate": 0.000998770015687868, "loss": 2.168, "step": 25470 }, { "epoch": 0.6835283383426364, "grad_norm": 0.263671875, "learning_rate": 0.0009987490177168332, "loss": 2.1891, "step": 25471 }, { "epoch": 0.6835551738943753, "grad_norm": 0.26171875, "learning_rate": 0.0009987280188710605, "loss": 2.2801, "step": 25472 }, { "epoch": 0.6835820094461142, "grad_norm": 0.25390625, "learning_rate": 0.0009987070191505958, "loss": 2.1736, "step": 25473 }, { "epoch": 0.6836088449978531, "grad_norm": 0.26171875, "learning_rate": 0.000998686018555485, "loss": 2.2274, "step": 25474 }, { "epoch": 0.6836356805495921, "grad_norm": 0.259765625, "learning_rate": 0.0009986650170857746, "loss": 2.0948, "step": 25475 }, { "epoch": 0.683662516101331, "grad_norm": 0.265625, "learning_rate": 0.0009986440147415105, "loss": 2.0547, "step": 25476 }, { "epoch": 0.68368935165307, "grad_norm": 0.263671875, "learning_rate": 0.0009986230115227384, "loss": 2.1814, "step": 25477 }, { "epoch": 0.683716187204809, "grad_norm": 0.263671875, "learning_rate": 0.0009986020074295048, "loss": 2.1409, "step": 25478 }, { "epoch": 0.6837430227565479, "grad_norm": 0.26953125, "learning_rate": 0.0009985810024618557, "loss": 2.1932, "step": 25479 }, { "epoch": 0.6837698583082868, "grad_norm": 0.26171875, "learning_rate": 0.0009985599966198372, "loss": 2.2575, "step": 25480 }, { "epoch": 0.6837966938600257, "grad_norm": 0.267578125, "learning_rate": 0.0009985389899034953, "loss": 2.2668, "step": 25481 }, { "epoch": 0.6838235294117647, "grad_norm": 0.26171875, "learning_rate": 0.0009985179823128762, "loss": 2.2132, "step": 25482 }, { "epoch": 0.6838503649635036, "grad_norm": 0.265625, "learning_rate": 0.0009984969738480259, "loss": 2.2728, "step": 25483 }, { "epoch": 0.6838772005152426, "grad_norm": 0.25, "learning_rate": 0.0009984759645089904, "loss": 2.154, "step": 25484 }, { "epoch": 0.6839040360669816, "grad_norm": 0.255859375, "learning_rate": 0.0009984549542958157, "loss": 2.2059, "step": 25485 }, { "epoch": 0.6839308716187205, "grad_norm": 0.255859375, "learning_rate": 0.0009984339432085483, "loss": 2.2045, "step": 25486 }, { "epoch": 0.6839577071704595, "grad_norm": 0.2470703125, "learning_rate": 0.000998412931247234, "loss": 2.1359, "step": 25487 }, { "epoch": 0.6839845427221983, "grad_norm": 0.25390625, "learning_rate": 0.000998391918411919, "loss": 2.1579, "step": 25488 }, { "epoch": 0.6840113782739373, "grad_norm": 0.26171875, "learning_rate": 0.0009983709047026491, "loss": 2.0623, "step": 25489 }, { "epoch": 0.6840382138256763, "grad_norm": 0.265625, "learning_rate": 0.0009983498901194707, "loss": 2.1478, "step": 25490 }, { "epoch": 0.6840650493774152, "grad_norm": 0.263671875, "learning_rate": 0.00099832887466243, "loss": 2.1973, "step": 25491 }, { "epoch": 0.6840918849291542, "grad_norm": 0.263671875, "learning_rate": 0.0009983078583315728, "loss": 2.066, "step": 25492 }, { "epoch": 0.6841187204808931, "grad_norm": 0.26171875, "learning_rate": 0.0009982868411269456, "loss": 2.147, "step": 25493 }, { "epoch": 0.684145556032632, "grad_norm": 0.2734375, "learning_rate": 0.0009982658230485938, "loss": 2.1139, "step": 25494 }, { "epoch": 0.6841723915843709, "grad_norm": 0.2578125, "learning_rate": 0.0009982448040965644, "loss": 2.1476, "step": 25495 }, { "epoch": 0.6841992271361099, "grad_norm": 0.2578125, "learning_rate": 0.0009982237842709027, "loss": 2.1245, "step": 25496 }, { "epoch": 0.6842260626878489, "grad_norm": 0.2578125, "learning_rate": 0.0009982027635716555, "loss": 2.1659, "step": 25497 }, { "epoch": 0.6842528982395878, "grad_norm": 0.279296875, "learning_rate": 0.0009981817419988683, "loss": 2.1521, "step": 25498 }, { "epoch": 0.6842797337913268, "grad_norm": 0.267578125, "learning_rate": 0.0009981607195525878, "loss": 2.2606, "step": 25499 }, { "epoch": 0.6843065693430657, "grad_norm": 0.27734375, "learning_rate": 0.0009981396962328594, "loss": 2.1996, "step": 25500 }, { "epoch": 0.6843334048948047, "grad_norm": 0.2578125, "learning_rate": 0.00099811867203973, "loss": 2.2359, "step": 25501 }, { "epoch": 0.6843602404465435, "grad_norm": 0.259765625, "learning_rate": 0.0009980976469732455, "loss": 2.2068, "step": 25502 }, { "epoch": 0.6843870759982825, "grad_norm": 0.263671875, "learning_rate": 0.0009980766210334515, "loss": 2.2491, "step": 25503 }, { "epoch": 0.6844139115500215, "grad_norm": 0.265625, "learning_rate": 0.0009980555942203945, "loss": 2.201, "step": 25504 }, { "epoch": 0.6844407471017604, "grad_norm": 0.26953125, "learning_rate": 0.000998034566534121, "loss": 2.2289, "step": 25505 }, { "epoch": 0.6844675826534994, "grad_norm": 0.279296875, "learning_rate": 0.0009980135379746766, "loss": 2.3098, "step": 25506 }, { "epoch": 0.6844944182052383, "grad_norm": 0.25390625, "learning_rate": 0.0009979925085421076, "loss": 2.2065, "step": 25507 }, { "epoch": 0.6845212537569773, "grad_norm": 0.2578125, "learning_rate": 0.00099797147823646, "loss": 2.0946, "step": 25508 }, { "epoch": 0.6845480893087161, "grad_norm": 0.255859375, "learning_rate": 0.0009979504470577802, "loss": 2.1406, "step": 25509 }, { "epoch": 0.6845749248604551, "grad_norm": 0.271484375, "learning_rate": 0.000997929415006114, "loss": 2.2029, "step": 25510 }, { "epoch": 0.6846017604121941, "grad_norm": 0.255859375, "learning_rate": 0.000997908382081508, "loss": 2.2295, "step": 25511 }, { "epoch": 0.684628595963933, "grad_norm": 0.251953125, "learning_rate": 0.0009978873482840078, "loss": 2.1577, "step": 25512 }, { "epoch": 0.684655431515672, "grad_norm": 0.275390625, "learning_rate": 0.00099786631361366, "loss": 2.2743, "step": 25513 }, { "epoch": 0.6846822670674109, "grad_norm": 0.2578125, "learning_rate": 0.0009978452780705107, "loss": 2.1097, "step": 25514 }, { "epoch": 0.6847091026191499, "grad_norm": 0.25390625, "learning_rate": 0.0009978242416546056, "loss": 2.1011, "step": 25515 }, { "epoch": 0.6847359381708888, "grad_norm": 0.25390625, "learning_rate": 0.0009978032043659914, "loss": 2.1584, "step": 25516 }, { "epoch": 0.6847627737226277, "grad_norm": 0.244140625, "learning_rate": 0.0009977821662047138, "loss": 2.0129, "step": 25517 }, { "epoch": 0.6847896092743667, "grad_norm": 0.26953125, "learning_rate": 0.0009977611271708192, "loss": 2.2929, "step": 25518 }, { "epoch": 0.6848164448261056, "grad_norm": 0.25, "learning_rate": 0.0009977400872643537, "loss": 2.0644, "step": 25519 }, { "epoch": 0.6848432803778446, "grad_norm": 0.26171875, "learning_rate": 0.0009977190464853637, "loss": 2.2079, "step": 25520 }, { "epoch": 0.6848701159295835, "grad_norm": 0.265625, "learning_rate": 0.000997698004833895, "loss": 2.2661, "step": 25521 }, { "epoch": 0.6848969514813225, "grad_norm": 0.271484375, "learning_rate": 0.000997676962309994, "loss": 2.2241, "step": 25522 }, { "epoch": 0.6849237870330614, "grad_norm": 0.267578125, "learning_rate": 0.0009976559189137065, "loss": 2.0313, "step": 25523 }, { "epoch": 0.6849506225848003, "grad_norm": 0.255859375, "learning_rate": 0.0009976348746450789, "loss": 2.099, "step": 25524 }, { "epoch": 0.6849774581365393, "grad_norm": 0.265625, "learning_rate": 0.0009976138295041575, "loss": 2.2107, "step": 25525 }, { "epoch": 0.6850042936882782, "grad_norm": 0.265625, "learning_rate": 0.0009975927834909883, "loss": 2.2535, "step": 25526 }, { "epoch": 0.6850311292400172, "grad_norm": 0.2490234375, "learning_rate": 0.0009975717366056173, "loss": 2.1535, "step": 25527 }, { "epoch": 0.6850579647917561, "grad_norm": 0.27734375, "learning_rate": 0.000997550688848091, "loss": 2.1645, "step": 25528 }, { "epoch": 0.6850848003434951, "grad_norm": 0.27734375, "learning_rate": 0.0009975296402184558, "loss": 2.2136, "step": 25529 }, { "epoch": 0.685111635895234, "grad_norm": 0.263671875, "learning_rate": 0.0009975085907167572, "loss": 2.1991, "step": 25530 }, { "epoch": 0.6851384714469729, "grad_norm": 0.267578125, "learning_rate": 0.0009974875403430418, "loss": 2.2999, "step": 25531 }, { "epoch": 0.6851653069987119, "grad_norm": 0.255859375, "learning_rate": 0.0009974664890973557, "loss": 2.2401, "step": 25532 }, { "epoch": 0.6851921425504508, "grad_norm": 0.26171875, "learning_rate": 0.000997445436979745, "loss": 2.2375, "step": 25533 }, { "epoch": 0.6852189781021898, "grad_norm": 0.267578125, "learning_rate": 0.0009974243839902558, "loss": 2.2102, "step": 25534 }, { "epoch": 0.6852458136539287, "grad_norm": 0.26953125, "learning_rate": 0.0009974033301289346, "loss": 2.229, "step": 25535 }, { "epoch": 0.6852726492056677, "grad_norm": 0.267578125, "learning_rate": 0.0009973822753958272, "loss": 2.1272, "step": 25536 }, { "epoch": 0.6852994847574067, "grad_norm": 0.267578125, "learning_rate": 0.0009973612197909803, "loss": 2.1648, "step": 25537 }, { "epoch": 0.6853263203091455, "grad_norm": 0.265625, "learning_rate": 0.0009973401633144397, "loss": 2.2384, "step": 25538 }, { "epoch": 0.6853531558608845, "grad_norm": 0.255859375, "learning_rate": 0.0009973191059662516, "loss": 2.2024, "step": 25539 }, { "epoch": 0.6853799914126234, "grad_norm": 0.2578125, "learning_rate": 0.0009972980477464624, "loss": 2.1518, "step": 25540 }, { "epoch": 0.6854068269643624, "grad_norm": 0.26171875, "learning_rate": 0.000997276988655118, "loss": 2.2616, "step": 25541 }, { "epoch": 0.6854336625161014, "grad_norm": 0.26171875, "learning_rate": 0.0009972559286922649, "loss": 2.1272, "step": 25542 }, { "epoch": 0.6854604980678403, "grad_norm": 0.255859375, "learning_rate": 0.000997234867857949, "loss": 2.2505, "step": 25543 }, { "epoch": 0.6854873336195793, "grad_norm": 0.26953125, "learning_rate": 0.0009972138061522169, "loss": 2.1852, "step": 25544 }, { "epoch": 0.6855141691713181, "grad_norm": 0.265625, "learning_rate": 0.0009971927435751145, "loss": 2.2515, "step": 25545 }, { "epoch": 0.6855410047230571, "grad_norm": 0.265625, "learning_rate": 0.000997171680126688, "loss": 2.1692, "step": 25546 }, { "epoch": 0.685567840274796, "grad_norm": 0.26171875, "learning_rate": 0.0009971506158069836, "loss": 2.2355, "step": 25547 }, { "epoch": 0.685594675826535, "grad_norm": 0.265625, "learning_rate": 0.0009971295506160478, "loss": 2.1854, "step": 25548 }, { "epoch": 0.685621511378274, "grad_norm": 0.259765625, "learning_rate": 0.0009971084845539267, "loss": 2.1543, "step": 25549 }, { "epoch": 0.6856483469300129, "grad_norm": 0.259765625, "learning_rate": 0.0009970874176206661, "loss": 2.1441, "step": 25550 }, { "epoch": 0.6856751824817519, "grad_norm": 0.26171875, "learning_rate": 0.0009970663498163126, "loss": 2.0931, "step": 25551 }, { "epoch": 0.6857020180334907, "grad_norm": 0.255859375, "learning_rate": 0.0009970452811409124, "loss": 2.1008, "step": 25552 }, { "epoch": 0.6857288535852297, "grad_norm": 0.255859375, "learning_rate": 0.0009970242115945119, "loss": 2.1799, "step": 25553 }, { "epoch": 0.6857556891369686, "grad_norm": 0.275390625, "learning_rate": 0.0009970031411771568, "loss": 2.1962, "step": 25554 }, { "epoch": 0.6857825246887076, "grad_norm": 0.259765625, "learning_rate": 0.0009969820698888937, "loss": 2.1081, "step": 25555 }, { "epoch": 0.6858093602404466, "grad_norm": 0.2578125, "learning_rate": 0.0009969609977297685, "loss": 2.1532, "step": 25556 }, { "epoch": 0.6858361957921855, "grad_norm": 0.265625, "learning_rate": 0.000996939924699828, "loss": 2.1521, "step": 25557 }, { "epoch": 0.6858630313439245, "grad_norm": 0.259765625, "learning_rate": 0.0009969188507991179, "loss": 2.1427, "step": 25558 }, { "epoch": 0.6858898668956633, "grad_norm": 0.26953125, "learning_rate": 0.0009968977760276847, "loss": 2.2055, "step": 25559 }, { "epoch": 0.6859167024474023, "grad_norm": 0.26953125, "learning_rate": 0.0009968767003855745, "loss": 2.1868, "step": 25560 }, { "epoch": 0.6859435379991413, "grad_norm": 0.271484375, "learning_rate": 0.0009968556238728334, "loss": 2.3273, "step": 25561 }, { "epoch": 0.6859703735508802, "grad_norm": 0.267578125, "learning_rate": 0.0009968345464895082, "loss": 2.1819, "step": 25562 }, { "epoch": 0.6859972091026192, "grad_norm": 0.267578125, "learning_rate": 0.0009968134682356445, "loss": 2.2817, "step": 25563 }, { "epoch": 0.6860240446543581, "grad_norm": 0.259765625, "learning_rate": 0.000996792389111289, "loss": 2.217, "step": 25564 }, { "epoch": 0.6860508802060971, "grad_norm": 0.267578125, "learning_rate": 0.0009967713091164874, "loss": 2.3529, "step": 25565 }, { "epoch": 0.6860777157578359, "grad_norm": 0.263671875, "learning_rate": 0.0009967502282512866, "loss": 2.1895, "step": 25566 }, { "epoch": 0.6861045513095749, "grad_norm": 0.267578125, "learning_rate": 0.0009967291465157325, "loss": 2.2702, "step": 25567 }, { "epoch": 0.6861313868613139, "grad_norm": 0.259765625, "learning_rate": 0.0009967080639098713, "loss": 2.2893, "step": 25568 }, { "epoch": 0.6861582224130528, "grad_norm": 0.271484375, "learning_rate": 0.000996686980433749, "loss": 2.2419, "step": 25569 }, { "epoch": 0.6861850579647918, "grad_norm": 0.255859375, "learning_rate": 0.0009966658960874127, "loss": 2.2085, "step": 25570 }, { "epoch": 0.6862118935165307, "grad_norm": 0.259765625, "learning_rate": 0.0009966448108709078, "loss": 2.2357, "step": 25571 }, { "epoch": 0.6862387290682697, "grad_norm": 0.251953125, "learning_rate": 0.000996623724784281, "loss": 2.3116, "step": 25572 }, { "epoch": 0.6862655646200085, "grad_norm": 0.26953125, "learning_rate": 0.0009966026378275784, "loss": 2.2085, "step": 25573 }, { "epoch": 0.6862924001717475, "grad_norm": 0.271484375, "learning_rate": 0.0009965815500008464, "loss": 2.2307, "step": 25574 }, { "epoch": 0.6863192357234865, "grad_norm": 0.251953125, "learning_rate": 0.000996560461304131, "loss": 2.1921, "step": 25575 }, { "epoch": 0.6863460712752254, "grad_norm": 0.255859375, "learning_rate": 0.0009965393717374785, "loss": 2.1072, "step": 25576 }, { "epoch": 0.6863729068269644, "grad_norm": 0.267578125, "learning_rate": 0.0009965182813009357, "loss": 2.134, "step": 25577 }, { "epoch": 0.6863997423787033, "grad_norm": 0.2578125, "learning_rate": 0.0009964971899945481, "loss": 2.1318, "step": 25578 }, { "epoch": 0.6864265779304423, "grad_norm": 0.265625, "learning_rate": 0.0009964760978183626, "loss": 2.1504, "step": 25579 }, { "epoch": 0.6864534134821811, "grad_norm": 0.26171875, "learning_rate": 0.000996455004772425, "loss": 2.1281, "step": 25580 }, { "epoch": 0.6864802490339201, "grad_norm": 0.259765625, "learning_rate": 0.0009964339108567816, "loss": 2.1766, "step": 25581 }, { "epoch": 0.6865070845856591, "grad_norm": 0.267578125, "learning_rate": 0.0009964128160714792, "loss": 2.1129, "step": 25582 }, { "epoch": 0.686533920137398, "grad_norm": 0.259765625, "learning_rate": 0.0009963917204165633, "loss": 2.14, "step": 25583 }, { "epoch": 0.686560755689137, "grad_norm": 0.26953125, "learning_rate": 0.000996370623892081, "loss": 2.1916, "step": 25584 }, { "epoch": 0.6865875912408759, "grad_norm": 0.2578125, "learning_rate": 0.0009963495264980779, "loss": 2.1901, "step": 25585 }, { "epoch": 0.6866144267926149, "grad_norm": 0.259765625, "learning_rate": 0.0009963284282346007, "loss": 2.0897, "step": 25586 }, { "epoch": 0.6866412623443539, "grad_norm": 0.265625, "learning_rate": 0.0009963073291016952, "loss": 2.1867, "step": 25587 }, { "epoch": 0.6866680978960927, "grad_norm": 0.2578125, "learning_rate": 0.0009962862290994084, "loss": 2.1661, "step": 25588 }, { "epoch": 0.6866949334478317, "grad_norm": 0.2734375, "learning_rate": 0.000996265128227786, "loss": 2.2663, "step": 25589 }, { "epoch": 0.6867217689995706, "grad_norm": 0.263671875, "learning_rate": 0.0009962440264868746, "loss": 2.194, "step": 25590 }, { "epoch": 0.6867486045513096, "grad_norm": 0.263671875, "learning_rate": 0.0009962229238767203, "loss": 2.2402, "step": 25591 }, { "epoch": 0.6867754401030485, "grad_norm": 0.255859375, "learning_rate": 0.0009962018203973696, "loss": 2.159, "step": 25592 }, { "epoch": 0.6868022756547875, "grad_norm": 0.26171875, "learning_rate": 0.0009961807160488684, "loss": 2.1825, "step": 25593 }, { "epoch": 0.6868291112065265, "grad_norm": 0.267578125, "learning_rate": 0.0009961596108312635, "loss": 2.2616, "step": 25594 }, { "epoch": 0.6868559467582653, "grad_norm": 0.265625, "learning_rate": 0.0009961385047446009, "loss": 2.1984, "step": 25595 }, { "epoch": 0.6868827823100043, "grad_norm": 0.263671875, "learning_rate": 0.000996117397788927, "loss": 2.2869, "step": 25596 }, { "epoch": 0.6869096178617432, "grad_norm": 0.267578125, "learning_rate": 0.000996096289964288, "loss": 2.2537, "step": 25597 }, { "epoch": 0.6869364534134822, "grad_norm": 0.25, "learning_rate": 0.0009960751812707303, "loss": 2.1293, "step": 25598 }, { "epoch": 0.6869632889652211, "grad_norm": 0.255859375, "learning_rate": 0.0009960540717083002, "loss": 2.2451, "step": 25599 }, { "epoch": 0.6869901245169601, "grad_norm": 0.25390625, "learning_rate": 0.000996032961277044, "loss": 2.1722, "step": 25600 }, { "epoch": 0.6870169600686991, "grad_norm": 0.265625, "learning_rate": 0.0009960118499770078, "loss": 2.1942, "step": 25601 }, { "epoch": 0.6870437956204379, "grad_norm": 0.2578125, "learning_rate": 0.0009959907378082385, "loss": 2.2294, "step": 25602 }, { "epoch": 0.6870706311721769, "grad_norm": 0.2451171875, "learning_rate": 0.0009959696247707817, "loss": 2.1246, "step": 25603 }, { "epoch": 0.6870974667239158, "grad_norm": 0.263671875, "learning_rate": 0.0009959485108646843, "loss": 2.1945, "step": 25604 }, { "epoch": 0.6871243022756548, "grad_norm": 0.259765625, "learning_rate": 0.0009959273960899922, "loss": 2.1268, "step": 25605 }, { "epoch": 0.6871511378273937, "grad_norm": 0.2578125, "learning_rate": 0.000995906280446752, "loss": 2.2002, "step": 25606 }, { "epoch": 0.6871779733791327, "grad_norm": 0.271484375, "learning_rate": 0.0009958851639350098, "loss": 2.2423, "step": 25607 }, { "epoch": 0.6872048089308717, "grad_norm": 0.26953125, "learning_rate": 0.0009958640465548118, "loss": 2.2416, "step": 25608 }, { "epoch": 0.6872316444826105, "grad_norm": 0.263671875, "learning_rate": 0.0009958429283062047, "loss": 2.1125, "step": 25609 }, { "epoch": 0.6872584800343495, "grad_norm": 0.263671875, "learning_rate": 0.0009958218091892347, "loss": 2.1979, "step": 25610 }, { "epoch": 0.6872853155860884, "grad_norm": 0.248046875, "learning_rate": 0.0009958006892039483, "loss": 2.0974, "step": 25611 }, { "epoch": 0.6873121511378274, "grad_norm": 0.267578125, "learning_rate": 0.0009957795683503915, "loss": 2.1726, "step": 25612 }, { "epoch": 0.6873389866895664, "grad_norm": 0.255859375, "learning_rate": 0.0009957584466286106, "loss": 2.2104, "step": 25613 }, { "epoch": 0.6873658222413053, "grad_norm": 0.2578125, "learning_rate": 0.0009957373240386523, "loss": 2.2216, "step": 25614 }, { "epoch": 0.6873926577930443, "grad_norm": 0.255859375, "learning_rate": 0.0009957162005805628, "loss": 2.0606, "step": 25615 }, { "epoch": 0.6874194933447831, "grad_norm": 0.265625, "learning_rate": 0.0009956950762543882, "loss": 2.1621, "step": 25616 }, { "epoch": 0.6874463288965221, "grad_norm": 0.2578125, "learning_rate": 0.000995673951060175, "loss": 2.1686, "step": 25617 }, { "epoch": 0.687473164448261, "grad_norm": 0.279296875, "learning_rate": 0.0009956528249979695, "loss": 2.2645, "step": 25618 }, { "epoch": 0.6875, "grad_norm": 0.2734375, "learning_rate": 0.0009956316980678183, "loss": 2.197, "step": 25619 }, { "epoch": 0.687526835551739, "grad_norm": 0.26953125, "learning_rate": 0.0009956105702697675, "loss": 2.2314, "step": 25620 }, { "epoch": 0.6875536711034779, "grad_norm": 0.26953125, "learning_rate": 0.0009955894416038634, "loss": 2.2465, "step": 25621 }, { "epoch": 0.6875805066552169, "grad_norm": 0.271484375, "learning_rate": 0.0009955683120701525, "loss": 2.2417, "step": 25622 }, { "epoch": 0.6876073422069557, "grad_norm": 0.28125, "learning_rate": 0.0009955471816686812, "loss": 2.1308, "step": 25623 }, { "epoch": 0.6876341777586947, "grad_norm": 0.271484375, "learning_rate": 0.0009955260503994958, "loss": 2.2214, "step": 25624 }, { "epoch": 0.6876610133104336, "grad_norm": 0.251953125, "learning_rate": 0.0009955049182626425, "loss": 2.2113, "step": 25625 }, { "epoch": 0.6876878488621726, "grad_norm": 0.263671875, "learning_rate": 0.0009954837852581679, "loss": 2.2302, "step": 25626 }, { "epoch": 0.6877146844139116, "grad_norm": 0.25390625, "learning_rate": 0.0009954626513861181, "loss": 2.176, "step": 25627 }, { "epoch": 0.6877415199656505, "grad_norm": 0.267578125, "learning_rate": 0.0009954415166465393, "loss": 2.2609, "step": 25628 }, { "epoch": 0.6877683555173895, "grad_norm": 0.26953125, "learning_rate": 0.0009954203810394785, "loss": 2.3005, "step": 25629 }, { "epoch": 0.6877951910691283, "grad_norm": 0.267578125, "learning_rate": 0.0009953992445649819, "loss": 2.2346, "step": 25630 }, { "epoch": 0.6878220266208673, "grad_norm": 0.263671875, "learning_rate": 0.0009953781072230953, "loss": 2.1467, "step": 25631 }, { "epoch": 0.6878488621726063, "grad_norm": 0.25, "learning_rate": 0.0009953569690138657, "loss": 2.1797, "step": 25632 }, { "epoch": 0.6878756977243452, "grad_norm": 0.25390625, "learning_rate": 0.000995335829937339, "loss": 2.148, "step": 25633 }, { "epoch": 0.6879025332760842, "grad_norm": 0.25390625, "learning_rate": 0.0009953146899935618, "loss": 2.1847, "step": 25634 }, { "epoch": 0.6879293688278231, "grad_norm": 0.26953125, "learning_rate": 0.0009952935491825809, "loss": 2.2216, "step": 25635 }, { "epoch": 0.6879562043795621, "grad_norm": 0.267578125, "learning_rate": 0.0009952724075044416, "loss": 2.21, "step": 25636 }, { "epoch": 0.6879830399313009, "grad_norm": 0.2578125, "learning_rate": 0.0009952512649591915, "loss": 2.1754, "step": 25637 }, { "epoch": 0.6880098754830399, "grad_norm": 0.248046875, "learning_rate": 0.000995230121546876, "loss": 2.0527, "step": 25638 }, { "epoch": 0.6880367110347789, "grad_norm": 0.271484375, "learning_rate": 0.0009952089772675423, "loss": 2.2023, "step": 25639 }, { "epoch": 0.6880635465865178, "grad_norm": 0.259765625, "learning_rate": 0.000995187832121236, "loss": 2.1858, "step": 25640 }, { "epoch": 0.6880903821382568, "grad_norm": 0.267578125, "learning_rate": 0.0009951666861080042, "loss": 2.2874, "step": 25641 }, { "epoch": 0.6881172176899957, "grad_norm": 0.263671875, "learning_rate": 0.0009951455392278926, "loss": 2.2643, "step": 25642 }, { "epoch": 0.6881440532417347, "grad_norm": 0.26171875, "learning_rate": 0.0009951243914809481, "loss": 2.1526, "step": 25643 }, { "epoch": 0.6881708887934735, "grad_norm": 0.271484375, "learning_rate": 0.000995103242867217, "loss": 2.2568, "step": 25644 }, { "epoch": 0.6881977243452125, "grad_norm": 0.251953125, "learning_rate": 0.0009950820933867456, "loss": 2.1284, "step": 25645 }, { "epoch": 0.6882245598969515, "grad_norm": 0.26171875, "learning_rate": 0.0009950609430395803, "loss": 2.175, "step": 25646 }, { "epoch": 0.6882513954486904, "grad_norm": 0.263671875, "learning_rate": 0.0009950397918257674, "loss": 2.156, "step": 25647 }, { "epoch": 0.6882782310004294, "grad_norm": 0.271484375, "learning_rate": 0.0009950186397453536, "loss": 2.1084, "step": 25648 }, { "epoch": 0.6883050665521683, "grad_norm": 0.26953125, "learning_rate": 0.000994997486798385, "loss": 2.2486, "step": 25649 }, { "epoch": 0.6883319021039073, "grad_norm": 0.26171875, "learning_rate": 0.0009949763329849082, "loss": 2.208, "step": 25650 }, { "epoch": 0.6883587376556461, "grad_norm": 0.26953125, "learning_rate": 0.0009949551783049694, "loss": 2.125, "step": 25651 }, { "epoch": 0.6883855732073851, "grad_norm": 0.26953125, "learning_rate": 0.0009949340227586154, "loss": 2.2304, "step": 25652 }, { "epoch": 0.6884124087591241, "grad_norm": 0.2734375, "learning_rate": 0.000994912866345892, "loss": 2.3362, "step": 25653 }, { "epoch": 0.688439244310863, "grad_norm": 0.26171875, "learning_rate": 0.0009948917090668462, "loss": 2.2168, "step": 25654 }, { "epoch": 0.688466079862602, "grad_norm": 0.263671875, "learning_rate": 0.0009948705509215242, "loss": 2.2518, "step": 25655 }, { "epoch": 0.6884929154143409, "grad_norm": 0.265625, "learning_rate": 0.0009948493919099723, "loss": 2.3099, "step": 25656 }, { "epoch": 0.6885197509660799, "grad_norm": 0.2734375, "learning_rate": 0.0009948282320322369, "loss": 2.2702, "step": 25657 }, { "epoch": 0.6885465865178189, "grad_norm": 0.263671875, "learning_rate": 0.0009948070712883646, "loss": 2.2164, "step": 25658 }, { "epoch": 0.6885734220695577, "grad_norm": 0.2470703125, "learning_rate": 0.0009947859096784018, "loss": 2.2185, "step": 25659 }, { "epoch": 0.6886002576212967, "grad_norm": 0.259765625, "learning_rate": 0.0009947647472023948, "loss": 2.1546, "step": 25660 }, { "epoch": 0.6886270931730356, "grad_norm": 0.2578125, "learning_rate": 0.0009947435838603901, "loss": 2.234, "step": 25661 }, { "epoch": 0.6886539287247746, "grad_norm": 0.265625, "learning_rate": 0.000994722419652434, "loss": 2.3027, "step": 25662 }, { "epoch": 0.6886807642765135, "grad_norm": 0.26171875, "learning_rate": 0.0009947012545785732, "loss": 2.2162, "step": 25663 }, { "epoch": 0.6887075998282525, "grad_norm": 0.263671875, "learning_rate": 0.000994680088638854, "loss": 2.1597, "step": 25664 }, { "epoch": 0.6887344353799915, "grad_norm": 0.2734375, "learning_rate": 0.0009946589218333228, "loss": 2.3321, "step": 25665 }, { "epoch": 0.6887612709317303, "grad_norm": 0.275390625, "learning_rate": 0.0009946377541620258, "loss": 2.251, "step": 25666 }, { "epoch": 0.6887881064834693, "grad_norm": 0.265625, "learning_rate": 0.0009946165856250099, "loss": 2.1201, "step": 25667 }, { "epoch": 0.6888149420352082, "grad_norm": 0.255859375, "learning_rate": 0.0009945954162223212, "loss": 2.1037, "step": 25668 }, { "epoch": 0.6888417775869472, "grad_norm": 0.265625, "learning_rate": 0.0009945742459540063, "loss": 2.2873, "step": 25669 }, { "epoch": 0.6888686131386861, "grad_norm": 0.2470703125, "learning_rate": 0.0009945530748201115, "loss": 2.1786, "step": 25670 }, { "epoch": 0.6888954486904251, "grad_norm": 0.2451171875, "learning_rate": 0.0009945319028206836, "loss": 2.1257, "step": 25671 }, { "epoch": 0.6889222842421641, "grad_norm": 0.263671875, "learning_rate": 0.0009945107299557685, "loss": 2.0977, "step": 25672 }, { "epoch": 0.6889491197939029, "grad_norm": 0.26171875, "learning_rate": 0.000994489556225413, "loss": 2.0697, "step": 25673 }, { "epoch": 0.6889759553456419, "grad_norm": 0.267578125, "learning_rate": 0.0009944683816296635, "loss": 2.1692, "step": 25674 }, { "epoch": 0.6890027908973808, "grad_norm": 0.25, "learning_rate": 0.0009944472061685665, "loss": 2.17, "step": 25675 }, { "epoch": 0.6890296264491198, "grad_norm": 0.26171875, "learning_rate": 0.000994426029842168, "loss": 2.0936, "step": 25676 }, { "epoch": 0.6890564620008587, "grad_norm": 0.263671875, "learning_rate": 0.0009944048526505152, "loss": 2.0879, "step": 25677 }, { "epoch": 0.6890832975525977, "grad_norm": 0.27734375, "learning_rate": 0.000994383674593654, "loss": 2.2749, "step": 25678 }, { "epoch": 0.6891101331043367, "grad_norm": 0.28125, "learning_rate": 0.0009943624956716315, "loss": 2.3533, "step": 25679 }, { "epoch": 0.6891369686560755, "grad_norm": 0.26953125, "learning_rate": 0.0009943413158844932, "loss": 2.2348, "step": 25680 }, { "epoch": 0.6891638042078145, "grad_norm": 0.27734375, "learning_rate": 0.0009943201352322864, "loss": 2.1495, "step": 25681 }, { "epoch": 0.6891906397595534, "grad_norm": 0.26953125, "learning_rate": 0.000994298953715057, "loss": 2.2867, "step": 25682 }, { "epoch": 0.6892174753112924, "grad_norm": 0.2578125, "learning_rate": 0.0009942777713328518, "loss": 2.2522, "step": 25683 }, { "epoch": 0.6892443108630314, "grad_norm": 0.259765625, "learning_rate": 0.000994256588085717, "loss": 2.2182, "step": 25684 }, { "epoch": 0.6892711464147703, "grad_norm": 0.2431640625, "learning_rate": 0.0009942354039736995, "loss": 2.2486, "step": 25685 }, { "epoch": 0.6892979819665093, "grad_norm": 0.251953125, "learning_rate": 0.0009942142189968454, "loss": 2.2161, "step": 25686 }, { "epoch": 0.6893248175182481, "grad_norm": 0.255859375, "learning_rate": 0.0009941930331552012, "loss": 2.1405, "step": 25687 }, { "epoch": 0.6893516530699871, "grad_norm": 0.26953125, "learning_rate": 0.0009941718464488137, "loss": 2.3374, "step": 25688 }, { "epoch": 0.689378488621726, "grad_norm": 0.259765625, "learning_rate": 0.0009941506588777287, "loss": 2.2784, "step": 25689 }, { "epoch": 0.689405324173465, "grad_norm": 0.255859375, "learning_rate": 0.0009941294704419936, "loss": 2.1898, "step": 25690 }, { "epoch": 0.689432159725204, "grad_norm": 0.25, "learning_rate": 0.000994108281141654, "loss": 2.11, "step": 25691 }, { "epoch": 0.6894589952769429, "grad_norm": 0.26171875, "learning_rate": 0.000994087090976757, "loss": 2.2416, "step": 25692 }, { "epoch": 0.6894858308286819, "grad_norm": 0.251953125, "learning_rate": 0.000994065899947349, "loss": 2.0896, "step": 25693 }, { "epoch": 0.6895126663804207, "grad_norm": 0.25390625, "learning_rate": 0.000994044708053476, "loss": 2.2061, "step": 25694 }, { "epoch": 0.6895395019321597, "grad_norm": 0.248046875, "learning_rate": 0.0009940235152951851, "loss": 2.1924, "step": 25695 }, { "epoch": 0.6895663374838986, "grad_norm": 0.265625, "learning_rate": 0.0009940023216725225, "loss": 2.1338, "step": 25696 }, { "epoch": 0.6895931730356376, "grad_norm": 0.265625, "learning_rate": 0.0009939811271855346, "loss": 2.2084, "step": 25697 }, { "epoch": 0.6896200085873766, "grad_norm": 0.265625, "learning_rate": 0.000993959931834268, "loss": 2.2419, "step": 25698 }, { "epoch": 0.6896468441391155, "grad_norm": 0.259765625, "learning_rate": 0.0009939387356187692, "loss": 2.1937, "step": 25699 }, { "epoch": 0.6896736796908545, "grad_norm": 0.251953125, "learning_rate": 0.0009939175385390848, "loss": 2.1717, "step": 25700 }, { "epoch": 0.6897005152425933, "grad_norm": 0.26171875, "learning_rate": 0.0009938963405952614, "loss": 2.1178, "step": 25701 }, { "epoch": 0.6897273507943323, "grad_norm": 0.265625, "learning_rate": 0.000993875141787345, "loss": 2.1472, "step": 25702 }, { "epoch": 0.6897541863460713, "grad_norm": 0.26171875, "learning_rate": 0.0009938539421153827, "loss": 2.1056, "step": 25703 }, { "epoch": 0.6897810218978102, "grad_norm": 0.263671875, "learning_rate": 0.0009938327415794205, "loss": 2.1877, "step": 25704 }, { "epoch": 0.6898078574495492, "grad_norm": 0.259765625, "learning_rate": 0.0009938115401795053, "loss": 2.0696, "step": 25705 }, { "epoch": 0.6898346930012881, "grad_norm": 0.263671875, "learning_rate": 0.0009937903379156833, "loss": 2.2206, "step": 25706 }, { "epoch": 0.6898615285530271, "grad_norm": 0.26171875, "learning_rate": 0.000993769134788001, "loss": 2.0634, "step": 25707 }, { "epoch": 0.689888364104766, "grad_norm": 0.265625, "learning_rate": 0.0009937479307965055, "loss": 2.1941, "step": 25708 }, { "epoch": 0.6899151996565049, "grad_norm": 0.267578125, "learning_rate": 0.0009937267259412426, "loss": 2.1152, "step": 25709 }, { "epoch": 0.6899420352082439, "grad_norm": 0.265625, "learning_rate": 0.0009937055202222593, "loss": 2.2412, "step": 25710 }, { "epoch": 0.6899688707599828, "grad_norm": 0.259765625, "learning_rate": 0.0009936843136396018, "loss": 2.3039, "step": 25711 }, { "epoch": 0.6899957063117218, "grad_norm": 0.26953125, "learning_rate": 0.0009936631061933166, "loss": 2.1486, "step": 25712 }, { "epoch": 0.6900225418634607, "grad_norm": 0.259765625, "learning_rate": 0.0009936418978834503, "loss": 2.2191, "step": 25713 }, { "epoch": 0.6900493774151997, "grad_norm": 0.259765625, "learning_rate": 0.0009936206887100499, "loss": 2.2686, "step": 25714 }, { "epoch": 0.6900762129669386, "grad_norm": 0.255859375, "learning_rate": 0.0009935994786731612, "loss": 2.2294, "step": 25715 }, { "epoch": 0.6901030485186775, "grad_norm": 0.255859375, "learning_rate": 0.0009935782677728312, "loss": 2.2501, "step": 25716 }, { "epoch": 0.6901298840704165, "grad_norm": 0.263671875, "learning_rate": 0.0009935570560091062, "loss": 2.2892, "step": 25717 }, { "epoch": 0.6901567196221554, "grad_norm": 0.2578125, "learning_rate": 0.0009935358433820326, "loss": 2.1149, "step": 25718 }, { "epoch": 0.6901835551738944, "grad_norm": 0.2734375, "learning_rate": 0.0009935146298916575, "loss": 2.207, "step": 25719 }, { "epoch": 0.6902103907256333, "grad_norm": 0.271484375, "learning_rate": 0.000993493415538027, "loss": 2.2862, "step": 25720 }, { "epoch": 0.6902372262773723, "grad_norm": 0.251953125, "learning_rate": 0.0009934722003211876, "loss": 2.1806, "step": 25721 }, { "epoch": 0.6902640618291112, "grad_norm": 0.259765625, "learning_rate": 0.000993450984241186, "loss": 2.2107, "step": 25722 }, { "epoch": 0.6902908973808501, "grad_norm": 0.2578125, "learning_rate": 0.0009934297672980687, "loss": 2.0899, "step": 25723 }, { "epoch": 0.6903177329325891, "grad_norm": 0.2734375, "learning_rate": 0.000993408549491882, "loss": 2.2648, "step": 25724 }, { "epoch": 0.690344568484328, "grad_norm": 0.255859375, "learning_rate": 0.000993387330822673, "loss": 2.1494, "step": 25725 }, { "epoch": 0.690371404036067, "grad_norm": 0.259765625, "learning_rate": 0.0009933661112904878, "loss": 2.1591, "step": 25726 }, { "epoch": 0.6903982395878059, "grad_norm": 0.265625, "learning_rate": 0.000993344890895373, "loss": 2.2297, "step": 25727 }, { "epoch": 0.6904250751395449, "grad_norm": 0.255859375, "learning_rate": 0.0009933236696373757, "loss": 2.1336, "step": 25728 }, { "epoch": 0.6904519106912839, "grad_norm": 0.2451171875, "learning_rate": 0.0009933024475165413, "loss": 2.1181, "step": 25729 }, { "epoch": 0.6904787462430227, "grad_norm": 0.26953125, "learning_rate": 0.0009932812245329176, "loss": 2.2426, "step": 25730 }, { "epoch": 0.6905055817947617, "grad_norm": 0.271484375, "learning_rate": 0.0009932600006865504, "loss": 2.1317, "step": 25731 }, { "epoch": 0.6905324173465006, "grad_norm": 0.255859375, "learning_rate": 0.0009932387759774863, "loss": 2.2315, "step": 25732 }, { "epoch": 0.6905592528982396, "grad_norm": 0.2578125, "learning_rate": 0.0009932175504057722, "loss": 2.1217, "step": 25733 }, { "epoch": 0.6905860884499785, "grad_norm": 0.251953125, "learning_rate": 0.0009931963239714545, "loss": 2.1554, "step": 25734 }, { "epoch": 0.6906129240017175, "grad_norm": 0.251953125, "learning_rate": 0.0009931750966745796, "loss": 2.1997, "step": 25735 }, { "epoch": 0.6906397595534565, "grad_norm": 0.25390625, "learning_rate": 0.0009931538685151943, "loss": 2.194, "step": 25736 }, { "epoch": 0.6906665951051953, "grad_norm": 0.259765625, "learning_rate": 0.000993132639493345, "loss": 2.2781, "step": 25737 }, { "epoch": 0.6906934306569343, "grad_norm": 0.263671875, "learning_rate": 0.0009931114096090785, "loss": 2.182, "step": 25738 }, { "epoch": 0.6907202662086732, "grad_norm": 0.263671875, "learning_rate": 0.0009930901788624412, "loss": 2.221, "step": 25739 }, { "epoch": 0.6907471017604122, "grad_norm": 0.271484375, "learning_rate": 0.0009930689472534796, "loss": 2.2655, "step": 25740 }, { "epoch": 0.6907739373121511, "grad_norm": 0.265625, "learning_rate": 0.0009930477147822403, "loss": 2.2203, "step": 25741 }, { "epoch": 0.6908007728638901, "grad_norm": 0.267578125, "learning_rate": 0.0009930264814487701, "loss": 2.2381, "step": 25742 }, { "epoch": 0.6908276084156291, "grad_norm": 0.263671875, "learning_rate": 0.0009930052472531155, "loss": 2.2003, "step": 25743 }, { "epoch": 0.690854443967368, "grad_norm": 0.248046875, "learning_rate": 0.0009929840121953228, "loss": 2.2, "step": 25744 }, { "epoch": 0.6908812795191069, "grad_norm": 0.259765625, "learning_rate": 0.000992962776275439, "loss": 2.2382, "step": 25745 }, { "epoch": 0.6909081150708458, "grad_norm": 0.251953125, "learning_rate": 0.0009929415394935104, "loss": 2.3252, "step": 25746 }, { "epoch": 0.6909349506225848, "grad_norm": 0.259765625, "learning_rate": 0.0009929203018495835, "loss": 2.2279, "step": 25747 }, { "epoch": 0.6909617861743237, "grad_norm": 0.2451171875, "learning_rate": 0.0009928990633437053, "loss": 2.2384, "step": 25748 }, { "epoch": 0.6909886217260627, "grad_norm": 0.259765625, "learning_rate": 0.000992877823975922, "loss": 2.2564, "step": 25749 }, { "epoch": 0.6910154572778017, "grad_norm": 0.2578125, "learning_rate": 0.0009928565837462803, "loss": 2.2294, "step": 25750 }, { "epoch": 0.6910422928295405, "grad_norm": 0.263671875, "learning_rate": 0.000992835342654827, "loss": 2.2078, "step": 25751 }, { "epoch": 0.6910691283812795, "grad_norm": 0.26171875, "learning_rate": 0.0009928141007016086, "loss": 2.2088, "step": 25752 }, { "epoch": 0.6910959639330184, "grad_norm": 0.255859375, "learning_rate": 0.0009927928578866714, "loss": 2.1522, "step": 25753 }, { "epoch": 0.6911227994847574, "grad_norm": 0.2578125, "learning_rate": 0.0009927716142100623, "loss": 2.1836, "step": 25754 }, { "epoch": 0.6911496350364964, "grad_norm": 0.251953125, "learning_rate": 0.0009927503696718278, "loss": 2.1582, "step": 25755 }, { "epoch": 0.6911764705882353, "grad_norm": 0.26171875, "learning_rate": 0.0009927291242720147, "loss": 2.2632, "step": 25756 }, { "epoch": 0.6912033061399743, "grad_norm": 0.251953125, "learning_rate": 0.0009927078780106692, "loss": 2.1526, "step": 25757 }, { "epoch": 0.6912301416917132, "grad_norm": 0.248046875, "learning_rate": 0.0009926866308878383, "loss": 2.1602, "step": 25758 }, { "epoch": 0.6912569772434521, "grad_norm": 0.25390625, "learning_rate": 0.0009926653829035684, "loss": 2.1533, "step": 25759 }, { "epoch": 0.691283812795191, "grad_norm": 0.259765625, "learning_rate": 0.0009926441340579063, "loss": 2.1984, "step": 25760 }, { "epoch": 0.69131064834693, "grad_norm": 0.263671875, "learning_rate": 0.0009926228843508986, "loss": 2.2718, "step": 25761 }, { "epoch": 0.691337483898669, "grad_norm": 0.263671875, "learning_rate": 0.0009926016337825914, "loss": 2.1534, "step": 25762 }, { "epoch": 0.6913643194504079, "grad_norm": 0.26953125, "learning_rate": 0.0009925803823530321, "loss": 2.2394, "step": 25763 }, { "epoch": 0.6913911550021469, "grad_norm": 0.2578125, "learning_rate": 0.0009925591300622666, "loss": 2.0799, "step": 25764 }, { "epoch": 0.6914179905538858, "grad_norm": 0.2734375, "learning_rate": 0.000992537876910342, "loss": 2.1659, "step": 25765 }, { "epoch": 0.6914448261056247, "grad_norm": 0.265625, "learning_rate": 0.000992516622897305, "loss": 2.2062, "step": 25766 }, { "epoch": 0.6914716616573636, "grad_norm": 0.265625, "learning_rate": 0.000992495368023202, "loss": 2.202, "step": 25767 }, { "epoch": 0.6914984972091026, "grad_norm": 0.26171875, "learning_rate": 0.0009924741122880791, "loss": 2.2715, "step": 25768 }, { "epoch": 0.6915253327608416, "grad_norm": 0.265625, "learning_rate": 0.0009924528556919839, "loss": 2.2252, "step": 25769 }, { "epoch": 0.6915521683125805, "grad_norm": 0.263671875, "learning_rate": 0.0009924315982349627, "loss": 2.1872, "step": 25770 }, { "epoch": 0.6915790038643195, "grad_norm": 0.263671875, "learning_rate": 0.0009924103399170617, "loss": 2.3128, "step": 25771 }, { "epoch": 0.6916058394160584, "grad_norm": 0.2578125, "learning_rate": 0.0009923890807383281, "loss": 2.2122, "step": 25772 }, { "epoch": 0.6916326749677973, "grad_norm": 0.255859375, "learning_rate": 0.0009923678206988083, "loss": 2.2862, "step": 25773 }, { "epoch": 0.6916595105195363, "grad_norm": 0.251953125, "learning_rate": 0.0009923465597985487, "loss": 2.2251, "step": 25774 }, { "epoch": 0.6916863460712752, "grad_norm": 0.251953125, "learning_rate": 0.0009923252980375964, "loss": 2.2852, "step": 25775 }, { "epoch": 0.6917131816230142, "grad_norm": 0.267578125, "learning_rate": 0.0009923040354159977, "loss": 2.3236, "step": 25776 }, { "epoch": 0.6917400171747531, "grad_norm": 0.2578125, "learning_rate": 0.0009922827719337994, "loss": 2.1972, "step": 25777 }, { "epoch": 0.6917668527264921, "grad_norm": 0.259765625, "learning_rate": 0.0009922615075910483, "loss": 2.3485, "step": 25778 }, { "epoch": 0.691793688278231, "grad_norm": 0.2470703125, "learning_rate": 0.0009922402423877905, "loss": 2.2264, "step": 25779 }, { "epoch": 0.69182052382997, "grad_norm": 0.26171875, "learning_rate": 0.0009922189763240732, "loss": 2.1841, "step": 25780 }, { "epoch": 0.6918473593817089, "grad_norm": 0.2490234375, "learning_rate": 0.0009921977093999429, "loss": 2.1545, "step": 25781 }, { "epoch": 0.6918741949334478, "grad_norm": 0.2578125, "learning_rate": 0.000992176441615446, "loss": 2.111, "step": 25782 }, { "epoch": 0.6919010304851868, "grad_norm": 0.259765625, "learning_rate": 0.0009921551729706298, "loss": 2.2422, "step": 25783 }, { "epoch": 0.6919278660369257, "grad_norm": 0.2578125, "learning_rate": 0.00099213390346554, "loss": 2.1149, "step": 25784 }, { "epoch": 0.6919547015886647, "grad_norm": 0.259765625, "learning_rate": 0.000992112633100224, "loss": 2.221, "step": 25785 }, { "epoch": 0.6919815371404036, "grad_norm": 0.259765625, "learning_rate": 0.0009920913618747284, "loss": 2.2627, "step": 25786 }, { "epoch": 0.6920083726921425, "grad_norm": 0.267578125, "learning_rate": 0.0009920700897890998, "loss": 2.1078, "step": 25787 }, { "epoch": 0.6920352082438815, "grad_norm": 0.2578125, "learning_rate": 0.0009920488168433843, "loss": 2.2, "step": 25788 }, { "epoch": 0.6920620437956204, "grad_norm": 0.263671875, "learning_rate": 0.0009920275430376295, "loss": 2.1713, "step": 25789 }, { "epoch": 0.6920888793473594, "grad_norm": 0.25390625, "learning_rate": 0.0009920062683718813, "loss": 2.1399, "step": 25790 }, { "epoch": 0.6921157148990983, "grad_norm": 0.2578125, "learning_rate": 0.0009919849928461868, "loss": 2.185, "step": 25791 }, { "epoch": 0.6921425504508373, "grad_norm": 0.267578125, "learning_rate": 0.0009919637164605925, "loss": 2.1268, "step": 25792 }, { "epoch": 0.6921693860025762, "grad_norm": 0.24609375, "learning_rate": 0.0009919424392151452, "loss": 2.1124, "step": 25793 }, { "epoch": 0.6921962215543151, "grad_norm": 0.251953125, "learning_rate": 0.0009919211611098913, "loss": 2.0787, "step": 25794 }, { "epoch": 0.6922230571060541, "grad_norm": 0.259765625, "learning_rate": 0.000991899882144878, "loss": 2.1612, "step": 25795 }, { "epoch": 0.692249892657793, "grad_norm": 0.267578125, "learning_rate": 0.0009918786023201513, "loss": 2.1954, "step": 25796 }, { "epoch": 0.692276728209532, "grad_norm": 0.263671875, "learning_rate": 0.0009918573216357587, "loss": 2.259, "step": 25797 }, { "epoch": 0.6923035637612709, "grad_norm": 0.26171875, "learning_rate": 0.0009918360400917462, "loss": 2.2415, "step": 25798 }, { "epoch": 0.6923303993130099, "grad_norm": 0.259765625, "learning_rate": 0.0009918147576881604, "loss": 2.2392, "step": 25799 }, { "epoch": 0.6923572348647489, "grad_norm": 0.265625, "learning_rate": 0.0009917934744250487, "loss": 2.1977, "step": 25800 }, { "epoch": 0.6923840704164878, "grad_norm": 0.26171875, "learning_rate": 0.0009917721903024573, "loss": 2.2524, "step": 25801 }, { "epoch": 0.6924109059682267, "grad_norm": 0.259765625, "learning_rate": 0.0009917509053204328, "loss": 2.1318, "step": 25802 }, { "epoch": 0.6924377415199656, "grad_norm": 0.26171875, "learning_rate": 0.0009917296194790221, "loss": 2.294, "step": 25803 }, { "epoch": 0.6924645770717046, "grad_norm": 0.2734375, "learning_rate": 0.000991708332778272, "loss": 2.2422, "step": 25804 }, { "epoch": 0.6924914126234435, "grad_norm": 0.26171875, "learning_rate": 0.0009916870452182291, "loss": 2.21, "step": 25805 }, { "epoch": 0.6925182481751825, "grad_norm": 0.263671875, "learning_rate": 0.0009916657567989398, "loss": 2.2663, "step": 25806 }, { "epoch": 0.6925450837269215, "grad_norm": 0.2470703125, "learning_rate": 0.0009916444675204512, "loss": 2.1026, "step": 25807 }, { "epoch": 0.6925719192786604, "grad_norm": 0.2578125, "learning_rate": 0.00099162317738281, "loss": 2.1978, "step": 25808 }, { "epoch": 0.6925987548303993, "grad_norm": 0.2578125, "learning_rate": 0.0009916018863860628, "loss": 2.2659, "step": 25809 }, { "epoch": 0.6926255903821382, "grad_norm": 0.2578125, "learning_rate": 0.0009915805945302561, "loss": 2.2542, "step": 25810 }, { "epoch": 0.6926524259338772, "grad_norm": 0.24609375, "learning_rate": 0.0009915593018154367, "loss": 2.195, "step": 25811 }, { "epoch": 0.6926792614856161, "grad_norm": 0.251953125, "learning_rate": 0.0009915380082416516, "loss": 2.1928, "step": 25812 }, { "epoch": 0.6927060970373551, "grad_norm": 0.259765625, "learning_rate": 0.000991516713808947, "loss": 2.2902, "step": 25813 }, { "epoch": 0.6927329325890941, "grad_norm": 0.251953125, "learning_rate": 0.0009914954185173703, "loss": 2.2719, "step": 25814 }, { "epoch": 0.692759768140833, "grad_norm": 0.267578125, "learning_rate": 0.0009914741223669675, "loss": 2.1896, "step": 25815 }, { "epoch": 0.692786603692572, "grad_norm": 0.2490234375, "learning_rate": 0.000991452825357786, "loss": 2.1696, "step": 25816 }, { "epoch": 0.6928134392443108, "grad_norm": 0.255859375, "learning_rate": 0.0009914315274898717, "loss": 2.2687, "step": 25817 }, { "epoch": 0.6928402747960498, "grad_norm": 0.236328125, "learning_rate": 0.000991410228763272, "loss": 2.0081, "step": 25818 }, { "epoch": 0.6928671103477888, "grad_norm": 0.25, "learning_rate": 0.0009913889291780335, "loss": 2.0739, "step": 25819 }, { "epoch": 0.6928939458995277, "grad_norm": 0.2578125, "learning_rate": 0.000991367628734203, "loss": 2.2137, "step": 25820 }, { "epoch": 0.6929207814512667, "grad_norm": 0.2578125, "learning_rate": 0.0009913463274318268, "loss": 2.0833, "step": 25821 }, { "epoch": 0.6929476170030056, "grad_norm": 0.2490234375, "learning_rate": 0.000991325025270952, "loss": 2.1629, "step": 25822 }, { "epoch": 0.6929744525547445, "grad_norm": 0.255859375, "learning_rate": 0.0009913037222516253, "loss": 2.2291, "step": 25823 }, { "epoch": 0.6930012881064834, "grad_norm": 0.248046875, "learning_rate": 0.000991282418373893, "loss": 2.068, "step": 25824 }, { "epoch": 0.6930281236582224, "grad_norm": 0.26171875, "learning_rate": 0.0009912611136378025, "loss": 2.2611, "step": 25825 }, { "epoch": 0.6930549592099614, "grad_norm": 0.271484375, "learning_rate": 0.0009912398080434, "loss": 2.1863, "step": 25826 }, { "epoch": 0.6930817947617003, "grad_norm": 0.26953125, "learning_rate": 0.0009912185015907328, "loss": 2.2008, "step": 25827 }, { "epoch": 0.6931086303134393, "grad_norm": 0.255859375, "learning_rate": 0.0009911971942798473, "loss": 2.1949, "step": 25828 }, { "epoch": 0.6931354658651782, "grad_norm": 0.25, "learning_rate": 0.00099117588611079, "loss": 2.3072, "step": 25829 }, { "epoch": 0.6931623014169171, "grad_norm": 0.259765625, "learning_rate": 0.0009911545770836078, "loss": 2.1932, "step": 25830 }, { "epoch": 0.693189136968656, "grad_norm": 0.267578125, "learning_rate": 0.0009911332671983478, "loss": 2.301, "step": 25831 }, { "epoch": 0.693215972520395, "grad_norm": 0.265625, "learning_rate": 0.0009911119564550562, "loss": 2.3591, "step": 25832 }, { "epoch": 0.693242808072134, "grad_norm": 0.2451171875, "learning_rate": 0.0009910906448537802, "loss": 2.1243, "step": 25833 }, { "epoch": 0.6932696436238729, "grad_norm": 0.2578125, "learning_rate": 0.0009910693323945665, "loss": 2.234, "step": 25834 }, { "epoch": 0.6932964791756119, "grad_norm": 0.255859375, "learning_rate": 0.0009910480190774618, "loss": 2.2562, "step": 25835 }, { "epoch": 0.6933233147273508, "grad_norm": 0.2578125, "learning_rate": 0.0009910267049025125, "loss": 2.2273, "step": 25836 }, { "epoch": 0.6933501502790897, "grad_norm": 0.2578125, "learning_rate": 0.000991005389869766, "loss": 2.281, "step": 25837 }, { "epoch": 0.6933769858308286, "grad_norm": 0.259765625, "learning_rate": 0.0009909840739792683, "loss": 2.2416, "step": 25838 }, { "epoch": 0.6934038213825676, "grad_norm": 0.259765625, "learning_rate": 0.0009909627572310669, "loss": 2.1654, "step": 25839 }, { "epoch": 0.6934306569343066, "grad_norm": 0.2578125, "learning_rate": 0.0009909414396252082, "loss": 2.2266, "step": 25840 }, { "epoch": 0.6934574924860455, "grad_norm": 0.2578125, "learning_rate": 0.0009909201211617388, "loss": 2.1864, "step": 25841 }, { "epoch": 0.6934843280377845, "grad_norm": 0.251953125, "learning_rate": 0.0009908988018407056, "loss": 2.1082, "step": 25842 }, { "epoch": 0.6935111635895234, "grad_norm": 0.251953125, "learning_rate": 0.000990877481662156, "loss": 2.1425, "step": 25843 }, { "epoch": 0.6935379991412624, "grad_norm": 0.2431640625, "learning_rate": 0.0009908561606261357, "loss": 2.0958, "step": 25844 }, { "epoch": 0.6935648346930013, "grad_norm": 0.255859375, "learning_rate": 0.0009908348387326921, "loss": 2.1443, "step": 25845 }, { "epoch": 0.6935916702447402, "grad_norm": 0.25, "learning_rate": 0.0009908135159818717, "loss": 2.1442, "step": 25846 }, { "epoch": 0.6936185057964792, "grad_norm": 0.251953125, "learning_rate": 0.0009907921923737214, "loss": 2.2661, "step": 25847 }, { "epoch": 0.6936453413482181, "grad_norm": 0.24609375, "learning_rate": 0.0009907708679082882, "loss": 2.1181, "step": 25848 }, { "epoch": 0.6936721768999571, "grad_norm": 0.25, "learning_rate": 0.0009907495425856188, "loss": 2.1302, "step": 25849 }, { "epoch": 0.693699012451696, "grad_norm": 0.244140625, "learning_rate": 0.0009907282164057596, "loss": 2.1376, "step": 25850 }, { "epoch": 0.693725848003435, "grad_norm": 0.25390625, "learning_rate": 0.0009907068893687577, "loss": 2.1716, "step": 25851 }, { "epoch": 0.6937526835551739, "grad_norm": 0.251953125, "learning_rate": 0.00099068556147466, "loss": 2.1393, "step": 25852 }, { "epoch": 0.6937795191069128, "grad_norm": 0.251953125, "learning_rate": 0.000990664232723513, "loss": 2.1535, "step": 25853 }, { "epoch": 0.6938063546586518, "grad_norm": 0.248046875, "learning_rate": 0.0009906429031153637, "loss": 2.1925, "step": 25854 }, { "epoch": 0.6938331902103907, "grad_norm": 0.26953125, "learning_rate": 0.0009906215726502587, "loss": 2.2155, "step": 25855 }, { "epoch": 0.6938600257621297, "grad_norm": 0.259765625, "learning_rate": 0.0009906002413282448, "loss": 2.2872, "step": 25856 }, { "epoch": 0.6938868613138686, "grad_norm": 0.259765625, "learning_rate": 0.000990578909149369, "loss": 2.2015, "step": 25857 }, { "epoch": 0.6939136968656076, "grad_norm": 0.271484375, "learning_rate": 0.000990557576113678, "loss": 2.208, "step": 25858 }, { "epoch": 0.6939405324173465, "grad_norm": 0.24609375, "learning_rate": 0.0009905362422212188, "loss": 2.0561, "step": 25859 }, { "epoch": 0.6939673679690854, "grad_norm": 0.263671875, "learning_rate": 0.0009905149074720375, "loss": 2.3093, "step": 25860 }, { "epoch": 0.6939942035208244, "grad_norm": 0.25390625, "learning_rate": 0.0009904935718661816, "loss": 2.2859, "step": 25861 }, { "epoch": 0.6940210390725633, "grad_norm": 0.255859375, "learning_rate": 0.0009904722354036979, "loss": 2.1541, "step": 25862 }, { "epoch": 0.6940478746243023, "grad_norm": 0.255859375, "learning_rate": 0.0009904508980846326, "loss": 2.1874, "step": 25863 }, { "epoch": 0.6940747101760412, "grad_norm": 0.251953125, "learning_rate": 0.0009904295599090331, "loss": 2.3364, "step": 25864 }, { "epoch": 0.6941015457277802, "grad_norm": 0.244140625, "learning_rate": 0.0009904082208769462, "loss": 2.2326, "step": 25865 }, { "epoch": 0.6941283812795191, "grad_norm": 0.259765625, "learning_rate": 0.0009903868809884183, "loss": 2.2931, "step": 25866 }, { "epoch": 0.694155216831258, "grad_norm": 0.2578125, "learning_rate": 0.0009903655402434964, "loss": 2.1596, "step": 25867 }, { "epoch": 0.694182052382997, "grad_norm": 0.251953125, "learning_rate": 0.0009903441986422274, "loss": 2.161, "step": 25868 }, { "epoch": 0.6942088879347359, "grad_norm": 0.263671875, "learning_rate": 0.0009903228561846582, "loss": 2.2881, "step": 25869 }, { "epoch": 0.6942357234864749, "grad_norm": 0.25390625, "learning_rate": 0.0009903015128708355, "loss": 2.1883, "step": 25870 }, { "epoch": 0.6942625590382139, "grad_norm": 0.2578125, "learning_rate": 0.0009902801687008058, "loss": 2.2347, "step": 25871 }, { "epoch": 0.6942893945899528, "grad_norm": 0.255859375, "learning_rate": 0.0009902588236746165, "loss": 2.2068, "step": 25872 }, { "epoch": 0.6943162301416917, "grad_norm": 0.2578125, "learning_rate": 0.000990237477792314, "loss": 2.2636, "step": 25873 }, { "epoch": 0.6943430656934306, "grad_norm": 0.25, "learning_rate": 0.0009902161310539454, "loss": 2.144, "step": 25874 }, { "epoch": 0.6943699012451696, "grad_norm": 0.251953125, "learning_rate": 0.0009901947834595574, "loss": 2.0649, "step": 25875 }, { "epoch": 0.6943967367969085, "grad_norm": 0.25390625, "learning_rate": 0.000990173435009197, "loss": 2.1477, "step": 25876 }, { "epoch": 0.6944235723486475, "grad_norm": 0.26171875, "learning_rate": 0.0009901520857029105, "loss": 2.0973, "step": 25877 }, { "epoch": 0.6944504079003865, "grad_norm": 0.259765625, "learning_rate": 0.0009901307355407454, "loss": 2.2039, "step": 25878 }, { "epoch": 0.6944772434521254, "grad_norm": 0.267578125, "learning_rate": 0.0009901093845227481, "loss": 2.1665, "step": 25879 }, { "epoch": 0.6945040790038643, "grad_norm": 0.26171875, "learning_rate": 0.0009900880326489656, "loss": 2.1966, "step": 25880 }, { "epoch": 0.6945309145556032, "grad_norm": 0.25390625, "learning_rate": 0.000990066679919445, "loss": 2.1382, "step": 25881 }, { "epoch": 0.6945577501073422, "grad_norm": 0.259765625, "learning_rate": 0.0009900453263342324, "loss": 2.229, "step": 25882 }, { "epoch": 0.6945845856590811, "grad_norm": 0.26171875, "learning_rate": 0.0009900239718933755, "loss": 2.2256, "step": 25883 }, { "epoch": 0.6946114212108201, "grad_norm": 0.275390625, "learning_rate": 0.0009900026165969205, "loss": 2.2427, "step": 25884 }, { "epoch": 0.6946382567625591, "grad_norm": 0.26953125, "learning_rate": 0.0009899812604449146, "loss": 2.333, "step": 25885 }, { "epoch": 0.694665092314298, "grad_norm": 0.265625, "learning_rate": 0.0009899599034374044, "loss": 2.3219, "step": 25886 }, { "epoch": 0.694691927866037, "grad_norm": 0.251953125, "learning_rate": 0.000989938545574437, "loss": 2.1979, "step": 25887 }, { "epoch": 0.6947187634177758, "grad_norm": 0.267578125, "learning_rate": 0.0009899171868560594, "loss": 2.2111, "step": 25888 }, { "epoch": 0.6947455989695148, "grad_norm": 0.259765625, "learning_rate": 0.000989895827282318, "loss": 2.1798, "step": 25889 }, { "epoch": 0.6947724345212538, "grad_norm": 0.259765625, "learning_rate": 0.0009898744668532598, "loss": 2.2589, "step": 25890 }, { "epoch": 0.6947992700729927, "grad_norm": 0.2578125, "learning_rate": 0.0009898531055689316, "loss": 2.2973, "step": 25891 }, { "epoch": 0.6948261056247317, "grad_norm": 0.25, "learning_rate": 0.0009898317434293806, "loss": 2.1868, "step": 25892 }, { "epoch": 0.6948529411764706, "grad_norm": 0.25390625, "learning_rate": 0.0009898103804346537, "loss": 2.1383, "step": 25893 }, { "epoch": 0.6948797767282096, "grad_norm": 0.2392578125, "learning_rate": 0.000989789016584797, "loss": 2.1111, "step": 25894 }, { "epoch": 0.6949066122799484, "grad_norm": 0.251953125, "learning_rate": 0.0009897676518798582, "loss": 2.1895, "step": 25895 }, { "epoch": 0.6949334478316874, "grad_norm": 0.248046875, "learning_rate": 0.0009897462863198835, "loss": 2.0831, "step": 25896 }, { "epoch": 0.6949602833834264, "grad_norm": 0.255859375, "learning_rate": 0.0009897249199049205, "loss": 2.1015, "step": 25897 }, { "epoch": 0.6949871189351653, "grad_norm": 0.2490234375, "learning_rate": 0.0009897035526350154, "loss": 2.1584, "step": 25898 }, { "epoch": 0.6950139544869043, "grad_norm": 0.251953125, "learning_rate": 0.0009896821845102155, "loss": 2.1583, "step": 25899 }, { "epoch": 0.6950407900386432, "grad_norm": 0.2578125, "learning_rate": 0.0009896608155305676, "loss": 2.1869, "step": 25900 }, { "epoch": 0.6950676255903822, "grad_norm": 0.2734375, "learning_rate": 0.0009896394456961185, "loss": 2.2078, "step": 25901 }, { "epoch": 0.695094461142121, "grad_norm": 0.25390625, "learning_rate": 0.0009896180750069148, "loss": 2.1103, "step": 25902 }, { "epoch": 0.69512129669386, "grad_norm": 0.2412109375, "learning_rate": 0.000989596703463004, "loss": 2.1372, "step": 25903 }, { "epoch": 0.695148132245599, "grad_norm": 0.25, "learning_rate": 0.0009895753310644325, "loss": 2.2244, "step": 25904 }, { "epoch": 0.6951749677973379, "grad_norm": 0.2578125, "learning_rate": 0.0009895539578112474, "loss": 2.2083, "step": 25905 }, { "epoch": 0.6952018033490769, "grad_norm": 0.255859375, "learning_rate": 0.0009895325837034955, "loss": 2.2438, "step": 25906 }, { "epoch": 0.6952286389008158, "grad_norm": 0.251953125, "learning_rate": 0.0009895112087412237, "loss": 2.1787, "step": 25907 }, { "epoch": 0.6952554744525548, "grad_norm": 0.251953125, "learning_rate": 0.000989489832924479, "loss": 2.1093, "step": 25908 }, { "epoch": 0.6952823100042936, "grad_norm": 0.244140625, "learning_rate": 0.0009894684562533082, "loss": 2.0698, "step": 25909 }, { "epoch": 0.6953091455560326, "grad_norm": 0.25390625, "learning_rate": 0.000989447078727758, "loss": 2.2082, "step": 25910 }, { "epoch": 0.6953359811077716, "grad_norm": 0.25390625, "learning_rate": 0.0009894257003478756, "loss": 2.1934, "step": 25911 }, { "epoch": 0.6953628166595105, "grad_norm": 0.2734375, "learning_rate": 0.0009894043211137078, "loss": 2.2564, "step": 25912 }, { "epoch": 0.6953896522112495, "grad_norm": 0.271484375, "learning_rate": 0.0009893829410253015, "loss": 2.2377, "step": 25913 }, { "epoch": 0.6954164877629884, "grad_norm": 0.267578125, "learning_rate": 0.0009893615600827035, "loss": 2.2589, "step": 25914 }, { "epoch": 0.6954433233147274, "grad_norm": 0.25390625, "learning_rate": 0.000989340178285961, "loss": 2.3029, "step": 25915 }, { "epoch": 0.6954701588664663, "grad_norm": 0.26171875, "learning_rate": 0.0009893187956351204, "loss": 2.321, "step": 25916 }, { "epoch": 0.6954969944182052, "grad_norm": 0.265625, "learning_rate": 0.0009892974121302291, "loss": 2.2204, "step": 25917 }, { "epoch": 0.6955238299699442, "grad_norm": 0.265625, "learning_rate": 0.0009892760277713337, "loss": 2.2879, "step": 25918 }, { "epoch": 0.6955506655216831, "grad_norm": 0.25, "learning_rate": 0.0009892546425584812, "loss": 2.216, "step": 25919 }, { "epoch": 0.6955775010734221, "grad_norm": 0.26171875, "learning_rate": 0.0009892332564917187, "loss": 2.1287, "step": 25920 }, { "epoch": 0.695604336625161, "grad_norm": 0.263671875, "learning_rate": 0.000989211869571093, "loss": 2.2004, "step": 25921 }, { "epoch": 0.6956311721769, "grad_norm": 0.251953125, "learning_rate": 0.0009891904817966508, "loss": 2.2067, "step": 25922 }, { "epoch": 0.695658007728639, "grad_norm": 0.26171875, "learning_rate": 0.0009891690931684391, "loss": 2.1694, "step": 25923 }, { "epoch": 0.6956848432803778, "grad_norm": 0.2578125, "learning_rate": 0.000989147703686505, "loss": 2.2523, "step": 25924 }, { "epoch": 0.6957116788321168, "grad_norm": 0.263671875, "learning_rate": 0.0009891263133508953, "loss": 2.2091, "step": 25925 }, { "epoch": 0.6957385143838557, "grad_norm": 0.267578125, "learning_rate": 0.000989104922161657, "loss": 2.222, "step": 25926 }, { "epoch": 0.6957653499355947, "grad_norm": 0.251953125, "learning_rate": 0.000989083530118837, "loss": 2.2349, "step": 25927 }, { "epoch": 0.6957921854873336, "grad_norm": 0.25390625, "learning_rate": 0.000989062137222482, "loss": 2.1598, "step": 25928 }, { "epoch": 0.6958190210390726, "grad_norm": 0.26953125, "learning_rate": 0.0009890407434726394, "loss": 2.143, "step": 25929 }, { "epoch": 0.6958458565908116, "grad_norm": 0.26171875, "learning_rate": 0.0009890193488693558, "loss": 2.2446, "step": 25930 }, { "epoch": 0.6958726921425504, "grad_norm": 0.248046875, "learning_rate": 0.0009889979534126782, "loss": 2.1239, "step": 25931 }, { "epoch": 0.6958995276942894, "grad_norm": 0.25390625, "learning_rate": 0.0009889765571026536, "loss": 2.1542, "step": 25932 }, { "epoch": 0.6959263632460283, "grad_norm": 0.248046875, "learning_rate": 0.0009889551599393286, "loss": 2.1247, "step": 25933 }, { "epoch": 0.6959531987977673, "grad_norm": 0.255859375, "learning_rate": 0.0009889337619227506, "loss": 2.1988, "step": 25934 }, { "epoch": 0.6959800343495062, "grad_norm": 0.255859375, "learning_rate": 0.0009889123630529664, "loss": 2.1554, "step": 25935 }, { "epoch": 0.6960068699012452, "grad_norm": 0.25390625, "learning_rate": 0.0009888909633300228, "loss": 2.1432, "step": 25936 }, { "epoch": 0.6960337054529842, "grad_norm": 0.255859375, "learning_rate": 0.000988869562753967, "loss": 2.1537, "step": 25937 }, { "epoch": 0.696060541004723, "grad_norm": 0.26171875, "learning_rate": 0.0009888481613248457, "loss": 2.2737, "step": 25938 }, { "epoch": 0.696087376556462, "grad_norm": 0.2490234375, "learning_rate": 0.0009888267590427058, "loss": 2.0738, "step": 25939 }, { "epoch": 0.6961142121082009, "grad_norm": 0.248046875, "learning_rate": 0.0009888053559075946, "loss": 2.1482, "step": 25940 }, { "epoch": 0.6961410476599399, "grad_norm": 0.265625, "learning_rate": 0.0009887839519195586, "loss": 2.23, "step": 25941 }, { "epoch": 0.6961678832116789, "grad_norm": 0.251953125, "learning_rate": 0.000988762547078645, "loss": 2.1491, "step": 25942 }, { "epoch": 0.6961947187634178, "grad_norm": 0.25, "learning_rate": 0.000988741141384901, "loss": 2.1339, "step": 25943 }, { "epoch": 0.6962215543151568, "grad_norm": 0.2578125, "learning_rate": 0.0009887197348383732, "loss": 2.279, "step": 25944 }, { "epoch": 0.6962483898668956, "grad_norm": 0.255859375, "learning_rate": 0.0009886983274391085, "loss": 2.2353, "step": 25945 }, { "epoch": 0.6962752254186346, "grad_norm": 0.25390625, "learning_rate": 0.0009886769191871543, "loss": 2.2208, "step": 25946 }, { "epoch": 0.6963020609703735, "grad_norm": 0.255859375, "learning_rate": 0.0009886555100825569, "loss": 2.2077, "step": 25947 }, { "epoch": 0.6963288965221125, "grad_norm": 0.26171875, "learning_rate": 0.0009886341001253641, "loss": 2.2695, "step": 25948 }, { "epoch": 0.6963557320738515, "grad_norm": 0.255859375, "learning_rate": 0.0009886126893156222, "loss": 2.1868, "step": 25949 }, { "epoch": 0.6963825676255904, "grad_norm": 0.248046875, "learning_rate": 0.0009885912776533784, "loss": 2.2478, "step": 25950 }, { "epoch": 0.6964094031773294, "grad_norm": 0.255859375, "learning_rate": 0.0009885698651386798, "loss": 2.2617, "step": 25951 }, { "epoch": 0.6964362387290682, "grad_norm": 0.251953125, "learning_rate": 0.000988548451771573, "loss": 2.1095, "step": 25952 }, { "epoch": 0.6964630742808072, "grad_norm": 0.24609375, "learning_rate": 0.0009885270375521054, "loss": 2.1016, "step": 25953 }, { "epoch": 0.6964899098325461, "grad_norm": 0.25390625, "learning_rate": 0.0009885056224803236, "loss": 2.1633, "step": 25954 }, { "epoch": 0.6965167453842851, "grad_norm": 0.259765625, "learning_rate": 0.0009884842065562749, "loss": 2.2774, "step": 25955 }, { "epoch": 0.6965435809360241, "grad_norm": 0.2578125, "learning_rate": 0.000988462789780006, "loss": 2.1449, "step": 25956 }, { "epoch": 0.696570416487763, "grad_norm": 0.2470703125, "learning_rate": 0.0009884413721515642, "loss": 2.2077, "step": 25957 }, { "epoch": 0.696597252039502, "grad_norm": 0.248046875, "learning_rate": 0.0009884199536709963, "loss": 2.2614, "step": 25958 }, { "epoch": 0.6966240875912408, "grad_norm": 0.25390625, "learning_rate": 0.0009883985343383493, "loss": 2.2109, "step": 25959 }, { "epoch": 0.6966509231429798, "grad_norm": 0.251953125, "learning_rate": 0.0009883771141536702, "loss": 2.1586, "step": 25960 }, { "epoch": 0.6966777586947188, "grad_norm": 0.2578125, "learning_rate": 0.0009883556931170059, "loss": 2.1912, "step": 25961 }, { "epoch": 0.6967045942464577, "grad_norm": 0.255859375, "learning_rate": 0.0009883342712284032, "loss": 2.1865, "step": 25962 }, { "epoch": 0.6967314297981967, "grad_norm": 0.25390625, "learning_rate": 0.0009883128484879098, "loss": 2.199, "step": 25963 }, { "epoch": 0.6967582653499356, "grad_norm": 0.244140625, "learning_rate": 0.0009882914248955722, "loss": 2.0851, "step": 25964 }, { "epoch": 0.6967851009016746, "grad_norm": 0.2451171875, "learning_rate": 0.0009882700004514374, "loss": 2.1369, "step": 25965 }, { "epoch": 0.6968119364534134, "grad_norm": 0.25390625, "learning_rate": 0.0009882485751555523, "loss": 2.13, "step": 25966 }, { "epoch": 0.6968387720051524, "grad_norm": 0.255859375, "learning_rate": 0.000988227149007964, "loss": 2.194, "step": 25967 }, { "epoch": 0.6968656075568914, "grad_norm": 0.255859375, "learning_rate": 0.0009882057220087198, "loss": 2.1965, "step": 25968 }, { "epoch": 0.6968924431086303, "grad_norm": 0.255859375, "learning_rate": 0.0009881842941578663, "loss": 2.2143, "step": 25969 }, { "epoch": 0.6969192786603693, "grad_norm": 0.26953125, "learning_rate": 0.0009881628654554507, "loss": 2.2463, "step": 25970 }, { "epoch": 0.6969461142121082, "grad_norm": 0.2578125, "learning_rate": 0.00098814143590152, "loss": 2.2612, "step": 25971 }, { "epoch": 0.6969729497638472, "grad_norm": 0.25390625, "learning_rate": 0.000988120005496121, "loss": 2.3175, "step": 25972 }, { "epoch": 0.696999785315586, "grad_norm": 0.24609375, "learning_rate": 0.000988098574239301, "loss": 2.238, "step": 25973 }, { "epoch": 0.697026620867325, "grad_norm": 0.259765625, "learning_rate": 0.0009880771421311072, "loss": 2.1764, "step": 25974 }, { "epoch": 0.697053456419064, "grad_norm": 0.25390625, "learning_rate": 0.000988055709171586, "loss": 2.2744, "step": 25975 }, { "epoch": 0.6970802919708029, "grad_norm": 0.255859375, "learning_rate": 0.0009880342753607848, "loss": 2.1363, "step": 25976 }, { "epoch": 0.6971071275225419, "grad_norm": 0.26171875, "learning_rate": 0.0009880128406987508, "loss": 2.2469, "step": 25977 }, { "epoch": 0.6971339630742808, "grad_norm": 0.259765625, "learning_rate": 0.0009879914051855304, "loss": 2.2363, "step": 25978 }, { "epoch": 0.6971607986260198, "grad_norm": 0.2490234375, "learning_rate": 0.0009879699688211713, "loss": 2.2718, "step": 25979 }, { "epoch": 0.6971876341777586, "grad_norm": 0.26171875, "learning_rate": 0.0009879485316057202, "loss": 2.1835, "step": 25980 }, { "epoch": 0.6972144697294976, "grad_norm": 0.2490234375, "learning_rate": 0.0009879270935392239, "loss": 2.3148, "step": 25981 }, { "epoch": 0.6972413052812366, "grad_norm": 0.259765625, "learning_rate": 0.00098790565462173, "loss": 2.2379, "step": 25982 }, { "epoch": 0.6972681408329755, "grad_norm": 0.263671875, "learning_rate": 0.000987884214853285, "loss": 2.1914, "step": 25983 }, { "epoch": 0.6972949763847145, "grad_norm": 0.25, "learning_rate": 0.0009878627742339362, "loss": 2.1714, "step": 25984 }, { "epoch": 0.6973218119364534, "grad_norm": 0.25, "learning_rate": 0.0009878413327637306, "loss": 2.0723, "step": 25985 }, { "epoch": 0.6973486474881924, "grad_norm": 0.26171875, "learning_rate": 0.0009878198904427152, "loss": 2.1615, "step": 25986 }, { "epoch": 0.6973754830399314, "grad_norm": 0.24609375, "learning_rate": 0.000987798447270937, "loss": 2.2455, "step": 25987 }, { "epoch": 0.6974023185916702, "grad_norm": 0.251953125, "learning_rate": 0.0009877770032484434, "loss": 2.1419, "step": 25988 }, { "epoch": 0.6974291541434092, "grad_norm": 0.263671875, "learning_rate": 0.0009877555583752808, "loss": 2.1763, "step": 25989 }, { "epoch": 0.6974559896951481, "grad_norm": 0.259765625, "learning_rate": 0.000987734112651497, "loss": 2.1287, "step": 25990 }, { "epoch": 0.6974828252468871, "grad_norm": 0.263671875, "learning_rate": 0.000987712666077138, "loss": 2.1816, "step": 25991 }, { "epoch": 0.697509660798626, "grad_norm": 0.255859375, "learning_rate": 0.0009876912186522519, "loss": 2.21, "step": 25992 }, { "epoch": 0.697536496350365, "grad_norm": 0.265625, "learning_rate": 0.0009876697703768852, "loss": 2.3043, "step": 25993 }, { "epoch": 0.697563331902104, "grad_norm": 0.23828125, "learning_rate": 0.0009876483212510855, "loss": 2.1479, "step": 25994 }, { "epoch": 0.6975901674538428, "grad_norm": 0.248046875, "learning_rate": 0.0009876268712748987, "loss": 2.2463, "step": 25995 }, { "epoch": 0.6976170030055818, "grad_norm": 0.2578125, "learning_rate": 0.0009876054204483729, "loss": 2.1768, "step": 25996 }, { "epoch": 0.6976438385573207, "grad_norm": 0.265625, "learning_rate": 0.000987583968771555, "loss": 2.1857, "step": 25997 }, { "epoch": 0.6976706741090597, "grad_norm": 0.263671875, "learning_rate": 0.0009875625162444918, "loss": 2.2191, "step": 25998 }, { "epoch": 0.6976975096607986, "grad_norm": 0.2578125, "learning_rate": 0.0009875410628672305, "loss": 2.1602, "step": 25999 }, { "epoch": 0.6977243452125376, "grad_norm": 0.25, "learning_rate": 0.000987519608639818, "loss": 2.1679, "step": 26000 }, { "epoch": 0.6977511807642766, "grad_norm": 0.265625, "learning_rate": 0.0009874981535623016, "loss": 2.2396, "step": 26001 }, { "epoch": 0.6977780163160154, "grad_norm": 0.25390625, "learning_rate": 0.0009874766976347281, "loss": 2.1907, "step": 26002 }, { "epoch": 0.6978048518677544, "grad_norm": 0.251953125, "learning_rate": 0.000987455240857145, "loss": 2.181, "step": 26003 }, { "epoch": 0.6978316874194933, "grad_norm": 0.2490234375, "learning_rate": 0.0009874337832295988, "loss": 2.1682, "step": 26004 }, { "epoch": 0.6978585229712323, "grad_norm": 0.25390625, "learning_rate": 0.000987412324752137, "loss": 2.2985, "step": 26005 }, { "epoch": 0.6978853585229712, "grad_norm": 0.2490234375, "learning_rate": 0.0009873908654248066, "loss": 2.1282, "step": 26006 }, { "epoch": 0.6979121940747102, "grad_norm": 0.255859375, "learning_rate": 0.0009873694052476545, "loss": 2.2461, "step": 26007 }, { "epoch": 0.6979390296264492, "grad_norm": 0.25, "learning_rate": 0.000987347944220728, "loss": 2.1459, "step": 26008 }, { "epoch": 0.697965865178188, "grad_norm": 0.26171875, "learning_rate": 0.000987326482344074, "loss": 2.1415, "step": 26009 }, { "epoch": 0.697992700729927, "grad_norm": 0.2490234375, "learning_rate": 0.0009873050196177397, "loss": 2.3201, "step": 26010 }, { "epoch": 0.6980195362816659, "grad_norm": 0.2451171875, "learning_rate": 0.000987283556041772, "loss": 2.102, "step": 26011 }, { "epoch": 0.6980463718334049, "grad_norm": 0.2431640625, "learning_rate": 0.0009872620916162183, "loss": 2.1302, "step": 26012 }, { "epoch": 0.6980732073851439, "grad_norm": 0.2578125, "learning_rate": 0.0009872406263411254, "loss": 2.204, "step": 26013 }, { "epoch": 0.6981000429368828, "grad_norm": 0.2490234375, "learning_rate": 0.0009872191602165405, "loss": 2.1931, "step": 26014 }, { "epoch": 0.6981268784886218, "grad_norm": 0.2490234375, "learning_rate": 0.0009871976932425107, "loss": 2.0506, "step": 26015 }, { "epoch": 0.6981537140403606, "grad_norm": 0.248046875, "learning_rate": 0.0009871762254190831, "loss": 2.1594, "step": 26016 }, { "epoch": 0.6981805495920996, "grad_norm": 0.2431640625, "learning_rate": 0.0009871547567463048, "loss": 2.0284, "step": 26017 }, { "epoch": 0.6982073851438385, "grad_norm": 0.2470703125, "learning_rate": 0.0009871332872242228, "loss": 2.2198, "step": 26018 }, { "epoch": 0.6982342206955775, "grad_norm": 0.259765625, "learning_rate": 0.0009871118168528843, "loss": 2.1964, "step": 26019 }, { "epoch": 0.6982610562473165, "grad_norm": 0.2578125, "learning_rate": 0.0009870903456323363, "loss": 2.3272, "step": 26020 }, { "epoch": 0.6982878917990554, "grad_norm": 0.255859375, "learning_rate": 0.000987068873562626, "loss": 2.2468, "step": 26021 }, { "epoch": 0.6983147273507944, "grad_norm": 0.248046875, "learning_rate": 0.0009870474006438003, "loss": 2.2194, "step": 26022 }, { "epoch": 0.6983415629025332, "grad_norm": 0.25, "learning_rate": 0.0009870259268759066, "loss": 2.1316, "step": 26023 }, { "epoch": 0.6983683984542722, "grad_norm": 0.2578125, "learning_rate": 0.000987004452258992, "loss": 2.1793, "step": 26024 }, { "epoch": 0.6983952340060111, "grad_norm": 0.259765625, "learning_rate": 0.0009869829767931034, "loss": 2.1598, "step": 26025 }, { "epoch": 0.6984220695577501, "grad_norm": 0.2578125, "learning_rate": 0.0009869615004782881, "loss": 2.1903, "step": 26026 }, { "epoch": 0.6984489051094891, "grad_norm": 0.279296875, "learning_rate": 0.000986940023314593, "loss": 2.3429, "step": 26027 }, { "epoch": 0.698475740661228, "grad_norm": 0.259765625, "learning_rate": 0.0009869185453020653, "loss": 2.2763, "step": 26028 }, { "epoch": 0.698502576212967, "grad_norm": 0.263671875, "learning_rate": 0.0009868970664407522, "loss": 2.2058, "step": 26029 }, { "epoch": 0.6985294117647058, "grad_norm": 0.255859375, "learning_rate": 0.0009868755867307008, "loss": 2.3051, "step": 26030 }, { "epoch": 0.6985562473164448, "grad_norm": 0.2578125, "learning_rate": 0.0009868541061719583, "loss": 2.1189, "step": 26031 }, { "epoch": 0.6985830828681838, "grad_norm": 0.2578125, "learning_rate": 0.0009868326247645714, "loss": 2.2225, "step": 26032 }, { "epoch": 0.6986099184199227, "grad_norm": 0.24609375, "learning_rate": 0.0009868111425085878, "loss": 2.1569, "step": 26033 }, { "epoch": 0.6986367539716617, "grad_norm": 0.265625, "learning_rate": 0.0009867896594040542, "loss": 2.1911, "step": 26034 }, { "epoch": 0.6986635895234006, "grad_norm": 0.26171875, "learning_rate": 0.000986768175451018, "loss": 2.214, "step": 26035 }, { "epoch": 0.6986904250751396, "grad_norm": 0.265625, "learning_rate": 0.000986746690649526, "loss": 2.2003, "step": 26036 }, { "epoch": 0.6987172606268784, "grad_norm": 0.248046875, "learning_rate": 0.0009867252049996257, "loss": 2.1938, "step": 26037 }, { "epoch": 0.6987440961786174, "grad_norm": 0.2578125, "learning_rate": 0.0009867037185013639, "loss": 2.2507, "step": 26038 }, { "epoch": 0.6987709317303564, "grad_norm": 0.2578125, "learning_rate": 0.0009866822311547882, "loss": 2.1785, "step": 26039 }, { "epoch": 0.6987977672820953, "grad_norm": 0.25, "learning_rate": 0.0009866607429599454, "loss": 2.0874, "step": 26040 }, { "epoch": 0.6988246028338343, "grad_norm": 0.25, "learning_rate": 0.0009866392539168826, "loss": 2.1176, "step": 26041 }, { "epoch": 0.6988514383855732, "grad_norm": 0.251953125, "learning_rate": 0.000986617764025647, "loss": 2.1629, "step": 26042 }, { "epoch": 0.6988782739373122, "grad_norm": 0.255859375, "learning_rate": 0.0009865962732862857, "loss": 2.1829, "step": 26043 }, { "epoch": 0.698905109489051, "grad_norm": 0.2578125, "learning_rate": 0.000986574781698846, "loss": 2.1268, "step": 26044 }, { "epoch": 0.69893194504079, "grad_norm": 0.255859375, "learning_rate": 0.000986553289263375, "loss": 2.1139, "step": 26045 }, { "epoch": 0.698958780592529, "grad_norm": 0.2578125, "learning_rate": 0.0009865317959799197, "loss": 2.2012, "step": 26046 }, { "epoch": 0.6989856161442679, "grad_norm": 0.25, "learning_rate": 0.0009865103018485276, "loss": 2.1253, "step": 26047 }, { "epoch": 0.6990124516960069, "grad_norm": 0.248046875, "learning_rate": 0.0009864888068692455, "loss": 2.1557, "step": 26048 }, { "epoch": 0.6990392872477458, "grad_norm": 0.248046875, "learning_rate": 0.0009864673110421206, "loss": 2.1809, "step": 26049 }, { "epoch": 0.6990661227994848, "grad_norm": 0.248046875, "learning_rate": 0.0009864458143672, "loss": 2.1191, "step": 26050 }, { "epoch": 0.6990929583512236, "grad_norm": 0.26171875, "learning_rate": 0.0009864243168445311, "loss": 2.2051, "step": 26051 }, { "epoch": 0.6991197939029626, "grad_norm": 0.2578125, "learning_rate": 0.0009864028184741612, "loss": 2.1775, "step": 26052 }, { "epoch": 0.6991466294547016, "grad_norm": 0.25, "learning_rate": 0.0009863813192561367, "loss": 2.0448, "step": 26053 }, { "epoch": 0.6991734650064405, "grad_norm": 0.2451171875, "learning_rate": 0.0009863598191905056, "loss": 2.1231, "step": 26054 }, { "epoch": 0.6992003005581795, "grad_norm": 0.259765625, "learning_rate": 0.0009863383182773147, "loss": 2.2132, "step": 26055 }, { "epoch": 0.6992271361099184, "grad_norm": 0.26953125, "learning_rate": 0.0009863168165166112, "loss": 2.1623, "step": 26056 }, { "epoch": 0.6992539716616574, "grad_norm": 0.26171875, "learning_rate": 0.000986295313908442, "loss": 2.182, "step": 26057 }, { "epoch": 0.6992808072133964, "grad_norm": 0.25390625, "learning_rate": 0.0009862738104528548, "loss": 2.2455, "step": 26058 }, { "epoch": 0.6993076427651352, "grad_norm": 0.23828125, "learning_rate": 0.0009862523061498964, "loss": 2.0954, "step": 26059 }, { "epoch": 0.6993344783168742, "grad_norm": 0.26171875, "learning_rate": 0.0009862308009996141, "loss": 2.3551, "step": 26060 }, { "epoch": 0.6993613138686131, "grad_norm": 0.2470703125, "learning_rate": 0.000986209295002055, "loss": 2.136, "step": 26061 }, { "epoch": 0.6993881494203521, "grad_norm": 0.2470703125, "learning_rate": 0.0009861877881572664, "loss": 2.1341, "step": 26062 }, { "epoch": 0.699414984972091, "grad_norm": 0.2470703125, "learning_rate": 0.0009861662804652954, "loss": 2.1969, "step": 26063 }, { "epoch": 0.69944182052383, "grad_norm": 0.248046875, "learning_rate": 0.0009861447719261892, "loss": 2.1708, "step": 26064 }, { "epoch": 0.699468656075569, "grad_norm": 0.2578125, "learning_rate": 0.000986123262539995, "loss": 2.1418, "step": 26065 }, { "epoch": 0.6994954916273078, "grad_norm": 0.259765625, "learning_rate": 0.00098610175230676, "loss": 2.2302, "step": 26066 }, { "epoch": 0.6995223271790468, "grad_norm": 0.265625, "learning_rate": 0.000986080241226531, "loss": 2.2622, "step": 26067 }, { "epoch": 0.6995491627307857, "grad_norm": 0.2578125, "learning_rate": 0.000986058729299356, "loss": 2.0834, "step": 26068 }, { "epoch": 0.6995759982825247, "grad_norm": 0.2490234375, "learning_rate": 0.0009860372165252816, "loss": 2.2595, "step": 26069 }, { "epoch": 0.6996028338342636, "grad_norm": 0.251953125, "learning_rate": 0.000986015702904355, "loss": 2.2046, "step": 26070 }, { "epoch": 0.6996296693860026, "grad_norm": 0.248046875, "learning_rate": 0.0009859941884366235, "loss": 2.1672, "step": 26071 }, { "epoch": 0.6996565049377416, "grad_norm": 0.2421875, "learning_rate": 0.0009859726731221343, "loss": 2.0993, "step": 26072 }, { "epoch": 0.6996833404894804, "grad_norm": 0.25, "learning_rate": 0.0009859511569609348, "loss": 2.1437, "step": 26073 }, { "epoch": 0.6997101760412194, "grad_norm": 0.24609375, "learning_rate": 0.000985929639953072, "loss": 2.1802, "step": 26074 }, { "epoch": 0.6997370115929583, "grad_norm": 0.2451171875, "learning_rate": 0.0009859081220985931, "loss": 2.0617, "step": 26075 }, { "epoch": 0.6997638471446973, "grad_norm": 0.251953125, "learning_rate": 0.000985886603397545, "loss": 2.0734, "step": 26076 }, { "epoch": 0.6997906826964362, "grad_norm": 0.2578125, "learning_rate": 0.0009858650838499756, "loss": 2.1391, "step": 26077 }, { "epoch": 0.6998175182481752, "grad_norm": 0.2490234375, "learning_rate": 0.0009858435634559316, "loss": 2.1943, "step": 26078 }, { "epoch": 0.6998443537999142, "grad_norm": 0.25390625, "learning_rate": 0.0009858220422154602, "loss": 2.1019, "step": 26079 }, { "epoch": 0.699871189351653, "grad_norm": 0.25, "learning_rate": 0.000985800520128609, "loss": 2.2079, "step": 26080 }, { "epoch": 0.699898024903392, "grad_norm": 0.2412109375, "learning_rate": 0.0009857789971954248, "loss": 2.1055, "step": 26081 }, { "epoch": 0.6999248604551309, "grad_norm": 0.2470703125, "learning_rate": 0.000985757473415955, "loss": 2.1486, "step": 26082 }, { "epoch": 0.6999516960068699, "grad_norm": 0.251953125, "learning_rate": 0.0009857359487902468, "loss": 2.1613, "step": 26083 }, { "epoch": 0.6999785315586089, "grad_norm": 0.26171875, "learning_rate": 0.0009857144233183473, "loss": 2.2932, "step": 26084 }, { "epoch": 0.7000053671103478, "grad_norm": 0.265625, "learning_rate": 0.0009856928970003041, "loss": 2.2632, "step": 26085 }, { "epoch": 0.7000322026620868, "grad_norm": 0.2490234375, "learning_rate": 0.000985671369836164, "loss": 2.1374, "step": 26086 }, { "epoch": 0.7000590382138256, "grad_norm": 0.26953125, "learning_rate": 0.0009856498418259745, "loss": 2.3079, "step": 26087 }, { "epoch": 0.7000858737655646, "grad_norm": 0.265625, "learning_rate": 0.0009856283129697826, "loss": 2.1845, "step": 26088 }, { "epoch": 0.7001127093173035, "grad_norm": 0.259765625, "learning_rate": 0.0009856067832676356, "loss": 2.3277, "step": 26089 }, { "epoch": 0.7001395448690425, "grad_norm": 0.255859375, "learning_rate": 0.0009855852527195807, "loss": 2.0845, "step": 26090 }, { "epoch": 0.7001663804207815, "grad_norm": 0.259765625, "learning_rate": 0.0009855637213256653, "loss": 2.2032, "step": 26091 }, { "epoch": 0.7001932159725204, "grad_norm": 0.2490234375, "learning_rate": 0.0009855421890859366, "loss": 2.1433, "step": 26092 }, { "epoch": 0.7002200515242594, "grad_norm": 0.251953125, "learning_rate": 0.0009855206560004416, "loss": 2.2081, "step": 26093 }, { "epoch": 0.7002468870759982, "grad_norm": 0.244140625, "learning_rate": 0.000985499122069228, "loss": 2.0877, "step": 26094 }, { "epoch": 0.7002737226277372, "grad_norm": 0.2490234375, "learning_rate": 0.0009854775872923424, "loss": 2.2752, "step": 26095 }, { "epoch": 0.7003005581794761, "grad_norm": 0.251953125, "learning_rate": 0.0009854560516698325, "loss": 2.0743, "step": 26096 }, { "epoch": 0.7003273937312151, "grad_norm": 0.26953125, "learning_rate": 0.0009854345152017455, "loss": 2.18, "step": 26097 }, { "epoch": 0.7003542292829541, "grad_norm": 0.2431640625, "learning_rate": 0.0009854129778881285, "loss": 2.1823, "step": 26098 }, { "epoch": 0.700381064834693, "grad_norm": 0.2431640625, "learning_rate": 0.0009853914397290288, "loss": 2.2183, "step": 26099 }, { "epoch": 0.700407900386432, "grad_norm": 0.255859375, "learning_rate": 0.0009853699007244937, "loss": 2.2503, "step": 26100 }, { "epoch": 0.7004347359381708, "grad_norm": 0.248046875, "learning_rate": 0.0009853483608745703, "loss": 2.2446, "step": 26101 }, { "epoch": 0.7004615714899098, "grad_norm": 0.2578125, "learning_rate": 0.0009853268201793063, "loss": 2.2004, "step": 26102 }, { "epoch": 0.7004884070416488, "grad_norm": 0.2490234375, "learning_rate": 0.0009853052786387483, "loss": 2.1984, "step": 26103 }, { "epoch": 0.7005152425933877, "grad_norm": 0.248046875, "learning_rate": 0.000985283736252944, "loss": 2.186, "step": 26104 }, { "epoch": 0.7005420781451267, "grad_norm": 0.244140625, "learning_rate": 0.0009852621930219406, "loss": 2.2271, "step": 26105 }, { "epoch": 0.7005689136968656, "grad_norm": 0.234375, "learning_rate": 0.0009852406489457852, "loss": 2.108, "step": 26106 }, { "epoch": 0.7005957492486046, "grad_norm": 0.251953125, "learning_rate": 0.0009852191040245252, "loss": 2.1345, "step": 26107 }, { "epoch": 0.7006225848003435, "grad_norm": 0.26171875, "learning_rate": 0.0009851975582582076, "loss": 2.0628, "step": 26108 }, { "epoch": 0.7006494203520824, "grad_norm": 0.251953125, "learning_rate": 0.00098517601164688, "loss": 2.1594, "step": 26109 }, { "epoch": 0.7006762559038214, "grad_norm": 0.2470703125, "learning_rate": 0.0009851544641905899, "loss": 2.0645, "step": 26110 }, { "epoch": 0.7007030914555603, "grad_norm": 0.267578125, "learning_rate": 0.0009851329158893838, "loss": 2.3565, "step": 26111 }, { "epoch": 0.7007299270072993, "grad_norm": 0.259765625, "learning_rate": 0.0009851113667433094, "loss": 2.169, "step": 26112 }, { "epoch": 0.7007567625590382, "grad_norm": 0.255859375, "learning_rate": 0.0009850898167524142, "loss": 2.1672, "step": 26113 }, { "epoch": 0.7007835981107772, "grad_norm": 0.244140625, "learning_rate": 0.0009850682659167452, "loss": 2.1481, "step": 26114 }, { "epoch": 0.700810433662516, "grad_norm": 0.2431640625, "learning_rate": 0.0009850467142363494, "loss": 2.2374, "step": 26115 }, { "epoch": 0.700837269214255, "grad_norm": 0.265625, "learning_rate": 0.0009850251617112748, "loss": 2.3202, "step": 26116 }, { "epoch": 0.700864104765994, "grad_norm": 0.25390625, "learning_rate": 0.000985003608341568, "loss": 2.2073, "step": 26117 }, { "epoch": 0.7008909403177329, "grad_norm": 0.2353515625, "learning_rate": 0.0009849820541272768, "loss": 2.1979, "step": 26118 }, { "epoch": 0.7009177758694719, "grad_norm": 0.2451171875, "learning_rate": 0.000984960499068448, "loss": 2.1928, "step": 26119 }, { "epoch": 0.7009446114212108, "grad_norm": 0.2373046875, "learning_rate": 0.0009849389431651294, "loss": 2.193, "step": 26120 }, { "epoch": 0.7009714469729498, "grad_norm": 0.24609375, "learning_rate": 0.0009849173864173678, "loss": 2.2182, "step": 26121 }, { "epoch": 0.7009982825246887, "grad_norm": 0.2451171875, "learning_rate": 0.0009848958288252107, "loss": 2.214, "step": 26122 }, { "epoch": 0.7010251180764276, "grad_norm": 0.2421875, "learning_rate": 0.0009848742703887053, "loss": 2.0874, "step": 26123 }, { "epoch": 0.7010519536281666, "grad_norm": 0.255859375, "learning_rate": 0.0009848527111078992, "loss": 2.2355, "step": 26124 }, { "epoch": 0.7010787891799055, "grad_norm": 0.244140625, "learning_rate": 0.0009848311509828393, "loss": 2.1197, "step": 26125 }, { "epoch": 0.7011056247316445, "grad_norm": 0.25390625, "learning_rate": 0.000984809590013573, "loss": 2.224, "step": 26126 }, { "epoch": 0.7011324602833834, "grad_norm": 0.25, "learning_rate": 0.0009847880282001478, "loss": 2.2252, "step": 26127 }, { "epoch": 0.7011592958351224, "grad_norm": 0.248046875, "learning_rate": 0.0009847664655426112, "loss": 2.1646, "step": 26128 }, { "epoch": 0.7011861313868614, "grad_norm": 0.244140625, "learning_rate": 0.0009847449020410097, "loss": 2.1242, "step": 26129 }, { "epoch": 0.7012129669386002, "grad_norm": 0.255859375, "learning_rate": 0.0009847233376953912, "loss": 2.3344, "step": 26130 }, { "epoch": 0.7012398024903392, "grad_norm": 0.248046875, "learning_rate": 0.000984701772505803, "loss": 2.2159, "step": 26131 }, { "epoch": 0.7012666380420781, "grad_norm": 0.2578125, "learning_rate": 0.0009846802064722923, "loss": 2.3059, "step": 26132 }, { "epoch": 0.7012934735938171, "grad_norm": 0.248046875, "learning_rate": 0.0009846586395949061, "loss": 2.2175, "step": 26133 }, { "epoch": 0.701320309145556, "grad_norm": 0.2470703125, "learning_rate": 0.0009846370718736923, "loss": 2.1854, "step": 26134 }, { "epoch": 0.701347144697295, "grad_norm": 0.26171875, "learning_rate": 0.0009846155033086978, "loss": 2.3188, "step": 26135 }, { "epoch": 0.701373980249034, "grad_norm": 0.2470703125, "learning_rate": 0.0009845939338999701, "loss": 2.0977, "step": 26136 }, { "epoch": 0.7014008158007728, "grad_norm": 0.2490234375, "learning_rate": 0.0009845723636475567, "loss": 2.1413, "step": 26137 }, { "epoch": 0.7014276513525118, "grad_norm": 0.248046875, "learning_rate": 0.0009845507925515043, "loss": 2.1602, "step": 26138 }, { "epoch": 0.7014544869042507, "grad_norm": 0.248046875, "learning_rate": 0.0009845292206118609, "loss": 2.1336, "step": 26139 }, { "epoch": 0.7014813224559897, "grad_norm": 0.2578125, "learning_rate": 0.0009845076478286732, "loss": 2.1663, "step": 26140 }, { "epoch": 0.7015081580077286, "grad_norm": 0.251953125, "learning_rate": 0.000984486074201989, "loss": 2.198, "step": 26141 }, { "epoch": 0.7015349935594676, "grad_norm": 0.2490234375, "learning_rate": 0.0009844644997318555, "loss": 2.2072, "step": 26142 }, { "epoch": 0.7015618291112066, "grad_norm": 0.244140625, "learning_rate": 0.00098444292441832, "loss": 2.0756, "step": 26143 }, { "epoch": 0.7015886646629454, "grad_norm": 0.267578125, "learning_rate": 0.0009844213482614298, "loss": 2.2048, "step": 26144 }, { "epoch": 0.7016155002146844, "grad_norm": 0.25390625, "learning_rate": 0.0009843997712612323, "loss": 2.2528, "step": 26145 }, { "epoch": 0.7016423357664233, "grad_norm": 0.251953125, "learning_rate": 0.0009843781934177746, "loss": 2.2378, "step": 26146 }, { "epoch": 0.7016691713181623, "grad_norm": 0.24609375, "learning_rate": 0.0009843566147311044, "loss": 2.2023, "step": 26147 }, { "epoch": 0.7016960068699012, "grad_norm": 0.251953125, "learning_rate": 0.000984335035201269, "loss": 2.2019, "step": 26148 }, { "epoch": 0.7017228424216402, "grad_norm": 0.2490234375, "learning_rate": 0.0009843134548283153, "loss": 2.2217, "step": 26149 }, { "epoch": 0.7017496779733792, "grad_norm": 0.2431640625, "learning_rate": 0.0009842918736122912, "loss": 2.1825, "step": 26150 }, { "epoch": 0.701776513525118, "grad_norm": 0.25390625, "learning_rate": 0.0009842702915532436, "loss": 2.2545, "step": 26151 }, { "epoch": 0.701803349076857, "grad_norm": 0.2490234375, "learning_rate": 0.00098424870865122, "loss": 2.2281, "step": 26152 }, { "epoch": 0.7018301846285959, "grad_norm": 0.255859375, "learning_rate": 0.0009842271249062681, "loss": 2.2674, "step": 26153 }, { "epoch": 0.7018570201803349, "grad_norm": 0.2373046875, "learning_rate": 0.0009842055403184347, "loss": 2.1318, "step": 26154 }, { "epoch": 0.7018838557320739, "grad_norm": 0.26171875, "learning_rate": 0.0009841839548877676, "loss": 2.2813, "step": 26155 }, { "epoch": 0.7019106912838128, "grad_norm": 0.25, "learning_rate": 0.0009841623686143138, "loss": 2.1564, "step": 26156 }, { "epoch": 0.7019375268355518, "grad_norm": 0.2431640625, "learning_rate": 0.0009841407814981204, "loss": 2.0776, "step": 26157 }, { "epoch": 0.7019643623872907, "grad_norm": 0.2431640625, "learning_rate": 0.0009841191935392357, "loss": 2.1361, "step": 26158 }, { "epoch": 0.7019911979390296, "grad_norm": 0.25390625, "learning_rate": 0.0009840976047377063, "loss": 2.2801, "step": 26159 }, { "epoch": 0.7020180334907685, "grad_norm": 0.2392578125, "learning_rate": 0.0009840760150935798, "loss": 2.1903, "step": 26160 }, { "epoch": 0.7020448690425075, "grad_norm": 0.2470703125, "learning_rate": 0.0009840544246069033, "loss": 2.2037, "step": 26161 }, { "epoch": 0.7020717045942465, "grad_norm": 0.2451171875, "learning_rate": 0.0009840328332777245, "loss": 2.2277, "step": 26162 }, { "epoch": 0.7020985401459854, "grad_norm": 0.236328125, "learning_rate": 0.0009840112411060907, "loss": 2.127, "step": 26163 }, { "epoch": 0.7021253756977244, "grad_norm": 0.251953125, "learning_rate": 0.0009839896480920492, "loss": 2.1092, "step": 26164 }, { "epoch": 0.7021522112494633, "grad_norm": 0.2373046875, "learning_rate": 0.0009839680542356472, "loss": 2.0642, "step": 26165 }, { "epoch": 0.7021790468012022, "grad_norm": 0.2431640625, "learning_rate": 0.0009839464595369325, "loss": 2.0926, "step": 26166 }, { "epoch": 0.7022058823529411, "grad_norm": 0.263671875, "learning_rate": 0.0009839248639959522, "loss": 2.2173, "step": 26167 }, { "epoch": 0.7022327179046801, "grad_norm": 0.263671875, "learning_rate": 0.0009839032676127537, "loss": 2.2554, "step": 26168 }, { "epoch": 0.7022595534564191, "grad_norm": 0.2431640625, "learning_rate": 0.0009838816703873843, "loss": 2.105, "step": 26169 }, { "epoch": 0.702286389008158, "grad_norm": 0.25390625, "learning_rate": 0.0009838600723198916, "loss": 2.1123, "step": 26170 }, { "epoch": 0.702313224559897, "grad_norm": 0.26171875, "learning_rate": 0.0009838384734103226, "loss": 2.26, "step": 26171 }, { "epoch": 0.7023400601116359, "grad_norm": 0.24609375, "learning_rate": 0.0009838168736587252, "loss": 2.1986, "step": 26172 }, { "epoch": 0.7023668956633748, "grad_norm": 0.240234375, "learning_rate": 0.0009837952730651464, "loss": 2.2164, "step": 26173 }, { "epoch": 0.7023937312151138, "grad_norm": 0.2470703125, "learning_rate": 0.0009837736716296335, "loss": 2.2012, "step": 26174 }, { "epoch": 0.7024205667668527, "grad_norm": 0.251953125, "learning_rate": 0.000983752069352234, "loss": 2.2392, "step": 26175 }, { "epoch": 0.7024474023185917, "grad_norm": 0.248046875, "learning_rate": 0.0009837304662329957, "loss": 2.2544, "step": 26176 }, { "epoch": 0.7024742378703306, "grad_norm": 0.251953125, "learning_rate": 0.0009837088622719654, "loss": 2.2065, "step": 26177 }, { "epoch": 0.7025010734220696, "grad_norm": 0.244140625, "learning_rate": 0.000983687257469191, "loss": 2.1883, "step": 26178 }, { "epoch": 0.7025279089738085, "grad_norm": 0.251953125, "learning_rate": 0.0009836656518247195, "loss": 2.1822, "step": 26179 }, { "epoch": 0.7025547445255474, "grad_norm": 0.236328125, "learning_rate": 0.0009836440453385984, "loss": 2.0695, "step": 26180 }, { "epoch": 0.7025815800772864, "grad_norm": 0.23828125, "learning_rate": 0.0009836224380108752, "loss": 2.0956, "step": 26181 }, { "epoch": 0.7026084156290253, "grad_norm": 0.25, "learning_rate": 0.0009836008298415972, "loss": 2.1152, "step": 26182 }, { "epoch": 0.7026352511807643, "grad_norm": 0.240234375, "learning_rate": 0.0009835792208308118, "loss": 2.1735, "step": 26183 }, { "epoch": 0.7026620867325032, "grad_norm": 0.259765625, "learning_rate": 0.0009835576109785665, "loss": 2.2442, "step": 26184 }, { "epoch": 0.7026889222842422, "grad_norm": 0.255859375, "learning_rate": 0.0009835360002849087, "loss": 2.2149, "step": 26185 }, { "epoch": 0.7027157578359811, "grad_norm": 0.24609375, "learning_rate": 0.0009835143887498856, "loss": 2.1568, "step": 26186 }, { "epoch": 0.70274259338772, "grad_norm": 0.2490234375, "learning_rate": 0.000983492776373545, "loss": 2.2502, "step": 26187 }, { "epoch": 0.702769428939459, "grad_norm": 0.251953125, "learning_rate": 0.0009834711631559337, "loss": 2.1279, "step": 26188 }, { "epoch": 0.7027962644911979, "grad_norm": 0.2431640625, "learning_rate": 0.0009834495490970998, "loss": 2.2456, "step": 26189 }, { "epoch": 0.7028231000429369, "grad_norm": 0.2421875, "learning_rate": 0.0009834279341970904, "loss": 2.2145, "step": 26190 }, { "epoch": 0.7028499355946758, "grad_norm": 0.2578125, "learning_rate": 0.0009834063184559528, "loss": 2.2295, "step": 26191 }, { "epoch": 0.7028767711464148, "grad_norm": 0.2392578125, "learning_rate": 0.0009833847018737346, "loss": 2.1254, "step": 26192 }, { "epoch": 0.7029036066981537, "grad_norm": 0.2490234375, "learning_rate": 0.0009833630844504832, "loss": 2.2782, "step": 26193 }, { "epoch": 0.7029304422498927, "grad_norm": 0.2490234375, "learning_rate": 0.0009833414661862456, "loss": 2.132, "step": 26194 }, { "epoch": 0.7029572778016316, "grad_norm": 0.25390625, "learning_rate": 0.00098331984708107, "loss": 2.162, "step": 26195 }, { "epoch": 0.7029841133533705, "grad_norm": 0.267578125, "learning_rate": 0.0009832982271350032, "loss": 2.2673, "step": 26196 }, { "epoch": 0.7030109489051095, "grad_norm": 0.259765625, "learning_rate": 0.0009832766063480932, "loss": 2.2208, "step": 26197 }, { "epoch": 0.7030377844568484, "grad_norm": 0.2578125, "learning_rate": 0.0009832549847203869, "loss": 2.2262, "step": 26198 }, { "epoch": 0.7030646200085874, "grad_norm": 0.259765625, "learning_rate": 0.0009832333622519318, "loss": 2.2762, "step": 26199 }, { "epoch": 0.7030914555603264, "grad_norm": 0.259765625, "learning_rate": 0.0009832117389427756, "loss": 2.2171, "step": 26200 }, { "epoch": 0.7031182911120653, "grad_norm": 0.2578125, "learning_rate": 0.0009831901147929656, "loss": 2.2424, "step": 26201 }, { "epoch": 0.7031451266638042, "grad_norm": 0.248046875, "learning_rate": 0.000983168489802549, "loss": 2.2031, "step": 26202 }, { "epoch": 0.7031719622155431, "grad_norm": 0.25390625, "learning_rate": 0.0009831468639715738, "loss": 2.2211, "step": 26203 }, { "epoch": 0.7031987977672821, "grad_norm": 0.251953125, "learning_rate": 0.000983125237300087, "loss": 2.1996, "step": 26204 }, { "epoch": 0.703225633319021, "grad_norm": 0.2451171875, "learning_rate": 0.000983103609788136, "loss": 2.175, "step": 26205 }, { "epoch": 0.70325246887076, "grad_norm": 0.240234375, "learning_rate": 0.0009830819814357684, "loss": 2.1055, "step": 26206 }, { "epoch": 0.703279304422499, "grad_norm": 0.255859375, "learning_rate": 0.0009830603522430316, "loss": 2.1908, "step": 26207 }, { "epoch": 0.7033061399742379, "grad_norm": 0.25390625, "learning_rate": 0.0009830387222099733, "loss": 2.2211, "step": 26208 }, { "epoch": 0.7033329755259768, "grad_norm": 0.25, "learning_rate": 0.0009830170913366406, "loss": 2.2497, "step": 26209 }, { "epoch": 0.7033598110777157, "grad_norm": 0.2421875, "learning_rate": 0.000982995459623081, "loss": 2.127, "step": 26210 }, { "epoch": 0.7033866466294547, "grad_norm": 0.2490234375, "learning_rate": 0.0009829738270693423, "loss": 2.1475, "step": 26211 }, { "epoch": 0.7034134821811936, "grad_norm": 0.25, "learning_rate": 0.0009829521936754715, "loss": 2.1655, "step": 26212 }, { "epoch": 0.7034403177329326, "grad_norm": 0.2431640625, "learning_rate": 0.0009829305594415162, "loss": 2.1588, "step": 26213 }, { "epoch": 0.7034671532846716, "grad_norm": 0.25390625, "learning_rate": 0.0009829089243675241, "loss": 2.2502, "step": 26214 }, { "epoch": 0.7034939888364105, "grad_norm": 0.263671875, "learning_rate": 0.0009828872884535425, "loss": 2.3056, "step": 26215 }, { "epoch": 0.7035208243881494, "grad_norm": 0.27734375, "learning_rate": 0.0009828656516996186, "loss": 2.2807, "step": 26216 }, { "epoch": 0.7035476599398883, "grad_norm": 0.26171875, "learning_rate": 0.0009828440141058002, "loss": 2.2068, "step": 26217 }, { "epoch": 0.7035744954916273, "grad_norm": 0.240234375, "learning_rate": 0.0009828223756721346, "loss": 2.2141, "step": 26218 }, { "epoch": 0.7036013310433663, "grad_norm": 0.2470703125, "learning_rate": 0.0009828007363986697, "loss": 2.0688, "step": 26219 }, { "epoch": 0.7036281665951052, "grad_norm": 0.2470703125, "learning_rate": 0.0009827790962854523, "loss": 2.1773, "step": 26220 }, { "epoch": 0.7036550021468442, "grad_norm": 0.25, "learning_rate": 0.00098275745533253, "loss": 2.2032, "step": 26221 }, { "epoch": 0.7036818376985831, "grad_norm": 0.263671875, "learning_rate": 0.0009827358135399507, "loss": 2.1756, "step": 26222 }, { "epoch": 0.703708673250322, "grad_norm": 0.265625, "learning_rate": 0.0009827141709077617, "loss": 2.2117, "step": 26223 }, { "epoch": 0.7037355088020609, "grad_norm": 0.25, "learning_rate": 0.0009826925274360103, "loss": 2.149, "step": 26224 }, { "epoch": 0.7037623443537999, "grad_norm": 0.25390625, "learning_rate": 0.000982670883124744, "loss": 2.2111, "step": 26225 }, { "epoch": 0.7037891799055389, "grad_norm": 0.25390625, "learning_rate": 0.0009826492379740104, "loss": 2.234, "step": 26226 }, { "epoch": 0.7038160154572778, "grad_norm": 0.259765625, "learning_rate": 0.0009826275919838571, "loss": 2.2165, "step": 26227 }, { "epoch": 0.7038428510090168, "grad_norm": 0.2431640625, "learning_rate": 0.0009826059451543312, "loss": 2.1555, "step": 26228 }, { "epoch": 0.7038696865607557, "grad_norm": 0.25, "learning_rate": 0.0009825842974854806, "loss": 2.2136, "step": 26229 }, { "epoch": 0.7038965221124946, "grad_norm": 0.2470703125, "learning_rate": 0.0009825626489773525, "loss": 2.1468, "step": 26230 }, { "epoch": 0.7039233576642335, "grad_norm": 0.255859375, "learning_rate": 0.0009825409996299947, "loss": 2.2285, "step": 26231 }, { "epoch": 0.7039501932159725, "grad_norm": 0.240234375, "learning_rate": 0.0009825193494434543, "loss": 2.2163, "step": 26232 }, { "epoch": 0.7039770287677115, "grad_norm": 0.2490234375, "learning_rate": 0.000982497698417779, "loss": 2.2406, "step": 26233 }, { "epoch": 0.7040038643194504, "grad_norm": 0.2431640625, "learning_rate": 0.0009824760465530164, "loss": 2.2002, "step": 26234 }, { "epoch": 0.7040306998711894, "grad_norm": 0.2451171875, "learning_rate": 0.0009824543938492137, "loss": 2.1587, "step": 26235 }, { "epoch": 0.7040575354229283, "grad_norm": 0.25, "learning_rate": 0.0009824327403064187, "loss": 2.2156, "step": 26236 }, { "epoch": 0.7040843709746673, "grad_norm": 0.244140625, "learning_rate": 0.0009824110859246787, "loss": 2.1654, "step": 26237 }, { "epoch": 0.7041112065264061, "grad_norm": 0.2470703125, "learning_rate": 0.0009823894307040414, "loss": 2.2426, "step": 26238 }, { "epoch": 0.7041380420781451, "grad_norm": 0.24609375, "learning_rate": 0.0009823677746445543, "loss": 2.1726, "step": 26239 }, { "epoch": 0.7041648776298841, "grad_norm": 0.255859375, "learning_rate": 0.0009823461177462646, "loss": 2.1733, "step": 26240 }, { "epoch": 0.704191713181623, "grad_norm": 0.2490234375, "learning_rate": 0.00098232446000922, "loss": 2.1596, "step": 26241 }, { "epoch": 0.704218548733362, "grad_norm": 0.2451171875, "learning_rate": 0.000982302801433468, "loss": 2.1862, "step": 26242 }, { "epoch": 0.7042453842851009, "grad_norm": 0.2421875, "learning_rate": 0.0009822811420190562, "loss": 2.1619, "step": 26243 }, { "epoch": 0.7042722198368399, "grad_norm": 0.251953125, "learning_rate": 0.000982259481766032, "loss": 2.0349, "step": 26244 }, { "epoch": 0.7042990553885788, "grad_norm": 0.2470703125, "learning_rate": 0.000982237820674443, "loss": 2.16, "step": 26245 }, { "epoch": 0.7043258909403177, "grad_norm": 0.24609375, "learning_rate": 0.0009822161587443366, "loss": 2.1304, "step": 26246 }, { "epoch": 0.7043527264920567, "grad_norm": 0.255859375, "learning_rate": 0.0009821944959757605, "loss": 2.1617, "step": 26247 }, { "epoch": 0.7043795620437956, "grad_norm": 0.2421875, "learning_rate": 0.0009821728323687621, "loss": 2.0581, "step": 26248 }, { "epoch": 0.7044063975955346, "grad_norm": 0.25, "learning_rate": 0.0009821511679233889, "loss": 2.1844, "step": 26249 }, { "epoch": 0.7044332331472735, "grad_norm": 0.251953125, "learning_rate": 0.0009821295026396883, "loss": 2.1096, "step": 26250 }, { "epoch": 0.7044600686990125, "grad_norm": 0.26171875, "learning_rate": 0.0009821078365177085, "loss": 2.2104, "step": 26251 }, { "epoch": 0.7044869042507514, "grad_norm": 0.26171875, "learning_rate": 0.000982086169557496, "loss": 2.2114, "step": 26252 }, { "epoch": 0.7045137398024903, "grad_norm": 0.255859375, "learning_rate": 0.0009820645017590993, "loss": 2.2221, "step": 26253 }, { "epoch": 0.7045405753542293, "grad_norm": 0.25, "learning_rate": 0.000982042833122565, "loss": 2.2348, "step": 26254 }, { "epoch": 0.7045674109059682, "grad_norm": 0.2451171875, "learning_rate": 0.0009820211636479414, "loss": 2.1397, "step": 26255 }, { "epoch": 0.7045942464577072, "grad_norm": 0.251953125, "learning_rate": 0.000981999493335276, "loss": 2.2487, "step": 26256 }, { "epoch": 0.7046210820094461, "grad_norm": 0.259765625, "learning_rate": 0.0009819778221846157, "loss": 2.2006, "step": 26257 }, { "epoch": 0.704647917561185, "grad_norm": 0.25390625, "learning_rate": 0.0009819561501960084, "loss": 2.2432, "step": 26258 }, { "epoch": 0.704674753112924, "grad_norm": 0.259765625, "learning_rate": 0.0009819344773695018, "loss": 2.3018, "step": 26259 }, { "epoch": 0.7047015886646629, "grad_norm": 0.248046875, "learning_rate": 0.0009819128037051436, "loss": 2.2138, "step": 26260 }, { "epoch": 0.7047284242164019, "grad_norm": 0.251953125, "learning_rate": 0.0009818911292029807, "loss": 2.3528, "step": 26261 }, { "epoch": 0.7047552597681408, "grad_norm": 0.2470703125, "learning_rate": 0.0009818694538630613, "loss": 2.167, "step": 26262 }, { "epoch": 0.7047820953198798, "grad_norm": 0.244140625, "learning_rate": 0.0009818477776854324, "loss": 2.1266, "step": 26263 }, { "epoch": 0.7048089308716187, "grad_norm": 0.2578125, "learning_rate": 0.000981826100670142, "loss": 2.1577, "step": 26264 }, { "epoch": 0.7048357664233577, "grad_norm": 0.26953125, "learning_rate": 0.0009818044228172376, "loss": 2.2355, "step": 26265 }, { "epoch": 0.7048626019750966, "grad_norm": 0.25390625, "learning_rate": 0.0009817827441267661, "loss": 2.1891, "step": 26266 }, { "epoch": 0.7048894375268355, "grad_norm": 0.2490234375, "learning_rate": 0.000981761064598776, "loss": 2.2291, "step": 26267 }, { "epoch": 0.7049162730785745, "grad_norm": 0.236328125, "learning_rate": 0.0009817393842333144, "loss": 2.0879, "step": 26268 }, { "epoch": 0.7049431086303134, "grad_norm": 0.2470703125, "learning_rate": 0.0009817177030304288, "loss": 2.1269, "step": 26269 }, { "epoch": 0.7049699441820524, "grad_norm": 0.25390625, "learning_rate": 0.000981696020990167, "loss": 2.1703, "step": 26270 }, { "epoch": 0.7049967797337914, "grad_norm": 0.23828125, "learning_rate": 0.0009816743381125762, "loss": 2.1598, "step": 26271 }, { "epoch": 0.7050236152855303, "grad_norm": 0.2470703125, "learning_rate": 0.0009816526543977044, "loss": 2.156, "step": 26272 }, { "epoch": 0.7050504508372692, "grad_norm": 0.25, "learning_rate": 0.0009816309698455988, "loss": 2.1479, "step": 26273 }, { "epoch": 0.7050772863890081, "grad_norm": 0.259765625, "learning_rate": 0.0009816092844563075, "loss": 2.2102, "step": 26274 }, { "epoch": 0.7051041219407471, "grad_norm": 0.24609375, "learning_rate": 0.0009815875982298772, "loss": 2.1244, "step": 26275 }, { "epoch": 0.705130957492486, "grad_norm": 0.25, "learning_rate": 0.0009815659111663563, "loss": 2.074, "step": 26276 }, { "epoch": 0.705157793044225, "grad_norm": 0.2421875, "learning_rate": 0.000981544223265792, "loss": 2.0771, "step": 26277 }, { "epoch": 0.705184628595964, "grad_norm": 0.25, "learning_rate": 0.0009815225345282319, "loss": 2.1547, "step": 26278 }, { "epoch": 0.7052114641477029, "grad_norm": 0.26953125, "learning_rate": 0.0009815008449537235, "loss": 2.302, "step": 26279 }, { "epoch": 0.7052382996994419, "grad_norm": 0.25, "learning_rate": 0.0009814791545423144, "loss": 2.2185, "step": 26280 }, { "epoch": 0.7052651352511807, "grad_norm": 0.251953125, "learning_rate": 0.0009814574632940527, "loss": 2.0193, "step": 26281 }, { "epoch": 0.7052919708029197, "grad_norm": 0.25390625, "learning_rate": 0.0009814357712089852, "loss": 2.273, "step": 26282 }, { "epoch": 0.7053188063546586, "grad_norm": 0.24609375, "learning_rate": 0.0009814140782871599, "loss": 2.1844, "step": 26283 }, { "epoch": 0.7053456419063976, "grad_norm": 0.2470703125, "learning_rate": 0.0009813923845286244, "loss": 2.1281, "step": 26284 }, { "epoch": 0.7053724774581366, "grad_norm": 0.2470703125, "learning_rate": 0.0009813706899334262, "loss": 2.2412, "step": 26285 }, { "epoch": 0.7053993130098755, "grad_norm": 0.25, "learning_rate": 0.0009813489945016127, "loss": 2.1742, "step": 26286 }, { "epoch": 0.7054261485616145, "grad_norm": 0.25390625, "learning_rate": 0.000981327298233232, "loss": 2.1597, "step": 26287 }, { "epoch": 0.7054529841133533, "grad_norm": 0.255859375, "learning_rate": 0.0009813056011283314, "loss": 2.1842, "step": 26288 }, { "epoch": 0.7054798196650923, "grad_norm": 0.240234375, "learning_rate": 0.0009812839031869583, "loss": 2.1241, "step": 26289 }, { "epoch": 0.7055066552168313, "grad_norm": 0.244140625, "learning_rate": 0.0009812622044091605, "loss": 2.162, "step": 26290 }, { "epoch": 0.7055334907685702, "grad_norm": 0.25390625, "learning_rate": 0.0009812405047949855, "loss": 2.201, "step": 26291 }, { "epoch": 0.7055603263203092, "grad_norm": 0.2353515625, "learning_rate": 0.0009812188043444813, "loss": 2.135, "step": 26292 }, { "epoch": 0.7055871618720481, "grad_norm": 0.244140625, "learning_rate": 0.000981197103057695, "loss": 2.2328, "step": 26293 }, { "epoch": 0.705613997423787, "grad_norm": 0.251953125, "learning_rate": 0.0009811754009346745, "loss": 2.1576, "step": 26294 }, { "epoch": 0.7056408329755259, "grad_norm": 0.2353515625, "learning_rate": 0.0009811536979754674, "loss": 2.0913, "step": 26295 }, { "epoch": 0.7056676685272649, "grad_norm": 0.23828125, "learning_rate": 0.000981131994180121, "loss": 2.2562, "step": 26296 }, { "epoch": 0.7056945040790039, "grad_norm": 0.2421875, "learning_rate": 0.000981110289548683, "loss": 2.1513, "step": 26297 }, { "epoch": 0.7057213396307428, "grad_norm": 0.259765625, "learning_rate": 0.0009810885840812016, "loss": 2.1883, "step": 26298 }, { "epoch": 0.7057481751824818, "grad_norm": 0.2451171875, "learning_rate": 0.0009810668777777238, "loss": 2.1093, "step": 26299 }, { "epoch": 0.7057750107342207, "grad_norm": 0.2431640625, "learning_rate": 0.0009810451706382972, "loss": 2.1736, "step": 26300 }, { "epoch": 0.7058018462859597, "grad_norm": 0.248046875, "learning_rate": 0.0009810234626629697, "loss": 2.1998, "step": 26301 }, { "epoch": 0.7058286818376985, "grad_norm": 0.255859375, "learning_rate": 0.000981001753851789, "loss": 2.0915, "step": 26302 }, { "epoch": 0.7058555173894375, "grad_norm": 0.251953125, "learning_rate": 0.0009809800442048024, "loss": 2.192, "step": 26303 }, { "epoch": 0.7058823529411765, "grad_norm": 0.251953125, "learning_rate": 0.0009809583337220575, "loss": 2.1384, "step": 26304 }, { "epoch": 0.7059091884929154, "grad_norm": 0.2412109375, "learning_rate": 0.0009809366224036021, "loss": 2.0858, "step": 26305 }, { "epoch": 0.7059360240446544, "grad_norm": 0.240234375, "learning_rate": 0.0009809149102494841, "loss": 2.1895, "step": 26306 }, { "epoch": 0.7059628595963933, "grad_norm": 0.251953125, "learning_rate": 0.000980893197259751, "loss": 2.1382, "step": 26307 }, { "epoch": 0.7059896951481323, "grad_norm": 0.259765625, "learning_rate": 0.0009808714834344497, "loss": 2.1251, "step": 26308 }, { "epoch": 0.7060165306998711, "grad_norm": 0.25390625, "learning_rate": 0.0009808497687736287, "loss": 2.1286, "step": 26309 }, { "epoch": 0.7060433662516101, "grad_norm": 0.23828125, "learning_rate": 0.0009808280532773357, "loss": 2.1357, "step": 26310 }, { "epoch": 0.7060702018033491, "grad_norm": 0.2578125, "learning_rate": 0.0009808063369456173, "loss": 2.2489, "step": 26311 }, { "epoch": 0.706097037355088, "grad_norm": 0.2392578125, "learning_rate": 0.0009807846197785223, "loss": 2.2005, "step": 26312 }, { "epoch": 0.706123872906827, "grad_norm": 0.2431640625, "learning_rate": 0.0009807629017760978, "loss": 2.2573, "step": 26313 }, { "epoch": 0.7061507084585659, "grad_norm": 0.2421875, "learning_rate": 0.0009807411829383917, "loss": 2.2242, "step": 26314 }, { "epoch": 0.7061775440103049, "grad_norm": 0.244140625, "learning_rate": 0.000980719463265451, "loss": 2.2141, "step": 26315 }, { "epoch": 0.7062043795620438, "grad_norm": 0.2314453125, "learning_rate": 0.0009806977427573242, "loss": 2.152, "step": 26316 }, { "epoch": 0.7062312151137827, "grad_norm": 0.255859375, "learning_rate": 0.0009806760214140582, "loss": 2.2127, "step": 26317 }, { "epoch": 0.7062580506655217, "grad_norm": 0.2578125, "learning_rate": 0.0009806542992357014, "loss": 2.1614, "step": 26318 }, { "epoch": 0.7062848862172606, "grad_norm": 0.2412109375, "learning_rate": 0.000980632576222301, "loss": 2.2267, "step": 26319 }, { "epoch": 0.7063117217689996, "grad_norm": 0.240234375, "learning_rate": 0.0009806108523739046, "loss": 2.1251, "step": 26320 }, { "epoch": 0.7063385573207385, "grad_norm": 0.2490234375, "learning_rate": 0.00098058912769056, "loss": 2.2932, "step": 26321 }, { "epoch": 0.7063653928724775, "grad_norm": 0.251953125, "learning_rate": 0.0009805674021723147, "loss": 2.1472, "step": 26322 }, { "epoch": 0.7063922284242165, "grad_norm": 0.265625, "learning_rate": 0.0009805456758192165, "loss": 2.1889, "step": 26323 }, { "epoch": 0.7064190639759553, "grad_norm": 0.244140625, "learning_rate": 0.0009805239486313133, "loss": 2.1353, "step": 26324 }, { "epoch": 0.7064458995276943, "grad_norm": 0.2578125, "learning_rate": 0.000980502220608652, "loss": 2.217, "step": 26325 }, { "epoch": 0.7064727350794332, "grad_norm": 0.24609375, "learning_rate": 0.0009804804917512813, "loss": 2.1741, "step": 26326 }, { "epoch": 0.7064995706311722, "grad_norm": 0.2431640625, "learning_rate": 0.0009804587620592482, "loss": 2.0701, "step": 26327 }, { "epoch": 0.7065264061829111, "grad_norm": 0.2392578125, "learning_rate": 0.0009804370315326003, "loss": 2.1268, "step": 26328 }, { "epoch": 0.7065532417346501, "grad_norm": 0.251953125, "learning_rate": 0.0009804153001713857, "loss": 2.1491, "step": 26329 }, { "epoch": 0.706580077286389, "grad_norm": 0.248046875, "learning_rate": 0.0009803935679756517, "loss": 2.1503, "step": 26330 }, { "epoch": 0.7066069128381279, "grad_norm": 0.234375, "learning_rate": 0.0009803718349454464, "loss": 2.13, "step": 26331 }, { "epoch": 0.7066337483898669, "grad_norm": 0.2431640625, "learning_rate": 0.0009803501010808167, "loss": 2.1173, "step": 26332 }, { "epoch": 0.7066605839416058, "grad_norm": 0.2353515625, "learning_rate": 0.000980328366381811, "loss": 2.0484, "step": 26333 }, { "epoch": 0.7066874194933448, "grad_norm": 0.2431640625, "learning_rate": 0.0009803066308484772, "loss": 2.1442, "step": 26334 }, { "epoch": 0.7067142550450837, "grad_norm": 0.25, "learning_rate": 0.0009802848944808623, "loss": 2.2024, "step": 26335 }, { "epoch": 0.7067410905968227, "grad_norm": 0.2578125, "learning_rate": 0.000980263157279014, "loss": 2.2218, "step": 26336 }, { "epoch": 0.7067679261485617, "grad_norm": 0.2431640625, "learning_rate": 0.0009802414192429804, "loss": 2.1623, "step": 26337 }, { "epoch": 0.7067947617003005, "grad_norm": 0.2392578125, "learning_rate": 0.000980219680372809, "loss": 2.2231, "step": 26338 }, { "epoch": 0.7068215972520395, "grad_norm": 0.2412109375, "learning_rate": 0.000980197940668547, "loss": 2.1608, "step": 26339 }, { "epoch": 0.7068484328037784, "grad_norm": 0.2412109375, "learning_rate": 0.0009801762001302432, "loss": 2.1623, "step": 26340 }, { "epoch": 0.7068752683555174, "grad_norm": 0.24609375, "learning_rate": 0.0009801544587579444, "loss": 2.1544, "step": 26341 }, { "epoch": 0.7069021039072564, "grad_norm": 0.255859375, "learning_rate": 0.0009801327165516985, "loss": 2.2063, "step": 26342 }, { "epoch": 0.7069289394589953, "grad_norm": 0.2470703125, "learning_rate": 0.0009801109735115535, "loss": 2.1654, "step": 26343 }, { "epoch": 0.7069557750107343, "grad_norm": 0.2470703125, "learning_rate": 0.0009800892296375565, "loss": 2.1327, "step": 26344 }, { "epoch": 0.7069826105624731, "grad_norm": 0.251953125, "learning_rate": 0.000980067484929756, "loss": 2.3133, "step": 26345 }, { "epoch": 0.7070094461142121, "grad_norm": 0.2412109375, "learning_rate": 0.000980045739388199, "loss": 2.1182, "step": 26346 }, { "epoch": 0.707036281665951, "grad_norm": 0.2392578125, "learning_rate": 0.0009800239930129334, "loss": 2.2053, "step": 26347 }, { "epoch": 0.70706311721769, "grad_norm": 0.244140625, "learning_rate": 0.000980002245804007, "loss": 2.1992, "step": 26348 }, { "epoch": 0.707089952769429, "grad_norm": 0.244140625, "learning_rate": 0.0009799804977614675, "loss": 2.1042, "step": 26349 }, { "epoch": 0.7071167883211679, "grad_norm": 0.263671875, "learning_rate": 0.0009799587488853628, "loss": 2.3653, "step": 26350 }, { "epoch": 0.7071436238729069, "grad_norm": 0.2431640625, "learning_rate": 0.00097993699917574, "loss": 2.2215, "step": 26351 }, { "epoch": 0.7071704594246457, "grad_norm": 0.2470703125, "learning_rate": 0.0009799152486326475, "loss": 2.1729, "step": 26352 }, { "epoch": 0.7071972949763847, "grad_norm": 0.25, "learning_rate": 0.0009798934972561324, "loss": 2.174, "step": 26353 }, { "epoch": 0.7072241305281236, "grad_norm": 0.2421875, "learning_rate": 0.000979871745046243, "loss": 2.2157, "step": 26354 }, { "epoch": 0.7072509660798626, "grad_norm": 0.2451171875, "learning_rate": 0.0009798499920030267, "loss": 2.1766, "step": 26355 }, { "epoch": 0.7072778016316016, "grad_norm": 0.240234375, "learning_rate": 0.0009798282381265312, "loss": 2.1465, "step": 26356 }, { "epoch": 0.7073046371833405, "grad_norm": 0.2451171875, "learning_rate": 0.0009798064834168044, "loss": 2.1446, "step": 26357 }, { "epoch": 0.7073314727350795, "grad_norm": 0.263671875, "learning_rate": 0.000979784727873894, "loss": 2.1325, "step": 26358 }, { "epoch": 0.7073583082868183, "grad_norm": 0.2470703125, "learning_rate": 0.0009797629714978474, "loss": 2.1409, "step": 26359 }, { "epoch": 0.7073851438385573, "grad_norm": 0.25, "learning_rate": 0.0009797412142887128, "loss": 2.1345, "step": 26360 }, { "epoch": 0.7074119793902963, "grad_norm": 0.23046875, "learning_rate": 0.0009797194562465375, "loss": 2.1621, "step": 26361 }, { "epoch": 0.7074388149420352, "grad_norm": 0.263671875, "learning_rate": 0.0009796976973713693, "loss": 2.1189, "step": 26362 }, { "epoch": 0.7074656504937742, "grad_norm": 0.25390625, "learning_rate": 0.0009796759376632564, "loss": 2.1535, "step": 26363 }, { "epoch": 0.7074924860455131, "grad_norm": 0.25390625, "learning_rate": 0.000979654177122246, "loss": 2.154, "step": 26364 }, { "epoch": 0.7075193215972521, "grad_norm": 0.25390625, "learning_rate": 0.000979632415748386, "loss": 2.2806, "step": 26365 }, { "epoch": 0.7075461571489909, "grad_norm": 0.2392578125, "learning_rate": 0.0009796106535417245, "loss": 2.122, "step": 26366 }, { "epoch": 0.7075729927007299, "grad_norm": 0.251953125, "learning_rate": 0.0009795888905023086, "loss": 2.1822, "step": 26367 }, { "epoch": 0.7075998282524689, "grad_norm": 0.2490234375, "learning_rate": 0.0009795671266301864, "loss": 2.2565, "step": 26368 }, { "epoch": 0.7076266638042078, "grad_norm": 0.26171875, "learning_rate": 0.0009795453619254057, "loss": 2.2234, "step": 26369 }, { "epoch": 0.7076534993559468, "grad_norm": 0.240234375, "learning_rate": 0.000979523596388014, "loss": 2.1891, "step": 26370 }, { "epoch": 0.7076803349076857, "grad_norm": 0.2431640625, "learning_rate": 0.000979501830018059, "loss": 2.1241, "step": 26371 }, { "epoch": 0.7077071704594247, "grad_norm": 0.2431640625, "learning_rate": 0.0009794800628155891, "loss": 2.1521, "step": 26372 }, { "epoch": 0.7077340060111635, "grad_norm": 0.2392578125, "learning_rate": 0.0009794582947806514, "loss": 2.1677, "step": 26373 }, { "epoch": 0.7077608415629025, "grad_norm": 0.255859375, "learning_rate": 0.0009794365259132936, "loss": 2.2517, "step": 26374 }, { "epoch": 0.7077876771146415, "grad_norm": 0.251953125, "learning_rate": 0.000979414756213564, "loss": 2.2732, "step": 26375 }, { "epoch": 0.7078145126663804, "grad_norm": 0.240234375, "learning_rate": 0.0009793929856815099, "loss": 2.1689, "step": 26376 }, { "epoch": 0.7078413482181194, "grad_norm": 0.248046875, "learning_rate": 0.0009793712143171794, "loss": 2.2054, "step": 26377 }, { "epoch": 0.7078681837698583, "grad_norm": 0.2392578125, "learning_rate": 0.0009793494421206199, "loss": 2.1894, "step": 26378 }, { "epoch": 0.7078950193215973, "grad_norm": 0.2353515625, "learning_rate": 0.0009793276690918792, "loss": 2.2019, "step": 26379 }, { "epoch": 0.7079218548733361, "grad_norm": 0.2451171875, "learning_rate": 0.0009793058952310054, "loss": 2.2145, "step": 26380 }, { "epoch": 0.7079486904250751, "grad_norm": 0.2451171875, "learning_rate": 0.0009792841205380459, "loss": 2.2544, "step": 26381 }, { "epoch": 0.7079755259768141, "grad_norm": 0.248046875, "learning_rate": 0.000979262345013049, "loss": 2.1697, "step": 26382 }, { "epoch": 0.708002361528553, "grad_norm": 0.2470703125, "learning_rate": 0.0009792405686560616, "loss": 2.1888, "step": 26383 }, { "epoch": 0.708029197080292, "grad_norm": 0.236328125, "learning_rate": 0.0009792187914671323, "loss": 2.0997, "step": 26384 }, { "epoch": 0.7080560326320309, "grad_norm": 0.2431640625, "learning_rate": 0.0009791970134463085, "loss": 2.1804, "step": 26385 }, { "epoch": 0.7080828681837699, "grad_norm": 0.2421875, "learning_rate": 0.0009791752345936378, "loss": 2.1097, "step": 26386 }, { "epoch": 0.7081097037355089, "grad_norm": 0.251953125, "learning_rate": 0.0009791534549091685, "loss": 2.1372, "step": 26387 }, { "epoch": 0.7081365392872477, "grad_norm": 0.25390625, "learning_rate": 0.0009791316743929478, "loss": 2.2398, "step": 26388 }, { "epoch": 0.7081633748389867, "grad_norm": 0.236328125, "learning_rate": 0.000979109893045024, "loss": 2.1174, "step": 26389 }, { "epoch": 0.7081902103907256, "grad_norm": 0.25390625, "learning_rate": 0.0009790881108654443, "loss": 2.24, "step": 26390 }, { "epoch": 0.7082170459424646, "grad_norm": 0.25390625, "learning_rate": 0.000979066327854257, "loss": 2.2429, "step": 26391 }, { "epoch": 0.7082438814942035, "grad_norm": 0.2578125, "learning_rate": 0.0009790445440115098, "loss": 2.224, "step": 26392 }, { "epoch": 0.7082707170459425, "grad_norm": 0.2431640625, "learning_rate": 0.0009790227593372504, "loss": 2.2221, "step": 26393 }, { "epoch": 0.7082975525976815, "grad_norm": 0.2431640625, "learning_rate": 0.0009790009738315263, "loss": 2.1678, "step": 26394 }, { "epoch": 0.7083243881494203, "grad_norm": 0.2353515625, "learning_rate": 0.0009789791874943855, "loss": 2.0576, "step": 26395 }, { "epoch": 0.7083512237011593, "grad_norm": 0.24609375, "learning_rate": 0.0009789574003258763, "loss": 2.2515, "step": 26396 }, { "epoch": 0.7083780592528982, "grad_norm": 0.2412109375, "learning_rate": 0.0009789356123260458, "loss": 2.1447, "step": 26397 }, { "epoch": 0.7084048948046372, "grad_norm": 0.2392578125, "learning_rate": 0.0009789138234949423, "loss": 2.183, "step": 26398 }, { "epoch": 0.7084317303563761, "grad_norm": 0.2578125, "learning_rate": 0.000978892033832613, "loss": 2.2181, "step": 26399 }, { "epoch": 0.7084585659081151, "grad_norm": 0.2451171875, "learning_rate": 0.0009788702433391062, "loss": 2.149, "step": 26400 }, { "epoch": 0.7084854014598541, "grad_norm": 0.251953125, "learning_rate": 0.0009788484520144695, "loss": 2.2869, "step": 26401 }, { "epoch": 0.7085122370115929, "grad_norm": 0.23828125, "learning_rate": 0.0009788266598587507, "loss": 2.1161, "step": 26402 }, { "epoch": 0.7085390725633319, "grad_norm": 0.240234375, "learning_rate": 0.0009788048668719978, "loss": 2.1264, "step": 26403 }, { "epoch": 0.7085659081150708, "grad_norm": 0.2314453125, "learning_rate": 0.0009787830730542584, "loss": 2.0939, "step": 26404 }, { "epoch": 0.7085927436668098, "grad_norm": 0.236328125, "learning_rate": 0.0009787612784055806, "loss": 2.178, "step": 26405 }, { "epoch": 0.7086195792185487, "grad_norm": 0.2490234375, "learning_rate": 0.0009787394829260116, "loss": 2.1793, "step": 26406 }, { "epoch": 0.7086464147702877, "grad_norm": 0.251953125, "learning_rate": 0.0009787176866156, "loss": 2.2755, "step": 26407 }, { "epoch": 0.7086732503220267, "grad_norm": 0.234375, "learning_rate": 0.000978695889474393, "loss": 2.1329, "step": 26408 }, { "epoch": 0.7087000858737655, "grad_norm": 0.244140625, "learning_rate": 0.0009786740915024386, "loss": 2.2209, "step": 26409 }, { "epoch": 0.7087269214255045, "grad_norm": 0.2353515625, "learning_rate": 0.0009786522926997848, "loss": 2.1934, "step": 26410 }, { "epoch": 0.7087537569772434, "grad_norm": 0.2451171875, "learning_rate": 0.000978630493066479, "loss": 2.1866, "step": 26411 }, { "epoch": 0.7087805925289824, "grad_norm": 0.248046875, "learning_rate": 0.0009786086926025694, "loss": 2.234, "step": 26412 }, { "epoch": 0.7088074280807214, "grad_norm": 0.251953125, "learning_rate": 0.0009785868913081038, "loss": 2.2021, "step": 26413 }, { "epoch": 0.7088342636324603, "grad_norm": 0.2431640625, "learning_rate": 0.00097856508918313, "loss": 2.1326, "step": 26414 }, { "epoch": 0.7088610991841993, "grad_norm": 0.2470703125, "learning_rate": 0.0009785432862276955, "loss": 2.0893, "step": 26415 }, { "epoch": 0.7088879347359381, "grad_norm": 0.236328125, "learning_rate": 0.0009785214824418488, "loss": 2.148, "step": 26416 }, { "epoch": 0.7089147702876771, "grad_norm": 0.2451171875, "learning_rate": 0.000978499677825637, "loss": 2.2105, "step": 26417 }, { "epoch": 0.708941605839416, "grad_norm": 0.25390625, "learning_rate": 0.0009784778723791086, "loss": 2.1715, "step": 26418 }, { "epoch": 0.708968441391155, "grad_norm": 0.25390625, "learning_rate": 0.0009784560661023106, "loss": 2.2722, "step": 26419 }, { "epoch": 0.708995276942894, "grad_norm": 0.244140625, "learning_rate": 0.0009784342589952916, "loss": 2.1278, "step": 26420 }, { "epoch": 0.7090221124946329, "grad_norm": 0.236328125, "learning_rate": 0.0009784124510580992, "loss": 2.205, "step": 26421 }, { "epoch": 0.7090489480463719, "grad_norm": 0.2412109375, "learning_rate": 0.0009783906422907813, "loss": 2.2716, "step": 26422 }, { "epoch": 0.7090757835981107, "grad_norm": 0.2412109375, "learning_rate": 0.0009783688326933857, "loss": 2.0918, "step": 26423 }, { "epoch": 0.7091026191498497, "grad_norm": 0.2412109375, "learning_rate": 0.0009783470222659598, "loss": 2.1883, "step": 26424 }, { "epoch": 0.7091294547015886, "grad_norm": 0.251953125, "learning_rate": 0.0009783252110085522, "loss": 2.1599, "step": 26425 }, { "epoch": 0.7091562902533276, "grad_norm": 0.24609375, "learning_rate": 0.0009783033989212103, "loss": 2.2314, "step": 26426 }, { "epoch": 0.7091831258050666, "grad_norm": 0.2451171875, "learning_rate": 0.000978281586003982, "loss": 2.221, "step": 26427 }, { "epoch": 0.7092099613568055, "grad_norm": 0.25, "learning_rate": 0.0009782597722569153, "loss": 2.2254, "step": 26428 }, { "epoch": 0.7092367969085445, "grad_norm": 0.2412109375, "learning_rate": 0.000978237957680058, "loss": 2.116, "step": 26429 }, { "epoch": 0.7092636324602833, "grad_norm": 0.23828125, "learning_rate": 0.0009782161422734576, "loss": 2.1204, "step": 26430 }, { "epoch": 0.7092904680120223, "grad_norm": 0.25390625, "learning_rate": 0.0009781943260371625, "loss": 2.3069, "step": 26431 }, { "epoch": 0.7093173035637613, "grad_norm": 0.24609375, "learning_rate": 0.0009781725089712202, "loss": 2.1667, "step": 26432 }, { "epoch": 0.7093441391155002, "grad_norm": 0.251953125, "learning_rate": 0.0009781506910756788, "loss": 2.2027, "step": 26433 }, { "epoch": 0.7093709746672392, "grad_norm": 0.24609375, "learning_rate": 0.0009781288723505858, "loss": 2.2004, "step": 26434 }, { "epoch": 0.7093978102189781, "grad_norm": 0.2470703125, "learning_rate": 0.0009781070527959896, "loss": 2.1082, "step": 26435 }, { "epoch": 0.7094246457707171, "grad_norm": 0.234375, "learning_rate": 0.0009780852324119376, "loss": 2.1582, "step": 26436 }, { "epoch": 0.709451481322456, "grad_norm": 0.248046875, "learning_rate": 0.0009780634111984778, "loss": 2.2212, "step": 26437 }, { "epoch": 0.7094783168741949, "grad_norm": 0.23828125, "learning_rate": 0.000978041589155658, "loss": 2.1605, "step": 26438 }, { "epoch": 0.7095051524259339, "grad_norm": 0.2392578125, "learning_rate": 0.0009780197662835265, "loss": 2.1584, "step": 26439 }, { "epoch": 0.7095319879776728, "grad_norm": 0.2431640625, "learning_rate": 0.0009779979425821308, "loss": 2.1329, "step": 26440 }, { "epoch": 0.7095588235294118, "grad_norm": 0.255859375, "learning_rate": 0.0009779761180515186, "loss": 2.2934, "step": 26441 }, { "epoch": 0.7095856590811507, "grad_norm": 0.2412109375, "learning_rate": 0.000977954292691738, "loss": 2.1296, "step": 26442 }, { "epoch": 0.7096124946328897, "grad_norm": 0.2470703125, "learning_rate": 0.0009779324665028372, "loss": 2.182, "step": 26443 }, { "epoch": 0.7096393301846285, "grad_norm": 0.2333984375, "learning_rate": 0.0009779106394848635, "loss": 2.1824, "step": 26444 }, { "epoch": 0.7096661657363675, "grad_norm": 0.2451171875, "learning_rate": 0.0009778888116378649, "loss": 2.2611, "step": 26445 }, { "epoch": 0.7096930012881065, "grad_norm": 0.24609375, "learning_rate": 0.0009778669829618896, "loss": 2.2044, "step": 26446 }, { "epoch": 0.7097198368398454, "grad_norm": 0.24609375, "learning_rate": 0.0009778451534569854, "loss": 2.2022, "step": 26447 }, { "epoch": 0.7097466723915844, "grad_norm": 0.2421875, "learning_rate": 0.0009778233231232, "loss": 2.2161, "step": 26448 }, { "epoch": 0.7097735079433233, "grad_norm": 0.2412109375, "learning_rate": 0.0009778014919605814, "loss": 2.2332, "step": 26449 }, { "epoch": 0.7098003434950623, "grad_norm": 0.251953125, "learning_rate": 0.0009777796599691776, "loss": 2.2038, "step": 26450 }, { "epoch": 0.7098271790468011, "grad_norm": 0.251953125, "learning_rate": 0.0009777578271490362, "loss": 2.2558, "step": 26451 }, { "epoch": 0.7098540145985401, "grad_norm": 0.234375, "learning_rate": 0.0009777359935002053, "loss": 2.1396, "step": 26452 }, { "epoch": 0.7098808501502791, "grad_norm": 0.2490234375, "learning_rate": 0.0009777141590227327, "loss": 2.2377, "step": 26453 }, { "epoch": 0.709907685702018, "grad_norm": 0.2373046875, "learning_rate": 0.0009776923237166666, "loss": 2.1555, "step": 26454 }, { "epoch": 0.709934521253757, "grad_norm": 0.2421875, "learning_rate": 0.0009776704875820543, "loss": 2.1974, "step": 26455 }, { "epoch": 0.7099613568054959, "grad_norm": 0.2451171875, "learning_rate": 0.0009776486506189444, "loss": 2.2103, "step": 26456 }, { "epoch": 0.7099881923572349, "grad_norm": 0.25390625, "learning_rate": 0.000977626812827384, "loss": 2.3101, "step": 26457 }, { "epoch": 0.7100150279089739, "grad_norm": 0.2431640625, "learning_rate": 0.0009776049742074222, "loss": 2.2177, "step": 26458 }, { "epoch": 0.7100418634607127, "grad_norm": 0.2431640625, "learning_rate": 0.0009775831347591058, "loss": 2.1974, "step": 26459 }, { "epoch": 0.7100686990124517, "grad_norm": 0.2392578125, "learning_rate": 0.000977561294482483, "loss": 2.128, "step": 26460 }, { "epoch": 0.7100955345641906, "grad_norm": 0.244140625, "learning_rate": 0.0009775394533776017, "loss": 2.1983, "step": 26461 }, { "epoch": 0.7101223701159296, "grad_norm": 0.23828125, "learning_rate": 0.00097751761144451, "loss": 2.0925, "step": 26462 }, { "epoch": 0.7101492056676685, "grad_norm": 0.2314453125, "learning_rate": 0.0009774957686832558, "loss": 2.2125, "step": 26463 }, { "epoch": 0.7101760412194075, "grad_norm": 0.248046875, "learning_rate": 0.000977473925093887, "loss": 2.1312, "step": 26464 }, { "epoch": 0.7102028767711465, "grad_norm": 0.25, "learning_rate": 0.0009774520806764514, "loss": 2.2626, "step": 26465 }, { "epoch": 0.7102297123228853, "grad_norm": 0.251953125, "learning_rate": 0.000977430235430997, "loss": 2.1457, "step": 26466 }, { "epoch": 0.7102565478746243, "grad_norm": 0.240234375, "learning_rate": 0.0009774083893575717, "loss": 2.1084, "step": 26467 }, { "epoch": 0.7102833834263632, "grad_norm": 0.2353515625, "learning_rate": 0.0009773865424562234, "loss": 2.0869, "step": 26468 }, { "epoch": 0.7103102189781022, "grad_norm": 0.244140625, "learning_rate": 0.0009773646947270002, "loss": 2.1887, "step": 26469 }, { "epoch": 0.7103370545298411, "grad_norm": 0.23828125, "learning_rate": 0.0009773428461699496, "loss": 2.077, "step": 26470 }, { "epoch": 0.7103638900815801, "grad_norm": 0.244140625, "learning_rate": 0.0009773209967851202, "loss": 2.1494, "step": 26471 }, { "epoch": 0.7103907256333191, "grad_norm": 0.25, "learning_rate": 0.0009772991465725593, "loss": 2.2402, "step": 26472 }, { "epoch": 0.7104175611850579, "grad_norm": 0.25, "learning_rate": 0.000977277295532315, "loss": 2.2166, "step": 26473 }, { "epoch": 0.7104443967367969, "grad_norm": 0.2470703125, "learning_rate": 0.0009772554436644355, "loss": 2.2346, "step": 26474 }, { "epoch": 0.7104712322885358, "grad_norm": 0.23828125, "learning_rate": 0.0009772335909689685, "loss": 2.2399, "step": 26475 }, { "epoch": 0.7104980678402748, "grad_norm": 0.2392578125, "learning_rate": 0.0009772117374459617, "loss": 2.154, "step": 26476 }, { "epoch": 0.7105249033920137, "grad_norm": 0.2373046875, "learning_rate": 0.0009771898830954638, "loss": 2.2133, "step": 26477 }, { "epoch": 0.7105517389437527, "grad_norm": 0.2578125, "learning_rate": 0.0009771680279175219, "loss": 2.2143, "step": 26478 }, { "epoch": 0.7105785744954917, "grad_norm": 0.2373046875, "learning_rate": 0.0009771461719121843, "loss": 2.0925, "step": 26479 }, { "epoch": 0.7106054100472305, "grad_norm": 0.25, "learning_rate": 0.0009771243150794992, "loss": 2.2242, "step": 26480 }, { "epoch": 0.7106322455989695, "grad_norm": 0.23828125, "learning_rate": 0.0009771024574195143, "loss": 2.1797, "step": 26481 }, { "epoch": 0.7106590811507084, "grad_norm": 0.25, "learning_rate": 0.0009770805989322772, "loss": 2.2197, "step": 26482 }, { "epoch": 0.7106859167024474, "grad_norm": 0.2353515625, "learning_rate": 0.0009770587396178366, "loss": 2.1906, "step": 26483 }, { "epoch": 0.7107127522541864, "grad_norm": 0.23828125, "learning_rate": 0.00097703687947624, "loss": 2.1095, "step": 26484 }, { "epoch": 0.7107395878059253, "grad_norm": 0.2421875, "learning_rate": 0.0009770150185075353, "loss": 2.2697, "step": 26485 }, { "epoch": 0.7107664233576643, "grad_norm": 0.2333984375, "learning_rate": 0.0009769931567117705, "loss": 2.2014, "step": 26486 }, { "epoch": 0.7107932589094031, "grad_norm": 0.2392578125, "learning_rate": 0.000976971294088994, "loss": 2.2167, "step": 26487 }, { "epoch": 0.7108200944611421, "grad_norm": 0.23828125, "learning_rate": 0.0009769494306392527, "loss": 2.2007, "step": 26488 }, { "epoch": 0.710846930012881, "grad_norm": 0.2431640625, "learning_rate": 0.000976927566362596, "loss": 2.1636, "step": 26489 }, { "epoch": 0.71087376556462, "grad_norm": 0.2451171875, "learning_rate": 0.0009769057012590705, "loss": 2.2399, "step": 26490 }, { "epoch": 0.710900601116359, "grad_norm": 0.236328125, "learning_rate": 0.0009768838353287252, "loss": 2.1216, "step": 26491 }, { "epoch": 0.7109274366680979, "grad_norm": 0.236328125, "learning_rate": 0.0009768619685716073, "loss": 2.1687, "step": 26492 }, { "epoch": 0.7109542722198369, "grad_norm": 0.25390625, "learning_rate": 0.0009768401009877655, "loss": 2.2657, "step": 26493 }, { "epoch": 0.7109811077715757, "grad_norm": 0.24609375, "learning_rate": 0.0009768182325772471, "loss": 2.1147, "step": 26494 }, { "epoch": 0.7110079433233147, "grad_norm": 0.2412109375, "learning_rate": 0.0009767963633401007, "loss": 2.2564, "step": 26495 }, { "epoch": 0.7110347788750536, "grad_norm": 0.240234375, "learning_rate": 0.0009767744932763737, "loss": 2.1911, "step": 26496 }, { "epoch": 0.7110616144267926, "grad_norm": 0.2421875, "learning_rate": 0.0009767526223861142, "loss": 2.2025, "step": 26497 }, { "epoch": 0.7110884499785316, "grad_norm": 0.2431640625, "learning_rate": 0.0009767307506693705, "loss": 2.2017, "step": 26498 }, { "epoch": 0.7111152855302705, "grad_norm": 0.240234375, "learning_rate": 0.00097670887812619, "loss": 2.0919, "step": 26499 }, { "epoch": 0.7111421210820095, "grad_norm": 0.259765625, "learning_rate": 0.0009766870047566215, "loss": 2.2052, "step": 26500 }, { "epoch": 0.7111689566337484, "grad_norm": 0.248046875, "learning_rate": 0.0009766651305607122, "loss": 2.2001, "step": 26501 }, { "epoch": 0.7111957921854873, "grad_norm": 0.2373046875, "learning_rate": 0.0009766432555385106, "loss": 2.2244, "step": 26502 }, { "epoch": 0.7112226277372263, "grad_norm": 0.2490234375, "learning_rate": 0.0009766213796900645, "loss": 2.2182, "step": 26503 }, { "epoch": 0.7112494632889652, "grad_norm": 0.240234375, "learning_rate": 0.000976599503015422, "loss": 2.1587, "step": 26504 }, { "epoch": 0.7112762988407042, "grad_norm": 0.244140625, "learning_rate": 0.0009765776255146307, "loss": 2.2367, "step": 26505 }, { "epoch": 0.7113031343924431, "grad_norm": 0.23828125, "learning_rate": 0.0009765557471877392, "loss": 2.2224, "step": 26506 }, { "epoch": 0.7113299699441821, "grad_norm": 0.2373046875, "learning_rate": 0.0009765338680347949, "loss": 2.1012, "step": 26507 }, { "epoch": 0.711356805495921, "grad_norm": 0.251953125, "learning_rate": 0.0009765119880558463, "loss": 2.2534, "step": 26508 }, { "epoch": 0.7113836410476599, "grad_norm": 0.2421875, "learning_rate": 0.000976490107250941, "loss": 2.2212, "step": 26509 }, { "epoch": 0.7114104765993989, "grad_norm": 0.234375, "learning_rate": 0.0009764682256201272, "loss": 2.184, "step": 26510 }, { "epoch": 0.7114373121511378, "grad_norm": 0.2431640625, "learning_rate": 0.0009764463431634529, "loss": 2.2572, "step": 26511 }, { "epoch": 0.7114641477028768, "grad_norm": 0.259765625, "learning_rate": 0.000976424459880966, "loss": 2.2271, "step": 26512 }, { "epoch": 0.7114909832546157, "grad_norm": 0.2451171875, "learning_rate": 0.0009764025757727148, "loss": 2.1992, "step": 26513 }, { "epoch": 0.7115178188063547, "grad_norm": 0.244140625, "learning_rate": 0.0009763806908387469, "loss": 2.1732, "step": 26514 }, { "epoch": 0.7115446543580936, "grad_norm": 0.2470703125, "learning_rate": 0.0009763588050791105, "loss": 2.2246, "step": 26515 }, { "epoch": 0.7115714899098325, "grad_norm": 0.24609375, "learning_rate": 0.0009763369184938536, "loss": 2.0613, "step": 26516 }, { "epoch": 0.7115983254615715, "grad_norm": 0.25, "learning_rate": 0.0009763150310830243, "loss": 2.2077, "step": 26517 }, { "epoch": 0.7116251610133104, "grad_norm": 0.25, "learning_rate": 0.0009762931428466705, "loss": 2.1319, "step": 26518 }, { "epoch": 0.7116519965650494, "grad_norm": 0.2412109375, "learning_rate": 0.0009762712537848402, "loss": 2.2079, "step": 26519 }, { "epoch": 0.7116788321167883, "grad_norm": 0.2490234375, "learning_rate": 0.0009762493638975817, "loss": 2.2119, "step": 26520 }, { "epoch": 0.7117056676685273, "grad_norm": 0.240234375, "learning_rate": 0.0009762274731849425, "loss": 2.0914, "step": 26521 }, { "epoch": 0.7117325032202662, "grad_norm": 0.244140625, "learning_rate": 0.0009762055816469711, "loss": 2.1381, "step": 26522 }, { "epoch": 0.7117593387720051, "grad_norm": 0.236328125, "learning_rate": 0.0009761836892837152, "loss": 2.1849, "step": 26523 }, { "epoch": 0.7117861743237441, "grad_norm": 0.2333984375, "learning_rate": 0.000976161796095223, "loss": 2.1403, "step": 26524 }, { "epoch": 0.711813009875483, "grad_norm": 0.244140625, "learning_rate": 0.0009761399020815426, "loss": 2.1343, "step": 26525 }, { "epoch": 0.711839845427222, "grad_norm": 0.2451171875, "learning_rate": 0.0009761180072427219, "loss": 2.1273, "step": 26526 }, { "epoch": 0.7118666809789609, "grad_norm": 0.25390625, "learning_rate": 0.0009760961115788088, "loss": 2.1637, "step": 26527 }, { "epoch": 0.7118935165306999, "grad_norm": 0.265625, "learning_rate": 0.0009760742150898516, "loss": 2.1607, "step": 26528 }, { "epoch": 0.7119203520824389, "grad_norm": 0.26171875, "learning_rate": 0.000976052317775898, "loss": 2.1885, "step": 26529 }, { "epoch": 0.7119471876341777, "grad_norm": 0.248046875, "learning_rate": 0.0009760304196369965, "loss": 2.0968, "step": 26530 }, { "epoch": 0.7119740231859167, "grad_norm": 0.259765625, "learning_rate": 0.0009760085206731948, "loss": 2.1334, "step": 26531 }, { "epoch": 0.7120008587376556, "grad_norm": 0.234375, "learning_rate": 0.0009759866208845411, "loss": 2.2171, "step": 26532 }, { "epoch": 0.7120276942893946, "grad_norm": 0.240234375, "learning_rate": 0.0009759647202710833, "loss": 2.1977, "step": 26533 }, { "epoch": 0.7120545298411335, "grad_norm": 0.24609375, "learning_rate": 0.0009759428188328695, "loss": 2.2045, "step": 26534 }, { "epoch": 0.7120813653928725, "grad_norm": 0.25390625, "learning_rate": 0.0009759209165699478, "loss": 2.1784, "step": 26535 }, { "epoch": 0.7121082009446115, "grad_norm": 0.244140625, "learning_rate": 0.0009758990134823663, "loss": 2.153, "step": 26536 }, { "epoch": 0.7121350364963503, "grad_norm": 0.244140625, "learning_rate": 0.0009758771095701727, "loss": 2.0913, "step": 26537 }, { "epoch": 0.7121618720480893, "grad_norm": 0.248046875, "learning_rate": 0.0009758552048334155, "loss": 2.1986, "step": 26538 }, { "epoch": 0.7121887075998282, "grad_norm": 0.2470703125, "learning_rate": 0.0009758332992721424, "loss": 2.1566, "step": 26539 }, { "epoch": 0.7122155431515672, "grad_norm": 0.25390625, "learning_rate": 0.0009758113928864018, "loss": 2.2003, "step": 26540 }, { "epoch": 0.7122423787033061, "grad_norm": 0.2421875, "learning_rate": 0.0009757894856762415, "loss": 2.1896, "step": 26541 }, { "epoch": 0.7122692142550451, "grad_norm": 0.2431640625, "learning_rate": 0.0009757675776417096, "loss": 2.2213, "step": 26542 }, { "epoch": 0.7122960498067841, "grad_norm": 0.234375, "learning_rate": 0.0009757456687828543, "loss": 2.1619, "step": 26543 }, { "epoch": 0.712322885358523, "grad_norm": 0.2412109375, "learning_rate": 0.0009757237590997234, "loss": 2.1446, "step": 26544 }, { "epoch": 0.7123497209102619, "grad_norm": 0.24609375, "learning_rate": 0.0009757018485923651, "loss": 2.1798, "step": 26545 }, { "epoch": 0.7123765564620008, "grad_norm": 0.2265625, "learning_rate": 0.0009756799372608276, "loss": 2.2384, "step": 26546 }, { "epoch": 0.7124033920137398, "grad_norm": 0.251953125, "learning_rate": 0.0009756580251051589, "loss": 2.1551, "step": 26547 }, { "epoch": 0.7124302275654787, "grad_norm": 0.2431640625, "learning_rate": 0.0009756361121254069, "loss": 2.058, "step": 26548 }, { "epoch": 0.7124570631172177, "grad_norm": 0.2451171875, "learning_rate": 0.0009756141983216196, "loss": 2.1257, "step": 26549 }, { "epoch": 0.7124838986689567, "grad_norm": 0.236328125, "learning_rate": 0.0009755922836938455, "loss": 2.0857, "step": 26550 }, { "epoch": 0.7125107342206956, "grad_norm": 0.251953125, "learning_rate": 0.0009755703682421325, "loss": 2.1711, "step": 26551 }, { "epoch": 0.7125375697724345, "grad_norm": 0.2373046875, "learning_rate": 0.0009755484519665283, "loss": 2.087, "step": 26552 }, { "epoch": 0.7125644053241734, "grad_norm": 0.2578125, "learning_rate": 0.0009755265348670816, "loss": 2.0884, "step": 26553 }, { "epoch": 0.7125912408759124, "grad_norm": 0.244140625, "learning_rate": 0.0009755046169438401, "loss": 2.145, "step": 26554 }, { "epoch": 0.7126180764276514, "grad_norm": 0.244140625, "learning_rate": 0.0009754826981968516, "loss": 2.1847, "step": 26555 }, { "epoch": 0.7126449119793903, "grad_norm": 0.2392578125, "learning_rate": 0.000975460778626165, "loss": 2.2237, "step": 26556 }, { "epoch": 0.7126717475311293, "grad_norm": 0.2392578125, "learning_rate": 0.0009754388582318276, "loss": 2.1557, "step": 26557 }, { "epoch": 0.7126985830828682, "grad_norm": 0.2421875, "learning_rate": 0.000975416937013888, "loss": 2.167, "step": 26558 }, { "epoch": 0.7127254186346071, "grad_norm": 0.2578125, "learning_rate": 0.000975395014972394, "loss": 2.2948, "step": 26559 }, { "epoch": 0.712752254186346, "grad_norm": 0.2412109375, "learning_rate": 0.0009753730921073938, "loss": 2.2453, "step": 26560 }, { "epoch": 0.712779089738085, "grad_norm": 0.248046875, "learning_rate": 0.0009753511684189354, "loss": 2.2146, "step": 26561 }, { "epoch": 0.712805925289824, "grad_norm": 0.2490234375, "learning_rate": 0.0009753292439070671, "loss": 2.2577, "step": 26562 }, { "epoch": 0.7128327608415629, "grad_norm": 0.234375, "learning_rate": 0.0009753073185718368, "loss": 2.041, "step": 26563 }, { "epoch": 0.7128595963933019, "grad_norm": 0.2451171875, "learning_rate": 0.0009752853924132926, "loss": 2.2115, "step": 26564 }, { "epoch": 0.7128864319450408, "grad_norm": 0.2353515625, "learning_rate": 0.0009752634654314827, "loss": 2.1553, "step": 26565 }, { "epoch": 0.7129132674967797, "grad_norm": 0.2333984375, "learning_rate": 0.0009752415376264551, "loss": 2.1766, "step": 26566 }, { "epoch": 0.7129401030485186, "grad_norm": 0.2421875, "learning_rate": 0.0009752196089982581, "loss": 2.1153, "step": 26567 }, { "epoch": 0.7129669386002576, "grad_norm": 0.2421875, "learning_rate": 0.0009751976795469397, "loss": 2.1188, "step": 26568 }, { "epoch": 0.7129937741519966, "grad_norm": 0.23828125, "learning_rate": 0.0009751757492725479, "loss": 2.097, "step": 26569 }, { "epoch": 0.7130206097037355, "grad_norm": 0.25, "learning_rate": 0.0009751538181751308, "loss": 2.2314, "step": 26570 }, { "epoch": 0.7130474452554745, "grad_norm": 0.2470703125, "learning_rate": 0.0009751318862547367, "loss": 2.1844, "step": 26571 }, { "epoch": 0.7130742808072134, "grad_norm": 0.2333984375, "learning_rate": 0.0009751099535114135, "loss": 2.2145, "step": 26572 }, { "epoch": 0.7131011163589523, "grad_norm": 0.240234375, "learning_rate": 0.0009750880199452095, "loss": 2.2168, "step": 26573 }, { "epoch": 0.7131279519106913, "grad_norm": 0.240234375, "learning_rate": 0.0009750660855561726, "loss": 2.1029, "step": 26574 }, { "epoch": 0.7131547874624302, "grad_norm": 0.244140625, "learning_rate": 0.0009750441503443513, "loss": 2.0656, "step": 26575 }, { "epoch": 0.7131816230141692, "grad_norm": 0.2373046875, "learning_rate": 0.0009750222143097934, "loss": 2.1834, "step": 26576 }, { "epoch": 0.7132084585659081, "grad_norm": 0.2470703125, "learning_rate": 0.000975000277452547, "loss": 2.1978, "step": 26577 }, { "epoch": 0.7132352941176471, "grad_norm": 0.2333984375, "learning_rate": 0.0009749783397726605, "loss": 2.0035, "step": 26578 }, { "epoch": 0.713262129669386, "grad_norm": 0.2490234375, "learning_rate": 0.0009749564012701815, "loss": 2.1097, "step": 26579 }, { "epoch": 0.713288965221125, "grad_norm": 0.2734375, "learning_rate": 0.0009749344619451587, "loss": 2.2697, "step": 26580 }, { "epoch": 0.7133158007728639, "grad_norm": 0.25390625, "learning_rate": 0.00097491252179764, "loss": 2.2661, "step": 26581 }, { "epoch": 0.7133426363246028, "grad_norm": 0.244140625, "learning_rate": 0.0009748905808276736, "loss": 2.0519, "step": 26582 }, { "epoch": 0.7133694718763418, "grad_norm": 0.2412109375, "learning_rate": 0.0009748686390353075, "loss": 2.1429, "step": 26583 }, { "epoch": 0.7133963074280807, "grad_norm": 0.240234375, "learning_rate": 0.0009748466964205897, "loss": 2.147, "step": 26584 }, { "epoch": 0.7134231429798197, "grad_norm": 0.255859375, "learning_rate": 0.0009748247529835686, "loss": 2.1525, "step": 26585 }, { "epoch": 0.7134499785315586, "grad_norm": 0.2412109375, "learning_rate": 0.0009748028087242926, "loss": 2.1617, "step": 26586 }, { "epoch": 0.7134768140832976, "grad_norm": 0.23828125, "learning_rate": 0.0009747808636428092, "loss": 2.1789, "step": 26587 }, { "epoch": 0.7135036496350365, "grad_norm": 0.2421875, "learning_rate": 0.0009747589177391667, "loss": 2.1472, "step": 26588 }, { "epoch": 0.7135304851867754, "grad_norm": 0.2578125, "learning_rate": 0.0009747369710134137, "loss": 2.236, "step": 26589 }, { "epoch": 0.7135573207385144, "grad_norm": 0.236328125, "learning_rate": 0.0009747150234655981, "loss": 2.2637, "step": 26590 }, { "epoch": 0.7135841562902533, "grad_norm": 0.2333984375, "learning_rate": 0.0009746930750957676, "loss": 2.077, "step": 26591 }, { "epoch": 0.7136109918419923, "grad_norm": 0.2392578125, "learning_rate": 0.000974671125903971, "loss": 2.1551, "step": 26592 }, { "epoch": 0.7136378273937312, "grad_norm": 0.2265625, "learning_rate": 0.0009746491758902562, "loss": 2.1156, "step": 26593 }, { "epoch": 0.7136646629454702, "grad_norm": 0.244140625, "learning_rate": 0.0009746272250546711, "loss": 2.3072, "step": 26594 }, { "epoch": 0.7136914984972091, "grad_norm": 0.240234375, "learning_rate": 0.0009746052733972644, "loss": 2.0442, "step": 26595 }, { "epoch": 0.713718334048948, "grad_norm": 0.24609375, "learning_rate": 0.0009745833209180838, "loss": 2.2002, "step": 26596 }, { "epoch": 0.713745169600687, "grad_norm": 0.2412109375, "learning_rate": 0.0009745613676171775, "loss": 2.2479, "step": 26597 }, { "epoch": 0.7137720051524259, "grad_norm": 0.236328125, "learning_rate": 0.0009745394134945939, "loss": 2.1805, "step": 26598 }, { "epoch": 0.7137988407041649, "grad_norm": 0.25390625, "learning_rate": 0.0009745174585503808, "loss": 2.2938, "step": 26599 }, { "epoch": 0.7138256762559039, "grad_norm": 0.2412109375, "learning_rate": 0.0009744955027845869, "loss": 2.1124, "step": 26600 }, { "epoch": 0.7138525118076428, "grad_norm": 0.244140625, "learning_rate": 0.0009744735461972597, "loss": 2.1273, "step": 26601 }, { "epoch": 0.7138793473593817, "grad_norm": 0.2333984375, "learning_rate": 0.000974451588788448, "loss": 2.14, "step": 26602 }, { "epoch": 0.7139061829111206, "grad_norm": 0.248046875, "learning_rate": 0.0009744296305581996, "loss": 2.1523, "step": 26603 }, { "epoch": 0.7139330184628596, "grad_norm": 0.25390625, "learning_rate": 0.0009744076715065628, "loss": 2.212, "step": 26604 }, { "epoch": 0.7139598540145985, "grad_norm": 0.244140625, "learning_rate": 0.0009743857116335855, "loss": 2.2995, "step": 26605 }, { "epoch": 0.7139866895663375, "grad_norm": 0.25390625, "learning_rate": 0.0009743637509393162, "loss": 2.3085, "step": 26606 }, { "epoch": 0.7140135251180765, "grad_norm": 0.2451171875, "learning_rate": 0.0009743417894238031, "loss": 2.3327, "step": 26607 }, { "epoch": 0.7140403606698154, "grad_norm": 0.240234375, "learning_rate": 0.0009743198270870941, "loss": 2.2512, "step": 26608 }, { "epoch": 0.7140671962215543, "grad_norm": 0.236328125, "learning_rate": 0.0009742978639292378, "loss": 2.1859, "step": 26609 }, { "epoch": 0.7140940317732932, "grad_norm": 0.255859375, "learning_rate": 0.0009742758999502818, "loss": 2.259, "step": 26610 }, { "epoch": 0.7141208673250322, "grad_norm": 0.2451171875, "learning_rate": 0.0009742539351502745, "loss": 2.1807, "step": 26611 }, { "epoch": 0.7141477028767711, "grad_norm": 0.2373046875, "learning_rate": 0.0009742319695292644, "loss": 2.0716, "step": 26612 }, { "epoch": 0.7141745384285101, "grad_norm": 0.244140625, "learning_rate": 0.0009742100030872993, "loss": 2.2145, "step": 26613 }, { "epoch": 0.7142013739802491, "grad_norm": 0.24609375, "learning_rate": 0.0009741880358244279, "loss": 2.2003, "step": 26614 }, { "epoch": 0.714228209531988, "grad_norm": 0.2412109375, "learning_rate": 0.0009741660677406977, "loss": 2.1779, "step": 26615 }, { "epoch": 0.714255045083727, "grad_norm": 0.2431640625, "learning_rate": 0.0009741440988361573, "loss": 2.2059, "step": 26616 }, { "epoch": 0.7142818806354658, "grad_norm": 0.2392578125, "learning_rate": 0.0009741221291108548, "loss": 2.2381, "step": 26617 }, { "epoch": 0.7143087161872048, "grad_norm": 0.2392578125, "learning_rate": 0.0009741001585648384, "loss": 2.1562, "step": 26618 }, { "epoch": 0.7143355517389438, "grad_norm": 0.24609375, "learning_rate": 0.0009740781871981565, "loss": 2.2241, "step": 26619 }, { "epoch": 0.7143623872906827, "grad_norm": 0.224609375, "learning_rate": 0.0009740562150108569, "loss": 2.1564, "step": 26620 }, { "epoch": 0.7143892228424217, "grad_norm": 0.2421875, "learning_rate": 0.0009740342420029882, "loss": 2.2215, "step": 26621 }, { "epoch": 0.7144160583941606, "grad_norm": 0.2412109375, "learning_rate": 0.0009740122681745982, "loss": 2.15, "step": 26622 }, { "epoch": 0.7144428939458995, "grad_norm": 0.2421875, "learning_rate": 0.0009739902935257354, "loss": 2.2219, "step": 26623 }, { "epoch": 0.7144697294976384, "grad_norm": 0.2490234375, "learning_rate": 0.0009739683180564482, "loss": 2.1539, "step": 26624 }, { "epoch": 0.7144965650493774, "grad_norm": 0.2470703125, "learning_rate": 0.0009739463417667841, "loss": 2.1944, "step": 26625 }, { "epoch": 0.7145234006011164, "grad_norm": 0.23828125, "learning_rate": 0.000973924364656792, "loss": 2.1955, "step": 26626 }, { "epoch": 0.7145502361528553, "grad_norm": 0.2470703125, "learning_rate": 0.0009739023867265197, "loss": 2.0997, "step": 26627 }, { "epoch": 0.7145770717045943, "grad_norm": 0.2353515625, "learning_rate": 0.0009738804079760157, "loss": 2.2079, "step": 26628 }, { "epoch": 0.7146039072563332, "grad_norm": 0.2353515625, "learning_rate": 0.0009738584284053282, "loss": 2.0332, "step": 26629 }, { "epoch": 0.7146307428080722, "grad_norm": 0.25390625, "learning_rate": 0.000973836448014505, "loss": 2.1949, "step": 26630 }, { "epoch": 0.714657578359811, "grad_norm": 0.2392578125, "learning_rate": 0.0009738144668035948, "loss": 2.0828, "step": 26631 }, { "epoch": 0.71468441391155, "grad_norm": 0.2353515625, "learning_rate": 0.0009737924847726454, "loss": 2.1755, "step": 26632 }, { "epoch": 0.714711249463289, "grad_norm": 0.251953125, "learning_rate": 0.0009737705019217057, "loss": 2.2157, "step": 26633 }, { "epoch": 0.7147380850150279, "grad_norm": 0.2578125, "learning_rate": 0.000973748518250823, "loss": 2.288, "step": 26634 }, { "epoch": 0.7147649205667669, "grad_norm": 0.2451171875, "learning_rate": 0.0009737265337600464, "loss": 2.1963, "step": 26635 }, { "epoch": 0.7147917561185058, "grad_norm": 0.248046875, "learning_rate": 0.0009737045484494236, "loss": 2.1495, "step": 26636 }, { "epoch": 0.7148185916702448, "grad_norm": 0.2421875, "learning_rate": 0.0009736825623190029, "loss": 2.2009, "step": 26637 }, { "epoch": 0.7148454272219836, "grad_norm": 0.2412109375, "learning_rate": 0.0009736605753688328, "loss": 2.159, "step": 26638 }, { "epoch": 0.7148722627737226, "grad_norm": 0.240234375, "learning_rate": 0.000973638587598961, "loss": 2.1435, "step": 26639 }, { "epoch": 0.7148990983254616, "grad_norm": 0.234375, "learning_rate": 0.0009736165990094364, "loss": 2.2029, "step": 26640 }, { "epoch": 0.7149259338772005, "grad_norm": 0.2294921875, "learning_rate": 0.0009735946096003066, "loss": 2.1319, "step": 26641 }, { "epoch": 0.7149527694289395, "grad_norm": 0.2412109375, "learning_rate": 0.0009735726193716203, "loss": 2.125, "step": 26642 }, { "epoch": 0.7149796049806784, "grad_norm": 0.2431640625, "learning_rate": 0.0009735506283234255, "loss": 2.271, "step": 26643 }, { "epoch": 0.7150064405324174, "grad_norm": 0.2392578125, "learning_rate": 0.0009735286364557707, "loss": 2.2577, "step": 26644 }, { "epoch": 0.7150332760841563, "grad_norm": 0.2353515625, "learning_rate": 0.0009735066437687038, "loss": 2.1274, "step": 26645 }, { "epoch": 0.7150601116358952, "grad_norm": 0.25, "learning_rate": 0.0009734846502622732, "loss": 2.3229, "step": 26646 }, { "epoch": 0.7150869471876342, "grad_norm": 0.2392578125, "learning_rate": 0.0009734626559365272, "loss": 2.233, "step": 26647 }, { "epoch": 0.7151137827393731, "grad_norm": 0.244140625, "learning_rate": 0.0009734406607915142, "loss": 2.1608, "step": 26648 }, { "epoch": 0.7151406182911121, "grad_norm": 0.2392578125, "learning_rate": 0.000973418664827282, "loss": 2.1283, "step": 26649 }, { "epoch": 0.715167453842851, "grad_norm": 0.2451171875, "learning_rate": 0.0009733966680438792, "loss": 2.0997, "step": 26650 }, { "epoch": 0.71519428939459, "grad_norm": 0.24609375, "learning_rate": 0.000973374670441354, "loss": 2.1492, "step": 26651 }, { "epoch": 0.715221124946329, "grad_norm": 0.2412109375, "learning_rate": 0.0009733526720197545, "loss": 2.1785, "step": 26652 }, { "epoch": 0.7152479604980678, "grad_norm": 0.2314453125, "learning_rate": 0.0009733306727791291, "loss": 2.1599, "step": 26653 }, { "epoch": 0.7152747960498068, "grad_norm": 0.2392578125, "learning_rate": 0.0009733086727195262, "loss": 2.0075, "step": 26654 }, { "epoch": 0.7153016316015457, "grad_norm": 0.240234375, "learning_rate": 0.0009732866718409938, "loss": 2.2217, "step": 26655 }, { "epoch": 0.7153284671532847, "grad_norm": 0.2470703125, "learning_rate": 0.0009732646701435803, "loss": 2.1422, "step": 26656 }, { "epoch": 0.7153553027050236, "grad_norm": 0.2412109375, "learning_rate": 0.0009732426676273338, "loss": 2.0719, "step": 26657 }, { "epoch": 0.7153821382567626, "grad_norm": 0.2392578125, "learning_rate": 0.0009732206642923027, "loss": 2.1511, "step": 26658 }, { "epoch": 0.7154089738085015, "grad_norm": 0.23828125, "learning_rate": 0.0009731986601385354, "loss": 2.1395, "step": 26659 }, { "epoch": 0.7154358093602404, "grad_norm": 0.2490234375, "learning_rate": 0.0009731766551660801, "loss": 2.1664, "step": 26660 }, { "epoch": 0.7154626449119794, "grad_norm": 0.2470703125, "learning_rate": 0.0009731546493749848, "loss": 2.1841, "step": 26661 }, { "epoch": 0.7154894804637183, "grad_norm": 0.2373046875, "learning_rate": 0.0009731326427652983, "loss": 2.2136, "step": 26662 }, { "epoch": 0.7155163160154573, "grad_norm": 0.240234375, "learning_rate": 0.0009731106353370683, "loss": 2.1788, "step": 26663 }, { "epoch": 0.7155431515671962, "grad_norm": 0.2412109375, "learning_rate": 0.0009730886270903433, "loss": 2.2611, "step": 26664 }, { "epoch": 0.7155699871189352, "grad_norm": 0.2451171875, "learning_rate": 0.0009730666180251717, "loss": 2.1494, "step": 26665 }, { "epoch": 0.7155968226706741, "grad_norm": 0.2373046875, "learning_rate": 0.0009730446081416018, "loss": 2.1817, "step": 26666 }, { "epoch": 0.715623658222413, "grad_norm": 0.2412109375, "learning_rate": 0.0009730225974396816, "loss": 2.2022, "step": 26667 }, { "epoch": 0.715650493774152, "grad_norm": 0.2314453125, "learning_rate": 0.0009730005859194598, "loss": 2.202, "step": 26668 }, { "epoch": 0.7156773293258909, "grad_norm": 0.2490234375, "learning_rate": 0.000972978573580984, "loss": 2.2777, "step": 26669 }, { "epoch": 0.7157041648776299, "grad_norm": 0.244140625, "learning_rate": 0.0009729565604243033, "loss": 2.1484, "step": 26670 }, { "epoch": 0.7157310004293689, "grad_norm": 0.259765625, "learning_rate": 0.0009729345464494656, "loss": 2.1099, "step": 26671 }, { "epoch": 0.7157578359811078, "grad_norm": 0.244140625, "learning_rate": 0.0009729125316565192, "loss": 2.1539, "step": 26672 }, { "epoch": 0.7157846715328468, "grad_norm": 0.240234375, "learning_rate": 0.0009728905160455124, "loss": 2.0751, "step": 26673 }, { "epoch": 0.7158115070845856, "grad_norm": 0.232421875, "learning_rate": 0.0009728684996164936, "loss": 2.2005, "step": 26674 }, { "epoch": 0.7158383426363246, "grad_norm": 0.25390625, "learning_rate": 0.000972846482369511, "loss": 2.2042, "step": 26675 }, { "epoch": 0.7158651781880635, "grad_norm": 0.2470703125, "learning_rate": 0.0009728244643046128, "loss": 2.2679, "step": 26676 }, { "epoch": 0.7158920137398025, "grad_norm": 0.2333984375, "learning_rate": 0.0009728024454218476, "loss": 2.2421, "step": 26677 }, { "epoch": 0.7159188492915415, "grad_norm": 0.2353515625, "learning_rate": 0.0009727804257212632, "loss": 2.2442, "step": 26678 }, { "epoch": 0.7159456848432804, "grad_norm": 0.2470703125, "learning_rate": 0.0009727584052029084, "loss": 2.133, "step": 26679 }, { "epoch": 0.7159725203950194, "grad_norm": 0.2333984375, "learning_rate": 0.0009727363838668314, "loss": 2.1095, "step": 26680 }, { "epoch": 0.7159993559467582, "grad_norm": 0.2412109375, "learning_rate": 0.0009727143617130802, "loss": 2.161, "step": 26681 }, { "epoch": 0.7160261914984972, "grad_norm": 0.23828125, "learning_rate": 0.0009726923387417036, "loss": 2.1839, "step": 26682 }, { "epoch": 0.7160530270502361, "grad_norm": 0.244140625, "learning_rate": 0.0009726703149527496, "loss": 2.0828, "step": 26683 }, { "epoch": 0.7160798626019751, "grad_norm": 0.2421875, "learning_rate": 0.0009726482903462665, "loss": 2.1888, "step": 26684 }, { "epoch": 0.7161066981537141, "grad_norm": 0.251953125, "learning_rate": 0.0009726262649223027, "loss": 2.2691, "step": 26685 }, { "epoch": 0.716133533705453, "grad_norm": 0.240234375, "learning_rate": 0.0009726042386809065, "loss": 2.2306, "step": 26686 }, { "epoch": 0.716160369257192, "grad_norm": 0.2373046875, "learning_rate": 0.0009725822116221263, "loss": 2.1566, "step": 26687 }, { "epoch": 0.7161872048089308, "grad_norm": 0.2578125, "learning_rate": 0.0009725601837460103, "loss": 2.2958, "step": 26688 }, { "epoch": 0.7162140403606698, "grad_norm": 0.2451171875, "learning_rate": 0.0009725381550526068, "loss": 2.2526, "step": 26689 }, { "epoch": 0.7162408759124088, "grad_norm": 0.2451171875, "learning_rate": 0.0009725161255419642, "loss": 2.1609, "step": 26690 }, { "epoch": 0.7162677114641477, "grad_norm": 0.2412109375, "learning_rate": 0.000972494095214131, "loss": 2.251, "step": 26691 }, { "epoch": 0.7162945470158867, "grad_norm": 0.2353515625, "learning_rate": 0.0009724720640691549, "loss": 2.1289, "step": 26692 }, { "epoch": 0.7163213825676256, "grad_norm": 0.2412109375, "learning_rate": 0.0009724500321070851, "loss": 2.0931, "step": 26693 }, { "epoch": 0.7163482181193646, "grad_norm": 0.232421875, "learning_rate": 0.0009724279993279694, "loss": 2.2228, "step": 26694 }, { "epoch": 0.7163750536711034, "grad_norm": 0.2431640625, "learning_rate": 0.0009724059657318562, "loss": 2.1523, "step": 26695 }, { "epoch": 0.7164018892228424, "grad_norm": 0.23828125, "learning_rate": 0.000972383931318794, "loss": 2.2445, "step": 26696 }, { "epoch": 0.7164287247745814, "grad_norm": 0.240234375, "learning_rate": 0.0009723618960888307, "loss": 2.066, "step": 26697 }, { "epoch": 0.7164555603263203, "grad_norm": 0.236328125, "learning_rate": 0.0009723398600420153, "loss": 2.1898, "step": 26698 }, { "epoch": 0.7164823958780593, "grad_norm": 0.2412109375, "learning_rate": 0.0009723178231783957, "loss": 2.2003, "step": 26699 }, { "epoch": 0.7165092314297982, "grad_norm": 0.2294921875, "learning_rate": 0.0009722957854980201, "loss": 2.1631, "step": 26700 }, { "epoch": 0.7165360669815372, "grad_norm": 0.2451171875, "learning_rate": 0.0009722737470009374, "loss": 2.3109, "step": 26701 }, { "epoch": 0.716562902533276, "grad_norm": 0.2353515625, "learning_rate": 0.0009722517076871955, "loss": 2.1807, "step": 26702 }, { "epoch": 0.716589738085015, "grad_norm": 0.23828125, "learning_rate": 0.0009722296675568428, "loss": 2.173, "step": 26703 }, { "epoch": 0.716616573636754, "grad_norm": 0.2421875, "learning_rate": 0.0009722076266099278, "loss": 2.2225, "step": 26704 }, { "epoch": 0.7166434091884929, "grad_norm": 0.2392578125, "learning_rate": 0.0009721855848464987, "loss": 2.249, "step": 26705 }, { "epoch": 0.7166702447402319, "grad_norm": 0.2255859375, "learning_rate": 0.000972163542266604, "loss": 2.1495, "step": 26706 }, { "epoch": 0.7166970802919708, "grad_norm": 0.2333984375, "learning_rate": 0.000972141498870292, "loss": 2.1199, "step": 26707 }, { "epoch": 0.7167239158437098, "grad_norm": 0.228515625, "learning_rate": 0.000972119454657611, "loss": 2.1514, "step": 26708 }, { "epoch": 0.7167507513954486, "grad_norm": 0.234375, "learning_rate": 0.0009720974096286094, "loss": 2.1415, "step": 26709 }, { "epoch": 0.7167775869471876, "grad_norm": 0.23046875, "learning_rate": 0.0009720753637833356, "loss": 2.1295, "step": 26710 }, { "epoch": 0.7168044224989266, "grad_norm": 0.23828125, "learning_rate": 0.0009720533171218376, "loss": 2.244, "step": 26711 }, { "epoch": 0.7168312580506655, "grad_norm": 0.23828125, "learning_rate": 0.0009720312696441644, "loss": 2.0735, "step": 26712 }, { "epoch": 0.7168580936024045, "grad_norm": 0.23828125, "learning_rate": 0.0009720092213503641, "loss": 2.092, "step": 26713 }, { "epoch": 0.7168849291541434, "grad_norm": 0.25390625, "learning_rate": 0.0009719871722404848, "loss": 2.2262, "step": 26714 }, { "epoch": 0.7169117647058824, "grad_norm": 0.244140625, "learning_rate": 0.0009719651223145751, "loss": 2.2524, "step": 26715 }, { "epoch": 0.7169386002576214, "grad_norm": 0.24609375, "learning_rate": 0.0009719430715726835, "loss": 2.343, "step": 26716 }, { "epoch": 0.7169654358093602, "grad_norm": 0.24609375, "learning_rate": 0.0009719210200148582, "loss": 2.1952, "step": 26717 }, { "epoch": 0.7169922713610992, "grad_norm": 0.236328125, "learning_rate": 0.0009718989676411476, "loss": 2.2134, "step": 26718 }, { "epoch": 0.7170191069128381, "grad_norm": 0.23046875, "learning_rate": 0.0009718769144516, "loss": 2.1641, "step": 26719 }, { "epoch": 0.7170459424645771, "grad_norm": 0.23828125, "learning_rate": 0.0009718548604462639, "loss": 2.1612, "step": 26720 }, { "epoch": 0.717072778016316, "grad_norm": 0.2392578125, "learning_rate": 0.0009718328056251877, "loss": 2.1488, "step": 26721 }, { "epoch": 0.717099613568055, "grad_norm": 0.240234375, "learning_rate": 0.0009718107499884197, "loss": 2.2138, "step": 26722 }, { "epoch": 0.717126449119794, "grad_norm": 0.224609375, "learning_rate": 0.0009717886935360083, "loss": 2.2362, "step": 26723 }, { "epoch": 0.7171532846715328, "grad_norm": 0.2421875, "learning_rate": 0.0009717666362680018, "loss": 2.16, "step": 26724 }, { "epoch": 0.7171801202232718, "grad_norm": 0.2412109375, "learning_rate": 0.0009717445781844487, "loss": 2.2853, "step": 26725 }, { "epoch": 0.7172069557750107, "grad_norm": 0.23046875, "learning_rate": 0.0009717225192853974, "loss": 2.0901, "step": 26726 }, { "epoch": 0.7172337913267497, "grad_norm": 0.2470703125, "learning_rate": 0.0009717004595708962, "loss": 2.2425, "step": 26727 }, { "epoch": 0.7172606268784886, "grad_norm": 0.224609375, "learning_rate": 0.0009716783990409936, "loss": 2.1299, "step": 26728 }, { "epoch": 0.7172874624302276, "grad_norm": 0.2373046875, "learning_rate": 0.0009716563376957381, "loss": 2.1222, "step": 26729 }, { "epoch": 0.7173142979819666, "grad_norm": 0.23828125, "learning_rate": 0.0009716342755351778, "loss": 2.0195, "step": 26730 }, { "epoch": 0.7173411335337054, "grad_norm": 0.232421875, "learning_rate": 0.0009716122125593611, "loss": 2.1649, "step": 26731 }, { "epoch": 0.7173679690854444, "grad_norm": 0.2353515625, "learning_rate": 0.0009715901487683367, "loss": 2.1677, "step": 26732 }, { "epoch": 0.7173948046371833, "grad_norm": 0.263671875, "learning_rate": 0.0009715680841621527, "loss": 2.0921, "step": 26733 }, { "epoch": 0.7174216401889223, "grad_norm": 0.236328125, "learning_rate": 0.0009715460187408578, "loss": 2.1077, "step": 26734 }, { "epoch": 0.7174484757406612, "grad_norm": 0.2470703125, "learning_rate": 0.0009715239525045003, "loss": 2.0949, "step": 26735 }, { "epoch": 0.7174753112924002, "grad_norm": 0.2431640625, "learning_rate": 0.0009715018854531283, "loss": 2.1384, "step": 26736 }, { "epoch": 0.7175021468441392, "grad_norm": 0.2412109375, "learning_rate": 0.0009714798175867906, "loss": 2.2409, "step": 26737 }, { "epoch": 0.717528982395878, "grad_norm": 0.25, "learning_rate": 0.0009714577489055355, "loss": 2.1519, "step": 26738 }, { "epoch": 0.717555817947617, "grad_norm": 0.2265625, "learning_rate": 0.0009714356794094113, "loss": 2.1773, "step": 26739 }, { "epoch": 0.7175826534993559, "grad_norm": 0.2392578125, "learning_rate": 0.0009714136090984666, "loss": 2.112, "step": 26740 }, { "epoch": 0.7176094890510949, "grad_norm": 0.25, "learning_rate": 0.0009713915379727496, "loss": 2.184, "step": 26741 }, { "epoch": 0.7176363246028339, "grad_norm": 0.25, "learning_rate": 0.0009713694660323088, "loss": 2.3143, "step": 26742 }, { "epoch": 0.7176631601545728, "grad_norm": 0.2490234375, "learning_rate": 0.0009713473932771927, "loss": 2.1212, "step": 26743 }, { "epoch": 0.7176899957063118, "grad_norm": 0.234375, "learning_rate": 0.0009713253197074498, "loss": 2.2517, "step": 26744 }, { "epoch": 0.7177168312580506, "grad_norm": 0.2412109375, "learning_rate": 0.0009713032453231283, "loss": 2.2306, "step": 26745 }, { "epoch": 0.7177436668097896, "grad_norm": 0.244140625, "learning_rate": 0.0009712811701242767, "loss": 2.2289, "step": 26746 }, { "epoch": 0.7177705023615285, "grad_norm": 0.234375, "learning_rate": 0.0009712590941109435, "loss": 2.144, "step": 26747 }, { "epoch": 0.7177973379132675, "grad_norm": 0.23828125, "learning_rate": 0.000971237017283177, "loss": 2.2333, "step": 26748 }, { "epoch": 0.7178241734650065, "grad_norm": 0.2373046875, "learning_rate": 0.0009712149396410258, "loss": 2.1378, "step": 26749 }, { "epoch": 0.7178510090167454, "grad_norm": 0.234375, "learning_rate": 0.0009711928611845382, "loss": 2.211, "step": 26750 }, { "epoch": 0.7178778445684844, "grad_norm": 0.23828125, "learning_rate": 0.0009711707819137627, "loss": 2.3005, "step": 26751 }, { "epoch": 0.7179046801202232, "grad_norm": 0.234375, "learning_rate": 0.0009711487018287476, "loss": 2.1478, "step": 26752 }, { "epoch": 0.7179315156719622, "grad_norm": 0.2255859375, "learning_rate": 0.0009711266209295415, "loss": 2.119, "step": 26753 }, { "epoch": 0.7179583512237011, "grad_norm": 0.2373046875, "learning_rate": 0.0009711045392161927, "loss": 2.1837, "step": 26754 }, { "epoch": 0.7179851867754401, "grad_norm": 0.244140625, "learning_rate": 0.0009710824566887498, "loss": 2.0605, "step": 26755 }, { "epoch": 0.7180120223271791, "grad_norm": 0.2333984375, "learning_rate": 0.0009710603733472611, "loss": 2.2926, "step": 26756 }, { "epoch": 0.718038857878918, "grad_norm": 0.234375, "learning_rate": 0.0009710382891917753, "loss": 2.2287, "step": 26757 }, { "epoch": 0.718065693430657, "grad_norm": 0.2392578125, "learning_rate": 0.0009710162042223405, "loss": 2.0967, "step": 26758 }, { "epoch": 0.7180925289823958, "grad_norm": 0.23046875, "learning_rate": 0.0009709941184390053, "loss": 2.2261, "step": 26759 }, { "epoch": 0.7181193645341348, "grad_norm": 0.2451171875, "learning_rate": 0.0009709720318418182, "loss": 2.076, "step": 26760 }, { "epoch": 0.7181462000858738, "grad_norm": 0.2392578125, "learning_rate": 0.0009709499444308275, "loss": 2.1465, "step": 26761 }, { "epoch": 0.7181730356376127, "grad_norm": 0.2431640625, "learning_rate": 0.0009709278562060817, "loss": 2.2138, "step": 26762 }, { "epoch": 0.7181998711893517, "grad_norm": 0.24609375, "learning_rate": 0.0009709057671676295, "loss": 2.2501, "step": 26763 }, { "epoch": 0.7182267067410906, "grad_norm": 0.2333984375, "learning_rate": 0.0009708836773155194, "loss": 2.113, "step": 26764 }, { "epoch": 0.7182535422928296, "grad_norm": 0.2412109375, "learning_rate": 0.0009708615866497992, "loss": 2.1378, "step": 26765 }, { "epoch": 0.7182803778445684, "grad_norm": 0.2421875, "learning_rate": 0.0009708394951705179, "loss": 2.2407, "step": 26766 }, { "epoch": 0.7183072133963074, "grad_norm": 0.2392578125, "learning_rate": 0.0009708174028777238, "loss": 2.2115, "step": 26767 }, { "epoch": 0.7183340489480464, "grad_norm": 0.23828125, "learning_rate": 0.0009707953097714656, "loss": 2.0993, "step": 26768 }, { "epoch": 0.7183608844997853, "grad_norm": 0.2490234375, "learning_rate": 0.0009707732158517915, "loss": 2.2635, "step": 26769 }, { "epoch": 0.7183877200515243, "grad_norm": 0.2373046875, "learning_rate": 0.0009707511211187499, "loss": 2.0741, "step": 26770 }, { "epoch": 0.7184145556032632, "grad_norm": 0.251953125, "learning_rate": 0.0009707290255723896, "loss": 2.25, "step": 26771 }, { "epoch": 0.7184413911550022, "grad_norm": 0.2392578125, "learning_rate": 0.0009707069292127589, "loss": 2.2232, "step": 26772 }, { "epoch": 0.718468226706741, "grad_norm": 0.2421875, "learning_rate": 0.0009706848320399062, "loss": 2.1803, "step": 26773 }, { "epoch": 0.71849506225848, "grad_norm": 0.2412109375, "learning_rate": 0.0009706627340538799, "loss": 2.1211, "step": 26774 }, { "epoch": 0.718521897810219, "grad_norm": 0.2412109375, "learning_rate": 0.000970640635254729, "loss": 2.2758, "step": 26775 }, { "epoch": 0.7185487333619579, "grad_norm": 0.232421875, "learning_rate": 0.0009706185356425012, "loss": 2.2431, "step": 26776 }, { "epoch": 0.7185755689136969, "grad_norm": 0.2353515625, "learning_rate": 0.0009705964352172456, "loss": 2.1206, "step": 26777 }, { "epoch": 0.7186024044654358, "grad_norm": 0.2353515625, "learning_rate": 0.0009705743339790102, "loss": 2.1838, "step": 26778 }, { "epoch": 0.7186292400171748, "grad_norm": 0.2490234375, "learning_rate": 0.000970552231927844, "loss": 2.1352, "step": 26779 }, { "epoch": 0.7186560755689136, "grad_norm": 0.2373046875, "learning_rate": 0.0009705301290637951, "loss": 2.0677, "step": 26780 }, { "epoch": 0.7186829111206526, "grad_norm": 0.2373046875, "learning_rate": 0.000970508025386912, "loss": 2.1883, "step": 26781 }, { "epoch": 0.7187097466723916, "grad_norm": 0.2294921875, "learning_rate": 0.0009704859208972434, "loss": 2.0863, "step": 26782 }, { "epoch": 0.7187365822241305, "grad_norm": 0.2353515625, "learning_rate": 0.0009704638155948377, "loss": 2.1826, "step": 26783 }, { "epoch": 0.7187634177758695, "grad_norm": 0.236328125, "learning_rate": 0.0009704417094797433, "loss": 2.2148, "step": 26784 }, { "epoch": 0.7187902533276084, "grad_norm": 0.2314453125, "learning_rate": 0.000970419602552009, "loss": 2.1783, "step": 26785 }, { "epoch": 0.7188170888793474, "grad_norm": 0.2373046875, "learning_rate": 0.000970397494811683, "loss": 2.1577, "step": 26786 }, { "epoch": 0.7188439244310864, "grad_norm": 0.236328125, "learning_rate": 0.0009703753862588138, "loss": 2.185, "step": 26787 }, { "epoch": 0.7188707599828252, "grad_norm": 0.2421875, "learning_rate": 0.0009703532768934499, "loss": 2.1645, "step": 26788 }, { "epoch": 0.7188975955345642, "grad_norm": 0.236328125, "learning_rate": 0.0009703311667156399, "loss": 2.1753, "step": 26789 }, { "epoch": 0.7189244310863031, "grad_norm": 0.240234375, "learning_rate": 0.0009703090557254322, "loss": 2.1819, "step": 26790 }, { "epoch": 0.7189512666380421, "grad_norm": 0.2333984375, "learning_rate": 0.0009702869439228756, "loss": 2.1612, "step": 26791 }, { "epoch": 0.718978102189781, "grad_norm": 0.2314453125, "learning_rate": 0.0009702648313080184, "loss": 2.1704, "step": 26792 }, { "epoch": 0.71900493774152, "grad_norm": 0.234375, "learning_rate": 0.0009702427178809088, "loss": 2.0562, "step": 26793 }, { "epoch": 0.719031773293259, "grad_norm": 0.2314453125, "learning_rate": 0.0009702206036415958, "loss": 2.1571, "step": 26794 }, { "epoch": 0.7190586088449978, "grad_norm": 0.259765625, "learning_rate": 0.0009701984885901277, "loss": 2.218, "step": 26795 }, { "epoch": 0.7190854443967368, "grad_norm": 0.23828125, "learning_rate": 0.0009701763727265531, "loss": 2.1756, "step": 26796 }, { "epoch": 0.7191122799484757, "grad_norm": 0.234375, "learning_rate": 0.0009701542560509204, "loss": 2.1438, "step": 26797 }, { "epoch": 0.7191391155002147, "grad_norm": 0.2353515625, "learning_rate": 0.000970132138563278, "loss": 2.1801, "step": 26798 }, { "epoch": 0.7191659510519536, "grad_norm": 0.2373046875, "learning_rate": 0.0009701100202636747, "loss": 2.1661, "step": 26799 }, { "epoch": 0.7191927866036926, "grad_norm": 0.2333984375, "learning_rate": 0.000970087901152159, "loss": 2.2782, "step": 26800 }, { "epoch": 0.7192196221554316, "grad_norm": 0.2392578125, "learning_rate": 0.0009700657812287792, "loss": 2.171, "step": 26801 }, { "epoch": 0.7192464577071704, "grad_norm": 0.2421875, "learning_rate": 0.000970043660493584, "loss": 2.2126, "step": 26802 }, { "epoch": 0.7192732932589094, "grad_norm": 0.23828125, "learning_rate": 0.000970021538946622, "loss": 2.2215, "step": 26803 }, { "epoch": 0.7193001288106483, "grad_norm": 0.2392578125, "learning_rate": 0.0009699994165879413, "loss": 2.2361, "step": 26804 }, { "epoch": 0.7193269643623873, "grad_norm": 0.2294921875, "learning_rate": 0.0009699772934175909, "loss": 2.1539, "step": 26805 }, { "epoch": 0.7193537999141262, "grad_norm": 0.232421875, "learning_rate": 0.0009699551694356192, "loss": 2.1415, "step": 26806 }, { "epoch": 0.7193806354658652, "grad_norm": 0.2392578125, "learning_rate": 0.0009699330446420747, "loss": 2.1781, "step": 26807 }, { "epoch": 0.7194074710176042, "grad_norm": 0.25390625, "learning_rate": 0.0009699109190370058, "loss": 2.2815, "step": 26808 }, { "epoch": 0.719434306569343, "grad_norm": 0.2265625, "learning_rate": 0.0009698887926204613, "loss": 1.9826, "step": 26809 }, { "epoch": 0.719461142121082, "grad_norm": 0.2421875, "learning_rate": 0.0009698666653924898, "loss": 2.1751, "step": 26810 }, { "epoch": 0.7194879776728209, "grad_norm": 0.236328125, "learning_rate": 0.0009698445373531393, "loss": 2.2114, "step": 26811 }, { "epoch": 0.7195148132245599, "grad_norm": 0.2412109375, "learning_rate": 0.0009698224085024589, "loss": 2.1993, "step": 26812 }, { "epoch": 0.7195416487762989, "grad_norm": 0.23828125, "learning_rate": 0.0009698002788404967, "loss": 2.1778, "step": 26813 }, { "epoch": 0.7195684843280378, "grad_norm": 0.2216796875, "learning_rate": 0.0009697781483673019, "loss": 2.135, "step": 26814 }, { "epoch": 0.7195953198797768, "grad_norm": 0.2431640625, "learning_rate": 0.0009697560170829222, "loss": 2.1769, "step": 26815 }, { "epoch": 0.7196221554315156, "grad_norm": 0.2333984375, "learning_rate": 0.0009697338849874069, "loss": 2.2221, "step": 26816 }, { "epoch": 0.7196489909832546, "grad_norm": 0.2353515625, "learning_rate": 0.0009697117520808042, "loss": 2.1692, "step": 26817 }, { "epoch": 0.7196758265349935, "grad_norm": 0.2353515625, "learning_rate": 0.0009696896183631626, "loss": 2.2372, "step": 26818 }, { "epoch": 0.7197026620867325, "grad_norm": 0.240234375, "learning_rate": 0.0009696674838345307, "loss": 2.2129, "step": 26819 }, { "epoch": 0.7197294976384715, "grad_norm": 0.2353515625, "learning_rate": 0.0009696453484949572, "loss": 2.1633, "step": 26820 }, { "epoch": 0.7197563331902104, "grad_norm": 0.234375, "learning_rate": 0.0009696232123444904, "loss": 2.1171, "step": 26821 }, { "epoch": 0.7197831687419494, "grad_norm": 0.240234375, "learning_rate": 0.0009696010753831791, "loss": 2.2121, "step": 26822 }, { "epoch": 0.7198100042936882, "grad_norm": 0.251953125, "learning_rate": 0.0009695789376110718, "loss": 2.2885, "step": 26823 }, { "epoch": 0.7198368398454272, "grad_norm": 0.2421875, "learning_rate": 0.0009695567990282172, "loss": 2.2106, "step": 26824 }, { "epoch": 0.7198636753971661, "grad_norm": 0.2216796875, "learning_rate": 0.0009695346596346635, "loss": 2.1372, "step": 26825 }, { "epoch": 0.7198905109489051, "grad_norm": 0.2333984375, "learning_rate": 0.0009695125194304594, "loss": 2.2081, "step": 26826 }, { "epoch": 0.7199173465006441, "grad_norm": 0.23828125, "learning_rate": 0.0009694903784156538, "loss": 2.2155, "step": 26827 }, { "epoch": 0.719944182052383, "grad_norm": 0.2333984375, "learning_rate": 0.0009694682365902949, "loss": 2.1536, "step": 26828 }, { "epoch": 0.719971017604122, "grad_norm": 0.224609375, "learning_rate": 0.0009694460939544312, "loss": 2.0178, "step": 26829 }, { "epoch": 0.7199978531558608, "grad_norm": 0.23828125, "learning_rate": 0.0009694239505081117, "loss": 2.1592, "step": 26830 }, { "epoch": 0.7200246887075998, "grad_norm": 0.232421875, "learning_rate": 0.0009694018062513847, "loss": 2.1732, "step": 26831 }, { "epoch": 0.7200515242593388, "grad_norm": 0.232421875, "learning_rate": 0.0009693796611842987, "loss": 2.1957, "step": 26832 }, { "epoch": 0.7200783598110777, "grad_norm": 0.2353515625, "learning_rate": 0.0009693575153069026, "loss": 2.2683, "step": 26833 }, { "epoch": 0.7201051953628167, "grad_norm": 0.2373046875, "learning_rate": 0.0009693353686192447, "loss": 2.221, "step": 26834 }, { "epoch": 0.7201320309145556, "grad_norm": 0.2373046875, "learning_rate": 0.0009693132211213736, "loss": 2.1606, "step": 26835 }, { "epoch": 0.7201588664662946, "grad_norm": 0.232421875, "learning_rate": 0.000969291072813338, "loss": 2.1572, "step": 26836 }, { "epoch": 0.7201857020180334, "grad_norm": 0.359375, "learning_rate": 0.0009692689236951862, "loss": 2.0662, "step": 26837 }, { "epoch": 0.7202125375697724, "grad_norm": 0.236328125, "learning_rate": 0.0009692467737669674, "loss": 2.2084, "step": 26838 }, { "epoch": 0.7202393731215114, "grad_norm": 0.236328125, "learning_rate": 0.0009692246230287296, "loss": 2.0763, "step": 26839 }, { "epoch": 0.7202662086732503, "grad_norm": 0.2431640625, "learning_rate": 0.0009692024714805216, "loss": 2.0767, "step": 26840 }, { "epoch": 0.7202930442249893, "grad_norm": 0.2412109375, "learning_rate": 0.0009691803191223922, "loss": 2.124, "step": 26841 }, { "epoch": 0.7203198797767282, "grad_norm": 0.24609375, "learning_rate": 0.0009691581659543896, "loss": 2.0754, "step": 26842 }, { "epoch": 0.7203467153284672, "grad_norm": 0.2373046875, "learning_rate": 0.0009691360119765625, "loss": 2.1487, "step": 26843 }, { "epoch": 0.720373550880206, "grad_norm": 0.23046875, "learning_rate": 0.0009691138571889597, "loss": 2.1277, "step": 26844 }, { "epoch": 0.720400386431945, "grad_norm": 0.234375, "learning_rate": 0.0009690917015916298, "loss": 2.1108, "step": 26845 }, { "epoch": 0.720427221983684, "grad_norm": 0.24609375, "learning_rate": 0.0009690695451846211, "loss": 2.2506, "step": 26846 }, { "epoch": 0.7204540575354229, "grad_norm": 0.2470703125, "learning_rate": 0.0009690473879679825, "loss": 2.1895, "step": 26847 }, { "epoch": 0.7204808930871619, "grad_norm": 0.240234375, "learning_rate": 0.0009690252299417627, "loss": 2.2437, "step": 26848 }, { "epoch": 0.7205077286389008, "grad_norm": 0.2314453125, "learning_rate": 0.0009690030711060098, "loss": 2.1488, "step": 26849 }, { "epoch": 0.7205345641906398, "grad_norm": 0.234375, "learning_rate": 0.0009689809114607729, "loss": 2.1918, "step": 26850 }, { "epoch": 0.7205613997423786, "grad_norm": 0.2431640625, "learning_rate": 0.0009689587510061003, "loss": 2.1785, "step": 26851 }, { "epoch": 0.7205882352941176, "grad_norm": 0.2294921875, "learning_rate": 0.000968936589742041, "loss": 2.1867, "step": 26852 }, { "epoch": 0.7206150708458566, "grad_norm": 0.224609375, "learning_rate": 0.0009689144276686432, "loss": 2.0507, "step": 26853 }, { "epoch": 0.7206419063975955, "grad_norm": 0.2412109375, "learning_rate": 0.0009688922647859556, "loss": 2.2835, "step": 26854 }, { "epoch": 0.7206687419493345, "grad_norm": 0.2412109375, "learning_rate": 0.000968870101094027, "loss": 2.2558, "step": 26855 }, { "epoch": 0.7206955775010734, "grad_norm": 0.2275390625, "learning_rate": 0.0009688479365929059, "loss": 2.1084, "step": 26856 }, { "epoch": 0.7207224130528124, "grad_norm": 0.2412109375, "learning_rate": 0.0009688257712826409, "loss": 2.1998, "step": 26857 }, { "epoch": 0.7207492486045514, "grad_norm": 0.2392578125, "learning_rate": 0.0009688036051632808, "loss": 2.177, "step": 26858 }, { "epoch": 0.7207760841562902, "grad_norm": 0.228515625, "learning_rate": 0.0009687814382348741, "loss": 2.1735, "step": 26859 }, { "epoch": 0.7208029197080292, "grad_norm": 0.2373046875, "learning_rate": 0.0009687592704974692, "loss": 2.1764, "step": 26860 }, { "epoch": 0.7208297552597681, "grad_norm": 0.2353515625, "learning_rate": 0.0009687371019511151, "loss": 2.2373, "step": 26861 }, { "epoch": 0.7208565908115071, "grad_norm": 0.228515625, "learning_rate": 0.0009687149325958603, "loss": 2.2416, "step": 26862 }, { "epoch": 0.720883426363246, "grad_norm": 0.2275390625, "learning_rate": 0.0009686927624317533, "loss": 2.1758, "step": 26863 }, { "epoch": 0.720910261914985, "grad_norm": 0.228515625, "learning_rate": 0.000968670591458843, "loss": 2.1487, "step": 26864 }, { "epoch": 0.720937097466724, "grad_norm": 0.23046875, "learning_rate": 0.0009686484196771777, "loss": 2.1321, "step": 26865 }, { "epoch": 0.7209639330184628, "grad_norm": 0.23046875, "learning_rate": 0.0009686262470868065, "loss": 2.1445, "step": 26866 }, { "epoch": 0.7209907685702018, "grad_norm": 0.232421875, "learning_rate": 0.0009686040736877776, "loss": 2.2287, "step": 26867 }, { "epoch": 0.7210176041219407, "grad_norm": 0.2275390625, "learning_rate": 0.0009685818994801398, "loss": 2.0534, "step": 26868 }, { "epoch": 0.7210444396736797, "grad_norm": 0.2255859375, "learning_rate": 0.0009685597244639418, "loss": 2.1254, "step": 26869 }, { "epoch": 0.7210712752254186, "grad_norm": 0.2392578125, "learning_rate": 0.0009685375486392321, "loss": 2.0632, "step": 26870 }, { "epoch": 0.7210981107771576, "grad_norm": 0.2353515625, "learning_rate": 0.0009685153720060595, "loss": 2.1876, "step": 26871 }, { "epoch": 0.7211249463288966, "grad_norm": 0.236328125, "learning_rate": 0.0009684931945644726, "loss": 2.0655, "step": 26872 }, { "epoch": 0.7211517818806354, "grad_norm": 0.2421875, "learning_rate": 0.0009684710163145201, "loss": 2.081, "step": 26873 }, { "epoch": 0.7211786174323744, "grad_norm": 0.2421875, "learning_rate": 0.0009684488372562503, "loss": 2.1958, "step": 26874 }, { "epoch": 0.7212054529841133, "grad_norm": 0.24609375, "learning_rate": 0.0009684266573897125, "loss": 2.1311, "step": 26875 }, { "epoch": 0.7212322885358523, "grad_norm": 0.2373046875, "learning_rate": 0.000968404476714955, "loss": 2.1409, "step": 26876 }, { "epoch": 0.7212591240875912, "grad_norm": 0.2421875, "learning_rate": 0.0009683822952320263, "loss": 2.059, "step": 26877 }, { "epoch": 0.7212859596393302, "grad_norm": 0.23046875, "learning_rate": 0.0009683601129409754, "loss": 2.1763, "step": 26878 }, { "epoch": 0.7213127951910692, "grad_norm": 0.236328125, "learning_rate": 0.0009683379298418505, "loss": 2.1695, "step": 26879 }, { "epoch": 0.721339630742808, "grad_norm": 0.23828125, "learning_rate": 0.0009683157459347009, "loss": 2.2356, "step": 26880 }, { "epoch": 0.721366466294547, "grad_norm": 0.2255859375, "learning_rate": 0.0009682935612195747, "loss": 2.143, "step": 26881 }, { "epoch": 0.7213933018462859, "grad_norm": 0.232421875, "learning_rate": 0.0009682713756965208, "loss": 2.1751, "step": 26882 }, { "epoch": 0.7214201373980249, "grad_norm": 0.2373046875, "learning_rate": 0.0009682491893655878, "loss": 2.1849, "step": 26883 }, { "epoch": 0.7214469729497639, "grad_norm": 0.228515625, "learning_rate": 0.0009682270022268245, "loss": 2.1696, "step": 26884 }, { "epoch": 0.7214738085015028, "grad_norm": 0.228515625, "learning_rate": 0.0009682048142802795, "loss": 2.1416, "step": 26885 }, { "epoch": 0.7215006440532418, "grad_norm": 0.236328125, "learning_rate": 0.0009681826255260014, "loss": 2.1193, "step": 26886 }, { "epoch": 0.7215274796049806, "grad_norm": 0.234375, "learning_rate": 0.000968160435964039, "loss": 2.1071, "step": 26887 }, { "epoch": 0.7215543151567196, "grad_norm": 0.2392578125, "learning_rate": 0.0009681382455944407, "loss": 2.0749, "step": 26888 }, { "epoch": 0.7215811507084585, "grad_norm": 0.232421875, "learning_rate": 0.0009681160544172558, "loss": 2.0972, "step": 26889 }, { "epoch": 0.7216079862601975, "grad_norm": 0.2373046875, "learning_rate": 0.0009680938624325325, "loss": 2.1153, "step": 26890 }, { "epoch": 0.7216348218119365, "grad_norm": 0.2236328125, "learning_rate": 0.0009680716696403193, "loss": 2.0761, "step": 26891 }, { "epoch": 0.7216616573636754, "grad_norm": 0.2451171875, "learning_rate": 0.0009680494760406653, "loss": 2.1324, "step": 26892 }, { "epoch": 0.7216884929154144, "grad_norm": 0.232421875, "learning_rate": 0.000968027281633619, "loss": 2.1532, "step": 26893 }, { "epoch": 0.7217153284671532, "grad_norm": 0.2412109375, "learning_rate": 0.0009680050864192292, "loss": 2.1452, "step": 26894 }, { "epoch": 0.7217421640188922, "grad_norm": 0.2314453125, "learning_rate": 0.0009679828903975444, "loss": 2.2106, "step": 26895 }, { "epoch": 0.7217689995706311, "grad_norm": 0.236328125, "learning_rate": 0.0009679606935686134, "loss": 2.167, "step": 26896 }, { "epoch": 0.7217958351223701, "grad_norm": 0.2373046875, "learning_rate": 0.000967938495932485, "loss": 2.1397, "step": 26897 }, { "epoch": 0.7218226706741091, "grad_norm": 0.236328125, "learning_rate": 0.0009679162974892075, "loss": 2.1603, "step": 26898 }, { "epoch": 0.721849506225848, "grad_norm": 0.2294921875, "learning_rate": 0.0009678940982388301, "loss": 2.2387, "step": 26899 }, { "epoch": 0.721876341777587, "grad_norm": 0.244140625, "learning_rate": 0.0009678718981814015, "loss": 2.1333, "step": 26900 }, { "epoch": 0.7219031773293259, "grad_norm": 0.240234375, "learning_rate": 0.0009678496973169699, "loss": 2.1445, "step": 26901 }, { "epoch": 0.7219300128810648, "grad_norm": 0.23046875, "learning_rate": 0.0009678274956455842, "loss": 2.2136, "step": 26902 }, { "epoch": 0.7219568484328038, "grad_norm": 0.2373046875, "learning_rate": 0.0009678052931672935, "loss": 2.2061, "step": 26903 }, { "epoch": 0.7219836839845427, "grad_norm": 0.2294921875, "learning_rate": 0.0009677830898821462, "loss": 2.1132, "step": 26904 }, { "epoch": 0.7220105195362817, "grad_norm": 0.236328125, "learning_rate": 0.0009677608857901907, "loss": 2.2652, "step": 26905 }, { "epoch": 0.7220373550880206, "grad_norm": 0.25390625, "learning_rate": 0.0009677386808914763, "loss": 2.2791, "step": 26906 }, { "epoch": 0.7220641906397596, "grad_norm": 0.22265625, "learning_rate": 0.0009677164751860512, "loss": 2.0912, "step": 26907 }, { "epoch": 0.7220910261914985, "grad_norm": 0.2333984375, "learning_rate": 0.0009676942686739644, "loss": 2.2185, "step": 26908 }, { "epoch": 0.7221178617432374, "grad_norm": 0.2373046875, "learning_rate": 0.0009676720613552646, "loss": 2.1476, "step": 26909 }, { "epoch": 0.7221446972949764, "grad_norm": 0.2314453125, "learning_rate": 0.0009676498532300003, "loss": 2.1626, "step": 26910 }, { "epoch": 0.7221715328467153, "grad_norm": 0.232421875, "learning_rate": 0.0009676276442982207, "loss": 2.1835, "step": 26911 }, { "epoch": 0.7221983683984543, "grad_norm": 0.2294921875, "learning_rate": 0.000967605434559974, "loss": 2.1302, "step": 26912 }, { "epoch": 0.7222252039501932, "grad_norm": 0.232421875, "learning_rate": 0.0009675832240153092, "loss": 2.1572, "step": 26913 }, { "epoch": 0.7222520395019322, "grad_norm": 0.236328125, "learning_rate": 0.0009675610126642749, "loss": 2.1544, "step": 26914 }, { "epoch": 0.722278875053671, "grad_norm": 0.2333984375, "learning_rate": 0.00096753880050692, "loss": 2.1655, "step": 26915 }, { "epoch": 0.72230571060541, "grad_norm": 0.232421875, "learning_rate": 0.0009675165875432929, "loss": 2.1614, "step": 26916 }, { "epoch": 0.722332546157149, "grad_norm": 0.228515625, "learning_rate": 0.0009674943737734426, "loss": 2.148, "step": 26917 }, { "epoch": 0.7223593817088879, "grad_norm": 0.234375, "learning_rate": 0.0009674721591974178, "loss": 2.0565, "step": 26918 }, { "epoch": 0.7223862172606269, "grad_norm": 0.232421875, "learning_rate": 0.0009674499438152672, "loss": 2.1517, "step": 26919 }, { "epoch": 0.7224130528123658, "grad_norm": 0.2333984375, "learning_rate": 0.0009674277276270395, "loss": 2.1545, "step": 26920 }, { "epoch": 0.7224398883641048, "grad_norm": 0.2255859375, "learning_rate": 0.0009674055106327834, "loss": 2.0316, "step": 26921 }, { "epoch": 0.7224667239158437, "grad_norm": 0.2177734375, "learning_rate": 0.000967383292832548, "loss": 2.0453, "step": 26922 }, { "epoch": 0.7224935594675826, "grad_norm": 0.2294921875, "learning_rate": 0.0009673610742263814, "loss": 2.1172, "step": 26923 }, { "epoch": 0.7225203950193216, "grad_norm": 0.2333984375, "learning_rate": 0.0009673388548143327, "loss": 2.1696, "step": 26924 }, { "epoch": 0.7225472305710605, "grad_norm": 0.2353515625, "learning_rate": 0.0009673166345964508, "loss": 2.1949, "step": 26925 }, { "epoch": 0.7225740661227995, "grad_norm": 0.2353515625, "learning_rate": 0.0009672944135727844, "loss": 2.077, "step": 26926 }, { "epoch": 0.7226009016745384, "grad_norm": 0.24609375, "learning_rate": 0.0009672721917433818, "loss": 2.1562, "step": 26927 }, { "epoch": 0.7226277372262774, "grad_norm": 0.236328125, "learning_rate": 0.0009672499691082922, "loss": 2.159, "step": 26928 }, { "epoch": 0.7226545727780164, "grad_norm": 0.240234375, "learning_rate": 0.0009672277456675642, "loss": 2.1809, "step": 26929 }, { "epoch": 0.7226814083297552, "grad_norm": 0.2255859375, "learning_rate": 0.0009672055214212466, "loss": 2.0902, "step": 26930 }, { "epoch": 0.7227082438814942, "grad_norm": 0.2373046875, "learning_rate": 0.000967183296369388, "loss": 2.1647, "step": 26931 }, { "epoch": 0.7227350794332331, "grad_norm": 0.228515625, "learning_rate": 0.0009671610705120374, "loss": 2.1066, "step": 26932 }, { "epoch": 0.7227619149849721, "grad_norm": 0.2265625, "learning_rate": 0.0009671388438492434, "loss": 2.1308, "step": 26933 }, { "epoch": 0.722788750536711, "grad_norm": 0.2294921875, "learning_rate": 0.0009671166163810547, "loss": 2.1919, "step": 26934 }, { "epoch": 0.72281558608845, "grad_norm": 0.23046875, "learning_rate": 0.0009670943881075203, "loss": 2.1286, "step": 26935 }, { "epoch": 0.722842421640189, "grad_norm": 0.2373046875, "learning_rate": 0.0009670721590286887, "loss": 2.2332, "step": 26936 }, { "epoch": 0.7228692571919278, "grad_norm": 0.2412109375, "learning_rate": 0.0009670499291446089, "loss": 2.2324, "step": 26937 }, { "epoch": 0.7228960927436668, "grad_norm": 0.232421875, "learning_rate": 0.0009670276984553293, "loss": 2.1661, "step": 26938 }, { "epoch": 0.7229229282954057, "grad_norm": 0.2421875, "learning_rate": 0.0009670054669608991, "loss": 2.1036, "step": 26939 }, { "epoch": 0.7229497638471447, "grad_norm": 0.2373046875, "learning_rate": 0.0009669832346613669, "loss": 2.2425, "step": 26940 }, { "epoch": 0.7229765993988836, "grad_norm": 0.224609375, "learning_rate": 0.0009669610015567815, "loss": 2.1486, "step": 26941 }, { "epoch": 0.7230034349506226, "grad_norm": 0.228515625, "learning_rate": 0.0009669387676471916, "loss": 2.1635, "step": 26942 }, { "epoch": 0.7230302705023616, "grad_norm": 0.234375, "learning_rate": 0.0009669165329326459, "loss": 2.0982, "step": 26943 }, { "epoch": 0.7230571060541005, "grad_norm": 0.2275390625, "learning_rate": 0.0009668942974131931, "loss": 2.1358, "step": 26944 }, { "epoch": 0.7230839416058394, "grad_norm": 0.22265625, "learning_rate": 0.0009668720610888825, "loss": 1.9996, "step": 26945 }, { "epoch": 0.7231107771575783, "grad_norm": 0.2314453125, "learning_rate": 0.0009668498239597625, "loss": 2.1291, "step": 26946 }, { "epoch": 0.7231376127093173, "grad_norm": 0.25, "learning_rate": 0.0009668275860258818, "loss": 2.1891, "step": 26947 }, { "epoch": 0.7231644482610562, "grad_norm": 0.2421875, "learning_rate": 0.0009668053472872893, "loss": 2.2273, "step": 26948 }, { "epoch": 0.7231912838127952, "grad_norm": 0.2333984375, "learning_rate": 0.0009667831077440338, "loss": 2.2404, "step": 26949 }, { "epoch": 0.7232181193645342, "grad_norm": 0.2333984375, "learning_rate": 0.000966760867396164, "loss": 2.1864, "step": 26950 }, { "epoch": 0.723244954916273, "grad_norm": 0.220703125, "learning_rate": 0.0009667386262437287, "loss": 2.1225, "step": 26951 }, { "epoch": 0.723271790468012, "grad_norm": 0.236328125, "learning_rate": 0.0009667163842867768, "loss": 2.0905, "step": 26952 }, { "epoch": 0.7232986260197509, "grad_norm": 0.234375, "learning_rate": 0.0009666941415253571, "loss": 1.9556, "step": 26953 }, { "epoch": 0.7233254615714899, "grad_norm": 0.2470703125, "learning_rate": 0.0009666718979595186, "loss": 2.3504, "step": 26954 }, { "epoch": 0.7233522971232289, "grad_norm": 0.2333984375, "learning_rate": 0.0009666496535893094, "loss": 2.0213, "step": 26955 }, { "epoch": 0.7233791326749678, "grad_norm": 0.2470703125, "learning_rate": 0.0009666274084147788, "loss": 2.1589, "step": 26956 }, { "epoch": 0.7234059682267068, "grad_norm": 0.2255859375, "learning_rate": 0.0009666051624359758, "loss": 2.1794, "step": 26957 }, { "epoch": 0.7234328037784457, "grad_norm": 0.2314453125, "learning_rate": 0.0009665829156529489, "loss": 2.2755, "step": 26958 }, { "epoch": 0.7234596393301846, "grad_norm": 0.234375, "learning_rate": 0.0009665606680657468, "loss": 2.2724, "step": 26959 }, { "epoch": 0.7234864748819235, "grad_norm": 0.232421875, "learning_rate": 0.0009665384196744183, "loss": 2.2908, "step": 26960 }, { "epoch": 0.7235133104336625, "grad_norm": 0.2265625, "learning_rate": 0.0009665161704790126, "loss": 2.2275, "step": 26961 }, { "epoch": 0.7235401459854015, "grad_norm": 0.23046875, "learning_rate": 0.0009664939204795782, "loss": 2.1824, "step": 26962 }, { "epoch": 0.7235669815371404, "grad_norm": 0.228515625, "learning_rate": 0.0009664716696761637, "loss": 2.1857, "step": 26963 }, { "epoch": 0.7235938170888794, "grad_norm": 0.234375, "learning_rate": 0.0009664494180688185, "loss": 2.1991, "step": 26964 }, { "epoch": 0.7236206526406183, "grad_norm": 0.26953125, "learning_rate": 0.0009664271656575909, "loss": 2.1473, "step": 26965 }, { "epoch": 0.7236474881923572, "grad_norm": 0.2216796875, "learning_rate": 0.0009664049124425299, "loss": 2.1054, "step": 26966 }, { "epoch": 0.7236743237440961, "grad_norm": 0.234375, "learning_rate": 0.0009663826584236845, "loss": 2.2592, "step": 26967 }, { "epoch": 0.7237011592958351, "grad_norm": 0.224609375, "learning_rate": 0.0009663604036011033, "loss": 2.2077, "step": 26968 }, { "epoch": 0.7237279948475741, "grad_norm": 0.2470703125, "learning_rate": 0.0009663381479748349, "loss": 2.2311, "step": 26969 }, { "epoch": 0.723754830399313, "grad_norm": 0.2255859375, "learning_rate": 0.0009663158915449287, "loss": 2.1952, "step": 26970 }, { "epoch": 0.723781665951052, "grad_norm": 0.23828125, "learning_rate": 0.000966293634311433, "loss": 2.2205, "step": 26971 }, { "epoch": 0.7238085015027909, "grad_norm": 0.234375, "learning_rate": 0.0009662713762743968, "loss": 2.2166, "step": 26972 }, { "epoch": 0.7238353370545298, "grad_norm": 0.236328125, "learning_rate": 0.0009662491174338693, "loss": 2.1088, "step": 26973 }, { "epoch": 0.7238621726062688, "grad_norm": 0.2275390625, "learning_rate": 0.0009662268577898987, "loss": 2.1393, "step": 26974 }, { "epoch": 0.7238890081580077, "grad_norm": 0.23046875, "learning_rate": 0.0009662045973425341, "loss": 2.145, "step": 26975 }, { "epoch": 0.7239158437097467, "grad_norm": 0.234375, "learning_rate": 0.0009661823360918245, "loss": 2.1605, "step": 26976 }, { "epoch": 0.7239426792614856, "grad_norm": 0.236328125, "learning_rate": 0.0009661600740378184, "loss": 2.1621, "step": 26977 }, { "epoch": 0.7239695148132246, "grad_norm": 0.2265625, "learning_rate": 0.0009661378111805649, "loss": 2.1707, "step": 26978 }, { "epoch": 0.7239963503649635, "grad_norm": 0.2373046875, "learning_rate": 0.0009661155475201129, "loss": 2.1041, "step": 26979 }, { "epoch": 0.7240231859167024, "grad_norm": 0.2314453125, "learning_rate": 0.000966093283056511, "loss": 2.1351, "step": 26980 }, { "epoch": 0.7240500214684414, "grad_norm": 0.236328125, "learning_rate": 0.0009660710177898082, "loss": 2.0923, "step": 26981 }, { "epoch": 0.7240768570201803, "grad_norm": 0.2265625, "learning_rate": 0.0009660487517200533, "loss": 2.1454, "step": 26982 }, { "epoch": 0.7241036925719193, "grad_norm": 0.2421875, "learning_rate": 0.0009660264848472948, "loss": 2.1323, "step": 26983 }, { "epoch": 0.7241305281236582, "grad_norm": 0.2314453125, "learning_rate": 0.0009660042171715822, "loss": 2.1255, "step": 26984 }, { "epoch": 0.7241573636753972, "grad_norm": 0.23828125, "learning_rate": 0.000965981948692964, "loss": 2.0842, "step": 26985 }, { "epoch": 0.7241841992271361, "grad_norm": 0.2412109375, "learning_rate": 0.000965959679411489, "loss": 2.2031, "step": 26986 }, { "epoch": 0.724211034778875, "grad_norm": 0.2314453125, "learning_rate": 0.0009659374093272062, "loss": 2.1544, "step": 26987 }, { "epoch": 0.724237870330614, "grad_norm": 0.2294921875, "learning_rate": 0.0009659151384401643, "loss": 2.1142, "step": 26988 }, { "epoch": 0.7242647058823529, "grad_norm": 0.2294921875, "learning_rate": 0.0009658928667504124, "loss": 2.1597, "step": 26989 }, { "epoch": 0.7242915414340919, "grad_norm": 0.240234375, "learning_rate": 0.0009658705942579991, "loss": 2.1887, "step": 26990 }, { "epoch": 0.7243183769858308, "grad_norm": 0.2294921875, "learning_rate": 0.0009658483209629733, "loss": 2.2054, "step": 26991 }, { "epoch": 0.7243452125375698, "grad_norm": 0.234375, "learning_rate": 0.000965826046865384, "loss": 2.1362, "step": 26992 }, { "epoch": 0.7243720480893087, "grad_norm": 0.2373046875, "learning_rate": 0.00096580377196528, "loss": 2.2231, "step": 26993 }, { "epoch": 0.7243988836410477, "grad_norm": 0.2431640625, "learning_rate": 0.00096578149626271, "loss": 2.1113, "step": 26994 }, { "epoch": 0.7244257191927866, "grad_norm": 0.2314453125, "learning_rate": 0.0009657592197577233, "loss": 2.1466, "step": 26995 }, { "epoch": 0.7244525547445255, "grad_norm": 0.2314453125, "learning_rate": 0.0009657369424503681, "loss": 2.0601, "step": 26996 }, { "epoch": 0.7244793902962645, "grad_norm": 0.234375, "learning_rate": 0.0009657146643406939, "loss": 2.1601, "step": 26997 }, { "epoch": 0.7245062258480034, "grad_norm": 0.2373046875, "learning_rate": 0.0009656923854287491, "loss": 2.1097, "step": 26998 }, { "epoch": 0.7245330613997424, "grad_norm": 0.2392578125, "learning_rate": 0.0009656701057145832, "loss": 2.1969, "step": 26999 }, { "epoch": 0.7245598969514814, "grad_norm": 0.2421875, "learning_rate": 0.0009656478251982443, "loss": 2.0981, "step": 27000 }, { "epoch": 0.7245867325032203, "grad_norm": 0.23046875, "learning_rate": 0.0009656255438797818, "loss": 2.1531, "step": 27001 }, { "epoch": 0.7246135680549592, "grad_norm": 0.2275390625, "learning_rate": 0.0009656032617592445, "loss": 2.19, "step": 27002 }, { "epoch": 0.7246404036066981, "grad_norm": 0.2412109375, "learning_rate": 0.000965580978836681, "loss": 2.2082, "step": 27003 }, { "epoch": 0.7246672391584371, "grad_norm": 0.240234375, "learning_rate": 0.0009655586951121405, "loss": 2.1847, "step": 27004 }, { "epoch": 0.724694074710176, "grad_norm": 0.24609375, "learning_rate": 0.0009655364105856717, "loss": 2.1877, "step": 27005 }, { "epoch": 0.724720910261915, "grad_norm": 0.2275390625, "learning_rate": 0.0009655141252573237, "loss": 2.1352, "step": 27006 }, { "epoch": 0.724747745813654, "grad_norm": 0.234375, "learning_rate": 0.0009654918391271452, "loss": 2.0838, "step": 27007 }, { "epoch": 0.7247745813653929, "grad_norm": 0.248046875, "learning_rate": 0.000965469552195185, "loss": 2.1498, "step": 27008 }, { "epoch": 0.7248014169171318, "grad_norm": 0.2392578125, "learning_rate": 0.0009654472644614922, "loss": 2.0345, "step": 27009 }, { "epoch": 0.7248282524688707, "grad_norm": 0.2392578125, "learning_rate": 0.0009654249759261157, "loss": 2.1333, "step": 27010 }, { "epoch": 0.7248550880206097, "grad_norm": 0.23828125, "learning_rate": 0.0009654026865891041, "loss": 2.1047, "step": 27011 }, { "epoch": 0.7248819235723486, "grad_norm": 0.240234375, "learning_rate": 0.0009653803964505068, "loss": 2.2789, "step": 27012 }, { "epoch": 0.7249087591240876, "grad_norm": 0.24609375, "learning_rate": 0.0009653581055103723, "loss": 2.2463, "step": 27013 }, { "epoch": 0.7249355946758266, "grad_norm": 0.232421875, "learning_rate": 0.0009653358137687497, "loss": 2.1168, "step": 27014 }, { "epoch": 0.7249624302275655, "grad_norm": 0.2333984375, "learning_rate": 0.0009653135212256876, "loss": 2.1582, "step": 27015 }, { "epoch": 0.7249892657793044, "grad_norm": 0.232421875, "learning_rate": 0.0009652912278812352, "loss": 2.2003, "step": 27016 }, { "epoch": 0.7250161013310433, "grad_norm": 0.248046875, "learning_rate": 0.0009652689337354413, "loss": 2.2424, "step": 27017 }, { "epoch": 0.7250429368827823, "grad_norm": 0.2470703125, "learning_rate": 0.0009652466387883547, "loss": 2.1769, "step": 27018 }, { "epoch": 0.7250697724345213, "grad_norm": 0.2236328125, "learning_rate": 0.0009652243430400246, "loss": 2.1359, "step": 27019 }, { "epoch": 0.7250966079862602, "grad_norm": 0.220703125, "learning_rate": 0.0009652020464904998, "loss": 2.0384, "step": 27020 }, { "epoch": 0.7251234435379992, "grad_norm": 0.224609375, "learning_rate": 0.000965179749139829, "loss": 2.0872, "step": 27021 }, { "epoch": 0.7251502790897381, "grad_norm": 0.2265625, "learning_rate": 0.0009651574509880614, "loss": 2.104, "step": 27022 }, { "epoch": 0.725177114641477, "grad_norm": 0.2255859375, "learning_rate": 0.0009651351520352456, "loss": 2.1269, "step": 27023 }, { "epoch": 0.7252039501932159, "grad_norm": 0.25, "learning_rate": 0.0009651128522814309, "loss": 2.1226, "step": 27024 }, { "epoch": 0.7252307857449549, "grad_norm": 0.244140625, "learning_rate": 0.0009650905517266659, "loss": 2.1523, "step": 27025 }, { "epoch": 0.7252576212966939, "grad_norm": 0.240234375, "learning_rate": 0.0009650682503709998, "loss": 2.2547, "step": 27026 }, { "epoch": 0.7252844568484328, "grad_norm": 0.2275390625, "learning_rate": 0.0009650459482144812, "loss": 2.1186, "step": 27027 }, { "epoch": 0.7253112924001718, "grad_norm": 0.23828125, "learning_rate": 0.0009650236452571592, "loss": 2.1554, "step": 27028 }, { "epoch": 0.7253381279519107, "grad_norm": 0.232421875, "learning_rate": 0.0009650013414990828, "loss": 2.1628, "step": 27029 }, { "epoch": 0.7253649635036497, "grad_norm": 0.2333984375, "learning_rate": 0.0009649790369403008, "loss": 2.1843, "step": 27030 }, { "epoch": 0.7253917990553885, "grad_norm": 0.224609375, "learning_rate": 0.0009649567315808621, "loss": 2.084, "step": 27031 }, { "epoch": 0.7254186346071275, "grad_norm": 0.2294921875, "learning_rate": 0.0009649344254208157, "loss": 2.1145, "step": 27032 }, { "epoch": 0.7254454701588665, "grad_norm": 0.232421875, "learning_rate": 0.0009649121184602106, "loss": 2.1244, "step": 27033 }, { "epoch": 0.7254723057106054, "grad_norm": 0.23828125, "learning_rate": 0.0009648898106990958, "loss": 2.1053, "step": 27034 }, { "epoch": 0.7254991412623444, "grad_norm": 0.234375, "learning_rate": 0.0009648675021375198, "loss": 2.1288, "step": 27035 }, { "epoch": 0.7255259768140833, "grad_norm": 0.2216796875, "learning_rate": 0.0009648451927755321, "loss": 2.0843, "step": 27036 }, { "epoch": 0.7255528123658223, "grad_norm": 0.2265625, "learning_rate": 0.0009648228826131813, "loss": 2.2273, "step": 27037 }, { "epoch": 0.7255796479175611, "grad_norm": 0.2431640625, "learning_rate": 0.0009648005716505165, "loss": 2.1721, "step": 27038 }, { "epoch": 0.7256064834693001, "grad_norm": 0.2431640625, "learning_rate": 0.0009647782598875864, "loss": 2.1447, "step": 27039 }, { "epoch": 0.7256333190210391, "grad_norm": 0.2333984375, "learning_rate": 0.0009647559473244403, "loss": 2.1938, "step": 27040 }, { "epoch": 0.725660154572778, "grad_norm": 0.23046875, "learning_rate": 0.0009647336339611268, "loss": 2.0518, "step": 27041 }, { "epoch": 0.725686990124517, "grad_norm": 0.240234375, "learning_rate": 0.0009647113197976951, "loss": 2.2389, "step": 27042 }, { "epoch": 0.7257138256762559, "grad_norm": 0.224609375, "learning_rate": 0.000964689004834194, "loss": 2.0746, "step": 27043 }, { "epoch": 0.7257406612279949, "grad_norm": 0.220703125, "learning_rate": 0.0009646666890706727, "loss": 2.1221, "step": 27044 }, { "epoch": 0.7257674967797338, "grad_norm": 0.2216796875, "learning_rate": 0.0009646443725071798, "loss": 2.2062, "step": 27045 }, { "epoch": 0.7257943323314727, "grad_norm": 0.224609375, "learning_rate": 0.0009646220551437645, "loss": 2.107, "step": 27046 }, { "epoch": 0.7258211678832117, "grad_norm": 0.2265625, "learning_rate": 0.0009645997369804757, "loss": 2.0709, "step": 27047 }, { "epoch": 0.7258480034349506, "grad_norm": 0.2294921875, "learning_rate": 0.0009645774180173623, "loss": 2.1617, "step": 27048 }, { "epoch": 0.7258748389866896, "grad_norm": 0.232421875, "learning_rate": 0.0009645550982544732, "loss": 2.175, "step": 27049 }, { "epoch": 0.7259016745384285, "grad_norm": 0.228515625, "learning_rate": 0.0009645327776918577, "loss": 2.0784, "step": 27050 }, { "epoch": 0.7259285100901675, "grad_norm": 0.234375, "learning_rate": 0.0009645104563295643, "loss": 2.0981, "step": 27051 }, { "epoch": 0.7259553456419064, "grad_norm": 0.2216796875, "learning_rate": 0.0009644881341676424, "loss": 2.0794, "step": 27052 }, { "epoch": 0.7259821811936453, "grad_norm": 0.22265625, "learning_rate": 0.0009644658112061405, "loss": 2.0852, "step": 27053 }, { "epoch": 0.7260090167453843, "grad_norm": 0.228515625, "learning_rate": 0.0009644434874451082, "loss": 2.0159, "step": 27054 }, { "epoch": 0.7260358522971232, "grad_norm": 0.2412109375, "learning_rate": 0.0009644211628845939, "loss": 2.1225, "step": 27055 }, { "epoch": 0.7260626878488622, "grad_norm": 0.2392578125, "learning_rate": 0.0009643988375246467, "loss": 2.1051, "step": 27056 }, { "epoch": 0.7260895234006011, "grad_norm": 0.2265625, "learning_rate": 0.0009643765113653158, "loss": 2.0661, "step": 27057 }, { "epoch": 0.7261163589523401, "grad_norm": 0.2421875, "learning_rate": 0.0009643541844066501, "loss": 2.1076, "step": 27058 }, { "epoch": 0.726143194504079, "grad_norm": 0.2255859375, "learning_rate": 0.0009643318566486984, "loss": 2.1307, "step": 27059 }, { "epoch": 0.7261700300558179, "grad_norm": 0.2314453125, "learning_rate": 0.0009643095280915098, "loss": 2.1666, "step": 27060 }, { "epoch": 0.7261968656075569, "grad_norm": 0.23828125, "learning_rate": 0.0009642871987351331, "loss": 2.1721, "step": 27061 }, { "epoch": 0.7262237011592958, "grad_norm": 0.23828125, "learning_rate": 0.0009642648685796178, "loss": 2.1274, "step": 27062 }, { "epoch": 0.7262505367110348, "grad_norm": 0.2373046875, "learning_rate": 0.0009642425376250125, "loss": 2.1931, "step": 27063 }, { "epoch": 0.7262773722627737, "grad_norm": 0.228515625, "learning_rate": 0.000964220205871366, "loss": 2.1391, "step": 27064 }, { "epoch": 0.7263042078145127, "grad_norm": 0.236328125, "learning_rate": 0.0009641978733187276, "loss": 2.1278, "step": 27065 }, { "epoch": 0.7263310433662516, "grad_norm": 0.2314453125, "learning_rate": 0.0009641755399671464, "loss": 2.1751, "step": 27066 }, { "epoch": 0.7263578789179905, "grad_norm": 0.2333984375, "learning_rate": 0.000964153205816671, "loss": 2.2126, "step": 27067 }, { "epoch": 0.7263847144697295, "grad_norm": 0.236328125, "learning_rate": 0.0009641308708673506, "loss": 2.1756, "step": 27068 }, { "epoch": 0.7264115500214684, "grad_norm": 0.2373046875, "learning_rate": 0.0009641085351192344, "loss": 2.1857, "step": 27069 }, { "epoch": 0.7264383855732074, "grad_norm": 0.232421875, "learning_rate": 0.0009640861985723709, "loss": 2.0709, "step": 27070 }, { "epoch": 0.7264652211249464, "grad_norm": 0.2265625, "learning_rate": 0.0009640638612268096, "loss": 2.1747, "step": 27071 }, { "epoch": 0.7264920566766853, "grad_norm": 0.23828125, "learning_rate": 0.0009640415230825993, "loss": 2.1722, "step": 27072 }, { "epoch": 0.7265188922284243, "grad_norm": 0.224609375, "learning_rate": 0.000964019184139789, "loss": 2.1607, "step": 27073 }, { "epoch": 0.7265457277801631, "grad_norm": 0.240234375, "learning_rate": 0.0009639968443984278, "loss": 2.148, "step": 27074 }, { "epoch": 0.7265725633319021, "grad_norm": 0.2294921875, "learning_rate": 0.0009639745038585644, "loss": 2.0178, "step": 27075 }, { "epoch": 0.726599398883641, "grad_norm": 0.2265625, "learning_rate": 0.000963952162520248, "loss": 2.2157, "step": 27076 }, { "epoch": 0.72662623443538, "grad_norm": 0.2392578125, "learning_rate": 0.0009639298203835278, "loss": 2.1252, "step": 27077 }, { "epoch": 0.726653069987119, "grad_norm": 0.2236328125, "learning_rate": 0.0009639074774484526, "loss": 2.1947, "step": 27078 }, { "epoch": 0.7266799055388579, "grad_norm": 0.228515625, "learning_rate": 0.0009638851337150715, "loss": 2.1544, "step": 27079 }, { "epoch": 0.7267067410905969, "grad_norm": 0.23046875, "learning_rate": 0.0009638627891834333, "loss": 2.1747, "step": 27080 }, { "epoch": 0.7267335766423357, "grad_norm": 0.2314453125, "learning_rate": 0.0009638404438535873, "loss": 2.1842, "step": 27081 }, { "epoch": 0.7267604121940747, "grad_norm": 0.2314453125, "learning_rate": 0.0009638180977255825, "loss": 2.1011, "step": 27082 }, { "epoch": 0.7267872477458136, "grad_norm": 0.2353515625, "learning_rate": 0.0009637957507994677, "loss": 2.2065, "step": 27083 }, { "epoch": 0.7268140832975526, "grad_norm": 0.224609375, "learning_rate": 0.0009637734030752921, "loss": 2.2345, "step": 27084 }, { "epoch": 0.7268409188492916, "grad_norm": 0.2353515625, "learning_rate": 0.0009637510545531045, "loss": 2.0384, "step": 27085 }, { "epoch": 0.7268677544010305, "grad_norm": 0.2236328125, "learning_rate": 0.0009637287052329546, "loss": 2.0505, "step": 27086 }, { "epoch": 0.7268945899527695, "grad_norm": 0.234375, "learning_rate": 0.0009637063551148904, "loss": 2.2594, "step": 27087 }, { "epoch": 0.7269214255045083, "grad_norm": 0.2392578125, "learning_rate": 0.0009636840041989618, "loss": 2.1653, "step": 27088 }, { "epoch": 0.7269482610562473, "grad_norm": 0.240234375, "learning_rate": 0.0009636616524852172, "loss": 2.2218, "step": 27089 }, { "epoch": 0.7269750966079863, "grad_norm": 0.2431640625, "learning_rate": 0.0009636392999737061, "loss": 2.2722, "step": 27090 }, { "epoch": 0.7270019321597252, "grad_norm": 0.2255859375, "learning_rate": 0.0009636169466644774, "loss": 2.0585, "step": 27091 }, { "epoch": 0.7270287677114642, "grad_norm": 0.2236328125, "learning_rate": 0.00096359459255758, "loss": 2.0904, "step": 27092 }, { "epoch": 0.7270556032632031, "grad_norm": 0.228515625, "learning_rate": 0.0009635722376530631, "loss": 2.0807, "step": 27093 }, { "epoch": 0.7270824388149421, "grad_norm": 0.228515625, "learning_rate": 0.0009635498819509756, "loss": 2.1116, "step": 27094 }, { "epoch": 0.7271092743666809, "grad_norm": 0.2255859375, "learning_rate": 0.0009635275254513667, "loss": 2.0443, "step": 27095 }, { "epoch": 0.7271361099184199, "grad_norm": 0.228515625, "learning_rate": 0.0009635051681542853, "loss": 2.1908, "step": 27096 }, { "epoch": 0.7271629454701589, "grad_norm": 0.2255859375, "learning_rate": 0.0009634828100597807, "loss": 2.0682, "step": 27097 }, { "epoch": 0.7271897810218978, "grad_norm": 0.2294921875, "learning_rate": 0.0009634604511679014, "loss": 2.1577, "step": 27098 }, { "epoch": 0.7272166165736368, "grad_norm": 0.2333984375, "learning_rate": 0.0009634380914786972, "loss": 2.0453, "step": 27099 }, { "epoch": 0.7272434521253757, "grad_norm": 0.2353515625, "learning_rate": 0.0009634157309922164, "loss": 2.1211, "step": 27100 }, { "epoch": 0.7272702876771147, "grad_norm": 0.2314453125, "learning_rate": 0.0009633933697085086, "loss": 2.1068, "step": 27101 }, { "epoch": 0.7272971232288535, "grad_norm": 0.2265625, "learning_rate": 0.0009633710076276226, "loss": 2.1775, "step": 27102 }, { "epoch": 0.7273239587805925, "grad_norm": 0.2373046875, "learning_rate": 0.0009633486447496075, "loss": 2.217, "step": 27103 }, { "epoch": 0.7273507943323315, "grad_norm": 0.2255859375, "learning_rate": 0.0009633262810745123, "loss": 2.1242, "step": 27104 }, { "epoch": 0.7273776298840704, "grad_norm": 0.2314453125, "learning_rate": 0.0009633039166023864, "loss": 2.1869, "step": 27105 }, { "epoch": 0.7274044654358094, "grad_norm": 0.224609375, "learning_rate": 0.0009632815513332783, "loss": 2.0399, "step": 27106 }, { "epoch": 0.7274313009875483, "grad_norm": 0.248046875, "learning_rate": 0.0009632591852672374, "loss": 2.1788, "step": 27107 }, { "epoch": 0.7274581365392873, "grad_norm": 0.2333984375, "learning_rate": 0.000963236818404313, "loss": 2.1574, "step": 27108 }, { "epoch": 0.7274849720910261, "grad_norm": 0.216796875, "learning_rate": 0.0009632144507445537, "loss": 2.1322, "step": 27109 }, { "epoch": 0.7275118076427651, "grad_norm": 0.228515625, "learning_rate": 0.0009631920822880086, "loss": 2.114, "step": 27110 }, { "epoch": 0.7275386431945041, "grad_norm": 0.224609375, "learning_rate": 0.0009631697130347271, "loss": 2.1561, "step": 27111 }, { "epoch": 0.727565478746243, "grad_norm": 0.2412109375, "learning_rate": 0.000963147342984758, "loss": 2.1348, "step": 27112 }, { "epoch": 0.727592314297982, "grad_norm": 0.2265625, "learning_rate": 0.0009631249721381507, "loss": 2.062, "step": 27113 }, { "epoch": 0.7276191498497209, "grad_norm": 0.2373046875, "learning_rate": 0.0009631026004949538, "loss": 2.1516, "step": 27114 }, { "epoch": 0.7276459854014599, "grad_norm": 0.244140625, "learning_rate": 0.0009630802280552166, "loss": 2.2105, "step": 27115 }, { "epoch": 0.7276728209531989, "grad_norm": 0.240234375, "learning_rate": 0.0009630578548189883, "loss": 2.1916, "step": 27116 }, { "epoch": 0.7276996565049377, "grad_norm": 0.2275390625, "learning_rate": 0.0009630354807863179, "loss": 2.1439, "step": 27117 }, { "epoch": 0.7277264920566767, "grad_norm": 0.23046875, "learning_rate": 0.0009630131059572543, "loss": 2.2069, "step": 27118 }, { "epoch": 0.7277533276084156, "grad_norm": 0.228515625, "learning_rate": 0.0009629907303318471, "loss": 2.1525, "step": 27119 }, { "epoch": 0.7277801631601546, "grad_norm": 0.2255859375, "learning_rate": 0.0009629683539101446, "loss": 2.1281, "step": 27120 }, { "epoch": 0.7278069987118935, "grad_norm": 0.2294921875, "learning_rate": 0.0009629459766921966, "loss": 2.2082, "step": 27121 }, { "epoch": 0.7278338342636325, "grad_norm": 0.2314453125, "learning_rate": 0.0009629235986780519, "loss": 2.1763, "step": 27122 }, { "epoch": 0.7278606698153715, "grad_norm": 0.2294921875, "learning_rate": 0.0009629012198677594, "loss": 2.1861, "step": 27123 }, { "epoch": 0.7278875053671103, "grad_norm": 0.2314453125, "learning_rate": 0.0009628788402613686, "loss": 2.2077, "step": 27124 }, { "epoch": 0.7279143409188493, "grad_norm": 0.2392578125, "learning_rate": 0.0009628564598589283, "loss": 2.0717, "step": 27125 }, { "epoch": 0.7279411764705882, "grad_norm": 0.2373046875, "learning_rate": 0.0009628340786604876, "loss": 2.1232, "step": 27126 }, { "epoch": 0.7279680120223272, "grad_norm": 0.2294921875, "learning_rate": 0.0009628116966660958, "loss": 2.112, "step": 27127 }, { "epoch": 0.7279948475740661, "grad_norm": 0.22265625, "learning_rate": 0.0009627893138758019, "loss": 2.1046, "step": 27128 }, { "epoch": 0.7280216831258051, "grad_norm": 0.2353515625, "learning_rate": 0.0009627669302896549, "loss": 2.2024, "step": 27129 }, { "epoch": 0.728048518677544, "grad_norm": 0.2197265625, "learning_rate": 0.0009627445459077041, "loss": 2.1565, "step": 27130 }, { "epoch": 0.7280753542292829, "grad_norm": 0.232421875, "learning_rate": 0.0009627221607299984, "loss": 2.1666, "step": 27131 }, { "epoch": 0.7281021897810219, "grad_norm": 0.228515625, "learning_rate": 0.0009626997747565868, "loss": 2.187, "step": 27132 }, { "epoch": 0.7281290253327608, "grad_norm": 0.224609375, "learning_rate": 0.000962677387987519, "loss": 2.0812, "step": 27133 }, { "epoch": 0.7281558608844998, "grad_norm": 0.2294921875, "learning_rate": 0.0009626550004228433, "loss": 2.0935, "step": 27134 }, { "epoch": 0.7281826964362387, "grad_norm": 0.2421875, "learning_rate": 0.0009626326120626095, "loss": 2.1661, "step": 27135 }, { "epoch": 0.7282095319879777, "grad_norm": 0.2314453125, "learning_rate": 0.0009626102229068663, "loss": 2.0947, "step": 27136 }, { "epoch": 0.7282363675397167, "grad_norm": 0.2255859375, "learning_rate": 0.0009625878329556631, "loss": 2.0922, "step": 27137 }, { "epoch": 0.7282632030914555, "grad_norm": 0.220703125, "learning_rate": 0.0009625654422090486, "loss": 2.0479, "step": 27138 }, { "epoch": 0.7282900386431945, "grad_norm": 0.232421875, "learning_rate": 0.0009625430506670727, "loss": 2.1394, "step": 27139 }, { "epoch": 0.7283168741949334, "grad_norm": 0.23046875, "learning_rate": 0.0009625206583297834, "loss": 2.1424, "step": 27140 }, { "epoch": 0.7283437097466724, "grad_norm": 0.228515625, "learning_rate": 0.0009624982651972309, "loss": 2.1662, "step": 27141 }, { "epoch": 0.7283705452984114, "grad_norm": 0.236328125, "learning_rate": 0.0009624758712694635, "loss": 2.1396, "step": 27142 }, { "epoch": 0.7283973808501503, "grad_norm": 0.2275390625, "learning_rate": 0.0009624534765465309, "loss": 2.1869, "step": 27143 }, { "epoch": 0.7284242164018893, "grad_norm": 0.2275390625, "learning_rate": 0.0009624310810284821, "loss": 2.1226, "step": 27144 }, { "epoch": 0.7284510519536281, "grad_norm": 0.2236328125, "learning_rate": 0.0009624086847153658, "loss": 2.2102, "step": 27145 }, { "epoch": 0.7284778875053671, "grad_norm": 0.2333984375, "learning_rate": 0.0009623862876072317, "loss": 2.069, "step": 27146 }, { "epoch": 0.728504723057106, "grad_norm": 0.2314453125, "learning_rate": 0.0009623638897041287, "loss": 2.1798, "step": 27147 }, { "epoch": 0.728531558608845, "grad_norm": 0.228515625, "learning_rate": 0.0009623414910061059, "loss": 2.1285, "step": 27148 }, { "epoch": 0.728558394160584, "grad_norm": 0.2216796875, "learning_rate": 0.0009623190915132125, "loss": 2.0166, "step": 27149 }, { "epoch": 0.7285852297123229, "grad_norm": 0.224609375, "learning_rate": 0.0009622966912254976, "loss": 2.0592, "step": 27150 }, { "epoch": 0.7286120652640619, "grad_norm": 0.2451171875, "learning_rate": 0.0009622742901430104, "loss": 2.1268, "step": 27151 }, { "epoch": 0.7286389008158007, "grad_norm": 0.228515625, "learning_rate": 0.0009622518882657998, "loss": 2.1501, "step": 27152 }, { "epoch": 0.7286657363675397, "grad_norm": 0.2265625, "learning_rate": 0.0009622294855939152, "loss": 2.075, "step": 27153 }, { "epoch": 0.7286925719192786, "grad_norm": 0.2392578125, "learning_rate": 0.0009622070821274058, "loss": 2.118, "step": 27154 }, { "epoch": 0.7287194074710176, "grad_norm": 0.21875, "learning_rate": 0.0009621846778663205, "loss": 2.1491, "step": 27155 }, { "epoch": 0.7287462430227566, "grad_norm": 0.224609375, "learning_rate": 0.0009621622728107087, "loss": 2.1106, "step": 27156 }, { "epoch": 0.7287730785744955, "grad_norm": 0.234375, "learning_rate": 0.0009621398669606193, "loss": 2.106, "step": 27157 }, { "epoch": 0.7287999141262345, "grad_norm": 0.2353515625, "learning_rate": 0.0009621174603161016, "loss": 2.1331, "step": 27158 }, { "epoch": 0.7288267496779733, "grad_norm": 0.2236328125, "learning_rate": 0.0009620950528772047, "loss": 2.0799, "step": 27159 }, { "epoch": 0.7288535852297123, "grad_norm": 0.2255859375, "learning_rate": 0.0009620726446439779, "loss": 2.0553, "step": 27160 }, { "epoch": 0.7288804207814513, "grad_norm": 0.23046875, "learning_rate": 0.0009620502356164701, "loss": 2.1639, "step": 27161 }, { "epoch": 0.7289072563331902, "grad_norm": 0.224609375, "learning_rate": 0.0009620278257947308, "loss": 2.16, "step": 27162 }, { "epoch": 0.7289340918849292, "grad_norm": 0.228515625, "learning_rate": 0.0009620054151788088, "loss": 2.1104, "step": 27163 }, { "epoch": 0.7289609274366681, "grad_norm": 0.23046875, "learning_rate": 0.0009619830037687535, "loss": 2.1928, "step": 27164 }, { "epoch": 0.7289877629884071, "grad_norm": 0.224609375, "learning_rate": 0.0009619605915646138, "loss": 2.1029, "step": 27165 }, { "epoch": 0.7290145985401459, "grad_norm": 0.224609375, "learning_rate": 0.0009619381785664394, "loss": 2.0679, "step": 27166 }, { "epoch": 0.7290414340918849, "grad_norm": 0.2314453125, "learning_rate": 0.0009619157647742789, "loss": 2.1859, "step": 27167 }, { "epoch": 0.7290682696436239, "grad_norm": 0.232421875, "learning_rate": 0.0009618933501881816, "loss": 2.1577, "step": 27168 }, { "epoch": 0.7290951051953628, "grad_norm": 0.23046875, "learning_rate": 0.000961870934808197, "loss": 2.1708, "step": 27169 }, { "epoch": 0.7291219407471018, "grad_norm": 0.2412109375, "learning_rate": 0.0009618485186343738, "loss": 2.2125, "step": 27170 }, { "epoch": 0.7291487762988407, "grad_norm": 0.228515625, "learning_rate": 0.0009618261016667615, "loss": 2.2227, "step": 27171 }, { "epoch": 0.7291756118505797, "grad_norm": 0.232421875, "learning_rate": 0.0009618036839054092, "loss": 2.0752, "step": 27172 }, { "epoch": 0.7292024474023185, "grad_norm": 0.232421875, "learning_rate": 0.000961781265350366, "loss": 2.1375, "step": 27173 }, { "epoch": 0.7292292829540575, "grad_norm": 0.2197265625, "learning_rate": 0.0009617588460016813, "loss": 1.9475, "step": 27174 }, { "epoch": 0.7292561185057965, "grad_norm": 0.2236328125, "learning_rate": 0.0009617364258594039, "loss": 2.0514, "step": 27175 }, { "epoch": 0.7292829540575354, "grad_norm": 0.2314453125, "learning_rate": 0.0009617140049235833, "loss": 2.071, "step": 27176 }, { "epoch": 0.7293097896092744, "grad_norm": 0.236328125, "learning_rate": 0.0009616915831942686, "loss": 2.0483, "step": 27177 }, { "epoch": 0.7293366251610133, "grad_norm": 0.228515625, "learning_rate": 0.000961669160671509, "loss": 2.1248, "step": 27178 }, { "epoch": 0.7293634607127523, "grad_norm": 0.234375, "learning_rate": 0.0009616467373553537, "loss": 2.16, "step": 27179 }, { "epoch": 0.7293902962644911, "grad_norm": 0.2412109375, "learning_rate": 0.0009616243132458519, "loss": 2.1773, "step": 27180 }, { "epoch": 0.7294171318162301, "grad_norm": 0.2333984375, "learning_rate": 0.0009616018883430526, "loss": 2.0797, "step": 27181 }, { "epoch": 0.7294439673679691, "grad_norm": 0.236328125, "learning_rate": 0.0009615794626470053, "loss": 2.1317, "step": 27182 }, { "epoch": 0.729470802919708, "grad_norm": 0.2294921875, "learning_rate": 0.0009615570361577588, "loss": 2.1317, "step": 27183 }, { "epoch": 0.729497638471447, "grad_norm": 0.2392578125, "learning_rate": 0.0009615346088753627, "loss": 2.1867, "step": 27184 }, { "epoch": 0.7295244740231859, "grad_norm": 0.220703125, "learning_rate": 0.000961512180799866, "loss": 2.1132, "step": 27185 }, { "epoch": 0.7295513095749249, "grad_norm": 0.2265625, "learning_rate": 0.000961489751931318, "loss": 2.1333, "step": 27186 }, { "epoch": 0.7295781451266639, "grad_norm": 0.2197265625, "learning_rate": 0.0009614673222697678, "loss": 2.0932, "step": 27187 }, { "epoch": 0.7296049806784027, "grad_norm": 0.2197265625, "learning_rate": 0.0009614448918152645, "loss": 2.1524, "step": 27188 }, { "epoch": 0.7296318162301417, "grad_norm": 0.228515625, "learning_rate": 0.0009614224605678577, "loss": 2.2563, "step": 27189 }, { "epoch": 0.7296586517818806, "grad_norm": 0.228515625, "learning_rate": 0.0009614000285275961, "loss": 2.1595, "step": 27190 }, { "epoch": 0.7296854873336196, "grad_norm": 0.216796875, "learning_rate": 0.0009613775956945293, "loss": 2.0525, "step": 27191 }, { "epoch": 0.7297123228853585, "grad_norm": 0.2353515625, "learning_rate": 0.0009613551620687063, "loss": 2.1998, "step": 27192 }, { "epoch": 0.7297391584370975, "grad_norm": 0.228515625, "learning_rate": 0.0009613327276501765, "loss": 2.069, "step": 27193 }, { "epoch": 0.7297659939888365, "grad_norm": 0.236328125, "learning_rate": 0.0009613102924389891, "loss": 2.211, "step": 27194 }, { "epoch": 0.7297928295405753, "grad_norm": 0.2275390625, "learning_rate": 0.0009612878564351929, "loss": 2.0631, "step": 27195 }, { "epoch": 0.7298196650923143, "grad_norm": 0.2294921875, "learning_rate": 0.0009612654196388377, "loss": 2.169, "step": 27196 }, { "epoch": 0.7298465006440532, "grad_norm": 0.23828125, "learning_rate": 0.0009612429820499724, "loss": 2.1256, "step": 27197 }, { "epoch": 0.7298733361957922, "grad_norm": 0.224609375, "learning_rate": 0.0009612205436686461, "loss": 2.0623, "step": 27198 }, { "epoch": 0.7299001717475311, "grad_norm": 0.216796875, "learning_rate": 0.0009611981044949083, "loss": 2.0869, "step": 27199 }, { "epoch": 0.7299270072992701, "grad_norm": 0.224609375, "learning_rate": 0.0009611756645288082, "loss": 2.1868, "step": 27200 }, { "epoch": 0.7299538428510091, "grad_norm": 0.2431640625, "learning_rate": 0.000961153223770395, "loss": 2.1921, "step": 27201 }, { "epoch": 0.7299806784027479, "grad_norm": 0.236328125, "learning_rate": 0.0009611307822197177, "loss": 2.134, "step": 27202 }, { "epoch": 0.7300075139544869, "grad_norm": 0.22265625, "learning_rate": 0.0009611083398768258, "loss": 2.0839, "step": 27203 }, { "epoch": 0.7300343495062258, "grad_norm": 0.2216796875, "learning_rate": 0.0009610858967417683, "loss": 2.0623, "step": 27204 }, { "epoch": 0.7300611850579648, "grad_norm": 0.21875, "learning_rate": 0.0009610634528145948, "loss": 2.1334, "step": 27205 }, { "epoch": 0.7300880206097037, "grad_norm": 0.2216796875, "learning_rate": 0.0009610410080953543, "loss": 2.1397, "step": 27206 }, { "epoch": 0.7301148561614427, "grad_norm": 0.2236328125, "learning_rate": 0.0009610185625840959, "loss": 2.1878, "step": 27207 }, { "epoch": 0.7301416917131817, "grad_norm": 0.224609375, "learning_rate": 0.000960996116280869, "loss": 2.126, "step": 27208 }, { "epoch": 0.7301685272649205, "grad_norm": 0.2197265625, "learning_rate": 0.000960973669185723, "loss": 2.09, "step": 27209 }, { "epoch": 0.7301953628166595, "grad_norm": 0.220703125, "learning_rate": 0.0009609512212987067, "loss": 2.1306, "step": 27210 }, { "epoch": 0.7302221983683984, "grad_norm": 0.2197265625, "learning_rate": 0.0009609287726198699, "loss": 2.0706, "step": 27211 }, { "epoch": 0.7302490339201374, "grad_norm": 0.2333984375, "learning_rate": 0.0009609063231492613, "loss": 2.0935, "step": 27212 }, { "epoch": 0.7302758694718764, "grad_norm": 0.2431640625, "learning_rate": 0.0009608838728869308, "loss": 2.1864, "step": 27213 }, { "epoch": 0.7303027050236153, "grad_norm": 0.220703125, "learning_rate": 0.0009608614218329269, "loss": 2.067, "step": 27214 }, { "epoch": 0.7303295405753543, "grad_norm": 0.2255859375, "learning_rate": 0.0009608389699872991, "loss": 2.0715, "step": 27215 }, { "epoch": 0.7303563761270931, "grad_norm": 0.220703125, "learning_rate": 0.0009608165173500972, "loss": 2.1124, "step": 27216 }, { "epoch": 0.7303832116788321, "grad_norm": 0.2197265625, "learning_rate": 0.0009607940639213697, "loss": 2.0861, "step": 27217 }, { "epoch": 0.730410047230571, "grad_norm": 0.2294921875, "learning_rate": 0.0009607716097011662, "loss": 2.0706, "step": 27218 }, { "epoch": 0.73043688278231, "grad_norm": 0.232421875, "learning_rate": 0.000960749154689536, "loss": 2.0773, "step": 27219 }, { "epoch": 0.730463718334049, "grad_norm": 0.232421875, "learning_rate": 0.0009607266988865284, "loss": 2.0691, "step": 27220 }, { "epoch": 0.7304905538857879, "grad_norm": 0.2177734375, "learning_rate": 0.0009607042422921922, "loss": 2.1303, "step": 27221 }, { "epoch": 0.7305173894375269, "grad_norm": 0.2294921875, "learning_rate": 0.0009606817849065774, "loss": 2.1726, "step": 27222 }, { "epoch": 0.7305442249892657, "grad_norm": 0.2265625, "learning_rate": 0.0009606593267297328, "loss": 2.1326, "step": 27223 }, { "epoch": 0.7305710605410047, "grad_norm": 0.2236328125, "learning_rate": 0.0009606368677617074, "loss": 2.1885, "step": 27224 }, { "epoch": 0.7305978960927436, "grad_norm": 0.2373046875, "learning_rate": 0.0009606144080025512, "loss": 2.1875, "step": 27225 }, { "epoch": 0.7306247316444826, "grad_norm": 0.2255859375, "learning_rate": 0.0009605919474523129, "loss": 2.0962, "step": 27226 }, { "epoch": 0.7306515671962216, "grad_norm": 0.2353515625, "learning_rate": 0.0009605694861110419, "loss": 2.1383, "step": 27227 }, { "epoch": 0.7306784027479605, "grad_norm": 0.2197265625, "learning_rate": 0.0009605470239787877, "loss": 2.0971, "step": 27228 }, { "epoch": 0.7307052382996995, "grad_norm": 0.2294921875, "learning_rate": 0.0009605245610555993, "loss": 2.0593, "step": 27229 }, { "epoch": 0.7307320738514383, "grad_norm": 0.2255859375, "learning_rate": 0.0009605020973415261, "loss": 2.1328, "step": 27230 }, { "epoch": 0.7307589094031773, "grad_norm": 0.2294921875, "learning_rate": 0.0009604796328366174, "loss": 2.1038, "step": 27231 }, { "epoch": 0.7307857449549163, "grad_norm": 0.2255859375, "learning_rate": 0.0009604571675409224, "loss": 2.1524, "step": 27232 }, { "epoch": 0.7308125805066552, "grad_norm": 0.220703125, "learning_rate": 0.0009604347014544905, "loss": 2.0805, "step": 27233 }, { "epoch": 0.7308394160583942, "grad_norm": 0.2236328125, "learning_rate": 0.0009604122345773709, "loss": 2.0936, "step": 27234 }, { "epoch": 0.7308662516101331, "grad_norm": 0.228515625, "learning_rate": 0.0009603897669096127, "loss": 2.1249, "step": 27235 }, { "epoch": 0.7308930871618721, "grad_norm": 0.2353515625, "learning_rate": 0.0009603672984512657, "loss": 2.1685, "step": 27236 }, { "epoch": 0.730919922713611, "grad_norm": 0.2353515625, "learning_rate": 0.0009603448292023785, "loss": 2.1696, "step": 27237 }, { "epoch": 0.7309467582653499, "grad_norm": 0.216796875, "learning_rate": 0.0009603223591630009, "loss": 2.1089, "step": 27238 }, { "epoch": 0.7309735938170889, "grad_norm": 0.23828125, "learning_rate": 0.0009602998883331821, "loss": 2.1351, "step": 27239 }, { "epoch": 0.7310004293688278, "grad_norm": 0.2275390625, "learning_rate": 0.0009602774167129714, "loss": 2.1067, "step": 27240 }, { "epoch": 0.7310272649205668, "grad_norm": 0.23828125, "learning_rate": 0.0009602549443024181, "loss": 2.2571, "step": 27241 }, { "epoch": 0.7310541004723057, "grad_norm": 0.2275390625, "learning_rate": 0.0009602324711015713, "loss": 2.0648, "step": 27242 }, { "epoch": 0.7310809360240447, "grad_norm": 0.228515625, "learning_rate": 0.0009602099971104805, "loss": 2.0957, "step": 27243 }, { "epoch": 0.7311077715757835, "grad_norm": 0.228515625, "learning_rate": 0.0009601875223291948, "loss": 2.1979, "step": 27244 }, { "epoch": 0.7311346071275225, "grad_norm": 0.224609375, "learning_rate": 0.0009601650467577639, "loss": 2.1187, "step": 27245 }, { "epoch": 0.7311614426792615, "grad_norm": 0.2392578125, "learning_rate": 0.0009601425703962366, "loss": 2.1154, "step": 27246 }, { "epoch": 0.7311882782310004, "grad_norm": 0.2275390625, "learning_rate": 0.0009601200932446626, "loss": 2.1413, "step": 27247 }, { "epoch": 0.7312151137827394, "grad_norm": 0.220703125, "learning_rate": 0.0009600976153030911, "loss": 2.1393, "step": 27248 }, { "epoch": 0.7312419493344783, "grad_norm": 0.2294921875, "learning_rate": 0.0009600751365715712, "loss": 2.1277, "step": 27249 }, { "epoch": 0.7312687848862173, "grad_norm": 0.2294921875, "learning_rate": 0.0009600526570501524, "loss": 2.175, "step": 27250 }, { "epoch": 0.7312956204379562, "grad_norm": 0.216796875, "learning_rate": 0.0009600301767388842, "loss": 2.1374, "step": 27251 }, { "epoch": 0.7313224559896951, "grad_norm": 0.2275390625, "learning_rate": 0.0009600076956378155, "loss": 2.1692, "step": 27252 }, { "epoch": 0.7313492915414341, "grad_norm": 0.216796875, "learning_rate": 0.0009599852137469961, "loss": 2.0817, "step": 27253 }, { "epoch": 0.731376127093173, "grad_norm": 0.2255859375, "learning_rate": 0.0009599627310664748, "loss": 2.1425, "step": 27254 }, { "epoch": 0.731402962644912, "grad_norm": 0.2216796875, "learning_rate": 0.0009599402475963012, "loss": 2.0491, "step": 27255 }, { "epoch": 0.7314297981966509, "grad_norm": 0.2275390625, "learning_rate": 0.0009599177633365248, "loss": 2.0802, "step": 27256 }, { "epoch": 0.7314566337483899, "grad_norm": 0.2373046875, "learning_rate": 0.0009598952782871943, "loss": 2.1509, "step": 27257 }, { "epoch": 0.7314834693001289, "grad_norm": 0.2265625, "learning_rate": 0.0009598727924483598, "loss": 2.177, "step": 27258 }, { "epoch": 0.7315103048518677, "grad_norm": 0.2294921875, "learning_rate": 0.0009598503058200703, "loss": 2.0924, "step": 27259 }, { "epoch": 0.7315371404036067, "grad_norm": 0.2392578125, "learning_rate": 0.0009598278184023749, "loss": 2.2283, "step": 27260 }, { "epoch": 0.7315639759553456, "grad_norm": 0.23046875, "learning_rate": 0.0009598053301953232, "loss": 2.1509, "step": 27261 }, { "epoch": 0.7315908115070846, "grad_norm": 0.2236328125, "learning_rate": 0.0009597828411989645, "loss": 2.1117, "step": 27262 }, { "epoch": 0.7316176470588235, "grad_norm": 0.2333984375, "learning_rate": 0.000959760351413348, "loss": 2.1392, "step": 27263 }, { "epoch": 0.7316444826105625, "grad_norm": 0.22265625, "learning_rate": 0.0009597378608385232, "loss": 2.1237, "step": 27264 }, { "epoch": 0.7316713181623015, "grad_norm": 0.2216796875, "learning_rate": 0.0009597153694745394, "loss": 2.0582, "step": 27265 }, { "epoch": 0.7316981537140403, "grad_norm": 0.224609375, "learning_rate": 0.0009596928773214458, "loss": 2.1326, "step": 27266 }, { "epoch": 0.7317249892657793, "grad_norm": 0.224609375, "learning_rate": 0.0009596703843792919, "loss": 2.0889, "step": 27267 }, { "epoch": 0.7317518248175182, "grad_norm": 0.220703125, "learning_rate": 0.0009596478906481271, "loss": 2.068, "step": 27268 }, { "epoch": 0.7317786603692572, "grad_norm": 0.23046875, "learning_rate": 0.0009596253961280005, "loss": 2.0634, "step": 27269 }, { "epoch": 0.7318054959209961, "grad_norm": 0.232421875, "learning_rate": 0.0009596029008189616, "loss": 2.1433, "step": 27270 }, { "epoch": 0.7318323314727351, "grad_norm": 0.236328125, "learning_rate": 0.0009595804047210598, "loss": 2.1037, "step": 27271 }, { "epoch": 0.7318591670244741, "grad_norm": 0.234375, "learning_rate": 0.0009595579078343444, "loss": 2.0899, "step": 27272 }, { "epoch": 0.731886002576213, "grad_norm": 0.224609375, "learning_rate": 0.0009595354101588647, "loss": 2.0756, "step": 27273 }, { "epoch": 0.7319128381279519, "grad_norm": 0.228515625, "learning_rate": 0.00095951291169467, "loss": 2.1078, "step": 27274 }, { "epoch": 0.7319396736796908, "grad_norm": 0.2265625, "learning_rate": 0.0009594904124418099, "loss": 2.1833, "step": 27275 }, { "epoch": 0.7319665092314298, "grad_norm": 0.2392578125, "learning_rate": 0.0009594679124003335, "loss": 2.0682, "step": 27276 }, { "epoch": 0.7319933447831687, "grad_norm": 0.224609375, "learning_rate": 0.0009594454115702902, "loss": 2.1833, "step": 27277 }, { "epoch": 0.7320201803349077, "grad_norm": 0.2216796875, "learning_rate": 0.0009594229099517294, "loss": 2.1351, "step": 27278 }, { "epoch": 0.7320470158866467, "grad_norm": 0.23046875, "learning_rate": 0.0009594004075447007, "loss": 2.0621, "step": 27279 }, { "epoch": 0.7320738514383855, "grad_norm": 0.220703125, "learning_rate": 0.0009593779043492531, "loss": 2.0778, "step": 27280 }, { "epoch": 0.7321006869901245, "grad_norm": 0.2333984375, "learning_rate": 0.0009593554003654362, "loss": 2.1715, "step": 27281 }, { "epoch": 0.7321275225418634, "grad_norm": 0.240234375, "learning_rate": 0.0009593328955932991, "loss": 2.0474, "step": 27282 }, { "epoch": 0.7321543580936024, "grad_norm": 0.232421875, "learning_rate": 0.0009593103900328913, "loss": 2.1887, "step": 27283 }, { "epoch": 0.7321811936453414, "grad_norm": 0.2451171875, "learning_rate": 0.0009592878836842626, "loss": 2.1731, "step": 27284 }, { "epoch": 0.7322080291970803, "grad_norm": 0.2265625, "learning_rate": 0.0009592653765474616, "loss": 2.0715, "step": 27285 }, { "epoch": 0.7322348647488193, "grad_norm": 0.2275390625, "learning_rate": 0.0009592428686225383, "loss": 2.1088, "step": 27286 }, { "epoch": 0.7322617003005581, "grad_norm": 0.220703125, "learning_rate": 0.0009592203599095415, "loss": 2.1677, "step": 27287 }, { "epoch": 0.7322885358522971, "grad_norm": 0.2392578125, "learning_rate": 0.0009591978504085212, "loss": 2.1453, "step": 27288 }, { "epoch": 0.732315371404036, "grad_norm": 0.220703125, "learning_rate": 0.0009591753401195264, "loss": 2.0489, "step": 27289 }, { "epoch": 0.732342206955775, "grad_norm": 0.2255859375, "learning_rate": 0.0009591528290426068, "loss": 2.0479, "step": 27290 }, { "epoch": 0.732369042507514, "grad_norm": 0.224609375, "learning_rate": 0.0009591303171778113, "loss": 2.1144, "step": 27291 }, { "epoch": 0.7323958780592529, "grad_norm": 0.2451171875, "learning_rate": 0.0009591078045251897, "loss": 2.2348, "step": 27292 }, { "epoch": 0.7324227136109919, "grad_norm": 0.2275390625, "learning_rate": 0.0009590852910847912, "loss": 2.1563, "step": 27293 }, { "epoch": 0.7324495491627308, "grad_norm": 0.2265625, "learning_rate": 0.000959062776856665, "loss": 2.0963, "step": 27294 }, { "epoch": 0.7324763847144697, "grad_norm": 0.2265625, "learning_rate": 0.000959040261840861, "loss": 2.1675, "step": 27295 }, { "epoch": 0.7325032202662086, "grad_norm": 0.2265625, "learning_rate": 0.0009590177460374281, "loss": 2.2069, "step": 27296 }, { "epoch": 0.7325300558179476, "grad_norm": 0.2333984375, "learning_rate": 0.0009589952294464159, "loss": 2.0908, "step": 27297 }, { "epoch": 0.7325568913696866, "grad_norm": 0.224609375, "learning_rate": 0.0009589727120678738, "loss": 2.0911, "step": 27298 }, { "epoch": 0.7325837269214255, "grad_norm": 0.234375, "learning_rate": 0.0009589501939018513, "loss": 2.1838, "step": 27299 }, { "epoch": 0.7326105624731645, "grad_norm": 0.2236328125, "learning_rate": 0.0009589276749483976, "loss": 2.0986, "step": 27300 }, { "epoch": 0.7326373980249034, "grad_norm": 0.21875, "learning_rate": 0.0009589051552075622, "loss": 2.1541, "step": 27301 }, { "epoch": 0.7326642335766423, "grad_norm": 0.2236328125, "learning_rate": 0.0009588826346793945, "loss": 2.2215, "step": 27302 }, { "epoch": 0.7326910691283813, "grad_norm": 0.2216796875, "learning_rate": 0.0009588601133639438, "loss": 2.0751, "step": 27303 }, { "epoch": 0.7327179046801202, "grad_norm": 0.216796875, "learning_rate": 0.0009588375912612596, "loss": 2.0689, "step": 27304 }, { "epoch": 0.7327447402318592, "grad_norm": 0.220703125, "learning_rate": 0.0009588150683713912, "loss": 2.1005, "step": 27305 }, { "epoch": 0.7327715757835981, "grad_norm": 0.2197265625, "learning_rate": 0.0009587925446943884, "loss": 2.0419, "step": 27306 }, { "epoch": 0.7327984113353371, "grad_norm": 0.220703125, "learning_rate": 0.0009587700202303002, "loss": 2.1319, "step": 27307 }, { "epoch": 0.732825246887076, "grad_norm": 0.2314453125, "learning_rate": 0.000958747494979176, "loss": 2.0962, "step": 27308 }, { "epoch": 0.732852082438815, "grad_norm": 0.2265625, "learning_rate": 0.0009587249689410654, "loss": 2.1146, "step": 27309 }, { "epoch": 0.7328789179905539, "grad_norm": 0.2255859375, "learning_rate": 0.0009587024421160178, "loss": 2.001, "step": 27310 }, { "epoch": 0.7329057535422928, "grad_norm": 0.2216796875, "learning_rate": 0.0009586799145040825, "loss": 2.1083, "step": 27311 }, { "epoch": 0.7329325890940318, "grad_norm": 0.2197265625, "learning_rate": 0.0009586573861053092, "loss": 2.0885, "step": 27312 }, { "epoch": 0.7329594246457707, "grad_norm": 0.2177734375, "learning_rate": 0.0009586348569197468, "loss": 2.1707, "step": 27313 }, { "epoch": 0.7329862601975097, "grad_norm": 0.2255859375, "learning_rate": 0.0009586123269474453, "loss": 2.0995, "step": 27314 }, { "epoch": 0.7330130957492486, "grad_norm": 0.21484375, "learning_rate": 0.0009585897961884537, "loss": 2.0948, "step": 27315 }, { "epoch": 0.7330399313009875, "grad_norm": 0.22265625, "learning_rate": 0.0009585672646428216, "loss": 2.0578, "step": 27316 }, { "epoch": 0.7330667668527265, "grad_norm": 0.2236328125, "learning_rate": 0.0009585447323105985, "loss": 2.0507, "step": 27317 }, { "epoch": 0.7330936024044654, "grad_norm": 0.2236328125, "learning_rate": 0.0009585221991918337, "loss": 2.0892, "step": 27318 }, { "epoch": 0.7331204379562044, "grad_norm": 0.232421875, "learning_rate": 0.0009584996652865766, "loss": 2.1196, "step": 27319 }, { "epoch": 0.7331472735079433, "grad_norm": 0.2314453125, "learning_rate": 0.0009584771305948767, "loss": 2.0902, "step": 27320 }, { "epoch": 0.7331741090596823, "grad_norm": 0.220703125, "learning_rate": 0.0009584545951167837, "loss": 2.0002, "step": 27321 }, { "epoch": 0.7332009446114212, "grad_norm": 0.232421875, "learning_rate": 0.0009584320588523466, "loss": 2.1268, "step": 27322 }, { "epoch": 0.7332277801631601, "grad_norm": 0.2265625, "learning_rate": 0.0009584095218016151, "loss": 2.0538, "step": 27323 }, { "epoch": 0.7332546157148991, "grad_norm": 0.2255859375, "learning_rate": 0.0009583869839646384, "loss": 2.2108, "step": 27324 }, { "epoch": 0.733281451266638, "grad_norm": 0.224609375, "learning_rate": 0.000958364445341466, "loss": 2.1809, "step": 27325 }, { "epoch": 0.733308286818377, "grad_norm": 0.2255859375, "learning_rate": 0.0009583419059321477, "loss": 2.2179, "step": 27326 }, { "epoch": 0.7333351223701159, "grad_norm": 0.236328125, "learning_rate": 0.0009583193657367325, "loss": 2.2096, "step": 27327 }, { "epoch": 0.7333619579218549, "grad_norm": 0.2333984375, "learning_rate": 0.0009582968247552702, "loss": 2.0679, "step": 27328 }, { "epoch": 0.7333887934735939, "grad_norm": 0.21875, "learning_rate": 0.00095827428298781, "loss": 2.0753, "step": 27329 }, { "epoch": 0.7334156290253327, "grad_norm": 0.2265625, "learning_rate": 0.0009582517404344013, "loss": 2.1243, "step": 27330 }, { "epoch": 0.7334424645770717, "grad_norm": 0.2197265625, "learning_rate": 0.0009582291970950938, "loss": 2.1262, "step": 27331 }, { "epoch": 0.7334693001288106, "grad_norm": 0.22265625, "learning_rate": 0.000958206652969937, "loss": 2.0794, "step": 27332 }, { "epoch": 0.7334961356805496, "grad_norm": 0.2197265625, "learning_rate": 0.00095818410805898, "loss": 2.0172, "step": 27333 }, { "epoch": 0.7335229712322885, "grad_norm": 0.2275390625, "learning_rate": 0.0009581615623622725, "loss": 2.1256, "step": 27334 }, { "epoch": 0.7335498067840275, "grad_norm": 0.220703125, "learning_rate": 0.0009581390158798638, "loss": 2.1479, "step": 27335 }, { "epoch": 0.7335766423357665, "grad_norm": 0.23046875, "learning_rate": 0.0009581164686118035, "loss": 2.2681, "step": 27336 }, { "epoch": 0.7336034778875054, "grad_norm": 0.228515625, "learning_rate": 0.0009580939205581411, "loss": 2.2329, "step": 27337 }, { "epoch": 0.7336303134392443, "grad_norm": 0.2265625, "learning_rate": 0.000958071371718926, "loss": 2.0882, "step": 27338 }, { "epoch": 0.7336571489909832, "grad_norm": 0.220703125, "learning_rate": 0.0009580488220942076, "loss": 2.0732, "step": 27339 }, { "epoch": 0.7336839845427222, "grad_norm": 0.2177734375, "learning_rate": 0.0009580262716840354, "loss": 2.1127, "step": 27340 }, { "epoch": 0.7337108200944611, "grad_norm": 0.2265625, "learning_rate": 0.0009580037204884588, "loss": 2.0894, "step": 27341 }, { "epoch": 0.7337376556462001, "grad_norm": 0.236328125, "learning_rate": 0.0009579811685075276, "loss": 2.166, "step": 27342 }, { "epoch": 0.7337644911979391, "grad_norm": 0.2275390625, "learning_rate": 0.0009579586157412909, "loss": 2.0632, "step": 27343 }, { "epoch": 0.733791326749678, "grad_norm": 0.240234375, "learning_rate": 0.0009579360621897984, "loss": 2.0911, "step": 27344 }, { "epoch": 0.7338181623014169, "grad_norm": 0.2138671875, "learning_rate": 0.0009579135078530994, "loss": 2.1356, "step": 27345 }, { "epoch": 0.7338449978531558, "grad_norm": 0.2294921875, "learning_rate": 0.0009578909527312436, "loss": 2.1805, "step": 27346 }, { "epoch": 0.7338718334048948, "grad_norm": 0.2314453125, "learning_rate": 0.0009578683968242802, "loss": 2.0507, "step": 27347 }, { "epoch": 0.7338986689566337, "grad_norm": 0.2275390625, "learning_rate": 0.0009578458401322588, "loss": 2.1453, "step": 27348 }, { "epoch": 0.7339255045083727, "grad_norm": 0.2275390625, "learning_rate": 0.0009578232826552291, "loss": 2.2025, "step": 27349 }, { "epoch": 0.7339523400601117, "grad_norm": 0.220703125, "learning_rate": 0.0009578007243932402, "loss": 2.0797, "step": 27350 }, { "epoch": 0.7339791756118506, "grad_norm": 0.2275390625, "learning_rate": 0.0009577781653463417, "loss": 2.0836, "step": 27351 }, { "epoch": 0.7340060111635895, "grad_norm": 0.234375, "learning_rate": 0.0009577556055145834, "loss": 2.1875, "step": 27352 }, { "epoch": 0.7340328467153284, "grad_norm": 0.22265625, "learning_rate": 0.0009577330448980146, "loss": 2.2098, "step": 27353 }, { "epoch": 0.7340596822670674, "grad_norm": 0.2265625, "learning_rate": 0.0009577104834966845, "loss": 2.1085, "step": 27354 }, { "epoch": 0.7340865178188064, "grad_norm": 0.21875, "learning_rate": 0.0009576879213106428, "loss": 2.0817, "step": 27355 }, { "epoch": 0.7341133533705453, "grad_norm": 0.2197265625, "learning_rate": 0.0009576653583399393, "loss": 2.09, "step": 27356 }, { "epoch": 0.7341401889222843, "grad_norm": 0.234375, "learning_rate": 0.0009576427945846231, "loss": 2.1518, "step": 27357 }, { "epoch": 0.7341670244740232, "grad_norm": 0.21484375, "learning_rate": 0.0009576202300447437, "loss": 2.0335, "step": 27358 }, { "epoch": 0.7341938600257621, "grad_norm": 0.232421875, "learning_rate": 0.000957597664720351, "loss": 2.0893, "step": 27359 }, { "epoch": 0.734220695577501, "grad_norm": 0.236328125, "learning_rate": 0.0009575750986114941, "loss": 2.2307, "step": 27360 }, { "epoch": 0.73424753112924, "grad_norm": 0.2294921875, "learning_rate": 0.0009575525317182226, "loss": 2.1188, "step": 27361 }, { "epoch": 0.734274366680979, "grad_norm": 0.216796875, "learning_rate": 0.000957529964040586, "loss": 2.0813, "step": 27362 }, { "epoch": 0.7343012022327179, "grad_norm": 0.2216796875, "learning_rate": 0.000957507395578634, "loss": 1.9962, "step": 27363 }, { "epoch": 0.7343280377844569, "grad_norm": 0.216796875, "learning_rate": 0.0009574848263324159, "loss": 2.0548, "step": 27364 }, { "epoch": 0.7343548733361958, "grad_norm": 0.228515625, "learning_rate": 0.0009574622563019811, "loss": 2.1144, "step": 27365 }, { "epoch": 0.7343817088879347, "grad_norm": 0.2197265625, "learning_rate": 0.0009574396854873795, "loss": 2.0447, "step": 27366 }, { "epoch": 0.7344085444396736, "grad_norm": 0.2275390625, "learning_rate": 0.0009574171138886603, "loss": 2.0966, "step": 27367 }, { "epoch": 0.7344353799914126, "grad_norm": 0.2158203125, "learning_rate": 0.0009573945415058732, "loss": 2.0588, "step": 27368 }, { "epoch": 0.7344622155431516, "grad_norm": 0.2158203125, "learning_rate": 0.0009573719683390674, "loss": 2.1177, "step": 27369 }, { "epoch": 0.7344890510948905, "grad_norm": 0.2255859375, "learning_rate": 0.0009573493943882928, "loss": 2.072, "step": 27370 }, { "epoch": 0.7345158866466295, "grad_norm": 0.2314453125, "learning_rate": 0.0009573268196535987, "loss": 2.1469, "step": 27371 }, { "epoch": 0.7345427221983684, "grad_norm": 0.2333984375, "learning_rate": 0.0009573042441350347, "loss": 2.1148, "step": 27372 }, { "epoch": 0.7345695577501073, "grad_norm": 0.2275390625, "learning_rate": 0.0009572816678326504, "loss": 2.1576, "step": 27373 }, { "epoch": 0.7345963933018463, "grad_norm": 0.2255859375, "learning_rate": 0.0009572590907464951, "loss": 2.084, "step": 27374 }, { "epoch": 0.7346232288535852, "grad_norm": 0.224609375, "learning_rate": 0.0009572365128766185, "loss": 2.1334, "step": 27375 }, { "epoch": 0.7346500644053242, "grad_norm": 0.2275390625, "learning_rate": 0.00095721393422307, "loss": 2.0568, "step": 27376 }, { "epoch": 0.7346768999570631, "grad_norm": 0.23046875, "learning_rate": 0.0009571913547858996, "loss": 2.1109, "step": 27377 }, { "epoch": 0.7347037355088021, "grad_norm": 0.2255859375, "learning_rate": 0.000957168774565156, "loss": 2.0843, "step": 27378 }, { "epoch": 0.734730571060541, "grad_norm": 0.23046875, "learning_rate": 0.0009571461935608894, "loss": 2.085, "step": 27379 }, { "epoch": 0.73475740661228, "grad_norm": 0.228515625, "learning_rate": 0.0009571236117731491, "loss": 2.1634, "step": 27380 }, { "epoch": 0.7347842421640189, "grad_norm": 0.2197265625, "learning_rate": 0.0009571010292019846, "loss": 2.0605, "step": 27381 }, { "epoch": 0.7348110777157578, "grad_norm": 0.2392578125, "learning_rate": 0.0009570784458474456, "loss": 2.0649, "step": 27382 }, { "epoch": 0.7348379132674968, "grad_norm": 0.2265625, "learning_rate": 0.0009570558617095816, "loss": 2.241, "step": 27383 }, { "epoch": 0.7348647488192357, "grad_norm": 0.2294921875, "learning_rate": 0.000957033276788442, "loss": 2.1669, "step": 27384 }, { "epoch": 0.7348915843709747, "grad_norm": 0.2158203125, "learning_rate": 0.0009570106910840765, "loss": 2.0498, "step": 27385 }, { "epoch": 0.7349184199227136, "grad_norm": 0.2265625, "learning_rate": 0.0009569881045965343, "loss": 2.1464, "step": 27386 }, { "epoch": 0.7349452554744526, "grad_norm": 0.216796875, "learning_rate": 0.0009569655173258656, "loss": 2.1494, "step": 27387 }, { "epoch": 0.7349720910261915, "grad_norm": 0.220703125, "learning_rate": 0.0009569429292721194, "loss": 2.1147, "step": 27388 }, { "epoch": 0.7349989265779304, "grad_norm": 0.224609375, "learning_rate": 0.0009569203404353453, "loss": 2.1572, "step": 27389 }, { "epoch": 0.7350257621296694, "grad_norm": 0.216796875, "learning_rate": 0.0009568977508155931, "loss": 2.0679, "step": 27390 }, { "epoch": 0.7350525976814083, "grad_norm": 0.216796875, "learning_rate": 0.0009568751604129124, "loss": 1.9784, "step": 27391 }, { "epoch": 0.7350794332331473, "grad_norm": 0.2216796875, "learning_rate": 0.0009568525692273523, "loss": 2.107, "step": 27392 }, { "epoch": 0.7351062687848862, "grad_norm": 0.220703125, "learning_rate": 0.000956829977258963, "loss": 2.1684, "step": 27393 }, { "epoch": 0.7351331043366252, "grad_norm": 0.2236328125, "learning_rate": 0.0009568073845077933, "loss": 2.0332, "step": 27394 }, { "epoch": 0.7351599398883641, "grad_norm": 0.2177734375, "learning_rate": 0.0009567847909738935, "loss": 2.0714, "step": 27395 }, { "epoch": 0.735186775440103, "grad_norm": 0.216796875, "learning_rate": 0.0009567621966573126, "loss": 2.0798, "step": 27396 }, { "epoch": 0.735213610991842, "grad_norm": 0.2333984375, "learning_rate": 0.0009567396015581006, "loss": 2.1471, "step": 27397 }, { "epoch": 0.7352404465435809, "grad_norm": 0.2294921875, "learning_rate": 0.0009567170056763068, "loss": 2.1764, "step": 27398 }, { "epoch": 0.7352672820953199, "grad_norm": 0.21875, "learning_rate": 0.000956694409011981, "loss": 2.0964, "step": 27399 }, { "epoch": 0.7352941176470589, "grad_norm": 0.234375, "learning_rate": 0.0009566718115651722, "loss": 2.0874, "step": 27400 }, { "epoch": 0.7353209531987978, "grad_norm": 0.2138671875, "learning_rate": 0.0009566492133359308, "loss": 2.035, "step": 27401 }, { "epoch": 0.7353477887505367, "grad_norm": 0.22265625, "learning_rate": 0.0009566266143243056, "loss": 2.1329, "step": 27402 }, { "epoch": 0.7353746243022756, "grad_norm": 0.2314453125, "learning_rate": 0.0009566040145303467, "loss": 2.0755, "step": 27403 }, { "epoch": 0.7354014598540146, "grad_norm": 0.224609375, "learning_rate": 0.0009565814139541036, "loss": 2.0707, "step": 27404 }, { "epoch": 0.7354282954057535, "grad_norm": 0.21875, "learning_rate": 0.0009565588125956257, "loss": 2.056, "step": 27405 }, { "epoch": 0.7354551309574925, "grad_norm": 0.216796875, "learning_rate": 0.0009565362104549626, "loss": 2.1643, "step": 27406 }, { "epoch": 0.7354819665092315, "grad_norm": 0.2265625, "learning_rate": 0.000956513607532164, "loss": 2.0992, "step": 27407 }, { "epoch": 0.7355088020609704, "grad_norm": 0.2275390625, "learning_rate": 0.0009564910038272793, "loss": 2.0896, "step": 27408 }, { "epoch": 0.7355356376127093, "grad_norm": 0.212890625, "learning_rate": 0.0009564683993403584, "loss": 2.0571, "step": 27409 }, { "epoch": 0.7355624731644482, "grad_norm": 0.228515625, "learning_rate": 0.0009564457940714507, "loss": 2.0985, "step": 27410 }, { "epoch": 0.7355893087161872, "grad_norm": 0.236328125, "learning_rate": 0.0009564231880206059, "loss": 2.1464, "step": 27411 }, { "epoch": 0.7356161442679261, "grad_norm": 0.2138671875, "learning_rate": 0.0009564005811878733, "loss": 2.0551, "step": 27412 }, { "epoch": 0.7356429798196651, "grad_norm": 0.22265625, "learning_rate": 0.0009563779735733026, "loss": 2.0712, "step": 27413 }, { "epoch": 0.7356698153714041, "grad_norm": 0.2177734375, "learning_rate": 0.0009563553651769436, "loss": 2.0025, "step": 27414 }, { "epoch": 0.735696650923143, "grad_norm": 0.2177734375, "learning_rate": 0.0009563327559988458, "loss": 2.116, "step": 27415 }, { "epoch": 0.735723486474882, "grad_norm": 0.21484375, "learning_rate": 0.0009563101460390587, "loss": 2.0045, "step": 27416 }, { "epoch": 0.7357503220266208, "grad_norm": 0.22265625, "learning_rate": 0.0009562875352976321, "loss": 2.0625, "step": 27417 }, { "epoch": 0.7357771575783598, "grad_norm": 0.2177734375, "learning_rate": 0.0009562649237746154, "loss": 2.0471, "step": 27418 }, { "epoch": 0.7358039931300988, "grad_norm": 0.2109375, "learning_rate": 0.0009562423114700581, "loss": 2.0876, "step": 27419 }, { "epoch": 0.7358308286818377, "grad_norm": 0.2294921875, "learning_rate": 0.0009562196983840103, "loss": 2.1601, "step": 27420 }, { "epoch": 0.7358576642335767, "grad_norm": 0.224609375, "learning_rate": 0.0009561970845165209, "loss": 2.1457, "step": 27421 }, { "epoch": 0.7358844997853156, "grad_norm": 0.2197265625, "learning_rate": 0.0009561744698676402, "loss": 2.1751, "step": 27422 }, { "epoch": 0.7359113353370546, "grad_norm": 0.21484375, "learning_rate": 0.0009561518544374173, "loss": 1.9956, "step": 27423 }, { "epoch": 0.7359381708887934, "grad_norm": 0.2158203125, "learning_rate": 0.0009561292382259022, "loss": 2.0752, "step": 27424 }, { "epoch": 0.7359650064405324, "grad_norm": 0.2138671875, "learning_rate": 0.0009561066212331442, "loss": 2.05, "step": 27425 }, { "epoch": 0.7359918419922714, "grad_norm": 0.2255859375, "learning_rate": 0.0009560840034591931, "loss": 2.1272, "step": 27426 }, { "epoch": 0.7360186775440103, "grad_norm": 0.2177734375, "learning_rate": 0.0009560613849040985, "loss": 1.9894, "step": 27427 }, { "epoch": 0.7360455130957493, "grad_norm": 0.2216796875, "learning_rate": 0.0009560387655679097, "loss": 2.0329, "step": 27428 }, { "epoch": 0.7360723486474882, "grad_norm": 0.22265625, "learning_rate": 0.000956016145450677, "loss": 2.0376, "step": 27429 }, { "epoch": 0.7360991841992272, "grad_norm": 0.21875, "learning_rate": 0.0009559935245524495, "loss": 1.9881, "step": 27430 }, { "epoch": 0.736126019750966, "grad_norm": 0.2314453125, "learning_rate": 0.0009559709028732768, "loss": 2.1023, "step": 27431 }, { "epoch": 0.736152855302705, "grad_norm": 0.2265625, "learning_rate": 0.0009559482804132087, "loss": 2.1384, "step": 27432 }, { "epoch": 0.736179690854444, "grad_norm": 0.2158203125, "learning_rate": 0.0009559256571722948, "loss": 2.1141, "step": 27433 }, { "epoch": 0.7362065264061829, "grad_norm": 0.2216796875, "learning_rate": 0.0009559030331505847, "loss": 2.049, "step": 27434 }, { "epoch": 0.7362333619579219, "grad_norm": 0.224609375, "learning_rate": 0.0009558804083481283, "loss": 2.1575, "step": 27435 }, { "epoch": 0.7362601975096608, "grad_norm": 0.228515625, "learning_rate": 0.0009558577827649745, "loss": 2.0455, "step": 27436 }, { "epoch": 0.7362870330613998, "grad_norm": 0.228515625, "learning_rate": 0.0009558351564011738, "loss": 2.0672, "step": 27437 }, { "epoch": 0.7363138686131386, "grad_norm": 0.224609375, "learning_rate": 0.0009558125292567755, "loss": 2.1494, "step": 27438 }, { "epoch": 0.7363407041648776, "grad_norm": 0.220703125, "learning_rate": 0.0009557899013318289, "loss": 2.0284, "step": 27439 }, { "epoch": 0.7363675397166166, "grad_norm": 0.2177734375, "learning_rate": 0.0009557672726263842, "loss": 2.1323, "step": 27440 }, { "epoch": 0.7363943752683555, "grad_norm": 0.2138671875, "learning_rate": 0.0009557446431404906, "loss": 2.0996, "step": 27441 }, { "epoch": 0.7364212108200945, "grad_norm": 0.216796875, "learning_rate": 0.0009557220128741979, "loss": 2.0964, "step": 27442 }, { "epoch": 0.7364480463718334, "grad_norm": 0.23046875, "learning_rate": 0.0009556993818275559, "loss": 2.036, "step": 27443 }, { "epoch": 0.7364748819235724, "grad_norm": 0.2353515625, "learning_rate": 0.000955676750000614, "loss": 2.1186, "step": 27444 }, { "epoch": 0.7365017174753113, "grad_norm": 0.2138671875, "learning_rate": 0.0009556541173934219, "loss": 2.0678, "step": 27445 }, { "epoch": 0.7365285530270502, "grad_norm": 0.21875, "learning_rate": 0.0009556314840060297, "loss": 2.1109, "step": 27446 }, { "epoch": 0.7365553885787892, "grad_norm": 0.232421875, "learning_rate": 0.0009556088498384862, "loss": 2.1043, "step": 27447 }, { "epoch": 0.7365822241305281, "grad_norm": 0.2314453125, "learning_rate": 0.0009555862148908416, "loss": 2.1882, "step": 27448 }, { "epoch": 0.7366090596822671, "grad_norm": 0.21484375, "learning_rate": 0.0009555635791631455, "loss": 2.0719, "step": 27449 }, { "epoch": 0.736635895234006, "grad_norm": 0.2177734375, "learning_rate": 0.0009555409426554477, "loss": 1.988, "step": 27450 }, { "epoch": 0.736662730785745, "grad_norm": 0.2216796875, "learning_rate": 0.0009555183053677974, "loss": 2.1274, "step": 27451 }, { "epoch": 0.736689566337484, "grad_norm": 0.2177734375, "learning_rate": 0.0009554956673002446, "loss": 2.0902, "step": 27452 }, { "epoch": 0.7367164018892228, "grad_norm": 0.224609375, "learning_rate": 0.0009554730284528389, "loss": 2.2224, "step": 27453 }, { "epoch": 0.7367432374409618, "grad_norm": 0.2236328125, "learning_rate": 0.00095545038882563, "loss": 2.1247, "step": 27454 }, { "epoch": 0.7367700729927007, "grad_norm": 0.2177734375, "learning_rate": 0.0009554277484186675, "loss": 2.0021, "step": 27455 }, { "epoch": 0.7367969085444397, "grad_norm": 0.234375, "learning_rate": 0.0009554051072320009, "loss": 2.1653, "step": 27456 }, { "epoch": 0.7368237440961786, "grad_norm": 0.2314453125, "learning_rate": 0.0009553824652656804, "loss": 2.1132, "step": 27457 }, { "epoch": 0.7368505796479176, "grad_norm": 0.2236328125, "learning_rate": 0.0009553598225197553, "loss": 2.1359, "step": 27458 }, { "epoch": 0.7368774151996565, "grad_norm": 0.2392578125, "learning_rate": 0.000955337178994275, "loss": 2.0875, "step": 27459 }, { "epoch": 0.7369042507513954, "grad_norm": 0.212890625, "learning_rate": 0.0009553145346892898, "loss": 2.051, "step": 27460 }, { "epoch": 0.7369310863031344, "grad_norm": 0.2119140625, "learning_rate": 0.0009552918896048489, "loss": 2.1187, "step": 27461 }, { "epoch": 0.7369579218548733, "grad_norm": 0.2099609375, "learning_rate": 0.0009552692437410021, "loss": 1.9766, "step": 27462 }, { "epoch": 0.7369847574066123, "grad_norm": 0.2373046875, "learning_rate": 0.000955246597097799, "loss": 2.1487, "step": 27463 }, { "epoch": 0.7370115929583512, "grad_norm": 0.2216796875, "learning_rate": 0.0009552239496752896, "loss": 1.9983, "step": 27464 }, { "epoch": 0.7370384285100902, "grad_norm": 0.220703125, "learning_rate": 0.0009552013014735234, "loss": 2.2145, "step": 27465 }, { "epoch": 0.7370652640618292, "grad_norm": 0.2265625, "learning_rate": 0.0009551786524925498, "loss": 2.0858, "step": 27466 }, { "epoch": 0.737092099613568, "grad_norm": 0.2314453125, "learning_rate": 0.0009551560027324188, "loss": 2.0731, "step": 27467 }, { "epoch": 0.737118935165307, "grad_norm": 0.2099609375, "learning_rate": 0.00095513335219318, "loss": 1.9858, "step": 27468 }, { "epoch": 0.7371457707170459, "grad_norm": 0.2265625, "learning_rate": 0.0009551107008748832, "loss": 2.0648, "step": 27469 }, { "epoch": 0.7371726062687849, "grad_norm": 0.2255859375, "learning_rate": 0.000955088048777578, "loss": 2.1371, "step": 27470 }, { "epoch": 0.7371994418205239, "grad_norm": 0.22265625, "learning_rate": 0.000955065395901314, "loss": 2.0387, "step": 27471 }, { "epoch": 0.7372262773722628, "grad_norm": 0.2177734375, "learning_rate": 0.0009550427422461411, "loss": 2.103, "step": 27472 }, { "epoch": 0.7372531129240018, "grad_norm": 0.244140625, "learning_rate": 0.0009550200878121088, "loss": 2.1263, "step": 27473 }, { "epoch": 0.7372799484757406, "grad_norm": 0.2216796875, "learning_rate": 0.0009549974325992669, "loss": 1.963, "step": 27474 }, { "epoch": 0.7373067840274796, "grad_norm": 0.216796875, "learning_rate": 0.0009549747766076649, "loss": 2.0438, "step": 27475 }, { "epoch": 0.7373336195792185, "grad_norm": 0.2216796875, "learning_rate": 0.000954952119837353, "loss": 2.087, "step": 27476 }, { "epoch": 0.7373604551309575, "grad_norm": 0.2138671875, "learning_rate": 0.0009549294622883803, "loss": 2.0688, "step": 27477 }, { "epoch": 0.7373872906826965, "grad_norm": 0.2177734375, "learning_rate": 0.0009549068039607969, "loss": 2.0647, "step": 27478 }, { "epoch": 0.7374141262344354, "grad_norm": 0.2158203125, "learning_rate": 0.0009548841448546524, "loss": 2.1039, "step": 27479 }, { "epoch": 0.7374409617861744, "grad_norm": 0.216796875, "learning_rate": 0.0009548614849699964, "loss": 2.0503, "step": 27480 }, { "epoch": 0.7374677973379132, "grad_norm": 0.21875, "learning_rate": 0.0009548388243068789, "loss": 2.0293, "step": 27481 }, { "epoch": 0.7374946328896522, "grad_norm": 0.2197265625, "learning_rate": 0.0009548161628653493, "loss": 2.0974, "step": 27482 }, { "epoch": 0.7375214684413911, "grad_norm": 0.2275390625, "learning_rate": 0.0009547935006454575, "loss": 2.0895, "step": 27483 }, { "epoch": 0.7375483039931301, "grad_norm": 0.2294921875, "learning_rate": 0.000954770837647253, "loss": 2.1028, "step": 27484 }, { "epoch": 0.7375751395448691, "grad_norm": 0.2353515625, "learning_rate": 0.0009547481738707857, "loss": 2.1481, "step": 27485 }, { "epoch": 0.737601975096608, "grad_norm": 0.232421875, "learning_rate": 0.0009547255093161054, "loss": 2.1285, "step": 27486 }, { "epoch": 0.737628810648347, "grad_norm": 0.2265625, "learning_rate": 0.0009547028439832614, "loss": 2.149, "step": 27487 }, { "epoch": 0.7376556462000858, "grad_norm": 0.220703125, "learning_rate": 0.0009546801778723041, "loss": 2.1075, "step": 27488 }, { "epoch": 0.7376824817518248, "grad_norm": 0.2236328125, "learning_rate": 0.0009546575109832826, "loss": 2.1565, "step": 27489 }, { "epoch": 0.7377093173035638, "grad_norm": 0.22265625, "learning_rate": 0.0009546348433162468, "loss": 2.0178, "step": 27490 }, { "epoch": 0.7377361528553027, "grad_norm": 0.21484375, "learning_rate": 0.0009546121748712467, "loss": 1.9624, "step": 27491 }, { "epoch": 0.7377629884070417, "grad_norm": 0.2216796875, "learning_rate": 0.0009545895056483315, "loss": 2.1749, "step": 27492 }, { "epoch": 0.7377898239587806, "grad_norm": 0.2138671875, "learning_rate": 0.0009545668356475516, "loss": 2.0657, "step": 27493 }, { "epoch": 0.7378166595105196, "grad_norm": 0.2314453125, "learning_rate": 0.0009545441648689563, "loss": 2.1904, "step": 27494 }, { "epoch": 0.7378434950622584, "grad_norm": 0.2265625, "learning_rate": 0.0009545214933125953, "loss": 2.1505, "step": 27495 }, { "epoch": 0.7378703306139974, "grad_norm": 0.21875, "learning_rate": 0.0009544988209785184, "loss": 2.0522, "step": 27496 }, { "epoch": 0.7378971661657364, "grad_norm": 0.212890625, "learning_rate": 0.0009544761478667757, "loss": 2.0966, "step": 27497 }, { "epoch": 0.7379240017174753, "grad_norm": 0.2197265625, "learning_rate": 0.0009544534739774162, "loss": 2.0271, "step": 27498 }, { "epoch": 0.7379508372692143, "grad_norm": 0.2197265625, "learning_rate": 0.0009544307993104903, "loss": 2.0311, "step": 27499 }, { "epoch": 0.7379776728209532, "grad_norm": 0.216796875, "learning_rate": 0.0009544081238660475, "loss": 2.0425, "step": 27500 }, { "epoch": 0.7380045083726922, "grad_norm": 0.216796875, "learning_rate": 0.0009543854476441374, "loss": 2.0449, "step": 27501 }, { "epoch": 0.738031343924431, "grad_norm": 0.234375, "learning_rate": 0.0009543627706448101, "loss": 2.1011, "step": 27502 }, { "epoch": 0.73805817947617, "grad_norm": 0.21875, "learning_rate": 0.000954340092868115, "loss": 2.1093, "step": 27503 }, { "epoch": 0.738085015027909, "grad_norm": 0.2099609375, "learning_rate": 0.000954317414314102, "loss": 1.99, "step": 27504 }, { "epoch": 0.7381118505796479, "grad_norm": 0.2109375, "learning_rate": 0.0009542947349828209, "loss": 2.1413, "step": 27505 }, { "epoch": 0.7381386861313869, "grad_norm": 0.228515625, "learning_rate": 0.0009542720548743213, "loss": 2.0932, "step": 27506 }, { "epoch": 0.7381655216831258, "grad_norm": 0.21875, "learning_rate": 0.0009542493739886531, "loss": 2.0267, "step": 27507 }, { "epoch": 0.7381923572348648, "grad_norm": 0.2265625, "learning_rate": 0.000954226692325866, "loss": 2.0382, "step": 27508 }, { "epoch": 0.7382191927866036, "grad_norm": 0.2294921875, "learning_rate": 0.0009542040098860096, "loss": 2.0603, "step": 27509 }, { "epoch": 0.7382460283383426, "grad_norm": 0.232421875, "learning_rate": 0.0009541813266691339, "loss": 2.1644, "step": 27510 }, { "epoch": 0.7382728638900816, "grad_norm": 0.2275390625, "learning_rate": 0.0009541586426752888, "loss": 2.1816, "step": 27511 }, { "epoch": 0.7382996994418205, "grad_norm": 0.2294921875, "learning_rate": 0.0009541359579045234, "loss": 2.0959, "step": 27512 }, { "epoch": 0.7383265349935595, "grad_norm": 0.2236328125, "learning_rate": 0.0009541132723568882, "loss": 2.1147, "step": 27513 }, { "epoch": 0.7383533705452984, "grad_norm": 0.2099609375, "learning_rate": 0.0009540905860324326, "loss": 2.104, "step": 27514 }, { "epoch": 0.7383802060970374, "grad_norm": 0.220703125, "learning_rate": 0.0009540678989312064, "loss": 2.0721, "step": 27515 }, { "epoch": 0.7384070416487764, "grad_norm": 0.21484375, "learning_rate": 0.0009540452110532595, "loss": 2.0591, "step": 27516 }, { "epoch": 0.7384338772005152, "grad_norm": 0.2158203125, "learning_rate": 0.0009540225223986415, "loss": 1.9756, "step": 27517 }, { "epoch": 0.7384607127522542, "grad_norm": 0.224609375, "learning_rate": 0.0009539998329674022, "loss": 2.2197, "step": 27518 }, { "epoch": 0.7384875483039931, "grad_norm": 0.2099609375, "learning_rate": 0.0009539771427595914, "loss": 2.0691, "step": 27519 }, { "epoch": 0.7385143838557321, "grad_norm": 0.21875, "learning_rate": 0.000953954451775259, "loss": 2.0933, "step": 27520 }, { "epoch": 0.738541219407471, "grad_norm": 0.2197265625, "learning_rate": 0.0009539317600144546, "loss": 2.082, "step": 27521 }, { "epoch": 0.73856805495921, "grad_norm": 0.21484375, "learning_rate": 0.000953909067477228, "loss": 2.0843, "step": 27522 }, { "epoch": 0.738594890510949, "grad_norm": 0.2236328125, "learning_rate": 0.000953886374163629, "loss": 2.0689, "step": 27523 }, { "epoch": 0.7386217260626878, "grad_norm": 0.2236328125, "learning_rate": 0.0009538636800737076, "loss": 2.14, "step": 27524 }, { "epoch": 0.7386485616144268, "grad_norm": 0.2197265625, "learning_rate": 0.0009538409852075133, "loss": 2.1157, "step": 27525 }, { "epoch": 0.7386753971661657, "grad_norm": 0.2158203125, "learning_rate": 0.0009538182895650959, "loss": 2.0819, "step": 27526 }, { "epoch": 0.7387022327179047, "grad_norm": 0.224609375, "learning_rate": 0.0009537955931465054, "loss": 2.0551, "step": 27527 }, { "epoch": 0.7387290682696436, "grad_norm": 0.224609375, "learning_rate": 0.0009537728959517914, "loss": 2.0665, "step": 27528 }, { "epoch": 0.7387559038213826, "grad_norm": 0.220703125, "learning_rate": 0.0009537501979810036, "loss": 2.115, "step": 27529 }, { "epoch": 0.7387827393731216, "grad_norm": 0.2265625, "learning_rate": 0.0009537274992341922, "loss": 2.1311, "step": 27530 }, { "epoch": 0.7388095749248604, "grad_norm": 0.224609375, "learning_rate": 0.0009537047997114067, "loss": 2.1233, "step": 27531 }, { "epoch": 0.7388364104765994, "grad_norm": 0.21484375, "learning_rate": 0.0009536820994126968, "loss": 2.0087, "step": 27532 }, { "epoch": 0.7388632460283383, "grad_norm": 0.2197265625, "learning_rate": 0.0009536593983381125, "loss": 2.0804, "step": 27533 }, { "epoch": 0.7388900815800773, "grad_norm": 0.224609375, "learning_rate": 0.0009536366964877033, "loss": 2.0174, "step": 27534 }, { "epoch": 0.7389169171318162, "grad_norm": 0.2177734375, "learning_rate": 0.0009536139938615195, "loss": 2.0354, "step": 27535 }, { "epoch": 0.7389437526835552, "grad_norm": 0.21875, "learning_rate": 0.0009535912904596107, "loss": 2.0697, "step": 27536 }, { "epoch": 0.7389705882352942, "grad_norm": 0.2138671875, "learning_rate": 0.0009535685862820263, "loss": 2.1155, "step": 27537 }, { "epoch": 0.738997423787033, "grad_norm": 0.216796875, "learning_rate": 0.0009535458813288168, "loss": 2.1483, "step": 27538 }, { "epoch": 0.739024259338772, "grad_norm": 0.2216796875, "learning_rate": 0.0009535231756000314, "loss": 2.0447, "step": 27539 }, { "epoch": 0.7390510948905109, "grad_norm": 0.2197265625, "learning_rate": 0.0009535004690957202, "loss": 2.048, "step": 27540 }, { "epoch": 0.7390779304422499, "grad_norm": 0.2158203125, "learning_rate": 0.0009534777618159331, "loss": 2.02, "step": 27541 }, { "epoch": 0.7391047659939889, "grad_norm": 0.2177734375, "learning_rate": 0.0009534550537607197, "loss": 2.1253, "step": 27542 }, { "epoch": 0.7391316015457278, "grad_norm": 0.2158203125, "learning_rate": 0.0009534323449301299, "loss": 2.0627, "step": 27543 }, { "epoch": 0.7391584370974668, "grad_norm": 0.2314453125, "learning_rate": 0.0009534096353242136, "loss": 2.1872, "step": 27544 }, { "epoch": 0.7391852726492056, "grad_norm": 0.224609375, "learning_rate": 0.0009533869249430204, "loss": 2.1148, "step": 27545 }, { "epoch": 0.7392121082009446, "grad_norm": 0.220703125, "learning_rate": 0.0009533642137866003, "loss": 2.05, "step": 27546 }, { "epoch": 0.7392389437526835, "grad_norm": 0.2197265625, "learning_rate": 0.0009533415018550031, "loss": 2.0466, "step": 27547 }, { "epoch": 0.7392657793044225, "grad_norm": 0.2158203125, "learning_rate": 0.0009533187891482786, "loss": 2.1199, "step": 27548 }, { "epoch": 0.7392926148561615, "grad_norm": 0.216796875, "learning_rate": 0.0009532960756664765, "loss": 2.0889, "step": 27549 }, { "epoch": 0.7393194504079004, "grad_norm": 0.2177734375, "learning_rate": 0.000953273361409647, "loss": 2.1446, "step": 27550 }, { "epoch": 0.7393462859596394, "grad_norm": 0.2119140625, "learning_rate": 0.0009532506463778395, "loss": 2.0598, "step": 27551 }, { "epoch": 0.7393731215113782, "grad_norm": 0.2158203125, "learning_rate": 0.0009532279305711042, "loss": 2.0452, "step": 27552 }, { "epoch": 0.7393999570631172, "grad_norm": 0.2216796875, "learning_rate": 0.0009532052139894905, "loss": 2.061, "step": 27553 }, { "epoch": 0.7394267926148561, "grad_norm": 0.228515625, "learning_rate": 0.0009531824966330486, "loss": 2.0505, "step": 27554 }, { "epoch": 0.7394536281665951, "grad_norm": 0.22265625, "learning_rate": 0.0009531597785018281, "loss": 2.1267, "step": 27555 }, { "epoch": 0.7394804637183341, "grad_norm": 0.224609375, "learning_rate": 0.0009531370595958791, "loss": 2.0164, "step": 27556 }, { "epoch": 0.739507299270073, "grad_norm": 0.2177734375, "learning_rate": 0.0009531143399152511, "loss": 2.1809, "step": 27557 }, { "epoch": 0.739534134821812, "grad_norm": 0.2265625, "learning_rate": 0.0009530916194599944, "loss": 2.1311, "step": 27558 }, { "epoch": 0.7395609703735508, "grad_norm": 0.2197265625, "learning_rate": 0.0009530688982301584, "loss": 2.0159, "step": 27559 }, { "epoch": 0.7395878059252898, "grad_norm": 0.220703125, "learning_rate": 0.000953046176225793, "loss": 2.2018, "step": 27560 }, { "epoch": 0.7396146414770288, "grad_norm": 0.21484375, "learning_rate": 0.0009530234534469484, "loss": 2.0959, "step": 27561 }, { "epoch": 0.7396414770287677, "grad_norm": 0.220703125, "learning_rate": 0.000953000729893674, "loss": 2.1127, "step": 27562 }, { "epoch": 0.7396683125805067, "grad_norm": 0.2099609375, "learning_rate": 0.00095297800556602, "loss": 2.0544, "step": 27563 }, { "epoch": 0.7396951481322456, "grad_norm": 0.2255859375, "learning_rate": 0.000952955280464036, "loss": 2.1434, "step": 27564 }, { "epoch": 0.7397219836839846, "grad_norm": 0.2197265625, "learning_rate": 0.000952932554587772, "loss": 2.0805, "step": 27565 }, { "epoch": 0.7397488192357234, "grad_norm": 0.224609375, "learning_rate": 0.0009529098279372778, "loss": 2.0378, "step": 27566 }, { "epoch": 0.7397756547874624, "grad_norm": 0.216796875, "learning_rate": 0.0009528871005126033, "loss": 1.9842, "step": 27567 }, { "epoch": 0.7398024903392014, "grad_norm": 0.2119140625, "learning_rate": 0.0009528643723137983, "loss": 2.1662, "step": 27568 }, { "epoch": 0.7398293258909403, "grad_norm": 0.2275390625, "learning_rate": 0.0009528416433409126, "loss": 2.1194, "step": 27569 }, { "epoch": 0.7398561614426793, "grad_norm": 0.228515625, "learning_rate": 0.0009528189135939963, "loss": 2.1454, "step": 27570 }, { "epoch": 0.7398829969944182, "grad_norm": 0.2138671875, "learning_rate": 0.000952796183073099, "loss": 2.1013, "step": 27571 }, { "epoch": 0.7399098325461572, "grad_norm": 0.212890625, "learning_rate": 0.0009527734517782707, "loss": 2.0827, "step": 27572 }, { "epoch": 0.739936668097896, "grad_norm": 0.2216796875, "learning_rate": 0.0009527507197095612, "loss": 2.1129, "step": 27573 }, { "epoch": 0.739963503649635, "grad_norm": 0.2177734375, "learning_rate": 0.0009527279868670204, "loss": 2.1205, "step": 27574 }, { "epoch": 0.739990339201374, "grad_norm": 0.2197265625, "learning_rate": 0.0009527052532506983, "loss": 2.0605, "step": 27575 }, { "epoch": 0.7400171747531129, "grad_norm": 0.2236328125, "learning_rate": 0.0009526825188606443, "loss": 1.977, "step": 27576 }, { "epoch": 0.7400440103048519, "grad_norm": 0.216796875, "learning_rate": 0.0009526597836969089, "loss": 2.1025, "step": 27577 }, { "epoch": 0.7400708458565908, "grad_norm": 0.2216796875, "learning_rate": 0.0009526370477595418, "loss": 2.1195, "step": 27578 }, { "epoch": 0.7400976814083298, "grad_norm": 0.2197265625, "learning_rate": 0.0009526143110485925, "loss": 2.149, "step": 27579 }, { "epoch": 0.7401245169600686, "grad_norm": 0.2109375, "learning_rate": 0.0009525915735641112, "loss": 2.0471, "step": 27580 }, { "epoch": 0.7401513525118076, "grad_norm": 0.21484375, "learning_rate": 0.0009525688353061479, "loss": 2.1072, "step": 27581 }, { "epoch": 0.7401781880635466, "grad_norm": 0.2158203125, "learning_rate": 0.000952546096274752, "loss": 2.024, "step": 27582 }, { "epoch": 0.7402050236152855, "grad_norm": 0.2099609375, "learning_rate": 0.0009525233564699739, "loss": 1.9917, "step": 27583 }, { "epoch": 0.7402318591670245, "grad_norm": 0.2177734375, "learning_rate": 0.0009525006158918632, "loss": 2.1687, "step": 27584 }, { "epoch": 0.7402586947187634, "grad_norm": 0.234375, "learning_rate": 0.0009524778745404701, "loss": 2.1959, "step": 27585 }, { "epoch": 0.7402855302705024, "grad_norm": 0.2255859375, "learning_rate": 0.0009524551324158439, "loss": 2.0024, "step": 27586 }, { "epoch": 0.7403123658222414, "grad_norm": 0.2138671875, "learning_rate": 0.0009524323895180351, "loss": 2.0978, "step": 27587 }, { "epoch": 0.7403392013739802, "grad_norm": 0.2236328125, "learning_rate": 0.0009524096458470931, "loss": 2.0682, "step": 27588 }, { "epoch": 0.7403660369257192, "grad_norm": 0.2197265625, "learning_rate": 0.0009523869014030681, "loss": 2.1396, "step": 27589 }, { "epoch": 0.7403928724774581, "grad_norm": 0.2216796875, "learning_rate": 0.0009523641561860098, "loss": 2.0541, "step": 27590 }, { "epoch": 0.7404197080291971, "grad_norm": 0.2119140625, "learning_rate": 0.0009523414101959684, "loss": 2.1316, "step": 27591 }, { "epoch": 0.740446543580936, "grad_norm": 0.2177734375, "learning_rate": 0.0009523186634329936, "loss": 2.1705, "step": 27592 }, { "epoch": 0.740473379132675, "grad_norm": 0.22265625, "learning_rate": 0.0009522959158971352, "loss": 2.0409, "step": 27593 }, { "epoch": 0.740500214684414, "grad_norm": 0.21875, "learning_rate": 0.0009522731675884432, "loss": 1.992, "step": 27594 }, { "epoch": 0.7405270502361528, "grad_norm": 0.2177734375, "learning_rate": 0.0009522504185069676, "loss": 2.1956, "step": 27595 }, { "epoch": 0.7405538857878918, "grad_norm": 0.21484375, "learning_rate": 0.000952227668652758, "loss": 2.0842, "step": 27596 }, { "epoch": 0.7405807213396307, "grad_norm": 0.21484375, "learning_rate": 0.0009522049180258649, "loss": 2.0653, "step": 27597 }, { "epoch": 0.7406075568913697, "grad_norm": 0.2177734375, "learning_rate": 0.0009521821666263376, "loss": 2.0972, "step": 27598 }, { "epoch": 0.7406343924431086, "grad_norm": 0.2265625, "learning_rate": 0.0009521594144542262, "loss": 2.1092, "step": 27599 }, { "epoch": 0.7406612279948476, "grad_norm": 0.2314453125, "learning_rate": 0.0009521366615095807, "loss": 2.0876, "step": 27600 }, { "epoch": 0.7406880635465866, "grad_norm": 0.216796875, "learning_rate": 0.0009521139077924511, "loss": 2.0734, "step": 27601 }, { "epoch": 0.7407148990983254, "grad_norm": 0.212890625, "learning_rate": 0.000952091153302887, "loss": 2.0653, "step": 27602 }, { "epoch": 0.7407417346500644, "grad_norm": 0.2138671875, "learning_rate": 0.0009520683980409386, "loss": 2.0269, "step": 27603 }, { "epoch": 0.7407685702018033, "grad_norm": 0.2314453125, "learning_rate": 0.0009520456420066555, "loss": 2.1116, "step": 27604 }, { "epoch": 0.7407954057535423, "grad_norm": 0.2236328125, "learning_rate": 0.0009520228852000881, "loss": 2.1368, "step": 27605 }, { "epoch": 0.7408222413052812, "grad_norm": 0.21875, "learning_rate": 0.000952000127621286, "loss": 2.0107, "step": 27606 }, { "epoch": 0.7408490768570202, "grad_norm": 0.2177734375, "learning_rate": 0.000951977369270299, "loss": 2.0244, "step": 27607 }, { "epoch": 0.7408759124087592, "grad_norm": 0.2177734375, "learning_rate": 0.0009519546101471773, "loss": 2.0829, "step": 27608 }, { "epoch": 0.740902747960498, "grad_norm": 0.2333984375, "learning_rate": 0.000951931850251971, "loss": 2.0759, "step": 27609 }, { "epoch": 0.740929583512237, "grad_norm": 0.2275390625, "learning_rate": 0.0009519090895847293, "loss": 2.0692, "step": 27610 }, { "epoch": 0.7409564190639759, "grad_norm": 0.2138671875, "learning_rate": 0.0009518863281455028, "loss": 2.0534, "step": 27611 }, { "epoch": 0.7409832546157149, "grad_norm": 0.2177734375, "learning_rate": 0.0009518635659343414, "loss": 2.107, "step": 27612 }, { "epoch": 0.7410100901674539, "grad_norm": 0.224609375, "learning_rate": 0.0009518408029512948, "loss": 2.0532, "step": 27613 }, { "epoch": 0.7410369257191928, "grad_norm": 0.2216796875, "learning_rate": 0.0009518180391964128, "loss": 2.0189, "step": 27614 }, { "epoch": 0.7410637612709318, "grad_norm": 0.2294921875, "learning_rate": 0.0009517952746697457, "loss": 2.1306, "step": 27615 }, { "epoch": 0.7410905968226706, "grad_norm": 0.216796875, "learning_rate": 0.0009517725093713431, "loss": 2.0076, "step": 27616 }, { "epoch": 0.7411174323744096, "grad_norm": 0.212890625, "learning_rate": 0.0009517497433012554, "loss": 2.0206, "step": 27617 }, { "epoch": 0.7411442679261485, "grad_norm": 0.2080078125, "learning_rate": 0.0009517269764595318, "loss": 2.0784, "step": 27618 }, { "epoch": 0.7411711034778875, "grad_norm": 0.21484375, "learning_rate": 0.0009517042088462229, "loss": 1.9926, "step": 27619 }, { "epoch": 0.7411979390296265, "grad_norm": 0.2177734375, "learning_rate": 0.0009516814404613785, "loss": 2.0971, "step": 27620 }, { "epoch": 0.7412247745813654, "grad_norm": 0.21875, "learning_rate": 0.0009516586713050485, "loss": 2.1021, "step": 27621 }, { "epoch": 0.7412516101331044, "grad_norm": 0.21875, "learning_rate": 0.0009516359013772829, "loss": 2.0541, "step": 27622 }, { "epoch": 0.7412784456848432, "grad_norm": 0.2119140625, "learning_rate": 0.0009516131306781315, "loss": 1.9545, "step": 27623 }, { "epoch": 0.7413052812365822, "grad_norm": 0.2265625, "learning_rate": 0.0009515903592076442, "loss": 2.0236, "step": 27624 }, { "epoch": 0.7413321167883211, "grad_norm": 0.216796875, "learning_rate": 0.0009515675869658714, "loss": 2.0974, "step": 27625 }, { "epoch": 0.7413589523400601, "grad_norm": 0.220703125, "learning_rate": 0.0009515448139528625, "loss": 2.1119, "step": 27626 }, { "epoch": 0.7413857878917991, "grad_norm": 0.2265625, "learning_rate": 0.0009515220401686677, "loss": 2.1277, "step": 27627 }, { "epoch": 0.741412623443538, "grad_norm": 0.2216796875, "learning_rate": 0.0009514992656133371, "loss": 2.0135, "step": 27628 }, { "epoch": 0.741439458995277, "grad_norm": 0.21875, "learning_rate": 0.0009514764902869205, "loss": 2.0315, "step": 27629 }, { "epoch": 0.7414662945470158, "grad_norm": 0.224609375, "learning_rate": 0.0009514537141894678, "loss": 2.0888, "step": 27630 }, { "epoch": 0.7414931300987548, "grad_norm": 0.216796875, "learning_rate": 0.0009514309373210291, "loss": 2.0392, "step": 27631 }, { "epoch": 0.7415199656504938, "grad_norm": 0.216796875, "learning_rate": 0.0009514081596816542, "loss": 2.0046, "step": 27632 }, { "epoch": 0.7415468012022327, "grad_norm": 0.216796875, "learning_rate": 0.0009513853812713933, "loss": 2.0652, "step": 27633 }, { "epoch": 0.7415736367539717, "grad_norm": 0.22265625, "learning_rate": 0.0009513626020902963, "loss": 2.0129, "step": 27634 }, { "epoch": 0.7416004723057106, "grad_norm": 0.2294921875, "learning_rate": 0.0009513398221384131, "loss": 2.119, "step": 27635 }, { "epoch": 0.7416273078574496, "grad_norm": 0.2158203125, "learning_rate": 0.0009513170414157937, "loss": 2.0659, "step": 27636 }, { "epoch": 0.7416541434091884, "grad_norm": 0.2236328125, "learning_rate": 0.000951294259922488, "loss": 2.0699, "step": 27637 }, { "epoch": 0.7416809789609274, "grad_norm": 0.212890625, "learning_rate": 0.000951271477658546, "loss": 2.0607, "step": 27638 }, { "epoch": 0.7417078145126664, "grad_norm": 0.2197265625, "learning_rate": 0.0009512486946240178, "loss": 2.1772, "step": 27639 }, { "epoch": 0.7417346500644053, "grad_norm": 0.2197265625, "learning_rate": 0.0009512259108189533, "loss": 2.092, "step": 27640 }, { "epoch": 0.7417614856161443, "grad_norm": 0.2265625, "learning_rate": 0.0009512031262434024, "loss": 2.0893, "step": 27641 }, { "epoch": 0.7417883211678832, "grad_norm": 0.212890625, "learning_rate": 0.0009511803408974154, "loss": 2.015, "step": 27642 }, { "epoch": 0.7418151567196222, "grad_norm": 0.2177734375, "learning_rate": 0.0009511575547810419, "loss": 2.1234, "step": 27643 }, { "epoch": 0.741841992271361, "grad_norm": 0.2236328125, "learning_rate": 0.0009511347678943321, "loss": 2.0159, "step": 27644 }, { "epoch": 0.7418688278231, "grad_norm": 0.2236328125, "learning_rate": 0.0009511119802373358, "loss": 2.1944, "step": 27645 }, { "epoch": 0.741895663374839, "grad_norm": 0.2470703125, "learning_rate": 0.0009510891918101031, "loss": 2.086, "step": 27646 }, { "epoch": 0.7419224989265779, "grad_norm": 0.216796875, "learning_rate": 0.0009510664026126841, "loss": 2.1912, "step": 27647 }, { "epoch": 0.7419493344783169, "grad_norm": 0.2060546875, "learning_rate": 0.0009510436126451288, "loss": 1.9955, "step": 27648 }, { "epoch": 0.7419761700300558, "grad_norm": 0.21875, "learning_rate": 0.0009510208219074869, "loss": 2.0097, "step": 27649 }, { "epoch": 0.7420030055817948, "grad_norm": 0.2216796875, "learning_rate": 0.0009509980303998086, "loss": 2.1539, "step": 27650 }, { "epoch": 0.7420298411335337, "grad_norm": 0.2255859375, "learning_rate": 0.000950975238122144, "loss": 2.0224, "step": 27651 }, { "epoch": 0.7420566766852726, "grad_norm": 0.21875, "learning_rate": 0.0009509524450745429, "loss": 2.0589, "step": 27652 }, { "epoch": 0.7420835122370116, "grad_norm": 0.2197265625, "learning_rate": 0.0009509296512570555, "loss": 2.059, "step": 27653 }, { "epoch": 0.7421103477887505, "grad_norm": 0.216796875, "learning_rate": 0.0009509068566697317, "loss": 2.098, "step": 27654 }, { "epoch": 0.7421371833404895, "grad_norm": 0.216796875, "learning_rate": 0.0009508840613126214, "loss": 2.0977, "step": 27655 }, { "epoch": 0.7421640188922284, "grad_norm": 0.205078125, "learning_rate": 0.0009508612651857746, "loss": 2.0138, "step": 27656 }, { "epoch": 0.7421908544439674, "grad_norm": 0.2197265625, "learning_rate": 0.0009508384682892417, "loss": 2.0453, "step": 27657 }, { "epoch": 0.7422176899957064, "grad_norm": 0.2119140625, "learning_rate": 0.0009508156706230722, "loss": 2.094, "step": 27658 }, { "epoch": 0.7422445255474452, "grad_norm": 0.224609375, "learning_rate": 0.0009507928721873164, "loss": 2.045, "step": 27659 }, { "epoch": 0.7422713610991842, "grad_norm": 0.2265625, "learning_rate": 0.000950770072982024, "loss": 2.146, "step": 27660 }, { "epoch": 0.7422981966509231, "grad_norm": 0.2255859375, "learning_rate": 0.0009507472730072456, "loss": 2.0748, "step": 27661 }, { "epoch": 0.7423250322026621, "grad_norm": 0.2138671875, "learning_rate": 0.0009507244722630308, "loss": 1.9715, "step": 27662 }, { "epoch": 0.742351867754401, "grad_norm": 0.2177734375, "learning_rate": 0.0009507016707494295, "loss": 2.1296, "step": 27663 }, { "epoch": 0.74237870330614, "grad_norm": 0.2099609375, "learning_rate": 0.0009506788684664921, "loss": 2.0531, "step": 27664 }, { "epoch": 0.742405538857879, "grad_norm": 0.216796875, "learning_rate": 0.0009506560654142684, "loss": 2.0317, "step": 27665 }, { "epoch": 0.7424323744096178, "grad_norm": 0.2138671875, "learning_rate": 0.0009506332615928085, "loss": 2.0616, "step": 27666 }, { "epoch": 0.7424592099613568, "grad_norm": 0.2216796875, "learning_rate": 0.0009506104570021623, "loss": 2.0385, "step": 27667 }, { "epoch": 0.7424860455130957, "grad_norm": 0.2099609375, "learning_rate": 0.0009505876516423801, "loss": 2.0541, "step": 27668 }, { "epoch": 0.7425128810648347, "grad_norm": 0.216796875, "learning_rate": 0.0009505648455135113, "loss": 2.0904, "step": 27669 }, { "epoch": 0.7425397166165736, "grad_norm": 0.2119140625, "learning_rate": 0.0009505420386156068, "loss": 2.033, "step": 27670 }, { "epoch": 0.7425665521683126, "grad_norm": 0.216796875, "learning_rate": 0.0009505192309487159, "loss": 2.0046, "step": 27671 }, { "epoch": 0.7425933877200516, "grad_norm": 0.2109375, "learning_rate": 0.000950496422512889, "loss": 2.0614, "step": 27672 }, { "epoch": 0.7426202232717904, "grad_norm": 0.22265625, "learning_rate": 0.0009504736133081762, "loss": 2.0376, "step": 27673 }, { "epoch": 0.7426470588235294, "grad_norm": 0.212890625, "learning_rate": 0.0009504508033346274, "loss": 2.0101, "step": 27674 }, { "epoch": 0.7426738943752683, "grad_norm": 0.216796875, "learning_rate": 0.0009504279925922926, "loss": 2.0244, "step": 27675 }, { "epoch": 0.7427007299270073, "grad_norm": 0.212890625, "learning_rate": 0.0009504051810812218, "loss": 2.0735, "step": 27676 }, { "epoch": 0.7427275654787462, "grad_norm": 0.2255859375, "learning_rate": 0.0009503823688014652, "loss": 2.038, "step": 27677 }, { "epoch": 0.7427544010304852, "grad_norm": 0.2294921875, "learning_rate": 0.0009503595557530727, "loss": 2.0959, "step": 27678 }, { "epoch": 0.7427812365822242, "grad_norm": 0.21484375, "learning_rate": 0.0009503367419360945, "loss": 2.0691, "step": 27679 }, { "epoch": 0.742808072133963, "grad_norm": 0.21484375, "learning_rate": 0.0009503139273505804, "loss": 1.9696, "step": 27680 }, { "epoch": 0.742834907685702, "grad_norm": 0.2216796875, "learning_rate": 0.0009502911119965809, "loss": 2.0896, "step": 27681 }, { "epoch": 0.7428617432374409, "grad_norm": 0.2177734375, "learning_rate": 0.0009502682958741456, "loss": 2.0936, "step": 27682 }, { "epoch": 0.7428885787891799, "grad_norm": 0.2060546875, "learning_rate": 0.0009502454789833246, "loss": 2.0536, "step": 27683 }, { "epoch": 0.7429154143409189, "grad_norm": 0.2177734375, "learning_rate": 0.0009502226613241681, "loss": 2.0228, "step": 27684 }, { "epoch": 0.7429422498926578, "grad_norm": 0.220703125, "learning_rate": 0.0009501998428967263, "loss": 2.0619, "step": 27685 }, { "epoch": 0.7429690854443968, "grad_norm": 0.2255859375, "learning_rate": 0.0009501770237010488, "loss": 2.0047, "step": 27686 }, { "epoch": 0.7429959209961357, "grad_norm": 0.2197265625, "learning_rate": 0.0009501542037371862, "loss": 2.1149, "step": 27687 }, { "epoch": 0.7430227565478746, "grad_norm": 0.21875, "learning_rate": 0.000950131383005188, "loss": 2.1905, "step": 27688 }, { "epoch": 0.7430495920996135, "grad_norm": 0.23046875, "learning_rate": 0.0009501085615051049, "loss": 2.1742, "step": 27689 }, { "epoch": 0.7430764276513525, "grad_norm": 0.212890625, "learning_rate": 0.0009500857392369864, "loss": 2.0525, "step": 27690 }, { "epoch": 0.7431032632030915, "grad_norm": 0.2177734375, "learning_rate": 0.0009500629162008827, "loss": 2.083, "step": 27691 }, { "epoch": 0.7431300987548304, "grad_norm": 0.21484375, "learning_rate": 0.0009500400923968443, "loss": 2.0695, "step": 27692 }, { "epoch": 0.7431569343065694, "grad_norm": 0.2138671875, "learning_rate": 0.0009500172678249206, "loss": 2.1179, "step": 27693 }, { "epoch": 0.7431837698583083, "grad_norm": 0.2099609375, "learning_rate": 0.0009499944424851621, "loss": 2.0285, "step": 27694 }, { "epoch": 0.7432106054100472, "grad_norm": 0.232421875, "learning_rate": 0.0009499716163776188, "loss": 2.0717, "step": 27695 }, { "epoch": 0.7432374409617861, "grad_norm": 0.22265625, "learning_rate": 0.0009499487895023407, "loss": 2.1078, "step": 27696 }, { "epoch": 0.7432642765135251, "grad_norm": 0.2177734375, "learning_rate": 0.000949925961859378, "loss": 2.1353, "step": 27697 }, { "epoch": 0.7432911120652641, "grad_norm": 0.21484375, "learning_rate": 0.0009499031334487806, "loss": 2.0655, "step": 27698 }, { "epoch": 0.743317947617003, "grad_norm": 0.2099609375, "learning_rate": 0.0009498803042705986, "loss": 2.0646, "step": 27699 }, { "epoch": 0.743344783168742, "grad_norm": 0.2109375, "learning_rate": 0.0009498574743248823, "loss": 2.0625, "step": 27700 }, { "epoch": 0.7433716187204809, "grad_norm": 0.2236328125, "learning_rate": 0.0009498346436116815, "loss": 1.9603, "step": 27701 }, { "epoch": 0.7433984542722198, "grad_norm": 0.2060546875, "learning_rate": 0.0009498118121310464, "loss": 2.1006, "step": 27702 }, { "epoch": 0.7434252898239588, "grad_norm": 0.21484375, "learning_rate": 0.0009497889798830272, "loss": 2.0382, "step": 27703 }, { "epoch": 0.7434521253756977, "grad_norm": 0.2255859375, "learning_rate": 0.0009497661468676738, "loss": 2.0591, "step": 27704 }, { "epoch": 0.7434789609274367, "grad_norm": 0.2216796875, "learning_rate": 0.0009497433130850365, "loss": 1.9345, "step": 27705 }, { "epoch": 0.7435057964791756, "grad_norm": 0.2236328125, "learning_rate": 0.0009497204785351653, "loss": 2.006, "step": 27706 }, { "epoch": 0.7435326320309146, "grad_norm": 0.2197265625, "learning_rate": 0.0009496976432181102, "loss": 2.043, "step": 27707 }, { "epoch": 0.7435594675826535, "grad_norm": 0.212890625, "learning_rate": 0.0009496748071339212, "loss": 2.0881, "step": 27708 }, { "epoch": 0.7435863031343924, "grad_norm": 0.212890625, "learning_rate": 0.0009496519702826486, "loss": 2.045, "step": 27709 }, { "epoch": 0.7436131386861314, "grad_norm": 0.2177734375, "learning_rate": 0.0009496291326643425, "loss": 2.0145, "step": 27710 }, { "epoch": 0.7436399742378703, "grad_norm": 0.220703125, "learning_rate": 0.0009496062942790529, "loss": 2.0715, "step": 27711 }, { "epoch": 0.7436668097896093, "grad_norm": 0.2177734375, "learning_rate": 0.0009495834551268301, "loss": 2.0938, "step": 27712 }, { "epoch": 0.7436936453413482, "grad_norm": 0.2177734375, "learning_rate": 0.000949560615207724, "loss": 2.0896, "step": 27713 }, { "epoch": 0.7437204808930872, "grad_norm": 0.21484375, "learning_rate": 0.0009495377745217846, "loss": 2.0981, "step": 27714 }, { "epoch": 0.7437473164448261, "grad_norm": 0.2138671875, "learning_rate": 0.0009495149330690623, "loss": 2.1, "step": 27715 }, { "epoch": 0.743774151996565, "grad_norm": 0.216796875, "learning_rate": 0.0009494920908496069, "loss": 2.0215, "step": 27716 }, { "epoch": 0.743800987548304, "grad_norm": 0.208984375, "learning_rate": 0.000949469247863469, "loss": 1.9767, "step": 27717 }, { "epoch": 0.7438278231000429, "grad_norm": 0.2265625, "learning_rate": 0.0009494464041106981, "loss": 2.1723, "step": 27718 }, { "epoch": 0.7438546586517819, "grad_norm": 0.21484375, "learning_rate": 0.0009494235595913445, "loss": 2.1023, "step": 27719 }, { "epoch": 0.7438814942035208, "grad_norm": 0.2099609375, "learning_rate": 0.0009494007143054587, "loss": 2.0576, "step": 27720 }, { "epoch": 0.7439083297552598, "grad_norm": 0.21875, "learning_rate": 0.0009493778682530906, "loss": 2.0041, "step": 27721 }, { "epoch": 0.7439351653069987, "grad_norm": 0.2333984375, "learning_rate": 0.00094935502143429, "loss": 2.0988, "step": 27722 }, { "epoch": 0.7439620008587376, "grad_norm": 0.212890625, "learning_rate": 0.0009493321738491073, "loss": 2.0507, "step": 27723 }, { "epoch": 0.7439888364104766, "grad_norm": 0.216796875, "learning_rate": 0.0009493093254975926, "loss": 2.0415, "step": 27724 }, { "epoch": 0.7440156719622155, "grad_norm": 0.216796875, "learning_rate": 0.000949286476379796, "loss": 1.9623, "step": 27725 }, { "epoch": 0.7440425075139545, "grad_norm": 0.2099609375, "learning_rate": 0.0009492636264957676, "loss": 2.0931, "step": 27726 }, { "epoch": 0.7440693430656934, "grad_norm": 0.2158203125, "learning_rate": 0.0009492407758455578, "loss": 2.1065, "step": 27727 }, { "epoch": 0.7440961786174324, "grad_norm": 0.2158203125, "learning_rate": 0.0009492179244292161, "loss": 2.0093, "step": 27728 }, { "epoch": 0.7441230141691714, "grad_norm": 0.21875, "learning_rate": 0.0009491950722467933, "loss": 2.0756, "step": 27729 }, { "epoch": 0.7441498497209103, "grad_norm": 0.212890625, "learning_rate": 0.0009491722192983391, "loss": 2.0601, "step": 27730 }, { "epoch": 0.7441766852726492, "grad_norm": 0.2109375, "learning_rate": 0.0009491493655839038, "loss": 2.0125, "step": 27731 }, { "epoch": 0.7442035208243881, "grad_norm": 0.2099609375, "learning_rate": 0.0009491265111035378, "loss": 2.0361, "step": 27732 }, { "epoch": 0.7442303563761271, "grad_norm": 0.2236328125, "learning_rate": 0.0009491036558572904, "loss": 2.0624, "step": 27733 }, { "epoch": 0.744257191927866, "grad_norm": 0.2109375, "learning_rate": 0.0009490807998452129, "loss": 1.9942, "step": 27734 }, { "epoch": 0.744284027479605, "grad_norm": 0.21484375, "learning_rate": 0.0009490579430673545, "loss": 1.9982, "step": 27735 }, { "epoch": 0.744310863031344, "grad_norm": 0.212890625, "learning_rate": 0.0009490350855237656, "loss": 2.007, "step": 27736 }, { "epoch": 0.7443376985830829, "grad_norm": 0.216796875, "learning_rate": 0.0009490122272144965, "loss": 2.0759, "step": 27737 }, { "epoch": 0.7443645341348218, "grad_norm": 0.2216796875, "learning_rate": 0.0009489893681395973, "loss": 2.0326, "step": 27738 }, { "epoch": 0.7443913696865607, "grad_norm": 0.208984375, "learning_rate": 0.0009489665082991178, "loss": 2.0934, "step": 27739 }, { "epoch": 0.7444182052382997, "grad_norm": 0.2060546875, "learning_rate": 0.0009489436476931089, "loss": 1.9566, "step": 27740 }, { "epoch": 0.7444450407900386, "grad_norm": 0.2060546875, "learning_rate": 0.0009489207863216202, "loss": 1.9468, "step": 27741 }, { "epoch": 0.7444718763417776, "grad_norm": 0.2294921875, "learning_rate": 0.0009488979241847017, "loss": 2.1408, "step": 27742 }, { "epoch": 0.7444987118935166, "grad_norm": 0.2236328125, "learning_rate": 0.0009488750612824039, "loss": 2.1214, "step": 27743 }, { "epoch": 0.7445255474452555, "grad_norm": 0.2138671875, "learning_rate": 0.000948852197614777, "loss": 2.027, "step": 27744 }, { "epoch": 0.7445523829969944, "grad_norm": 0.20703125, "learning_rate": 0.0009488293331818709, "loss": 1.9416, "step": 27745 }, { "epoch": 0.7445792185487333, "grad_norm": 0.2158203125, "learning_rate": 0.0009488064679837359, "loss": 2.1157, "step": 27746 }, { "epoch": 0.7446060541004723, "grad_norm": 0.216796875, "learning_rate": 0.0009487836020204219, "loss": 2.0988, "step": 27747 }, { "epoch": 0.7446328896522112, "grad_norm": 0.2080078125, "learning_rate": 0.0009487607352919797, "loss": 2.0847, "step": 27748 }, { "epoch": 0.7446597252039502, "grad_norm": 0.21875, "learning_rate": 0.0009487378677984588, "loss": 2.0937, "step": 27749 }, { "epoch": 0.7446865607556892, "grad_norm": 0.20703125, "learning_rate": 0.0009487149995399096, "loss": 2.0372, "step": 27750 }, { "epoch": 0.744713396307428, "grad_norm": 0.21875, "learning_rate": 0.0009486921305163824, "loss": 2.1429, "step": 27751 }, { "epoch": 0.744740231859167, "grad_norm": 0.2099609375, "learning_rate": 0.0009486692607279273, "loss": 2.0978, "step": 27752 }, { "epoch": 0.7447670674109059, "grad_norm": 0.2099609375, "learning_rate": 0.0009486463901745942, "loss": 2.0276, "step": 27753 }, { "epoch": 0.7447939029626449, "grad_norm": 0.216796875, "learning_rate": 0.0009486235188564337, "loss": 2.1362, "step": 27754 }, { "epoch": 0.7448207385143839, "grad_norm": 0.201171875, "learning_rate": 0.0009486006467734957, "loss": 1.9637, "step": 27755 }, { "epoch": 0.7448475740661228, "grad_norm": 0.2265625, "learning_rate": 0.0009485777739258305, "loss": 2.0405, "step": 27756 }, { "epoch": 0.7448744096178618, "grad_norm": 0.22265625, "learning_rate": 0.0009485549003134881, "loss": 2.1866, "step": 27757 }, { "epoch": 0.7449012451696007, "grad_norm": 0.2119140625, "learning_rate": 0.0009485320259365188, "loss": 2.0574, "step": 27758 }, { "epoch": 0.7449280807213396, "grad_norm": 0.2099609375, "learning_rate": 0.0009485091507949729, "loss": 2.1207, "step": 27759 }, { "epoch": 0.7449549162730785, "grad_norm": 0.2216796875, "learning_rate": 0.0009484862748889004, "loss": 2.0463, "step": 27760 }, { "epoch": 0.7449817518248175, "grad_norm": 0.21875, "learning_rate": 0.0009484633982183516, "loss": 2.0156, "step": 27761 }, { "epoch": 0.7450085873765565, "grad_norm": 0.2099609375, "learning_rate": 0.0009484405207833763, "loss": 2.0336, "step": 27762 }, { "epoch": 0.7450354229282954, "grad_norm": 0.21484375, "learning_rate": 0.0009484176425840254, "loss": 2.0143, "step": 27763 }, { "epoch": 0.7450622584800344, "grad_norm": 0.2177734375, "learning_rate": 0.0009483947636203486, "loss": 1.9651, "step": 27764 }, { "epoch": 0.7450890940317733, "grad_norm": 0.21875, "learning_rate": 0.0009483718838923961, "loss": 2.076, "step": 27765 }, { "epoch": 0.7451159295835122, "grad_norm": 0.208984375, "learning_rate": 0.0009483490034002182, "loss": 2.0285, "step": 27766 }, { "epoch": 0.7451427651352511, "grad_norm": 0.2080078125, "learning_rate": 0.0009483261221438651, "loss": 1.9912, "step": 27767 }, { "epoch": 0.7451696006869901, "grad_norm": 0.2265625, "learning_rate": 0.0009483032401233869, "loss": 2.0574, "step": 27768 }, { "epoch": 0.7451964362387291, "grad_norm": 0.2041015625, "learning_rate": 0.0009482803573388339, "loss": 1.9928, "step": 27769 }, { "epoch": 0.745223271790468, "grad_norm": 0.208984375, "learning_rate": 0.0009482574737902563, "loss": 2.0056, "step": 27770 }, { "epoch": 0.745250107342207, "grad_norm": 0.2177734375, "learning_rate": 0.0009482345894777042, "loss": 2.0815, "step": 27771 }, { "epoch": 0.7452769428939459, "grad_norm": 0.2109375, "learning_rate": 0.0009482117044012279, "loss": 2.0194, "step": 27772 }, { "epoch": 0.7453037784456849, "grad_norm": 0.2177734375, "learning_rate": 0.0009481888185608776, "loss": 2.0151, "step": 27773 }, { "epoch": 0.7453306139974238, "grad_norm": 0.2138671875, "learning_rate": 0.0009481659319567035, "loss": 2.077, "step": 27774 }, { "epoch": 0.7453574495491627, "grad_norm": 0.2216796875, "learning_rate": 0.0009481430445887557, "loss": 1.9738, "step": 27775 }, { "epoch": 0.7453842851009017, "grad_norm": 0.2099609375, "learning_rate": 0.0009481201564570846, "loss": 2.0193, "step": 27776 }, { "epoch": 0.7454111206526406, "grad_norm": 0.21875, "learning_rate": 0.0009480972675617402, "loss": 2.0597, "step": 27777 }, { "epoch": 0.7454379562043796, "grad_norm": 0.2158203125, "learning_rate": 0.0009480743779027727, "loss": 2.1069, "step": 27778 }, { "epoch": 0.7454647917561185, "grad_norm": 0.2197265625, "learning_rate": 0.0009480514874802326, "loss": 2.0276, "step": 27779 }, { "epoch": 0.7454916273078575, "grad_norm": 0.2197265625, "learning_rate": 0.0009480285962941699, "loss": 2.0248, "step": 27780 }, { "epoch": 0.7455184628595964, "grad_norm": 0.208984375, "learning_rate": 0.0009480057043446347, "loss": 2.0001, "step": 27781 }, { "epoch": 0.7455452984113353, "grad_norm": 0.2197265625, "learning_rate": 0.0009479828116316774, "loss": 2.0346, "step": 27782 }, { "epoch": 0.7455721339630743, "grad_norm": 0.212890625, "learning_rate": 0.0009479599181553484, "loss": 2.1203, "step": 27783 }, { "epoch": 0.7455989695148132, "grad_norm": 0.2119140625, "learning_rate": 0.0009479370239156976, "loss": 2.0652, "step": 27784 }, { "epoch": 0.7456258050665522, "grad_norm": 0.2138671875, "learning_rate": 0.0009479141289127753, "loss": 1.9864, "step": 27785 }, { "epoch": 0.7456526406182911, "grad_norm": 0.2041015625, "learning_rate": 0.0009478912331466317, "loss": 2.0, "step": 27786 }, { "epoch": 0.74567947617003, "grad_norm": 0.21875, "learning_rate": 0.0009478683366173173, "loss": 2.0079, "step": 27787 }, { "epoch": 0.745706311721769, "grad_norm": 0.212890625, "learning_rate": 0.0009478454393248819, "loss": 2.0794, "step": 27788 }, { "epoch": 0.7457331472735079, "grad_norm": 0.21484375, "learning_rate": 0.000947822541269376, "loss": 2.1193, "step": 27789 }, { "epoch": 0.7457599828252469, "grad_norm": 0.2099609375, "learning_rate": 0.0009477996424508498, "loss": 2.0175, "step": 27790 }, { "epoch": 0.7457868183769858, "grad_norm": 0.208984375, "learning_rate": 0.0009477767428693536, "loss": 1.9973, "step": 27791 }, { "epoch": 0.7458136539287248, "grad_norm": 0.2080078125, "learning_rate": 0.0009477538425249374, "loss": 2.0196, "step": 27792 }, { "epoch": 0.7458404894804637, "grad_norm": 0.2158203125, "learning_rate": 0.0009477309414176517, "loss": 2.0636, "step": 27793 }, { "epoch": 0.7458673250322027, "grad_norm": 0.2177734375, "learning_rate": 0.0009477080395475465, "loss": 2.0318, "step": 27794 }, { "epoch": 0.7458941605839416, "grad_norm": 0.205078125, "learning_rate": 0.0009476851369146722, "loss": 2.0579, "step": 27795 }, { "epoch": 0.7459209961356805, "grad_norm": 0.2138671875, "learning_rate": 0.0009476622335190789, "loss": 2.0788, "step": 27796 }, { "epoch": 0.7459478316874195, "grad_norm": 0.2099609375, "learning_rate": 0.0009476393293608172, "loss": 1.9954, "step": 27797 }, { "epoch": 0.7459746672391584, "grad_norm": 0.2099609375, "learning_rate": 0.000947616424439937, "loss": 2.0924, "step": 27798 }, { "epoch": 0.7460015027908974, "grad_norm": 0.20703125, "learning_rate": 0.0009475935187564885, "loss": 2.0176, "step": 27799 }, { "epoch": 0.7460283383426364, "grad_norm": 0.20703125, "learning_rate": 0.0009475706123105221, "loss": 1.9673, "step": 27800 }, { "epoch": 0.7460551738943753, "grad_norm": 0.2080078125, "learning_rate": 0.000947547705102088, "loss": 2.0444, "step": 27801 }, { "epoch": 0.7460820094461142, "grad_norm": 0.2119140625, "learning_rate": 0.0009475247971312366, "loss": 2.0936, "step": 27802 }, { "epoch": 0.7461088449978531, "grad_norm": 0.2138671875, "learning_rate": 0.0009475018883980179, "loss": 2.1104, "step": 27803 }, { "epoch": 0.7461356805495921, "grad_norm": 0.2109375, "learning_rate": 0.0009474789789024826, "loss": 2.0619, "step": 27804 }, { "epoch": 0.746162516101331, "grad_norm": 0.212890625, "learning_rate": 0.0009474560686446805, "loss": 1.9977, "step": 27805 }, { "epoch": 0.74618935165307, "grad_norm": 0.2080078125, "learning_rate": 0.0009474331576246618, "loss": 2.0349, "step": 27806 }, { "epoch": 0.746216187204809, "grad_norm": 0.212890625, "learning_rate": 0.0009474102458424771, "loss": 1.9925, "step": 27807 }, { "epoch": 0.7462430227565479, "grad_norm": 0.2099609375, "learning_rate": 0.0009473873332981765, "loss": 2.0367, "step": 27808 }, { "epoch": 0.7462698583082868, "grad_norm": 0.2216796875, "learning_rate": 0.0009473644199918104, "loss": 2.1124, "step": 27809 }, { "epoch": 0.7462966938600257, "grad_norm": 0.20703125, "learning_rate": 0.0009473415059234291, "loss": 2.0862, "step": 27810 }, { "epoch": 0.7463235294117647, "grad_norm": 0.208984375, "learning_rate": 0.0009473185910930825, "loss": 2.0282, "step": 27811 }, { "epoch": 0.7463503649635036, "grad_norm": 0.2216796875, "learning_rate": 0.0009472956755008212, "loss": 2.0468, "step": 27812 }, { "epoch": 0.7463772005152426, "grad_norm": 0.21875, "learning_rate": 0.0009472727591466952, "loss": 2.0497, "step": 27813 }, { "epoch": 0.7464040360669816, "grad_norm": 0.216796875, "learning_rate": 0.000947249842030755, "loss": 2.0895, "step": 27814 }, { "epoch": 0.7464308716187205, "grad_norm": 0.201171875, "learning_rate": 0.0009472269241530509, "loss": 1.9327, "step": 27815 }, { "epoch": 0.7464577071704595, "grad_norm": 0.20703125, "learning_rate": 0.0009472040055136331, "loss": 2.0156, "step": 27816 }, { "epoch": 0.7464845427221983, "grad_norm": 0.2080078125, "learning_rate": 0.0009471810861125517, "loss": 1.9677, "step": 27817 }, { "epoch": 0.7465113782739373, "grad_norm": 0.205078125, "learning_rate": 0.0009471581659498572, "loss": 1.9459, "step": 27818 }, { "epoch": 0.7465382138256763, "grad_norm": 0.20703125, "learning_rate": 0.0009471352450255998, "loss": 2.1121, "step": 27819 }, { "epoch": 0.7465650493774152, "grad_norm": 0.208984375, "learning_rate": 0.0009471123233398298, "loss": 2.0162, "step": 27820 }, { "epoch": 0.7465918849291542, "grad_norm": 0.212890625, "learning_rate": 0.0009470894008925977, "loss": 2.1072, "step": 27821 }, { "epoch": 0.7466187204808931, "grad_norm": 0.2099609375, "learning_rate": 0.0009470664776839535, "loss": 2.0477, "step": 27822 }, { "epoch": 0.746645556032632, "grad_norm": 0.208984375, "learning_rate": 0.0009470435537139473, "loss": 2.0109, "step": 27823 }, { "epoch": 0.7466723915843709, "grad_norm": 0.2109375, "learning_rate": 0.0009470206289826298, "loss": 2.1228, "step": 27824 }, { "epoch": 0.7466992271361099, "grad_norm": 0.2119140625, "learning_rate": 0.0009469977034900512, "loss": 1.8942, "step": 27825 }, { "epoch": 0.7467260626878489, "grad_norm": 0.212890625, "learning_rate": 0.0009469747772362616, "loss": 2.0, "step": 27826 }, { "epoch": 0.7467528982395878, "grad_norm": 0.2109375, "learning_rate": 0.0009469518502213115, "loss": 2.0066, "step": 27827 }, { "epoch": 0.7467797337913268, "grad_norm": 0.2119140625, "learning_rate": 0.000946928922445251, "loss": 2.0099, "step": 27828 }, { "epoch": 0.7468065693430657, "grad_norm": 0.2099609375, "learning_rate": 0.0009469059939081307, "loss": 1.9883, "step": 27829 }, { "epoch": 0.7468334048948047, "grad_norm": 0.208984375, "learning_rate": 0.0009468830646100007, "loss": 2.0329, "step": 27830 }, { "epoch": 0.7468602404465435, "grad_norm": 0.216796875, "learning_rate": 0.0009468601345509112, "loss": 2.0626, "step": 27831 }, { "epoch": 0.7468870759982825, "grad_norm": 0.220703125, "learning_rate": 0.0009468372037309126, "loss": 2.1259, "step": 27832 }, { "epoch": 0.7469139115500215, "grad_norm": 0.2119140625, "learning_rate": 0.0009468142721500552, "loss": 2.0524, "step": 27833 }, { "epoch": 0.7469407471017604, "grad_norm": 0.212890625, "learning_rate": 0.0009467913398083893, "loss": 2.0242, "step": 27834 }, { "epoch": 0.7469675826534994, "grad_norm": 0.2109375, "learning_rate": 0.0009467684067059654, "loss": 2.0864, "step": 27835 }, { "epoch": 0.7469944182052383, "grad_norm": 0.208984375, "learning_rate": 0.0009467454728428337, "loss": 1.9885, "step": 27836 }, { "epoch": 0.7470212537569773, "grad_norm": 0.208984375, "learning_rate": 0.0009467225382190441, "loss": 2.0645, "step": 27837 }, { "epoch": 0.7470480893087161, "grad_norm": 0.20703125, "learning_rate": 0.0009466996028346476, "loss": 2.0202, "step": 27838 }, { "epoch": 0.7470749248604551, "grad_norm": 0.216796875, "learning_rate": 0.0009466766666896939, "loss": 2.1212, "step": 27839 }, { "epoch": 0.7471017604121941, "grad_norm": 0.2197265625, "learning_rate": 0.0009466537297842338, "loss": 2.0205, "step": 27840 }, { "epoch": 0.747128595963933, "grad_norm": 0.2158203125, "learning_rate": 0.0009466307921183174, "loss": 2.092, "step": 27841 }, { "epoch": 0.747155431515672, "grad_norm": 0.216796875, "learning_rate": 0.0009466078536919947, "loss": 2.0797, "step": 27842 }, { "epoch": 0.7471822670674109, "grad_norm": 0.21484375, "learning_rate": 0.0009465849145053168, "loss": 1.9972, "step": 27843 }, { "epoch": 0.7472091026191499, "grad_norm": 0.2109375, "learning_rate": 0.0009465619745583332, "loss": 2.0893, "step": 27844 }, { "epoch": 0.7472359381708888, "grad_norm": 0.2080078125, "learning_rate": 0.0009465390338510948, "loss": 2.0805, "step": 27845 }, { "epoch": 0.7472627737226277, "grad_norm": 0.203125, "learning_rate": 0.0009465160923836517, "loss": 1.9922, "step": 27846 }, { "epoch": 0.7472896092743667, "grad_norm": 0.2119140625, "learning_rate": 0.0009464931501560542, "loss": 2.0063, "step": 27847 }, { "epoch": 0.7473164448261056, "grad_norm": 0.205078125, "learning_rate": 0.0009464702071683527, "loss": 1.9633, "step": 27848 }, { "epoch": 0.7473432803778446, "grad_norm": 0.21875, "learning_rate": 0.0009464472634205974, "loss": 2.0796, "step": 27849 }, { "epoch": 0.7473701159295835, "grad_norm": 0.2265625, "learning_rate": 0.0009464243189128388, "loss": 2.0657, "step": 27850 }, { "epoch": 0.7473969514813225, "grad_norm": 0.2080078125, "learning_rate": 0.0009464013736451271, "loss": 2.0942, "step": 27851 }, { "epoch": 0.7474237870330614, "grad_norm": 0.2138671875, "learning_rate": 0.0009463784276175127, "loss": 2.0639, "step": 27852 }, { "epoch": 0.7474506225848003, "grad_norm": 0.2177734375, "learning_rate": 0.000946355480830046, "loss": 2.1009, "step": 27853 }, { "epoch": 0.7474774581365393, "grad_norm": 0.205078125, "learning_rate": 0.0009463325332827772, "loss": 1.947, "step": 27854 }, { "epoch": 0.7475042936882782, "grad_norm": 0.2119140625, "learning_rate": 0.0009463095849757568, "loss": 2.088, "step": 27855 }, { "epoch": 0.7475311292400172, "grad_norm": 0.21875, "learning_rate": 0.0009462866359090349, "loss": 1.9915, "step": 27856 }, { "epoch": 0.7475579647917561, "grad_norm": 0.216796875, "learning_rate": 0.0009462636860826623, "loss": 2.0909, "step": 27857 }, { "epoch": 0.7475848003434951, "grad_norm": 0.2158203125, "learning_rate": 0.0009462407354966889, "loss": 1.9401, "step": 27858 }, { "epoch": 0.747611635895234, "grad_norm": 0.22265625, "learning_rate": 0.0009462177841511652, "loss": 2.0917, "step": 27859 }, { "epoch": 0.7476384714469729, "grad_norm": 0.2177734375, "learning_rate": 0.0009461948320461414, "loss": 2.0649, "step": 27860 }, { "epoch": 0.7476653069987119, "grad_norm": 0.208984375, "learning_rate": 0.0009461718791816681, "loss": 1.9762, "step": 27861 }, { "epoch": 0.7476921425504508, "grad_norm": 0.2099609375, "learning_rate": 0.0009461489255577955, "loss": 2.0091, "step": 27862 }, { "epoch": 0.7477189781021898, "grad_norm": 0.2099609375, "learning_rate": 0.0009461259711745742, "loss": 2.0399, "step": 27863 }, { "epoch": 0.7477458136539287, "grad_norm": 0.205078125, "learning_rate": 0.0009461030160320542, "loss": 2.0123, "step": 27864 }, { "epoch": 0.7477726492056677, "grad_norm": 0.2138671875, "learning_rate": 0.0009460800601302858, "loss": 1.9211, "step": 27865 }, { "epoch": 0.7477994847574067, "grad_norm": 0.2138671875, "learning_rate": 0.0009460571034693198, "loss": 2.0274, "step": 27866 }, { "epoch": 0.7478263203091455, "grad_norm": 0.2138671875, "learning_rate": 0.0009460341460492065, "loss": 2.0737, "step": 27867 }, { "epoch": 0.7478531558608845, "grad_norm": 0.2119140625, "learning_rate": 0.0009460111878699957, "loss": 2.0334, "step": 27868 }, { "epoch": 0.7478799914126234, "grad_norm": 0.2060546875, "learning_rate": 0.0009459882289317383, "loss": 2.0154, "step": 27869 }, { "epoch": 0.7479068269643624, "grad_norm": 0.212890625, "learning_rate": 0.0009459652692344845, "loss": 2.1426, "step": 27870 }, { "epoch": 0.7479336625161014, "grad_norm": 0.2138671875, "learning_rate": 0.0009459423087782847, "loss": 2.0561, "step": 27871 }, { "epoch": 0.7479604980678403, "grad_norm": 0.21484375, "learning_rate": 0.0009459193475631895, "loss": 1.9728, "step": 27872 }, { "epoch": 0.7479873336195793, "grad_norm": 0.2138671875, "learning_rate": 0.0009458963855892486, "loss": 2.0323, "step": 27873 }, { "epoch": 0.7480141691713181, "grad_norm": 0.2060546875, "learning_rate": 0.000945873422856513, "loss": 2.0436, "step": 27874 }, { "epoch": 0.7480410047230571, "grad_norm": 0.2177734375, "learning_rate": 0.0009458504593650329, "loss": 1.9104, "step": 27875 }, { "epoch": 0.748067840274796, "grad_norm": 0.2080078125, "learning_rate": 0.0009458274951148587, "loss": 2.0071, "step": 27876 }, { "epoch": 0.748094675826535, "grad_norm": 0.21484375, "learning_rate": 0.0009458045301060406, "loss": 2.0306, "step": 27877 }, { "epoch": 0.748121511378274, "grad_norm": 0.205078125, "learning_rate": 0.0009457815643386291, "loss": 1.9555, "step": 27878 }, { "epoch": 0.7481483469300129, "grad_norm": 0.212890625, "learning_rate": 0.0009457585978126745, "loss": 2.2105, "step": 27879 }, { "epoch": 0.7481751824817519, "grad_norm": 0.205078125, "learning_rate": 0.0009457356305282275, "loss": 2.0068, "step": 27880 }, { "epoch": 0.7482020180334907, "grad_norm": 0.2255859375, "learning_rate": 0.0009457126624853382, "loss": 1.9795, "step": 27881 }, { "epoch": 0.7482288535852297, "grad_norm": 0.2138671875, "learning_rate": 0.0009456896936840567, "loss": 1.9985, "step": 27882 }, { "epoch": 0.7482556891369686, "grad_norm": 0.2109375, "learning_rate": 0.0009456667241244341, "loss": 2.1357, "step": 27883 }, { "epoch": 0.7482825246887076, "grad_norm": 0.212890625, "learning_rate": 0.0009456437538065203, "loss": 2.0666, "step": 27884 }, { "epoch": 0.7483093602404466, "grad_norm": 0.212890625, "learning_rate": 0.0009456207827303656, "loss": 1.9299, "step": 27885 }, { "epoch": 0.7483361957921855, "grad_norm": 0.2041015625, "learning_rate": 0.0009455978108960209, "loss": 1.959, "step": 27886 }, { "epoch": 0.7483630313439245, "grad_norm": 0.20703125, "learning_rate": 0.000945574838303536, "loss": 2.0082, "step": 27887 }, { "epoch": 0.7483898668956633, "grad_norm": 0.2158203125, "learning_rate": 0.0009455518649529617, "loss": 1.994, "step": 27888 }, { "epoch": 0.7484167024474023, "grad_norm": 0.208984375, "learning_rate": 0.0009455288908443482, "loss": 1.9864, "step": 27889 }, { "epoch": 0.7484435379991413, "grad_norm": 0.228515625, "learning_rate": 0.000945505915977746, "loss": 2.1222, "step": 27890 }, { "epoch": 0.7484703735508802, "grad_norm": 0.2138671875, "learning_rate": 0.0009454829403532056, "loss": 2.1257, "step": 27891 }, { "epoch": 0.7484972091026192, "grad_norm": 0.2080078125, "learning_rate": 0.0009454599639707772, "loss": 2.0335, "step": 27892 }, { "epoch": 0.7485240446543581, "grad_norm": 0.2080078125, "learning_rate": 0.0009454369868305111, "loss": 2.0972, "step": 27893 }, { "epoch": 0.7485508802060971, "grad_norm": 0.2109375, "learning_rate": 0.0009454140089324581, "loss": 2.1007, "step": 27894 }, { "epoch": 0.7485777157578359, "grad_norm": 0.2109375, "learning_rate": 0.0009453910302766683, "loss": 2.0815, "step": 27895 }, { "epoch": 0.7486045513095749, "grad_norm": 0.205078125, "learning_rate": 0.0009453680508631921, "loss": 1.979, "step": 27896 }, { "epoch": 0.7486313868613139, "grad_norm": 0.21484375, "learning_rate": 0.0009453450706920802, "loss": 2.0629, "step": 27897 }, { "epoch": 0.7486582224130528, "grad_norm": 0.20703125, "learning_rate": 0.0009453220897633826, "loss": 2.0715, "step": 27898 }, { "epoch": 0.7486850579647918, "grad_norm": 0.2109375, "learning_rate": 0.0009452991080771501, "loss": 2.1101, "step": 27899 }, { "epoch": 0.7487118935165307, "grad_norm": 0.21875, "learning_rate": 0.0009452761256334329, "loss": 2.0986, "step": 27900 }, { "epoch": 0.7487387290682697, "grad_norm": 0.212890625, "learning_rate": 0.0009452531424322815, "loss": 2.0548, "step": 27901 }, { "epoch": 0.7487655646200085, "grad_norm": 0.2060546875, "learning_rate": 0.0009452301584737463, "loss": 2.0155, "step": 27902 }, { "epoch": 0.7487924001717475, "grad_norm": 0.208984375, "learning_rate": 0.0009452071737578778, "loss": 1.9716, "step": 27903 }, { "epoch": 0.7488192357234865, "grad_norm": 0.2158203125, "learning_rate": 0.000945184188284726, "loss": 1.9782, "step": 27904 }, { "epoch": 0.7488460712752254, "grad_norm": 0.21484375, "learning_rate": 0.0009451612020543419, "loss": 2.1018, "step": 27905 }, { "epoch": 0.7488729068269644, "grad_norm": 0.2080078125, "learning_rate": 0.0009451382150667756, "loss": 2.0815, "step": 27906 }, { "epoch": 0.7488997423787033, "grad_norm": 0.2138671875, "learning_rate": 0.0009451152273220776, "loss": 2.0894, "step": 27907 }, { "epoch": 0.7489265779304423, "grad_norm": 0.21875, "learning_rate": 0.0009450922388202983, "loss": 2.1897, "step": 27908 }, { "epoch": 0.7489534134821811, "grad_norm": 0.203125, "learning_rate": 0.0009450692495614881, "loss": 1.9482, "step": 27909 }, { "epoch": 0.7489802490339201, "grad_norm": 0.220703125, "learning_rate": 0.0009450462595456975, "loss": 1.9788, "step": 27910 }, { "epoch": 0.7490070845856591, "grad_norm": 0.2041015625, "learning_rate": 0.000945023268772977, "loss": 1.9508, "step": 27911 }, { "epoch": 0.749033920137398, "grad_norm": 0.208984375, "learning_rate": 0.0009450002772433768, "loss": 2.0181, "step": 27912 }, { "epoch": 0.749060755689137, "grad_norm": 0.2060546875, "learning_rate": 0.0009449772849569476, "loss": 2.0103, "step": 27913 }, { "epoch": 0.7490875912408759, "grad_norm": 0.2099609375, "learning_rate": 0.0009449542919137399, "loss": 2.0845, "step": 27914 }, { "epoch": 0.7491144267926149, "grad_norm": 0.2109375, "learning_rate": 0.0009449312981138037, "loss": 1.9642, "step": 27915 }, { "epoch": 0.7491412623443539, "grad_norm": 0.22265625, "learning_rate": 0.00094490830355719, "loss": 2.0404, "step": 27916 }, { "epoch": 0.7491680978960927, "grad_norm": 0.208984375, "learning_rate": 0.0009448853082439487, "loss": 1.9651, "step": 27917 }, { "epoch": 0.7491949334478317, "grad_norm": 0.2138671875, "learning_rate": 0.0009448623121741306, "loss": 1.9614, "step": 27918 }, { "epoch": 0.7492217689995706, "grad_norm": 0.2109375, "learning_rate": 0.0009448393153477862, "loss": 2.0286, "step": 27919 }, { "epoch": 0.7492486045513096, "grad_norm": 0.2109375, "learning_rate": 0.0009448163177649655, "loss": 2.0413, "step": 27920 }, { "epoch": 0.7492754401030485, "grad_norm": 0.220703125, "learning_rate": 0.0009447933194257194, "loss": 2.0752, "step": 27921 }, { "epoch": 0.7493022756547875, "grad_norm": 0.2060546875, "learning_rate": 0.0009447703203300981, "loss": 1.9628, "step": 27922 }, { "epoch": 0.7493291112065265, "grad_norm": 0.20703125, "learning_rate": 0.0009447473204781524, "loss": 1.9927, "step": 27923 }, { "epoch": 0.7493559467582653, "grad_norm": 0.2109375, "learning_rate": 0.0009447243198699323, "loss": 2.0762, "step": 27924 }, { "epoch": 0.7493827823100043, "grad_norm": 0.205078125, "learning_rate": 0.0009447013185054884, "loss": 2.1244, "step": 27925 }, { "epoch": 0.7494096178617432, "grad_norm": 0.212890625, "learning_rate": 0.0009446783163848713, "loss": 2.0666, "step": 27926 }, { "epoch": 0.7494364534134822, "grad_norm": 0.1982421875, "learning_rate": 0.0009446553135081314, "loss": 1.9345, "step": 27927 }, { "epoch": 0.7494632889652211, "grad_norm": 0.201171875, "learning_rate": 0.0009446323098753191, "loss": 2.0941, "step": 27928 }, { "epoch": 0.7494901245169601, "grad_norm": 0.205078125, "learning_rate": 0.0009446093054864849, "loss": 2.0003, "step": 27929 }, { "epoch": 0.7495169600686991, "grad_norm": 0.208984375, "learning_rate": 0.0009445863003416793, "loss": 1.987, "step": 27930 }, { "epoch": 0.7495437956204379, "grad_norm": 0.2109375, "learning_rate": 0.0009445632944409529, "loss": 2.0314, "step": 27931 }, { "epoch": 0.7495706311721769, "grad_norm": 0.21484375, "learning_rate": 0.0009445402877843557, "loss": 2.0916, "step": 27932 }, { "epoch": 0.7495974667239158, "grad_norm": 0.2138671875, "learning_rate": 0.0009445172803719386, "loss": 2.0401, "step": 27933 }, { "epoch": 0.7496243022756548, "grad_norm": 0.2060546875, "learning_rate": 0.0009444942722037521, "loss": 2.0271, "step": 27934 }, { "epoch": 0.7496511378273937, "grad_norm": 0.2099609375, "learning_rate": 0.0009444712632798464, "loss": 2.034, "step": 27935 }, { "epoch": 0.7496779733791327, "grad_norm": 0.216796875, "learning_rate": 0.0009444482536002722, "loss": 2.0016, "step": 27936 }, { "epoch": 0.7497048089308717, "grad_norm": 0.203125, "learning_rate": 0.0009444252431650797, "loss": 1.9548, "step": 27937 }, { "epoch": 0.7497316444826105, "grad_norm": 0.21875, "learning_rate": 0.0009444022319743194, "loss": 2.0361, "step": 27938 }, { "epoch": 0.7497584800343495, "grad_norm": 0.19921875, "learning_rate": 0.0009443792200280423, "loss": 1.9246, "step": 27939 }, { "epoch": 0.7497853155860884, "grad_norm": 0.208984375, "learning_rate": 0.0009443562073262981, "loss": 1.9539, "step": 27940 }, { "epoch": 0.7498121511378274, "grad_norm": 0.2099609375, "learning_rate": 0.0009443331938691381, "loss": 1.9847, "step": 27941 }, { "epoch": 0.7498389866895664, "grad_norm": 0.2109375, "learning_rate": 0.0009443101796566121, "loss": 1.9391, "step": 27942 }, { "epoch": 0.7498658222413053, "grad_norm": 0.208984375, "learning_rate": 0.0009442871646887709, "loss": 2.0379, "step": 27943 }, { "epoch": 0.7498926577930443, "grad_norm": 0.19921875, "learning_rate": 0.0009442641489656651, "loss": 2.0164, "step": 27944 }, { "epoch": 0.7499194933447831, "grad_norm": 0.2138671875, "learning_rate": 0.0009442411324873451, "loss": 2.0382, "step": 27945 }, { "epoch": 0.7499463288965221, "grad_norm": 0.2099609375, "learning_rate": 0.0009442181152538611, "loss": 2.0684, "step": 27946 }, { "epoch": 0.749973164448261, "grad_norm": 0.21875, "learning_rate": 0.0009441950972652638, "loss": 2.1269, "step": 27947 }, { "epoch": 0.75, "grad_norm": 0.205078125, "learning_rate": 0.000944172078521604, "loss": 2.0275, "step": 27948 }, { "epoch": 0.750026835551739, "grad_norm": 0.203125, "learning_rate": 0.0009441490590229316, "loss": 2.0114, "step": 27949 }, { "epoch": 0.7500536711034779, "grad_norm": 0.2138671875, "learning_rate": 0.0009441260387692977, "loss": 2.0724, "step": 27950 }, { "epoch": 0.7500805066552169, "grad_norm": 0.2109375, "learning_rate": 0.0009441030177607524, "loss": 1.9942, "step": 27951 }, { "epoch": 0.7501073422069557, "grad_norm": 0.21484375, "learning_rate": 0.0009440799959973461, "loss": 2.1369, "step": 27952 }, { "epoch": 0.7501341777586947, "grad_norm": 0.2119140625, "learning_rate": 0.0009440569734791299, "loss": 2.0136, "step": 27953 }, { "epoch": 0.7501610133104336, "grad_norm": 0.2099609375, "learning_rate": 0.0009440339502061537, "loss": 1.9722, "step": 27954 }, { "epoch": 0.7501878488621726, "grad_norm": 0.2060546875, "learning_rate": 0.0009440109261784682, "loss": 2.0038, "step": 27955 }, { "epoch": 0.7502146844139116, "grad_norm": 0.2041015625, "learning_rate": 0.0009439879013961241, "loss": 2.0677, "step": 27956 }, { "epoch": 0.7502415199656505, "grad_norm": 0.20703125, "learning_rate": 0.0009439648758591715, "loss": 2.031, "step": 27957 }, { "epoch": 0.7502683555173895, "grad_norm": 0.2109375, "learning_rate": 0.0009439418495676615, "loss": 1.9474, "step": 27958 }, { "epoch": 0.7502951910691283, "grad_norm": 0.2138671875, "learning_rate": 0.000943918822521644, "loss": 1.992, "step": 27959 }, { "epoch": 0.7503220266208673, "grad_norm": 0.203125, "learning_rate": 0.0009438957947211698, "loss": 2.004, "step": 27960 }, { "epoch": 0.7503488621726063, "grad_norm": 0.21875, "learning_rate": 0.0009438727661662895, "loss": 2.053, "step": 27961 }, { "epoch": 0.7503756977243452, "grad_norm": 0.2158203125, "learning_rate": 0.0009438497368570535, "loss": 2.0274, "step": 27962 }, { "epoch": 0.7504025332760842, "grad_norm": 0.2158203125, "learning_rate": 0.0009438267067935123, "loss": 2.0385, "step": 27963 }, { "epoch": 0.7504293688278231, "grad_norm": 0.205078125, "learning_rate": 0.0009438036759757165, "loss": 1.9912, "step": 27964 }, { "epoch": 0.7504562043795621, "grad_norm": 0.2138671875, "learning_rate": 0.0009437806444037167, "loss": 2.129, "step": 27965 }, { "epoch": 0.7504830399313009, "grad_norm": 0.20703125, "learning_rate": 0.000943757612077563, "loss": 2.0182, "step": 27966 }, { "epoch": 0.7505098754830399, "grad_norm": 0.2080078125, "learning_rate": 0.0009437345789973065, "loss": 2.0491, "step": 27967 }, { "epoch": 0.7505367110347789, "grad_norm": 0.2021484375, "learning_rate": 0.0009437115451629974, "loss": 2.0092, "step": 27968 }, { "epoch": 0.7505635465865178, "grad_norm": 0.2099609375, "learning_rate": 0.0009436885105746862, "loss": 2.0627, "step": 27969 }, { "epoch": 0.7505903821382568, "grad_norm": 0.2177734375, "learning_rate": 0.0009436654752324237, "loss": 2.0411, "step": 27970 }, { "epoch": 0.7506172176899957, "grad_norm": 0.2177734375, "learning_rate": 0.0009436424391362601, "loss": 2.082, "step": 27971 }, { "epoch": 0.7506440532417347, "grad_norm": 0.2109375, "learning_rate": 0.0009436194022862461, "loss": 2.0145, "step": 27972 }, { "epoch": 0.7506708887934735, "grad_norm": 0.2109375, "learning_rate": 0.0009435963646824323, "loss": 2.0224, "step": 27973 }, { "epoch": 0.7506977243452125, "grad_norm": 0.205078125, "learning_rate": 0.0009435733263248691, "loss": 2.0439, "step": 27974 }, { "epoch": 0.7507245598969515, "grad_norm": 0.203125, "learning_rate": 0.0009435502872136071, "loss": 2.0663, "step": 27975 }, { "epoch": 0.7507513954486904, "grad_norm": 0.248046875, "learning_rate": 0.000943527247348697, "loss": 2.0315, "step": 27976 }, { "epoch": 0.7507782310004294, "grad_norm": 0.2080078125, "learning_rate": 0.000943504206730189, "loss": 1.965, "step": 27977 }, { "epoch": 0.7508050665521683, "grad_norm": 0.20703125, "learning_rate": 0.0009434811653581339, "loss": 2.1048, "step": 27978 }, { "epoch": 0.7508319021039073, "grad_norm": 0.205078125, "learning_rate": 0.000943458123232582, "loss": 2.0706, "step": 27979 }, { "epoch": 0.7508587376556461, "grad_norm": 0.2109375, "learning_rate": 0.0009434350803535843, "loss": 2.169, "step": 27980 }, { "epoch": 0.7508855732073851, "grad_norm": 0.20703125, "learning_rate": 0.000943412036721191, "loss": 1.9777, "step": 27981 }, { "epoch": 0.7509124087591241, "grad_norm": 0.20703125, "learning_rate": 0.0009433889923354526, "loss": 1.9739, "step": 27982 }, { "epoch": 0.750939244310863, "grad_norm": 0.20703125, "learning_rate": 0.00094336594719642, "loss": 1.9865, "step": 27983 }, { "epoch": 0.750966079862602, "grad_norm": 0.21484375, "learning_rate": 0.0009433429013041434, "loss": 1.9416, "step": 27984 }, { "epoch": 0.7509929154143409, "grad_norm": 0.205078125, "learning_rate": 0.0009433198546586733, "loss": 1.9647, "step": 27985 }, { "epoch": 0.7510197509660799, "grad_norm": 0.220703125, "learning_rate": 0.0009432968072600607, "loss": 2.0586, "step": 27986 }, { "epoch": 0.7510465865178189, "grad_norm": 0.20703125, "learning_rate": 0.0009432737591083559, "loss": 2.0583, "step": 27987 }, { "epoch": 0.7510734220695577, "grad_norm": 0.208984375, "learning_rate": 0.0009432507102036093, "loss": 2.0767, "step": 27988 }, { "epoch": 0.7511002576212967, "grad_norm": 0.20703125, "learning_rate": 0.0009432276605458718, "loss": 2.006, "step": 27989 }, { "epoch": 0.7511270931730356, "grad_norm": 0.21484375, "learning_rate": 0.0009432046101351938, "loss": 2.0013, "step": 27990 }, { "epoch": 0.7511539287247746, "grad_norm": 0.2138671875, "learning_rate": 0.0009431815589716258, "loss": 2.049, "step": 27991 }, { "epoch": 0.7511807642765135, "grad_norm": 0.2060546875, "learning_rate": 0.0009431585070552183, "loss": 1.976, "step": 27992 }, { "epoch": 0.7512075998282525, "grad_norm": 0.2041015625, "learning_rate": 0.0009431354543860221, "loss": 2.0124, "step": 27993 }, { "epoch": 0.7512344353799915, "grad_norm": 0.2158203125, "learning_rate": 0.0009431124009640877, "loss": 2.0367, "step": 27994 }, { "epoch": 0.7512612709317303, "grad_norm": 0.20703125, "learning_rate": 0.0009430893467894656, "loss": 1.99, "step": 27995 }, { "epoch": 0.7512881064834693, "grad_norm": 0.2099609375, "learning_rate": 0.0009430662918622063, "loss": 2.0389, "step": 27996 }, { "epoch": 0.7513149420352082, "grad_norm": 0.2099609375, "learning_rate": 0.0009430432361823607, "loss": 1.9691, "step": 27997 }, { "epoch": 0.7513417775869472, "grad_norm": 0.20703125, "learning_rate": 0.000943020179749979, "loss": 2.0761, "step": 27998 }, { "epoch": 0.7513686131386861, "grad_norm": 0.20703125, "learning_rate": 0.000942997122565112, "loss": 2.0181, "step": 27999 }, { "epoch": 0.7513954486904251, "grad_norm": 0.208984375, "learning_rate": 0.0009429740646278102, "loss": 1.9883, "step": 28000 }, { "epoch": 0.7514222842421641, "grad_norm": 0.2119140625, "learning_rate": 0.0009429510059381245, "loss": 1.9917, "step": 28001 }, { "epoch": 0.7514491197939029, "grad_norm": 0.2236328125, "learning_rate": 0.0009429279464961047, "loss": 2.0404, "step": 28002 }, { "epoch": 0.7514759553456419, "grad_norm": 0.2099609375, "learning_rate": 0.0009429048863018022, "loss": 2.0305, "step": 28003 }, { "epoch": 0.7515027908973808, "grad_norm": 0.2158203125, "learning_rate": 0.0009428818253552672, "loss": 2.0235, "step": 28004 }, { "epoch": 0.7515296264491198, "grad_norm": 0.2060546875, "learning_rate": 0.0009428587636565502, "loss": 1.9534, "step": 28005 }, { "epoch": 0.7515564620008587, "grad_norm": 0.2109375, "learning_rate": 0.0009428357012057022, "loss": 2.047, "step": 28006 }, { "epoch": 0.7515832975525977, "grad_norm": 0.2080078125, "learning_rate": 0.0009428126380027733, "loss": 2.0913, "step": 28007 }, { "epoch": 0.7516101331043367, "grad_norm": 0.2099609375, "learning_rate": 0.0009427895740478144, "loss": 2.0408, "step": 28008 }, { "epoch": 0.7516369686560755, "grad_norm": 0.208984375, "learning_rate": 0.0009427665093408762, "loss": 1.9651, "step": 28009 }, { "epoch": 0.7516638042078145, "grad_norm": 0.2109375, "learning_rate": 0.0009427434438820087, "loss": 2.1108, "step": 28010 }, { "epoch": 0.7516906397595534, "grad_norm": 0.20703125, "learning_rate": 0.0009427203776712634, "loss": 2.0605, "step": 28011 }, { "epoch": 0.7517174753112924, "grad_norm": 0.203125, "learning_rate": 0.0009426973107086903, "loss": 1.9355, "step": 28012 }, { "epoch": 0.7517443108630314, "grad_norm": 0.20703125, "learning_rate": 0.0009426742429943398, "loss": 2.0702, "step": 28013 }, { "epoch": 0.7517711464147703, "grad_norm": 0.203125, "learning_rate": 0.0009426511745282632, "loss": 1.9747, "step": 28014 }, { "epoch": 0.7517979819665093, "grad_norm": 0.208984375, "learning_rate": 0.0009426281053105107, "loss": 2.0188, "step": 28015 }, { "epoch": 0.7518248175182481, "grad_norm": 0.2158203125, "learning_rate": 0.0009426050353411326, "loss": 2.0974, "step": 28016 }, { "epoch": 0.7518516530699871, "grad_norm": 0.2099609375, "learning_rate": 0.0009425819646201801, "loss": 1.9891, "step": 28017 }, { "epoch": 0.751878488621726, "grad_norm": 0.2001953125, "learning_rate": 0.0009425588931477037, "loss": 1.9819, "step": 28018 }, { "epoch": 0.751905324173465, "grad_norm": 0.2080078125, "learning_rate": 0.0009425358209237537, "loss": 2.0008, "step": 28019 }, { "epoch": 0.751932159725204, "grad_norm": 0.201171875, "learning_rate": 0.0009425127479483808, "loss": 2.0401, "step": 28020 }, { "epoch": 0.7519589952769429, "grad_norm": 0.2080078125, "learning_rate": 0.0009424896742216358, "loss": 2.0075, "step": 28021 }, { "epoch": 0.7519858308286819, "grad_norm": 0.2109375, "learning_rate": 0.0009424665997435692, "loss": 1.9984, "step": 28022 }, { "epoch": 0.7520126663804207, "grad_norm": 0.2177734375, "learning_rate": 0.0009424435245142317, "loss": 2.0314, "step": 28023 }, { "epoch": 0.7520395019321597, "grad_norm": 0.2109375, "learning_rate": 0.0009424204485336739, "loss": 1.9767, "step": 28024 }, { "epoch": 0.7520663374838986, "grad_norm": 0.20703125, "learning_rate": 0.0009423973718019462, "loss": 1.9632, "step": 28025 }, { "epoch": 0.7520931730356376, "grad_norm": 0.21875, "learning_rate": 0.0009423742943190995, "loss": 2.0072, "step": 28026 }, { "epoch": 0.7521200085873766, "grad_norm": 0.2177734375, "learning_rate": 0.0009423512160851843, "loss": 2.1079, "step": 28027 }, { "epoch": 0.7521468441391155, "grad_norm": 0.208984375, "learning_rate": 0.0009423281371002513, "loss": 2.0074, "step": 28028 }, { "epoch": 0.7521736796908545, "grad_norm": 0.203125, "learning_rate": 0.0009423050573643511, "loss": 1.9946, "step": 28029 }, { "epoch": 0.7522005152425933, "grad_norm": 0.2080078125, "learning_rate": 0.0009422819768775343, "loss": 2.0649, "step": 28030 }, { "epoch": 0.7522273507943323, "grad_norm": 0.2060546875, "learning_rate": 0.0009422588956398514, "loss": 2.1089, "step": 28031 }, { "epoch": 0.7522541863460713, "grad_norm": 0.203125, "learning_rate": 0.0009422358136513534, "loss": 1.9288, "step": 28032 }, { "epoch": 0.7522810218978102, "grad_norm": 0.208984375, "learning_rate": 0.0009422127309120905, "loss": 2.0207, "step": 28033 }, { "epoch": 0.7523078574495492, "grad_norm": 0.2060546875, "learning_rate": 0.0009421896474221136, "loss": 2.0282, "step": 28034 }, { "epoch": 0.7523346930012881, "grad_norm": 0.2041015625, "learning_rate": 0.0009421665631814734, "loss": 2.0315, "step": 28035 }, { "epoch": 0.7523615285530271, "grad_norm": 0.2099609375, "learning_rate": 0.0009421434781902203, "loss": 2.0137, "step": 28036 }, { "epoch": 0.752388364104766, "grad_norm": 0.2109375, "learning_rate": 0.0009421203924484052, "loss": 2.0296, "step": 28037 }, { "epoch": 0.7524151996565049, "grad_norm": 0.208984375, "learning_rate": 0.0009420973059560784, "loss": 1.9888, "step": 28038 }, { "epoch": 0.7524420352082439, "grad_norm": 0.2041015625, "learning_rate": 0.000942074218713291, "loss": 2.0722, "step": 28039 }, { "epoch": 0.7524688707599828, "grad_norm": 0.2216796875, "learning_rate": 0.0009420511307200933, "loss": 2.0089, "step": 28040 }, { "epoch": 0.7524957063117218, "grad_norm": 0.212890625, "learning_rate": 0.000942028041976536, "loss": 2.0085, "step": 28041 }, { "epoch": 0.7525225418634607, "grad_norm": 0.2080078125, "learning_rate": 0.00094200495248267, "loss": 2.138, "step": 28042 }, { "epoch": 0.7525493774151997, "grad_norm": 0.197265625, "learning_rate": 0.0009419818622385455, "loss": 1.9487, "step": 28043 }, { "epoch": 0.7525762129669386, "grad_norm": 0.2099609375, "learning_rate": 0.0009419587712442134, "loss": 2.1125, "step": 28044 }, { "epoch": 0.7526030485186775, "grad_norm": 0.203125, "learning_rate": 0.0009419356794997246, "loss": 2.0215, "step": 28045 }, { "epoch": 0.7526298840704165, "grad_norm": 0.2216796875, "learning_rate": 0.0009419125870051293, "loss": 2.1156, "step": 28046 }, { "epoch": 0.7526567196221554, "grad_norm": 0.20703125, "learning_rate": 0.0009418894937604786, "loss": 2.1185, "step": 28047 }, { "epoch": 0.7526835551738944, "grad_norm": 0.22265625, "learning_rate": 0.0009418663997658227, "loss": 2.1575, "step": 28048 }, { "epoch": 0.7527103907256333, "grad_norm": 0.2080078125, "learning_rate": 0.0009418433050212129, "loss": 1.9867, "step": 28049 }, { "epoch": 0.7527372262773723, "grad_norm": 0.2021484375, "learning_rate": 0.000941820209526699, "loss": 1.98, "step": 28050 }, { "epoch": 0.7527640618291112, "grad_norm": 0.205078125, "learning_rate": 0.0009417971132823323, "loss": 2.0175, "step": 28051 }, { "epoch": 0.7527908973808501, "grad_norm": 0.2138671875, "learning_rate": 0.0009417740162881631, "loss": 2.0861, "step": 28052 }, { "epoch": 0.7528177329325891, "grad_norm": 0.212890625, "learning_rate": 0.0009417509185442426, "loss": 1.9729, "step": 28053 }, { "epoch": 0.752844568484328, "grad_norm": 0.2041015625, "learning_rate": 0.000941727820050621, "loss": 2.0258, "step": 28054 }, { "epoch": 0.752871404036067, "grad_norm": 0.2138671875, "learning_rate": 0.000941704720807349, "loss": 1.9841, "step": 28055 }, { "epoch": 0.7528982395878059, "grad_norm": 0.203125, "learning_rate": 0.0009416816208144774, "loss": 2.0599, "step": 28056 }, { "epoch": 0.7529250751395449, "grad_norm": 0.2119140625, "learning_rate": 0.0009416585200720569, "loss": 1.995, "step": 28057 }, { "epoch": 0.7529519106912839, "grad_norm": 0.201171875, "learning_rate": 0.0009416354185801381, "loss": 1.9609, "step": 28058 }, { "epoch": 0.7529787462430227, "grad_norm": 0.2041015625, "learning_rate": 0.0009416123163387718, "loss": 2.0368, "step": 28059 }, { "epoch": 0.7530055817947617, "grad_norm": 0.2109375, "learning_rate": 0.0009415892133480085, "loss": 2.0072, "step": 28060 }, { "epoch": 0.7530324173465006, "grad_norm": 0.20703125, "learning_rate": 0.000941566109607899, "loss": 1.963, "step": 28061 }, { "epoch": 0.7530592528982396, "grad_norm": 0.2099609375, "learning_rate": 0.0009415430051184938, "loss": 1.9786, "step": 28062 }, { "epoch": 0.7530860884499785, "grad_norm": 0.21875, "learning_rate": 0.000941519899879844, "loss": 2.0943, "step": 28063 }, { "epoch": 0.7531129240017175, "grad_norm": 0.208984375, "learning_rate": 0.0009414967938919998, "loss": 1.9631, "step": 28064 }, { "epoch": 0.7531397595534565, "grad_norm": 0.2109375, "learning_rate": 0.0009414736871550121, "loss": 1.9349, "step": 28065 }, { "epoch": 0.7531665951051953, "grad_norm": 0.205078125, "learning_rate": 0.0009414505796689316, "loss": 2.0335, "step": 28066 }, { "epoch": 0.7531934306569343, "grad_norm": 0.20703125, "learning_rate": 0.000941427471433809, "loss": 1.9417, "step": 28067 }, { "epoch": 0.7532202662086732, "grad_norm": 0.2041015625, "learning_rate": 0.0009414043624496951, "loss": 2.0573, "step": 28068 }, { "epoch": 0.7532471017604122, "grad_norm": 0.2060546875, "learning_rate": 0.0009413812527166403, "loss": 1.9851, "step": 28069 }, { "epoch": 0.7532739373121511, "grad_norm": 0.203125, "learning_rate": 0.0009413581422346956, "loss": 2.0164, "step": 28070 }, { "epoch": 0.7533007728638901, "grad_norm": 0.201171875, "learning_rate": 0.0009413350310039115, "loss": 1.9893, "step": 28071 }, { "epoch": 0.7533276084156291, "grad_norm": 0.2001953125, "learning_rate": 0.0009413119190243387, "loss": 2.0216, "step": 28072 }, { "epoch": 0.753354443967368, "grad_norm": 0.203125, "learning_rate": 0.0009412888062960282, "loss": 1.9944, "step": 28073 }, { "epoch": 0.7533812795191069, "grad_norm": 0.20703125, "learning_rate": 0.0009412656928190304, "loss": 1.9713, "step": 28074 }, { "epoch": 0.7534081150708458, "grad_norm": 0.2041015625, "learning_rate": 0.0009412425785933958, "loss": 2.0438, "step": 28075 }, { "epoch": 0.7534349506225848, "grad_norm": 0.21484375, "learning_rate": 0.0009412194636191758, "loss": 2.074, "step": 28076 }, { "epoch": 0.7534617861743237, "grad_norm": 0.201171875, "learning_rate": 0.0009411963478964204, "loss": 1.9879, "step": 28077 }, { "epoch": 0.7534886217260627, "grad_norm": 0.2041015625, "learning_rate": 0.0009411732314251805, "loss": 2.0288, "step": 28078 }, { "epoch": 0.7535154572778017, "grad_norm": 0.2021484375, "learning_rate": 0.000941150114205507, "loss": 2.08, "step": 28079 }, { "epoch": 0.7535422928295405, "grad_norm": 0.19921875, "learning_rate": 0.0009411269962374506, "loss": 1.9149, "step": 28080 }, { "epoch": 0.7535691283812795, "grad_norm": 0.2255859375, "learning_rate": 0.0009411038775210619, "loss": 2.0384, "step": 28081 }, { "epoch": 0.7535959639330184, "grad_norm": 0.2119140625, "learning_rate": 0.0009410807580563917, "loss": 1.9385, "step": 28082 }, { "epoch": 0.7536227994847574, "grad_norm": 0.2041015625, "learning_rate": 0.0009410576378434906, "loss": 2.0002, "step": 28083 }, { "epoch": 0.7536496350364964, "grad_norm": 0.208984375, "learning_rate": 0.0009410345168824095, "loss": 2.0043, "step": 28084 }, { "epoch": 0.7536764705882353, "grad_norm": 0.208984375, "learning_rate": 0.0009410113951731988, "loss": 2.1159, "step": 28085 }, { "epoch": 0.7537033061399743, "grad_norm": 0.2080078125, "learning_rate": 0.0009409882727159096, "loss": 2.093, "step": 28086 }, { "epoch": 0.7537301416917132, "grad_norm": 0.20703125, "learning_rate": 0.0009409651495105923, "loss": 2.0222, "step": 28087 }, { "epoch": 0.7537569772434521, "grad_norm": 0.208984375, "learning_rate": 0.000940942025557298, "loss": 2.0962, "step": 28088 }, { "epoch": 0.753783812795191, "grad_norm": 0.20703125, "learning_rate": 0.0009409189008560769, "loss": 2.0201, "step": 28089 }, { "epoch": 0.75381064834693, "grad_norm": 0.216796875, "learning_rate": 0.0009408957754069802, "loss": 2.1064, "step": 28090 }, { "epoch": 0.753837483898669, "grad_norm": 0.2119140625, "learning_rate": 0.0009408726492100584, "loss": 2.0777, "step": 28091 }, { "epoch": 0.7538643194504079, "grad_norm": 0.20703125, "learning_rate": 0.0009408495222653623, "loss": 2.0768, "step": 28092 }, { "epoch": 0.7538911550021469, "grad_norm": 0.201171875, "learning_rate": 0.0009408263945729428, "loss": 2.0036, "step": 28093 }, { "epoch": 0.7539179905538858, "grad_norm": 0.2060546875, "learning_rate": 0.0009408032661328503, "loss": 2.0676, "step": 28094 }, { "epoch": 0.7539448261056247, "grad_norm": 0.2109375, "learning_rate": 0.0009407801369451356, "loss": 2.0801, "step": 28095 }, { "epoch": 0.7539716616573636, "grad_norm": 0.2099609375, "learning_rate": 0.0009407570070098498, "loss": 2.098, "step": 28096 }, { "epoch": 0.7539984972091026, "grad_norm": 0.2060546875, "learning_rate": 0.0009407338763270431, "loss": 2.0689, "step": 28097 }, { "epoch": 0.7540253327608416, "grad_norm": 0.203125, "learning_rate": 0.0009407107448967669, "loss": 1.9894, "step": 28098 }, { "epoch": 0.7540521683125805, "grad_norm": 0.205078125, "learning_rate": 0.0009406876127190712, "loss": 2.0433, "step": 28099 }, { "epoch": 0.7540790038643195, "grad_norm": 0.2080078125, "learning_rate": 0.0009406644797940073, "loss": 2.0855, "step": 28100 }, { "epoch": 0.7541058394160584, "grad_norm": 0.2236328125, "learning_rate": 0.0009406413461216256, "loss": 2.1473, "step": 28101 }, { "epoch": 0.7541326749677973, "grad_norm": 0.2041015625, "learning_rate": 0.0009406182117019771, "loss": 2.0165, "step": 28102 }, { "epoch": 0.7541595105195363, "grad_norm": 0.205078125, "learning_rate": 0.0009405950765351125, "loss": 2.0437, "step": 28103 }, { "epoch": 0.7541863460712752, "grad_norm": 0.203125, "learning_rate": 0.0009405719406210825, "loss": 1.9928, "step": 28104 }, { "epoch": 0.7542131816230142, "grad_norm": 0.197265625, "learning_rate": 0.0009405488039599378, "loss": 2.0459, "step": 28105 }, { "epoch": 0.7542400171747531, "grad_norm": 0.1982421875, "learning_rate": 0.0009405256665517292, "loss": 1.9956, "step": 28106 }, { "epoch": 0.7542668527264921, "grad_norm": 0.21484375, "learning_rate": 0.0009405025283965075, "loss": 2.1049, "step": 28107 }, { "epoch": 0.754293688278231, "grad_norm": 0.203125, "learning_rate": 0.0009404793894943234, "loss": 2.1053, "step": 28108 }, { "epoch": 0.75432052382997, "grad_norm": 0.2021484375, "learning_rate": 0.0009404562498452278, "loss": 2.0308, "step": 28109 }, { "epoch": 0.7543473593817089, "grad_norm": 0.2060546875, "learning_rate": 0.0009404331094492713, "loss": 2.0353, "step": 28110 }, { "epoch": 0.7543741949334478, "grad_norm": 0.2080078125, "learning_rate": 0.0009404099683065047, "loss": 2.1026, "step": 28111 }, { "epoch": 0.7544010304851868, "grad_norm": 0.2041015625, "learning_rate": 0.0009403868264169788, "loss": 1.9532, "step": 28112 }, { "epoch": 0.7544278660369257, "grad_norm": 0.2021484375, "learning_rate": 0.0009403636837807443, "loss": 1.9619, "step": 28113 }, { "epoch": 0.7544547015886647, "grad_norm": 0.20703125, "learning_rate": 0.0009403405403978519, "loss": 2.1008, "step": 28114 }, { "epoch": 0.7544815371404036, "grad_norm": 0.20703125, "learning_rate": 0.0009403173962683526, "loss": 2.006, "step": 28115 }, { "epoch": 0.7545083726921425, "grad_norm": 0.216796875, "learning_rate": 0.0009402942513922972, "loss": 2.0386, "step": 28116 }, { "epoch": 0.7545352082438815, "grad_norm": 0.2021484375, "learning_rate": 0.0009402711057697363, "loss": 1.9527, "step": 28117 }, { "epoch": 0.7545620437956204, "grad_norm": 0.205078125, "learning_rate": 0.0009402479594007205, "loss": 1.9916, "step": 28118 }, { "epoch": 0.7545888793473594, "grad_norm": 0.2119140625, "learning_rate": 0.0009402248122853009, "loss": 2.0764, "step": 28119 }, { "epoch": 0.7546157148990983, "grad_norm": 0.20703125, "learning_rate": 0.0009402016644235281, "loss": 2.0645, "step": 28120 }, { "epoch": 0.7546425504508373, "grad_norm": 0.201171875, "learning_rate": 0.0009401785158154531, "loss": 1.9622, "step": 28121 }, { "epoch": 0.7546693860025762, "grad_norm": 0.208984375, "learning_rate": 0.0009401553664611263, "loss": 2.0196, "step": 28122 }, { "epoch": 0.7546962215543151, "grad_norm": 0.2080078125, "learning_rate": 0.0009401322163605989, "loss": 2.0684, "step": 28123 }, { "epoch": 0.7547230571060541, "grad_norm": 0.2060546875, "learning_rate": 0.0009401090655139213, "loss": 2.0716, "step": 28124 }, { "epoch": 0.754749892657793, "grad_norm": 0.2060546875, "learning_rate": 0.0009400859139211446, "loss": 2.1232, "step": 28125 }, { "epoch": 0.754776728209532, "grad_norm": 0.203125, "learning_rate": 0.0009400627615823194, "loss": 1.9728, "step": 28126 }, { "epoch": 0.7548035637612709, "grad_norm": 0.2080078125, "learning_rate": 0.0009400396084974966, "loss": 2.0282, "step": 28127 }, { "epoch": 0.7548303993130099, "grad_norm": 0.208984375, "learning_rate": 0.0009400164546667268, "loss": 2.0284, "step": 28128 }, { "epoch": 0.7548572348647489, "grad_norm": 0.2119140625, "learning_rate": 0.0009399933000900612, "loss": 1.997, "step": 28129 }, { "epoch": 0.7548840704164878, "grad_norm": 0.2109375, "learning_rate": 0.0009399701447675501, "loss": 2.0206, "step": 28130 }, { "epoch": 0.7549109059682267, "grad_norm": 0.2001953125, "learning_rate": 0.0009399469886992445, "loss": 1.9868, "step": 28131 }, { "epoch": 0.7549377415199656, "grad_norm": 0.203125, "learning_rate": 0.0009399238318851954, "loss": 2.0688, "step": 28132 }, { "epoch": 0.7549645770717046, "grad_norm": 0.1982421875, "learning_rate": 0.0009399006743254534, "loss": 1.9864, "step": 28133 }, { "epoch": 0.7549914126234435, "grad_norm": 0.2080078125, "learning_rate": 0.0009398775160200692, "loss": 1.9461, "step": 28134 }, { "epoch": 0.7550182481751825, "grad_norm": 0.2119140625, "learning_rate": 0.0009398543569690938, "loss": 2.0202, "step": 28135 }, { "epoch": 0.7550450837269215, "grad_norm": 0.2041015625, "learning_rate": 0.0009398311971725779, "loss": 1.9073, "step": 28136 }, { "epoch": 0.7550719192786604, "grad_norm": 0.21484375, "learning_rate": 0.0009398080366305723, "loss": 1.9457, "step": 28137 }, { "epoch": 0.7550987548303993, "grad_norm": 0.205078125, "learning_rate": 0.000939784875343128, "loss": 2.0968, "step": 28138 }, { "epoch": 0.7551255903821382, "grad_norm": 0.20703125, "learning_rate": 0.0009397617133102953, "loss": 2.0369, "step": 28139 }, { "epoch": 0.7551524259338772, "grad_norm": 0.220703125, "learning_rate": 0.0009397385505321257, "loss": 2.1062, "step": 28140 }, { "epoch": 0.7551792614856161, "grad_norm": 0.2001953125, "learning_rate": 0.0009397153870086695, "loss": 2.0337, "step": 28141 }, { "epoch": 0.7552060970373551, "grad_norm": 0.2099609375, "learning_rate": 0.0009396922227399778, "loss": 1.9937, "step": 28142 }, { "epoch": 0.7552329325890941, "grad_norm": 0.208984375, "learning_rate": 0.0009396690577261014, "loss": 1.9688, "step": 28143 }, { "epoch": 0.755259768140833, "grad_norm": 0.2021484375, "learning_rate": 0.0009396458919670908, "loss": 2.081, "step": 28144 }, { "epoch": 0.755286603692572, "grad_norm": 0.21484375, "learning_rate": 0.0009396227254629971, "loss": 2.026, "step": 28145 }, { "epoch": 0.7553134392443108, "grad_norm": 0.205078125, "learning_rate": 0.0009395995582138711, "loss": 2.0352, "step": 28146 }, { "epoch": 0.7553402747960498, "grad_norm": 0.205078125, "learning_rate": 0.0009395763902197635, "loss": 2.0451, "step": 28147 }, { "epoch": 0.7553671103477888, "grad_norm": 0.2021484375, "learning_rate": 0.0009395532214807251, "loss": 1.9384, "step": 28148 }, { "epoch": 0.7553939458995277, "grad_norm": 0.2109375, "learning_rate": 0.000939530051996807, "loss": 2.0934, "step": 28149 }, { "epoch": 0.7554207814512667, "grad_norm": 0.21484375, "learning_rate": 0.0009395068817680599, "loss": 2.1285, "step": 28150 }, { "epoch": 0.7554476170030056, "grad_norm": 0.201171875, "learning_rate": 0.0009394837107945344, "loss": 1.9433, "step": 28151 }, { "epoch": 0.7554744525547445, "grad_norm": 0.2021484375, "learning_rate": 0.0009394605390762818, "loss": 2.0691, "step": 28152 }, { "epoch": 0.7555012881064834, "grad_norm": 0.20703125, "learning_rate": 0.0009394373666133524, "loss": 2.0304, "step": 28153 }, { "epoch": 0.7555281236582224, "grad_norm": 0.2041015625, "learning_rate": 0.0009394141934057974, "loss": 1.9627, "step": 28154 }, { "epoch": 0.7555549592099614, "grad_norm": 0.20703125, "learning_rate": 0.0009393910194536676, "loss": 2.0911, "step": 28155 }, { "epoch": 0.7555817947617003, "grad_norm": 0.201171875, "learning_rate": 0.0009393678447570135, "loss": 2.0059, "step": 28156 }, { "epoch": 0.7556086303134393, "grad_norm": 0.1953125, "learning_rate": 0.0009393446693158864, "loss": 2.0086, "step": 28157 }, { "epoch": 0.7556354658651782, "grad_norm": 0.212890625, "learning_rate": 0.0009393214931303368, "loss": 2.1402, "step": 28158 }, { "epoch": 0.7556623014169171, "grad_norm": 0.2041015625, "learning_rate": 0.0009392983162004159, "loss": 1.9915, "step": 28159 }, { "epoch": 0.755689136968656, "grad_norm": 0.20703125, "learning_rate": 0.0009392751385261741, "loss": 1.9989, "step": 28160 }, { "epoch": 0.755715972520395, "grad_norm": 0.203125, "learning_rate": 0.0009392519601076625, "loss": 1.9486, "step": 28161 }, { "epoch": 0.755742808072134, "grad_norm": 0.20703125, "learning_rate": 0.0009392287809449318, "loss": 2.0225, "step": 28162 }, { "epoch": 0.7557696436238729, "grad_norm": 0.2041015625, "learning_rate": 0.0009392056010380333, "loss": 1.9415, "step": 28163 }, { "epoch": 0.7557964791756119, "grad_norm": 0.208984375, "learning_rate": 0.0009391824203870171, "loss": 1.9273, "step": 28164 }, { "epoch": 0.7558233147273508, "grad_norm": 0.19921875, "learning_rate": 0.0009391592389919348, "loss": 1.9314, "step": 28165 }, { "epoch": 0.7558501502790897, "grad_norm": 0.2041015625, "learning_rate": 0.0009391360568528367, "loss": 2.0182, "step": 28166 }, { "epoch": 0.7558769858308286, "grad_norm": 0.205078125, "learning_rate": 0.0009391128739697738, "loss": 1.9286, "step": 28167 }, { "epoch": 0.7559038213825676, "grad_norm": 0.2099609375, "learning_rate": 0.0009390896903427973, "loss": 2.0523, "step": 28168 }, { "epoch": 0.7559306569343066, "grad_norm": 0.2001953125, "learning_rate": 0.0009390665059719577, "loss": 2.0077, "step": 28169 }, { "epoch": 0.7559574924860455, "grad_norm": 0.1953125, "learning_rate": 0.0009390433208573059, "loss": 1.9487, "step": 28170 }, { "epoch": 0.7559843280377845, "grad_norm": 0.2109375, "learning_rate": 0.0009390201349988926, "loss": 1.9698, "step": 28171 }, { "epoch": 0.7560111635895234, "grad_norm": 0.2099609375, "learning_rate": 0.0009389969483967691, "loss": 2.0917, "step": 28172 }, { "epoch": 0.7560379991412624, "grad_norm": 0.208984375, "learning_rate": 0.000938973761050986, "loss": 1.9517, "step": 28173 }, { "epoch": 0.7560648346930013, "grad_norm": 0.201171875, "learning_rate": 0.0009389505729615941, "loss": 2.0063, "step": 28174 }, { "epoch": 0.7560916702447402, "grad_norm": 0.1982421875, "learning_rate": 0.0009389273841286445, "loss": 2.0059, "step": 28175 }, { "epoch": 0.7561185057964792, "grad_norm": 0.203125, "learning_rate": 0.0009389041945521878, "loss": 1.9738, "step": 28176 }, { "epoch": 0.7561453413482181, "grad_norm": 0.1962890625, "learning_rate": 0.0009388810042322751, "loss": 1.9128, "step": 28177 }, { "epoch": 0.7561721768999571, "grad_norm": 0.1982421875, "learning_rate": 0.0009388578131689572, "loss": 1.8545, "step": 28178 }, { "epoch": 0.756199012451696, "grad_norm": 0.2060546875, "learning_rate": 0.0009388346213622848, "loss": 2.0135, "step": 28179 }, { "epoch": 0.756225848003435, "grad_norm": 0.201171875, "learning_rate": 0.000938811428812309, "loss": 1.9897, "step": 28180 }, { "epoch": 0.7562526835551739, "grad_norm": 0.2041015625, "learning_rate": 0.0009387882355190806, "loss": 1.97, "step": 28181 }, { "epoch": 0.7562795191069128, "grad_norm": 0.197265625, "learning_rate": 0.0009387650414826506, "loss": 1.938, "step": 28182 }, { "epoch": 0.7563063546586518, "grad_norm": 0.2109375, "learning_rate": 0.0009387418467030697, "loss": 2.0629, "step": 28183 }, { "epoch": 0.7563331902103907, "grad_norm": 0.2119140625, "learning_rate": 0.0009387186511803888, "loss": 2.032, "step": 28184 }, { "epoch": 0.7563600257621297, "grad_norm": 0.2041015625, "learning_rate": 0.0009386954549146589, "loss": 1.9593, "step": 28185 }, { "epoch": 0.7563868613138686, "grad_norm": 0.208984375, "learning_rate": 0.0009386722579059308, "loss": 2.0952, "step": 28186 }, { "epoch": 0.7564136968656076, "grad_norm": 0.19921875, "learning_rate": 0.0009386490601542553, "loss": 2.004, "step": 28187 }, { "epoch": 0.7564405324173465, "grad_norm": 0.2060546875, "learning_rate": 0.0009386258616596834, "loss": 1.9619, "step": 28188 }, { "epoch": 0.7564673679690854, "grad_norm": 0.2216796875, "learning_rate": 0.0009386026624222662, "loss": 2.0728, "step": 28189 }, { "epoch": 0.7564942035208244, "grad_norm": 0.203125, "learning_rate": 0.0009385794624420542, "loss": 1.9574, "step": 28190 }, { "epoch": 0.7565210390725633, "grad_norm": 0.2021484375, "learning_rate": 0.0009385562617190985, "loss": 2.0064, "step": 28191 }, { "epoch": 0.7565478746243023, "grad_norm": 0.203125, "learning_rate": 0.0009385330602534499, "loss": 2.053, "step": 28192 }, { "epoch": 0.7565747101760412, "grad_norm": 0.205078125, "learning_rate": 0.0009385098580451595, "loss": 2.0918, "step": 28193 }, { "epoch": 0.7566015457277802, "grad_norm": 0.2060546875, "learning_rate": 0.000938486655094278, "loss": 2.0008, "step": 28194 }, { "epoch": 0.7566283812795191, "grad_norm": 0.205078125, "learning_rate": 0.0009384634514008562, "loss": 2.0804, "step": 28195 }, { "epoch": 0.756655216831258, "grad_norm": 0.2138671875, "learning_rate": 0.0009384402469649455, "loss": 2.0238, "step": 28196 }, { "epoch": 0.756682052382997, "grad_norm": 0.2001953125, "learning_rate": 0.0009384170417865962, "loss": 2.0789, "step": 28197 }, { "epoch": 0.7567088879347359, "grad_norm": 0.205078125, "learning_rate": 0.0009383938358658595, "loss": 2.0296, "step": 28198 }, { "epoch": 0.7567357234864749, "grad_norm": 0.203125, "learning_rate": 0.0009383706292027864, "loss": 1.9637, "step": 28199 }, { "epoch": 0.7567625590382139, "grad_norm": 0.2021484375, "learning_rate": 0.0009383474217974277, "loss": 1.9197, "step": 28200 }, { "epoch": 0.7567893945899528, "grad_norm": 0.2099609375, "learning_rate": 0.0009383242136498343, "loss": 2.058, "step": 28201 }, { "epoch": 0.7568162301416917, "grad_norm": 0.20703125, "learning_rate": 0.0009383010047600569, "loss": 1.9983, "step": 28202 }, { "epoch": 0.7568430656934306, "grad_norm": 0.19921875, "learning_rate": 0.0009382777951281469, "loss": 2.0493, "step": 28203 }, { "epoch": 0.7568699012451696, "grad_norm": 0.212890625, "learning_rate": 0.0009382545847541547, "loss": 2.0389, "step": 28204 }, { "epoch": 0.7568967367969085, "grad_norm": 0.212890625, "learning_rate": 0.0009382313736381317, "loss": 2.0086, "step": 28205 }, { "epoch": 0.7569235723486475, "grad_norm": 0.2041015625, "learning_rate": 0.0009382081617801284, "loss": 2.0551, "step": 28206 }, { "epoch": 0.7569504079003865, "grad_norm": 0.2109375, "learning_rate": 0.0009381849491801959, "loss": 2.0176, "step": 28207 }, { "epoch": 0.7569772434521254, "grad_norm": 0.2109375, "learning_rate": 0.0009381617358383853, "loss": 2.0021, "step": 28208 }, { "epoch": 0.7570040790038643, "grad_norm": 0.212890625, "learning_rate": 0.0009381385217547471, "loss": 2.0405, "step": 28209 }, { "epoch": 0.7570309145556032, "grad_norm": 0.2109375, "learning_rate": 0.0009381153069293328, "loss": 2.0384, "step": 28210 }, { "epoch": 0.7570577501073422, "grad_norm": 0.20703125, "learning_rate": 0.0009380920913621925, "loss": 2.0227, "step": 28211 }, { "epoch": 0.7570845856590811, "grad_norm": 0.1982421875, "learning_rate": 0.000938068875053378, "loss": 2.0786, "step": 28212 }, { "epoch": 0.7571114212108201, "grad_norm": 0.2119140625, "learning_rate": 0.0009380456580029398, "loss": 2.114, "step": 28213 }, { "epoch": 0.7571382567625591, "grad_norm": 0.203125, "learning_rate": 0.0009380224402109286, "loss": 2.022, "step": 28214 }, { "epoch": 0.757165092314298, "grad_norm": 0.208984375, "learning_rate": 0.0009379992216773957, "loss": 2.0569, "step": 28215 }, { "epoch": 0.757191927866037, "grad_norm": 0.201171875, "learning_rate": 0.0009379760024023923, "loss": 2.0069, "step": 28216 }, { "epoch": 0.7572187634177758, "grad_norm": 0.2001953125, "learning_rate": 0.0009379527823859688, "loss": 2.0178, "step": 28217 }, { "epoch": 0.7572455989695148, "grad_norm": 0.205078125, "learning_rate": 0.0009379295616281761, "loss": 2.1067, "step": 28218 }, { "epoch": 0.7572724345212538, "grad_norm": 0.203125, "learning_rate": 0.0009379063401290656, "loss": 1.9671, "step": 28219 }, { "epoch": 0.7572992700729927, "grad_norm": 0.2099609375, "learning_rate": 0.0009378831178886879, "loss": 1.9557, "step": 28220 }, { "epoch": 0.7573261056247317, "grad_norm": 0.2041015625, "learning_rate": 0.0009378598949070942, "loss": 1.943, "step": 28221 }, { "epoch": 0.7573529411764706, "grad_norm": 0.19921875, "learning_rate": 0.0009378366711843352, "loss": 1.9616, "step": 28222 }, { "epoch": 0.7573797767282096, "grad_norm": 0.2099609375, "learning_rate": 0.000937813446720462, "loss": 1.9849, "step": 28223 }, { "epoch": 0.7574066122799484, "grad_norm": 0.1982421875, "learning_rate": 0.0009377902215155253, "loss": 2.0141, "step": 28224 }, { "epoch": 0.7574334478316874, "grad_norm": 0.20703125, "learning_rate": 0.0009377669955695765, "loss": 2.1197, "step": 28225 }, { "epoch": 0.7574602833834264, "grad_norm": 0.20703125, "learning_rate": 0.0009377437688826659, "loss": 1.9929, "step": 28226 }, { "epoch": 0.7574871189351653, "grad_norm": 0.2021484375, "learning_rate": 0.0009377205414548451, "loss": 2.0108, "step": 28227 }, { "epoch": 0.7575139544869043, "grad_norm": 0.2109375, "learning_rate": 0.0009376973132861647, "loss": 2.0058, "step": 28228 }, { "epoch": 0.7575407900386432, "grad_norm": 0.1962890625, "learning_rate": 0.0009376740843766758, "loss": 2.0037, "step": 28229 }, { "epoch": 0.7575676255903822, "grad_norm": 0.20703125, "learning_rate": 0.0009376508547264293, "loss": 2.0262, "step": 28230 }, { "epoch": 0.757594461142121, "grad_norm": 0.1943359375, "learning_rate": 0.0009376276243354762, "loss": 1.9445, "step": 28231 }, { "epoch": 0.75762129669386, "grad_norm": 0.19921875, "learning_rate": 0.0009376043932038674, "loss": 2.0291, "step": 28232 }, { "epoch": 0.757648132245599, "grad_norm": 0.208984375, "learning_rate": 0.0009375811613316539, "loss": 2.0277, "step": 28233 }, { "epoch": 0.7576749677973379, "grad_norm": 0.203125, "learning_rate": 0.0009375579287188865, "loss": 2.0182, "step": 28234 }, { "epoch": 0.7577018033490769, "grad_norm": 0.1962890625, "learning_rate": 0.0009375346953656164, "loss": 1.8694, "step": 28235 }, { "epoch": 0.7577286389008158, "grad_norm": 0.1962890625, "learning_rate": 0.0009375114612718946, "loss": 2.0017, "step": 28236 }, { "epoch": 0.7577554744525548, "grad_norm": 0.2021484375, "learning_rate": 0.0009374882264377718, "loss": 2.0769, "step": 28237 }, { "epoch": 0.7577823100042936, "grad_norm": 0.20703125, "learning_rate": 0.0009374649908632993, "loss": 2.0467, "step": 28238 }, { "epoch": 0.7578091455560326, "grad_norm": 0.2001953125, "learning_rate": 0.0009374417545485278, "loss": 1.9209, "step": 28239 }, { "epoch": 0.7578359811077716, "grad_norm": 0.2001953125, "learning_rate": 0.0009374185174935082, "loss": 1.9001, "step": 28240 }, { "epoch": 0.7578628166595105, "grad_norm": 0.208984375, "learning_rate": 0.0009373952796982919, "loss": 2.0186, "step": 28241 }, { "epoch": 0.7578896522112495, "grad_norm": 0.201171875, "learning_rate": 0.0009373720411629295, "loss": 2.0233, "step": 28242 }, { "epoch": 0.7579164877629884, "grad_norm": 0.2021484375, "learning_rate": 0.0009373488018874722, "loss": 1.9879, "step": 28243 }, { "epoch": 0.7579433233147274, "grad_norm": 0.2138671875, "learning_rate": 0.0009373255618719707, "loss": 2.1116, "step": 28244 }, { "epoch": 0.7579701588664663, "grad_norm": 0.208984375, "learning_rate": 0.0009373023211164763, "loss": 2.0635, "step": 28245 }, { "epoch": 0.7579969944182052, "grad_norm": 0.2021484375, "learning_rate": 0.0009372790796210399, "loss": 1.9941, "step": 28246 }, { "epoch": 0.7580238299699442, "grad_norm": 0.2021484375, "learning_rate": 0.0009372558373857123, "loss": 2.015, "step": 28247 }, { "epoch": 0.7580506655216831, "grad_norm": 0.2041015625, "learning_rate": 0.0009372325944105445, "loss": 1.9157, "step": 28248 }, { "epoch": 0.7580775010734221, "grad_norm": 0.2021484375, "learning_rate": 0.0009372093506955877, "loss": 2.0715, "step": 28249 }, { "epoch": 0.758104336625161, "grad_norm": 0.2080078125, "learning_rate": 0.0009371861062408928, "loss": 1.954, "step": 28250 }, { "epoch": 0.7581311721769, "grad_norm": 0.205078125, "learning_rate": 0.0009371628610465108, "loss": 1.9112, "step": 28251 }, { "epoch": 0.758158007728639, "grad_norm": 0.2099609375, "learning_rate": 0.0009371396151124928, "loss": 2.0637, "step": 28252 }, { "epoch": 0.7581848432803778, "grad_norm": 0.193359375, "learning_rate": 0.0009371163684388896, "loss": 1.8922, "step": 28253 }, { "epoch": 0.7582116788321168, "grad_norm": 0.19921875, "learning_rate": 0.0009370931210257521, "loss": 2.0688, "step": 28254 }, { "epoch": 0.7582385143838557, "grad_norm": 0.216796875, "learning_rate": 0.0009370698728731317, "loss": 2.0972, "step": 28255 }, { "epoch": 0.7582653499355947, "grad_norm": 0.2060546875, "learning_rate": 0.0009370466239810792, "loss": 2.0739, "step": 28256 }, { "epoch": 0.7582921854873336, "grad_norm": 0.205078125, "learning_rate": 0.0009370233743496453, "loss": 1.9897, "step": 28257 }, { "epoch": 0.7583190210390726, "grad_norm": 0.2099609375, "learning_rate": 0.0009370001239788814, "loss": 2.1017, "step": 28258 }, { "epoch": 0.7583458565908116, "grad_norm": 0.2001953125, "learning_rate": 0.0009369768728688385, "loss": 2.0083, "step": 28259 }, { "epoch": 0.7583726921425504, "grad_norm": 0.1982421875, "learning_rate": 0.0009369536210195673, "loss": 1.9616, "step": 28260 }, { "epoch": 0.7583995276942894, "grad_norm": 0.2119140625, "learning_rate": 0.0009369303684311192, "loss": 2.0335, "step": 28261 }, { "epoch": 0.7584263632460283, "grad_norm": 0.20703125, "learning_rate": 0.0009369071151035447, "loss": 2.0256, "step": 28262 }, { "epoch": 0.7584531987977673, "grad_norm": 0.2060546875, "learning_rate": 0.0009368838610368952, "loss": 2.0143, "step": 28263 }, { "epoch": 0.7584800343495062, "grad_norm": 0.2119140625, "learning_rate": 0.0009368606062312218, "loss": 1.9879, "step": 28264 }, { "epoch": 0.7585068699012452, "grad_norm": 0.203125, "learning_rate": 0.0009368373506865752, "loss": 1.9778, "step": 28265 }, { "epoch": 0.7585337054529842, "grad_norm": 0.201171875, "learning_rate": 0.0009368140944030065, "loss": 1.9912, "step": 28266 }, { "epoch": 0.758560541004723, "grad_norm": 0.2060546875, "learning_rate": 0.0009367908373805669, "loss": 2.0471, "step": 28267 }, { "epoch": 0.758587376556462, "grad_norm": 0.2001953125, "learning_rate": 0.0009367675796193072, "loss": 2.0817, "step": 28268 }, { "epoch": 0.7586142121082009, "grad_norm": 0.2060546875, "learning_rate": 0.0009367443211192785, "loss": 2.0934, "step": 28269 }, { "epoch": 0.7586410476599399, "grad_norm": 0.19921875, "learning_rate": 0.0009367210618805319, "loss": 2.0173, "step": 28270 }, { "epoch": 0.7586678832116789, "grad_norm": 0.2041015625, "learning_rate": 0.0009366978019031182, "loss": 1.9803, "step": 28271 }, { "epoch": 0.7586947187634178, "grad_norm": 0.2001953125, "learning_rate": 0.0009366745411870888, "loss": 1.9553, "step": 28272 }, { "epoch": 0.7587215543151568, "grad_norm": 0.2041015625, "learning_rate": 0.0009366512797324945, "loss": 1.9861, "step": 28273 }, { "epoch": 0.7587483898668956, "grad_norm": 0.2236328125, "learning_rate": 0.0009366280175393862, "loss": 2.1227, "step": 28274 }, { "epoch": 0.7587752254186346, "grad_norm": 0.2099609375, "learning_rate": 0.0009366047546078151, "loss": 2.0799, "step": 28275 }, { "epoch": 0.7588020609703735, "grad_norm": 0.2021484375, "learning_rate": 0.0009365814909378322, "loss": 1.9854, "step": 28276 }, { "epoch": 0.7588288965221125, "grad_norm": 0.21484375, "learning_rate": 0.0009365582265294885, "loss": 2.056, "step": 28277 }, { "epoch": 0.7588557320738515, "grad_norm": 0.2041015625, "learning_rate": 0.0009365349613828351, "loss": 1.9672, "step": 28278 }, { "epoch": 0.7588825676255904, "grad_norm": 0.203125, "learning_rate": 0.000936511695497923, "loss": 2.0346, "step": 28279 }, { "epoch": 0.7589094031773294, "grad_norm": 0.201171875, "learning_rate": 0.0009364884288748034, "loss": 2.0066, "step": 28280 }, { "epoch": 0.7589362387290682, "grad_norm": 0.2021484375, "learning_rate": 0.000936465161513527, "loss": 2.0312, "step": 28281 }, { "epoch": 0.7589630742808072, "grad_norm": 0.2021484375, "learning_rate": 0.000936441893414145, "loss": 2.0531, "step": 28282 }, { "epoch": 0.7589899098325461, "grad_norm": 0.203125, "learning_rate": 0.0009364186245767086, "loss": 2.0136, "step": 28283 }, { "epoch": 0.7590167453842851, "grad_norm": 0.2080078125, "learning_rate": 0.0009363953550012686, "loss": 2.0465, "step": 28284 }, { "epoch": 0.7590435809360241, "grad_norm": 0.2099609375, "learning_rate": 0.0009363720846878762, "loss": 2.0272, "step": 28285 }, { "epoch": 0.759070416487763, "grad_norm": 0.203125, "learning_rate": 0.0009363488136365825, "loss": 1.9812, "step": 28286 }, { "epoch": 0.759097252039502, "grad_norm": 0.20703125, "learning_rate": 0.0009363255418474385, "loss": 1.9658, "step": 28287 }, { "epoch": 0.7591240875912408, "grad_norm": 0.208984375, "learning_rate": 0.0009363022693204949, "loss": 2.0393, "step": 28288 }, { "epoch": 0.7591509231429798, "grad_norm": 0.19921875, "learning_rate": 0.0009362789960558033, "loss": 2.0017, "step": 28289 }, { "epoch": 0.7591777586947188, "grad_norm": 0.2041015625, "learning_rate": 0.0009362557220534144, "loss": 2.0806, "step": 28290 }, { "epoch": 0.7592045942464577, "grad_norm": 0.2001953125, "learning_rate": 0.0009362324473133795, "loss": 2.0656, "step": 28291 }, { "epoch": 0.7592314297981967, "grad_norm": 0.201171875, "learning_rate": 0.0009362091718357495, "loss": 2.0561, "step": 28292 }, { "epoch": 0.7592582653499356, "grad_norm": 0.205078125, "learning_rate": 0.0009361858956205754, "loss": 2.0914, "step": 28293 }, { "epoch": 0.7592851009016746, "grad_norm": 0.1953125, "learning_rate": 0.0009361626186679085, "loss": 1.8588, "step": 28294 }, { "epoch": 0.7593119364534134, "grad_norm": 0.2060546875, "learning_rate": 0.0009361393409777995, "loss": 2.0814, "step": 28295 }, { "epoch": 0.7593387720051524, "grad_norm": 0.216796875, "learning_rate": 0.0009361160625502998, "loss": 2.0472, "step": 28296 }, { "epoch": 0.7593656075568914, "grad_norm": 0.2080078125, "learning_rate": 0.0009360927833854605, "loss": 2.1084, "step": 28297 }, { "epoch": 0.7593924431086303, "grad_norm": 0.201171875, "learning_rate": 0.0009360695034833324, "loss": 2.0716, "step": 28298 }, { "epoch": 0.7594192786603693, "grad_norm": 0.2021484375, "learning_rate": 0.0009360462228439664, "loss": 2.0137, "step": 28299 }, { "epoch": 0.7594461142121082, "grad_norm": 0.19921875, "learning_rate": 0.0009360229414674142, "loss": 1.978, "step": 28300 }, { "epoch": 0.7594729497638472, "grad_norm": 0.205078125, "learning_rate": 0.0009359996593537265, "loss": 1.9925, "step": 28301 }, { "epoch": 0.759499785315586, "grad_norm": 0.197265625, "learning_rate": 0.0009359763765029542, "loss": 2.0059, "step": 28302 }, { "epoch": 0.759526620867325, "grad_norm": 0.19921875, "learning_rate": 0.000935953092915149, "loss": 2.0461, "step": 28303 }, { "epoch": 0.759553456419064, "grad_norm": 0.2001953125, "learning_rate": 0.000935929808590361, "loss": 1.986, "step": 28304 }, { "epoch": 0.7595802919708029, "grad_norm": 0.201171875, "learning_rate": 0.0009359065235286422, "loss": 1.9752, "step": 28305 }, { "epoch": 0.7596071275225419, "grad_norm": 0.203125, "learning_rate": 0.0009358832377300432, "loss": 1.9952, "step": 28306 }, { "epoch": 0.7596339630742808, "grad_norm": 0.1982421875, "learning_rate": 0.0009358599511946152, "loss": 1.9163, "step": 28307 }, { "epoch": 0.7596607986260198, "grad_norm": 0.2119140625, "learning_rate": 0.0009358366639224092, "loss": 2.0289, "step": 28308 }, { "epoch": 0.7596876341777586, "grad_norm": 0.19921875, "learning_rate": 0.0009358133759134765, "loss": 1.9602, "step": 28309 }, { "epoch": 0.7597144697294976, "grad_norm": 0.197265625, "learning_rate": 0.000935790087167868, "loss": 1.9739, "step": 28310 }, { "epoch": 0.7597413052812366, "grad_norm": 0.19921875, "learning_rate": 0.0009357667976856349, "loss": 2.0014, "step": 28311 }, { "epoch": 0.7597681408329755, "grad_norm": 0.197265625, "learning_rate": 0.0009357435074668281, "loss": 2.0296, "step": 28312 }, { "epoch": 0.7597949763847145, "grad_norm": 0.201171875, "learning_rate": 0.0009357202165114989, "loss": 2.0251, "step": 28313 }, { "epoch": 0.7598218119364534, "grad_norm": 0.2060546875, "learning_rate": 0.0009356969248196982, "loss": 1.9249, "step": 28314 }, { "epoch": 0.7598486474881924, "grad_norm": 0.205078125, "learning_rate": 0.0009356736323914774, "loss": 1.9966, "step": 28315 }, { "epoch": 0.7598754830399314, "grad_norm": 0.2197265625, "learning_rate": 0.0009356503392268873, "loss": 2.1621, "step": 28316 }, { "epoch": 0.7599023185916702, "grad_norm": 0.203125, "learning_rate": 0.0009356270453259792, "loss": 2.0053, "step": 28317 }, { "epoch": 0.7599291541434092, "grad_norm": 0.197265625, "learning_rate": 0.0009356037506888039, "loss": 1.9536, "step": 28318 }, { "epoch": 0.7599559896951481, "grad_norm": 0.2001953125, "learning_rate": 0.000935580455315413, "loss": 2.0197, "step": 28319 }, { "epoch": 0.7599828252468871, "grad_norm": 0.2041015625, "learning_rate": 0.0009355571592058571, "loss": 2.0662, "step": 28320 }, { "epoch": 0.760009660798626, "grad_norm": 0.203125, "learning_rate": 0.0009355338623601875, "loss": 2.0149, "step": 28321 }, { "epoch": 0.760036496350365, "grad_norm": 0.1982421875, "learning_rate": 0.0009355105647784554, "loss": 2.0566, "step": 28322 }, { "epoch": 0.760063331902104, "grad_norm": 0.201171875, "learning_rate": 0.0009354872664607118, "loss": 2.0205, "step": 28323 }, { "epoch": 0.7600901674538428, "grad_norm": 0.2021484375, "learning_rate": 0.0009354639674070079, "loss": 1.9832, "step": 28324 }, { "epoch": 0.7601170030055818, "grad_norm": 0.2080078125, "learning_rate": 0.0009354406676173947, "loss": 2.0903, "step": 28325 }, { "epoch": 0.7601438385573207, "grad_norm": 0.197265625, "learning_rate": 0.0009354173670919234, "loss": 1.9857, "step": 28326 }, { "epoch": 0.7601706741090597, "grad_norm": 0.1982421875, "learning_rate": 0.000935394065830645, "loss": 1.9906, "step": 28327 }, { "epoch": 0.7601975096607986, "grad_norm": 0.205078125, "learning_rate": 0.0009353707638336108, "loss": 2.0419, "step": 28328 }, { "epoch": 0.7602243452125376, "grad_norm": 0.19921875, "learning_rate": 0.0009353474611008717, "loss": 2.0018, "step": 28329 }, { "epoch": 0.7602511807642766, "grad_norm": 0.205078125, "learning_rate": 0.0009353241576324789, "loss": 1.9802, "step": 28330 }, { "epoch": 0.7602780163160154, "grad_norm": 0.19921875, "learning_rate": 0.0009353008534284838, "loss": 2.0339, "step": 28331 }, { "epoch": 0.7603048518677544, "grad_norm": 0.2041015625, "learning_rate": 0.000935277548488937, "loss": 2.0829, "step": 28332 }, { "epoch": 0.7603316874194933, "grad_norm": 0.197265625, "learning_rate": 0.0009352542428138899, "loss": 1.875, "step": 28333 }, { "epoch": 0.7603585229712323, "grad_norm": 0.2021484375, "learning_rate": 0.0009352309364033938, "loss": 1.9534, "step": 28334 }, { "epoch": 0.7603853585229712, "grad_norm": 0.21484375, "learning_rate": 0.0009352076292574995, "loss": 2.0516, "step": 28335 }, { "epoch": 0.7604121940747102, "grad_norm": 0.2177734375, "learning_rate": 0.0009351843213762584, "loss": 1.9928, "step": 28336 }, { "epoch": 0.7604390296264492, "grad_norm": 0.205078125, "learning_rate": 0.0009351610127597214, "loss": 2.0218, "step": 28337 }, { "epoch": 0.760465865178188, "grad_norm": 0.205078125, "learning_rate": 0.0009351377034079399, "loss": 2.041, "step": 28338 }, { "epoch": 0.760492700729927, "grad_norm": 0.2060546875, "learning_rate": 0.0009351143933209649, "loss": 2.0165, "step": 28339 }, { "epoch": 0.7605195362816659, "grad_norm": 0.203125, "learning_rate": 0.0009350910824988475, "loss": 1.9771, "step": 28340 }, { "epoch": 0.7605463718334049, "grad_norm": 0.2041015625, "learning_rate": 0.0009350677709416386, "loss": 1.9841, "step": 28341 }, { "epoch": 0.7605732073851439, "grad_norm": 0.19921875, "learning_rate": 0.0009350444586493897, "loss": 1.958, "step": 28342 }, { "epoch": 0.7606000429368828, "grad_norm": 0.2001953125, "learning_rate": 0.000935021145622152, "loss": 2.005, "step": 28343 }, { "epoch": 0.7606268784886218, "grad_norm": 0.201171875, "learning_rate": 0.0009349978318599763, "loss": 2.061, "step": 28344 }, { "epoch": 0.7606537140403606, "grad_norm": 0.2041015625, "learning_rate": 0.000934974517362914, "loss": 1.9912, "step": 28345 }, { "epoch": 0.7606805495920996, "grad_norm": 0.2119140625, "learning_rate": 0.0009349512021310161, "loss": 2.0298, "step": 28346 }, { "epoch": 0.7607073851438385, "grad_norm": 0.19921875, "learning_rate": 0.0009349278861643339, "loss": 1.908, "step": 28347 }, { "epoch": 0.7607342206955775, "grad_norm": 0.2109375, "learning_rate": 0.0009349045694629184, "loss": 2.0512, "step": 28348 }, { "epoch": 0.7607610562473165, "grad_norm": 0.2001953125, "learning_rate": 0.0009348812520268207, "loss": 2.0188, "step": 28349 }, { "epoch": 0.7607878917990554, "grad_norm": 0.2001953125, "learning_rate": 0.0009348579338560922, "loss": 1.9633, "step": 28350 }, { "epoch": 0.7608147273507944, "grad_norm": 0.203125, "learning_rate": 0.0009348346149507838, "loss": 2.0837, "step": 28351 }, { "epoch": 0.7608415629025332, "grad_norm": 0.20703125, "learning_rate": 0.0009348112953109468, "loss": 2.0636, "step": 28352 }, { "epoch": 0.7608683984542722, "grad_norm": 0.197265625, "learning_rate": 0.0009347879749366324, "loss": 1.905, "step": 28353 }, { "epoch": 0.7608952340060111, "grad_norm": 0.197265625, "learning_rate": 0.0009347646538278915, "loss": 2.0195, "step": 28354 }, { "epoch": 0.7609220695577501, "grad_norm": 0.201171875, "learning_rate": 0.0009347413319847755, "loss": 2.0436, "step": 28355 }, { "epoch": 0.7609489051094891, "grad_norm": 0.2001953125, "learning_rate": 0.0009347180094073358, "loss": 2.0247, "step": 28356 }, { "epoch": 0.760975740661228, "grad_norm": 0.2001953125, "learning_rate": 0.0009346946860956229, "loss": 1.9583, "step": 28357 }, { "epoch": 0.761002576212967, "grad_norm": 0.1982421875, "learning_rate": 0.0009346713620496885, "loss": 2.0575, "step": 28358 }, { "epoch": 0.7610294117647058, "grad_norm": 0.2041015625, "learning_rate": 0.0009346480372695835, "loss": 2.0461, "step": 28359 }, { "epoch": 0.7610562473164448, "grad_norm": 0.2041015625, "learning_rate": 0.0009346247117553592, "loss": 1.9951, "step": 28360 }, { "epoch": 0.7610830828681838, "grad_norm": 0.2001953125, "learning_rate": 0.0009346013855070668, "loss": 1.9597, "step": 28361 }, { "epoch": 0.7611099184199227, "grad_norm": 0.20703125, "learning_rate": 0.0009345780585247572, "loss": 2.0431, "step": 28362 }, { "epoch": 0.7611367539716617, "grad_norm": 0.2041015625, "learning_rate": 0.0009345547308084821, "loss": 2.0404, "step": 28363 }, { "epoch": 0.7611635895234006, "grad_norm": 0.203125, "learning_rate": 0.000934531402358292, "loss": 1.9192, "step": 28364 }, { "epoch": 0.7611904250751396, "grad_norm": 0.19921875, "learning_rate": 0.0009345080731742387, "loss": 2.0558, "step": 28365 }, { "epoch": 0.7612172606268784, "grad_norm": 0.19921875, "learning_rate": 0.0009344847432563729, "loss": 1.9978, "step": 28366 }, { "epoch": 0.7612440961786174, "grad_norm": 0.1953125, "learning_rate": 0.0009344614126047461, "loss": 1.9514, "step": 28367 }, { "epoch": 0.7612709317303564, "grad_norm": 0.2021484375, "learning_rate": 0.0009344380812194093, "loss": 2.0265, "step": 28368 }, { "epoch": 0.7612977672820953, "grad_norm": 0.1982421875, "learning_rate": 0.0009344147491004137, "loss": 2.0441, "step": 28369 }, { "epoch": 0.7613246028338343, "grad_norm": 0.205078125, "learning_rate": 0.0009343914162478105, "loss": 2.0797, "step": 28370 }, { "epoch": 0.7613514383855732, "grad_norm": 0.201171875, "learning_rate": 0.0009343680826616513, "loss": 1.9674, "step": 28371 }, { "epoch": 0.7613782739373122, "grad_norm": 0.2109375, "learning_rate": 0.0009343447483419865, "loss": 2.0556, "step": 28372 }, { "epoch": 0.761405109489051, "grad_norm": 0.1982421875, "learning_rate": 0.0009343214132888678, "loss": 2.047, "step": 28373 }, { "epoch": 0.76143194504079, "grad_norm": 0.2041015625, "learning_rate": 0.0009342980775023461, "loss": 2.0579, "step": 28374 }, { "epoch": 0.761458780592529, "grad_norm": 0.1982421875, "learning_rate": 0.0009342747409824731, "loss": 1.9677, "step": 28375 }, { "epoch": 0.7614856161442679, "grad_norm": 0.2001953125, "learning_rate": 0.0009342514037292995, "loss": 1.962, "step": 28376 }, { "epoch": 0.7615124516960069, "grad_norm": 0.1982421875, "learning_rate": 0.0009342280657428766, "loss": 1.929, "step": 28377 }, { "epoch": 0.7615392872477458, "grad_norm": 0.193359375, "learning_rate": 0.0009342047270232557, "loss": 1.9528, "step": 28378 }, { "epoch": 0.7615661227994848, "grad_norm": 0.197265625, "learning_rate": 0.0009341813875704881, "loss": 2.0005, "step": 28379 }, { "epoch": 0.7615929583512236, "grad_norm": 0.2158203125, "learning_rate": 0.0009341580473846246, "loss": 2.0766, "step": 28380 }, { "epoch": 0.7616197939029626, "grad_norm": 0.19921875, "learning_rate": 0.0009341347064657169, "loss": 2.0422, "step": 28381 }, { "epoch": 0.7616466294547016, "grad_norm": 0.2060546875, "learning_rate": 0.0009341113648138159, "loss": 2.0666, "step": 28382 }, { "epoch": 0.7616734650064405, "grad_norm": 0.19921875, "learning_rate": 0.0009340880224289728, "loss": 2.0176, "step": 28383 }, { "epoch": 0.7617003005581795, "grad_norm": 0.205078125, "learning_rate": 0.0009340646793112387, "loss": 2.0153, "step": 28384 }, { "epoch": 0.7617271361099184, "grad_norm": 0.212890625, "learning_rate": 0.0009340413354606651, "loss": 2.1219, "step": 28385 }, { "epoch": 0.7617539716616574, "grad_norm": 0.2060546875, "learning_rate": 0.0009340179908773033, "loss": 2.0521, "step": 28386 }, { "epoch": 0.7617808072133964, "grad_norm": 0.201171875, "learning_rate": 0.000933994645561204, "loss": 2.0294, "step": 28387 }, { "epoch": 0.7618076427651352, "grad_norm": 0.2099609375, "learning_rate": 0.000933971299512419, "loss": 2.0982, "step": 28388 }, { "epoch": 0.7618344783168742, "grad_norm": 0.1962890625, "learning_rate": 0.000933947952730999, "loss": 1.9582, "step": 28389 }, { "epoch": 0.7618613138686131, "grad_norm": 0.2041015625, "learning_rate": 0.0009339246052169954, "loss": 2.0801, "step": 28390 }, { "epoch": 0.7618881494203521, "grad_norm": 0.197265625, "learning_rate": 0.0009339012569704595, "loss": 2.0214, "step": 28391 }, { "epoch": 0.761914984972091, "grad_norm": 0.1982421875, "learning_rate": 0.0009338779079914427, "loss": 2.0657, "step": 28392 }, { "epoch": 0.76194182052383, "grad_norm": 0.19921875, "learning_rate": 0.0009338545582799958, "loss": 1.9521, "step": 28393 }, { "epoch": 0.761968656075569, "grad_norm": 0.2021484375, "learning_rate": 0.0009338312078361702, "loss": 2.0607, "step": 28394 }, { "epoch": 0.7619954916273078, "grad_norm": 0.1943359375, "learning_rate": 0.0009338078566600173, "loss": 1.92, "step": 28395 }, { "epoch": 0.7620223271790468, "grad_norm": 0.20703125, "learning_rate": 0.0009337845047515881, "loss": 2.0571, "step": 28396 }, { "epoch": 0.7620491627307857, "grad_norm": 0.2080078125, "learning_rate": 0.0009337611521109337, "loss": 2.0908, "step": 28397 }, { "epoch": 0.7620759982825247, "grad_norm": 0.1962890625, "learning_rate": 0.0009337377987381058, "loss": 1.9764, "step": 28398 }, { "epoch": 0.7621028338342636, "grad_norm": 0.201171875, "learning_rate": 0.0009337144446331553, "loss": 2.0435, "step": 28399 }, { "epoch": 0.7621296693860026, "grad_norm": 0.1982421875, "learning_rate": 0.0009336910897961334, "loss": 1.9414, "step": 28400 }, { "epoch": 0.7621565049377416, "grad_norm": 0.2001953125, "learning_rate": 0.0009336677342270916, "loss": 2.0096, "step": 28401 }, { "epoch": 0.7621833404894804, "grad_norm": 0.1962890625, "learning_rate": 0.0009336443779260807, "loss": 1.9363, "step": 28402 }, { "epoch": 0.7622101760412194, "grad_norm": 0.1982421875, "learning_rate": 0.0009336210208931524, "loss": 2.0038, "step": 28403 }, { "epoch": 0.7622370115929583, "grad_norm": 0.1943359375, "learning_rate": 0.0009335976631283577, "loss": 1.9597, "step": 28404 }, { "epoch": 0.7622638471446973, "grad_norm": 0.19921875, "learning_rate": 0.0009335743046317476, "loss": 1.9777, "step": 28405 }, { "epoch": 0.7622906826964362, "grad_norm": 0.1982421875, "learning_rate": 0.0009335509454033739, "loss": 2.0187, "step": 28406 }, { "epoch": 0.7623175182481752, "grad_norm": 0.205078125, "learning_rate": 0.0009335275854432876, "loss": 2.0066, "step": 28407 }, { "epoch": 0.7623443537999142, "grad_norm": 0.20703125, "learning_rate": 0.0009335042247515397, "loss": 1.9766, "step": 28408 }, { "epoch": 0.762371189351653, "grad_norm": 0.205078125, "learning_rate": 0.0009334808633281818, "loss": 1.898, "step": 28409 }, { "epoch": 0.762398024903392, "grad_norm": 0.2001953125, "learning_rate": 0.000933457501173265, "loss": 1.9805, "step": 28410 }, { "epoch": 0.7624248604551309, "grad_norm": 0.2080078125, "learning_rate": 0.0009334341382868404, "loss": 2.0227, "step": 28411 }, { "epoch": 0.7624516960068699, "grad_norm": 0.2041015625, "learning_rate": 0.0009334107746689596, "loss": 2.0576, "step": 28412 }, { "epoch": 0.7624785315586089, "grad_norm": 0.2080078125, "learning_rate": 0.0009333874103196735, "loss": 2.0463, "step": 28413 }, { "epoch": 0.7625053671103478, "grad_norm": 0.1884765625, "learning_rate": 0.0009333640452390336, "loss": 1.8887, "step": 28414 }, { "epoch": 0.7625322026620868, "grad_norm": 0.2041015625, "learning_rate": 0.0009333406794270909, "loss": 2.0294, "step": 28415 }, { "epoch": 0.7625590382138256, "grad_norm": 0.201171875, "learning_rate": 0.000933317312883897, "loss": 2.1267, "step": 28416 }, { "epoch": 0.7625858737655646, "grad_norm": 0.1953125, "learning_rate": 0.0009332939456095029, "loss": 1.9429, "step": 28417 }, { "epoch": 0.7626127093173035, "grad_norm": 0.2001953125, "learning_rate": 0.00093327057760396, "loss": 1.9958, "step": 28418 }, { "epoch": 0.7626395448690425, "grad_norm": 0.205078125, "learning_rate": 0.0009332472088673194, "loss": 2.0522, "step": 28419 }, { "epoch": 0.7626663804207815, "grad_norm": 0.201171875, "learning_rate": 0.0009332238393996324, "loss": 1.9907, "step": 28420 }, { "epoch": 0.7626932159725204, "grad_norm": 0.2060546875, "learning_rate": 0.0009332004692009506, "loss": 1.9515, "step": 28421 }, { "epoch": 0.7627200515242594, "grad_norm": 0.19921875, "learning_rate": 0.0009331770982713248, "loss": 2.0477, "step": 28422 }, { "epoch": 0.7627468870759982, "grad_norm": 0.1982421875, "learning_rate": 0.0009331537266108065, "loss": 2.0022, "step": 28423 }, { "epoch": 0.7627737226277372, "grad_norm": 0.193359375, "learning_rate": 0.0009331303542194471, "loss": 1.8982, "step": 28424 }, { "epoch": 0.7628005581794761, "grad_norm": 0.2041015625, "learning_rate": 0.0009331069810972974, "loss": 1.9388, "step": 28425 }, { "epoch": 0.7628273937312151, "grad_norm": 0.20703125, "learning_rate": 0.0009330836072444093, "loss": 1.9857, "step": 28426 }, { "epoch": 0.7628542292829541, "grad_norm": 0.1982421875, "learning_rate": 0.0009330602326608336, "loss": 1.9809, "step": 28427 }, { "epoch": 0.762881064834693, "grad_norm": 0.19921875, "learning_rate": 0.0009330368573466217, "loss": 1.9801, "step": 28428 }, { "epoch": 0.762907900386432, "grad_norm": 0.193359375, "learning_rate": 0.000933013481301825, "loss": 1.9661, "step": 28429 }, { "epoch": 0.7629347359381708, "grad_norm": 0.2001953125, "learning_rate": 0.0009329901045264946, "loss": 2.0438, "step": 28430 }, { "epoch": 0.7629615714899098, "grad_norm": 0.205078125, "learning_rate": 0.000932966727020682, "loss": 2.1008, "step": 28431 }, { "epoch": 0.7629884070416488, "grad_norm": 0.205078125, "learning_rate": 0.0009329433487844382, "loss": 1.9639, "step": 28432 }, { "epoch": 0.7630152425933877, "grad_norm": 0.20703125, "learning_rate": 0.0009329199698178147, "loss": 1.9892, "step": 28433 }, { "epoch": 0.7630420781451267, "grad_norm": 0.1982421875, "learning_rate": 0.000932896590120863, "loss": 2.0136, "step": 28434 }, { "epoch": 0.7630689136968656, "grad_norm": 0.201171875, "learning_rate": 0.0009328732096936337, "loss": 1.9693, "step": 28435 }, { "epoch": 0.7630957492486046, "grad_norm": 0.2021484375, "learning_rate": 0.0009328498285361788, "loss": 2.0229, "step": 28436 }, { "epoch": 0.7631225848003435, "grad_norm": 0.2060546875, "learning_rate": 0.0009328264466485493, "loss": 2.0165, "step": 28437 }, { "epoch": 0.7631494203520824, "grad_norm": 0.203125, "learning_rate": 0.0009328030640307964, "loss": 1.9614, "step": 28438 }, { "epoch": 0.7631762559038214, "grad_norm": 0.197265625, "learning_rate": 0.0009327796806829714, "loss": 2.0396, "step": 28439 }, { "epoch": 0.7632030914555603, "grad_norm": 0.2001953125, "learning_rate": 0.0009327562966051258, "loss": 1.9703, "step": 28440 }, { "epoch": 0.7632299270072993, "grad_norm": 0.1962890625, "learning_rate": 0.0009327329117973109, "loss": 1.9631, "step": 28441 }, { "epoch": 0.7632567625590382, "grad_norm": 0.1982421875, "learning_rate": 0.0009327095262595776, "loss": 1.916, "step": 28442 }, { "epoch": 0.7632835981107772, "grad_norm": 0.2041015625, "learning_rate": 0.0009326861399919777, "loss": 2.0037, "step": 28443 }, { "epoch": 0.763310433662516, "grad_norm": 0.203125, "learning_rate": 0.0009326627529945621, "loss": 1.9948, "step": 28444 }, { "epoch": 0.763337269214255, "grad_norm": 0.2021484375, "learning_rate": 0.0009326393652673825, "loss": 2.0531, "step": 28445 }, { "epoch": 0.763364104765994, "grad_norm": 0.201171875, "learning_rate": 0.0009326159768104899, "loss": 2.0566, "step": 28446 }, { "epoch": 0.7633909403177329, "grad_norm": 0.2001953125, "learning_rate": 0.0009325925876239356, "loss": 2.0304, "step": 28447 }, { "epoch": 0.7634177758694719, "grad_norm": 0.2001953125, "learning_rate": 0.0009325691977077712, "loss": 2.0374, "step": 28448 }, { "epoch": 0.7634446114212108, "grad_norm": 0.1962890625, "learning_rate": 0.000932545807062048, "loss": 1.9304, "step": 28449 }, { "epoch": 0.7634714469729498, "grad_norm": 0.2001953125, "learning_rate": 0.0009325224156868168, "loss": 2.0728, "step": 28450 }, { "epoch": 0.7634982825246887, "grad_norm": 0.205078125, "learning_rate": 0.0009324990235821292, "loss": 2.0112, "step": 28451 }, { "epoch": 0.7635251180764276, "grad_norm": 0.2001953125, "learning_rate": 0.0009324756307480368, "loss": 1.9352, "step": 28452 }, { "epoch": 0.7635519536281666, "grad_norm": 0.201171875, "learning_rate": 0.0009324522371845905, "loss": 2.0481, "step": 28453 }, { "epoch": 0.7635787891799055, "grad_norm": 0.19921875, "learning_rate": 0.000932428842891842, "loss": 1.9574, "step": 28454 }, { "epoch": 0.7636056247316445, "grad_norm": 0.193359375, "learning_rate": 0.0009324054478698422, "loss": 1.9856, "step": 28455 }, { "epoch": 0.7636324602833834, "grad_norm": 0.1962890625, "learning_rate": 0.0009323820521186427, "loss": 2.0775, "step": 28456 }, { "epoch": 0.7636592958351224, "grad_norm": 0.20703125, "learning_rate": 0.000932358655638295, "loss": 2.0913, "step": 28457 }, { "epoch": 0.7636861313868614, "grad_norm": 0.205078125, "learning_rate": 0.0009323352584288499, "loss": 2.0793, "step": 28458 }, { "epoch": 0.7637129669386002, "grad_norm": 0.2021484375, "learning_rate": 0.0009323118604903591, "loss": 1.9296, "step": 28459 }, { "epoch": 0.7637398024903392, "grad_norm": 0.1953125, "learning_rate": 0.000932288461822874, "loss": 2.0172, "step": 28460 }, { "epoch": 0.7637666380420781, "grad_norm": 0.201171875, "learning_rate": 0.0009322650624264455, "loss": 1.9382, "step": 28461 }, { "epoch": 0.7637934735938171, "grad_norm": 0.2041015625, "learning_rate": 0.0009322416623011254, "loss": 2.0247, "step": 28462 }, { "epoch": 0.763820309145556, "grad_norm": 0.2001953125, "learning_rate": 0.0009322182614469648, "loss": 1.9684, "step": 28463 }, { "epoch": 0.763847144697295, "grad_norm": 0.1865234375, "learning_rate": 0.000932194859864015, "loss": 1.824, "step": 28464 }, { "epoch": 0.763873980249034, "grad_norm": 0.19921875, "learning_rate": 0.0009321714575523275, "loss": 2.0582, "step": 28465 }, { "epoch": 0.7639008158007728, "grad_norm": 0.1943359375, "learning_rate": 0.0009321480545119536, "loss": 2.0179, "step": 28466 }, { "epoch": 0.7639276513525118, "grad_norm": 0.2060546875, "learning_rate": 0.0009321246507429444, "loss": 1.9614, "step": 28467 }, { "epoch": 0.7639544869042507, "grad_norm": 0.201171875, "learning_rate": 0.0009321012462453515, "loss": 2.0186, "step": 28468 }, { "epoch": 0.7639813224559897, "grad_norm": 0.197265625, "learning_rate": 0.0009320778410192264, "loss": 2.0087, "step": 28469 }, { "epoch": 0.7640081580077286, "grad_norm": 0.1943359375, "learning_rate": 0.0009320544350646198, "loss": 1.9521, "step": 28470 }, { "epoch": 0.7640349935594676, "grad_norm": 0.1962890625, "learning_rate": 0.0009320310283815838, "loss": 1.986, "step": 28471 }, { "epoch": 0.7640618291112066, "grad_norm": 0.2001953125, "learning_rate": 0.0009320076209701693, "loss": 2.0197, "step": 28472 }, { "epoch": 0.7640886646629454, "grad_norm": 0.2060546875, "learning_rate": 0.0009319842128304277, "loss": 2.0199, "step": 28473 }, { "epoch": 0.7641155002146844, "grad_norm": 0.2060546875, "learning_rate": 0.0009319608039624105, "loss": 2.0735, "step": 28474 }, { "epoch": 0.7641423357664233, "grad_norm": 0.201171875, "learning_rate": 0.0009319373943661689, "loss": 1.9815, "step": 28475 }, { "epoch": 0.7641691713181623, "grad_norm": 0.205078125, "learning_rate": 0.0009319139840417545, "loss": 2.0007, "step": 28476 }, { "epoch": 0.7641960068699012, "grad_norm": 0.2001953125, "learning_rate": 0.0009318905729892181, "loss": 1.9878, "step": 28477 }, { "epoch": 0.7642228424216402, "grad_norm": 0.19921875, "learning_rate": 0.0009318671612086117, "loss": 2.0014, "step": 28478 }, { "epoch": 0.7642496779733792, "grad_norm": 0.193359375, "learning_rate": 0.0009318437486999864, "loss": 2.0344, "step": 28479 }, { "epoch": 0.764276513525118, "grad_norm": 0.2021484375, "learning_rate": 0.0009318203354633935, "loss": 2.0088, "step": 28480 }, { "epoch": 0.764303349076857, "grad_norm": 0.2041015625, "learning_rate": 0.0009317969214988843, "loss": 1.9126, "step": 28481 }, { "epoch": 0.7643301846285959, "grad_norm": 0.2041015625, "learning_rate": 0.0009317735068065103, "loss": 2.0368, "step": 28482 }, { "epoch": 0.7643570201803349, "grad_norm": 0.2099609375, "learning_rate": 0.0009317500913863228, "loss": 1.9808, "step": 28483 }, { "epoch": 0.7643838557320739, "grad_norm": 0.203125, "learning_rate": 0.0009317266752383734, "loss": 2.1226, "step": 28484 }, { "epoch": 0.7644106912838128, "grad_norm": 0.201171875, "learning_rate": 0.0009317032583627133, "loss": 2.0058, "step": 28485 }, { "epoch": 0.7644375268355518, "grad_norm": 0.203125, "learning_rate": 0.0009316798407593935, "loss": 2.0451, "step": 28486 }, { "epoch": 0.7644643623872907, "grad_norm": 0.1953125, "learning_rate": 0.0009316564224284659, "loss": 1.9784, "step": 28487 }, { "epoch": 0.7644911979390296, "grad_norm": 0.19921875, "learning_rate": 0.0009316330033699818, "loss": 2.0698, "step": 28488 }, { "epoch": 0.7645180334907685, "grad_norm": 0.197265625, "learning_rate": 0.0009316095835839921, "loss": 2.0402, "step": 28489 }, { "epoch": 0.7645448690425075, "grad_norm": 0.1923828125, "learning_rate": 0.0009315861630705489, "loss": 1.9416, "step": 28490 }, { "epoch": 0.7645717045942465, "grad_norm": 0.197265625, "learning_rate": 0.0009315627418297032, "loss": 2.0076, "step": 28491 }, { "epoch": 0.7645985401459854, "grad_norm": 0.1953125, "learning_rate": 0.0009315393198615062, "loss": 1.9897, "step": 28492 }, { "epoch": 0.7646253756977244, "grad_norm": 0.19921875, "learning_rate": 0.0009315158971660095, "loss": 2.097, "step": 28493 }, { "epoch": 0.7646522112494633, "grad_norm": 0.1982421875, "learning_rate": 0.0009314924737432646, "loss": 2.0179, "step": 28494 }, { "epoch": 0.7646790468012022, "grad_norm": 0.2060546875, "learning_rate": 0.0009314690495933225, "loss": 2.0718, "step": 28495 }, { "epoch": 0.7647058823529411, "grad_norm": 0.205078125, "learning_rate": 0.000931445624716235, "loss": 2.0262, "step": 28496 }, { "epoch": 0.7647327179046801, "grad_norm": 0.205078125, "learning_rate": 0.0009314221991120533, "loss": 2.0094, "step": 28497 }, { "epoch": 0.7647595534564191, "grad_norm": 0.1923828125, "learning_rate": 0.0009313987727808289, "loss": 1.9163, "step": 28498 }, { "epoch": 0.764786389008158, "grad_norm": 0.1962890625, "learning_rate": 0.0009313753457226129, "loss": 2.0166, "step": 28499 }, { "epoch": 0.764813224559897, "grad_norm": 0.2021484375, "learning_rate": 0.000931351917937457, "loss": 1.9997, "step": 28500 }, { "epoch": 0.7648400601116359, "grad_norm": 0.1982421875, "learning_rate": 0.0009313284894254125, "loss": 2.0164, "step": 28501 }, { "epoch": 0.7648668956633748, "grad_norm": 0.1962890625, "learning_rate": 0.0009313050601865307, "loss": 2.009, "step": 28502 }, { "epoch": 0.7648937312151138, "grad_norm": 0.2021484375, "learning_rate": 0.0009312816302208631, "loss": 2.0397, "step": 28503 }, { "epoch": 0.7649205667668527, "grad_norm": 0.2021484375, "learning_rate": 0.000931258199528461, "loss": 2.0422, "step": 28504 }, { "epoch": 0.7649474023185917, "grad_norm": 0.19921875, "learning_rate": 0.0009312347681093759, "loss": 1.927, "step": 28505 }, { "epoch": 0.7649742378703306, "grad_norm": 0.2021484375, "learning_rate": 0.0009312113359636592, "loss": 2.0386, "step": 28506 }, { "epoch": 0.7650010734220696, "grad_norm": 0.2001953125, "learning_rate": 0.0009311879030913622, "loss": 1.9276, "step": 28507 }, { "epoch": 0.7650279089738085, "grad_norm": 0.1962890625, "learning_rate": 0.0009311644694925366, "loss": 1.9362, "step": 28508 }, { "epoch": 0.7650547445255474, "grad_norm": 0.2041015625, "learning_rate": 0.0009311410351672332, "loss": 2.0677, "step": 28509 }, { "epoch": 0.7650815800772864, "grad_norm": 0.201171875, "learning_rate": 0.000931117600115504, "loss": 1.8775, "step": 28510 }, { "epoch": 0.7651084156290253, "grad_norm": 0.1982421875, "learning_rate": 0.0009310941643374003, "loss": 2.0075, "step": 28511 }, { "epoch": 0.7651352511807643, "grad_norm": 0.2001953125, "learning_rate": 0.0009310707278329732, "loss": 1.9903, "step": 28512 }, { "epoch": 0.7651620867325032, "grad_norm": 0.2021484375, "learning_rate": 0.0009310472906022746, "loss": 2.0629, "step": 28513 }, { "epoch": 0.7651889222842422, "grad_norm": 0.197265625, "learning_rate": 0.0009310238526453554, "loss": 1.9677, "step": 28514 }, { "epoch": 0.7652157578359811, "grad_norm": 0.1962890625, "learning_rate": 0.0009310004139622673, "loss": 2.057, "step": 28515 }, { "epoch": 0.76524259338772, "grad_norm": 0.203125, "learning_rate": 0.0009309769745530618, "loss": 1.9929, "step": 28516 }, { "epoch": 0.765269428939459, "grad_norm": 0.212890625, "learning_rate": 0.0009309535344177901, "loss": 1.9536, "step": 28517 }, { "epoch": 0.7652962644911979, "grad_norm": 0.19921875, "learning_rate": 0.0009309300935565038, "loss": 1.9804, "step": 28518 }, { "epoch": 0.7653231000429369, "grad_norm": 0.1943359375, "learning_rate": 0.0009309066519692542, "loss": 1.9015, "step": 28519 }, { "epoch": 0.7653499355946758, "grad_norm": 0.2060546875, "learning_rate": 0.0009308832096560926, "loss": 2.0596, "step": 28520 }, { "epoch": 0.7653767711464148, "grad_norm": 0.20703125, "learning_rate": 0.0009308597666170707, "loss": 2.0376, "step": 28521 }, { "epoch": 0.7654036066981537, "grad_norm": 0.1953125, "learning_rate": 0.0009308363228522398, "loss": 1.9534, "step": 28522 }, { "epoch": 0.7654304422498927, "grad_norm": 0.2001953125, "learning_rate": 0.0009308128783616515, "loss": 1.9903, "step": 28523 }, { "epoch": 0.7654572778016316, "grad_norm": 0.197265625, "learning_rate": 0.0009307894331453569, "loss": 2.0393, "step": 28524 }, { "epoch": 0.7654841133533705, "grad_norm": 0.1904296875, "learning_rate": 0.0009307659872034076, "loss": 1.8777, "step": 28525 }, { "epoch": 0.7655109489051095, "grad_norm": 0.2138671875, "learning_rate": 0.000930742540535855, "loss": 2.0312, "step": 28526 }, { "epoch": 0.7655377844568484, "grad_norm": 0.205078125, "learning_rate": 0.0009307190931427507, "loss": 1.9702, "step": 28527 }, { "epoch": 0.7655646200085874, "grad_norm": 0.2001953125, "learning_rate": 0.0009306956450241459, "loss": 2.0404, "step": 28528 }, { "epoch": 0.7655914555603264, "grad_norm": 0.203125, "learning_rate": 0.0009306721961800923, "loss": 1.8999, "step": 28529 }, { "epoch": 0.7656182911120653, "grad_norm": 0.205078125, "learning_rate": 0.0009306487466106411, "loss": 2.0497, "step": 28530 }, { "epoch": 0.7656451266638042, "grad_norm": 0.19921875, "learning_rate": 0.0009306252963158436, "loss": 2.0061, "step": 28531 }, { "epoch": 0.7656719622155431, "grad_norm": 0.2021484375, "learning_rate": 0.0009306018452957518, "loss": 2.036, "step": 28532 }, { "epoch": 0.7656987977672821, "grad_norm": 0.2041015625, "learning_rate": 0.0009305783935504168, "loss": 2.0078, "step": 28533 }, { "epoch": 0.765725633319021, "grad_norm": 0.1982421875, "learning_rate": 0.00093055494107989, "loss": 1.9612, "step": 28534 }, { "epoch": 0.76575246887076, "grad_norm": 0.19140625, "learning_rate": 0.0009305314878842229, "loss": 1.9153, "step": 28535 }, { "epoch": 0.765779304422499, "grad_norm": 0.1923828125, "learning_rate": 0.0009305080339634669, "loss": 1.942, "step": 28536 }, { "epoch": 0.7658061399742379, "grad_norm": 0.2021484375, "learning_rate": 0.0009304845793176733, "loss": 1.9711, "step": 28537 }, { "epoch": 0.7658329755259768, "grad_norm": 0.1943359375, "learning_rate": 0.0009304611239468941, "loss": 1.8897, "step": 28538 }, { "epoch": 0.7658598110777157, "grad_norm": 0.1962890625, "learning_rate": 0.0009304376678511804, "loss": 2.0061, "step": 28539 }, { "epoch": 0.7658866466294547, "grad_norm": 0.1962890625, "learning_rate": 0.0009304142110305835, "loss": 1.8549, "step": 28540 }, { "epoch": 0.7659134821811936, "grad_norm": 0.1962890625, "learning_rate": 0.0009303907534851553, "loss": 1.9771, "step": 28541 }, { "epoch": 0.7659403177329326, "grad_norm": 0.19921875, "learning_rate": 0.0009303672952149468, "loss": 1.9688, "step": 28542 }, { "epoch": 0.7659671532846716, "grad_norm": 0.2001953125, "learning_rate": 0.0009303438362200096, "loss": 2.0102, "step": 28543 }, { "epoch": 0.7659939888364105, "grad_norm": 0.1962890625, "learning_rate": 0.0009303203765003953, "loss": 1.9895, "step": 28544 }, { "epoch": 0.7660208243881494, "grad_norm": 0.203125, "learning_rate": 0.0009302969160561552, "loss": 2.0558, "step": 28545 }, { "epoch": 0.7660476599398883, "grad_norm": 0.201171875, "learning_rate": 0.000930273454887341, "loss": 2.0675, "step": 28546 }, { "epoch": 0.7660744954916273, "grad_norm": 0.197265625, "learning_rate": 0.0009302499929940039, "loss": 1.9707, "step": 28547 }, { "epoch": 0.7661013310433663, "grad_norm": 0.2041015625, "learning_rate": 0.0009302265303761955, "loss": 2.03, "step": 28548 }, { "epoch": 0.7661281665951052, "grad_norm": 0.1982421875, "learning_rate": 0.0009302030670339671, "loss": 2.004, "step": 28549 }, { "epoch": 0.7661550021468442, "grad_norm": 0.19921875, "learning_rate": 0.0009301796029673705, "loss": 1.9527, "step": 28550 }, { "epoch": 0.7661818376985831, "grad_norm": 0.2119140625, "learning_rate": 0.000930156138176457, "loss": 2.0265, "step": 28551 }, { "epoch": 0.766208673250322, "grad_norm": 0.193359375, "learning_rate": 0.000930132672661278, "loss": 1.8891, "step": 28552 }, { "epoch": 0.7662355088020609, "grad_norm": 0.201171875, "learning_rate": 0.0009301092064218849, "loss": 1.9851, "step": 28553 }, { "epoch": 0.7662623443537999, "grad_norm": 0.1962890625, "learning_rate": 0.0009300857394583295, "loss": 1.9395, "step": 28554 }, { "epoch": 0.7662891799055389, "grad_norm": 0.2041015625, "learning_rate": 0.0009300622717706631, "loss": 2.008, "step": 28555 }, { "epoch": 0.7663160154572778, "grad_norm": 0.201171875, "learning_rate": 0.000930038803358937, "loss": 1.9383, "step": 28556 }, { "epoch": 0.7663428510090168, "grad_norm": 0.19921875, "learning_rate": 0.0009300153342232031, "loss": 2.0051, "step": 28557 }, { "epoch": 0.7663696865607557, "grad_norm": 0.1953125, "learning_rate": 0.0009299918643635126, "loss": 1.9543, "step": 28558 }, { "epoch": 0.7663965221124946, "grad_norm": 0.19921875, "learning_rate": 0.000929968393779917, "loss": 2.0303, "step": 28559 }, { "epoch": 0.7664233576642335, "grad_norm": 0.193359375, "learning_rate": 0.0009299449224724678, "loss": 1.954, "step": 28560 }, { "epoch": 0.7664501932159725, "grad_norm": 0.197265625, "learning_rate": 0.0009299214504412166, "loss": 1.9463, "step": 28561 }, { "epoch": 0.7664770287677115, "grad_norm": 0.193359375, "learning_rate": 0.0009298979776862147, "loss": 2.0352, "step": 28562 }, { "epoch": 0.7665038643194504, "grad_norm": 0.1953125, "learning_rate": 0.0009298745042075137, "loss": 1.9927, "step": 28563 }, { "epoch": 0.7665306998711894, "grad_norm": 0.201171875, "learning_rate": 0.0009298510300051652, "loss": 1.9881, "step": 28564 }, { "epoch": 0.7665575354229283, "grad_norm": 0.19921875, "learning_rate": 0.0009298275550792205, "loss": 2.0093, "step": 28565 }, { "epoch": 0.7665843709746673, "grad_norm": 0.1962890625, "learning_rate": 0.0009298040794297311, "loss": 2.0609, "step": 28566 }, { "epoch": 0.7666112065264061, "grad_norm": 0.1962890625, "learning_rate": 0.0009297806030567487, "loss": 1.9338, "step": 28567 }, { "epoch": 0.7666380420781451, "grad_norm": 0.1943359375, "learning_rate": 0.0009297571259603246, "loss": 1.9138, "step": 28568 }, { "epoch": 0.7666648776298841, "grad_norm": 0.1962890625, "learning_rate": 0.0009297336481405104, "loss": 1.971, "step": 28569 }, { "epoch": 0.766691713181623, "grad_norm": 0.2060546875, "learning_rate": 0.0009297101695973576, "loss": 1.9963, "step": 28570 }, { "epoch": 0.766718548733362, "grad_norm": 0.1962890625, "learning_rate": 0.0009296866903309176, "loss": 2.0429, "step": 28571 }, { "epoch": 0.7667453842851009, "grad_norm": 0.1982421875, "learning_rate": 0.0009296632103412422, "loss": 2.011, "step": 28572 }, { "epoch": 0.7667722198368399, "grad_norm": 0.189453125, "learning_rate": 0.0009296397296283825, "loss": 1.9209, "step": 28573 }, { "epoch": 0.7667990553885788, "grad_norm": 0.1953125, "learning_rate": 0.0009296162481923904, "loss": 1.9904, "step": 28574 }, { "epoch": 0.7668258909403177, "grad_norm": 0.1943359375, "learning_rate": 0.0009295927660333171, "loss": 1.962, "step": 28575 }, { "epoch": 0.7668527264920567, "grad_norm": 0.1953125, "learning_rate": 0.0009295692831512143, "loss": 1.9539, "step": 28576 }, { "epoch": 0.7668795620437956, "grad_norm": 0.2001953125, "learning_rate": 0.0009295457995461334, "loss": 1.9529, "step": 28577 }, { "epoch": 0.7669063975955346, "grad_norm": 0.2021484375, "learning_rate": 0.0009295223152181261, "loss": 2.0078, "step": 28578 }, { "epoch": 0.7669332331472735, "grad_norm": 0.197265625, "learning_rate": 0.0009294988301672436, "loss": 2.0194, "step": 28579 }, { "epoch": 0.7669600686990125, "grad_norm": 0.1982421875, "learning_rate": 0.0009294753443935378, "loss": 1.8903, "step": 28580 }, { "epoch": 0.7669869042507514, "grad_norm": 0.1962890625, "learning_rate": 0.0009294518578970599, "loss": 2.0626, "step": 28581 }, { "epoch": 0.7670137398024903, "grad_norm": 0.19921875, "learning_rate": 0.0009294283706778615, "loss": 2.0957, "step": 28582 }, { "epoch": 0.7670405753542293, "grad_norm": 0.19921875, "learning_rate": 0.0009294048827359943, "loss": 2.006, "step": 28583 }, { "epoch": 0.7670674109059682, "grad_norm": 0.1982421875, "learning_rate": 0.0009293813940715095, "loss": 2.0155, "step": 28584 }, { "epoch": 0.7670942464577072, "grad_norm": 0.1962890625, "learning_rate": 0.0009293579046844591, "loss": 2.0685, "step": 28585 }, { "epoch": 0.7671210820094461, "grad_norm": 0.1982421875, "learning_rate": 0.0009293344145748942, "loss": 2.0217, "step": 28586 }, { "epoch": 0.767147917561185, "grad_norm": 0.1923828125, "learning_rate": 0.0009293109237428665, "loss": 1.883, "step": 28587 }, { "epoch": 0.767174753112924, "grad_norm": 0.2021484375, "learning_rate": 0.0009292874321884275, "loss": 2.0554, "step": 28588 }, { "epoch": 0.7672015886646629, "grad_norm": 0.1962890625, "learning_rate": 0.0009292639399116289, "loss": 1.9749, "step": 28589 }, { "epoch": 0.7672284242164019, "grad_norm": 0.1953125, "learning_rate": 0.0009292404469125219, "loss": 1.9588, "step": 28590 }, { "epoch": 0.7672552597681408, "grad_norm": 0.19140625, "learning_rate": 0.0009292169531911582, "loss": 1.9134, "step": 28591 }, { "epoch": 0.7672820953198798, "grad_norm": 0.197265625, "learning_rate": 0.0009291934587475895, "loss": 2.035, "step": 28592 }, { "epoch": 0.7673089308716187, "grad_norm": 0.1953125, "learning_rate": 0.0009291699635818672, "loss": 1.9787, "step": 28593 }, { "epoch": 0.7673357664233577, "grad_norm": 0.197265625, "learning_rate": 0.0009291464676940427, "loss": 1.9195, "step": 28594 }, { "epoch": 0.7673626019750966, "grad_norm": 0.1982421875, "learning_rate": 0.0009291229710841678, "loss": 2.0576, "step": 28595 }, { "epoch": 0.7673894375268355, "grad_norm": 0.197265625, "learning_rate": 0.0009290994737522941, "loss": 2.0186, "step": 28596 }, { "epoch": 0.7674162730785745, "grad_norm": 0.1962890625, "learning_rate": 0.0009290759756984727, "loss": 2.0139, "step": 28597 }, { "epoch": 0.7674431086303134, "grad_norm": 0.1982421875, "learning_rate": 0.0009290524769227554, "loss": 2.0249, "step": 28598 }, { "epoch": 0.7674699441820524, "grad_norm": 0.193359375, "learning_rate": 0.000929028977425194, "loss": 1.9852, "step": 28599 }, { "epoch": 0.7674967797337914, "grad_norm": 0.1943359375, "learning_rate": 0.0009290054772058397, "loss": 1.9978, "step": 28600 }, { "epoch": 0.7675236152855303, "grad_norm": 0.2001953125, "learning_rate": 0.0009289819762647441, "loss": 1.9251, "step": 28601 }, { "epoch": 0.7675504508372692, "grad_norm": 0.203125, "learning_rate": 0.000928958474601959, "loss": 1.9939, "step": 28602 }, { "epoch": 0.7675772863890081, "grad_norm": 0.2060546875, "learning_rate": 0.0009289349722175358, "loss": 1.9934, "step": 28603 }, { "epoch": 0.7676041219407471, "grad_norm": 0.1982421875, "learning_rate": 0.0009289114691115259, "loss": 1.9159, "step": 28604 }, { "epoch": 0.767630957492486, "grad_norm": 0.193359375, "learning_rate": 0.0009288879652839811, "loss": 1.9066, "step": 28605 }, { "epoch": 0.767657793044225, "grad_norm": 0.2001953125, "learning_rate": 0.0009288644607349529, "loss": 2.0458, "step": 28606 }, { "epoch": 0.767684628595964, "grad_norm": 0.201171875, "learning_rate": 0.0009288409554644927, "loss": 2.0432, "step": 28607 }, { "epoch": 0.7677114641477029, "grad_norm": 0.2119140625, "learning_rate": 0.0009288174494726522, "loss": 1.9961, "step": 28608 }, { "epoch": 0.7677382996994419, "grad_norm": 0.1943359375, "learning_rate": 0.000928793942759483, "loss": 1.9976, "step": 28609 }, { "epoch": 0.7677651352511807, "grad_norm": 0.1904296875, "learning_rate": 0.0009287704353250365, "loss": 1.9281, "step": 28610 }, { "epoch": 0.7677919708029197, "grad_norm": 0.2001953125, "learning_rate": 0.0009287469271693647, "loss": 1.997, "step": 28611 }, { "epoch": 0.7678188063546586, "grad_norm": 0.197265625, "learning_rate": 0.0009287234182925186, "loss": 2.0573, "step": 28612 }, { "epoch": 0.7678456419063976, "grad_norm": 0.201171875, "learning_rate": 0.0009286999086945501, "loss": 2.0607, "step": 28613 }, { "epoch": 0.7678724774581366, "grad_norm": 0.19140625, "learning_rate": 0.0009286763983755107, "loss": 1.9742, "step": 28614 }, { "epoch": 0.7678993130098755, "grad_norm": 0.201171875, "learning_rate": 0.0009286528873354519, "loss": 2.0913, "step": 28615 }, { "epoch": 0.7679261485616145, "grad_norm": 0.197265625, "learning_rate": 0.0009286293755744256, "loss": 2.0226, "step": 28616 }, { "epoch": 0.7679529841133533, "grad_norm": 0.2001953125, "learning_rate": 0.000928605863092483, "loss": 1.925, "step": 28617 }, { "epoch": 0.7679798196650923, "grad_norm": 0.197265625, "learning_rate": 0.0009285823498896758, "loss": 1.9665, "step": 28618 }, { "epoch": 0.7680066552168313, "grad_norm": 0.201171875, "learning_rate": 0.0009285588359660555, "loss": 2.0826, "step": 28619 }, { "epoch": 0.7680334907685702, "grad_norm": 0.1943359375, "learning_rate": 0.000928535321321674, "loss": 1.9641, "step": 28620 }, { "epoch": 0.7680603263203092, "grad_norm": 0.197265625, "learning_rate": 0.0009285118059565823, "loss": 1.9602, "step": 28621 }, { "epoch": 0.7680871618720481, "grad_norm": 0.19921875, "learning_rate": 0.0009284882898708327, "loss": 1.9611, "step": 28622 }, { "epoch": 0.768113997423787, "grad_norm": 0.197265625, "learning_rate": 0.0009284647730644763, "loss": 2.0428, "step": 28623 }, { "epoch": 0.7681408329755259, "grad_norm": 0.201171875, "learning_rate": 0.0009284412555375649, "loss": 1.9517, "step": 28624 }, { "epoch": 0.7681676685272649, "grad_norm": 0.2080078125, "learning_rate": 0.00092841773729015, "loss": 2.0239, "step": 28625 }, { "epoch": 0.7681945040790039, "grad_norm": 0.2041015625, "learning_rate": 0.000928394218322283, "loss": 2.0457, "step": 28626 }, { "epoch": 0.7682213396307428, "grad_norm": 0.19921875, "learning_rate": 0.0009283706986340158, "loss": 1.9724, "step": 28627 }, { "epoch": 0.7682481751824818, "grad_norm": 0.1982421875, "learning_rate": 0.0009283471782254001, "loss": 2.0496, "step": 28628 }, { "epoch": 0.7682750107342207, "grad_norm": 0.203125, "learning_rate": 0.0009283236570964871, "loss": 2.0811, "step": 28629 }, { "epoch": 0.7683018462859597, "grad_norm": 0.1953125, "learning_rate": 0.0009283001352473287, "loss": 1.999, "step": 28630 }, { "epoch": 0.7683286818376985, "grad_norm": 0.197265625, "learning_rate": 0.0009282766126779763, "loss": 2.0786, "step": 28631 }, { "epoch": 0.7683555173894375, "grad_norm": 0.1943359375, "learning_rate": 0.0009282530893884816, "loss": 1.9424, "step": 28632 }, { "epoch": 0.7683823529411765, "grad_norm": 0.197265625, "learning_rate": 0.0009282295653788963, "loss": 2.1147, "step": 28633 }, { "epoch": 0.7684091884929154, "grad_norm": 0.197265625, "learning_rate": 0.0009282060406492717, "loss": 2.0193, "step": 28634 }, { "epoch": 0.7684360240446544, "grad_norm": 0.201171875, "learning_rate": 0.0009281825151996597, "loss": 1.9632, "step": 28635 }, { "epoch": 0.7684628595963933, "grad_norm": 0.1962890625, "learning_rate": 0.0009281589890301119, "loss": 2.0086, "step": 28636 }, { "epoch": 0.7684896951481323, "grad_norm": 0.1982421875, "learning_rate": 0.0009281354621406795, "loss": 2.0422, "step": 28637 }, { "epoch": 0.7685165306998711, "grad_norm": 0.1953125, "learning_rate": 0.0009281119345314148, "loss": 1.9964, "step": 28638 }, { "epoch": 0.7685433662516101, "grad_norm": 0.1953125, "learning_rate": 0.0009280884062023691, "loss": 1.949, "step": 28639 }, { "epoch": 0.7685702018033491, "grad_norm": 0.1943359375, "learning_rate": 0.0009280648771535937, "loss": 2.0509, "step": 28640 }, { "epoch": 0.768597037355088, "grad_norm": 0.1904296875, "learning_rate": 0.0009280413473851405, "loss": 1.9613, "step": 28641 }, { "epoch": 0.768623872906827, "grad_norm": 0.2001953125, "learning_rate": 0.0009280178168970613, "loss": 2.0408, "step": 28642 }, { "epoch": 0.7686507084585659, "grad_norm": 0.201171875, "learning_rate": 0.0009279942856894074, "loss": 2.0014, "step": 28643 }, { "epoch": 0.7686775440103049, "grad_norm": 0.201171875, "learning_rate": 0.0009279707537622306, "loss": 2.0074, "step": 28644 }, { "epoch": 0.7687043795620438, "grad_norm": 0.1962890625, "learning_rate": 0.0009279472211155822, "loss": 1.9681, "step": 28645 }, { "epoch": 0.7687312151137827, "grad_norm": 0.19921875, "learning_rate": 0.0009279236877495143, "loss": 1.9777, "step": 28646 }, { "epoch": 0.7687580506655217, "grad_norm": 0.193359375, "learning_rate": 0.0009279001536640783, "loss": 1.9955, "step": 28647 }, { "epoch": 0.7687848862172606, "grad_norm": 0.2080078125, "learning_rate": 0.000927876618859326, "loss": 2.1228, "step": 28648 }, { "epoch": 0.7688117217689996, "grad_norm": 0.2041015625, "learning_rate": 0.0009278530833353086, "loss": 2.0629, "step": 28649 }, { "epoch": 0.7688385573207385, "grad_norm": 0.203125, "learning_rate": 0.0009278295470920779, "loss": 2.0201, "step": 28650 }, { "epoch": 0.7688653928724775, "grad_norm": 0.19921875, "learning_rate": 0.0009278060101296857, "loss": 2.0444, "step": 28651 }, { "epoch": 0.7688922284242165, "grad_norm": 0.19921875, "learning_rate": 0.0009277824724481837, "loss": 1.9942, "step": 28652 }, { "epoch": 0.7689190639759553, "grad_norm": 0.193359375, "learning_rate": 0.0009277589340476233, "loss": 1.9761, "step": 28653 }, { "epoch": 0.7689458995276943, "grad_norm": 0.19921875, "learning_rate": 0.0009277353949280562, "loss": 2.0209, "step": 28654 }, { "epoch": 0.7689727350794332, "grad_norm": 0.1953125, "learning_rate": 0.0009277118550895342, "loss": 2.0195, "step": 28655 }, { "epoch": 0.7689995706311722, "grad_norm": 0.193359375, "learning_rate": 0.0009276883145321088, "loss": 2.0032, "step": 28656 }, { "epoch": 0.7690264061829111, "grad_norm": 0.1943359375, "learning_rate": 0.0009276647732558314, "loss": 2.0457, "step": 28657 }, { "epoch": 0.7690532417346501, "grad_norm": 0.1953125, "learning_rate": 0.0009276412312607541, "loss": 2.0089, "step": 28658 }, { "epoch": 0.769080077286389, "grad_norm": 0.19921875, "learning_rate": 0.0009276176885469283, "loss": 1.9715, "step": 28659 }, { "epoch": 0.7691069128381279, "grad_norm": 0.197265625, "learning_rate": 0.0009275941451144056, "loss": 2.0649, "step": 28660 }, { "epoch": 0.7691337483898669, "grad_norm": 0.2021484375, "learning_rate": 0.0009275706009632377, "loss": 2.041, "step": 28661 }, { "epoch": 0.7691605839416058, "grad_norm": 0.1953125, "learning_rate": 0.0009275470560934766, "loss": 1.9581, "step": 28662 }, { "epoch": 0.7691874194933448, "grad_norm": 0.201171875, "learning_rate": 0.0009275235105051733, "loss": 1.9811, "step": 28663 }, { "epoch": 0.7692142550450837, "grad_norm": 0.1953125, "learning_rate": 0.0009274999641983799, "loss": 2.0354, "step": 28664 }, { "epoch": 0.7692410905968227, "grad_norm": 0.1923828125, "learning_rate": 0.0009274764171731478, "loss": 1.9872, "step": 28665 }, { "epoch": 0.7692679261485617, "grad_norm": 0.205078125, "learning_rate": 0.0009274528694295289, "loss": 2.0728, "step": 28666 }, { "epoch": 0.7692947617003005, "grad_norm": 0.2041015625, "learning_rate": 0.0009274293209675748, "loss": 2.0619, "step": 28667 }, { "epoch": 0.7693215972520395, "grad_norm": 0.1962890625, "learning_rate": 0.0009274057717873368, "loss": 2.0677, "step": 28668 }, { "epoch": 0.7693484328037784, "grad_norm": 0.1953125, "learning_rate": 0.0009273822218888672, "loss": 2.0512, "step": 28669 }, { "epoch": 0.7693752683555174, "grad_norm": 0.1982421875, "learning_rate": 0.0009273586712722173, "loss": 1.9936, "step": 28670 }, { "epoch": 0.7694021039072564, "grad_norm": 0.2021484375, "learning_rate": 0.0009273351199374385, "loss": 1.9793, "step": 28671 }, { "epoch": 0.7694289394589953, "grad_norm": 0.193359375, "learning_rate": 0.000927311567884583, "loss": 1.9949, "step": 28672 }, { "epoch": 0.7694557750107343, "grad_norm": 0.1982421875, "learning_rate": 0.0009272880151137021, "loss": 2.0627, "step": 28673 }, { "epoch": 0.7694826105624731, "grad_norm": 0.1982421875, "learning_rate": 0.0009272644616248477, "loss": 2.0324, "step": 28674 }, { "epoch": 0.7695094461142121, "grad_norm": 0.1982421875, "learning_rate": 0.0009272409074180713, "loss": 1.8948, "step": 28675 }, { "epoch": 0.769536281665951, "grad_norm": 0.1962890625, "learning_rate": 0.0009272173524934247, "loss": 2.0185, "step": 28676 }, { "epoch": 0.76956311721769, "grad_norm": 0.2109375, "learning_rate": 0.0009271937968509593, "loss": 2.008, "step": 28677 }, { "epoch": 0.769589952769429, "grad_norm": 0.1943359375, "learning_rate": 0.0009271702404907271, "loss": 1.9879, "step": 28678 }, { "epoch": 0.7696167883211679, "grad_norm": 0.201171875, "learning_rate": 0.0009271466834127796, "loss": 2.0476, "step": 28679 }, { "epoch": 0.7696436238729069, "grad_norm": 0.20703125, "learning_rate": 0.0009271231256171687, "loss": 2.1087, "step": 28680 }, { "epoch": 0.7696704594246457, "grad_norm": 0.1904296875, "learning_rate": 0.0009270995671039456, "loss": 1.9949, "step": 28681 }, { "epoch": 0.7696972949763847, "grad_norm": 0.205078125, "learning_rate": 0.0009270760078731625, "loss": 2.038, "step": 28682 }, { "epoch": 0.7697241305281236, "grad_norm": 0.2001953125, "learning_rate": 0.0009270524479248707, "loss": 2.0384, "step": 28683 }, { "epoch": 0.7697509660798626, "grad_norm": 0.1904296875, "learning_rate": 0.0009270288872591223, "loss": 1.9291, "step": 28684 }, { "epoch": 0.7697778016316016, "grad_norm": 0.193359375, "learning_rate": 0.0009270053258759686, "loss": 2.0055, "step": 28685 }, { "epoch": 0.7698046371833405, "grad_norm": 0.1953125, "learning_rate": 0.0009269817637754614, "loss": 1.9342, "step": 28686 }, { "epoch": 0.7698314727350795, "grad_norm": 0.203125, "learning_rate": 0.0009269582009576525, "loss": 2.0371, "step": 28687 }, { "epoch": 0.7698583082868183, "grad_norm": 0.1923828125, "learning_rate": 0.0009269346374225934, "loss": 2.0168, "step": 28688 }, { "epoch": 0.7698851438385573, "grad_norm": 0.1982421875, "learning_rate": 0.0009269110731703359, "loss": 2.0055, "step": 28689 }, { "epoch": 0.7699119793902963, "grad_norm": 0.1923828125, "learning_rate": 0.0009268875082009317, "loss": 1.9649, "step": 28690 }, { "epoch": 0.7699388149420352, "grad_norm": 0.1962890625, "learning_rate": 0.0009268639425144324, "loss": 2.0012, "step": 28691 }, { "epoch": 0.7699656504937742, "grad_norm": 0.1953125, "learning_rate": 0.0009268403761108899, "loss": 2.0641, "step": 28692 }, { "epoch": 0.7699924860455131, "grad_norm": 0.2021484375, "learning_rate": 0.0009268168089903557, "loss": 1.9899, "step": 28693 }, { "epoch": 0.7700193215972521, "grad_norm": 0.1943359375, "learning_rate": 0.0009267932411528816, "loss": 2.002, "step": 28694 }, { "epoch": 0.7700461571489909, "grad_norm": 0.1982421875, "learning_rate": 0.0009267696725985193, "loss": 2.008, "step": 28695 }, { "epoch": 0.7700729927007299, "grad_norm": 0.1953125, "learning_rate": 0.0009267461033273203, "loss": 2.0242, "step": 28696 }, { "epoch": 0.7700998282524689, "grad_norm": 0.1962890625, "learning_rate": 0.0009267225333393367, "loss": 1.9891, "step": 28697 }, { "epoch": 0.7701266638042078, "grad_norm": 0.197265625, "learning_rate": 0.00092669896263462, "loss": 1.9506, "step": 28698 }, { "epoch": 0.7701534993559468, "grad_norm": 0.2041015625, "learning_rate": 0.0009266753912132218, "loss": 2.1183, "step": 28699 }, { "epoch": 0.7701803349076857, "grad_norm": 0.1982421875, "learning_rate": 0.000926651819075194, "loss": 2.0414, "step": 28700 }, { "epoch": 0.7702071704594247, "grad_norm": 0.1962890625, "learning_rate": 0.0009266282462205881, "loss": 1.974, "step": 28701 }, { "epoch": 0.7702340060111635, "grad_norm": 0.193359375, "learning_rate": 0.0009266046726494558, "loss": 2.0073, "step": 28702 }, { "epoch": 0.7702608415629025, "grad_norm": 0.2001953125, "learning_rate": 0.0009265810983618492, "loss": 2.0284, "step": 28703 }, { "epoch": 0.7702876771146415, "grad_norm": 0.201171875, "learning_rate": 0.0009265575233578195, "loss": 2.0191, "step": 28704 }, { "epoch": 0.7703145126663804, "grad_norm": 0.2041015625, "learning_rate": 0.0009265339476374188, "loss": 2.0334, "step": 28705 }, { "epoch": 0.7703413482181194, "grad_norm": 0.189453125, "learning_rate": 0.0009265103712006988, "loss": 1.8943, "step": 28706 }, { "epoch": 0.7703681837698583, "grad_norm": 0.197265625, "learning_rate": 0.0009264867940477108, "loss": 1.9109, "step": 28707 }, { "epoch": 0.7703950193215973, "grad_norm": 0.1982421875, "learning_rate": 0.000926463216178507, "loss": 2.0023, "step": 28708 }, { "epoch": 0.7704218548733361, "grad_norm": 0.1982421875, "learning_rate": 0.0009264396375931391, "loss": 2.0892, "step": 28709 }, { "epoch": 0.7704486904250751, "grad_norm": 0.189453125, "learning_rate": 0.0009264160582916585, "loss": 1.9898, "step": 28710 }, { "epoch": 0.7704755259768141, "grad_norm": 0.205078125, "learning_rate": 0.0009263924782741173, "loss": 2.0107, "step": 28711 }, { "epoch": 0.770502361528553, "grad_norm": 0.1943359375, "learning_rate": 0.0009263688975405668, "loss": 2.0311, "step": 28712 }, { "epoch": 0.770529197080292, "grad_norm": 0.2021484375, "learning_rate": 0.0009263453160910589, "loss": 2.0797, "step": 28713 }, { "epoch": 0.7705560326320309, "grad_norm": 0.1982421875, "learning_rate": 0.0009263217339256456, "loss": 2.0071, "step": 28714 }, { "epoch": 0.7705828681837699, "grad_norm": 0.1923828125, "learning_rate": 0.0009262981510443784, "loss": 1.9206, "step": 28715 }, { "epoch": 0.7706097037355089, "grad_norm": 0.1943359375, "learning_rate": 0.000926274567447309, "loss": 2.0356, "step": 28716 }, { "epoch": 0.7706365392872477, "grad_norm": 0.197265625, "learning_rate": 0.0009262509831344892, "loss": 1.9517, "step": 28717 }, { "epoch": 0.7706633748389867, "grad_norm": 0.1982421875, "learning_rate": 0.0009262273981059706, "loss": 2.0865, "step": 28718 }, { "epoch": 0.7706902103907256, "grad_norm": 0.1943359375, "learning_rate": 0.0009262038123618051, "loss": 1.9702, "step": 28719 }, { "epoch": 0.7707170459424646, "grad_norm": 0.19921875, "learning_rate": 0.0009261802259020447, "loss": 1.9598, "step": 28720 }, { "epoch": 0.7707438814942035, "grad_norm": 0.19140625, "learning_rate": 0.0009261566387267405, "loss": 1.9179, "step": 28721 }, { "epoch": 0.7707707170459425, "grad_norm": 0.19140625, "learning_rate": 0.0009261330508359447, "loss": 1.9442, "step": 28722 }, { "epoch": 0.7707975525976815, "grad_norm": 0.1982421875, "learning_rate": 0.0009261094622297091, "loss": 2.1038, "step": 28723 }, { "epoch": 0.7708243881494203, "grad_norm": 0.2001953125, "learning_rate": 0.000926085872908085, "loss": 2.0331, "step": 28724 }, { "epoch": 0.7708512237011593, "grad_norm": 0.1953125, "learning_rate": 0.0009260622828711248, "loss": 1.9524, "step": 28725 }, { "epoch": 0.7708780592528982, "grad_norm": 0.1875, "learning_rate": 0.0009260386921188795, "loss": 1.9153, "step": 28726 }, { "epoch": 0.7709048948046372, "grad_norm": 0.1953125, "learning_rate": 0.0009260151006514015, "loss": 2.0406, "step": 28727 }, { "epoch": 0.7709317303563761, "grad_norm": 0.19140625, "learning_rate": 0.0009259915084687421, "loss": 1.9472, "step": 28728 }, { "epoch": 0.7709585659081151, "grad_norm": 0.197265625, "learning_rate": 0.0009259679155709533, "loss": 1.9509, "step": 28729 }, { "epoch": 0.7709854014598541, "grad_norm": 0.2001953125, "learning_rate": 0.0009259443219580868, "loss": 2.0614, "step": 28730 }, { "epoch": 0.7710122370115929, "grad_norm": 0.1962890625, "learning_rate": 0.0009259207276301944, "loss": 1.9352, "step": 28731 }, { "epoch": 0.7710390725633319, "grad_norm": 0.1904296875, "learning_rate": 0.0009258971325873278, "loss": 1.9553, "step": 28732 }, { "epoch": 0.7710659081150708, "grad_norm": 0.1943359375, "learning_rate": 0.0009258735368295386, "loss": 1.9791, "step": 28733 }, { "epoch": 0.7710927436668098, "grad_norm": 0.1943359375, "learning_rate": 0.0009258499403568789, "loss": 1.9582, "step": 28734 }, { "epoch": 0.7711195792185487, "grad_norm": 0.1962890625, "learning_rate": 0.0009258263431694003, "loss": 2.0244, "step": 28735 }, { "epoch": 0.7711464147702877, "grad_norm": 0.2041015625, "learning_rate": 0.0009258027452671545, "loss": 2.0292, "step": 28736 }, { "epoch": 0.7711732503220267, "grad_norm": 0.1953125, "learning_rate": 0.0009257791466501933, "loss": 1.9954, "step": 28737 }, { "epoch": 0.7712000858737655, "grad_norm": 0.1982421875, "learning_rate": 0.0009257555473185687, "loss": 2.0671, "step": 28738 }, { "epoch": 0.7712269214255045, "grad_norm": 0.197265625, "learning_rate": 0.000925731947272332, "loss": 2.0029, "step": 28739 }, { "epoch": 0.7712537569772434, "grad_norm": 0.19140625, "learning_rate": 0.0009257083465115354, "loss": 1.9014, "step": 28740 }, { "epoch": 0.7712805925289824, "grad_norm": 0.19921875, "learning_rate": 0.0009256847450362305, "loss": 2.0523, "step": 28741 }, { "epoch": 0.7713074280807214, "grad_norm": 0.1923828125, "learning_rate": 0.0009256611428464691, "loss": 1.9646, "step": 28742 }, { "epoch": 0.7713342636324603, "grad_norm": 0.1962890625, "learning_rate": 0.000925637539942303, "loss": 1.8906, "step": 28743 }, { "epoch": 0.7713610991841993, "grad_norm": 0.201171875, "learning_rate": 0.0009256139363237838, "loss": 1.9888, "step": 28744 }, { "epoch": 0.7713879347359381, "grad_norm": 0.2001953125, "learning_rate": 0.0009255903319909635, "loss": 2.0739, "step": 28745 }, { "epoch": 0.7714147702876771, "grad_norm": 0.1982421875, "learning_rate": 0.0009255667269438938, "loss": 1.9619, "step": 28746 }, { "epoch": 0.771441605839416, "grad_norm": 0.203125, "learning_rate": 0.0009255431211826264, "loss": 1.987, "step": 28747 }, { "epoch": 0.771468441391155, "grad_norm": 0.201171875, "learning_rate": 0.0009255195147072132, "loss": 2.0707, "step": 28748 }, { "epoch": 0.771495276942894, "grad_norm": 0.2001953125, "learning_rate": 0.0009254959075177061, "loss": 2.1337, "step": 28749 }, { "epoch": 0.7715221124946329, "grad_norm": 0.1943359375, "learning_rate": 0.0009254722996141567, "loss": 1.9377, "step": 28750 }, { "epoch": 0.7715489480463719, "grad_norm": 0.1943359375, "learning_rate": 0.0009254486909966169, "loss": 2.0566, "step": 28751 }, { "epoch": 0.7715757835981107, "grad_norm": 0.193359375, "learning_rate": 0.0009254250816651382, "loss": 2.0478, "step": 28752 }, { "epoch": 0.7716026191498497, "grad_norm": 0.19140625, "learning_rate": 0.0009254014716197726, "loss": 1.9815, "step": 28753 }, { "epoch": 0.7716294547015886, "grad_norm": 0.1953125, "learning_rate": 0.0009253778608605721, "loss": 1.9484, "step": 28754 }, { "epoch": 0.7716562902533276, "grad_norm": 0.193359375, "learning_rate": 0.0009253542493875882, "loss": 2.0156, "step": 28755 }, { "epoch": 0.7716831258050666, "grad_norm": 0.1982421875, "learning_rate": 0.0009253306372008727, "loss": 2.0712, "step": 28756 }, { "epoch": 0.7717099613568055, "grad_norm": 0.193359375, "learning_rate": 0.0009253070243004777, "loss": 2.0303, "step": 28757 }, { "epoch": 0.7717367969085445, "grad_norm": 0.19921875, "learning_rate": 0.0009252834106864546, "loss": 2.119, "step": 28758 }, { "epoch": 0.7717636324602833, "grad_norm": 0.1962890625, "learning_rate": 0.0009252597963588556, "loss": 1.9957, "step": 28759 }, { "epoch": 0.7717904680120223, "grad_norm": 0.1953125, "learning_rate": 0.0009252361813177322, "loss": 1.9559, "step": 28760 }, { "epoch": 0.7718173035637613, "grad_norm": 0.19140625, "learning_rate": 0.0009252125655631362, "loss": 1.9187, "step": 28761 }, { "epoch": 0.7718441391155002, "grad_norm": 0.19140625, "learning_rate": 0.0009251889490951197, "loss": 2.0174, "step": 28762 }, { "epoch": 0.7718709746672392, "grad_norm": 0.1923828125, "learning_rate": 0.0009251653319137342, "loss": 1.9897, "step": 28763 }, { "epoch": 0.7718978102189781, "grad_norm": 0.1923828125, "learning_rate": 0.0009251417140190317, "loss": 1.9648, "step": 28764 }, { "epoch": 0.7719246457707171, "grad_norm": 0.2001953125, "learning_rate": 0.0009251180954110639, "loss": 2.0211, "step": 28765 }, { "epoch": 0.771951481322456, "grad_norm": 0.19921875, "learning_rate": 0.0009250944760898826, "loss": 2.0502, "step": 28766 }, { "epoch": 0.7719783168741949, "grad_norm": 0.197265625, "learning_rate": 0.0009250708560555398, "loss": 1.998, "step": 28767 }, { "epoch": 0.7720051524259339, "grad_norm": 0.2021484375, "learning_rate": 0.0009250472353080871, "loss": 2.0226, "step": 28768 }, { "epoch": 0.7720319879776728, "grad_norm": 0.1904296875, "learning_rate": 0.0009250236138475763, "loss": 1.9164, "step": 28769 }, { "epoch": 0.7720588235294118, "grad_norm": 0.19140625, "learning_rate": 0.0009249999916740594, "loss": 1.9823, "step": 28770 }, { "epoch": 0.7720856590811507, "grad_norm": 0.1953125, "learning_rate": 0.0009249763687875881, "loss": 2.0262, "step": 28771 }, { "epoch": 0.7721124946328897, "grad_norm": 0.197265625, "learning_rate": 0.0009249527451882142, "loss": 2.0234, "step": 28772 }, { "epoch": 0.7721393301846285, "grad_norm": 0.2001953125, "learning_rate": 0.0009249291208759898, "loss": 2.1228, "step": 28773 }, { "epoch": 0.7721661657363675, "grad_norm": 0.2001953125, "learning_rate": 0.0009249054958509663, "loss": 1.9692, "step": 28774 }, { "epoch": 0.7721930012881065, "grad_norm": 0.19921875, "learning_rate": 0.0009248818701131959, "loss": 2.0612, "step": 28775 }, { "epoch": 0.7722198368398454, "grad_norm": 0.189453125, "learning_rate": 0.00092485824366273, "loss": 2.0116, "step": 28776 }, { "epoch": 0.7722466723915844, "grad_norm": 0.197265625, "learning_rate": 0.0009248346164996209, "loss": 1.9863, "step": 28777 }, { "epoch": 0.7722735079433233, "grad_norm": 0.19140625, "learning_rate": 0.0009248109886239202, "loss": 2.0156, "step": 28778 }, { "epoch": 0.7723003434950623, "grad_norm": 0.205078125, "learning_rate": 0.0009247873600356797, "loss": 2.0865, "step": 28779 }, { "epoch": 0.7723271790468011, "grad_norm": 0.2021484375, "learning_rate": 0.0009247637307349513, "loss": 2.0037, "step": 28780 }, { "epoch": 0.7723540145985401, "grad_norm": 0.2001953125, "learning_rate": 0.0009247401007217869, "loss": 2.0303, "step": 28781 }, { "epoch": 0.7723808501502791, "grad_norm": 0.19140625, "learning_rate": 0.0009247164699962381, "loss": 2.0263, "step": 28782 }, { "epoch": 0.772407685702018, "grad_norm": 0.19140625, "learning_rate": 0.000924692838558357, "loss": 2.0178, "step": 28783 }, { "epoch": 0.772434521253757, "grad_norm": 0.1943359375, "learning_rate": 0.0009246692064081953, "loss": 2.0573, "step": 28784 }, { "epoch": 0.7724613568054959, "grad_norm": 0.197265625, "learning_rate": 0.000924645573545805, "loss": 2.053, "step": 28785 }, { "epoch": 0.7724881923572349, "grad_norm": 0.1953125, "learning_rate": 0.0009246219399712377, "loss": 1.9672, "step": 28786 }, { "epoch": 0.7725150279089739, "grad_norm": 0.19140625, "learning_rate": 0.0009245983056845454, "loss": 1.9583, "step": 28787 }, { "epoch": 0.7725418634607127, "grad_norm": 0.193359375, "learning_rate": 0.00092457467068578, "loss": 2.1592, "step": 28788 }, { "epoch": 0.7725686990124517, "grad_norm": 0.201171875, "learning_rate": 0.000924551034974993, "loss": 1.9965, "step": 28789 }, { "epoch": 0.7725955345641906, "grad_norm": 0.2001953125, "learning_rate": 0.0009245273985522368, "loss": 2.0494, "step": 28790 }, { "epoch": 0.7726223701159296, "grad_norm": 0.1982421875, "learning_rate": 0.0009245037614175628, "loss": 2.0387, "step": 28791 }, { "epoch": 0.7726492056676685, "grad_norm": 0.201171875, "learning_rate": 0.0009244801235710231, "loss": 2.0225, "step": 28792 }, { "epoch": 0.7726760412194075, "grad_norm": 0.2001953125, "learning_rate": 0.0009244564850126695, "loss": 1.9932, "step": 28793 }, { "epoch": 0.7727028767711465, "grad_norm": 0.1962890625, "learning_rate": 0.0009244328457425537, "loss": 2.0293, "step": 28794 }, { "epoch": 0.7727297123228853, "grad_norm": 0.193359375, "learning_rate": 0.0009244092057607277, "loss": 2.0206, "step": 28795 }, { "epoch": 0.7727565478746243, "grad_norm": 0.1943359375, "learning_rate": 0.0009243855650672434, "loss": 2.0361, "step": 28796 }, { "epoch": 0.7727833834263632, "grad_norm": 0.1962890625, "learning_rate": 0.0009243619236621527, "loss": 2.0138, "step": 28797 }, { "epoch": 0.7728102189781022, "grad_norm": 0.1923828125, "learning_rate": 0.0009243382815455072, "loss": 2.0653, "step": 28798 }, { "epoch": 0.7728370545298411, "grad_norm": 0.2021484375, "learning_rate": 0.0009243146387173591, "loss": 2.0741, "step": 28799 }, { "epoch": 0.7728638900815801, "grad_norm": 0.2001953125, "learning_rate": 0.0009242909951777599, "loss": 2.0493, "step": 28800 }, { "epoch": 0.7728907256333191, "grad_norm": 0.1904296875, "learning_rate": 0.0009242673509267617, "loss": 2.0443, "step": 28801 }, { "epoch": 0.7729175611850579, "grad_norm": 0.1953125, "learning_rate": 0.0009242437059644166, "loss": 2.0781, "step": 28802 }, { "epoch": 0.7729443967367969, "grad_norm": 0.1953125, "learning_rate": 0.0009242200602907759, "loss": 2.0048, "step": 28803 }, { "epoch": 0.7729712322885358, "grad_norm": 0.1943359375, "learning_rate": 0.0009241964139058918, "loss": 1.9553, "step": 28804 }, { "epoch": 0.7729980678402748, "grad_norm": 0.1923828125, "learning_rate": 0.0009241727668098162, "loss": 2.0349, "step": 28805 }, { "epoch": 0.7730249033920137, "grad_norm": 0.2021484375, "learning_rate": 0.000924149119002601, "loss": 2.0068, "step": 28806 }, { "epoch": 0.7730517389437527, "grad_norm": 0.19140625, "learning_rate": 0.000924125470484298, "loss": 1.9209, "step": 28807 }, { "epoch": 0.7730785744954917, "grad_norm": 0.193359375, "learning_rate": 0.0009241018212549591, "loss": 2.0277, "step": 28808 }, { "epoch": 0.7731054100472305, "grad_norm": 0.19140625, "learning_rate": 0.000924078171314636, "loss": 2.033, "step": 28809 }, { "epoch": 0.7731322455989695, "grad_norm": 0.1962890625, "learning_rate": 0.000924054520663381, "loss": 1.9882, "step": 28810 }, { "epoch": 0.7731590811507084, "grad_norm": 0.1923828125, "learning_rate": 0.0009240308693012454, "loss": 1.9465, "step": 28811 }, { "epoch": 0.7731859167024474, "grad_norm": 0.1904296875, "learning_rate": 0.0009240072172282816, "loss": 1.9894, "step": 28812 }, { "epoch": 0.7732127522541864, "grad_norm": 0.1943359375, "learning_rate": 0.0009239835644445412, "loss": 1.9406, "step": 28813 }, { "epoch": 0.7732395878059253, "grad_norm": 0.2060546875, "learning_rate": 0.0009239599109500762, "loss": 2.0781, "step": 28814 }, { "epoch": 0.7732664233576643, "grad_norm": 0.2001953125, "learning_rate": 0.0009239362567449384, "loss": 1.9834, "step": 28815 }, { "epoch": 0.7732932589094031, "grad_norm": 0.203125, "learning_rate": 0.00092391260182918, "loss": 2.0754, "step": 28816 }, { "epoch": 0.7733200944611421, "grad_norm": 0.2021484375, "learning_rate": 0.0009238889462028525, "loss": 1.9996, "step": 28817 }, { "epoch": 0.773346930012881, "grad_norm": 0.193359375, "learning_rate": 0.0009238652898660079, "loss": 1.9948, "step": 28818 }, { "epoch": 0.77337376556462, "grad_norm": 0.1962890625, "learning_rate": 0.0009238416328186981, "loss": 2.0145, "step": 28819 }, { "epoch": 0.773400601116359, "grad_norm": 0.19140625, "learning_rate": 0.0009238179750609751, "loss": 2.0743, "step": 28820 }, { "epoch": 0.7734274366680979, "grad_norm": 0.201171875, "learning_rate": 0.0009237943165928907, "loss": 2.1056, "step": 28821 }, { "epoch": 0.7734542722198369, "grad_norm": 0.1943359375, "learning_rate": 0.0009237706574144968, "loss": 2.0908, "step": 28822 }, { "epoch": 0.7734811077715757, "grad_norm": 0.1943359375, "learning_rate": 0.0009237469975258455, "loss": 2.0644, "step": 28823 }, { "epoch": 0.7735079433233147, "grad_norm": 0.1953125, "learning_rate": 0.0009237233369269883, "loss": 2.0819, "step": 28824 }, { "epoch": 0.7735347788750536, "grad_norm": 0.1982421875, "learning_rate": 0.0009236996756179775, "loss": 2.0213, "step": 28825 }, { "epoch": 0.7735616144267926, "grad_norm": 0.1953125, "learning_rate": 0.0009236760135988649, "loss": 2.0028, "step": 28826 }, { "epoch": 0.7735884499785316, "grad_norm": 0.1953125, "learning_rate": 0.0009236523508697022, "loss": 2.1337, "step": 28827 }, { "epoch": 0.7736152855302705, "grad_norm": 0.1962890625, "learning_rate": 0.0009236286874305415, "loss": 2.0539, "step": 28828 }, { "epoch": 0.7736421210820095, "grad_norm": 0.193359375, "learning_rate": 0.0009236050232814348, "loss": 2.0546, "step": 28829 }, { "epoch": 0.7736689566337484, "grad_norm": 0.1962890625, "learning_rate": 0.0009235813584224339, "loss": 1.9985, "step": 28830 }, { "epoch": 0.7736957921854873, "grad_norm": 0.1962890625, "learning_rate": 0.0009235576928535903, "loss": 2.0297, "step": 28831 }, { "epoch": 0.7737226277372263, "grad_norm": 0.1962890625, "learning_rate": 0.0009235340265749567, "loss": 1.9918, "step": 28832 }, { "epoch": 0.7737494632889652, "grad_norm": 0.1884765625, "learning_rate": 0.0009235103595865845, "loss": 1.9837, "step": 28833 }, { "epoch": 0.7737762988407042, "grad_norm": 0.201171875, "learning_rate": 0.0009234866918885257, "loss": 2.0756, "step": 28834 }, { "epoch": 0.7738031343924431, "grad_norm": 0.193359375, "learning_rate": 0.0009234630234808325, "loss": 1.9725, "step": 28835 }, { "epoch": 0.7738299699441821, "grad_norm": 0.2001953125, "learning_rate": 0.0009234393543635564, "loss": 1.9857, "step": 28836 }, { "epoch": 0.773856805495921, "grad_norm": 0.19921875, "learning_rate": 0.0009234156845367496, "loss": 1.991, "step": 28837 }, { "epoch": 0.7738836410476599, "grad_norm": 0.193359375, "learning_rate": 0.0009233920140004639, "loss": 1.9976, "step": 28838 }, { "epoch": 0.7739104765993989, "grad_norm": 0.205078125, "learning_rate": 0.0009233683427547512, "loss": 2.1012, "step": 28839 }, { "epoch": 0.7739373121511378, "grad_norm": 0.203125, "learning_rate": 0.0009233446707996634, "loss": 1.9735, "step": 28840 }, { "epoch": 0.7739641477028768, "grad_norm": 0.193359375, "learning_rate": 0.0009233209981352529, "loss": 2.0028, "step": 28841 }, { "epoch": 0.7739909832546157, "grad_norm": 0.19140625, "learning_rate": 0.0009232973247615708, "loss": 2.0597, "step": 28842 }, { "epoch": 0.7740178188063547, "grad_norm": 0.1953125, "learning_rate": 0.0009232736506786699, "loss": 2.0477, "step": 28843 }, { "epoch": 0.7740446543580936, "grad_norm": 0.1923828125, "learning_rate": 0.0009232499758866015, "loss": 2.0061, "step": 28844 }, { "epoch": 0.7740714899098325, "grad_norm": 0.1953125, "learning_rate": 0.0009232263003854177, "loss": 1.9877, "step": 28845 }, { "epoch": 0.7740983254615715, "grad_norm": 0.1923828125, "learning_rate": 0.0009232026241751706, "loss": 2.082, "step": 28846 }, { "epoch": 0.7741251610133104, "grad_norm": 0.19140625, "learning_rate": 0.000923178947255912, "loss": 2.006, "step": 28847 }, { "epoch": 0.7741519965650494, "grad_norm": 0.193359375, "learning_rate": 0.0009231552696276939, "loss": 1.9983, "step": 28848 }, { "epoch": 0.7741788321167883, "grad_norm": 0.189453125, "learning_rate": 0.0009231315912905683, "loss": 1.9636, "step": 28849 }, { "epoch": 0.7742056676685273, "grad_norm": 0.1904296875, "learning_rate": 0.0009231079122445868, "loss": 2.019, "step": 28850 }, { "epoch": 0.7742325032202662, "grad_norm": 0.1953125, "learning_rate": 0.0009230842324898017, "loss": 2.0293, "step": 28851 }, { "epoch": 0.7742593387720051, "grad_norm": 0.1953125, "learning_rate": 0.0009230605520262648, "loss": 1.9706, "step": 28852 }, { "epoch": 0.7742861743237441, "grad_norm": 0.1953125, "learning_rate": 0.0009230368708540281, "loss": 1.9554, "step": 28853 }, { "epoch": 0.774313009875483, "grad_norm": 0.1962890625, "learning_rate": 0.0009230131889731438, "loss": 1.9691, "step": 28854 }, { "epoch": 0.774339845427222, "grad_norm": 0.1923828125, "learning_rate": 0.0009229895063836633, "loss": 1.979, "step": 28855 }, { "epoch": 0.7743666809789609, "grad_norm": 0.1953125, "learning_rate": 0.000922965823085639, "loss": 2.0211, "step": 28856 }, { "epoch": 0.7743935165306999, "grad_norm": 0.19921875, "learning_rate": 0.0009229421390791228, "loss": 2.0623, "step": 28857 }, { "epoch": 0.7744203520824389, "grad_norm": 0.1943359375, "learning_rate": 0.0009229184543641665, "loss": 1.9664, "step": 28858 }, { "epoch": 0.7744471876341777, "grad_norm": 0.19140625, "learning_rate": 0.0009228947689408219, "loss": 2.0188, "step": 28859 }, { "epoch": 0.7744740231859167, "grad_norm": 0.1982421875, "learning_rate": 0.0009228710828091413, "loss": 2.0338, "step": 28860 }, { "epoch": 0.7745008587376556, "grad_norm": 0.1962890625, "learning_rate": 0.0009228473959691766, "loss": 2.0455, "step": 28861 }, { "epoch": 0.7745276942893946, "grad_norm": 0.1982421875, "learning_rate": 0.0009228237084209796, "loss": 1.9442, "step": 28862 }, { "epoch": 0.7745545298411335, "grad_norm": 0.2001953125, "learning_rate": 0.0009228000201646025, "loss": 2.0523, "step": 28863 }, { "epoch": 0.7745813653928725, "grad_norm": 0.1982421875, "learning_rate": 0.000922776331200097, "loss": 2.0843, "step": 28864 }, { "epoch": 0.7746082009446115, "grad_norm": 0.2021484375, "learning_rate": 0.0009227526415275151, "loss": 2.1456, "step": 28865 }, { "epoch": 0.7746350364963503, "grad_norm": 0.1962890625, "learning_rate": 0.0009227289511469091, "loss": 1.9514, "step": 28866 }, { "epoch": 0.7746618720480893, "grad_norm": 0.1875, "learning_rate": 0.0009227052600583305, "loss": 2.024, "step": 28867 }, { "epoch": 0.7746887075998282, "grad_norm": 0.2041015625, "learning_rate": 0.0009226815682618316, "loss": 2.0855, "step": 28868 }, { "epoch": 0.7747155431515672, "grad_norm": 0.1884765625, "learning_rate": 0.0009226578757574645, "loss": 1.9363, "step": 28869 }, { "epoch": 0.7747423787033061, "grad_norm": 0.1953125, "learning_rate": 0.0009226341825452805, "loss": 2.0308, "step": 28870 }, { "epoch": 0.7747692142550451, "grad_norm": 0.189453125, "learning_rate": 0.0009226104886253324, "loss": 2.0017, "step": 28871 }, { "epoch": 0.7747960498067841, "grad_norm": 0.1982421875, "learning_rate": 0.0009225867939976716, "loss": 2.0996, "step": 28872 }, { "epoch": 0.774822885358523, "grad_norm": 0.1982421875, "learning_rate": 0.0009225630986623503, "loss": 2.1129, "step": 28873 }, { "epoch": 0.7748497209102619, "grad_norm": 0.193359375, "learning_rate": 0.0009225394026194205, "loss": 2.0609, "step": 28874 }, { "epoch": 0.7748765564620008, "grad_norm": 0.2001953125, "learning_rate": 0.0009225157058689342, "loss": 1.9224, "step": 28875 }, { "epoch": 0.7749033920137398, "grad_norm": 0.193359375, "learning_rate": 0.0009224920084109433, "loss": 1.9392, "step": 28876 }, { "epoch": 0.7749302275654787, "grad_norm": 0.1953125, "learning_rate": 0.0009224683102454997, "loss": 1.9967, "step": 28877 }, { "epoch": 0.7749570631172177, "grad_norm": 0.1923828125, "learning_rate": 0.0009224446113726555, "loss": 2.0173, "step": 28878 }, { "epoch": 0.7749838986689567, "grad_norm": 0.197265625, "learning_rate": 0.000922420911792463, "loss": 2.0605, "step": 28879 }, { "epoch": 0.7750107342206956, "grad_norm": 0.197265625, "learning_rate": 0.0009223972115049736, "loss": 2.0534, "step": 28880 }, { "epoch": 0.7750375697724345, "grad_norm": 0.19921875, "learning_rate": 0.0009223735105102395, "loss": 2.0046, "step": 28881 }, { "epoch": 0.7750644053241734, "grad_norm": 0.1962890625, "learning_rate": 0.0009223498088083129, "loss": 2.0009, "step": 28882 }, { "epoch": 0.7750912408759124, "grad_norm": 0.1943359375, "learning_rate": 0.0009223261063992459, "loss": 1.9732, "step": 28883 }, { "epoch": 0.7751180764276514, "grad_norm": 0.1875, "learning_rate": 0.0009223024032830897, "loss": 2.0038, "step": 28884 }, { "epoch": 0.7751449119793903, "grad_norm": 0.205078125, "learning_rate": 0.0009222786994598972, "loss": 2.0063, "step": 28885 }, { "epoch": 0.7751717475311293, "grad_norm": 0.19921875, "learning_rate": 0.00092225499492972, "loss": 2.005, "step": 28886 }, { "epoch": 0.7751985830828682, "grad_norm": 0.2001953125, "learning_rate": 0.0009222312896926101, "loss": 2.0991, "step": 28887 }, { "epoch": 0.7752254186346071, "grad_norm": 0.197265625, "learning_rate": 0.0009222075837486196, "loss": 2.0155, "step": 28888 }, { "epoch": 0.775252254186346, "grad_norm": 0.1953125, "learning_rate": 0.0009221838770978005, "loss": 2.0771, "step": 28889 }, { "epoch": 0.775279089738085, "grad_norm": 0.1904296875, "learning_rate": 0.0009221601697402047, "loss": 1.9544, "step": 28890 }, { "epoch": 0.775305925289824, "grad_norm": 0.1982421875, "learning_rate": 0.0009221364616758842, "loss": 2.1032, "step": 28891 }, { "epoch": 0.7753327608415629, "grad_norm": 0.1904296875, "learning_rate": 0.0009221127529048911, "loss": 2.0557, "step": 28892 }, { "epoch": 0.7753595963933019, "grad_norm": 0.1953125, "learning_rate": 0.0009220890434272775, "loss": 2.0917, "step": 28893 }, { "epoch": 0.7753864319450408, "grad_norm": 0.1884765625, "learning_rate": 0.0009220653332430954, "loss": 2.0054, "step": 28894 }, { "epoch": 0.7754132674967797, "grad_norm": 0.19921875, "learning_rate": 0.0009220416223523964, "loss": 2.0114, "step": 28895 }, { "epoch": 0.7754401030485186, "grad_norm": 0.1962890625, "learning_rate": 0.000922017910755233, "loss": 2.0163, "step": 28896 }, { "epoch": 0.7754669386002576, "grad_norm": 0.201171875, "learning_rate": 0.000921994198451657, "loss": 2.0096, "step": 28897 }, { "epoch": 0.7754937741519966, "grad_norm": 0.1923828125, "learning_rate": 0.0009219704854417204, "loss": 2.0351, "step": 28898 }, { "epoch": 0.7755206097037355, "grad_norm": 0.1943359375, "learning_rate": 0.0009219467717254753, "loss": 2.0034, "step": 28899 }, { "epoch": 0.7755474452554745, "grad_norm": 0.2001953125, "learning_rate": 0.0009219230573029738, "loss": 2.0812, "step": 28900 }, { "epoch": 0.7755742808072134, "grad_norm": 0.1982421875, "learning_rate": 0.0009218993421742679, "loss": 2.0764, "step": 28901 }, { "epoch": 0.7756011163589523, "grad_norm": 0.1943359375, "learning_rate": 0.0009218756263394094, "loss": 2.0622, "step": 28902 }, { "epoch": 0.7756279519106913, "grad_norm": 0.197265625, "learning_rate": 0.0009218519097984505, "loss": 2.0175, "step": 28903 }, { "epoch": 0.7756547874624302, "grad_norm": 0.197265625, "learning_rate": 0.0009218281925514432, "loss": 2.0543, "step": 28904 }, { "epoch": 0.7756816230141692, "grad_norm": 0.2001953125, "learning_rate": 0.0009218044745984396, "loss": 2.0204, "step": 28905 }, { "epoch": 0.7757084585659081, "grad_norm": 0.201171875, "learning_rate": 0.0009217807559394914, "loss": 2.0276, "step": 28906 }, { "epoch": 0.7757352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.0009217570365746511, "loss": 1.945, "step": 28907 }, { "epoch": 0.775762129669386, "grad_norm": 0.19921875, "learning_rate": 0.0009217333165039706, "loss": 2.0533, "step": 28908 }, { "epoch": 0.775788965221125, "grad_norm": 0.2021484375, "learning_rate": 0.0009217095957275017, "loss": 2.0386, "step": 28909 }, { "epoch": 0.7758158007728639, "grad_norm": 0.1923828125, "learning_rate": 0.0009216858742452968, "loss": 2.0273, "step": 28910 }, { "epoch": 0.7758426363246028, "grad_norm": 0.19140625, "learning_rate": 0.0009216621520574075, "loss": 1.9547, "step": 28911 }, { "epoch": 0.7758694718763418, "grad_norm": 0.19921875, "learning_rate": 0.0009216384291638862, "loss": 1.9887, "step": 28912 }, { "epoch": 0.7758963074280807, "grad_norm": 0.1962890625, "learning_rate": 0.0009216147055647847, "loss": 2.0397, "step": 28913 }, { "epoch": 0.7759231429798197, "grad_norm": 0.197265625, "learning_rate": 0.0009215909812601554, "loss": 2.052, "step": 28914 }, { "epoch": 0.7759499785315586, "grad_norm": 0.1884765625, "learning_rate": 0.0009215672562500498, "loss": 1.9989, "step": 28915 }, { "epoch": 0.7759768140832976, "grad_norm": 0.193359375, "learning_rate": 0.0009215435305345205, "loss": 2.0842, "step": 28916 }, { "epoch": 0.7760036496350365, "grad_norm": 0.197265625, "learning_rate": 0.000921519804113619, "loss": 2.0577, "step": 28917 }, { "epoch": 0.7760304851867754, "grad_norm": 0.1943359375, "learning_rate": 0.0009214960769873978, "loss": 2.0266, "step": 28918 }, { "epoch": 0.7760573207385144, "grad_norm": 0.197265625, "learning_rate": 0.000921472349155909, "loss": 2.1172, "step": 28919 }, { "epoch": 0.7760841562902533, "grad_norm": 0.1875, "learning_rate": 0.0009214486206192042, "loss": 1.9592, "step": 28920 }, { "epoch": 0.7761109918419923, "grad_norm": 0.1962890625, "learning_rate": 0.0009214248913773358, "loss": 2.0091, "step": 28921 }, { "epoch": 0.7761378273937312, "grad_norm": 0.1943359375, "learning_rate": 0.0009214011614303556, "loss": 2.0259, "step": 28922 }, { "epoch": 0.7761646629454702, "grad_norm": 0.197265625, "learning_rate": 0.0009213774307783159, "loss": 2.0209, "step": 28923 }, { "epoch": 0.7761914984972091, "grad_norm": 0.203125, "learning_rate": 0.0009213536994212687, "loss": 2.0073, "step": 28924 }, { "epoch": 0.776218334048948, "grad_norm": 0.1923828125, "learning_rate": 0.0009213299673592661, "loss": 2.0162, "step": 28925 }, { "epoch": 0.776245169600687, "grad_norm": 0.193359375, "learning_rate": 0.00092130623459236, "loss": 2.0828, "step": 28926 }, { "epoch": 0.7762720051524259, "grad_norm": 0.193359375, "learning_rate": 0.0009212825011206026, "loss": 1.9766, "step": 28927 }, { "epoch": 0.7762988407041649, "grad_norm": 0.1962890625, "learning_rate": 0.0009212587669440458, "loss": 2.0733, "step": 28928 }, { "epoch": 0.7763256762559039, "grad_norm": 0.1953125, "learning_rate": 0.0009212350320627417, "loss": 2.0172, "step": 28929 }, { "epoch": 0.7763525118076428, "grad_norm": 0.1884765625, "learning_rate": 0.0009212112964767428, "loss": 1.9586, "step": 28930 }, { "epoch": 0.7763793473593817, "grad_norm": 0.1962890625, "learning_rate": 0.0009211875601861005, "loss": 2.0865, "step": 28931 }, { "epoch": 0.7764061829111206, "grad_norm": 0.1962890625, "learning_rate": 0.0009211638231908674, "loss": 2.0058, "step": 28932 }, { "epoch": 0.7764330184628596, "grad_norm": 0.193359375, "learning_rate": 0.0009211400854910951, "loss": 1.9969, "step": 28933 }, { "epoch": 0.7764598540145985, "grad_norm": 0.193359375, "learning_rate": 0.0009211163470868362, "loss": 2.0095, "step": 28934 }, { "epoch": 0.7764866895663375, "grad_norm": 0.1953125, "learning_rate": 0.0009210926079781423, "loss": 2.0317, "step": 28935 }, { "epoch": 0.7765135251180765, "grad_norm": 0.197265625, "learning_rate": 0.0009210688681650659, "loss": 2.0962, "step": 28936 }, { "epoch": 0.7765403606698154, "grad_norm": 0.193359375, "learning_rate": 0.0009210451276476587, "loss": 1.9944, "step": 28937 }, { "epoch": 0.7765671962215543, "grad_norm": 0.19140625, "learning_rate": 0.0009210213864259731, "loss": 1.894, "step": 28938 }, { "epoch": 0.7765940317732932, "grad_norm": 0.1923828125, "learning_rate": 0.0009209976445000609, "loss": 2.0274, "step": 28939 }, { "epoch": 0.7766208673250322, "grad_norm": 0.19140625, "learning_rate": 0.0009209739018699742, "loss": 2.0188, "step": 28940 }, { "epoch": 0.7766477028767711, "grad_norm": 0.1962890625, "learning_rate": 0.0009209501585357654, "loss": 1.9929, "step": 28941 }, { "epoch": 0.7766745384285101, "grad_norm": 0.201171875, "learning_rate": 0.0009209264144974862, "loss": 2.0009, "step": 28942 }, { "epoch": 0.7767013739802491, "grad_norm": 0.1982421875, "learning_rate": 0.0009209026697551889, "loss": 2.0161, "step": 28943 }, { "epoch": 0.776728209531988, "grad_norm": 0.1953125, "learning_rate": 0.0009208789243089257, "loss": 2.0587, "step": 28944 }, { "epoch": 0.776755045083727, "grad_norm": 0.1953125, "learning_rate": 0.0009208551781587485, "loss": 1.9614, "step": 28945 }, { "epoch": 0.7767818806354658, "grad_norm": 0.1943359375, "learning_rate": 0.0009208314313047094, "loss": 2.0545, "step": 28946 }, { "epoch": 0.7768087161872048, "grad_norm": 0.197265625, "learning_rate": 0.0009208076837468604, "loss": 2.0565, "step": 28947 }, { "epoch": 0.7768355517389438, "grad_norm": 0.19140625, "learning_rate": 0.0009207839354852538, "loss": 1.9821, "step": 28948 }, { "epoch": 0.7768623872906827, "grad_norm": 0.193359375, "learning_rate": 0.0009207601865199418, "loss": 2.0778, "step": 28949 }, { "epoch": 0.7768892228424217, "grad_norm": 0.197265625, "learning_rate": 0.0009207364368509762, "loss": 1.967, "step": 28950 }, { "epoch": 0.7769160583941606, "grad_norm": 0.193359375, "learning_rate": 0.000920712686478409, "loss": 1.9365, "step": 28951 }, { "epoch": 0.7769428939458995, "grad_norm": 0.201171875, "learning_rate": 0.0009206889354022931, "loss": 2.0543, "step": 28952 }, { "epoch": 0.7769697294976384, "grad_norm": 0.19921875, "learning_rate": 0.0009206651836226795, "loss": 2.0604, "step": 28953 }, { "epoch": 0.7769965650493774, "grad_norm": 0.2041015625, "learning_rate": 0.0009206414311396209, "loss": 2.09, "step": 28954 }, { "epoch": 0.7770234006011164, "grad_norm": 0.205078125, "learning_rate": 0.0009206176779531694, "loss": 2.0539, "step": 28955 }, { "epoch": 0.7770502361528553, "grad_norm": 0.1904296875, "learning_rate": 0.0009205939240633772, "loss": 1.9463, "step": 28956 }, { "epoch": 0.7770770717045943, "grad_norm": 0.201171875, "learning_rate": 0.0009205701694702961, "loss": 2.0902, "step": 28957 }, { "epoch": 0.7771039072563332, "grad_norm": 0.193359375, "learning_rate": 0.0009205464141739783, "loss": 2.0223, "step": 28958 }, { "epoch": 0.7771307428080722, "grad_norm": 0.19140625, "learning_rate": 0.0009205226581744761, "loss": 2.0017, "step": 28959 }, { "epoch": 0.777157578359811, "grad_norm": 0.1943359375, "learning_rate": 0.0009204989014718414, "loss": 1.9977, "step": 28960 }, { "epoch": 0.77718441391155, "grad_norm": 0.197265625, "learning_rate": 0.0009204751440661264, "loss": 2.0715, "step": 28961 }, { "epoch": 0.777211249463289, "grad_norm": 0.1923828125, "learning_rate": 0.0009204513859573831, "loss": 2.0015, "step": 28962 }, { "epoch": 0.7772380850150279, "grad_norm": 0.19140625, "learning_rate": 0.0009204276271456641, "loss": 2.0276, "step": 28963 }, { "epoch": 0.7772649205667669, "grad_norm": 0.1982421875, "learning_rate": 0.0009204038676310208, "loss": 2.0591, "step": 28964 }, { "epoch": 0.7772917561185058, "grad_norm": 0.1923828125, "learning_rate": 0.0009203801074135058, "loss": 1.9563, "step": 28965 }, { "epoch": 0.7773185916702448, "grad_norm": 0.1943359375, "learning_rate": 0.0009203563464931712, "loss": 2.0297, "step": 28966 }, { "epoch": 0.7773454272219836, "grad_norm": 0.193359375, "learning_rate": 0.0009203325848700691, "loss": 2.0684, "step": 28967 }, { "epoch": 0.7773722627737226, "grad_norm": 0.1982421875, "learning_rate": 0.0009203088225442512, "loss": 2.059, "step": 28968 }, { "epoch": 0.7773990983254616, "grad_norm": 0.201171875, "learning_rate": 0.0009202850595157702, "loss": 2.1618, "step": 28969 }, { "epoch": 0.7774259338772005, "grad_norm": 0.1962890625, "learning_rate": 0.0009202612957846779, "loss": 1.9931, "step": 28970 }, { "epoch": 0.7774527694289395, "grad_norm": 0.193359375, "learning_rate": 0.0009202375313510267, "loss": 2.0607, "step": 28971 }, { "epoch": 0.7774796049806784, "grad_norm": 0.193359375, "learning_rate": 0.0009202137662148685, "loss": 1.9854, "step": 28972 }, { "epoch": 0.7775064405324174, "grad_norm": 0.1962890625, "learning_rate": 0.0009201900003762554, "loss": 1.9756, "step": 28973 }, { "epoch": 0.7775332760841563, "grad_norm": 0.1962890625, "learning_rate": 0.0009201662338352398, "loss": 1.8857, "step": 28974 }, { "epoch": 0.7775601116358952, "grad_norm": 0.1943359375, "learning_rate": 0.0009201424665918737, "loss": 1.9991, "step": 28975 }, { "epoch": 0.7775869471876342, "grad_norm": 0.19140625, "learning_rate": 0.0009201186986462091, "loss": 2.0139, "step": 28976 }, { "epoch": 0.7776137827393731, "grad_norm": 0.2001953125, "learning_rate": 0.0009200949299982984, "loss": 2.066, "step": 28977 }, { "epoch": 0.7776406182911121, "grad_norm": 0.193359375, "learning_rate": 0.0009200711606481936, "loss": 1.9693, "step": 28978 }, { "epoch": 0.777667453842851, "grad_norm": 0.2060546875, "learning_rate": 0.0009200473905959465, "loss": 2.114, "step": 28979 }, { "epoch": 0.77769428939459, "grad_norm": 0.19921875, "learning_rate": 0.0009200236198416099, "loss": 2.1131, "step": 28980 }, { "epoch": 0.777721124946329, "grad_norm": 0.1962890625, "learning_rate": 0.0009199998483852357, "loss": 2.0723, "step": 28981 }, { "epoch": 0.7777479604980678, "grad_norm": 0.193359375, "learning_rate": 0.0009199760762268757, "loss": 2.0813, "step": 28982 }, { "epoch": 0.7777747960498068, "grad_norm": 0.1923828125, "learning_rate": 0.0009199523033665823, "loss": 1.983, "step": 28983 }, { "epoch": 0.7778016316015457, "grad_norm": 0.1923828125, "learning_rate": 0.000919928529804408, "loss": 1.9997, "step": 28984 }, { "epoch": 0.7778284671532847, "grad_norm": 0.197265625, "learning_rate": 0.0009199047555404044, "loss": 2.0806, "step": 28985 }, { "epoch": 0.7778553027050236, "grad_norm": 0.1943359375, "learning_rate": 0.000919880980574624, "loss": 2.0427, "step": 28986 }, { "epoch": 0.7778821382567626, "grad_norm": 0.197265625, "learning_rate": 0.0009198572049071186, "loss": 2.0743, "step": 28987 }, { "epoch": 0.7779089738085015, "grad_norm": 0.1953125, "learning_rate": 0.0009198334285379409, "loss": 2.111, "step": 28988 }, { "epoch": 0.7779358093602404, "grad_norm": 0.197265625, "learning_rate": 0.0009198096514671427, "loss": 2.0462, "step": 28989 }, { "epoch": 0.7779626449119794, "grad_norm": 0.1962890625, "learning_rate": 0.000919785873694776, "loss": 2.0959, "step": 28990 }, { "epoch": 0.7779894804637183, "grad_norm": 0.193359375, "learning_rate": 0.0009197620952208934, "loss": 1.9875, "step": 28991 }, { "epoch": 0.7780163160154573, "grad_norm": 0.1923828125, "learning_rate": 0.0009197383160455467, "loss": 2.0373, "step": 28992 }, { "epoch": 0.7780431515671962, "grad_norm": 0.193359375, "learning_rate": 0.0009197145361687884, "loss": 2.0729, "step": 28993 }, { "epoch": 0.7780699871189352, "grad_norm": 0.1962890625, "learning_rate": 0.0009196907555906702, "loss": 2.0255, "step": 28994 }, { "epoch": 0.7780968226706741, "grad_norm": 0.193359375, "learning_rate": 0.0009196669743112448, "loss": 2.005, "step": 28995 }, { "epoch": 0.778123658222413, "grad_norm": 0.1953125, "learning_rate": 0.000919643192330564, "loss": 1.968, "step": 28996 }, { "epoch": 0.778150493774152, "grad_norm": 0.197265625, "learning_rate": 0.0009196194096486799, "loss": 2.0722, "step": 28997 }, { "epoch": 0.7781773293258909, "grad_norm": 0.1982421875, "learning_rate": 0.0009195956262656451, "loss": 2.0437, "step": 28998 }, { "epoch": 0.7782041648776299, "grad_norm": 0.1943359375, "learning_rate": 0.0009195718421815115, "loss": 1.965, "step": 28999 }, { "epoch": 0.7782310004293689, "grad_norm": 0.2021484375, "learning_rate": 0.0009195480573963312, "loss": 2.0349, "step": 29000 }, { "epoch": 0.7782578359811078, "grad_norm": 0.1943359375, "learning_rate": 0.0009195242719101563, "loss": 2.0378, "step": 29001 }, { "epoch": 0.7782846715328468, "grad_norm": 0.1953125, "learning_rate": 0.0009195004857230394, "loss": 2.0855, "step": 29002 }, { "epoch": 0.7783115070845856, "grad_norm": 0.1904296875, "learning_rate": 0.0009194766988350325, "loss": 1.9932, "step": 29003 }, { "epoch": 0.7783383426363246, "grad_norm": 0.19921875, "learning_rate": 0.0009194529112461875, "loss": 2.0578, "step": 29004 }, { "epoch": 0.7783651781880635, "grad_norm": 0.1884765625, "learning_rate": 0.0009194291229565569, "loss": 1.953, "step": 29005 }, { "epoch": 0.7783920137398025, "grad_norm": 0.185546875, "learning_rate": 0.0009194053339661928, "loss": 1.9918, "step": 29006 }, { "epoch": 0.7784188492915415, "grad_norm": 0.19140625, "learning_rate": 0.0009193815442751472, "loss": 1.9852, "step": 29007 }, { "epoch": 0.7784456848432804, "grad_norm": 0.1953125, "learning_rate": 0.0009193577538834728, "loss": 1.9796, "step": 29008 }, { "epoch": 0.7784725203950194, "grad_norm": 0.197265625, "learning_rate": 0.0009193339627912211, "loss": 2.0607, "step": 29009 }, { "epoch": 0.7784993559467582, "grad_norm": 0.1953125, "learning_rate": 0.0009193101709984449, "loss": 2.1259, "step": 29010 }, { "epoch": 0.7785261914984972, "grad_norm": 0.197265625, "learning_rate": 0.000919286378505196, "loss": 2.045, "step": 29011 }, { "epoch": 0.7785530270502361, "grad_norm": 0.1943359375, "learning_rate": 0.0009192625853115268, "loss": 2.0553, "step": 29012 }, { "epoch": 0.7785798626019751, "grad_norm": 0.197265625, "learning_rate": 0.000919238791417489, "loss": 2.1669, "step": 29013 }, { "epoch": 0.7786066981537141, "grad_norm": 0.2001953125, "learning_rate": 0.0009192149968231358, "loss": 2.1011, "step": 29014 }, { "epoch": 0.778633533705453, "grad_norm": 0.19140625, "learning_rate": 0.0009191912015285185, "loss": 2.0386, "step": 29015 }, { "epoch": 0.778660369257192, "grad_norm": 0.1884765625, "learning_rate": 0.0009191674055336897, "loss": 2.0395, "step": 29016 }, { "epoch": 0.7786872048089308, "grad_norm": 0.1923828125, "learning_rate": 0.0009191436088387015, "loss": 2.1338, "step": 29017 }, { "epoch": 0.7787140403606698, "grad_norm": 0.1962890625, "learning_rate": 0.0009191198114436061, "loss": 2.0412, "step": 29018 }, { "epoch": 0.7787408759124088, "grad_norm": 0.19140625, "learning_rate": 0.0009190960133484557, "loss": 2.1052, "step": 29019 }, { "epoch": 0.7787677114641477, "grad_norm": 0.1923828125, "learning_rate": 0.0009190722145533025, "loss": 1.9762, "step": 29020 }, { "epoch": 0.7787945470158867, "grad_norm": 0.1953125, "learning_rate": 0.0009190484150581989, "loss": 1.975, "step": 29021 }, { "epoch": 0.7788213825676256, "grad_norm": 0.1904296875, "learning_rate": 0.0009190246148631968, "loss": 1.9934, "step": 29022 }, { "epoch": 0.7788482181193646, "grad_norm": 0.197265625, "learning_rate": 0.0009190008139683487, "loss": 2.0511, "step": 29023 }, { "epoch": 0.7788750536711034, "grad_norm": 0.1982421875, "learning_rate": 0.0009189770123737063, "loss": 1.9756, "step": 29024 }, { "epoch": 0.7789018892228424, "grad_norm": 0.2021484375, "learning_rate": 0.0009189532100793226, "loss": 2.0675, "step": 29025 }, { "epoch": 0.7789287247745814, "grad_norm": 0.1884765625, "learning_rate": 0.0009189294070852492, "loss": 1.9317, "step": 29026 }, { "epoch": 0.7789555603263203, "grad_norm": 0.1943359375, "learning_rate": 0.0009189056033915385, "loss": 1.964, "step": 29027 }, { "epoch": 0.7789823958780593, "grad_norm": 0.1904296875, "learning_rate": 0.0009188817989982428, "loss": 2.0094, "step": 29028 }, { "epoch": 0.7790092314297982, "grad_norm": 0.1962890625, "learning_rate": 0.0009188579939054142, "loss": 2.0221, "step": 29029 }, { "epoch": 0.7790360669815372, "grad_norm": 0.197265625, "learning_rate": 0.0009188341881131051, "loss": 2.1021, "step": 29030 }, { "epoch": 0.779062902533276, "grad_norm": 0.1943359375, "learning_rate": 0.0009188103816213674, "loss": 1.9962, "step": 29031 }, { "epoch": 0.779089738085015, "grad_norm": 0.1962890625, "learning_rate": 0.0009187865744302535, "loss": 2.1001, "step": 29032 }, { "epoch": 0.779116573636754, "grad_norm": 0.193359375, "learning_rate": 0.0009187627665398157, "loss": 2.0682, "step": 29033 }, { "epoch": 0.7791434091884929, "grad_norm": 0.1962890625, "learning_rate": 0.0009187389579501063, "loss": 2.0944, "step": 29034 }, { "epoch": 0.7791702447402319, "grad_norm": 0.1865234375, "learning_rate": 0.0009187151486611772, "loss": 1.8851, "step": 29035 }, { "epoch": 0.7791970802919708, "grad_norm": 0.1923828125, "learning_rate": 0.000918691338673081, "loss": 2.0108, "step": 29036 }, { "epoch": 0.7792239158437098, "grad_norm": 0.1923828125, "learning_rate": 0.0009186675279858697, "loss": 2.0535, "step": 29037 }, { "epoch": 0.7792507513954486, "grad_norm": 0.19140625, "learning_rate": 0.0009186437165995955, "loss": 2.0774, "step": 29038 }, { "epoch": 0.7792775869471876, "grad_norm": 0.19140625, "learning_rate": 0.0009186199045143111, "loss": 1.9735, "step": 29039 }, { "epoch": 0.7793044224989266, "grad_norm": 0.1953125, "learning_rate": 0.000918596091730068, "loss": 1.9801, "step": 29040 }, { "epoch": 0.7793312580506655, "grad_norm": 0.193359375, "learning_rate": 0.0009185722782469189, "loss": 2.0633, "step": 29041 }, { "epoch": 0.7793580936024045, "grad_norm": 0.203125, "learning_rate": 0.0009185484640649159, "loss": 2.0197, "step": 29042 }, { "epoch": 0.7793849291541434, "grad_norm": 0.1943359375, "learning_rate": 0.0009185246491841113, "loss": 2.0313, "step": 29043 }, { "epoch": 0.7794117647058824, "grad_norm": 0.1953125, "learning_rate": 0.0009185008336045575, "loss": 2.071, "step": 29044 }, { "epoch": 0.7794386002576214, "grad_norm": 0.2060546875, "learning_rate": 0.0009184770173263064, "loss": 2.1138, "step": 29045 }, { "epoch": 0.7794654358093602, "grad_norm": 0.1953125, "learning_rate": 0.0009184532003494105, "loss": 2.0773, "step": 29046 }, { "epoch": 0.7794922713610992, "grad_norm": 0.19140625, "learning_rate": 0.000918429382673922, "loss": 2.0079, "step": 29047 }, { "epoch": 0.7795191069128381, "grad_norm": 0.1962890625, "learning_rate": 0.000918405564299893, "loss": 2.1794, "step": 29048 }, { "epoch": 0.7795459424645771, "grad_norm": 0.197265625, "learning_rate": 0.0009183817452273758, "loss": 2.0707, "step": 29049 }, { "epoch": 0.779572778016316, "grad_norm": 0.1962890625, "learning_rate": 0.0009183579254564229, "loss": 2.1401, "step": 29050 }, { "epoch": 0.779599613568055, "grad_norm": 0.1923828125, "learning_rate": 0.0009183341049870863, "loss": 2.0126, "step": 29051 }, { "epoch": 0.779626449119794, "grad_norm": 0.193359375, "learning_rate": 0.0009183102838194184, "loss": 2.0284, "step": 29052 }, { "epoch": 0.7796532846715328, "grad_norm": 0.1953125, "learning_rate": 0.0009182864619534713, "loss": 2.0801, "step": 29053 }, { "epoch": 0.7796801202232718, "grad_norm": 0.1923828125, "learning_rate": 0.0009182626393892975, "loss": 1.9978, "step": 29054 }, { "epoch": 0.7797069557750107, "grad_norm": 0.1904296875, "learning_rate": 0.0009182388161269489, "loss": 2.0114, "step": 29055 }, { "epoch": 0.7797337913267497, "grad_norm": 0.1923828125, "learning_rate": 0.0009182149921664781, "loss": 2.0614, "step": 29056 }, { "epoch": 0.7797606268784886, "grad_norm": 0.1904296875, "learning_rate": 0.0009181911675079373, "loss": 1.9835, "step": 29057 }, { "epoch": 0.7797874624302276, "grad_norm": 0.193359375, "learning_rate": 0.0009181673421513784, "loss": 2.0541, "step": 29058 }, { "epoch": 0.7798142979819666, "grad_norm": 0.1953125, "learning_rate": 0.0009181435160968541, "loss": 2.0036, "step": 29059 }, { "epoch": 0.7798411335337054, "grad_norm": 0.1943359375, "learning_rate": 0.0009181196893444165, "loss": 1.9886, "step": 29060 }, { "epoch": 0.7798679690854444, "grad_norm": 0.19921875, "learning_rate": 0.000918095861894118, "loss": 2.0297, "step": 29061 }, { "epoch": 0.7798948046371833, "grad_norm": 0.1904296875, "learning_rate": 0.0009180720337460108, "loss": 2.0272, "step": 29062 }, { "epoch": 0.7799216401889223, "grad_norm": 0.19140625, "learning_rate": 0.0009180482049001468, "loss": 2.0185, "step": 29063 }, { "epoch": 0.7799484757406612, "grad_norm": 0.1943359375, "learning_rate": 0.000918024375356579, "loss": 2.0446, "step": 29064 }, { "epoch": 0.7799753112924002, "grad_norm": 0.2080078125, "learning_rate": 0.0009180005451153593, "loss": 2.0206, "step": 29065 }, { "epoch": 0.7800021468441392, "grad_norm": 0.197265625, "learning_rate": 0.0009179767141765397, "loss": 2.0903, "step": 29066 }, { "epoch": 0.780028982395878, "grad_norm": 0.19140625, "learning_rate": 0.0009179528825401727, "loss": 1.9992, "step": 29067 }, { "epoch": 0.780055817947617, "grad_norm": 0.201171875, "learning_rate": 0.0009179290502063108, "loss": 2.1159, "step": 29068 }, { "epoch": 0.7800826534993559, "grad_norm": 0.2041015625, "learning_rate": 0.0009179052171750061, "loss": 2.0886, "step": 29069 }, { "epoch": 0.7801094890510949, "grad_norm": 0.1943359375, "learning_rate": 0.0009178813834463109, "loss": 2.0192, "step": 29070 }, { "epoch": 0.7801363246028339, "grad_norm": 0.1904296875, "learning_rate": 0.0009178575490202773, "loss": 2.05, "step": 29071 }, { "epoch": 0.7801631601545728, "grad_norm": 0.1962890625, "learning_rate": 0.000917833713896958, "loss": 2.0695, "step": 29072 }, { "epoch": 0.7801899957063118, "grad_norm": 0.1982421875, "learning_rate": 0.0009178098780764049, "loss": 2.1516, "step": 29073 }, { "epoch": 0.7802168312580506, "grad_norm": 0.205078125, "learning_rate": 0.0009177860415586703, "loss": 2.1345, "step": 29074 }, { "epoch": 0.7802436668097896, "grad_norm": 0.1953125, "learning_rate": 0.0009177622043438068, "loss": 2.0307, "step": 29075 }, { "epoch": 0.7802705023615285, "grad_norm": 0.1953125, "learning_rate": 0.0009177383664318665, "loss": 2.0574, "step": 29076 }, { "epoch": 0.7802973379132675, "grad_norm": 0.193359375, "learning_rate": 0.0009177145278229016, "loss": 2.017, "step": 29077 }, { "epoch": 0.7803241734650065, "grad_norm": 0.1982421875, "learning_rate": 0.0009176906885169646, "loss": 2.0179, "step": 29078 }, { "epoch": 0.7803510090167454, "grad_norm": 0.1904296875, "learning_rate": 0.0009176668485141075, "loss": 1.9598, "step": 29079 }, { "epoch": 0.7803778445684844, "grad_norm": 0.1904296875, "learning_rate": 0.000917643007814383, "loss": 2.0315, "step": 29080 }, { "epoch": 0.7804046801202232, "grad_norm": 0.1943359375, "learning_rate": 0.0009176191664178432, "loss": 2.0602, "step": 29081 }, { "epoch": 0.7804315156719622, "grad_norm": 0.1943359375, "learning_rate": 0.0009175953243245404, "loss": 2.0036, "step": 29082 }, { "epoch": 0.7804583512237011, "grad_norm": 0.1923828125, "learning_rate": 0.0009175714815345267, "loss": 2.0389, "step": 29083 }, { "epoch": 0.7804851867754401, "grad_norm": 0.197265625, "learning_rate": 0.0009175476380478549, "loss": 2.0231, "step": 29084 }, { "epoch": 0.7805120223271791, "grad_norm": 0.1904296875, "learning_rate": 0.0009175237938645768, "loss": 1.9457, "step": 29085 }, { "epoch": 0.780538857878918, "grad_norm": 0.1982421875, "learning_rate": 0.0009174999489847447, "loss": 2.1226, "step": 29086 }, { "epoch": 0.780565693430657, "grad_norm": 0.193359375, "learning_rate": 0.0009174761034084117, "loss": 2.0216, "step": 29087 }, { "epoch": 0.7805925289823958, "grad_norm": 0.1962890625, "learning_rate": 0.000917452257135629, "loss": 2.0476, "step": 29088 }, { "epoch": 0.7806193645341348, "grad_norm": 0.1953125, "learning_rate": 0.0009174284101664496, "loss": 1.9495, "step": 29089 }, { "epoch": 0.7806462000858738, "grad_norm": 0.2021484375, "learning_rate": 0.0009174045625009257, "loss": 2.1144, "step": 29090 }, { "epoch": 0.7806730356376127, "grad_norm": 0.1953125, "learning_rate": 0.0009173807141391097, "loss": 2.0538, "step": 29091 }, { "epoch": 0.7806998711893517, "grad_norm": 0.1875, "learning_rate": 0.0009173568650810537, "loss": 1.9685, "step": 29092 }, { "epoch": 0.7807267067410906, "grad_norm": 0.1943359375, "learning_rate": 0.00091733301532681, "loss": 2.0178, "step": 29093 }, { "epoch": 0.7807535422928296, "grad_norm": 0.1923828125, "learning_rate": 0.0009173091648764311, "loss": 2.0503, "step": 29094 }, { "epoch": 0.7807803778445684, "grad_norm": 0.1962890625, "learning_rate": 0.0009172853137299693, "loss": 2.0327, "step": 29095 }, { "epoch": 0.7808072133963074, "grad_norm": 0.1923828125, "learning_rate": 0.0009172614618874768, "loss": 2.018, "step": 29096 }, { "epoch": 0.7808340489480464, "grad_norm": 0.19140625, "learning_rate": 0.0009172376093490059, "loss": 2.1119, "step": 29097 }, { "epoch": 0.7808608844997853, "grad_norm": 0.1962890625, "learning_rate": 0.0009172137561146093, "loss": 2.0824, "step": 29098 }, { "epoch": 0.7808877200515243, "grad_norm": 0.193359375, "learning_rate": 0.0009171899021843386, "loss": 2.0935, "step": 29099 }, { "epoch": 0.7809145556032632, "grad_norm": 0.1875, "learning_rate": 0.000917166047558247, "loss": 1.9767, "step": 29100 }, { "epoch": 0.7809413911550022, "grad_norm": 0.2041015625, "learning_rate": 0.0009171421922363863, "loss": 2.0108, "step": 29101 }, { "epoch": 0.780968226706741, "grad_norm": 0.19140625, "learning_rate": 0.0009171183362188088, "loss": 2.0718, "step": 29102 }, { "epoch": 0.78099506225848, "grad_norm": 0.19140625, "learning_rate": 0.000917094479505567, "loss": 2.0275, "step": 29103 }, { "epoch": 0.781021897810219, "grad_norm": 0.1923828125, "learning_rate": 0.0009170706220967133, "loss": 2.0583, "step": 29104 }, { "epoch": 0.7810487333619579, "grad_norm": 0.18359375, "learning_rate": 0.0009170467639922999, "loss": 1.9315, "step": 29105 }, { "epoch": 0.7810755689136969, "grad_norm": 0.1923828125, "learning_rate": 0.0009170229051923793, "loss": 2.0991, "step": 29106 }, { "epoch": 0.7811024044654358, "grad_norm": 0.19140625, "learning_rate": 0.0009169990456970035, "loss": 2.0111, "step": 29107 }, { "epoch": 0.7811292400171748, "grad_norm": 0.1923828125, "learning_rate": 0.0009169751855062252, "loss": 2.0451, "step": 29108 }, { "epoch": 0.7811560755689136, "grad_norm": 0.1943359375, "learning_rate": 0.0009169513246200964, "loss": 2.0868, "step": 29109 }, { "epoch": 0.7811829111206526, "grad_norm": 0.2001953125, "learning_rate": 0.0009169274630386699, "loss": 2.0741, "step": 29110 }, { "epoch": 0.7812097466723916, "grad_norm": 0.2021484375, "learning_rate": 0.0009169036007619974, "loss": 2.1254, "step": 29111 }, { "epoch": 0.7812365822241305, "grad_norm": 0.1953125, "learning_rate": 0.0009168797377901322, "loss": 2.1367, "step": 29112 }, { "epoch": 0.7812634177758695, "grad_norm": 0.1962890625, "learning_rate": 0.0009168558741231256, "loss": 2.0011, "step": 29113 }, { "epoch": 0.7812902533276084, "grad_norm": 0.1962890625, "learning_rate": 0.0009168320097610307, "loss": 2.0618, "step": 29114 }, { "epoch": 0.7813170888793474, "grad_norm": 0.1904296875, "learning_rate": 0.0009168081447038995, "loss": 2.0698, "step": 29115 }, { "epoch": 0.7813439244310864, "grad_norm": 0.193359375, "learning_rate": 0.0009167842789517844, "loss": 1.9741, "step": 29116 }, { "epoch": 0.7813707599828252, "grad_norm": 0.197265625, "learning_rate": 0.0009167604125047378, "loss": 2.111, "step": 29117 }, { "epoch": 0.7813975955345642, "grad_norm": 0.193359375, "learning_rate": 0.000916736545362812, "loss": 2.0471, "step": 29118 }, { "epoch": 0.7814244310863031, "grad_norm": 0.1962890625, "learning_rate": 0.0009167126775260596, "loss": 2.0642, "step": 29119 }, { "epoch": 0.7814512666380421, "grad_norm": 0.1943359375, "learning_rate": 0.0009166888089945327, "loss": 2.0426, "step": 29120 }, { "epoch": 0.781478102189781, "grad_norm": 0.1962890625, "learning_rate": 0.0009166649397682836, "loss": 2.0213, "step": 29121 }, { "epoch": 0.78150493774152, "grad_norm": 0.1962890625, "learning_rate": 0.0009166410698473648, "loss": 2.0398, "step": 29122 }, { "epoch": 0.781531773293259, "grad_norm": 0.1923828125, "learning_rate": 0.0009166171992318288, "loss": 2.0493, "step": 29123 }, { "epoch": 0.7815586088449978, "grad_norm": 0.19140625, "learning_rate": 0.0009165933279217277, "loss": 2.1013, "step": 29124 }, { "epoch": 0.7815854443967368, "grad_norm": 0.1865234375, "learning_rate": 0.0009165694559171138, "loss": 2.0443, "step": 29125 }, { "epoch": 0.7816122799484757, "grad_norm": 0.193359375, "learning_rate": 0.00091654558321804, "loss": 2.0428, "step": 29126 }, { "epoch": 0.7816391155002147, "grad_norm": 0.189453125, "learning_rate": 0.0009165217098245582, "loss": 2.0207, "step": 29127 }, { "epoch": 0.7816659510519536, "grad_norm": 0.1943359375, "learning_rate": 0.0009164978357367209, "loss": 2.0787, "step": 29128 }, { "epoch": 0.7816927866036926, "grad_norm": 0.1943359375, "learning_rate": 0.0009164739609545802, "loss": 2.1313, "step": 29129 }, { "epoch": 0.7817196221554316, "grad_norm": 0.201171875, "learning_rate": 0.0009164500854781891, "loss": 2.1559, "step": 29130 }, { "epoch": 0.7817464577071704, "grad_norm": 0.19140625, "learning_rate": 0.0009164262093075994, "loss": 2.0446, "step": 29131 }, { "epoch": 0.7817732932589094, "grad_norm": 0.197265625, "learning_rate": 0.0009164023324428639, "loss": 2.0143, "step": 29132 }, { "epoch": 0.7818001288106483, "grad_norm": 0.1982421875, "learning_rate": 0.0009163784548840345, "loss": 2.0181, "step": 29133 }, { "epoch": 0.7818269643623873, "grad_norm": 0.189453125, "learning_rate": 0.0009163545766311641, "loss": 1.9938, "step": 29134 }, { "epoch": 0.7818537999141262, "grad_norm": 0.201171875, "learning_rate": 0.0009163306976843048, "loss": 2.0001, "step": 29135 }, { "epoch": 0.7818806354658652, "grad_norm": 0.1982421875, "learning_rate": 0.0009163068180435088, "loss": 2.0106, "step": 29136 }, { "epoch": 0.7819074710176042, "grad_norm": 0.189453125, "learning_rate": 0.0009162829377088287, "loss": 1.9496, "step": 29137 }, { "epoch": 0.781934306569343, "grad_norm": 0.1962890625, "learning_rate": 0.0009162590566803172, "loss": 2.102, "step": 29138 }, { "epoch": 0.781961142121082, "grad_norm": 0.197265625, "learning_rate": 0.000916235174958026, "loss": 2.0887, "step": 29139 }, { "epoch": 0.7819879776728209, "grad_norm": 0.193359375, "learning_rate": 0.0009162112925420082, "loss": 2.0862, "step": 29140 }, { "epoch": 0.7820148132245599, "grad_norm": 0.1884765625, "learning_rate": 0.0009161874094323157, "loss": 2.0395, "step": 29141 }, { "epoch": 0.7820416487762989, "grad_norm": 0.19140625, "learning_rate": 0.000916163525629001, "loss": 1.9926, "step": 29142 }, { "epoch": 0.7820684843280378, "grad_norm": 0.193359375, "learning_rate": 0.0009161396411321167, "loss": 2.0555, "step": 29143 }, { "epoch": 0.7820953198797768, "grad_norm": 0.1962890625, "learning_rate": 0.0009161157559417148, "loss": 2.0877, "step": 29144 }, { "epoch": 0.7821221554315156, "grad_norm": 0.1943359375, "learning_rate": 0.0009160918700578481, "loss": 2.0311, "step": 29145 }, { "epoch": 0.7821489909832546, "grad_norm": 0.2041015625, "learning_rate": 0.0009160679834805689, "loss": 2.0928, "step": 29146 }, { "epoch": 0.7821758265349935, "grad_norm": 0.1943359375, "learning_rate": 0.0009160440962099294, "loss": 2.0566, "step": 29147 }, { "epoch": 0.7822026620867325, "grad_norm": 0.1943359375, "learning_rate": 0.0009160202082459821, "loss": 2.02, "step": 29148 }, { "epoch": 0.7822294976384715, "grad_norm": 0.193359375, "learning_rate": 0.0009159963195887797, "loss": 2.0819, "step": 29149 }, { "epoch": 0.7822563331902104, "grad_norm": 0.19140625, "learning_rate": 0.0009159724302383739, "loss": 2.0109, "step": 29150 }, { "epoch": 0.7822831687419494, "grad_norm": 0.1875, "learning_rate": 0.0009159485401948179, "loss": 2.0393, "step": 29151 }, { "epoch": 0.7823100042936882, "grad_norm": 0.1923828125, "learning_rate": 0.0009159246494581636, "loss": 2.03, "step": 29152 }, { "epoch": 0.7823368398454272, "grad_norm": 0.2001953125, "learning_rate": 0.0009159007580284637, "loss": 2.0293, "step": 29153 }, { "epoch": 0.7823636753971661, "grad_norm": 0.1923828125, "learning_rate": 0.0009158768659057703, "loss": 2.0482, "step": 29154 }, { "epoch": 0.7823905109489051, "grad_norm": 0.2001953125, "learning_rate": 0.000915852973090136, "loss": 2.0591, "step": 29155 }, { "epoch": 0.7824173465006441, "grad_norm": 0.1962890625, "learning_rate": 0.0009158290795816134, "loss": 2.1326, "step": 29156 }, { "epoch": 0.782444182052383, "grad_norm": 0.197265625, "learning_rate": 0.0009158051853802547, "loss": 2.0653, "step": 29157 }, { "epoch": 0.782471017604122, "grad_norm": 0.19140625, "learning_rate": 0.0009157812904861121, "loss": 2.0296, "step": 29158 }, { "epoch": 0.7824978531558608, "grad_norm": 0.1923828125, "learning_rate": 0.0009157573948992385, "loss": 2.044, "step": 29159 }, { "epoch": 0.7825246887075998, "grad_norm": 0.19140625, "learning_rate": 0.0009157334986196861, "loss": 1.9823, "step": 29160 }, { "epoch": 0.7825515242593388, "grad_norm": 0.197265625, "learning_rate": 0.000915709601647507, "loss": 2.138, "step": 29161 }, { "epoch": 0.7825783598110777, "grad_norm": 0.205078125, "learning_rate": 0.0009156857039827541, "loss": 2.0837, "step": 29162 }, { "epoch": 0.7826051953628167, "grad_norm": 0.1943359375, "learning_rate": 0.0009156618056254798, "loss": 2.0978, "step": 29163 }, { "epoch": 0.7826320309145556, "grad_norm": 0.2001953125, "learning_rate": 0.000915637906575736, "loss": 2.129, "step": 29164 }, { "epoch": 0.7826588664662946, "grad_norm": 0.19140625, "learning_rate": 0.0009156140068335757, "loss": 2.0492, "step": 29165 }, { "epoch": 0.7826857020180334, "grad_norm": 0.1962890625, "learning_rate": 0.0009155901063990512, "loss": 2.1064, "step": 29166 }, { "epoch": 0.7827125375697724, "grad_norm": 0.19921875, "learning_rate": 0.0009155662052722146, "loss": 2.1438, "step": 29167 }, { "epoch": 0.7827393731215114, "grad_norm": 0.201171875, "learning_rate": 0.0009155423034531188, "loss": 2.1057, "step": 29168 }, { "epoch": 0.7827662086732503, "grad_norm": 0.19140625, "learning_rate": 0.0009155184009418158, "loss": 2.0537, "step": 29169 }, { "epoch": 0.7827930442249893, "grad_norm": 0.1953125, "learning_rate": 0.0009154944977383587, "loss": 2.0732, "step": 29170 }, { "epoch": 0.7828198797767282, "grad_norm": 0.1923828125, "learning_rate": 0.0009154705938427992, "loss": 1.9847, "step": 29171 }, { "epoch": 0.7828467153284672, "grad_norm": 0.189453125, "learning_rate": 0.0009154466892551898, "loss": 2.052, "step": 29172 }, { "epoch": 0.782873550880206, "grad_norm": 0.1904296875, "learning_rate": 0.0009154227839755835, "loss": 2.0499, "step": 29173 }, { "epoch": 0.782900386431945, "grad_norm": 0.2001953125, "learning_rate": 0.0009153988780040325, "loss": 2.0482, "step": 29174 }, { "epoch": 0.782927221983684, "grad_norm": 0.1904296875, "learning_rate": 0.0009153749713405888, "loss": 2.0178, "step": 29175 }, { "epoch": 0.7829540575354229, "grad_norm": 0.1982421875, "learning_rate": 0.0009153510639853055, "loss": 2.1691, "step": 29176 }, { "epoch": 0.7829808930871619, "grad_norm": 0.1982421875, "learning_rate": 0.0009153271559382346, "loss": 2.1214, "step": 29177 }, { "epoch": 0.7830077286389008, "grad_norm": 0.1943359375, "learning_rate": 0.0009153032471994287, "loss": 2.0583, "step": 29178 }, { "epoch": 0.7830345641906398, "grad_norm": 0.203125, "learning_rate": 0.0009152793377689403, "loss": 2.1531, "step": 29179 }, { "epoch": 0.7830613997423786, "grad_norm": 0.1884765625, "learning_rate": 0.0009152554276468216, "loss": 1.9891, "step": 29180 }, { "epoch": 0.7830882352941176, "grad_norm": 0.193359375, "learning_rate": 0.0009152315168331254, "loss": 2.0814, "step": 29181 }, { "epoch": 0.7831150708458566, "grad_norm": 0.193359375, "learning_rate": 0.0009152076053279041, "loss": 2.1167, "step": 29182 }, { "epoch": 0.7831419063975955, "grad_norm": 0.1904296875, "learning_rate": 0.0009151836931312097, "loss": 2.1441, "step": 29183 }, { "epoch": 0.7831687419493345, "grad_norm": 0.1953125, "learning_rate": 0.0009151597802430952, "loss": 2.1527, "step": 29184 }, { "epoch": 0.7831955775010734, "grad_norm": 0.1943359375, "learning_rate": 0.0009151358666636131, "loss": 2.0276, "step": 29185 }, { "epoch": 0.7832224130528124, "grad_norm": 0.1943359375, "learning_rate": 0.0009151119523928152, "loss": 2.0282, "step": 29186 }, { "epoch": 0.7832492486045514, "grad_norm": 0.197265625, "learning_rate": 0.0009150880374307547, "loss": 2.034, "step": 29187 }, { "epoch": 0.7832760841562902, "grad_norm": 0.1962890625, "learning_rate": 0.0009150641217774835, "loss": 2.0928, "step": 29188 }, { "epoch": 0.7833029197080292, "grad_norm": 0.1943359375, "learning_rate": 0.0009150402054330547, "loss": 2.172, "step": 29189 }, { "epoch": 0.7833297552597681, "grad_norm": 0.1953125, "learning_rate": 0.00091501628839752, "loss": 1.9897, "step": 29190 }, { "epoch": 0.7833565908115071, "grad_norm": 0.1904296875, "learning_rate": 0.0009149923706709324, "loss": 2.0006, "step": 29191 }, { "epoch": 0.783383426363246, "grad_norm": 0.1923828125, "learning_rate": 0.0009149684522533443, "loss": 2.0299, "step": 29192 }, { "epoch": 0.783410261914985, "grad_norm": 0.189453125, "learning_rate": 0.0009149445331448078, "loss": 2.0816, "step": 29193 }, { "epoch": 0.783437097466724, "grad_norm": 0.1953125, "learning_rate": 0.000914920613345376, "loss": 2.035, "step": 29194 }, { "epoch": 0.7834639330184628, "grad_norm": 0.1923828125, "learning_rate": 0.0009148966928551008, "loss": 2.0439, "step": 29195 }, { "epoch": 0.7834907685702018, "grad_norm": 0.1923828125, "learning_rate": 0.000914872771674035, "loss": 2.0348, "step": 29196 }, { "epoch": 0.7835176041219407, "grad_norm": 0.1923828125, "learning_rate": 0.0009148488498022307, "loss": 2.0142, "step": 29197 }, { "epoch": 0.7835444396736797, "grad_norm": 0.1962890625, "learning_rate": 0.0009148249272397409, "loss": 2.0936, "step": 29198 }, { "epoch": 0.7835712752254186, "grad_norm": 0.2001953125, "learning_rate": 0.0009148010039866178, "loss": 2.0686, "step": 29199 }, { "epoch": 0.7835981107771576, "grad_norm": 0.2021484375, "learning_rate": 0.000914777080042914, "loss": 2.1934, "step": 29200 }, { "epoch": 0.7836249463288966, "grad_norm": 0.208984375, "learning_rate": 0.0009147531554086817, "loss": 2.0503, "step": 29201 }, { "epoch": 0.7836517818806354, "grad_norm": 0.19140625, "learning_rate": 0.0009147292300839739, "loss": 1.9456, "step": 29202 }, { "epoch": 0.7836786174323744, "grad_norm": 0.1943359375, "learning_rate": 0.0009147053040688424, "loss": 2.1814, "step": 29203 }, { "epoch": 0.7837054529841133, "grad_norm": 0.1875, "learning_rate": 0.0009146813773633403, "loss": 2.0201, "step": 29204 }, { "epoch": 0.7837322885358523, "grad_norm": 0.1943359375, "learning_rate": 0.0009146574499675197, "loss": 2.0364, "step": 29205 }, { "epoch": 0.7837591240875912, "grad_norm": 0.1943359375, "learning_rate": 0.0009146335218814332, "loss": 2.0062, "step": 29206 }, { "epoch": 0.7837859596393302, "grad_norm": 0.1953125, "learning_rate": 0.0009146095931051335, "loss": 2.0453, "step": 29207 }, { "epoch": 0.7838127951910692, "grad_norm": 0.19140625, "learning_rate": 0.0009145856636386728, "loss": 1.9628, "step": 29208 }, { "epoch": 0.783839630742808, "grad_norm": 0.1904296875, "learning_rate": 0.0009145617334821037, "loss": 2.0148, "step": 29209 }, { "epoch": 0.783866466294547, "grad_norm": 0.2001953125, "learning_rate": 0.0009145378026354787, "loss": 2.2393, "step": 29210 }, { "epoch": 0.7838933018462859, "grad_norm": 0.1943359375, "learning_rate": 0.0009145138710988502, "loss": 2.0792, "step": 29211 }, { "epoch": 0.7839201373980249, "grad_norm": 0.185546875, "learning_rate": 0.0009144899388722709, "loss": 2.0316, "step": 29212 }, { "epoch": 0.7839469729497639, "grad_norm": 0.1884765625, "learning_rate": 0.0009144660059557933, "loss": 2.0783, "step": 29213 }, { "epoch": 0.7839738085015028, "grad_norm": 0.1923828125, "learning_rate": 0.0009144420723494697, "loss": 2.0257, "step": 29214 }, { "epoch": 0.7840006440532418, "grad_norm": 0.19140625, "learning_rate": 0.0009144181380533528, "loss": 2.0954, "step": 29215 }, { "epoch": 0.7840274796049806, "grad_norm": 0.193359375, "learning_rate": 0.0009143942030674948, "loss": 2.1012, "step": 29216 }, { "epoch": 0.7840543151567196, "grad_norm": 0.1923828125, "learning_rate": 0.0009143702673919486, "loss": 2.1423, "step": 29217 }, { "epoch": 0.7840811507084585, "grad_norm": 0.1875, "learning_rate": 0.0009143463310267663, "loss": 2.0588, "step": 29218 }, { "epoch": 0.7841079862601975, "grad_norm": 0.19140625, "learning_rate": 0.0009143223939720009, "loss": 2.0944, "step": 29219 }, { "epoch": 0.7841348218119365, "grad_norm": 0.1943359375, "learning_rate": 0.0009142984562277045, "loss": 2.1163, "step": 29220 }, { "epoch": 0.7841616573636754, "grad_norm": 0.19921875, "learning_rate": 0.0009142745177939299, "loss": 2.0935, "step": 29221 }, { "epoch": 0.7841884929154144, "grad_norm": 0.1923828125, "learning_rate": 0.0009142505786707293, "loss": 2.0718, "step": 29222 }, { "epoch": 0.7842153284671532, "grad_norm": 0.1943359375, "learning_rate": 0.0009142266388581555, "loss": 2.0798, "step": 29223 }, { "epoch": 0.7842421640188922, "grad_norm": 0.1923828125, "learning_rate": 0.0009142026983562608, "loss": 1.972, "step": 29224 }, { "epoch": 0.7842689995706311, "grad_norm": 0.1904296875, "learning_rate": 0.0009141787571650978, "loss": 2.1312, "step": 29225 }, { "epoch": 0.7842958351223701, "grad_norm": 0.193359375, "learning_rate": 0.0009141548152847191, "loss": 2.06, "step": 29226 }, { "epoch": 0.7843226706741091, "grad_norm": 0.1953125, "learning_rate": 0.0009141308727151772, "loss": 2.1455, "step": 29227 }, { "epoch": 0.784349506225848, "grad_norm": 0.1953125, "learning_rate": 0.0009141069294565246, "loss": 2.1152, "step": 29228 }, { "epoch": 0.784376341777587, "grad_norm": 0.1923828125, "learning_rate": 0.0009140829855088138, "loss": 2.0913, "step": 29229 }, { "epoch": 0.7844031773293259, "grad_norm": 0.1943359375, "learning_rate": 0.0009140590408720971, "loss": 2.0823, "step": 29230 }, { "epoch": 0.7844300128810648, "grad_norm": 0.197265625, "learning_rate": 0.0009140350955464273, "loss": 2.1442, "step": 29231 }, { "epoch": 0.7844568484328038, "grad_norm": 0.1953125, "learning_rate": 0.0009140111495318573, "loss": 2.1091, "step": 29232 }, { "epoch": 0.7844836839845427, "grad_norm": 0.1904296875, "learning_rate": 0.000913987202828439, "loss": 2.1132, "step": 29233 }, { "epoch": 0.7845105195362817, "grad_norm": 0.1884765625, "learning_rate": 0.0009139632554362251, "loss": 2.0551, "step": 29234 }, { "epoch": 0.7845373550880206, "grad_norm": 0.1943359375, "learning_rate": 0.0009139393073552682, "loss": 2.0973, "step": 29235 }, { "epoch": 0.7845641906397596, "grad_norm": 0.1943359375, "learning_rate": 0.0009139153585856209, "loss": 2.1176, "step": 29236 }, { "epoch": 0.7845910261914985, "grad_norm": 0.1904296875, "learning_rate": 0.0009138914091273354, "loss": 2.008, "step": 29237 }, { "epoch": 0.7846178617432374, "grad_norm": 0.1923828125, "learning_rate": 0.0009138674589804647, "loss": 2.0818, "step": 29238 }, { "epoch": 0.7846446972949764, "grad_norm": 0.1962890625, "learning_rate": 0.0009138435081450611, "loss": 2.1164, "step": 29239 }, { "epoch": 0.7846715328467153, "grad_norm": 0.1962890625, "learning_rate": 0.0009138195566211773, "loss": 2.19, "step": 29240 }, { "epoch": 0.7846983683984543, "grad_norm": 0.1904296875, "learning_rate": 0.0009137956044088656, "loss": 2.0657, "step": 29241 }, { "epoch": 0.7847252039501932, "grad_norm": 0.197265625, "learning_rate": 0.0009137716515081785, "loss": 2.0534, "step": 29242 }, { "epoch": 0.7847520395019322, "grad_norm": 0.1904296875, "learning_rate": 0.0009137476979191692, "loss": 1.977, "step": 29243 }, { "epoch": 0.784778875053671, "grad_norm": 0.193359375, "learning_rate": 0.0009137237436418892, "loss": 2.1348, "step": 29244 }, { "epoch": 0.78480571060541, "grad_norm": 0.2021484375, "learning_rate": 0.0009136997886763919, "loss": 2.1822, "step": 29245 }, { "epoch": 0.784832546157149, "grad_norm": 0.2001953125, "learning_rate": 0.0009136758330227295, "loss": 2.0928, "step": 29246 }, { "epoch": 0.7848593817088879, "grad_norm": 0.1953125, "learning_rate": 0.0009136518766809547, "loss": 2.0877, "step": 29247 }, { "epoch": 0.7848862172606269, "grad_norm": 0.197265625, "learning_rate": 0.0009136279196511199, "loss": 1.9519, "step": 29248 }, { "epoch": 0.7849130528123658, "grad_norm": 0.1904296875, "learning_rate": 0.0009136039619332776, "loss": 1.9973, "step": 29249 }, { "epoch": 0.7849398883641048, "grad_norm": 0.1865234375, "learning_rate": 0.0009135800035274806, "loss": 1.9919, "step": 29250 }, { "epoch": 0.7849667239158437, "grad_norm": 0.1943359375, "learning_rate": 0.0009135560444337811, "loss": 2.0428, "step": 29251 }, { "epoch": 0.7849935594675826, "grad_norm": 0.189453125, "learning_rate": 0.0009135320846522323, "loss": 2.0611, "step": 29252 }, { "epoch": 0.7850203950193216, "grad_norm": 0.1982421875, "learning_rate": 0.000913508124182886, "loss": 2.0398, "step": 29253 }, { "epoch": 0.7850472305710605, "grad_norm": 0.193359375, "learning_rate": 0.0009134841630257953, "loss": 2.0868, "step": 29254 }, { "epoch": 0.7850740661227995, "grad_norm": 0.1865234375, "learning_rate": 0.0009134602011810125, "loss": 1.9885, "step": 29255 }, { "epoch": 0.7851009016745384, "grad_norm": 0.1904296875, "learning_rate": 0.0009134362386485903, "loss": 2.0168, "step": 29256 }, { "epoch": 0.7851277372262774, "grad_norm": 0.1865234375, "learning_rate": 0.0009134122754285811, "loss": 1.9341, "step": 29257 }, { "epoch": 0.7851545727780164, "grad_norm": 0.197265625, "learning_rate": 0.0009133883115210377, "loss": 2.0602, "step": 29258 }, { "epoch": 0.7851814083297552, "grad_norm": 0.1943359375, "learning_rate": 0.0009133643469260124, "loss": 2.0327, "step": 29259 }, { "epoch": 0.7852082438814942, "grad_norm": 0.1962890625, "learning_rate": 0.000913340381643558, "loss": 2.1385, "step": 29260 }, { "epoch": 0.7852350794332331, "grad_norm": 0.1923828125, "learning_rate": 0.000913316415673727, "loss": 1.9792, "step": 29261 }, { "epoch": 0.7852619149849721, "grad_norm": 0.1923828125, "learning_rate": 0.0009132924490165719, "loss": 2.067, "step": 29262 }, { "epoch": 0.785288750536711, "grad_norm": 0.1884765625, "learning_rate": 0.0009132684816721454, "loss": 2.0455, "step": 29263 }, { "epoch": 0.78531558608845, "grad_norm": 0.1953125, "learning_rate": 0.0009132445136405, "loss": 2.0771, "step": 29264 }, { "epoch": 0.785342421640189, "grad_norm": 0.189453125, "learning_rate": 0.0009132205449216882, "loss": 2.0902, "step": 29265 }, { "epoch": 0.7853692571919278, "grad_norm": 0.1982421875, "learning_rate": 0.0009131965755157628, "loss": 2.0403, "step": 29266 }, { "epoch": 0.7853960927436668, "grad_norm": 0.1962890625, "learning_rate": 0.0009131726054227762, "loss": 2.1378, "step": 29267 }, { "epoch": 0.7854229282954057, "grad_norm": 0.193359375, "learning_rate": 0.0009131486346427812, "loss": 2.137, "step": 29268 }, { "epoch": 0.7854497638471447, "grad_norm": 0.189453125, "learning_rate": 0.0009131246631758301, "loss": 2.0315, "step": 29269 }, { "epoch": 0.7854765993988836, "grad_norm": 0.197265625, "learning_rate": 0.0009131006910219756, "loss": 2.0036, "step": 29270 }, { "epoch": 0.7855034349506226, "grad_norm": 0.2001953125, "learning_rate": 0.0009130767181812703, "loss": 2.24, "step": 29271 }, { "epoch": 0.7855302705023616, "grad_norm": 0.1962890625, "learning_rate": 0.0009130527446537668, "loss": 1.9911, "step": 29272 }, { "epoch": 0.7855571060541005, "grad_norm": 0.1943359375, "learning_rate": 0.0009130287704395175, "loss": 2.135, "step": 29273 }, { "epoch": 0.7855839416058394, "grad_norm": 0.1953125, "learning_rate": 0.0009130047955385754, "loss": 2.1162, "step": 29274 }, { "epoch": 0.7856107771575783, "grad_norm": 0.19140625, "learning_rate": 0.0009129808199509929, "loss": 2.0976, "step": 29275 }, { "epoch": 0.7856376127093173, "grad_norm": 0.1884765625, "learning_rate": 0.0009129568436768224, "loss": 2.0789, "step": 29276 }, { "epoch": 0.7856644482610562, "grad_norm": 0.1923828125, "learning_rate": 0.0009129328667161168, "loss": 2.0949, "step": 29277 }, { "epoch": 0.7856912838127952, "grad_norm": 0.1953125, "learning_rate": 0.0009129088890689285, "loss": 2.114, "step": 29278 }, { "epoch": 0.7857181193645342, "grad_norm": 0.19140625, "learning_rate": 0.0009128849107353102, "loss": 2.0853, "step": 29279 }, { "epoch": 0.785744954916273, "grad_norm": 0.1923828125, "learning_rate": 0.0009128609317153143, "loss": 2.0466, "step": 29280 }, { "epoch": 0.785771790468012, "grad_norm": 0.189453125, "learning_rate": 0.0009128369520089935, "loss": 2.0616, "step": 29281 }, { "epoch": 0.7857986260197509, "grad_norm": 0.193359375, "learning_rate": 0.0009128129716164008, "loss": 2.1563, "step": 29282 }, { "epoch": 0.7858254615714899, "grad_norm": 0.19921875, "learning_rate": 0.0009127889905375884, "loss": 2.0807, "step": 29283 }, { "epoch": 0.7858522971232289, "grad_norm": 0.1962890625, "learning_rate": 0.0009127650087726088, "loss": 2.1663, "step": 29284 }, { "epoch": 0.7858791326749678, "grad_norm": 0.1962890625, "learning_rate": 0.0009127410263215149, "loss": 2.1564, "step": 29285 }, { "epoch": 0.7859059682267068, "grad_norm": 0.203125, "learning_rate": 0.0009127170431843591, "loss": 2.1179, "step": 29286 }, { "epoch": 0.7859328037784457, "grad_norm": 0.1943359375, "learning_rate": 0.0009126930593611941, "loss": 2.1251, "step": 29287 }, { "epoch": 0.7859596393301846, "grad_norm": 0.2099609375, "learning_rate": 0.0009126690748520727, "loss": 2.1817, "step": 29288 }, { "epoch": 0.7859864748819235, "grad_norm": 0.205078125, "learning_rate": 0.0009126450896570471, "loss": 2.0976, "step": 29289 }, { "epoch": 0.7860133104336625, "grad_norm": 0.19921875, "learning_rate": 0.0009126211037761705, "loss": 2.0673, "step": 29290 }, { "epoch": 0.7860401459854015, "grad_norm": 0.1953125, "learning_rate": 0.0009125971172094948, "loss": 2.0466, "step": 29291 }, { "epoch": 0.7860669815371404, "grad_norm": 0.19140625, "learning_rate": 0.0009125731299570732, "loss": 2.1064, "step": 29292 }, { "epoch": 0.7860938170888794, "grad_norm": 0.1982421875, "learning_rate": 0.000912549142018958, "loss": 2.0437, "step": 29293 }, { "epoch": 0.7861206526406183, "grad_norm": 0.1962890625, "learning_rate": 0.0009125251533952022, "loss": 2.0877, "step": 29294 }, { "epoch": 0.7861474881923572, "grad_norm": 0.1953125, "learning_rate": 0.0009125011640858579, "loss": 2.083, "step": 29295 }, { "epoch": 0.7861743237440961, "grad_norm": 0.1953125, "learning_rate": 0.0009124771740909779, "loss": 2.1765, "step": 29296 }, { "epoch": 0.7862011592958351, "grad_norm": 0.1953125, "learning_rate": 0.0009124531834106151, "loss": 2.0672, "step": 29297 }, { "epoch": 0.7862279948475741, "grad_norm": 0.1904296875, "learning_rate": 0.0009124291920448219, "loss": 2.1215, "step": 29298 }, { "epoch": 0.786254830399313, "grad_norm": 0.1923828125, "learning_rate": 0.000912405199993651, "loss": 2.0738, "step": 29299 }, { "epoch": 0.786281665951052, "grad_norm": 0.1943359375, "learning_rate": 0.0009123812072571551, "loss": 2.117, "step": 29300 }, { "epoch": 0.7863085015027909, "grad_norm": 0.1953125, "learning_rate": 0.0009123572138353865, "loss": 2.1311, "step": 29301 }, { "epoch": 0.7863353370545298, "grad_norm": 0.1923828125, "learning_rate": 0.0009123332197283982, "loss": 2.0876, "step": 29302 }, { "epoch": 0.7863621726062688, "grad_norm": 0.189453125, "learning_rate": 0.0009123092249362426, "loss": 2.0811, "step": 29303 }, { "epoch": 0.7863890081580077, "grad_norm": 0.1923828125, "learning_rate": 0.0009122852294589725, "loss": 2.0971, "step": 29304 }, { "epoch": 0.7864158437097467, "grad_norm": 0.1875, "learning_rate": 0.0009122612332966406, "loss": 2.0769, "step": 29305 }, { "epoch": 0.7864426792614856, "grad_norm": 0.1953125, "learning_rate": 0.0009122372364492994, "loss": 2.0899, "step": 29306 }, { "epoch": 0.7864695148132246, "grad_norm": 0.2041015625, "learning_rate": 0.0009122132389170014, "loss": 2.2132, "step": 29307 }, { "epoch": 0.7864963503649635, "grad_norm": 0.193359375, "learning_rate": 0.0009121892406997995, "loss": 2.0967, "step": 29308 }, { "epoch": 0.7865231859167024, "grad_norm": 0.1923828125, "learning_rate": 0.0009121652417977463, "loss": 2.0842, "step": 29309 }, { "epoch": 0.7865500214684414, "grad_norm": 0.1962890625, "learning_rate": 0.0009121412422108943, "loss": 2.1127, "step": 29310 }, { "epoch": 0.7865768570201803, "grad_norm": 0.19921875, "learning_rate": 0.0009121172419392963, "loss": 2.1418, "step": 29311 }, { "epoch": 0.7866036925719193, "grad_norm": 0.193359375, "learning_rate": 0.0009120932409830049, "loss": 2.0994, "step": 29312 }, { "epoch": 0.7866305281236582, "grad_norm": 0.1904296875, "learning_rate": 0.0009120692393420728, "loss": 2.1198, "step": 29313 }, { "epoch": 0.7866573636753972, "grad_norm": 0.1953125, "learning_rate": 0.0009120452370165527, "loss": 2.1506, "step": 29314 }, { "epoch": 0.7866841992271361, "grad_norm": 0.19140625, "learning_rate": 0.0009120212340064967, "loss": 2.0941, "step": 29315 }, { "epoch": 0.786711034778875, "grad_norm": 0.19140625, "learning_rate": 0.0009119972303119585, "loss": 2.1348, "step": 29316 }, { "epoch": 0.786737870330614, "grad_norm": 0.189453125, "learning_rate": 0.0009119732259329898, "loss": 2.0191, "step": 29317 }, { "epoch": 0.7867647058823529, "grad_norm": 0.1953125, "learning_rate": 0.0009119492208696437, "loss": 2.115, "step": 29318 }, { "epoch": 0.7867915414340919, "grad_norm": 0.1943359375, "learning_rate": 0.0009119252151219729, "loss": 2.0872, "step": 29319 }, { "epoch": 0.7868183769858308, "grad_norm": 0.197265625, "learning_rate": 0.0009119012086900299, "loss": 2.1096, "step": 29320 }, { "epoch": 0.7868452125375698, "grad_norm": 0.1953125, "learning_rate": 0.0009118772015738673, "loss": 2.1658, "step": 29321 }, { "epoch": 0.7868720480893087, "grad_norm": 0.1923828125, "learning_rate": 0.0009118531937735379, "loss": 2.1349, "step": 29322 }, { "epoch": 0.7868988836410477, "grad_norm": 0.197265625, "learning_rate": 0.0009118291852890944, "loss": 2.0746, "step": 29323 }, { "epoch": 0.7869257191927866, "grad_norm": 0.1943359375, "learning_rate": 0.0009118051761205893, "loss": 2.1006, "step": 29324 }, { "epoch": 0.7869525547445255, "grad_norm": 0.1875, "learning_rate": 0.0009117811662680756, "loss": 2.0131, "step": 29325 }, { "epoch": 0.7869793902962645, "grad_norm": 0.1904296875, "learning_rate": 0.0009117571557316054, "loss": 2.0746, "step": 29326 }, { "epoch": 0.7870062258480034, "grad_norm": 0.1962890625, "learning_rate": 0.0009117331445112319, "loss": 2.08, "step": 29327 }, { "epoch": 0.7870330613997424, "grad_norm": 0.1982421875, "learning_rate": 0.0009117091326070078, "loss": 2.1582, "step": 29328 }, { "epoch": 0.7870598969514814, "grad_norm": 0.203125, "learning_rate": 0.0009116851200189853, "loss": 2.0278, "step": 29329 }, { "epoch": 0.7870867325032203, "grad_norm": 0.2001953125, "learning_rate": 0.0009116611067472174, "loss": 2.0929, "step": 29330 }, { "epoch": 0.7871135680549592, "grad_norm": 0.193359375, "learning_rate": 0.0009116370927917569, "loss": 2.2109, "step": 29331 }, { "epoch": 0.7871404036066981, "grad_norm": 0.1943359375, "learning_rate": 0.000911613078152656, "loss": 2.0552, "step": 29332 }, { "epoch": 0.7871672391584371, "grad_norm": 0.19140625, "learning_rate": 0.000911589062829968, "loss": 2.0633, "step": 29333 }, { "epoch": 0.787194074710176, "grad_norm": 0.1953125, "learning_rate": 0.000911565046823745, "loss": 2.0579, "step": 29334 }, { "epoch": 0.787220910261915, "grad_norm": 0.193359375, "learning_rate": 0.0009115410301340402, "loss": 2.0819, "step": 29335 }, { "epoch": 0.787247745813654, "grad_norm": 0.1962890625, "learning_rate": 0.0009115170127609059, "loss": 2.1942, "step": 29336 }, { "epoch": 0.7872745813653929, "grad_norm": 0.189453125, "learning_rate": 0.0009114929947043948, "loss": 2.1065, "step": 29337 }, { "epoch": 0.7873014169171318, "grad_norm": 0.1982421875, "learning_rate": 0.0009114689759645601, "loss": 2.0103, "step": 29338 }, { "epoch": 0.7873282524688707, "grad_norm": 0.2021484375, "learning_rate": 0.0009114449565414538, "loss": 2.0756, "step": 29339 }, { "epoch": 0.7873550880206097, "grad_norm": 0.193359375, "learning_rate": 0.0009114209364351288, "loss": 2.0902, "step": 29340 }, { "epoch": 0.7873819235723486, "grad_norm": 0.1943359375, "learning_rate": 0.0009113969156456381, "loss": 2.1484, "step": 29341 }, { "epoch": 0.7874087591240876, "grad_norm": 0.1904296875, "learning_rate": 0.0009113728941730341, "loss": 2.0817, "step": 29342 }, { "epoch": 0.7874355946758266, "grad_norm": 0.185546875, "learning_rate": 0.0009113488720173694, "loss": 2.0349, "step": 29343 }, { "epoch": 0.7874624302275655, "grad_norm": 0.1943359375, "learning_rate": 0.0009113248491786972, "loss": 2.0996, "step": 29344 }, { "epoch": 0.7874892657793044, "grad_norm": 0.193359375, "learning_rate": 0.0009113008256570698, "loss": 2.0925, "step": 29345 }, { "epoch": 0.7875161013310433, "grad_norm": 0.1875, "learning_rate": 0.0009112768014525398, "loss": 2.0778, "step": 29346 }, { "epoch": 0.7875429368827823, "grad_norm": 0.19140625, "learning_rate": 0.0009112527765651602, "loss": 2.1744, "step": 29347 }, { "epoch": 0.7875697724345213, "grad_norm": 0.1962890625, "learning_rate": 0.0009112287509949836, "loss": 2.0157, "step": 29348 }, { "epoch": 0.7875966079862602, "grad_norm": 0.1953125, "learning_rate": 0.0009112047247420627, "loss": 2.1253, "step": 29349 }, { "epoch": 0.7876234435379992, "grad_norm": 0.189453125, "learning_rate": 0.0009111806978064501, "loss": 2.0663, "step": 29350 }, { "epoch": 0.7876502790897381, "grad_norm": 0.201171875, "learning_rate": 0.0009111566701881985, "loss": 2.0875, "step": 29351 }, { "epoch": 0.787677114641477, "grad_norm": 0.1953125, "learning_rate": 0.000911132641887361, "loss": 2.0965, "step": 29352 }, { "epoch": 0.7877039501932159, "grad_norm": 0.1923828125, "learning_rate": 0.0009111086129039899, "loss": 2.0261, "step": 29353 }, { "epoch": 0.7877307857449549, "grad_norm": 0.1962890625, "learning_rate": 0.0009110845832381378, "loss": 2.076, "step": 29354 }, { "epoch": 0.7877576212966939, "grad_norm": 0.197265625, "learning_rate": 0.000911060552889858, "loss": 2.0756, "step": 29355 }, { "epoch": 0.7877844568484328, "grad_norm": 0.19921875, "learning_rate": 0.0009110365218592028, "loss": 2.1231, "step": 29356 }, { "epoch": 0.7878112924001718, "grad_norm": 0.1923828125, "learning_rate": 0.0009110124901462248, "loss": 2.1132, "step": 29357 }, { "epoch": 0.7878381279519107, "grad_norm": 0.193359375, "learning_rate": 0.000910988457750977, "loss": 2.092, "step": 29358 }, { "epoch": 0.7878649635036497, "grad_norm": 0.197265625, "learning_rate": 0.0009109644246735121, "loss": 2.0912, "step": 29359 }, { "epoch": 0.7878917990553885, "grad_norm": 0.1875, "learning_rate": 0.0009109403909138825, "loss": 2.1062, "step": 29360 }, { "epoch": 0.7879186346071275, "grad_norm": 0.2001953125, "learning_rate": 0.0009109163564721414, "loss": 2.1565, "step": 29361 }, { "epoch": 0.7879454701588665, "grad_norm": 0.1982421875, "learning_rate": 0.0009108923213483411, "loss": 2.1276, "step": 29362 }, { "epoch": 0.7879723057106054, "grad_norm": 0.205078125, "learning_rate": 0.0009108682855425346, "loss": 2.1956, "step": 29363 }, { "epoch": 0.7879991412623444, "grad_norm": 0.1953125, "learning_rate": 0.0009108442490547748, "loss": 2.0913, "step": 29364 }, { "epoch": 0.7880259768140833, "grad_norm": 0.193359375, "learning_rate": 0.0009108202118851136, "loss": 2.0269, "step": 29365 }, { "epoch": 0.7880528123658223, "grad_norm": 0.2001953125, "learning_rate": 0.0009107961740336047, "loss": 2.1989, "step": 29366 }, { "epoch": 0.7880796479175611, "grad_norm": 0.1943359375, "learning_rate": 0.0009107721355003004, "loss": 1.9732, "step": 29367 }, { "epoch": 0.7881064834693001, "grad_norm": 0.1943359375, "learning_rate": 0.0009107480962852534, "loss": 2.1064, "step": 29368 }, { "epoch": 0.7881333190210391, "grad_norm": 0.1953125, "learning_rate": 0.0009107240563885166, "loss": 2.1083, "step": 29369 }, { "epoch": 0.788160154572778, "grad_norm": 0.208984375, "learning_rate": 0.0009107000158101424, "loss": 2.1413, "step": 29370 }, { "epoch": 0.788186990124517, "grad_norm": 0.1953125, "learning_rate": 0.000910675974550184, "loss": 2.1059, "step": 29371 }, { "epoch": 0.7882138256762559, "grad_norm": 0.1953125, "learning_rate": 0.0009106519326086939, "loss": 2.041, "step": 29372 }, { "epoch": 0.7882406612279949, "grad_norm": 0.2001953125, "learning_rate": 0.0009106278899857247, "loss": 2.0579, "step": 29373 }, { "epoch": 0.7882674967797338, "grad_norm": 0.19921875, "learning_rate": 0.0009106038466813296, "loss": 2.0954, "step": 29374 }, { "epoch": 0.7882943323314727, "grad_norm": 0.1953125, "learning_rate": 0.0009105798026955607, "loss": 2.0413, "step": 29375 }, { "epoch": 0.7883211678832117, "grad_norm": 0.193359375, "learning_rate": 0.0009105557580284712, "loss": 2.1067, "step": 29376 }, { "epoch": 0.7883480034349506, "grad_norm": 0.1962890625, "learning_rate": 0.0009105317126801137, "loss": 2.0806, "step": 29377 }, { "epoch": 0.7883748389866896, "grad_norm": 0.203125, "learning_rate": 0.0009105076666505412, "loss": 2.1386, "step": 29378 }, { "epoch": 0.7884016745384285, "grad_norm": 0.1982421875, "learning_rate": 0.0009104836199398059, "loss": 2.0617, "step": 29379 }, { "epoch": 0.7884285100901675, "grad_norm": 0.1953125, "learning_rate": 0.0009104595725479612, "loss": 2.0962, "step": 29380 }, { "epoch": 0.7884553456419064, "grad_norm": 0.1962890625, "learning_rate": 0.0009104355244750593, "loss": 2.125, "step": 29381 }, { "epoch": 0.7884821811936453, "grad_norm": 0.1923828125, "learning_rate": 0.0009104114757211533, "loss": 2.102, "step": 29382 }, { "epoch": 0.7885090167453843, "grad_norm": 0.193359375, "learning_rate": 0.0009103874262862959, "loss": 2.078, "step": 29383 }, { "epoch": 0.7885358522971232, "grad_norm": 0.1904296875, "learning_rate": 0.0009103633761705398, "loss": 2.1028, "step": 29384 }, { "epoch": 0.7885626878488622, "grad_norm": 0.1923828125, "learning_rate": 0.0009103393253739377, "loss": 2.1485, "step": 29385 }, { "epoch": 0.7885895234006011, "grad_norm": 0.1982421875, "learning_rate": 0.0009103152738965424, "loss": 2.1471, "step": 29386 }, { "epoch": 0.7886163589523401, "grad_norm": 0.1943359375, "learning_rate": 0.0009102912217384066, "loss": 2.1955, "step": 29387 }, { "epoch": 0.788643194504079, "grad_norm": 0.19921875, "learning_rate": 0.0009102671688995833, "loss": 2.0938, "step": 29388 }, { "epoch": 0.7886700300558179, "grad_norm": 0.193359375, "learning_rate": 0.0009102431153801251, "loss": 2.1499, "step": 29389 }, { "epoch": 0.7886968656075569, "grad_norm": 0.193359375, "learning_rate": 0.0009102190611800847, "loss": 2.1922, "step": 29390 }, { "epoch": 0.7887237011592958, "grad_norm": 0.1962890625, "learning_rate": 0.000910195006299515, "loss": 2.0975, "step": 29391 }, { "epoch": 0.7887505367110348, "grad_norm": 0.1982421875, "learning_rate": 0.0009101709507384687, "loss": 2.161, "step": 29392 }, { "epoch": 0.7887773722627737, "grad_norm": 0.1982421875, "learning_rate": 0.0009101468944969986, "loss": 2.1127, "step": 29393 }, { "epoch": 0.7888042078145127, "grad_norm": 0.19921875, "learning_rate": 0.0009101228375751574, "loss": 2.235, "step": 29394 }, { "epoch": 0.7888310433662516, "grad_norm": 0.1962890625, "learning_rate": 0.000910098779972998, "loss": 2.1482, "step": 29395 }, { "epoch": 0.7888578789179905, "grad_norm": 0.1943359375, "learning_rate": 0.0009100747216905732, "loss": 2.1139, "step": 29396 }, { "epoch": 0.7888847144697295, "grad_norm": 0.1953125, "learning_rate": 0.0009100506627279355, "loss": 2.1099, "step": 29397 }, { "epoch": 0.7889115500214684, "grad_norm": 0.2001953125, "learning_rate": 0.000910026603085138, "loss": 2.2167, "step": 29398 }, { "epoch": 0.7889383855732074, "grad_norm": 0.1923828125, "learning_rate": 0.0009100025427622333, "loss": 2.013, "step": 29399 }, { "epoch": 0.7889652211249464, "grad_norm": 0.1962890625, "learning_rate": 0.0009099784817592742, "loss": 2.0913, "step": 29400 }, { "epoch": 0.7889920566766853, "grad_norm": 0.193359375, "learning_rate": 0.0009099544200763136, "loss": 2.1053, "step": 29401 }, { "epoch": 0.7890188922284243, "grad_norm": 0.19140625, "learning_rate": 0.0009099303577134039, "loss": 2.1369, "step": 29402 }, { "epoch": 0.7890457277801631, "grad_norm": 0.19140625, "learning_rate": 0.0009099062946705986, "loss": 2.1854, "step": 29403 }, { "epoch": 0.7890725633319021, "grad_norm": 0.193359375, "learning_rate": 0.0009098822309479499, "loss": 2.1655, "step": 29404 }, { "epoch": 0.789099398883641, "grad_norm": 0.1953125, "learning_rate": 0.0009098581665455106, "loss": 2.188, "step": 29405 }, { "epoch": 0.78912623443538, "grad_norm": 0.189453125, "learning_rate": 0.0009098341014633337, "loss": 2.0381, "step": 29406 }, { "epoch": 0.789153069987119, "grad_norm": 0.19140625, "learning_rate": 0.0009098100357014721, "loss": 2.0683, "step": 29407 }, { "epoch": 0.7891799055388579, "grad_norm": 0.193359375, "learning_rate": 0.0009097859692599784, "loss": 2.1605, "step": 29408 }, { "epoch": 0.7892067410905969, "grad_norm": 0.1923828125, "learning_rate": 0.0009097619021389053, "loss": 2.075, "step": 29409 }, { "epoch": 0.7892335766423357, "grad_norm": 0.1982421875, "learning_rate": 0.0009097378343383058, "loss": 2.1077, "step": 29410 }, { "epoch": 0.7892604121940747, "grad_norm": 0.1962890625, "learning_rate": 0.0009097137658582326, "loss": 2.1828, "step": 29411 }, { "epoch": 0.7892872477458136, "grad_norm": 0.2041015625, "learning_rate": 0.0009096896966987385, "loss": 2.1608, "step": 29412 }, { "epoch": 0.7893140832975526, "grad_norm": 0.1923828125, "learning_rate": 0.0009096656268598762, "loss": 2.1937, "step": 29413 }, { "epoch": 0.7893409188492916, "grad_norm": 0.19921875, "learning_rate": 0.0009096415563416989, "loss": 2.1155, "step": 29414 }, { "epoch": 0.7893677544010305, "grad_norm": 0.2021484375, "learning_rate": 0.0009096174851442589, "loss": 2.1233, "step": 29415 }, { "epoch": 0.7893945899527695, "grad_norm": 0.1904296875, "learning_rate": 0.0009095934132676093, "loss": 2.0847, "step": 29416 }, { "epoch": 0.7894214255045083, "grad_norm": 0.197265625, "learning_rate": 0.0009095693407118028, "loss": 2.1016, "step": 29417 }, { "epoch": 0.7894482610562473, "grad_norm": 0.1943359375, "learning_rate": 0.0009095452674768923, "loss": 2.1377, "step": 29418 }, { "epoch": 0.7894750966079863, "grad_norm": 0.1904296875, "learning_rate": 0.0009095211935629306, "loss": 2.1017, "step": 29419 }, { "epoch": 0.7895019321597252, "grad_norm": 0.1943359375, "learning_rate": 0.0009094971189699704, "loss": 2.0961, "step": 29420 }, { "epoch": 0.7895287677114642, "grad_norm": 0.19921875, "learning_rate": 0.0009094730436980644, "loss": 2.1356, "step": 29421 }, { "epoch": 0.7895556032632031, "grad_norm": 0.1982421875, "learning_rate": 0.0009094489677472658, "loss": 2.1395, "step": 29422 }, { "epoch": 0.7895824388149421, "grad_norm": 0.1923828125, "learning_rate": 0.0009094248911176272, "loss": 2.1172, "step": 29423 }, { "epoch": 0.7896092743666809, "grad_norm": 0.1943359375, "learning_rate": 0.0009094008138092013, "loss": 2.1135, "step": 29424 }, { "epoch": 0.7896361099184199, "grad_norm": 0.1943359375, "learning_rate": 0.0009093767358220413, "loss": 2.1502, "step": 29425 }, { "epoch": 0.7896629454701589, "grad_norm": 0.1923828125, "learning_rate": 0.0009093526571561995, "loss": 2.16, "step": 29426 }, { "epoch": 0.7896897810218978, "grad_norm": 0.1943359375, "learning_rate": 0.000909328577811729, "loss": 2.1574, "step": 29427 }, { "epoch": 0.7897166165736368, "grad_norm": 0.1943359375, "learning_rate": 0.0009093044977886827, "loss": 2.1914, "step": 29428 }, { "epoch": 0.7897434521253757, "grad_norm": 0.1904296875, "learning_rate": 0.0009092804170871134, "loss": 2.0889, "step": 29429 }, { "epoch": 0.7897702876771147, "grad_norm": 0.1923828125, "learning_rate": 0.0009092563357070737, "loss": 2.1366, "step": 29430 }, { "epoch": 0.7897971232288535, "grad_norm": 0.19140625, "learning_rate": 0.0009092322536486167, "loss": 2.0809, "step": 29431 }, { "epoch": 0.7898239587805925, "grad_norm": 0.197265625, "learning_rate": 0.000909208170911795, "loss": 2.1114, "step": 29432 }, { "epoch": 0.7898507943323315, "grad_norm": 0.1943359375, "learning_rate": 0.0009091840874966616, "loss": 2.1073, "step": 29433 }, { "epoch": 0.7898776298840704, "grad_norm": 0.1923828125, "learning_rate": 0.0009091600034032694, "loss": 2.0664, "step": 29434 }, { "epoch": 0.7899044654358094, "grad_norm": 0.1982421875, "learning_rate": 0.0009091359186316707, "loss": 2.1724, "step": 29435 }, { "epoch": 0.7899313009875483, "grad_norm": 0.19921875, "learning_rate": 0.0009091118331819193, "loss": 2.2286, "step": 29436 }, { "epoch": 0.7899581365392873, "grad_norm": 0.193359375, "learning_rate": 0.0009090877470540672, "loss": 2.0854, "step": 29437 }, { "epoch": 0.7899849720910261, "grad_norm": 0.1904296875, "learning_rate": 0.0009090636602481673, "loss": 2.1198, "step": 29438 }, { "epoch": 0.7900118076427651, "grad_norm": 0.1943359375, "learning_rate": 0.0009090395727642729, "loss": 2.2102, "step": 29439 }, { "epoch": 0.7900386431945041, "grad_norm": 0.1943359375, "learning_rate": 0.0009090154846024368, "loss": 2.1502, "step": 29440 }, { "epoch": 0.790065478746243, "grad_norm": 0.189453125, "learning_rate": 0.0009089913957627114, "loss": 2.152, "step": 29441 }, { "epoch": 0.790092314297982, "grad_norm": 0.1884765625, "learning_rate": 0.0009089673062451497, "loss": 2.056, "step": 29442 }, { "epoch": 0.7901191498497209, "grad_norm": 0.1962890625, "learning_rate": 0.0009089432160498047, "loss": 2.058, "step": 29443 }, { "epoch": 0.7901459854014599, "grad_norm": 0.19140625, "learning_rate": 0.0009089191251767293, "loss": 2.1402, "step": 29444 }, { "epoch": 0.7901728209531989, "grad_norm": 0.1923828125, "learning_rate": 0.0009088950336259761, "loss": 2.1914, "step": 29445 }, { "epoch": 0.7901996565049377, "grad_norm": 0.19140625, "learning_rate": 0.0009088709413975982, "loss": 2.039, "step": 29446 }, { "epoch": 0.7902264920566767, "grad_norm": 0.19140625, "learning_rate": 0.0009088468484916482, "loss": 2.1103, "step": 29447 }, { "epoch": 0.7902533276084156, "grad_norm": 0.19140625, "learning_rate": 0.0009088227549081791, "loss": 2.016, "step": 29448 }, { "epoch": 0.7902801631601546, "grad_norm": 0.1953125, "learning_rate": 0.0009087986606472437, "loss": 2.1002, "step": 29449 }, { "epoch": 0.7903069987118935, "grad_norm": 0.1943359375, "learning_rate": 0.0009087745657088951, "loss": 2.0389, "step": 29450 }, { "epoch": 0.7903338342636325, "grad_norm": 0.197265625, "learning_rate": 0.0009087504700931858, "loss": 2.1348, "step": 29451 }, { "epoch": 0.7903606698153715, "grad_norm": 0.1962890625, "learning_rate": 0.0009087263738001688, "loss": 2.0668, "step": 29452 }, { "epoch": 0.7903875053671103, "grad_norm": 0.1923828125, "learning_rate": 0.0009087022768298967, "loss": 2.1147, "step": 29453 }, { "epoch": 0.7904143409188493, "grad_norm": 0.2001953125, "learning_rate": 0.000908678179182423, "loss": 2.1339, "step": 29454 }, { "epoch": 0.7904411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.0009086540808578, "loss": 2.2707, "step": 29455 }, { "epoch": 0.7904680120223272, "grad_norm": 0.197265625, "learning_rate": 0.0009086299818560809, "loss": 2.1164, "step": 29456 }, { "epoch": 0.7904948475740661, "grad_norm": 0.197265625, "learning_rate": 0.0009086058821773183, "loss": 2.0906, "step": 29457 }, { "epoch": 0.7905216831258051, "grad_norm": 0.1962890625, "learning_rate": 0.0009085817818215654, "loss": 2.0788, "step": 29458 }, { "epoch": 0.790548518677544, "grad_norm": 0.1982421875, "learning_rate": 0.0009085576807888747, "loss": 2.1459, "step": 29459 }, { "epoch": 0.7905753542292829, "grad_norm": 0.197265625, "learning_rate": 0.0009085335790792992, "loss": 2.184, "step": 29460 }, { "epoch": 0.7906021897810219, "grad_norm": 0.1884765625, "learning_rate": 0.0009085094766928918, "loss": 2.0725, "step": 29461 }, { "epoch": 0.7906290253327608, "grad_norm": 0.1904296875, "learning_rate": 0.0009084853736297054, "loss": 2.1362, "step": 29462 }, { "epoch": 0.7906558608844998, "grad_norm": 0.1962890625, "learning_rate": 0.0009084612698897928, "loss": 2.0544, "step": 29463 }, { "epoch": 0.7906826964362387, "grad_norm": 0.1884765625, "learning_rate": 0.0009084371654732069, "loss": 2.1008, "step": 29464 }, { "epoch": 0.7907095319879777, "grad_norm": 0.1875, "learning_rate": 0.0009084130603800008, "loss": 2.0057, "step": 29465 }, { "epoch": 0.7907363675397167, "grad_norm": 0.1875, "learning_rate": 0.000908388954610227, "loss": 2.0657, "step": 29466 }, { "epoch": 0.7907632030914555, "grad_norm": 0.1943359375, "learning_rate": 0.0009083648481639387, "loss": 2.1457, "step": 29467 }, { "epoch": 0.7907900386431945, "grad_norm": 0.1962890625, "learning_rate": 0.0009083407410411884, "loss": 2.1332, "step": 29468 }, { "epoch": 0.7908168741949334, "grad_norm": 0.1962890625, "learning_rate": 0.0009083166332420295, "loss": 2.1046, "step": 29469 }, { "epoch": 0.7908437097466724, "grad_norm": 0.1923828125, "learning_rate": 0.0009082925247665144, "loss": 2.0298, "step": 29470 }, { "epoch": 0.7908705452984114, "grad_norm": 0.1923828125, "learning_rate": 0.0009082684156146962, "loss": 2.1191, "step": 29471 }, { "epoch": 0.7908973808501503, "grad_norm": 0.19921875, "learning_rate": 0.0009082443057866281, "loss": 2.292, "step": 29472 }, { "epoch": 0.7909242164018893, "grad_norm": 0.1904296875, "learning_rate": 0.0009082201952823625, "loss": 2.1872, "step": 29473 }, { "epoch": 0.7909510519536281, "grad_norm": 0.1943359375, "learning_rate": 0.0009081960841019524, "loss": 2.119, "step": 29474 }, { "epoch": 0.7909778875053671, "grad_norm": 0.19921875, "learning_rate": 0.0009081719722454507, "loss": 2.1391, "step": 29475 }, { "epoch": 0.791004723057106, "grad_norm": 0.1953125, "learning_rate": 0.0009081478597129106, "loss": 2.15, "step": 29476 }, { "epoch": 0.791031558608845, "grad_norm": 0.1982421875, "learning_rate": 0.0009081237465043844, "loss": 2.1215, "step": 29477 }, { "epoch": 0.791058394160584, "grad_norm": 0.2021484375, "learning_rate": 0.0009080996326199256, "loss": 2.1284, "step": 29478 }, { "epoch": 0.7910852297123229, "grad_norm": 0.1904296875, "learning_rate": 0.0009080755180595869, "loss": 2.0952, "step": 29479 }, { "epoch": 0.7911120652640619, "grad_norm": 0.1962890625, "learning_rate": 0.0009080514028234211, "loss": 2.1769, "step": 29480 }, { "epoch": 0.7911389008158007, "grad_norm": 0.19140625, "learning_rate": 0.0009080272869114812, "loss": 2.0556, "step": 29481 }, { "epoch": 0.7911657363675397, "grad_norm": 0.1953125, "learning_rate": 0.0009080031703238198, "loss": 2.1449, "step": 29482 }, { "epoch": 0.7911925719192786, "grad_norm": 0.193359375, "learning_rate": 0.0009079790530604904, "loss": 2.1806, "step": 29483 }, { "epoch": 0.7912194074710176, "grad_norm": 0.1982421875, "learning_rate": 0.0009079549351215453, "loss": 2.113, "step": 29484 }, { "epoch": 0.7912462430227566, "grad_norm": 0.1943359375, "learning_rate": 0.0009079308165070379, "loss": 2.1064, "step": 29485 }, { "epoch": 0.7912730785744955, "grad_norm": 0.1962890625, "learning_rate": 0.0009079066972170207, "loss": 2.1227, "step": 29486 }, { "epoch": 0.7912999141262345, "grad_norm": 0.1982421875, "learning_rate": 0.000907882577251547, "loss": 2.1993, "step": 29487 }, { "epoch": 0.7913267496779733, "grad_norm": 0.193359375, "learning_rate": 0.0009078584566106693, "loss": 2.1799, "step": 29488 }, { "epoch": 0.7913535852297123, "grad_norm": 0.1943359375, "learning_rate": 0.0009078343352944407, "loss": 2.185, "step": 29489 }, { "epoch": 0.7913804207814513, "grad_norm": 0.1923828125, "learning_rate": 0.0009078102133029142, "loss": 2.1189, "step": 29490 }, { "epoch": 0.7914072563331902, "grad_norm": 0.1943359375, "learning_rate": 0.0009077860906361426, "loss": 2.1514, "step": 29491 }, { "epoch": 0.7914340918849292, "grad_norm": 0.19140625, "learning_rate": 0.000907761967294179, "loss": 2.1323, "step": 29492 }, { "epoch": 0.7914609274366681, "grad_norm": 0.1923828125, "learning_rate": 0.0009077378432770761, "loss": 2.1667, "step": 29493 }, { "epoch": 0.7914877629884071, "grad_norm": 0.1943359375, "learning_rate": 0.000907713718584887, "loss": 2.1596, "step": 29494 }, { "epoch": 0.7915145985401459, "grad_norm": 0.2001953125, "learning_rate": 0.0009076895932176644, "loss": 2.1276, "step": 29495 }, { "epoch": 0.7915414340918849, "grad_norm": 0.19921875, "learning_rate": 0.0009076654671754614, "loss": 2.1839, "step": 29496 }, { "epoch": 0.7915682696436239, "grad_norm": 0.1943359375, "learning_rate": 0.0009076413404583308, "loss": 2.19, "step": 29497 }, { "epoch": 0.7915951051953628, "grad_norm": 0.2138671875, "learning_rate": 0.0009076172130663261, "loss": 2.2224, "step": 29498 }, { "epoch": 0.7916219407471018, "grad_norm": 0.19140625, "learning_rate": 0.0009075930849994991, "loss": 2.1369, "step": 29499 }, { "epoch": 0.7916487762988407, "grad_norm": 0.19140625, "learning_rate": 0.0009075689562579036, "loss": 2.1507, "step": 29500 }, { "epoch": 0.7916756118505797, "grad_norm": 0.1943359375, "learning_rate": 0.0009075448268415923, "loss": 2.2045, "step": 29501 }, { "epoch": 0.7917024474023185, "grad_norm": 0.193359375, "learning_rate": 0.0009075206967506184, "loss": 2.1453, "step": 29502 }, { "epoch": 0.7917292829540575, "grad_norm": 0.1923828125, "learning_rate": 0.000907496565985034, "loss": 2.1566, "step": 29503 }, { "epoch": 0.7917561185057965, "grad_norm": 0.193359375, "learning_rate": 0.000907472434544893, "loss": 2.2042, "step": 29504 }, { "epoch": 0.7917829540575354, "grad_norm": 0.1923828125, "learning_rate": 0.000907448302430248, "loss": 2.0866, "step": 29505 }, { "epoch": 0.7918097896092744, "grad_norm": 0.193359375, "learning_rate": 0.0009074241696411516, "loss": 2.1147, "step": 29506 }, { "epoch": 0.7918366251610133, "grad_norm": 0.19140625, "learning_rate": 0.0009074000361776573, "loss": 2.1044, "step": 29507 }, { "epoch": 0.7918634607127523, "grad_norm": 0.193359375, "learning_rate": 0.0009073759020398174, "loss": 2.1813, "step": 29508 }, { "epoch": 0.7918902962644911, "grad_norm": 0.1923828125, "learning_rate": 0.0009073517672276856, "loss": 2.0875, "step": 29509 }, { "epoch": 0.7919171318162301, "grad_norm": 0.193359375, "learning_rate": 0.0009073276317413143, "loss": 2.0614, "step": 29510 }, { "epoch": 0.7919439673679691, "grad_norm": 0.193359375, "learning_rate": 0.0009073034955807564, "loss": 2.1546, "step": 29511 }, { "epoch": 0.791970802919708, "grad_norm": 0.1904296875, "learning_rate": 0.0009072793587460652, "loss": 2.1755, "step": 29512 }, { "epoch": 0.791997638471447, "grad_norm": 0.1923828125, "learning_rate": 0.0009072552212372936, "loss": 2.0944, "step": 29513 }, { "epoch": 0.7920244740231859, "grad_norm": 0.1884765625, "learning_rate": 0.0009072310830544943, "loss": 2.1112, "step": 29514 }, { "epoch": 0.7920513095749249, "grad_norm": 0.19140625, "learning_rate": 0.0009072069441977203, "loss": 2.0983, "step": 29515 }, { "epoch": 0.7920781451266639, "grad_norm": 0.189453125, "learning_rate": 0.0009071828046670248, "loss": 2.1215, "step": 29516 }, { "epoch": 0.7921049806784027, "grad_norm": 0.1904296875, "learning_rate": 0.0009071586644624605, "loss": 2.116, "step": 29517 }, { "epoch": 0.7921318162301417, "grad_norm": 0.1923828125, "learning_rate": 0.0009071345235840807, "loss": 2.178, "step": 29518 }, { "epoch": 0.7921586517818806, "grad_norm": 0.19921875, "learning_rate": 0.0009071103820319378, "loss": 2.1262, "step": 29519 }, { "epoch": 0.7921854873336196, "grad_norm": 0.1953125, "learning_rate": 0.0009070862398060852, "loss": 2.1213, "step": 29520 }, { "epoch": 0.7922123228853585, "grad_norm": 0.1923828125, "learning_rate": 0.0009070620969065758, "loss": 2.2276, "step": 29521 }, { "epoch": 0.7922391584370975, "grad_norm": 0.19140625, "learning_rate": 0.0009070379533334624, "loss": 2.0904, "step": 29522 }, { "epoch": 0.7922659939888365, "grad_norm": 0.1923828125, "learning_rate": 0.0009070138090867981, "loss": 2.1049, "step": 29523 }, { "epoch": 0.7922928295405753, "grad_norm": 0.1943359375, "learning_rate": 0.0009069896641666358, "loss": 2.1934, "step": 29524 }, { "epoch": 0.7923196650923143, "grad_norm": 0.19140625, "learning_rate": 0.0009069655185730285, "loss": 2.1102, "step": 29525 }, { "epoch": 0.7923465006440532, "grad_norm": 0.1923828125, "learning_rate": 0.0009069413723060292, "loss": 2.1443, "step": 29526 }, { "epoch": 0.7923733361957922, "grad_norm": 0.189453125, "learning_rate": 0.000906917225365691, "loss": 2.151, "step": 29527 }, { "epoch": 0.7924001717475311, "grad_norm": 0.1923828125, "learning_rate": 0.0009068930777520663, "loss": 2.1156, "step": 29528 }, { "epoch": 0.7924270072992701, "grad_norm": 0.1943359375, "learning_rate": 0.0009068689294652087, "loss": 2.1218, "step": 29529 }, { "epoch": 0.7924538428510091, "grad_norm": 0.193359375, "learning_rate": 0.000906844780505171, "loss": 2.1054, "step": 29530 }, { "epoch": 0.7924806784027479, "grad_norm": 0.193359375, "learning_rate": 0.0009068206308720062, "loss": 2.1829, "step": 29531 }, { "epoch": 0.7925075139544869, "grad_norm": 0.1953125, "learning_rate": 0.0009067964805657669, "loss": 2.1864, "step": 29532 }, { "epoch": 0.7925343495062258, "grad_norm": 0.1943359375, "learning_rate": 0.0009067723295865067, "loss": 2.1529, "step": 29533 }, { "epoch": 0.7925611850579648, "grad_norm": 0.1962890625, "learning_rate": 0.0009067481779342782, "loss": 2.1207, "step": 29534 }, { "epoch": 0.7925880206097037, "grad_norm": 0.19140625, "learning_rate": 0.0009067240256091343, "loss": 2.136, "step": 29535 }, { "epoch": 0.7926148561614427, "grad_norm": 0.2001953125, "learning_rate": 0.0009066998726111282, "loss": 2.1405, "step": 29536 }, { "epoch": 0.7926416917131817, "grad_norm": 0.203125, "learning_rate": 0.0009066757189403127, "loss": 2.1008, "step": 29537 }, { "epoch": 0.7926685272649205, "grad_norm": 0.1962890625, "learning_rate": 0.0009066515645967412, "loss": 2.1273, "step": 29538 }, { "epoch": 0.7926953628166595, "grad_norm": 0.1953125, "learning_rate": 0.0009066274095804661, "loss": 2.2123, "step": 29539 }, { "epoch": 0.7927221983683984, "grad_norm": 0.1923828125, "learning_rate": 0.0009066032538915408, "loss": 2.1387, "step": 29540 }, { "epoch": 0.7927490339201374, "grad_norm": 0.1962890625, "learning_rate": 0.0009065790975300182, "loss": 2.1407, "step": 29541 }, { "epoch": 0.7927758694718764, "grad_norm": 0.19140625, "learning_rate": 0.000906554940495951, "loss": 2.1322, "step": 29542 }, { "epoch": 0.7928027050236153, "grad_norm": 0.19140625, "learning_rate": 0.0009065307827893927, "loss": 2.1332, "step": 29543 }, { "epoch": 0.7928295405753543, "grad_norm": 0.1943359375, "learning_rate": 0.0009065066244103961, "loss": 2.1868, "step": 29544 }, { "epoch": 0.7928563761270931, "grad_norm": 0.1923828125, "learning_rate": 0.000906482465359014, "loss": 2.1902, "step": 29545 }, { "epoch": 0.7928832116788321, "grad_norm": 0.193359375, "learning_rate": 0.0009064583056352996, "loss": 2.166, "step": 29546 }, { "epoch": 0.792910047230571, "grad_norm": 0.1953125, "learning_rate": 0.0009064341452393056, "loss": 2.164, "step": 29547 }, { "epoch": 0.79293688278231, "grad_norm": 0.1982421875, "learning_rate": 0.0009064099841710855, "loss": 2.1872, "step": 29548 }, { "epoch": 0.792963718334049, "grad_norm": 0.197265625, "learning_rate": 0.0009063858224306921, "loss": 2.1113, "step": 29549 }, { "epoch": 0.7929905538857879, "grad_norm": 0.1884765625, "learning_rate": 0.0009063616600181782, "loss": 2.1478, "step": 29550 }, { "epoch": 0.7930173894375269, "grad_norm": 0.1884765625, "learning_rate": 0.0009063374969335969, "loss": 2.128, "step": 29551 }, { "epoch": 0.7930442249892657, "grad_norm": 0.193359375, "learning_rate": 0.0009063133331770015, "loss": 2.1541, "step": 29552 }, { "epoch": 0.7930710605410047, "grad_norm": 0.193359375, "learning_rate": 0.0009062891687484445, "loss": 2.1093, "step": 29553 }, { "epoch": 0.7930978960927436, "grad_norm": 0.1923828125, "learning_rate": 0.0009062650036479793, "loss": 2.1811, "step": 29554 }, { "epoch": 0.7931247316444826, "grad_norm": 0.197265625, "learning_rate": 0.0009062408378756587, "loss": 2.1121, "step": 29555 }, { "epoch": 0.7931515671962216, "grad_norm": 0.19921875, "learning_rate": 0.000906216671431536, "loss": 2.2035, "step": 29556 }, { "epoch": 0.7931784027479605, "grad_norm": 0.1982421875, "learning_rate": 0.0009061925043156639, "loss": 2.1582, "step": 29557 }, { "epoch": 0.7932052382996995, "grad_norm": 0.1953125, "learning_rate": 0.0009061683365280955, "loss": 2.1898, "step": 29558 }, { "epoch": 0.7932320738514383, "grad_norm": 0.19140625, "learning_rate": 0.0009061441680688838, "loss": 2.1607, "step": 29559 }, { "epoch": 0.7932589094031773, "grad_norm": 0.19921875, "learning_rate": 0.000906119998938082, "loss": 2.1975, "step": 29560 }, { "epoch": 0.7932857449549163, "grad_norm": 0.1962890625, "learning_rate": 0.0009060958291357427, "loss": 2.145, "step": 29561 }, { "epoch": 0.7933125805066552, "grad_norm": 0.1923828125, "learning_rate": 0.0009060716586619195, "loss": 2.0788, "step": 29562 }, { "epoch": 0.7933394160583942, "grad_norm": 0.1953125, "learning_rate": 0.0009060474875166652, "loss": 2.2688, "step": 29563 }, { "epoch": 0.7933662516101331, "grad_norm": 0.1923828125, "learning_rate": 0.0009060233157000325, "loss": 2.1357, "step": 29564 }, { "epoch": 0.7933930871618721, "grad_norm": 0.189453125, "learning_rate": 0.0009059991432120748, "loss": 2.0798, "step": 29565 }, { "epoch": 0.793419922713611, "grad_norm": 0.1923828125, "learning_rate": 0.0009059749700528448, "loss": 2.0887, "step": 29566 }, { "epoch": 0.7934467582653499, "grad_norm": 0.1962890625, "learning_rate": 0.000905950796222396, "loss": 2.1342, "step": 29567 }, { "epoch": 0.7934735938170889, "grad_norm": 0.19140625, "learning_rate": 0.0009059266217207811, "loss": 2.1972, "step": 29568 }, { "epoch": 0.7935004293688278, "grad_norm": 0.1943359375, "learning_rate": 0.0009059024465480532, "loss": 2.1459, "step": 29569 }, { "epoch": 0.7935272649205668, "grad_norm": 0.1953125, "learning_rate": 0.0009058782707042651, "loss": 2.1642, "step": 29570 }, { "epoch": 0.7935541004723057, "grad_norm": 0.1943359375, "learning_rate": 0.0009058540941894706, "loss": 2.191, "step": 29571 }, { "epoch": 0.7935809360240447, "grad_norm": 0.1904296875, "learning_rate": 0.0009058299170037219, "loss": 2.0933, "step": 29572 }, { "epoch": 0.7936077715757835, "grad_norm": 0.189453125, "learning_rate": 0.0009058057391470721, "loss": 2.1754, "step": 29573 }, { "epoch": 0.7936346071275225, "grad_norm": 0.193359375, "learning_rate": 0.0009057815606195747, "loss": 2.1021, "step": 29574 }, { "epoch": 0.7936614426792615, "grad_norm": 0.19140625, "learning_rate": 0.0009057573814212825, "loss": 2.1116, "step": 29575 }, { "epoch": 0.7936882782310004, "grad_norm": 0.1962890625, "learning_rate": 0.0009057332015522486, "loss": 2.1841, "step": 29576 }, { "epoch": 0.7937151137827394, "grad_norm": 0.193359375, "learning_rate": 0.000905709021012526, "loss": 2.1344, "step": 29577 }, { "epoch": 0.7937419493344783, "grad_norm": 0.203125, "learning_rate": 0.0009056848398021676, "loss": 2.09, "step": 29578 }, { "epoch": 0.7937687848862173, "grad_norm": 0.1904296875, "learning_rate": 0.0009056606579212268, "loss": 2.077, "step": 29579 }, { "epoch": 0.7937956204379562, "grad_norm": 0.203125, "learning_rate": 0.0009056364753697564, "loss": 2.169, "step": 29580 }, { "epoch": 0.7938224559896951, "grad_norm": 0.203125, "learning_rate": 0.0009056122921478095, "loss": 2.1688, "step": 29581 }, { "epoch": 0.7938492915414341, "grad_norm": 0.1884765625, "learning_rate": 0.000905588108255439, "loss": 2.1283, "step": 29582 }, { "epoch": 0.793876127093173, "grad_norm": 0.1923828125, "learning_rate": 0.0009055639236926984, "loss": 2.1379, "step": 29583 }, { "epoch": 0.793902962644912, "grad_norm": 0.189453125, "learning_rate": 0.0009055397384596399, "loss": 2.1726, "step": 29584 }, { "epoch": 0.7939297981966509, "grad_norm": 0.1943359375, "learning_rate": 0.0009055155525563177, "loss": 2.2318, "step": 29585 }, { "epoch": 0.7939566337483899, "grad_norm": 0.1943359375, "learning_rate": 0.000905491365982784, "loss": 2.1099, "step": 29586 }, { "epoch": 0.7939834693001289, "grad_norm": 0.1962890625, "learning_rate": 0.000905467178739092, "loss": 2.1725, "step": 29587 }, { "epoch": 0.7940103048518677, "grad_norm": 0.1923828125, "learning_rate": 0.000905442990825295, "loss": 2.149, "step": 29588 }, { "epoch": 0.7940371404036067, "grad_norm": 0.193359375, "learning_rate": 0.0009054188022414461, "loss": 2.1863, "step": 29589 }, { "epoch": 0.7940639759553456, "grad_norm": 0.19921875, "learning_rate": 0.0009053946129875979, "loss": 2.2141, "step": 29590 }, { "epoch": 0.7940908115070846, "grad_norm": 0.189453125, "learning_rate": 0.0009053704230638039, "loss": 2.123, "step": 29591 }, { "epoch": 0.7941176470588235, "grad_norm": 0.193359375, "learning_rate": 0.0009053462324701171, "loss": 2.2609, "step": 29592 }, { "epoch": 0.7941444826105625, "grad_norm": 0.19140625, "learning_rate": 0.0009053220412065905, "loss": 2.1191, "step": 29593 }, { "epoch": 0.7941713181623015, "grad_norm": 0.1953125, "learning_rate": 0.0009052978492732771, "loss": 2.2064, "step": 29594 }, { "epoch": 0.7941981537140403, "grad_norm": 0.1923828125, "learning_rate": 0.0009052736566702299, "loss": 2.1116, "step": 29595 }, { "epoch": 0.7942249892657793, "grad_norm": 0.1953125, "learning_rate": 0.0009052494633975025, "loss": 2.1425, "step": 29596 }, { "epoch": 0.7942518248175182, "grad_norm": 0.185546875, "learning_rate": 0.0009052252694551473, "loss": 2.0925, "step": 29597 }, { "epoch": 0.7942786603692572, "grad_norm": 0.201171875, "learning_rate": 0.0009052010748432177, "loss": 2.1922, "step": 29598 }, { "epoch": 0.7943054959209961, "grad_norm": 0.2001953125, "learning_rate": 0.0009051768795617666, "loss": 2.1364, "step": 29599 }, { "epoch": 0.7943323314727351, "grad_norm": 0.1943359375, "learning_rate": 0.0009051526836108475, "loss": 2.1489, "step": 29600 }, { "epoch": 0.7943591670244741, "grad_norm": 0.19921875, "learning_rate": 0.000905128486990513, "loss": 2.1868, "step": 29601 }, { "epoch": 0.794386002576213, "grad_norm": 0.1943359375, "learning_rate": 0.0009051042897008165, "loss": 2.1111, "step": 29602 }, { "epoch": 0.7944128381279519, "grad_norm": 0.1884765625, "learning_rate": 0.0009050800917418108, "loss": 2.1025, "step": 29603 }, { "epoch": 0.7944396736796908, "grad_norm": 0.1962890625, "learning_rate": 0.0009050558931135491, "loss": 2.251, "step": 29604 }, { "epoch": 0.7944665092314298, "grad_norm": 0.1953125, "learning_rate": 0.0009050316938160847, "loss": 2.1604, "step": 29605 }, { "epoch": 0.7944933447831687, "grad_norm": 0.189453125, "learning_rate": 0.0009050074938494703, "loss": 2.0728, "step": 29606 }, { "epoch": 0.7945201803349077, "grad_norm": 0.1943359375, "learning_rate": 0.0009049832932137594, "loss": 2.1243, "step": 29607 }, { "epoch": 0.7945470158866467, "grad_norm": 0.1962890625, "learning_rate": 0.0009049590919090048, "loss": 2.1279, "step": 29608 }, { "epoch": 0.7945738514383855, "grad_norm": 0.1953125, "learning_rate": 0.0009049348899352596, "loss": 2.081, "step": 29609 }, { "epoch": 0.7946006869901245, "grad_norm": 0.1923828125, "learning_rate": 0.0009049106872925771, "loss": 2.1826, "step": 29610 }, { "epoch": 0.7946275225418634, "grad_norm": 0.1904296875, "learning_rate": 0.0009048864839810103, "loss": 2.161, "step": 29611 }, { "epoch": 0.7946543580936024, "grad_norm": 0.19140625, "learning_rate": 0.0009048622800006121, "loss": 2.1585, "step": 29612 }, { "epoch": 0.7946811936453414, "grad_norm": 0.1943359375, "learning_rate": 0.0009048380753514356, "loss": 2.1852, "step": 29613 }, { "epoch": 0.7947080291970803, "grad_norm": 0.185546875, "learning_rate": 0.0009048138700335344, "loss": 2.1415, "step": 29614 }, { "epoch": 0.7947348647488193, "grad_norm": 0.193359375, "learning_rate": 0.0009047896640469611, "loss": 2.2104, "step": 29615 }, { "epoch": 0.7947617003005581, "grad_norm": 0.193359375, "learning_rate": 0.000904765457391769, "loss": 2.1987, "step": 29616 }, { "epoch": 0.7947885358522971, "grad_norm": 0.189453125, "learning_rate": 0.0009047412500680109, "loss": 2.0968, "step": 29617 }, { "epoch": 0.794815371404036, "grad_norm": 0.1953125, "learning_rate": 0.0009047170420757405, "loss": 2.1882, "step": 29618 }, { "epoch": 0.794842206955775, "grad_norm": 0.1962890625, "learning_rate": 0.0009046928334150103, "loss": 2.1304, "step": 29619 }, { "epoch": 0.794869042507514, "grad_norm": 0.1962890625, "learning_rate": 0.0009046686240858738, "loss": 2.149, "step": 29620 }, { "epoch": 0.7948958780592529, "grad_norm": 0.2001953125, "learning_rate": 0.0009046444140883838, "loss": 2.2025, "step": 29621 }, { "epoch": 0.7949227136109919, "grad_norm": 0.2001953125, "learning_rate": 0.000904620203422594, "loss": 2.185, "step": 29622 }, { "epoch": 0.7949495491627308, "grad_norm": 0.19140625, "learning_rate": 0.0009045959920885567, "loss": 2.0994, "step": 29623 }, { "epoch": 0.7949763847144697, "grad_norm": 0.1962890625, "learning_rate": 0.0009045717800863256, "loss": 2.1827, "step": 29624 }, { "epoch": 0.7950032202662086, "grad_norm": 0.1943359375, "learning_rate": 0.0009045475674159534, "loss": 2.2073, "step": 29625 }, { "epoch": 0.7950300558179476, "grad_norm": 0.193359375, "learning_rate": 0.0009045233540774935, "loss": 2.2242, "step": 29626 }, { "epoch": 0.7950568913696866, "grad_norm": 0.1962890625, "learning_rate": 0.0009044991400709992, "loss": 2.1843, "step": 29627 }, { "epoch": 0.7950837269214255, "grad_norm": 0.1943359375, "learning_rate": 0.0009044749253965232, "loss": 2.1077, "step": 29628 }, { "epoch": 0.7951105624731645, "grad_norm": 0.1953125, "learning_rate": 0.0009044507100541187, "loss": 2.2516, "step": 29629 }, { "epoch": 0.7951373980249034, "grad_norm": 0.189453125, "learning_rate": 0.000904426494043839, "loss": 2.174, "step": 29630 }, { "epoch": 0.7951642335766423, "grad_norm": 0.1953125, "learning_rate": 0.0009044022773657372, "loss": 2.1869, "step": 29631 }, { "epoch": 0.7951910691283813, "grad_norm": 0.1923828125, "learning_rate": 0.0009043780600198662, "loss": 2.1686, "step": 29632 }, { "epoch": 0.7952179046801202, "grad_norm": 0.193359375, "learning_rate": 0.0009043538420062796, "loss": 2.2398, "step": 29633 }, { "epoch": 0.7952447402318592, "grad_norm": 0.1904296875, "learning_rate": 0.0009043296233250299, "loss": 2.1927, "step": 29634 }, { "epoch": 0.7952715757835981, "grad_norm": 0.189453125, "learning_rate": 0.0009043054039761707, "loss": 2.1837, "step": 29635 }, { "epoch": 0.7952984113353371, "grad_norm": 0.193359375, "learning_rate": 0.0009042811839597547, "loss": 2.1979, "step": 29636 }, { "epoch": 0.795325246887076, "grad_norm": 0.1943359375, "learning_rate": 0.0009042569632758357, "loss": 2.2208, "step": 29637 }, { "epoch": 0.795352082438815, "grad_norm": 0.185546875, "learning_rate": 0.0009042327419244663, "loss": 2.1319, "step": 29638 }, { "epoch": 0.7953789179905539, "grad_norm": 0.201171875, "learning_rate": 0.0009042085199056996, "loss": 2.1218, "step": 29639 }, { "epoch": 0.7954057535422928, "grad_norm": 0.193359375, "learning_rate": 0.000904184297219589, "loss": 2.1874, "step": 29640 }, { "epoch": 0.7954325890940318, "grad_norm": 0.193359375, "learning_rate": 0.0009041600738661877, "loss": 2.1378, "step": 29641 }, { "epoch": 0.7954594246457707, "grad_norm": 0.2021484375, "learning_rate": 0.0009041358498455486, "loss": 2.1895, "step": 29642 }, { "epoch": 0.7954862601975097, "grad_norm": 0.19140625, "learning_rate": 0.0009041116251577248, "loss": 2.1121, "step": 29643 }, { "epoch": 0.7955130957492486, "grad_norm": 0.1953125, "learning_rate": 0.0009040873998027697, "loss": 2.23, "step": 29644 }, { "epoch": 0.7955399313009875, "grad_norm": 0.1953125, "learning_rate": 0.0009040631737807362, "loss": 2.2054, "step": 29645 }, { "epoch": 0.7955667668527265, "grad_norm": 0.193359375, "learning_rate": 0.0009040389470916776, "loss": 2.1631, "step": 29646 }, { "epoch": 0.7955936024044654, "grad_norm": 0.193359375, "learning_rate": 0.0009040147197356471, "loss": 2.2042, "step": 29647 }, { "epoch": 0.7956204379562044, "grad_norm": 0.193359375, "learning_rate": 0.0009039904917126976, "loss": 2.175, "step": 29648 }, { "epoch": 0.7956472735079433, "grad_norm": 0.1923828125, "learning_rate": 0.0009039662630228824, "loss": 2.1785, "step": 29649 }, { "epoch": 0.7956741090596823, "grad_norm": 0.1904296875, "learning_rate": 0.0009039420336662547, "loss": 2.1915, "step": 29650 }, { "epoch": 0.7957009446114212, "grad_norm": 0.19140625, "learning_rate": 0.0009039178036428676, "loss": 2.2135, "step": 29651 }, { "epoch": 0.7957277801631601, "grad_norm": 0.193359375, "learning_rate": 0.0009038935729527743, "loss": 2.2478, "step": 29652 }, { "epoch": 0.7957546157148991, "grad_norm": 0.1962890625, "learning_rate": 0.0009038693415960279, "loss": 2.2423, "step": 29653 }, { "epoch": 0.795781451266638, "grad_norm": 0.1923828125, "learning_rate": 0.0009038451095726816, "loss": 2.1726, "step": 29654 }, { "epoch": 0.795808286818377, "grad_norm": 0.1953125, "learning_rate": 0.0009038208768827884, "loss": 2.2089, "step": 29655 }, { "epoch": 0.7958351223701159, "grad_norm": 0.1943359375, "learning_rate": 0.0009037966435264017, "loss": 2.1804, "step": 29656 }, { "epoch": 0.7958619579218549, "grad_norm": 0.1923828125, "learning_rate": 0.0009037724095035743, "loss": 2.1479, "step": 29657 }, { "epoch": 0.7958887934735939, "grad_norm": 0.1904296875, "learning_rate": 0.00090374817481436, "loss": 2.149, "step": 29658 }, { "epoch": 0.7959156290253327, "grad_norm": 0.1953125, "learning_rate": 0.0009037239394588112, "loss": 2.1447, "step": 29659 }, { "epoch": 0.7959424645770717, "grad_norm": 0.197265625, "learning_rate": 0.0009036997034369815, "loss": 2.2377, "step": 29660 }, { "epoch": 0.7959693001288106, "grad_norm": 0.19140625, "learning_rate": 0.0009036754667489242, "loss": 2.1386, "step": 29661 }, { "epoch": 0.7959961356805496, "grad_norm": 0.1943359375, "learning_rate": 0.000903651229394692, "loss": 2.2123, "step": 29662 }, { "epoch": 0.7960229712322885, "grad_norm": 0.19140625, "learning_rate": 0.0009036269913743386, "loss": 2.1515, "step": 29663 }, { "epoch": 0.7960498067840275, "grad_norm": 0.1904296875, "learning_rate": 0.0009036027526879169, "loss": 2.1274, "step": 29664 }, { "epoch": 0.7960766423357665, "grad_norm": 0.1943359375, "learning_rate": 0.0009035785133354799, "loss": 2.2376, "step": 29665 }, { "epoch": 0.7961034778875054, "grad_norm": 0.193359375, "learning_rate": 0.000903554273317081, "loss": 2.1665, "step": 29666 }, { "epoch": 0.7961303134392443, "grad_norm": 0.193359375, "learning_rate": 0.0009035300326327734, "loss": 2.1048, "step": 29667 }, { "epoch": 0.7961571489909832, "grad_norm": 0.1884765625, "learning_rate": 0.00090350579128261, "loss": 2.1459, "step": 29668 }, { "epoch": 0.7961839845427222, "grad_norm": 0.19140625, "learning_rate": 0.0009034815492666447, "loss": 2.1872, "step": 29669 }, { "epoch": 0.7962108200944611, "grad_norm": 0.193359375, "learning_rate": 0.0009034573065849297, "loss": 2.2234, "step": 29670 }, { "epoch": 0.7962376556462001, "grad_norm": 0.1962890625, "learning_rate": 0.0009034330632375186, "loss": 2.1672, "step": 29671 }, { "epoch": 0.7962644911979391, "grad_norm": 0.1943359375, "learning_rate": 0.0009034088192244649, "loss": 2.1958, "step": 29672 }, { "epoch": 0.796291326749678, "grad_norm": 0.193359375, "learning_rate": 0.0009033845745458214, "loss": 2.2612, "step": 29673 }, { "epoch": 0.7963181623014169, "grad_norm": 0.1962890625, "learning_rate": 0.0009033603292016415, "loss": 2.1718, "step": 29674 }, { "epoch": 0.7963449978531558, "grad_norm": 0.1884765625, "learning_rate": 0.0009033360831919782, "loss": 2.1411, "step": 29675 }, { "epoch": 0.7963718334048948, "grad_norm": 0.1904296875, "learning_rate": 0.0009033118365168847, "loss": 2.1551, "step": 29676 }, { "epoch": 0.7963986689566337, "grad_norm": 0.19140625, "learning_rate": 0.0009032875891764144, "loss": 2.1439, "step": 29677 }, { "epoch": 0.7964255045083727, "grad_norm": 0.1923828125, "learning_rate": 0.0009032633411706204, "loss": 2.1359, "step": 29678 }, { "epoch": 0.7964523400601117, "grad_norm": 0.1962890625, "learning_rate": 0.0009032390924995556, "loss": 2.1971, "step": 29679 }, { "epoch": 0.7964791756118506, "grad_norm": 0.19921875, "learning_rate": 0.0009032148431632737, "loss": 2.2188, "step": 29680 }, { "epoch": 0.7965060111635895, "grad_norm": 0.1953125, "learning_rate": 0.0009031905931618276, "loss": 2.1428, "step": 29681 }, { "epoch": 0.7965328467153284, "grad_norm": 0.201171875, "learning_rate": 0.0009031663424952704, "loss": 2.2255, "step": 29682 }, { "epoch": 0.7965596822670674, "grad_norm": 0.1923828125, "learning_rate": 0.0009031420911636555, "loss": 2.1988, "step": 29683 }, { "epoch": 0.7965865178188064, "grad_norm": 0.19140625, "learning_rate": 0.000903117839167036, "loss": 2.1607, "step": 29684 }, { "epoch": 0.7966133533705453, "grad_norm": 0.1923828125, "learning_rate": 0.0009030935865054653, "loss": 2.1586, "step": 29685 }, { "epoch": 0.7966401889222843, "grad_norm": 0.1923828125, "learning_rate": 0.0009030693331789963, "loss": 2.1966, "step": 29686 }, { "epoch": 0.7966670244740232, "grad_norm": 0.1826171875, "learning_rate": 0.0009030450791876825, "loss": 1.9907, "step": 29687 }, { "epoch": 0.7966938600257621, "grad_norm": 0.1904296875, "learning_rate": 0.0009030208245315768, "loss": 2.1219, "step": 29688 }, { "epoch": 0.796720695577501, "grad_norm": 0.1923828125, "learning_rate": 0.0009029965692107325, "loss": 2.1679, "step": 29689 }, { "epoch": 0.79674753112924, "grad_norm": 0.193359375, "learning_rate": 0.0009029723132252031, "loss": 2.208, "step": 29690 }, { "epoch": 0.796774366680979, "grad_norm": 0.1904296875, "learning_rate": 0.0009029480565750415, "loss": 2.1673, "step": 29691 }, { "epoch": 0.7968012022327179, "grad_norm": 0.189453125, "learning_rate": 0.0009029237992603009, "loss": 2.2351, "step": 29692 }, { "epoch": 0.7968280377844569, "grad_norm": 0.193359375, "learning_rate": 0.0009028995412810345, "loss": 2.0962, "step": 29693 }, { "epoch": 0.7968548733361958, "grad_norm": 0.1904296875, "learning_rate": 0.0009028752826372958, "loss": 2.1712, "step": 29694 }, { "epoch": 0.7968817088879347, "grad_norm": 0.193359375, "learning_rate": 0.0009028510233291379, "loss": 2.1885, "step": 29695 }, { "epoch": 0.7969085444396736, "grad_norm": 0.1904296875, "learning_rate": 0.0009028267633566138, "loss": 2.1478, "step": 29696 }, { "epoch": 0.7969353799914126, "grad_norm": 0.1904296875, "learning_rate": 0.0009028025027197769, "loss": 2.1788, "step": 29697 }, { "epoch": 0.7969622155431516, "grad_norm": 0.1953125, "learning_rate": 0.0009027782414186805, "loss": 2.1847, "step": 29698 }, { "epoch": 0.7969890510948905, "grad_norm": 0.193359375, "learning_rate": 0.0009027539794533777, "loss": 2.1868, "step": 29699 }, { "epoch": 0.7970158866466295, "grad_norm": 0.1943359375, "learning_rate": 0.0009027297168239216, "loss": 2.2008, "step": 29700 }, { "epoch": 0.7970427221983684, "grad_norm": 0.189453125, "learning_rate": 0.0009027054535303657, "loss": 2.1655, "step": 29701 }, { "epoch": 0.7970695577501073, "grad_norm": 0.203125, "learning_rate": 0.0009026811895727631, "loss": 2.1608, "step": 29702 }, { "epoch": 0.7970963933018463, "grad_norm": 0.2041015625, "learning_rate": 0.000902656924951167, "loss": 2.1829, "step": 29703 }, { "epoch": 0.7971232288535852, "grad_norm": 0.19140625, "learning_rate": 0.0009026326596656305, "loss": 2.0943, "step": 29704 }, { "epoch": 0.7971500644053242, "grad_norm": 0.1962890625, "learning_rate": 0.0009026083937162074, "loss": 2.1963, "step": 29705 }, { "epoch": 0.7971768999570631, "grad_norm": 0.193359375, "learning_rate": 0.0009025841271029501, "loss": 2.2328, "step": 29706 }, { "epoch": 0.7972037355088021, "grad_norm": 0.1904296875, "learning_rate": 0.0009025598598259122, "loss": 2.152, "step": 29707 }, { "epoch": 0.797230571060541, "grad_norm": 0.1953125, "learning_rate": 0.0009025355918851472, "loss": 2.1389, "step": 29708 }, { "epoch": 0.79725740661228, "grad_norm": 0.1923828125, "learning_rate": 0.000902511323280708, "loss": 2.1226, "step": 29709 }, { "epoch": 0.7972842421640189, "grad_norm": 0.1943359375, "learning_rate": 0.0009024870540126481, "loss": 2.1672, "step": 29710 }, { "epoch": 0.7973110777157578, "grad_norm": 0.1943359375, "learning_rate": 0.0009024627840810204, "loss": 2.1901, "step": 29711 }, { "epoch": 0.7973379132674968, "grad_norm": 0.193359375, "learning_rate": 0.0009024385134858785, "loss": 2.1816, "step": 29712 }, { "epoch": 0.7973647488192357, "grad_norm": 0.1923828125, "learning_rate": 0.0009024142422272756, "loss": 2.1013, "step": 29713 }, { "epoch": 0.7973915843709747, "grad_norm": 0.1904296875, "learning_rate": 0.0009023899703052647, "loss": 2.2154, "step": 29714 }, { "epoch": 0.7974184199227136, "grad_norm": 0.1904296875, "learning_rate": 0.0009023656977198989, "loss": 2.1855, "step": 29715 }, { "epoch": 0.7974452554744526, "grad_norm": 0.1943359375, "learning_rate": 0.000902341424471232, "loss": 2.2384, "step": 29716 }, { "epoch": 0.7974720910261915, "grad_norm": 0.1953125, "learning_rate": 0.0009023171505593171, "loss": 2.1874, "step": 29717 }, { "epoch": 0.7974989265779304, "grad_norm": 0.193359375, "learning_rate": 0.0009022928759842068, "loss": 2.1609, "step": 29718 }, { "epoch": 0.7975257621296694, "grad_norm": 0.1884765625, "learning_rate": 0.0009022686007459553, "loss": 2.2204, "step": 29719 }, { "epoch": 0.7975525976814083, "grad_norm": 0.1943359375, "learning_rate": 0.0009022443248446155, "loss": 2.1671, "step": 29720 }, { "epoch": 0.7975794332331473, "grad_norm": 0.1943359375, "learning_rate": 0.0009022200482802402, "loss": 2.1913, "step": 29721 }, { "epoch": 0.7976062687848862, "grad_norm": 0.1982421875, "learning_rate": 0.0009021957710528832, "loss": 2.1863, "step": 29722 }, { "epoch": 0.7976331043366252, "grad_norm": 0.1943359375, "learning_rate": 0.0009021714931625976, "loss": 2.1538, "step": 29723 }, { "epoch": 0.7976599398883641, "grad_norm": 0.1875, "learning_rate": 0.0009021472146094366, "loss": 2.1059, "step": 29724 }, { "epoch": 0.797686775440103, "grad_norm": 0.19921875, "learning_rate": 0.0009021229353934536, "loss": 2.1451, "step": 29725 }, { "epoch": 0.797713610991842, "grad_norm": 0.19140625, "learning_rate": 0.0009020986555147018, "loss": 2.1463, "step": 29726 }, { "epoch": 0.7977404465435809, "grad_norm": 0.1962890625, "learning_rate": 0.0009020743749732342, "loss": 2.2553, "step": 29727 }, { "epoch": 0.7977672820953199, "grad_norm": 0.1904296875, "learning_rate": 0.0009020500937691045, "loss": 2.137, "step": 29728 }, { "epoch": 0.7977941176470589, "grad_norm": 0.193359375, "learning_rate": 0.0009020258119023656, "loss": 2.2085, "step": 29729 }, { "epoch": 0.7978209531987978, "grad_norm": 0.193359375, "learning_rate": 0.0009020015293730711, "loss": 2.2338, "step": 29730 }, { "epoch": 0.7978477887505367, "grad_norm": 0.1904296875, "learning_rate": 0.000901977246181274, "loss": 2.156, "step": 29731 }, { "epoch": 0.7978746243022756, "grad_norm": 0.1943359375, "learning_rate": 0.0009019529623270276, "loss": 2.1692, "step": 29732 }, { "epoch": 0.7979014598540146, "grad_norm": 0.2001953125, "learning_rate": 0.0009019286778103853, "loss": 2.2384, "step": 29733 }, { "epoch": 0.7979282954057535, "grad_norm": 0.193359375, "learning_rate": 0.0009019043926314003, "loss": 2.2242, "step": 29734 }, { "epoch": 0.7979551309574925, "grad_norm": 0.19140625, "learning_rate": 0.0009018801067901261, "loss": 2.1631, "step": 29735 }, { "epoch": 0.7979819665092315, "grad_norm": 0.1904296875, "learning_rate": 0.0009018558202866153, "loss": 2.1613, "step": 29736 }, { "epoch": 0.7980088020609704, "grad_norm": 0.1904296875, "learning_rate": 0.0009018315331209222, "loss": 2.2566, "step": 29737 }, { "epoch": 0.7980356376127093, "grad_norm": 0.1884765625, "learning_rate": 0.0009018072452930991, "loss": 2.1319, "step": 29738 }, { "epoch": 0.7980624731644482, "grad_norm": 0.1982421875, "learning_rate": 0.0009017829568031998, "loss": 2.2391, "step": 29739 }, { "epoch": 0.7980893087161872, "grad_norm": 0.1943359375, "learning_rate": 0.0009017586676512775, "loss": 2.0945, "step": 29740 }, { "epoch": 0.7981161442679261, "grad_norm": 0.1904296875, "learning_rate": 0.0009017343778373855, "loss": 2.2289, "step": 29741 }, { "epoch": 0.7981429798196651, "grad_norm": 0.1953125, "learning_rate": 0.0009017100873615771, "loss": 2.1639, "step": 29742 }, { "epoch": 0.7981698153714041, "grad_norm": 0.1953125, "learning_rate": 0.0009016857962239052, "loss": 2.214, "step": 29743 }, { "epoch": 0.798196650923143, "grad_norm": 0.193359375, "learning_rate": 0.0009016615044244237, "loss": 2.1848, "step": 29744 }, { "epoch": 0.798223486474882, "grad_norm": 0.189453125, "learning_rate": 0.0009016372119631856, "loss": 2.1658, "step": 29745 }, { "epoch": 0.7982503220266208, "grad_norm": 0.197265625, "learning_rate": 0.0009016129188402442, "loss": 2.2233, "step": 29746 }, { "epoch": 0.7982771575783598, "grad_norm": 0.193359375, "learning_rate": 0.0009015886250556528, "loss": 2.1793, "step": 29747 }, { "epoch": 0.7983039931300988, "grad_norm": 0.1923828125, "learning_rate": 0.0009015643306094646, "loss": 2.1915, "step": 29748 }, { "epoch": 0.7983308286818377, "grad_norm": 0.193359375, "learning_rate": 0.000901540035501733, "loss": 2.2189, "step": 29749 }, { "epoch": 0.7983576642335767, "grad_norm": 0.193359375, "learning_rate": 0.0009015157397325113, "loss": 2.1234, "step": 29750 }, { "epoch": 0.7983844997853156, "grad_norm": 0.1953125, "learning_rate": 0.0009014914433018528, "loss": 2.1966, "step": 29751 }, { "epoch": 0.7984113353370546, "grad_norm": 0.193359375, "learning_rate": 0.0009014671462098107, "loss": 2.2046, "step": 29752 }, { "epoch": 0.7984381708887934, "grad_norm": 0.19140625, "learning_rate": 0.0009014428484564385, "loss": 2.2254, "step": 29753 }, { "epoch": 0.7984650064405324, "grad_norm": 0.1923828125, "learning_rate": 0.0009014185500417893, "loss": 2.1954, "step": 29754 }, { "epoch": 0.7984918419922714, "grad_norm": 0.1884765625, "learning_rate": 0.0009013942509659163, "loss": 2.2119, "step": 29755 }, { "epoch": 0.7985186775440103, "grad_norm": 0.1923828125, "learning_rate": 0.0009013699512288732, "loss": 2.2282, "step": 29756 }, { "epoch": 0.7985455130957493, "grad_norm": 0.193359375, "learning_rate": 0.0009013456508307131, "loss": 2.2196, "step": 29757 }, { "epoch": 0.7985723486474882, "grad_norm": 0.1943359375, "learning_rate": 0.0009013213497714893, "loss": 2.1994, "step": 29758 }, { "epoch": 0.7985991841992272, "grad_norm": 0.19921875, "learning_rate": 0.000901297048051255, "loss": 2.1931, "step": 29759 }, { "epoch": 0.798626019750966, "grad_norm": 0.1962890625, "learning_rate": 0.0009012727456700638, "loss": 2.1996, "step": 29760 }, { "epoch": 0.798652855302705, "grad_norm": 0.205078125, "learning_rate": 0.0009012484426279687, "loss": 2.2401, "step": 29761 }, { "epoch": 0.798679690854444, "grad_norm": 0.1904296875, "learning_rate": 0.0009012241389250231, "loss": 2.1462, "step": 29762 }, { "epoch": 0.7987065264061829, "grad_norm": 0.1923828125, "learning_rate": 0.0009011998345612805, "loss": 2.2305, "step": 29763 }, { "epoch": 0.7987333619579219, "grad_norm": 0.1953125, "learning_rate": 0.0009011755295367939, "loss": 2.2112, "step": 29764 }, { "epoch": 0.7987601975096608, "grad_norm": 0.1962890625, "learning_rate": 0.000901151223851617, "loss": 2.2252, "step": 29765 }, { "epoch": 0.7987870330613998, "grad_norm": 0.1904296875, "learning_rate": 0.0009011269175058026, "loss": 2.2606, "step": 29766 }, { "epoch": 0.7988138686131386, "grad_norm": 0.193359375, "learning_rate": 0.0009011026104994047, "loss": 2.1615, "step": 29767 }, { "epoch": 0.7988407041648776, "grad_norm": 0.1943359375, "learning_rate": 0.0009010783028324763, "loss": 2.1152, "step": 29768 }, { "epoch": 0.7988675397166166, "grad_norm": 0.1943359375, "learning_rate": 0.0009010539945050702, "loss": 2.2335, "step": 29769 }, { "epoch": 0.7988943752683555, "grad_norm": 0.1884765625, "learning_rate": 0.0009010296855172404, "loss": 2.1742, "step": 29770 }, { "epoch": 0.7989212108200945, "grad_norm": 0.1904296875, "learning_rate": 0.0009010053758690403, "loss": 2.1214, "step": 29771 }, { "epoch": 0.7989480463718334, "grad_norm": 0.1923828125, "learning_rate": 0.0009009810655605227, "loss": 2.1808, "step": 29772 }, { "epoch": 0.7989748819235724, "grad_norm": 0.19140625, "learning_rate": 0.0009009567545917414, "loss": 2.229, "step": 29773 }, { "epoch": 0.7990017174753113, "grad_norm": 0.1875, "learning_rate": 0.0009009324429627493, "loss": 2.1994, "step": 29774 }, { "epoch": 0.7990285530270502, "grad_norm": 0.1875, "learning_rate": 0.0009009081306736, "loss": 2.1736, "step": 29775 }, { "epoch": 0.7990553885787892, "grad_norm": 0.1923828125, "learning_rate": 0.0009008838177243469, "loss": 2.226, "step": 29776 }, { "epoch": 0.7990822241305281, "grad_norm": 0.1953125, "learning_rate": 0.0009008595041150432, "loss": 2.1818, "step": 29777 }, { "epoch": 0.7991090596822671, "grad_norm": 0.1923828125, "learning_rate": 0.0009008351898457422, "loss": 2.2063, "step": 29778 }, { "epoch": 0.799135895234006, "grad_norm": 0.2041015625, "learning_rate": 0.0009008108749164974, "loss": 2.2871, "step": 29779 }, { "epoch": 0.799162730785745, "grad_norm": 0.1953125, "learning_rate": 0.0009007865593273619, "loss": 2.2169, "step": 29780 }, { "epoch": 0.799189566337484, "grad_norm": 0.19921875, "learning_rate": 0.0009007622430783893, "loss": 2.158, "step": 29781 }, { "epoch": 0.7992164018892228, "grad_norm": 0.1875, "learning_rate": 0.0009007379261696328, "loss": 2.17, "step": 29782 }, { "epoch": 0.7992432374409618, "grad_norm": 0.1904296875, "learning_rate": 0.0009007136086011456, "loss": 2.2345, "step": 29783 }, { "epoch": 0.7992700729927007, "grad_norm": 0.1923828125, "learning_rate": 0.0009006892903729814, "loss": 2.1802, "step": 29784 }, { "epoch": 0.7992969085444397, "grad_norm": 0.1962890625, "learning_rate": 0.0009006649714851934, "loss": 2.1711, "step": 29785 }, { "epoch": 0.7993237440961786, "grad_norm": 0.19140625, "learning_rate": 0.0009006406519378348, "loss": 2.1364, "step": 29786 }, { "epoch": 0.7993505796479176, "grad_norm": 0.1953125, "learning_rate": 0.0009006163317309592, "loss": 2.2846, "step": 29787 }, { "epoch": 0.7993774151996565, "grad_norm": 0.193359375, "learning_rate": 0.0009005920108646197, "loss": 2.2412, "step": 29788 }, { "epoch": 0.7994042507513954, "grad_norm": 0.1943359375, "learning_rate": 0.0009005676893388697, "loss": 2.1793, "step": 29789 }, { "epoch": 0.7994310863031344, "grad_norm": 0.197265625, "learning_rate": 0.0009005433671537628, "loss": 2.189, "step": 29790 }, { "epoch": 0.7994579218548733, "grad_norm": 0.19140625, "learning_rate": 0.0009005190443093519, "loss": 2.1978, "step": 29791 }, { "epoch": 0.7994847574066123, "grad_norm": 0.193359375, "learning_rate": 0.0009004947208056908, "loss": 2.2055, "step": 29792 }, { "epoch": 0.7995115929583512, "grad_norm": 0.1943359375, "learning_rate": 0.0009004703966428327, "loss": 2.2566, "step": 29793 }, { "epoch": 0.7995384285100902, "grad_norm": 0.189453125, "learning_rate": 0.000900446071820831, "loss": 2.1603, "step": 29794 }, { "epoch": 0.7995652640618292, "grad_norm": 0.1953125, "learning_rate": 0.0009004217463397389, "loss": 2.2192, "step": 29795 }, { "epoch": 0.799592099613568, "grad_norm": 0.19921875, "learning_rate": 0.0009003974201996101, "loss": 2.2111, "step": 29796 }, { "epoch": 0.799618935165307, "grad_norm": 0.1953125, "learning_rate": 0.0009003730934004973, "loss": 2.1868, "step": 29797 }, { "epoch": 0.7996457707170459, "grad_norm": 0.1943359375, "learning_rate": 0.0009003487659424546, "loss": 2.1464, "step": 29798 }, { "epoch": 0.7996726062687849, "grad_norm": 0.2080078125, "learning_rate": 0.0009003244378255349, "loss": 2.2127, "step": 29799 }, { "epoch": 0.7996994418205239, "grad_norm": 0.19140625, "learning_rate": 0.0009003001090497921, "loss": 2.1549, "step": 29800 }, { "epoch": 0.7997262773722628, "grad_norm": 0.1982421875, "learning_rate": 0.000900275779615279, "loss": 2.1958, "step": 29801 }, { "epoch": 0.7997531129240018, "grad_norm": 0.2041015625, "learning_rate": 0.0009002514495220491, "loss": 2.1593, "step": 29802 }, { "epoch": 0.7997799484757406, "grad_norm": 0.1923828125, "learning_rate": 0.0009002271187701558, "loss": 2.2215, "step": 29803 }, { "epoch": 0.7998067840274796, "grad_norm": 0.1962890625, "learning_rate": 0.0009002027873596529, "loss": 2.2452, "step": 29804 }, { "epoch": 0.7998336195792185, "grad_norm": 0.189453125, "learning_rate": 0.000900178455290593, "loss": 2.2413, "step": 29805 }, { "epoch": 0.7998604551309575, "grad_norm": 0.19140625, "learning_rate": 0.0009001541225630299, "loss": 2.2487, "step": 29806 }, { "epoch": 0.7998872906826965, "grad_norm": 0.189453125, "learning_rate": 0.0009001297891770172, "loss": 2.2528, "step": 29807 }, { "epoch": 0.7999141262344354, "grad_norm": 0.189453125, "learning_rate": 0.0009001054551326079, "loss": 2.2139, "step": 29808 }, { "epoch": 0.7999409617861744, "grad_norm": 0.1953125, "learning_rate": 0.0009000811204298557, "loss": 2.1151, "step": 29809 }, { "epoch": 0.7999677973379132, "grad_norm": 0.1962890625, "learning_rate": 0.0009000567850688137, "loss": 2.1934, "step": 29810 }, { "epoch": 0.7999946328896522, "grad_norm": 0.1953125, "learning_rate": 0.0009000324490495353, "loss": 2.2398, "step": 29811 }, { "epoch": 0.8000214684413911, "grad_norm": 0.19140625, "learning_rate": 0.000900008112372074, "loss": 2.2152, "step": 29812 }, { "epoch": 0.8000483039931301, "grad_norm": 0.193359375, "learning_rate": 0.0008999837750364832, "loss": 2.2243, "step": 29813 }, { "epoch": 0.8000751395448691, "grad_norm": 0.1884765625, "learning_rate": 0.0008999594370428163, "loss": 2.1035, "step": 29814 }, { "epoch": 0.800101975096608, "grad_norm": 0.193359375, "learning_rate": 0.0008999350983911266, "loss": 2.2164, "step": 29815 }, { "epoch": 0.800101975096608, "eval_loss": 3.100449323654175, "eval_runtime": 833.9117, "eval_samples_per_second": 57.486, "eval_steps_per_second": 14.372, "step": 29815 }, { "epoch": 0.800128810648347, "grad_norm": 0.189453125, "learning_rate": 0.0008999107590814675, "loss": 2.1861, "step": 29816 }, { "epoch": 0.8001556462000858, "grad_norm": 0.1884765625, "learning_rate": 0.0008998864191138924, "loss": 2.1792, "step": 29817 }, { "epoch": 0.8001824817518248, "grad_norm": 0.189453125, "learning_rate": 0.0008998620784884549, "loss": 2.2194, "step": 29818 }, { "epoch": 0.8002093173035638, "grad_norm": 0.1904296875, "learning_rate": 0.0008998377372052081, "loss": 2.1883, "step": 29819 }, { "epoch": 0.8002361528553027, "grad_norm": 0.1943359375, "learning_rate": 0.0008998133952642055, "loss": 2.1526, "step": 29820 }, { "epoch": 0.8002629884070417, "grad_norm": 0.19140625, "learning_rate": 0.0008997890526655004, "loss": 2.2657, "step": 29821 }, { "epoch": 0.8002898239587806, "grad_norm": 0.1943359375, "learning_rate": 0.0008997647094091466, "loss": 2.1042, "step": 29822 }, { "epoch": 0.8003166595105196, "grad_norm": 0.1904296875, "learning_rate": 0.0008997403654951972, "loss": 2.2241, "step": 29823 }, { "epoch": 0.8003434950622584, "grad_norm": 0.189453125, "learning_rate": 0.0008997160209237053, "loss": 2.1331, "step": 29824 }, { "epoch": 0.8003703306139974, "grad_norm": 0.193359375, "learning_rate": 0.000899691675694725, "loss": 2.181, "step": 29825 }, { "epoch": 0.8003971661657364, "grad_norm": 0.197265625, "learning_rate": 0.0008996673298083092, "loss": 2.3091, "step": 29826 }, { "epoch": 0.8004240017174753, "grad_norm": 0.1904296875, "learning_rate": 0.0008996429832645112, "loss": 2.1526, "step": 29827 }, { "epoch": 0.8004508372692143, "grad_norm": 0.189453125, "learning_rate": 0.0008996186360633849, "loss": 2.1809, "step": 29828 }, { "epoch": 0.8004776728209532, "grad_norm": 0.189453125, "learning_rate": 0.0008995942882049835, "loss": 2.1982, "step": 29829 }, { "epoch": 0.8005045083726922, "grad_norm": 0.1904296875, "learning_rate": 0.0008995699396893603, "loss": 2.2432, "step": 29830 }, { "epoch": 0.800531343924431, "grad_norm": 0.1884765625, "learning_rate": 0.0008995455905165689, "loss": 2.1825, "step": 29831 }, { "epoch": 0.80055817947617, "grad_norm": 0.1923828125, "learning_rate": 0.0008995212406866625, "loss": 2.2139, "step": 29832 }, { "epoch": 0.800585015027909, "grad_norm": 0.189453125, "learning_rate": 0.0008994968901996946, "loss": 2.2725, "step": 29833 }, { "epoch": 0.8006118505796479, "grad_norm": 0.1884765625, "learning_rate": 0.0008994725390557187, "loss": 2.161, "step": 29834 }, { "epoch": 0.8006386861313869, "grad_norm": 0.19140625, "learning_rate": 0.000899448187254788, "loss": 2.1752, "step": 29835 }, { "epoch": 0.8006655216831258, "grad_norm": 0.193359375, "learning_rate": 0.0008994238347969561, "loss": 2.239, "step": 29836 }, { "epoch": 0.8006923572348648, "grad_norm": 0.1923828125, "learning_rate": 0.0008993994816822766, "loss": 2.2423, "step": 29837 }, { "epoch": 0.8007191927866036, "grad_norm": 0.1875, "learning_rate": 0.0008993751279108024, "loss": 2.1515, "step": 29838 }, { "epoch": 0.8007460283383426, "grad_norm": 0.193359375, "learning_rate": 0.0008993507734825875, "loss": 2.2425, "step": 29839 }, { "epoch": 0.8007728638900816, "grad_norm": 0.19921875, "learning_rate": 0.0008993264183976851, "loss": 2.3066, "step": 29840 }, { "epoch": 0.8007996994418205, "grad_norm": 0.1953125, "learning_rate": 0.0008993020626561484, "loss": 2.1804, "step": 29841 }, { "epoch": 0.8008265349935595, "grad_norm": 0.1943359375, "learning_rate": 0.0008992777062580312, "loss": 2.206, "step": 29842 }, { "epoch": 0.8008533705452984, "grad_norm": 0.193359375, "learning_rate": 0.0008992533492033867, "loss": 2.2312, "step": 29843 }, { "epoch": 0.8008802060970374, "grad_norm": 0.19140625, "learning_rate": 0.0008992289914922685, "loss": 2.1928, "step": 29844 }, { "epoch": 0.8009070416487764, "grad_norm": 0.193359375, "learning_rate": 0.0008992046331247297, "loss": 2.1608, "step": 29845 }, { "epoch": 0.8009338772005152, "grad_norm": 0.1962890625, "learning_rate": 0.0008991802741008241, "loss": 2.1532, "step": 29846 }, { "epoch": 0.8009607127522542, "grad_norm": 0.1962890625, "learning_rate": 0.0008991559144206051, "loss": 2.1825, "step": 29847 }, { "epoch": 0.8009875483039931, "grad_norm": 0.189453125, "learning_rate": 0.0008991315540841259, "loss": 2.1735, "step": 29848 }, { "epoch": 0.8010143838557321, "grad_norm": 0.1943359375, "learning_rate": 0.0008991071930914401, "loss": 2.2523, "step": 29849 }, { "epoch": 0.801041219407471, "grad_norm": 0.1923828125, "learning_rate": 0.0008990828314426009, "loss": 2.2268, "step": 29850 }, { "epoch": 0.80106805495921, "grad_norm": 0.1884765625, "learning_rate": 0.0008990584691376624, "loss": 2.1725, "step": 29851 }, { "epoch": 0.801094890510949, "grad_norm": 0.1884765625, "learning_rate": 0.0008990341061766773, "loss": 2.2234, "step": 29852 }, { "epoch": 0.8011217260626878, "grad_norm": 0.185546875, "learning_rate": 0.0008990097425596994, "loss": 2.1361, "step": 29853 }, { "epoch": 0.8011485616144268, "grad_norm": 0.193359375, "learning_rate": 0.0008989853782867823, "loss": 2.1823, "step": 29854 }, { "epoch": 0.8011753971661657, "grad_norm": 0.19140625, "learning_rate": 0.0008989610133579789, "loss": 2.2534, "step": 29855 }, { "epoch": 0.8012022327179047, "grad_norm": 0.1953125, "learning_rate": 0.0008989366477733432, "loss": 2.1732, "step": 29856 }, { "epoch": 0.8012290682696436, "grad_norm": 0.193359375, "learning_rate": 0.0008989122815329285, "loss": 2.1815, "step": 29857 }, { "epoch": 0.8012559038213826, "grad_norm": 0.1923828125, "learning_rate": 0.000898887914636788, "loss": 2.2321, "step": 29858 }, { "epoch": 0.8012827393731216, "grad_norm": 0.197265625, "learning_rate": 0.0008988635470849755, "loss": 2.2022, "step": 29859 }, { "epoch": 0.8013095749248604, "grad_norm": 0.193359375, "learning_rate": 0.0008988391788775443, "loss": 2.2926, "step": 29860 }, { "epoch": 0.8013364104765994, "grad_norm": 0.1943359375, "learning_rate": 0.0008988148100145476, "loss": 2.1481, "step": 29861 }, { "epoch": 0.8013632460283383, "grad_norm": 0.205078125, "learning_rate": 0.0008987904404960395, "loss": 2.1774, "step": 29862 }, { "epoch": 0.8013900815800773, "grad_norm": 0.1904296875, "learning_rate": 0.0008987660703220729, "loss": 2.2489, "step": 29863 }, { "epoch": 0.8014169171318162, "grad_norm": 0.1982421875, "learning_rate": 0.0008987416994927014, "loss": 2.2557, "step": 29864 }, { "epoch": 0.8014437526835552, "grad_norm": 0.1904296875, "learning_rate": 0.0008987173280079786, "loss": 2.1634, "step": 29865 }, { "epoch": 0.8014705882352942, "grad_norm": 0.1953125, "learning_rate": 0.0008986929558679577, "loss": 2.3157, "step": 29866 }, { "epoch": 0.801497423787033, "grad_norm": 0.1884765625, "learning_rate": 0.0008986685830726925, "loss": 2.2072, "step": 29867 }, { "epoch": 0.801524259338772, "grad_norm": 0.193359375, "learning_rate": 0.0008986442096222363, "loss": 2.2233, "step": 29868 }, { "epoch": 0.8015510948905109, "grad_norm": 0.1904296875, "learning_rate": 0.0008986198355166427, "loss": 2.2087, "step": 29869 }, { "epoch": 0.8015779304422499, "grad_norm": 0.1884765625, "learning_rate": 0.0008985954607559647, "loss": 2.1768, "step": 29870 }, { "epoch": 0.8016047659939889, "grad_norm": 0.1923828125, "learning_rate": 0.0008985710853402563, "loss": 2.1997, "step": 29871 }, { "epoch": 0.8016316015457278, "grad_norm": 0.193359375, "learning_rate": 0.0008985467092695706, "loss": 2.1716, "step": 29872 }, { "epoch": 0.8016584370974668, "grad_norm": 0.1904296875, "learning_rate": 0.0008985223325439616, "loss": 2.1296, "step": 29873 }, { "epoch": 0.8016852726492056, "grad_norm": 0.1904296875, "learning_rate": 0.0008984979551634822, "loss": 2.158, "step": 29874 }, { "epoch": 0.8017121082009446, "grad_norm": 0.2021484375, "learning_rate": 0.000898473577128186, "loss": 2.1889, "step": 29875 }, { "epoch": 0.8017389437526835, "grad_norm": 0.1943359375, "learning_rate": 0.0008984491984381268, "loss": 2.2406, "step": 29876 }, { "epoch": 0.8017657793044225, "grad_norm": 0.189453125, "learning_rate": 0.0008984248190933578, "loss": 2.1663, "step": 29877 }, { "epoch": 0.8017926148561615, "grad_norm": 0.1904296875, "learning_rate": 0.0008984004390939324, "loss": 2.2367, "step": 29878 }, { "epoch": 0.8018194504079004, "grad_norm": 0.203125, "learning_rate": 0.0008983760584399043, "loss": 2.2917, "step": 29879 }, { "epoch": 0.8018462859596394, "grad_norm": 0.193359375, "learning_rate": 0.0008983516771313271, "loss": 2.1987, "step": 29880 }, { "epoch": 0.8018731215113782, "grad_norm": 0.203125, "learning_rate": 0.0008983272951682539, "loss": 2.2007, "step": 29881 }, { "epoch": 0.8018999570631172, "grad_norm": 0.2001953125, "learning_rate": 0.0008983029125507384, "loss": 2.2481, "step": 29882 }, { "epoch": 0.8019267926148561, "grad_norm": 0.1953125, "learning_rate": 0.0008982785292788342, "loss": 2.2096, "step": 29883 }, { "epoch": 0.8019536281665951, "grad_norm": 0.1865234375, "learning_rate": 0.0008982541453525946, "loss": 2.1738, "step": 29884 }, { "epoch": 0.8019804637183341, "grad_norm": 0.1923828125, "learning_rate": 0.0008982297607720731, "loss": 2.1679, "step": 29885 }, { "epoch": 0.802007299270073, "grad_norm": 0.1953125, "learning_rate": 0.0008982053755373231, "loss": 2.1872, "step": 29886 }, { "epoch": 0.802034134821812, "grad_norm": 0.189453125, "learning_rate": 0.0008981809896483987, "loss": 2.2207, "step": 29887 }, { "epoch": 0.8020609703735508, "grad_norm": 0.19921875, "learning_rate": 0.0008981566031053525, "loss": 2.2297, "step": 29888 }, { "epoch": 0.8020878059252898, "grad_norm": 0.19921875, "learning_rate": 0.0008981322159082386, "loss": 2.1065, "step": 29889 }, { "epoch": 0.8021146414770288, "grad_norm": 0.1953125, "learning_rate": 0.0008981078280571103, "loss": 2.1828, "step": 29890 }, { "epoch": 0.8021414770287677, "grad_norm": 0.1875, "learning_rate": 0.0008980834395520212, "loss": 2.2251, "step": 29891 }, { "epoch": 0.8021683125805067, "grad_norm": 0.1962890625, "learning_rate": 0.0008980590503930246, "loss": 2.2929, "step": 29892 }, { "epoch": 0.8021951481322456, "grad_norm": 0.1953125, "learning_rate": 0.0008980346605801741, "loss": 2.2325, "step": 29893 }, { "epoch": 0.8022219836839846, "grad_norm": 0.1962890625, "learning_rate": 0.0008980102701135235, "loss": 2.2826, "step": 29894 }, { "epoch": 0.8022488192357234, "grad_norm": 0.1923828125, "learning_rate": 0.0008979858789931259, "loss": 2.1909, "step": 29895 }, { "epoch": 0.8022756547874624, "grad_norm": 0.1923828125, "learning_rate": 0.0008979614872190348, "loss": 2.2663, "step": 29896 }, { "epoch": 0.8023024903392014, "grad_norm": 0.1904296875, "learning_rate": 0.000897937094791304, "loss": 2.1947, "step": 29897 }, { "epoch": 0.8023293258909403, "grad_norm": 0.193359375, "learning_rate": 0.000897912701709987, "loss": 2.2121, "step": 29898 }, { "epoch": 0.8023561614426793, "grad_norm": 0.197265625, "learning_rate": 0.000897888307975137, "loss": 2.1933, "step": 29899 }, { "epoch": 0.8023829969944182, "grad_norm": 0.20703125, "learning_rate": 0.0008978639135868077, "loss": 2.3006, "step": 29900 }, { "epoch": 0.8024098325461572, "grad_norm": 0.1953125, "learning_rate": 0.0008978395185450525, "loss": 2.2344, "step": 29901 }, { "epoch": 0.802436668097896, "grad_norm": 0.1953125, "learning_rate": 0.0008978151228499254, "loss": 2.199, "step": 29902 }, { "epoch": 0.802463503649635, "grad_norm": 0.19140625, "learning_rate": 0.0008977907265014792, "loss": 2.1626, "step": 29903 }, { "epoch": 0.802490339201374, "grad_norm": 0.1904296875, "learning_rate": 0.0008977663294997678, "loss": 2.189, "step": 29904 }, { "epoch": 0.8025171747531129, "grad_norm": 0.19140625, "learning_rate": 0.0008977419318448447, "loss": 2.2893, "step": 29905 }, { "epoch": 0.8025440103048519, "grad_norm": 0.19140625, "learning_rate": 0.0008977175335367636, "loss": 2.2562, "step": 29906 }, { "epoch": 0.8025708458565908, "grad_norm": 0.189453125, "learning_rate": 0.0008976931345755777, "loss": 2.1649, "step": 29907 }, { "epoch": 0.8025976814083298, "grad_norm": 0.1884765625, "learning_rate": 0.0008976687349613404, "loss": 2.2554, "step": 29908 }, { "epoch": 0.8026245169600686, "grad_norm": 0.1962890625, "learning_rate": 0.0008976443346941058, "loss": 2.1707, "step": 29909 }, { "epoch": 0.8026513525118076, "grad_norm": 0.19140625, "learning_rate": 0.0008976199337739269, "loss": 2.1989, "step": 29910 }, { "epoch": 0.8026781880635466, "grad_norm": 0.1962890625, "learning_rate": 0.0008975955322008575, "loss": 2.2449, "step": 29911 }, { "epoch": 0.8027050236152855, "grad_norm": 0.1953125, "learning_rate": 0.000897571129974951, "loss": 2.1794, "step": 29912 }, { "epoch": 0.8027318591670245, "grad_norm": 0.19921875, "learning_rate": 0.0008975467270962612, "loss": 2.2208, "step": 29913 }, { "epoch": 0.8027586947187634, "grad_norm": 0.1923828125, "learning_rate": 0.0008975223235648411, "loss": 2.2293, "step": 29914 }, { "epoch": 0.8027855302705024, "grad_norm": 0.189453125, "learning_rate": 0.0008974979193807447, "loss": 2.2219, "step": 29915 }, { "epoch": 0.8028123658222414, "grad_norm": 0.193359375, "learning_rate": 0.0008974735145440255, "loss": 2.2283, "step": 29916 }, { "epoch": 0.8028392013739802, "grad_norm": 0.1923828125, "learning_rate": 0.0008974491090547368, "loss": 2.2774, "step": 29917 }, { "epoch": 0.8028660369257192, "grad_norm": 0.193359375, "learning_rate": 0.0008974247029129324, "loss": 2.1773, "step": 29918 }, { "epoch": 0.8028928724774581, "grad_norm": 0.1904296875, "learning_rate": 0.0008974002961186655, "loss": 2.2612, "step": 29919 }, { "epoch": 0.8029197080291971, "grad_norm": 0.1953125, "learning_rate": 0.00089737588867199, "loss": 2.2465, "step": 29920 }, { "epoch": 0.802946543580936, "grad_norm": 0.1982421875, "learning_rate": 0.0008973514805729594, "loss": 2.2581, "step": 29921 }, { "epoch": 0.802973379132675, "grad_norm": 0.2001953125, "learning_rate": 0.0008973270718216266, "loss": 2.2773, "step": 29922 }, { "epoch": 0.803000214684414, "grad_norm": 0.1953125, "learning_rate": 0.0008973026624180461, "loss": 2.2069, "step": 29923 }, { "epoch": 0.8030270502361528, "grad_norm": 0.1923828125, "learning_rate": 0.0008972782523622711, "loss": 2.1979, "step": 29924 }, { "epoch": 0.8030538857878918, "grad_norm": 0.1953125, "learning_rate": 0.0008972538416543549, "loss": 2.239, "step": 29925 }, { "epoch": 0.8030807213396307, "grad_norm": 0.19140625, "learning_rate": 0.0008972294302943514, "loss": 2.2293, "step": 29926 }, { "epoch": 0.8031075568913697, "grad_norm": 0.193359375, "learning_rate": 0.0008972050182823138, "loss": 2.3105, "step": 29927 }, { "epoch": 0.8031343924431086, "grad_norm": 0.19140625, "learning_rate": 0.0008971806056182959, "loss": 2.257, "step": 29928 }, { "epoch": 0.8031612279948476, "grad_norm": 0.1865234375, "learning_rate": 0.0008971561923023512, "loss": 2.1864, "step": 29929 }, { "epoch": 0.8031880635465866, "grad_norm": 0.1904296875, "learning_rate": 0.0008971317783345331, "loss": 2.251, "step": 29930 }, { "epoch": 0.8032148990983254, "grad_norm": 0.1923828125, "learning_rate": 0.0008971073637148955, "loss": 2.1619, "step": 29931 }, { "epoch": 0.8032417346500644, "grad_norm": 0.189453125, "learning_rate": 0.0008970829484434916, "loss": 2.2375, "step": 29932 }, { "epoch": 0.8032685702018033, "grad_norm": 0.1962890625, "learning_rate": 0.0008970585325203752, "loss": 2.236, "step": 29933 }, { "epoch": 0.8032954057535423, "grad_norm": 0.193359375, "learning_rate": 0.0008970341159455997, "loss": 2.2546, "step": 29934 }, { "epoch": 0.8033222413052812, "grad_norm": 0.1923828125, "learning_rate": 0.0008970096987192189, "loss": 2.1582, "step": 29935 }, { "epoch": 0.8033490768570202, "grad_norm": 0.1962890625, "learning_rate": 0.000896985280841286, "loss": 2.1566, "step": 29936 }, { "epoch": 0.8033759124087592, "grad_norm": 0.1875, "learning_rate": 0.0008969608623118549, "loss": 2.1749, "step": 29937 }, { "epoch": 0.803402747960498, "grad_norm": 0.1875, "learning_rate": 0.0008969364431309789, "loss": 2.1584, "step": 29938 }, { "epoch": 0.803429583512237, "grad_norm": 0.1923828125, "learning_rate": 0.0008969120232987118, "loss": 2.2613, "step": 29939 }, { "epoch": 0.8034564190639759, "grad_norm": 0.189453125, "learning_rate": 0.000896887602815107, "loss": 2.2244, "step": 29940 }, { "epoch": 0.8034832546157149, "grad_norm": 0.1943359375, "learning_rate": 0.0008968631816802183, "loss": 2.2103, "step": 29941 }, { "epoch": 0.8035100901674539, "grad_norm": 0.193359375, "learning_rate": 0.0008968387598940991, "loss": 2.2738, "step": 29942 }, { "epoch": 0.8035369257191928, "grad_norm": 0.1923828125, "learning_rate": 0.0008968143374568029, "loss": 2.2187, "step": 29943 }, { "epoch": 0.8035637612709318, "grad_norm": 0.19140625, "learning_rate": 0.0008967899143683834, "loss": 2.1713, "step": 29944 }, { "epoch": 0.8035905968226706, "grad_norm": 0.1904296875, "learning_rate": 0.0008967654906288941, "loss": 2.1988, "step": 29945 }, { "epoch": 0.8036174323744096, "grad_norm": 0.1923828125, "learning_rate": 0.0008967410662383886, "loss": 2.1959, "step": 29946 }, { "epoch": 0.8036442679261485, "grad_norm": 0.1953125, "learning_rate": 0.0008967166411969206, "loss": 2.238, "step": 29947 }, { "epoch": 0.8036711034778875, "grad_norm": 0.1923828125, "learning_rate": 0.0008966922155045435, "loss": 2.241, "step": 29948 }, { "epoch": 0.8036979390296265, "grad_norm": 0.189453125, "learning_rate": 0.0008966677891613111, "loss": 2.1865, "step": 29949 }, { "epoch": 0.8037247745813654, "grad_norm": 0.1943359375, "learning_rate": 0.0008966433621672768, "loss": 2.2016, "step": 29950 }, { "epoch": 0.8037516101331044, "grad_norm": 0.1943359375, "learning_rate": 0.0008966189345224941, "loss": 2.1751, "step": 29951 }, { "epoch": 0.8037784456848432, "grad_norm": 0.1943359375, "learning_rate": 0.0008965945062270168, "loss": 2.2298, "step": 29952 }, { "epoch": 0.8038052812365822, "grad_norm": 0.1884765625, "learning_rate": 0.0008965700772808985, "loss": 2.1844, "step": 29953 }, { "epoch": 0.8038321167883211, "grad_norm": 0.1865234375, "learning_rate": 0.0008965456476841926, "loss": 2.1618, "step": 29954 }, { "epoch": 0.8038589523400601, "grad_norm": 0.1923828125, "learning_rate": 0.0008965212174369529, "loss": 2.2499, "step": 29955 }, { "epoch": 0.8038857878917991, "grad_norm": 0.19140625, "learning_rate": 0.0008964967865392328, "loss": 2.1969, "step": 29956 }, { "epoch": 0.803912623443538, "grad_norm": 0.1875, "learning_rate": 0.0008964723549910862, "loss": 2.1941, "step": 29957 }, { "epoch": 0.803939458995277, "grad_norm": 0.1904296875, "learning_rate": 0.0008964479227925661, "loss": 2.2273, "step": 29958 }, { "epoch": 0.8039662945470158, "grad_norm": 0.1953125, "learning_rate": 0.0008964234899437266, "loss": 2.1965, "step": 29959 }, { "epoch": 0.8039931300987548, "grad_norm": 0.1962890625, "learning_rate": 0.0008963990564446213, "loss": 2.2514, "step": 29960 }, { "epoch": 0.8040199656504938, "grad_norm": 0.197265625, "learning_rate": 0.0008963746222953037, "loss": 2.2067, "step": 29961 }, { "epoch": 0.8040468012022327, "grad_norm": 0.1943359375, "learning_rate": 0.0008963501874958271, "loss": 2.1923, "step": 29962 }, { "epoch": 0.8040736367539717, "grad_norm": 0.19140625, "learning_rate": 0.0008963257520462455, "loss": 2.2678, "step": 29963 }, { "epoch": 0.8041004723057106, "grad_norm": 0.1904296875, "learning_rate": 0.0008963013159466126, "loss": 2.2415, "step": 29964 }, { "epoch": 0.8041273078574496, "grad_norm": 0.189453125, "learning_rate": 0.0008962768791969815, "loss": 2.1978, "step": 29965 }, { "epoch": 0.8041541434091884, "grad_norm": 0.1923828125, "learning_rate": 0.0008962524417974063, "loss": 2.2465, "step": 29966 }, { "epoch": 0.8041809789609274, "grad_norm": 0.1884765625, "learning_rate": 0.0008962280037479402, "loss": 2.2118, "step": 29967 }, { "epoch": 0.8042078145126664, "grad_norm": 0.193359375, "learning_rate": 0.0008962035650486372, "loss": 2.1996, "step": 29968 }, { "epoch": 0.8042346500644053, "grad_norm": 0.19140625, "learning_rate": 0.0008961791256995506, "loss": 2.1996, "step": 29969 }, { "epoch": 0.8042614856161443, "grad_norm": 0.1865234375, "learning_rate": 0.0008961546857007341, "loss": 2.1803, "step": 29970 }, { "epoch": 0.8042883211678832, "grad_norm": 0.1943359375, "learning_rate": 0.0008961302450522416, "loss": 2.2122, "step": 29971 }, { "epoch": 0.8043151567196222, "grad_norm": 0.1884765625, "learning_rate": 0.0008961058037541263, "loss": 2.2067, "step": 29972 }, { "epoch": 0.804341992271361, "grad_norm": 0.1865234375, "learning_rate": 0.0008960813618064419, "loss": 2.2006, "step": 29973 }, { "epoch": 0.8043688278231, "grad_norm": 0.1845703125, "learning_rate": 0.0008960569192092421, "loss": 2.2335, "step": 29974 }, { "epoch": 0.804395663374839, "grad_norm": 0.1943359375, "learning_rate": 0.0008960324759625807, "loss": 2.2207, "step": 29975 }, { "epoch": 0.8044224989265779, "grad_norm": 0.1904296875, "learning_rate": 0.0008960080320665109, "loss": 2.1835, "step": 29976 }, { "epoch": 0.8044493344783169, "grad_norm": 0.1923828125, "learning_rate": 0.0008959835875210868, "loss": 2.2844, "step": 29977 }, { "epoch": 0.8044761700300558, "grad_norm": 0.1923828125, "learning_rate": 0.0008959591423263617, "loss": 2.2159, "step": 29978 }, { "epoch": 0.8045030055817948, "grad_norm": 0.193359375, "learning_rate": 0.0008959346964823893, "loss": 2.2501, "step": 29979 }, { "epoch": 0.8045298411335337, "grad_norm": 0.189453125, "learning_rate": 0.0008959102499892232, "loss": 2.1042, "step": 29980 }, { "epoch": 0.8045566766852726, "grad_norm": 0.1923828125, "learning_rate": 0.0008958858028469171, "loss": 2.2091, "step": 29981 }, { "epoch": 0.8045835122370116, "grad_norm": 0.193359375, "learning_rate": 0.0008958613550555247, "loss": 2.2953, "step": 29982 }, { "epoch": 0.8046103477887505, "grad_norm": 0.189453125, "learning_rate": 0.0008958369066150995, "loss": 2.1982, "step": 29983 }, { "epoch": 0.8046371833404895, "grad_norm": 0.19140625, "learning_rate": 0.000895812457525695, "loss": 2.3424, "step": 29984 }, { "epoch": 0.8046640188922284, "grad_norm": 0.1953125, "learning_rate": 0.0008957880077873651, "loss": 2.18, "step": 29985 }, { "epoch": 0.8046908544439674, "grad_norm": 0.1875, "learning_rate": 0.0008957635574001634, "loss": 2.2694, "step": 29986 }, { "epoch": 0.8047176899957064, "grad_norm": 0.1865234375, "learning_rate": 0.0008957391063641433, "loss": 2.192, "step": 29987 }, { "epoch": 0.8047445255474452, "grad_norm": 0.1904296875, "learning_rate": 0.0008957146546793588, "loss": 2.2122, "step": 29988 }, { "epoch": 0.8047713610991842, "grad_norm": 0.1875, "learning_rate": 0.0008956902023458632, "loss": 2.2287, "step": 29989 }, { "epoch": 0.8047981966509231, "grad_norm": 0.19140625, "learning_rate": 0.0008956657493637104, "loss": 2.272, "step": 29990 }, { "epoch": 0.8048250322026621, "grad_norm": 0.19140625, "learning_rate": 0.0008956412957329538, "loss": 2.2135, "step": 29991 }, { "epoch": 0.804851867754401, "grad_norm": 0.19140625, "learning_rate": 0.0008956168414536472, "loss": 2.1845, "step": 29992 }, { "epoch": 0.80487870330614, "grad_norm": 0.1923828125, "learning_rate": 0.0008955923865258445, "loss": 2.1686, "step": 29993 }, { "epoch": 0.804905538857879, "grad_norm": 0.185546875, "learning_rate": 0.0008955679309495988, "loss": 2.2356, "step": 29994 }, { "epoch": 0.8049323744096178, "grad_norm": 0.1923828125, "learning_rate": 0.000895543474724964, "loss": 2.2361, "step": 29995 }, { "epoch": 0.8049592099613568, "grad_norm": 0.1845703125, "learning_rate": 0.0008955190178519939, "loss": 2.1285, "step": 29996 }, { "epoch": 0.8049860455130957, "grad_norm": 0.1923828125, "learning_rate": 0.000895494560330742, "loss": 2.2099, "step": 29997 }, { "epoch": 0.8050128810648347, "grad_norm": 0.1982421875, "learning_rate": 0.000895470102161262, "loss": 2.2702, "step": 29998 }, { "epoch": 0.8050397166165736, "grad_norm": 0.1884765625, "learning_rate": 0.0008954456433436073, "loss": 2.1791, "step": 29999 }, { "epoch": 0.8050665521683126, "grad_norm": 0.193359375, "learning_rate": 0.0008954211838778319, "loss": 2.2464, "step": 30000 }, { "epoch": 0.8050933877200516, "grad_norm": 0.1953125, "learning_rate": 0.0008953967237639894, "loss": 2.2133, "step": 30001 }, { "epoch": 0.8051202232717904, "grad_norm": 0.193359375, "learning_rate": 0.0008953722630021334, "loss": 2.2138, "step": 30002 }, { "epoch": 0.8051470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.0008953478015923175, "loss": 2.2297, "step": 30003 }, { "epoch": 0.8051738943752683, "grad_norm": 0.19140625, "learning_rate": 0.0008953233395345954, "loss": 2.2383, "step": 30004 }, { "epoch": 0.8052007299270073, "grad_norm": 0.1904296875, "learning_rate": 0.0008952988768290208, "loss": 2.2223, "step": 30005 }, { "epoch": 0.8052275654787462, "grad_norm": 0.189453125, "learning_rate": 0.0008952744134756473, "loss": 2.1981, "step": 30006 }, { "epoch": 0.8052544010304852, "grad_norm": 0.193359375, "learning_rate": 0.0008952499494745288, "loss": 2.2622, "step": 30007 }, { "epoch": 0.8052812365822242, "grad_norm": 0.1923828125, "learning_rate": 0.0008952254848257187, "loss": 2.2109, "step": 30008 }, { "epoch": 0.805308072133963, "grad_norm": 0.1865234375, "learning_rate": 0.0008952010195292705, "loss": 2.1934, "step": 30009 }, { "epoch": 0.805334907685702, "grad_norm": 0.1884765625, "learning_rate": 0.0008951765535852383, "loss": 2.198, "step": 30010 }, { "epoch": 0.8053617432374409, "grad_norm": 0.1884765625, "learning_rate": 0.0008951520869936757, "loss": 2.1901, "step": 30011 }, { "epoch": 0.8053885787891799, "grad_norm": 0.1884765625, "learning_rate": 0.0008951276197546361, "loss": 2.1457, "step": 30012 }, { "epoch": 0.8054154143409189, "grad_norm": 0.19140625, "learning_rate": 0.0008951031518681733, "loss": 2.2061, "step": 30013 }, { "epoch": 0.8054422498926578, "grad_norm": 0.1904296875, "learning_rate": 0.0008950786833343411, "loss": 2.2384, "step": 30014 }, { "epoch": 0.8054690854443968, "grad_norm": 0.197265625, "learning_rate": 0.0008950542141531931, "loss": 2.1972, "step": 30015 }, { "epoch": 0.8054959209961357, "grad_norm": 0.1923828125, "learning_rate": 0.0008950297443247829, "loss": 2.1757, "step": 30016 }, { "epoch": 0.8055227565478746, "grad_norm": 0.1982421875, "learning_rate": 0.0008950052738491642, "loss": 2.1343, "step": 30017 }, { "epoch": 0.8055495920996135, "grad_norm": 0.1923828125, "learning_rate": 0.0008949808027263908, "loss": 2.1707, "step": 30018 }, { "epoch": 0.8055764276513525, "grad_norm": 0.1884765625, "learning_rate": 0.0008949563309565163, "loss": 2.1793, "step": 30019 }, { "epoch": 0.8056032632030915, "grad_norm": 0.1953125, "learning_rate": 0.0008949318585395942, "loss": 2.1679, "step": 30020 }, { "epoch": 0.8056300987548304, "grad_norm": 0.19140625, "learning_rate": 0.0008949073854756785, "loss": 2.2941, "step": 30021 }, { "epoch": 0.8056569343065694, "grad_norm": 0.1884765625, "learning_rate": 0.0008948829117648231, "loss": 2.2327, "step": 30022 }, { "epoch": 0.8056837698583083, "grad_norm": 0.1904296875, "learning_rate": 0.0008948584374070809, "loss": 2.2073, "step": 30023 }, { "epoch": 0.8057106054100472, "grad_norm": 0.1962890625, "learning_rate": 0.0008948339624025061, "loss": 2.2565, "step": 30024 }, { "epoch": 0.8057374409617861, "grad_norm": 0.1875, "learning_rate": 0.0008948094867511525, "loss": 2.1974, "step": 30025 }, { "epoch": 0.8057642765135251, "grad_norm": 0.185546875, "learning_rate": 0.0008947850104530736, "loss": 2.2398, "step": 30026 }, { "epoch": 0.8057911120652641, "grad_norm": 0.1953125, "learning_rate": 0.000894760533508323, "loss": 2.2394, "step": 30027 }, { "epoch": 0.805817947617003, "grad_norm": 0.1904296875, "learning_rate": 0.0008947360559169546, "loss": 2.2499, "step": 30028 }, { "epoch": 0.805844783168742, "grad_norm": 0.1943359375, "learning_rate": 0.0008947115776790219, "loss": 2.2296, "step": 30029 }, { "epoch": 0.8058716187204809, "grad_norm": 0.189453125, "learning_rate": 0.0008946870987945788, "loss": 2.2, "step": 30030 }, { "epoch": 0.8058984542722198, "grad_norm": 0.1943359375, "learning_rate": 0.0008946626192636788, "loss": 2.2467, "step": 30031 }, { "epoch": 0.8059252898239588, "grad_norm": 0.193359375, "learning_rate": 0.0008946381390863757, "loss": 2.2334, "step": 30032 }, { "epoch": 0.8059521253756977, "grad_norm": 0.1884765625, "learning_rate": 0.0008946136582627235, "loss": 2.2826, "step": 30033 }, { "epoch": 0.8059789609274367, "grad_norm": 0.1865234375, "learning_rate": 0.0008945891767927751, "loss": 2.2584, "step": 30034 }, { "epoch": 0.8060057964791756, "grad_norm": 0.1923828125, "learning_rate": 0.0008945646946765851, "loss": 2.2474, "step": 30035 }, { "epoch": 0.8060326320309146, "grad_norm": 0.1875, "learning_rate": 0.0008945402119142067, "loss": 2.2255, "step": 30036 }, { "epoch": 0.8060594675826535, "grad_norm": 0.1953125, "learning_rate": 0.0008945157285056937, "loss": 2.2643, "step": 30037 }, { "epoch": 0.8060863031343924, "grad_norm": 0.193359375, "learning_rate": 0.0008944912444511, "loss": 2.2486, "step": 30038 }, { "epoch": 0.8061131386861314, "grad_norm": 0.19140625, "learning_rate": 0.000894466759750479, "loss": 2.2539, "step": 30039 }, { "epoch": 0.8061399742378703, "grad_norm": 0.203125, "learning_rate": 0.0008944422744038847, "loss": 2.1946, "step": 30040 }, { "epoch": 0.8061668097896093, "grad_norm": 0.1904296875, "learning_rate": 0.0008944177884113706, "loss": 2.2511, "step": 30041 }, { "epoch": 0.8061936453413482, "grad_norm": 0.1982421875, "learning_rate": 0.0008943933017729904, "loss": 2.303, "step": 30042 }, { "epoch": 0.8062204808930872, "grad_norm": 0.1904296875, "learning_rate": 0.0008943688144887979, "loss": 2.2362, "step": 30043 }, { "epoch": 0.8062473164448261, "grad_norm": 0.193359375, "learning_rate": 0.0008943443265588471, "loss": 2.2164, "step": 30044 }, { "epoch": 0.806274151996565, "grad_norm": 0.1923828125, "learning_rate": 0.0008943198379831913, "loss": 2.2873, "step": 30045 }, { "epoch": 0.806300987548304, "grad_norm": 0.1904296875, "learning_rate": 0.0008942953487618842, "loss": 2.3789, "step": 30046 }, { "epoch": 0.8063278231000429, "grad_norm": 0.1923828125, "learning_rate": 0.0008942708588949797, "loss": 2.2521, "step": 30047 }, { "epoch": 0.8063546586517819, "grad_norm": 0.189453125, "learning_rate": 0.0008942463683825316, "loss": 2.2704, "step": 30048 }, { "epoch": 0.8063814942035208, "grad_norm": 0.1962890625, "learning_rate": 0.0008942218772245935, "loss": 2.1844, "step": 30049 }, { "epoch": 0.8064083297552598, "grad_norm": 0.1923828125, "learning_rate": 0.0008941973854212194, "loss": 2.2433, "step": 30050 }, { "epoch": 0.8064351653069987, "grad_norm": 0.189453125, "learning_rate": 0.0008941728929724625, "loss": 2.1502, "step": 30051 }, { "epoch": 0.8064620008587376, "grad_norm": 0.1875, "learning_rate": 0.000894148399878377, "loss": 2.184, "step": 30052 }, { "epoch": 0.8064888364104766, "grad_norm": 0.1884765625, "learning_rate": 0.0008941239061390164, "loss": 2.1919, "step": 30053 }, { "epoch": 0.8065156719622155, "grad_norm": 0.1904296875, "learning_rate": 0.0008940994117544343, "loss": 2.2927, "step": 30054 }, { "epoch": 0.8065425075139545, "grad_norm": 0.19140625, "learning_rate": 0.000894074916724685, "loss": 2.2806, "step": 30055 }, { "epoch": 0.8065693430656934, "grad_norm": 0.19140625, "learning_rate": 0.0008940504210498215, "loss": 2.209, "step": 30056 }, { "epoch": 0.8065961786174324, "grad_norm": 0.19921875, "learning_rate": 0.0008940259247298979, "loss": 2.2519, "step": 30057 }, { "epoch": 0.8066230141691714, "grad_norm": 0.189453125, "learning_rate": 0.0008940014277649681, "loss": 2.2335, "step": 30058 }, { "epoch": 0.8066498497209103, "grad_norm": 0.1904296875, "learning_rate": 0.0008939769301550857, "loss": 2.2514, "step": 30059 }, { "epoch": 0.8066766852726492, "grad_norm": 0.1953125, "learning_rate": 0.0008939524319003042, "loss": 2.2365, "step": 30060 }, { "epoch": 0.8067035208243881, "grad_norm": 0.197265625, "learning_rate": 0.0008939279330006778, "loss": 2.2208, "step": 30061 }, { "epoch": 0.8067303563761271, "grad_norm": 0.1904296875, "learning_rate": 0.0008939034334562597, "loss": 2.2575, "step": 30062 }, { "epoch": 0.806757191927866, "grad_norm": 0.1982421875, "learning_rate": 0.0008938789332671042, "loss": 2.2773, "step": 30063 }, { "epoch": 0.806784027479605, "grad_norm": 0.1923828125, "learning_rate": 0.0008938544324332646, "loss": 2.2164, "step": 30064 }, { "epoch": 0.806810863031344, "grad_norm": 0.1904296875, "learning_rate": 0.0008938299309547949, "loss": 2.1225, "step": 30065 }, { "epoch": 0.8068376985830829, "grad_norm": 0.1923828125, "learning_rate": 0.0008938054288317488, "loss": 2.2833, "step": 30066 }, { "epoch": 0.8068645341348218, "grad_norm": 0.1865234375, "learning_rate": 0.0008937809260641799, "loss": 2.2634, "step": 30067 }, { "epoch": 0.8068913696865607, "grad_norm": 0.1923828125, "learning_rate": 0.0008937564226521421, "loss": 2.2153, "step": 30068 }, { "epoch": 0.8069182052382997, "grad_norm": 0.1943359375, "learning_rate": 0.0008937319185956894, "loss": 2.2665, "step": 30069 }, { "epoch": 0.8069450407900386, "grad_norm": 0.1865234375, "learning_rate": 0.0008937074138948751, "loss": 2.2964, "step": 30070 }, { "epoch": 0.8069718763417776, "grad_norm": 0.1884765625, "learning_rate": 0.000893682908549753, "loss": 2.1665, "step": 30071 }, { "epoch": 0.8069987118935166, "grad_norm": 0.1962890625, "learning_rate": 0.0008936584025603772, "loss": 2.2702, "step": 30072 }, { "epoch": 0.8070255474452555, "grad_norm": 0.185546875, "learning_rate": 0.0008936338959268012, "loss": 2.1979, "step": 30073 }, { "epoch": 0.8070523829969944, "grad_norm": 0.1904296875, "learning_rate": 0.0008936093886490788, "loss": 2.2221, "step": 30074 }, { "epoch": 0.8070792185487333, "grad_norm": 0.1865234375, "learning_rate": 0.0008935848807272638, "loss": 2.2247, "step": 30075 }, { "epoch": 0.8071060541004723, "grad_norm": 0.19140625, "learning_rate": 0.0008935603721614099, "loss": 2.2217, "step": 30076 }, { "epoch": 0.8071328896522112, "grad_norm": 0.189453125, "learning_rate": 0.0008935358629515711, "loss": 2.2475, "step": 30077 }, { "epoch": 0.8071597252039502, "grad_norm": 0.1953125, "learning_rate": 0.0008935113530978008, "loss": 2.3087, "step": 30078 }, { "epoch": 0.8071865607556892, "grad_norm": 0.193359375, "learning_rate": 0.0008934868426001527, "loss": 2.2014, "step": 30079 }, { "epoch": 0.807213396307428, "grad_norm": 0.19140625, "learning_rate": 0.0008934623314586813, "loss": 2.2296, "step": 30080 }, { "epoch": 0.807240231859167, "grad_norm": 0.19140625, "learning_rate": 0.0008934378196734398, "loss": 2.2795, "step": 30081 }, { "epoch": 0.8072670674109059, "grad_norm": 0.189453125, "learning_rate": 0.0008934133072444816, "loss": 2.1863, "step": 30082 }, { "epoch": 0.8072939029626449, "grad_norm": 0.189453125, "learning_rate": 0.0008933887941718614, "loss": 2.2684, "step": 30083 }, { "epoch": 0.8073207385143839, "grad_norm": 0.1875, "learning_rate": 0.0008933642804556325, "loss": 2.1604, "step": 30084 }, { "epoch": 0.8073475740661228, "grad_norm": 0.1884765625, "learning_rate": 0.0008933397660958485, "loss": 2.1899, "step": 30085 }, { "epoch": 0.8073744096178618, "grad_norm": 0.1923828125, "learning_rate": 0.0008933152510925632, "loss": 2.2413, "step": 30086 }, { "epoch": 0.8074012451696007, "grad_norm": 0.189453125, "learning_rate": 0.0008932907354458308, "loss": 2.2574, "step": 30087 }, { "epoch": 0.8074280807213396, "grad_norm": 0.1923828125, "learning_rate": 0.0008932662191557049, "loss": 2.3365, "step": 30088 }, { "epoch": 0.8074549162730785, "grad_norm": 0.1904296875, "learning_rate": 0.0008932417022222389, "loss": 2.2529, "step": 30089 }, { "epoch": 0.8074817518248175, "grad_norm": 0.1943359375, "learning_rate": 0.0008932171846454869, "loss": 2.1924, "step": 30090 }, { "epoch": 0.8075085873765565, "grad_norm": 0.19140625, "learning_rate": 0.0008931926664255029, "loss": 2.2027, "step": 30091 }, { "epoch": 0.8075354229282954, "grad_norm": 0.189453125, "learning_rate": 0.0008931681475623405, "loss": 2.2214, "step": 30092 }, { "epoch": 0.8075622584800344, "grad_norm": 0.1904296875, "learning_rate": 0.0008931436280560529, "loss": 2.2584, "step": 30093 }, { "epoch": 0.8075890940317733, "grad_norm": 0.19140625, "learning_rate": 0.0008931191079066949, "loss": 2.2168, "step": 30094 }, { "epoch": 0.8076159295835122, "grad_norm": 0.1865234375, "learning_rate": 0.0008930945871143198, "loss": 2.1784, "step": 30095 }, { "epoch": 0.8076427651352511, "grad_norm": 0.1953125, "learning_rate": 0.0008930700656789813, "loss": 2.2155, "step": 30096 }, { "epoch": 0.8076696006869901, "grad_norm": 0.1953125, "learning_rate": 0.0008930455436007333, "loss": 2.266, "step": 30097 }, { "epoch": 0.8076964362387291, "grad_norm": 0.193359375, "learning_rate": 0.0008930210208796295, "loss": 2.2224, "step": 30098 }, { "epoch": 0.807723271790468, "grad_norm": 0.1962890625, "learning_rate": 0.000892996497515724, "loss": 2.2212, "step": 30099 }, { "epoch": 0.807750107342207, "grad_norm": 0.1943359375, "learning_rate": 0.0008929719735090701, "loss": 2.1717, "step": 30100 }, { "epoch": 0.8077769428939459, "grad_norm": 0.1904296875, "learning_rate": 0.0008929474488597221, "loss": 2.1998, "step": 30101 }, { "epoch": 0.8078037784456849, "grad_norm": 0.1943359375, "learning_rate": 0.0008929229235677336, "loss": 2.2492, "step": 30102 }, { "epoch": 0.8078306139974238, "grad_norm": 0.1953125, "learning_rate": 0.0008928983976331583, "loss": 2.2989, "step": 30103 }, { "epoch": 0.8078574495491627, "grad_norm": 0.1943359375, "learning_rate": 0.00089287387105605, "loss": 2.2568, "step": 30104 }, { "epoch": 0.8078842851009017, "grad_norm": 0.1923828125, "learning_rate": 0.0008928493438364627, "loss": 2.263, "step": 30105 }, { "epoch": 0.8079111206526406, "grad_norm": 0.1884765625, "learning_rate": 0.0008928248159744502, "loss": 2.2104, "step": 30106 }, { "epoch": 0.8079379562043796, "grad_norm": 0.1884765625, "learning_rate": 0.0008928002874700661, "loss": 2.2113, "step": 30107 }, { "epoch": 0.8079647917561185, "grad_norm": 0.1904296875, "learning_rate": 0.0008927757583233642, "loss": 2.2745, "step": 30108 }, { "epoch": 0.8079916273078575, "grad_norm": 0.19140625, "learning_rate": 0.0008927512285343985, "loss": 2.2513, "step": 30109 }, { "epoch": 0.8080184628595964, "grad_norm": 0.1875, "learning_rate": 0.0008927266981032228, "loss": 2.1782, "step": 30110 }, { "epoch": 0.8080452984113353, "grad_norm": 0.1884765625, "learning_rate": 0.0008927021670298908, "loss": 2.2719, "step": 30111 }, { "epoch": 0.8080721339630743, "grad_norm": 0.1884765625, "learning_rate": 0.0008926776353144565, "loss": 2.3537, "step": 30112 }, { "epoch": 0.8080989695148132, "grad_norm": 0.1923828125, "learning_rate": 0.0008926531029569733, "loss": 2.2438, "step": 30113 }, { "epoch": 0.8081258050665522, "grad_norm": 0.1923828125, "learning_rate": 0.0008926285699574956, "loss": 2.1845, "step": 30114 }, { "epoch": 0.8081526406182911, "grad_norm": 0.1923828125, "learning_rate": 0.0008926040363160767, "loss": 2.2341, "step": 30115 }, { "epoch": 0.80817947617003, "grad_norm": 0.1962890625, "learning_rate": 0.0008925795020327706, "loss": 2.2591, "step": 30116 }, { "epoch": 0.808206311721769, "grad_norm": 0.1923828125, "learning_rate": 0.0008925549671076313, "loss": 2.2485, "step": 30117 }, { "epoch": 0.8082331472735079, "grad_norm": 0.189453125, "learning_rate": 0.0008925304315407123, "loss": 2.2396, "step": 30118 }, { "epoch": 0.8082599828252469, "grad_norm": 0.19921875, "learning_rate": 0.0008925058953320676, "loss": 2.322, "step": 30119 }, { "epoch": 0.8082868183769858, "grad_norm": 0.193359375, "learning_rate": 0.0008924813584817513, "loss": 2.2051, "step": 30120 }, { "epoch": 0.8083136539287248, "grad_norm": 0.19140625, "learning_rate": 0.0008924568209898167, "loss": 2.2909, "step": 30121 }, { "epoch": 0.8083404894804637, "grad_norm": 0.189453125, "learning_rate": 0.000892432282856318, "loss": 2.2247, "step": 30122 }, { "epoch": 0.8083673250322027, "grad_norm": 0.1943359375, "learning_rate": 0.0008924077440813088, "loss": 2.3169, "step": 30123 }, { "epoch": 0.8083941605839416, "grad_norm": 0.1904296875, "learning_rate": 0.0008923832046648432, "loss": 2.2095, "step": 30124 }, { "epoch": 0.8084209961356805, "grad_norm": 0.1884765625, "learning_rate": 0.0008923586646069747, "loss": 2.2433, "step": 30125 }, { "epoch": 0.8084478316874195, "grad_norm": 0.1875, "learning_rate": 0.0008923341239077575, "loss": 2.2402, "step": 30126 }, { "epoch": 0.8084746672391584, "grad_norm": 0.19140625, "learning_rate": 0.000892309582567245, "loss": 2.2684, "step": 30127 }, { "epoch": 0.8085015027908974, "grad_norm": 0.19140625, "learning_rate": 0.0008922850405854914, "loss": 2.2757, "step": 30128 }, { "epoch": 0.8085283383426364, "grad_norm": 0.1865234375, "learning_rate": 0.0008922604979625504, "loss": 2.2107, "step": 30129 }, { "epoch": 0.8085551738943753, "grad_norm": 0.1884765625, "learning_rate": 0.0008922359546984757, "loss": 2.1797, "step": 30130 }, { "epoch": 0.8085820094461142, "grad_norm": 0.1884765625, "learning_rate": 0.0008922114107933215, "loss": 2.2561, "step": 30131 }, { "epoch": 0.8086088449978531, "grad_norm": 0.1904296875, "learning_rate": 0.0008921868662471414, "loss": 2.1978, "step": 30132 }, { "epoch": 0.8086356805495921, "grad_norm": 0.1875, "learning_rate": 0.0008921623210599893, "loss": 2.2148, "step": 30133 }, { "epoch": 0.808662516101331, "grad_norm": 0.1884765625, "learning_rate": 0.0008921377752319189, "loss": 2.2102, "step": 30134 }, { "epoch": 0.80868935165307, "grad_norm": 0.19140625, "learning_rate": 0.0008921132287629842, "loss": 2.163, "step": 30135 }, { "epoch": 0.808716187204809, "grad_norm": 0.1962890625, "learning_rate": 0.0008920886816532391, "loss": 2.2337, "step": 30136 }, { "epoch": 0.8087430227565479, "grad_norm": 0.189453125, "learning_rate": 0.0008920641339027373, "loss": 2.2277, "step": 30137 }, { "epoch": 0.8087698583082868, "grad_norm": 0.1953125, "learning_rate": 0.0008920395855115327, "loss": 2.3005, "step": 30138 }, { "epoch": 0.8087966938600257, "grad_norm": 0.1943359375, "learning_rate": 0.0008920150364796792, "loss": 2.2152, "step": 30139 }, { "epoch": 0.8088235294117647, "grad_norm": 0.189453125, "learning_rate": 0.0008919904868072308, "loss": 2.2361, "step": 30140 }, { "epoch": 0.8088503649635036, "grad_norm": 0.193359375, "learning_rate": 0.0008919659364942409, "loss": 2.2528, "step": 30141 }, { "epoch": 0.8088772005152426, "grad_norm": 0.1884765625, "learning_rate": 0.0008919413855407639, "loss": 2.1924, "step": 30142 }, { "epoch": 0.8089040360669816, "grad_norm": 0.193359375, "learning_rate": 0.0008919168339468532, "loss": 2.2211, "step": 30143 }, { "epoch": 0.8089308716187205, "grad_norm": 0.1884765625, "learning_rate": 0.0008918922817125627, "loss": 2.2467, "step": 30144 }, { "epoch": 0.8089577071704595, "grad_norm": 0.1865234375, "learning_rate": 0.0008918677288379466, "loss": 2.2062, "step": 30145 }, { "epoch": 0.8089845427221983, "grad_norm": 0.189453125, "learning_rate": 0.0008918431753230585, "loss": 2.2292, "step": 30146 }, { "epoch": 0.8090113782739373, "grad_norm": 0.201171875, "learning_rate": 0.0008918186211679524, "loss": 2.3024, "step": 30147 }, { "epoch": 0.8090382138256763, "grad_norm": 0.1865234375, "learning_rate": 0.0008917940663726821, "loss": 2.2256, "step": 30148 }, { "epoch": 0.8090650493774152, "grad_norm": 0.193359375, "learning_rate": 0.0008917695109373015, "loss": 2.2575, "step": 30149 }, { "epoch": 0.8090918849291542, "grad_norm": 0.193359375, "learning_rate": 0.0008917449548618645, "loss": 2.2706, "step": 30150 }, { "epoch": 0.8091187204808931, "grad_norm": 0.1943359375, "learning_rate": 0.0008917203981464247, "loss": 2.3362, "step": 30151 }, { "epoch": 0.809145556032632, "grad_norm": 0.185546875, "learning_rate": 0.0008916958407910363, "loss": 2.2375, "step": 30152 }, { "epoch": 0.8091723915843709, "grad_norm": 0.185546875, "learning_rate": 0.0008916712827957531, "loss": 2.2285, "step": 30153 }, { "epoch": 0.8091992271361099, "grad_norm": 0.1904296875, "learning_rate": 0.0008916467241606288, "loss": 2.2818, "step": 30154 }, { "epoch": 0.8092260626878489, "grad_norm": 0.2001953125, "learning_rate": 0.0008916221648857173, "loss": 2.2469, "step": 30155 }, { "epoch": 0.8092528982395878, "grad_norm": 0.1923828125, "learning_rate": 0.0008915976049710726, "loss": 2.2, "step": 30156 }, { "epoch": 0.8092797337913268, "grad_norm": 0.20703125, "learning_rate": 0.0008915730444167488, "loss": 2.2035, "step": 30157 }, { "epoch": 0.8093065693430657, "grad_norm": 0.193359375, "learning_rate": 0.0008915484832227994, "loss": 2.2756, "step": 30158 }, { "epoch": 0.8093334048948047, "grad_norm": 0.1884765625, "learning_rate": 0.0008915239213892781, "loss": 2.2659, "step": 30159 }, { "epoch": 0.8093602404465435, "grad_norm": 0.1904296875, "learning_rate": 0.0008914993589162394, "loss": 2.1527, "step": 30160 }, { "epoch": 0.8093870759982825, "grad_norm": 0.1865234375, "learning_rate": 0.0008914747958037369, "loss": 2.1935, "step": 30161 }, { "epoch": 0.8094139115500215, "grad_norm": 0.1923828125, "learning_rate": 0.0008914502320518243, "loss": 2.2749, "step": 30162 }, { "epoch": 0.8094407471017604, "grad_norm": 0.19140625, "learning_rate": 0.0008914256676605556, "loss": 2.3104, "step": 30163 }, { "epoch": 0.8094675826534994, "grad_norm": 0.193359375, "learning_rate": 0.000891401102629985, "loss": 2.2053, "step": 30164 }, { "epoch": 0.8094944182052383, "grad_norm": 0.2001953125, "learning_rate": 0.0008913765369601658, "loss": 2.3123, "step": 30165 }, { "epoch": 0.8095212537569773, "grad_norm": 0.1884765625, "learning_rate": 0.0008913519706511524, "loss": 2.2138, "step": 30166 }, { "epoch": 0.8095480893087161, "grad_norm": 0.19140625, "learning_rate": 0.0008913274037029984, "loss": 2.2434, "step": 30167 }, { "epoch": 0.8095749248604551, "grad_norm": 0.1875, "learning_rate": 0.000891302836115758, "loss": 2.2827, "step": 30168 }, { "epoch": 0.8096017604121941, "grad_norm": 0.1904296875, "learning_rate": 0.0008912782678894845, "loss": 2.2387, "step": 30169 }, { "epoch": 0.809628595963933, "grad_norm": 0.193359375, "learning_rate": 0.0008912536990242324, "loss": 2.2577, "step": 30170 }, { "epoch": 0.809655431515672, "grad_norm": 0.189453125, "learning_rate": 0.0008912291295200554, "loss": 2.1864, "step": 30171 }, { "epoch": 0.8096822670674109, "grad_norm": 0.1904296875, "learning_rate": 0.0008912045593770072, "loss": 2.2421, "step": 30172 }, { "epoch": 0.8097091026191499, "grad_norm": 0.1953125, "learning_rate": 0.0008911799885951421, "loss": 2.1613, "step": 30173 }, { "epoch": 0.8097359381708888, "grad_norm": 0.193359375, "learning_rate": 0.0008911554171745135, "loss": 2.2227, "step": 30174 }, { "epoch": 0.8097627737226277, "grad_norm": 0.2001953125, "learning_rate": 0.0008911308451151758, "loss": 2.2832, "step": 30175 }, { "epoch": 0.8097896092743667, "grad_norm": 0.19140625, "learning_rate": 0.0008911062724171827, "loss": 2.2089, "step": 30176 }, { "epoch": 0.8098164448261056, "grad_norm": 0.2109375, "learning_rate": 0.0008910816990805877, "loss": 2.2335, "step": 30177 }, { "epoch": 0.8098432803778446, "grad_norm": 0.189453125, "learning_rate": 0.0008910571251054455, "loss": 2.2131, "step": 30178 }, { "epoch": 0.8098701159295835, "grad_norm": 0.1875, "learning_rate": 0.0008910325504918095, "loss": 2.2506, "step": 30179 }, { "epoch": 0.8098969514813225, "grad_norm": 0.19140625, "learning_rate": 0.0008910079752397337, "loss": 2.2723, "step": 30180 }, { "epoch": 0.8099237870330614, "grad_norm": 0.1875, "learning_rate": 0.0008909833993492719, "loss": 2.2349, "step": 30181 }, { "epoch": 0.8099506225848003, "grad_norm": 0.1875, "learning_rate": 0.0008909588228204783, "loss": 2.2044, "step": 30182 }, { "epoch": 0.8099774581365393, "grad_norm": 0.189453125, "learning_rate": 0.0008909342456534066, "loss": 2.297, "step": 30183 }, { "epoch": 0.8100042936882782, "grad_norm": 0.1962890625, "learning_rate": 0.0008909096678481107, "loss": 2.2929, "step": 30184 }, { "epoch": 0.8100311292400172, "grad_norm": 0.1865234375, "learning_rate": 0.0008908850894046446, "loss": 2.1807, "step": 30185 }, { "epoch": 0.8100579647917561, "grad_norm": 0.189453125, "learning_rate": 0.0008908605103230622, "loss": 2.2448, "step": 30186 }, { "epoch": 0.8100848003434951, "grad_norm": 0.1923828125, "learning_rate": 0.0008908359306034174, "loss": 2.291, "step": 30187 }, { "epoch": 0.810111635895234, "grad_norm": 0.19140625, "learning_rate": 0.0008908113502457642, "loss": 2.2927, "step": 30188 }, { "epoch": 0.8101384714469729, "grad_norm": 0.1904296875, "learning_rate": 0.0008907867692501564, "loss": 2.1689, "step": 30189 }, { "epoch": 0.8101653069987119, "grad_norm": 0.1884765625, "learning_rate": 0.0008907621876166479, "loss": 2.2523, "step": 30190 }, { "epoch": 0.8101921425504508, "grad_norm": 0.1904296875, "learning_rate": 0.0008907376053452928, "loss": 2.2979, "step": 30191 }, { "epoch": 0.8102189781021898, "grad_norm": 0.1875, "learning_rate": 0.0008907130224361449, "loss": 2.2017, "step": 30192 }, { "epoch": 0.8102458136539287, "grad_norm": 0.1884765625, "learning_rate": 0.0008906884388892583, "loss": 2.2761, "step": 30193 }, { "epoch": 0.8102726492056677, "grad_norm": 0.189453125, "learning_rate": 0.0008906638547046867, "loss": 2.2611, "step": 30194 }, { "epoch": 0.8102994847574067, "grad_norm": 0.193359375, "learning_rate": 0.000890639269882484, "loss": 2.268, "step": 30195 }, { "epoch": 0.8103263203091455, "grad_norm": 0.1923828125, "learning_rate": 0.0008906146844227044, "loss": 2.2126, "step": 30196 }, { "epoch": 0.8103531558608845, "grad_norm": 0.1845703125, "learning_rate": 0.0008905900983254016, "loss": 2.2726, "step": 30197 }, { "epoch": 0.8103799914126234, "grad_norm": 0.1923828125, "learning_rate": 0.0008905655115906296, "loss": 2.285, "step": 30198 }, { "epoch": 0.8104068269643624, "grad_norm": 0.193359375, "learning_rate": 0.0008905409242184423, "loss": 2.2668, "step": 30199 }, { "epoch": 0.8104336625161014, "grad_norm": 0.1875, "learning_rate": 0.0008905163362088939, "loss": 2.1959, "step": 30200 }, { "epoch": 0.8104604980678403, "grad_norm": 0.1904296875, "learning_rate": 0.000890491747562038, "loss": 2.2686, "step": 30201 }, { "epoch": 0.8104873336195793, "grad_norm": 0.189453125, "learning_rate": 0.0008904671582779286, "loss": 2.1753, "step": 30202 }, { "epoch": 0.8105141691713181, "grad_norm": 0.1904296875, "learning_rate": 0.0008904425683566197, "loss": 2.2363, "step": 30203 }, { "epoch": 0.8105410047230571, "grad_norm": 0.1884765625, "learning_rate": 0.0008904179777981656, "loss": 2.2113, "step": 30204 }, { "epoch": 0.810567840274796, "grad_norm": 0.1845703125, "learning_rate": 0.0008903933866026193, "loss": 2.1727, "step": 30205 }, { "epoch": 0.810594675826535, "grad_norm": 0.1826171875, "learning_rate": 0.0008903687947700358, "loss": 2.188, "step": 30206 }, { "epoch": 0.810621511378274, "grad_norm": 0.193359375, "learning_rate": 0.0008903442023004684, "loss": 2.2447, "step": 30207 }, { "epoch": 0.8106483469300129, "grad_norm": 0.1884765625, "learning_rate": 0.0008903196091939714, "loss": 2.2288, "step": 30208 }, { "epoch": 0.8106751824817519, "grad_norm": 0.1884765625, "learning_rate": 0.0008902950154505985, "loss": 2.2506, "step": 30209 }, { "epoch": 0.8107020180334907, "grad_norm": 0.19140625, "learning_rate": 0.0008902704210704038, "loss": 2.2708, "step": 30210 }, { "epoch": 0.8107288535852297, "grad_norm": 0.185546875, "learning_rate": 0.000890245826053441, "loss": 2.1781, "step": 30211 }, { "epoch": 0.8107556891369686, "grad_norm": 0.1865234375, "learning_rate": 0.0008902212303997645, "loss": 2.1616, "step": 30212 }, { "epoch": 0.8107825246887076, "grad_norm": 0.185546875, "learning_rate": 0.0008901966341094279, "loss": 2.1765, "step": 30213 }, { "epoch": 0.8108093602404466, "grad_norm": 0.189453125, "learning_rate": 0.0008901720371824851, "loss": 2.2186, "step": 30214 }, { "epoch": 0.8108361957921855, "grad_norm": 0.1875, "learning_rate": 0.0008901474396189905, "loss": 2.2275, "step": 30215 }, { "epoch": 0.8108630313439245, "grad_norm": 0.1923828125, "learning_rate": 0.0008901228414189977, "loss": 2.2628, "step": 30216 }, { "epoch": 0.8108898668956633, "grad_norm": 0.1884765625, "learning_rate": 0.0008900982425825607, "loss": 2.1426, "step": 30217 }, { "epoch": 0.8109167024474023, "grad_norm": 0.1904296875, "learning_rate": 0.0008900736431097335, "loss": 2.19, "step": 30218 }, { "epoch": 0.8109435379991413, "grad_norm": 0.1884765625, "learning_rate": 0.0008900490430005701, "loss": 2.3067, "step": 30219 }, { "epoch": 0.8109703735508802, "grad_norm": 0.1865234375, "learning_rate": 0.0008900244422551243, "loss": 2.2279, "step": 30220 }, { "epoch": 0.8109972091026192, "grad_norm": 0.1884765625, "learning_rate": 0.0008899998408734504, "loss": 2.2556, "step": 30221 }, { "epoch": 0.8110240446543581, "grad_norm": 0.189453125, "learning_rate": 0.0008899752388556021, "loss": 2.2535, "step": 30222 }, { "epoch": 0.8110508802060971, "grad_norm": 0.193359375, "learning_rate": 0.0008899506362016334, "loss": 2.2626, "step": 30223 }, { "epoch": 0.8110777157578359, "grad_norm": 0.1875, "learning_rate": 0.0008899260329115982, "loss": 2.2777, "step": 30224 }, { "epoch": 0.8111045513095749, "grad_norm": 0.1875, "learning_rate": 0.0008899014289855507, "loss": 2.2256, "step": 30225 }, { "epoch": 0.8111313868613139, "grad_norm": 0.1904296875, "learning_rate": 0.0008898768244235449, "loss": 2.2467, "step": 30226 }, { "epoch": 0.8111582224130528, "grad_norm": 0.1884765625, "learning_rate": 0.0008898522192256345, "loss": 2.2119, "step": 30227 }, { "epoch": 0.8111850579647918, "grad_norm": 0.18359375, "learning_rate": 0.0008898276133918735, "loss": 2.2194, "step": 30228 }, { "epoch": 0.8112118935165307, "grad_norm": 0.189453125, "learning_rate": 0.000889803006922316, "loss": 2.1853, "step": 30229 }, { "epoch": 0.8112387290682697, "grad_norm": 0.1904296875, "learning_rate": 0.0008897783998170161, "loss": 2.2103, "step": 30230 }, { "epoch": 0.8112655646200085, "grad_norm": 0.1884765625, "learning_rate": 0.0008897537920760275, "loss": 2.1658, "step": 30231 }, { "epoch": 0.8112924001717475, "grad_norm": 0.1875, "learning_rate": 0.0008897291836994046, "loss": 2.2702, "step": 30232 }, { "epoch": 0.8113192357234865, "grad_norm": 0.189453125, "learning_rate": 0.0008897045746872009, "loss": 2.2441, "step": 30233 }, { "epoch": 0.8113460712752254, "grad_norm": 0.189453125, "learning_rate": 0.0008896799650394706, "loss": 2.2373, "step": 30234 }, { "epoch": 0.8113729068269644, "grad_norm": 0.19140625, "learning_rate": 0.0008896553547562677, "loss": 2.2522, "step": 30235 }, { "epoch": 0.8113997423787033, "grad_norm": 0.189453125, "learning_rate": 0.0008896307438376461, "loss": 2.2562, "step": 30236 }, { "epoch": 0.8114265779304423, "grad_norm": 0.189453125, "learning_rate": 0.0008896061322836601, "loss": 2.2737, "step": 30237 }, { "epoch": 0.8114534134821811, "grad_norm": 0.1953125, "learning_rate": 0.0008895815200943632, "loss": 2.2772, "step": 30238 }, { "epoch": 0.8114802490339201, "grad_norm": 0.1884765625, "learning_rate": 0.0008895569072698096, "loss": 2.2364, "step": 30239 }, { "epoch": 0.8115070845856591, "grad_norm": 0.1865234375, "learning_rate": 0.0008895322938100535, "loss": 2.2065, "step": 30240 }, { "epoch": 0.811533920137398, "grad_norm": 0.1884765625, "learning_rate": 0.0008895076797151487, "loss": 2.2338, "step": 30241 }, { "epoch": 0.811560755689137, "grad_norm": 0.19140625, "learning_rate": 0.0008894830649851491, "loss": 2.1875, "step": 30242 }, { "epoch": 0.8115875912408759, "grad_norm": 0.1884765625, "learning_rate": 0.000889458449620109, "loss": 2.3114, "step": 30243 }, { "epoch": 0.8116144267926149, "grad_norm": 0.1884765625, "learning_rate": 0.0008894338336200822, "loss": 2.1935, "step": 30244 }, { "epoch": 0.8116412623443539, "grad_norm": 0.193359375, "learning_rate": 0.0008894092169851227, "loss": 2.2375, "step": 30245 }, { "epoch": 0.8116680978960927, "grad_norm": 0.1865234375, "learning_rate": 0.0008893845997152844, "loss": 2.2111, "step": 30246 }, { "epoch": 0.8116949334478317, "grad_norm": 0.1875, "learning_rate": 0.0008893599818106217, "loss": 2.1913, "step": 30247 }, { "epoch": 0.8117217689995706, "grad_norm": 0.185546875, "learning_rate": 0.0008893353632711881, "loss": 2.2467, "step": 30248 }, { "epoch": 0.8117486045513096, "grad_norm": 0.1884765625, "learning_rate": 0.0008893107440970378, "loss": 2.2147, "step": 30249 }, { "epoch": 0.8117754401030485, "grad_norm": 0.1865234375, "learning_rate": 0.0008892861242882249, "loss": 2.2406, "step": 30250 }, { "epoch": 0.8118022756547875, "grad_norm": 0.19140625, "learning_rate": 0.0008892615038448036, "loss": 2.287, "step": 30251 }, { "epoch": 0.8118291112065265, "grad_norm": 0.1865234375, "learning_rate": 0.0008892368827668274, "loss": 2.2768, "step": 30252 }, { "epoch": 0.8118559467582653, "grad_norm": 0.185546875, "learning_rate": 0.0008892122610543506, "loss": 2.2225, "step": 30253 }, { "epoch": 0.8118827823100043, "grad_norm": 0.185546875, "learning_rate": 0.0008891876387074271, "loss": 2.2329, "step": 30254 }, { "epoch": 0.8119096178617432, "grad_norm": 0.2001953125, "learning_rate": 0.0008891630157261113, "loss": 2.2424, "step": 30255 }, { "epoch": 0.8119364534134822, "grad_norm": 0.1904296875, "learning_rate": 0.0008891383921104566, "loss": 2.2701, "step": 30256 }, { "epoch": 0.8119632889652211, "grad_norm": 0.2021484375, "learning_rate": 0.0008891137678605174, "loss": 2.222, "step": 30257 }, { "epoch": 0.8119901245169601, "grad_norm": 0.1962890625, "learning_rate": 0.0008890891429763478, "loss": 2.267, "step": 30258 }, { "epoch": 0.8120169600686991, "grad_norm": 0.1845703125, "learning_rate": 0.0008890645174580014, "loss": 2.2424, "step": 30259 }, { "epoch": 0.8120437956204379, "grad_norm": 0.189453125, "learning_rate": 0.0008890398913055327, "loss": 2.2361, "step": 30260 }, { "epoch": 0.8120706311721769, "grad_norm": 0.19140625, "learning_rate": 0.0008890152645189955, "loss": 2.2198, "step": 30261 }, { "epoch": 0.8120974667239158, "grad_norm": 0.185546875, "learning_rate": 0.000888990637098444, "loss": 2.2041, "step": 30262 }, { "epoch": 0.8121243022756548, "grad_norm": 0.19140625, "learning_rate": 0.0008889660090439319, "loss": 2.2361, "step": 30263 }, { "epoch": 0.8121511378273937, "grad_norm": 0.1875, "learning_rate": 0.0008889413803555133, "loss": 2.199, "step": 30264 }, { "epoch": 0.8121779733791327, "grad_norm": 0.1923828125, "learning_rate": 0.0008889167510332423, "loss": 2.2992, "step": 30265 }, { "epoch": 0.8122048089308717, "grad_norm": 0.185546875, "learning_rate": 0.0008888921210771732, "loss": 2.2955, "step": 30266 }, { "epoch": 0.8122316444826105, "grad_norm": 0.1884765625, "learning_rate": 0.0008888674904873596, "loss": 2.2664, "step": 30267 }, { "epoch": 0.8122584800343495, "grad_norm": 0.1865234375, "learning_rate": 0.0008888428592638557, "loss": 2.3111, "step": 30268 }, { "epoch": 0.8122853155860884, "grad_norm": 0.189453125, "learning_rate": 0.0008888182274067156, "loss": 2.2166, "step": 30269 }, { "epoch": 0.8123121511378274, "grad_norm": 0.1923828125, "learning_rate": 0.0008887935949159933, "loss": 2.2864, "step": 30270 }, { "epoch": 0.8123389866895664, "grad_norm": 0.1884765625, "learning_rate": 0.0008887689617917429, "loss": 2.2544, "step": 30271 }, { "epoch": 0.8123658222413053, "grad_norm": 0.1845703125, "learning_rate": 0.000888744328034018, "loss": 2.2556, "step": 30272 }, { "epoch": 0.8123926577930443, "grad_norm": 0.1865234375, "learning_rate": 0.0008887196936428734, "loss": 2.2519, "step": 30273 }, { "epoch": 0.8124194933447831, "grad_norm": 0.1865234375, "learning_rate": 0.0008886950586183627, "loss": 2.2558, "step": 30274 }, { "epoch": 0.8124463288965221, "grad_norm": 0.1904296875, "learning_rate": 0.0008886704229605397, "loss": 2.2605, "step": 30275 }, { "epoch": 0.812473164448261, "grad_norm": 0.189453125, "learning_rate": 0.0008886457866694589, "loss": 2.2485, "step": 30276 }, { "epoch": 0.8125, "grad_norm": 0.1982421875, "learning_rate": 0.0008886211497451743, "loss": 2.2376, "step": 30277 }, { "epoch": 0.812526835551739, "grad_norm": 0.19140625, "learning_rate": 0.0008885965121877396, "loss": 2.1879, "step": 30278 }, { "epoch": 0.8125536711034779, "grad_norm": 0.1845703125, "learning_rate": 0.000888571873997209, "loss": 2.1947, "step": 30279 }, { "epoch": 0.8125805066552169, "grad_norm": 0.1943359375, "learning_rate": 0.0008885472351736367, "loss": 2.3254, "step": 30280 }, { "epoch": 0.8126073422069557, "grad_norm": 0.1904296875, "learning_rate": 0.0008885225957170768, "loss": 2.1871, "step": 30281 }, { "epoch": 0.8126341777586947, "grad_norm": 0.1943359375, "learning_rate": 0.0008884979556275831, "loss": 2.2348, "step": 30282 }, { "epoch": 0.8126610133104336, "grad_norm": 0.189453125, "learning_rate": 0.0008884733149052096, "loss": 2.3112, "step": 30283 }, { "epoch": 0.8126878488621726, "grad_norm": 0.1904296875, "learning_rate": 0.0008884486735500107, "loss": 2.2802, "step": 30284 }, { "epoch": 0.8127146844139116, "grad_norm": 0.1884765625, "learning_rate": 0.0008884240315620402, "loss": 2.2345, "step": 30285 }, { "epoch": 0.8127415199656505, "grad_norm": 0.189453125, "learning_rate": 0.0008883993889413522, "loss": 2.2508, "step": 30286 }, { "epoch": 0.8127683555173895, "grad_norm": 0.18359375, "learning_rate": 0.0008883747456880009, "loss": 2.2029, "step": 30287 }, { "epoch": 0.8127951910691283, "grad_norm": 0.1904296875, "learning_rate": 0.0008883501018020403, "loss": 2.2565, "step": 30288 }, { "epoch": 0.8128220266208673, "grad_norm": 0.1875, "learning_rate": 0.0008883254572835241, "loss": 2.2056, "step": 30289 }, { "epoch": 0.8128488621726063, "grad_norm": 0.1875, "learning_rate": 0.000888300812132507, "loss": 2.3151, "step": 30290 }, { "epoch": 0.8128756977243452, "grad_norm": 0.1904296875, "learning_rate": 0.0008882761663490426, "loss": 2.3291, "step": 30291 }, { "epoch": 0.8129025332760842, "grad_norm": 0.1865234375, "learning_rate": 0.0008882515199331851, "loss": 2.1755, "step": 30292 }, { "epoch": 0.8129293688278231, "grad_norm": 0.1923828125, "learning_rate": 0.0008882268728849888, "loss": 2.2556, "step": 30293 }, { "epoch": 0.8129562043795621, "grad_norm": 0.1923828125, "learning_rate": 0.0008882022252045072, "loss": 2.3066, "step": 30294 }, { "epoch": 0.8129830399313009, "grad_norm": 0.201171875, "learning_rate": 0.0008881775768917948, "loss": 2.2804, "step": 30295 }, { "epoch": 0.8130098754830399, "grad_norm": 0.1923828125, "learning_rate": 0.0008881529279469056, "loss": 2.1969, "step": 30296 }, { "epoch": 0.8130367110347789, "grad_norm": 0.189453125, "learning_rate": 0.0008881282783698937, "loss": 2.2038, "step": 30297 }, { "epoch": 0.8130635465865178, "grad_norm": 0.1943359375, "learning_rate": 0.0008881036281608132, "loss": 2.2094, "step": 30298 }, { "epoch": 0.8130903821382568, "grad_norm": 0.189453125, "learning_rate": 0.0008880789773197181, "loss": 2.2312, "step": 30299 }, { "epoch": 0.8131172176899957, "grad_norm": 0.1875, "learning_rate": 0.0008880543258466621, "loss": 2.2765, "step": 30300 }, { "epoch": 0.8131440532417347, "grad_norm": 0.189453125, "learning_rate": 0.0008880296737417, "loss": 2.2957, "step": 30301 }, { "epoch": 0.8131708887934735, "grad_norm": 0.1923828125, "learning_rate": 0.0008880050210048855, "loss": 2.3091, "step": 30302 }, { "epoch": 0.8131977243452125, "grad_norm": 0.1923828125, "learning_rate": 0.0008879803676362727, "loss": 2.3108, "step": 30303 }, { "epoch": 0.8132245598969515, "grad_norm": 0.1904296875, "learning_rate": 0.0008879557136359157, "loss": 2.2618, "step": 30304 }, { "epoch": 0.8132513954486904, "grad_norm": 0.19140625, "learning_rate": 0.0008879310590038685, "loss": 2.2312, "step": 30305 }, { "epoch": 0.8132782310004294, "grad_norm": 0.19140625, "learning_rate": 0.0008879064037401855, "loss": 2.213, "step": 30306 }, { "epoch": 0.8133050665521683, "grad_norm": 0.1904296875, "learning_rate": 0.0008878817478449204, "loss": 2.2538, "step": 30307 }, { "epoch": 0.8133319021039073, "grad_norm": 0.1884765625, "learning_rate": 0.0008878570913181274, "loss": 2.2483, "step": 30308 }, { "epoch": 0.8133587376556461, "grad_norm": 0.1904296875, "learning_rate": 0.0008878324341598607, "loss": 2.2206, "step": 30309 }, { "epoch": 0.8133855732073851, "grad_norm": 0.1923828125, "learning_rate": 0.0008878077763701742, "loss": 2.329, "step": 30310 }, { "epoch": 0.8134124087591241, "grad_norm": 0.1865234375, "learning_rate": 0.0008877831179491223, "loss": 2.265, "step": 30311 }, { "epoch": 0.813439244310863, "grad_norm": 0.1875, "learning_rate": 0.0008877584588967587, "loss": 2.272, "step": 30312 }, { "epoch": 0.813466079862602, "grad_norm": 0.1953125, "learning_rate": 0.000887733799213138, "loss": 2.2446, "step": 30313 }, { "epoch": 0.8134929154143409, "grad_norm": 0.19140625, "learning_rate": 0.0008877091388983136, "loss": 2.335, "step": 30314 }, { "epoch": 0.8135197509660799, "grad_norm": 0.1875, "learning_rate": 0.0008876844779523402, "loss": 2.2881, "step": 30315 }, { "epoch": 0.8135465865178189, "grad_norm": 0.1904296875, "learning_rate": 0.0008876598163752717, "loss": 2.2291, "step": 30316 }, { "epoch": 0.8135734220695577, "grad_norm": 0.193359375, "learning_rate": 0.0008876351541671621, "loss": 2.2463, "step": 30317 }, { "epoch": 0.8136002576212967, "grad_norm": 0.1884765625, "learning_rate": 0.0008876104913280657, "loss": 2.2374, "step": 30318 }, { "epoch": 0.8136270931730356, "grad_norm": 0.189453125, "learning_rate": 0.0008875858278580366, "loss": 2.3025, "step": 30319 }, { "epoch": 0.8136539287247746, "grad_norm": 0.1865234375, "learning_rate": 0.0008875611637571287, "loss": 2.1982, "step": 30320 }, { "epoch": 0.8136807642765135, "grad_norm": 0.1875, "learning_rate": 0.0008875364990253962, "loss": 2.2609, "step": 30321 }, { "epoch": 0.8137075998282525, "grad_norm": 0.1875, "learning_rate": 0.0008875118336628932, "loss": 2.3199, "step": 30322 }, { "epoch": 0.8137344353799915, "grad_norm": 0.185546875, "learning_rate": 0.0008874871676696738, "loss": 2.2663, "step": 30323 }, { "epoch": 0.8137612709317303, "grad_norm": 0.1865234375, "learning_rate": 0.0008874625010457922, "loss": 2.2234, "step": 30324 }, { "epoch": 0.8137881064834693, "grad_norm": 0.1865234375, "learning_rate": 0.0008874378337913023, "loss": 2.2032, "step": 30325 }, { "epoch": 0.8138149420352082, "grad_norm": 0.1875, "learning_rate": 0.0008874131659062585, "loss": 2.2115, "step": 30326 }, { "epoch": 0.8138417775869472, "grad_norm": 0.19140625, "learning_rate": 0.0008873884973907147, "loss": 2.2076, "step": 30327 }, { "epoch": 0.8138686131386861, "grad_norm": 0.1845703125, "learning_rate": 0.0008873638282447253, "loss": 2.2694, "step": 30328 }, { "epoch": 0.8138954486904251, "grad_norm": 0.1865234375, "learning_rate": 0.0008873391584683439, "loss": 2.2088, "step": 30329 }, { "epoch": 0.8139222842421641, "grad_norm": 0.1982421875, "learning_rate": 0.000887314488061625, "loss": 2.2453, "step": 30330 }, { "epoch": 0.8139491197939029, "grad_norm": 0.1875, "learning_rate": 0.0008872898170246229, "loss": 2.2032, "step": 30331 }, { "epoch": 0.8139759553456419, "grad_norm": 0.181640625, "learning_rate": 0.0008872651453573911, "loss": 2.2176, "step": 30332 }, { "epoch": 0.8140027908973808, "grad_norm": 0.1865234375, "learning_rate": 0.0008872404730599843, "loss": 2.2899, "step": 30333 }, { "epoch": 0.8140296264491198, "grad_norm": 0.189453125, "learning_rate": 0.0008872158001324563, "loss": 2.2204, "step": 30334 }, { "epoch": 0.8140564620008587, "grad_norm": 0.1962890625, "learning_rate": 0.0008871911265748613, "loss": 2.3088, "step": 30335 }, { "epoch": 0.8140832975525977, "grad_norm": 0.193359375, "learning_rate": 0.0008871664523872537, "loss": 2.2761, "step": 30336 }, { "epoch": 0.8141101331043367, "grad_norm": 0.189453125, "learning_rate": 0.0008871417775696871, "loss": 2.2443, "step": 30337 }, { "epoch": 0.8141369686560755, "grad_norm": 0.189453125, "learning_rate": 0.0008871171021222162, "loss": 2.2852, "step": 30338 }, { "epoch": 0.8141638042078145, "grad_norm": 0.1884765625, "learning_rate": 0.0008870924260448947, "loss": 2.2074, "step": 30339 }, { "epoch": 0.8141906397595534, "grad_norm": 0.19921875, "learning_rate": 0.0008870677493377768, "loss": 2.2607, "step": 30340 }, { "epoch": 0.8142174753112924, "grad_norm": 0.189453125, "learning_rate": 0.0008870430720009169, "loss": 2.2495, "step": 30341 }, { "epoch": 0.8142443108630314, "grad_norm": 0.1845703125, "learning_rate": 0.0008870183940343688, "loss": 2.2512, "step": 30342 }, { "epoch": 0.8142711464147703, "grad_norm": 0.193359375, "learning_rate": 0.0008869937154381868, "loss": 2.2648, "step": 30343 }, { "epoch": 0.8142979819665093, "grad_norm": 0.18359375, "learning_rate": 0.000886969036212425, "loss": 2.2144, "step": 30344 }, { "epoch": 0.8143248175182481, "grad_norm": 0.1904296875, "learning_rate": 0.0008869443563571376, "loss": 2.2266, "step": 30345 }, { "epoch": 0.8143516530699871, "grad_norm": 0.189453125, "learning_rate": 0.0008869196758723788, "loss": 2.2285, "step": 30346 }, { "epoch": 0.814378488621726, "grad_norm": 0.19140625, "learning_rate": 0.0008868949947582026, "loss": 2.2085, "step": 30347 }, { "epoch": 0.814405324173465, "grad_norm": 0.1884765625, "learning_rate": 0.0008868703130146631, "loss": 2.1743, "step": 30348 }, { "epoch": 0.814432159725204, "grad_norm": 0.1884765625, "learning_rate": 0.0008868456306418145, "loss": 2.2183, "step": 30349 }, { "epoch": 0.8144589952769429, "grad_norm": 0.1875, "learning_rate": 0.0008868209476397112, "loss": 2.257, "step": 30350 }, { "epoch": 0.8144858308286819, "grad_norm": 0.1875, "learning_rate": 0.0008867962640084068, "loss": 2.2358, "step": 30351 }, { "epoch": 0.8145126663804207, "grad_norm": 0.1875, "learning_rate": 0.000886771579747956, "loss": 2.2357, "step": 30352 }, { "epoch": 0.8145395019321597, "grad_norm": 0.1904296875, "learning_rate": 0.0008867468948584128, "loss": 2.2517, "step": 30353 }, { "epoch": 0.8145663374838986, "grad_norm": 0.1904296875, "learning_rate": 0.0008867222093398311, "loss": 2.2491, "step": 30354 }, { "epoch": 0.8145931730356376, "grad_norm": 0.1884765625, "learning_rate": 0.0008866975231922653, "loss": 2.2045, "step": 30355 }, { "epoch": 0.8146200085873766, "grad_norm": 0.2001953125, "learning_rate": 0.0008866728364157695, "loss": 2.3228, "step": 30356 }, { "epoch": 0.8146468441391155, "grad_norm": 0.189453125, "learning_rate": 0.0008866481490103979, "loss": 2.2749, "step": 30357 }, { "epoch": 0.8146736796908545, "grad_norm": 0.1865234375, "learning_rate": 0.0008866234609762045, "loss": 2.2318, "step": 30358 }, { "epoch": 0.8147005152425933, "grad_norm": 0.1923828125, "learning_rate": 0.0008865987723132434, "loss": 2.2675, "step": 30359 }, { "epoch": 0.8147273507943323, "grad_norm": 0.1904296875, "learning_rate": 0.0008865740830215691, "loss": 2.2298, "step": 30360 }, { "epoch": 0.8147541863460713, "grad_norm": 0.189453125, "learning_rate": 0.0008865493931012357, "loss": 2.3076, "step": 30361 }, { "epoch": 0.8147810218978102, "grad_norm": 0.1904296875, "learning_rate": 0.000886524702552297, "loss": 2.3378, "step": 30362 }, { "epoch": 0.8148078574495492, "grad_norm": 0.1884765625, "learning_rate": 0.0008865000113748075, "loss": 2.198, "step": 30363 }, { "epoch": 0.8148346930012881, "grad_norm": 0.193359375, "learning_rate": 0.0008864753195688212, "loss": 2.2892, "step": 30364 }, { "epoch": 0.8148615285530271, "grad_norm": 0.1865234375, "learning_rate": 0.0008864506271343923, "loss": 2.3316, "step": 30365 }, { "epoch": 0.814888364104766, "grad_norm": 0.1875, "learning_rate": 0.0008864259340715751, "loss": 2.2644, "step": 30366 }, { "epoch": 0.8149151996565049, "grad_norm": 0.1865234375, "learning_rate": 0.0008864012403804237, "loss": 2.209, "step": 30367 }, { "epoch": 0.8149420352082439, "grad_norm": 0.1865234375, "learning_rate": 0.0008863765460609921, "loss": 2.2517, "step": 30368 }, { "epoch": 0.8149688707599828, "grad_norm": 0.1865234375, "learning_rate": 0.0008863518511133348, "loss": 2.2209, "step": 30369 }, { "epoch": 0.8149957063117218, "grad_norm": 0.19140625, "learning_rate": 0.0008863271555375055, "loss": 2.3097, "step": 30370 }, { "epoch": 0.8150225418634607, "grad_norm": 0.1884765625, "learning_rate": 0.0008863024593335589, "loss": 2.2254, "step": 30371 }, { "epoch": 0.8150493774151997, "grad_norm": 0.1884765625, "learning_rate": 0.0008862777625015489, "loss": 2.2964, "step": 30372 }, { "epoch": 0.8150762129669386, "grad_norm": 0.1904296875, "learning_rate": 0.0008862530650415296, "loss": 2.2907, "step": 30373 }, { "epoch": 0.8151030485186775, "grad_norm": 0.19140625, "learning_rate": 0.0008862283669535554, "loss": 2.1919, "step": 30374 }, { "epoch": 0.8151298840704165, "grad_norm": 0.1875, "learning_rate": 0.0008862036682376805, "loss": 2.264, "step": 30375 }, { "epoch": 0.8151567196221554, "grad_norm": 0.193359375, "learning_rate": 0.0008861789688939586, "loss": 2.2562, "step": 30376 }, { "epoch": 0.8151835551738944, "grad_norm": 0.193359375, "learning_rate": 0.0008861542689224445, "loss": 2.3227, "step": 30377 }, { "epoch": 0.8152103907256333, "grad_norm": 0.185546875, "learning_rate": 0.000886129568323192, "loss": 2.2486, "step": 30378 }, { "epoch": 0.8152372262773723, "grad_norm": 0.185546875, "learning_rate": 0.0008861048670962557, "loss": 2.2069, "step": 30379 }, { "epoch": 0.8152640618291112, "grad_norm": 0.1865234375, "learning_rate": 0.0008860801652416892, "loss": 2.2716, "step": 30380 }, { "epoch": 0.8152908973808501, "grad_norm": 0.1875, "learning_rate": 0.000886055462759547, "loss": 2.203, "step": 30381 }, { "epoch": 0.8153177329325891, "grad_norm": 0.1875, "learning_rate": 0.0008860307596498834, "loss": 2.2039, "step": 30382 }, { "epoch": 0.815344568484328, "grad_norm": 0.1875, "learning_rate": 0.0008860060559127523, "loss": 2.2694, "step": 30383 }, { "epoch": 0.815371404036067, "grad_norm": 0.1904296875, "learning_rate": 0.0008859813515482081, "loss": 2.3165, "step": 30384 }, { "epoch": 0.8153982395878059, "grad_norm": 0.1875, "learning_rate": 0.0008859566465563048, "loss": 2.2153, "step": 30385 }, { "epoch": 0.8154250751395449, "grad_norm": 0.1875, "learning_rate": 0.0008859319409370973, "loss": 2.3271, "step": 30386 }, { "epoch": 0.8154519106912839, "grad_norm": 0.18359375, "learning_rate": 0.0008859072346906387, "loss": 2.2318, "step": 30387 }, { "epoch": 0.8154787462430227, "grad_norm": 0.189453125, "learning_rate": 0.0008858825278169839, "loss": 2.2977, "step": 30388 }, { "epoch": 0.8155055817947617, "grad_norm": 0.1865234375, "learning_rate": 0.0008858578203161869, "loss": 2.2454, "step": 30389 }, { "epoch": 0.8155324173465006, "grad_norm": 0.1865234375, "learning_rate": 0.0008858331121883021, "loss": 2.2819, "step": 30390 }, { "epoch": 0.8155592528982396, "grad_norm": 0.185546875, "learning_rate": 0.0008858084034333835, "loss": 2.2758, "step": 30391 }, { "epoch": 0.8155860884499785, "grad_norm": 0.1845703125, "learning_rate": 0.0008857836940514852, "loss": 2.2612, "step": 30392 }, { "epoch": 0.8156129240017175, "grad_norm": 0.193359375, "learning_rate": 0.0008857589840426618, "loss": 2.2712, "step": 30393 }, { "epoch": 0.8156397595534565, "grad_norm": 0.1904296875, "learning_rate": 0.0008857342734069672, "loss": 2.226, "step": 30394 }, { "epoch": 0.8156665951051953, "grad_norm": 0.1884765625, "learning_rate": 0.0008857095621444555, "loss": 2.2746, "step": 30395 }, { "epoch": 0.8156934306569343, "grad_norm": 0.1875, "learning_rate": 0.0008856848502551811, "loss": 2.2007, "step": 30396 }, { "epoch": 0.8157202662086732, "grad_norm": 0.1875, "learning_rate": 0.0008856601377391983, "loss": 2.1965, "step": 30397 }, { "epoch": 0.8157471017604122, "grad_norm": 0.189453125, "learning_rate": 0.0008856354245965611, "loss": 2.2575, "step": 30398 }, { "epoch": 0.8157739373121511, "grad_norm": 0.1865234375, "learning_rate": 0.0008856107108273239, "loss": 2.2513, "step": 30399 }, { "epoch": 0.8158007728638901, "grad_norm": 0.1875, "learning_rate": 0.0008855859964315408, "loss": 2.218, "step": 30400 }, { "epoch": 0.8158276084156291, "grad_norm": 0.1923828125, "learning_rate": 0.0008855612814092661, "loss": 2.2988, "step": 30401 }, { "epoch": 0.815854443967368, "grad_norm": 0.185546875, "learning_rate": 0.0008855365657605539, "loss": 2.2813, "step": 30402 }, { "epoch": 0.8158812795191069, "grad_norm": 0.1884765625, "learning_rate": 0.0008855118494854585, "loss": 2.2489, "step": 30403 }, { "epoch": 0.8159081150708458, "grad_norm": 0.1884765625, "learning_rate": 0.0008854871325840339, "loss": 2.2926, "step": 30404 }, { "epoch": 0.8159349506225848, "grad_norm": 0.1884765625, "learning_rate": 0.0008854624150563348, "loss": 2.2485, "step": 30405 }, { "epoch": 0.8159617861743237, "grad_norm": 0.1806640625, "learning_rate": 0.0008854376969024151, "loss": 2.1685, "step": 30406 }, { "epoch": 0.8159886217260627, "grad_norm": 0.1884765625, "learning_rate": 0.000885412978122329, "loss": 2.1775, "step": 30407 }, { "epoch": 0.8160154572778017, "grad_norm": 0.1884765625, "learning_rate": 0.000885388258716131, "loss": 2.2043, "step": 30408 }, { "epoch": 0.8160422928295405, "grad_norm": 0.1884765625, "learning_rate": 0.0008853635386838748, "loss": 2.2477, "step": 30409 }, { "epoch": 0.8160691283812795, "grad_norm": 0.1875, "learning_rate": 0.0008853388180256151, "loss": 2.2673, "step": 30410 }, { "epoch": 0.8160959639330184, "grad_norm": 0.1875, "learning_rate": 0.0008853140967414059, "loss": 2.2638, "step": 30411 }, { "epoch": 0.8161227994847574, "grad_norm": 0.1845703125, "learning_rate": 0.0008852893748313016, "loss": 2.1783, "step": 30412 }, { "epoch": 0.8161496350364964, "grad_norm": 0.19140625, "learning_rate": 0.0008852646522953564, "loss": 2.2462, "step": 30413 }, { "epoch": 0.8161764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.0008852399291336244, "loss": 2.2724, "step": 30414 }, { "epoch": 0.8162033061399743, "grad_norm": 0.189453125, "learning_rate": 0.0008852152053461598, "loss": 2.2336, "step": 30415 }, { "epoch": 0.8162301416917132, "grad_norm": 0.193359375, "learning_rate": 0.0008851904809330172, "loss": 2.2801, "step": 30416 }, { "epoch": 0.8162569772434521, "grad_norm": 0.1923828125, "learning_rate": 0.0008851657558942506, "loss": 2.2658, "step": 30417 }, { "epoch": 0.816283812795191, "grad_norm": 0.1875, "learning_rate": 0.0008851410302299138, "loss": 2.227, "step": 30418 }, { "epoch": 0.81631064834693, "grad_norm": 0.1865234375, "learning_rate": 0.0008851163039400618, "loss": 2.1924, "step": 30419 }, { "epoch": 0.816337483898669, "grad_norm": 0.1865234375, "learning_rate": 0.0008850915770247487, "loss": 2.2584, "step": 30420 }, { "epoch": 0.8163643194504079, "grad_norm": 0.1845703125, "learning_rate": 0.000885066849484028, "loss": 2.1306, "step": 30421 }, { "epoch": 0.8163911550021469, "grad_norm": 0.193359375, "learning_rate": 0.000885042121317955, "loss": 2.2616, "step": 30422 }, { "epoch": 0.8164179905538858, "grad_norm": 0.1845703125, "learning_rate": 0.0008850173925265835, "loss": 2.2417, "step": 30423 }, { "epoch": 0.8164448261056247, "grad_norm": 0.189453125, "learning_rate": 0.0008849926631099672, "loss": 2.2375, "step": 30424 }, { "epoch": 0.8164716616573636, "grad_norm": 0.1884765625, "learning_rate": 0.0008849679330681612, "loss": 2.3215, "step": 30425 }, { "epoch": 0.8164984972091026, "grad_norm": 0.1904296875, "learning_rate": 0.0008849432024012193, "loss": 2.2829, "step": 30426 }, { "epoch": 0.8165253327608416, "grad_norm": 0.185546875, "learning_rate": 0.0008849184711091959, "loss": 2.1283, "step": 30427 }, { "epoch": 0.8165521683125805, "grad_norm": 0.19140625, "learning_rate": 0.0008848937391921451, "loss": 2.2221, "step": 30428 }, { "epoch": 0.8165790038643195, "grad_norm": 0.1904296875, "learning_rate": 0.0008848690066501213, "loss": 2.187, "step": 30429 }, { "epoch": 0.8166058394160584, "grad_norm": 0.189453125, "learning_rate": 0.0008848442734831788, "loss": 2.1962, "step": 30430 }, { "epoch": 0.8166326749677973, "grad_norm": 0.1884765625, "learning_rate": 0.0008848195396913716, "loss": 2.1988, "step": 30431 }, { "epoch": 0.8166595105195363, "grad_norm": 0.185546875, "learning_rate": 0.0008847948052747542, "loss": 2.2798, "step": 30432 }, { "epoch": 0.8166863460712752, "grad_norm": 0.1875, "learning_rate": 0.0008847700702333807, "loss": 2.2284, "step": 30433 }, { "epoch": 0.8167131816230142, "grad_norm": 0.18359375, "learning_rate": 0.0008847453345673056, "loss": 2.2461, "step": 30434 }, { "epoch": 0.8167400171747531, "grad_norm": 0.1904296875, "learning_rate": 0.000884720598276583, "loss": 2.2587, "step": 30435 }, { "epoch": 0.8167668527264921, "grad_norm": 0.1943359375, "learning_rate": 0.000884695861361267, "loss": 2.3076, "step": 30436 }, { "epoch": 0.816793688278231, "grad_norm": 0.1875, "learning_rate": 0.0008846711238214124, "loss": 2.2458, "step": 30437 }, { "epoch": 0.81682052382997, "grad_norm": 0.1826171875, "learning_rate": 0.0008846463856570727, "loss": 2.2368, "step": 30438 }, { "epoch": 0.8168473593817089, "grad_norm": 0.1904296875, "learning_rate": 0.0008846216468683028, "loss": 2.2625, "step": 30439 }, { "epoch": 0.8168741949334478, "grad_norm": 0.1884765625, "learning_rate": 0.0008845969074551566, "loss": 2.1478, "step": 30440 }, { "epoch": 0.8169010304851868, "grad_norm": 0.1875, "learning_rate": 0.0008845721674176886, "loss": 2.2264, "step": 30441 }, { "epoch": 0.8169278660369257, "grad_norm": 0.19140625, "learning_rate": 0.000884547426755953, "loss": 2.2331, "step": 30442 }, { "epoch": 0.8169547015886647, "grad_norm": 0.1884765625, "learning_rate": 0.000884522685470004, "loss": 2.2313, "step": 30443 }, { "epoch": 0.8169815371404036, "grad_norm": 0.1904296875, "learning_rate": 0.000884497943559896, "loss": 2.1998, "step": 30444 }, { "epoch": 0.8170083726921425, "grad_norm": 0.1865234375, "learning_rate": 0.0008844732010256833, "loss": 2.2031, "step": 30445 }, { "epoch": 0.8170352082438815, "grad_norm": 0.1845703125, "learning_rate": 0.0008844484578674199, "loss": 2.1726, "step": 30446 }, { "epoch": 0.8170620437956204, "grad_norm": 0.189453125, "learning_rate": 0.0008844237140851604, "loss": 2.2493, "step": 30447 }, { "epoch": 0.8170888793473594, "grad_norm": 0.1943359375, "learning_rate": 0.000884398969678959, "loss": 2.2295, "step": 30448 }, { "epoch": 0.8171157148990983, "grad_norm": 0.1865234375, "learning_rate": 0.0008843742246488698, "loss": 2.2919, "step": 30449 }, { "epoch": 0.8171425504508373, "grad_norm": 0.1904296875, "learning_rate": 0.0008843494789949471, "loss": 2.1576, "step": 30450 }, { "epoch": 0.8171693860025762, "grad_norm": 0.189453125, "learning_rate": 0.0008843247327172455, "loss": 2.2241, "step": 30451 }, { "epoch": 0.8171962215543151, "grad_norm": 0.185546875, "learning_rate": 0.0008842999858158191, "loss": 2.2396, "step": 30452 }, { "epoch": 0.8172230571060541, "grad_norm": 0.1875, "learning_rate": 0.0008842752382907221, "loss": 2.239, "step": 30453 }, { "epoch": 0.817249892657793, "grad_norm": 0.2041015625, "learning_rate": 0.0008842504901420087, "loss": 2.2847, "step": 30454 }, { "epoch": 0.817276728209532, "grad_norm": 0.1904296875, "learning_rate": 0.0008842257413697337, "loss": 2.273, "step": 30455 }, { "epoch": 0.8173035637612709, "grad_norm": 0.189453125, "learning_rate": 0.0008842009919739509, "loss": 2.1811, "step": 30456 }, { "epoch": 0.8173303993130099, "grad_norm": 0.189453125, "learning_rate": 0.0008841762419547146, "loss": 2.2582, "step": 30457 }, { "epoch": 0.8173572348647489, "grad_norm": 0.1953125, "learning_rate": 0.0008841514913120792, "loss": 2.2988, "step": 30458 }, { "epoch": 0.8173840704164878, "grad_norm": 0.185546875, "learning_rate": 0.0008841267400460994, "loss": 2.2381, "step": 30459 }, { "epoch": 0.8174109059682267, "grad_norm": 0.18359375, "learning_rate": 0.0008841019881568288, "loss": 2.2327, "step": 30460 }, { "epoch": 0.8174377415199656, "grad_norm": 0.1875, "learning_rate": 0.0008840772356443221, "loss": 2.2808, "step": 30461 }, { "epoch": 0.8174645770717046, "grad_norm": 0.185546875, "learning_rate": 0.0008840524825086336, "loss": 2.2495, "step": 30462 }, { "epoch": 0.8174914126234435, "grad_norm": 0.189453125, "learning_rate": 0.0008840277287498174, "loss": 2.2088, "step": 30463 }, { "epoch": 0.8175182481751825, "grad_norm": 0.1845703125, "learning_rate": 0.0008840029743679281, "loss": 2.2826, "step": 30464 }, { "epoch": 0.8175450837269215, "grad_norm": 0.185546875, "learning_rate": 0.0008839782193630197, "loss": 2.2226, "step": 30465 }, { "epoch": 0.8175719192786604, "grad_norm": 0.1865234375, "learning_rate": 0.0008839534637351466, "loss": 2.3084, "step": 30466 }, { "epoch": 0.8175987548303993, "grad_norm": 0.189453125, "learning_rate": 0.0008839287074843633, "loss": 2.2025, "step": 30467 }, { "epoch": 0.8176255903821382, "grad_norm": 0.1865234375, "learning_rate": 0.0008839039506107236, "loss": 2.2177, "step": 30468 }, { "epoch": 0.8176524259338772, "grad_norm": 0.1865234375, "learning_rate": 0.0008838791931142824, "loss": 2.2221, "step": 30469 }, { "epoch": 0.8176792614856161, "grad_norm": 0.19140625, "learning_rate": 0.0008838544349950939, "loss": 2.2879, "step": 30470 }, { "epoch": 0.8177060970373551, "grad_norm": 0.1884765625, "learning_rate": 0.000883829676253212, "loss": 2.325, "step": 30471 }, { "epoch": 0.8177329325890941, "grad_norm": 0.1875, "learning_rate": 0.0008838049168886915, "loss": 2.2129, "step": 30472 }, { "epoch": 0.817759768140833, "grad_norm": 0.18359375, "learning_rate": 0.0008837801569015864, "loss": 2.2229, "step": 30473 }, { "epoch": 0.817786603692572, "grad_norm": 0.1923828125, "learning_rate": 0.0008837553962919513, "loss": 2.2898, "step": 30474 }, { "epoch": 0.8178134392443108, "grad_norm": 0.1865234375, "learning_rate": 0.0008837306350598401, "loss": 2.2276, "step": 30475 }, { "epoch": 0.8178402747960498, "grad_norm": 0.1904296875, "learning_rate": 0.0008837058732053076, "loss": 2.2035, "step": 30476 }, { "epoch": 0.8178671103477888, "grad_norm": 0.1923828125, "learning_rate": 0.0008836811107284077, "loss": 2.271, "step": 30477 }, { "epoch": 0.8178939458995277, "grad_norm": 0.1904296875, "learning_rate": 0.000883656347629195, "loss": 2.226, "step": 30478 }, { "epoch": 0.8179207814512667, "grad_norm": 0.1904296875, "learning_rate": 0.0008836315839077236, "loss": 2.2004, "step": 30479 }, { "epoch": 0.8179476170030056, "grad_norm": 0.193359375, "learning_rate": 0.000883606819564048, "loss": 2.3203, "step": 30480 }, { "epoch": 0.8179744525547445, "grad_norm": 0.185546875, "learning_rate": 0.0008835820545982226, "loss": 2.3249, "step": 30481 }, { "epoch": 0.8180012881064834, "grad_norm": 0.1875, "learning_rate": 0.0008835572890103014, "loss": 2.1915, "step": 30482 }, { "epoch": 0.8180281236582224, "grad_norm": 0.189453125, "learning_rate": 0.000883532522800339, "loss": 2.3149, "step": 30483 }, { "epoch": 0.8180549592099614, "grad_norm": 0.1845703125, "learning_rate": 0.0008835077559683898, "loss": 2.2356, "step": 30484 }, { "epoch": 0.8180817947617003, "grad_norm": 0.1845703125, "learning_rate": 0.0008834829885145079, "loss": 2.3049, "step": 30485 }, { "epoch": 0.8181086303134393, "grad_norm": 0.18359375, "learning_rate": 0.0008834582204387478, "loss": 2.3284, "step": 30486 }, { "epoch": 0.8181354658651782, "grad_norm": 0.189453125, "learning_rate": 0.0008834334517411637, "loss": 2.2971, "step": 30487 }, { "epoch": 0.8181623014169171, "grad_norm": 0.189453125, "learning_rate": 0.00088340868242181, "loss": 2.2452, "step": 30488 }, { "epoch": 0.818189136968656, "grad_norm": 0.1865234375, "learning_rate": 0.0008833839124807409, "loss": 2.2276, "step": 30489 }, { "epoch": 0.818215972520395, "grad_norm": 0.1884765625, "learning_rate": 0.000883359141918011, "loss": 2.2734, "step": 30490 }, { "epoch": 0.818242808072134, "grad_norm": 0.1884765625, "learning_rate": 0.0008833343707336744, "loss": 2.2773, "step": 30491 }, { "epoch": 0.8182696436238729, "grad_norm": 0.18359375, "learning_rate": 0.0008833095989277857, "loss": 2.201, "step": 30492 }, { "epoch": 0.8182964791756119, "grad_norm": 0.1904296875, "learning_rate": 0.0008832848265003991, "loss": 2.2848, "step": 30493 }, { "epoch": 0.8183233147273508, "grad_norm": 0.1953125, "learning_rate": 0.0008832600534515687, "loss": 2.2311, "step": 30494 }, { "epoch": 0.8183501502790897, "grad_norm": 0.1904296875, "learning_rate": 0.0008832352797813495, "loss": 2.2209, "step": 30495 }, { "epoch": 0.8183769858308286, "grad_norm": 0.1845703125, "learning_rate": 0.0008832105054897949, "loss": 2.2635, "step": 30496 }, { "epoch": 0.8184038213825676, "grad_norm": 0.189453125, "learning_rate": 0.00088318573057696, "loss": 2.2786, "step": 30497 }, { "epoch": 0.8184306569343066, "grad_norm": 0.193359375, "learning_rate": 0.0008831609550428991, "loss": 2.2374, "step": 30498 }, { "epoch": 0.8184574924860455, "grad_norm": 0.1904296875, "learning_rate": 0.0008831361788876662, "loss": 2.2592, "step": 30499 }, { "epoch": 0.8184843280377845, "grad_norm": 0.189453125, "learning_rate": 0.0008831114021113158, "loss": 2.1474, "step": 30500 }, { "epoch": 0.8185111635895234, "grad_norm": 0.18359375, "learning_rate": 0.0008830866247139024, "loss": 2.1805, "step": 30501 }, { "epoch": 0.8185379991412624, "grad_norm": 0.1904296875, "learning_rate": 0.0008830618466954802, "loss": 2.2855, "step": 30502 }, { "epoch": 0.8185648346930013, "grad_norm": 0.1875, "learning_rate": 0.0008830370680561034, "loss": 2.2707, "step": 30503 }, { "epoch": 0.8185916702447402, "grad_norm": 0.193359375, "learning_rate": 0.0008830122887958268, "loss": 2.2972, "step": 30504 }, { "epoch": 0.8186185057964792, "grad_norm": 0.1845703125, "learning_rate": 0.0008829875089147043, "loss": 2.2201, "step": 30505 }, { "epoch": 0.8186453413482181, "grad_norm": 0.1884765625, "learning_rate": 0.0008829627284127907, "loss": 2.272, "step": 30506 }, { "epoch": 0.8186721768999571, "grad_norm": 0.1865234375, "learning_rate": 0.00088293794729014, "loss": 2.3085, "step": 30507 }, { "epoch": 0.818699012451696, "grad_norm": 0.1865234375, "learning_rate": 0.0008829131655468066, "loss": 2.2238, "step": 30508 }, { "epoch": 0.818725848003435, "grad_norm": 0.1884765625, "learning_rate": 0.0008828883831828449, "loss": 2.2993, "step": 30509 }, { "epoch": 0.8187526835551739, "grad_norm": 0.1884765625, "learning_rate": 0.0008828636001983095, "loss": 2.1969, "step": 30510 }, { "epoch": 0.8187795191069128, "grad_norm": 0.201171875, "learning_rate": 0.0008828388165932545, "loss": 2.3378, "step": 30511 }, { "epoch": 0.8188063546586518, "grad_norm": 0.19140625, "learning_rate": 0.0008828140323677344, "loss": 2.1774, "step": 30512 }, { "epoch": 0.8188331902103907, "grad_norm": 0.1904296875, "learning_rate": 0.0008827892475218034, "loss": 2.2899, "step": 30513 }, { "epoch": 0.8188600257621297, "grad_norm": 0.189453125, "learning_rate": 0.0008827644620555161, "loss": 2.2309, "step": 30514 }, { "epoch": 0.8188868613138686, "grad_norm": 0.1865234375, "learning_rate": 0.0008827396759689268, "loss": 2.3359, "step": 30515 }, { "epoch": 0.8189136968656076, "grad_norm": 0.19140625, "learning_rate": 0.0008827148892620896, "loss": 2.2948, "step": 30516 }, { "epoch": 0.8189405324173465, "grad_norm": 0.1904296875, "learning_rate": 0.0008826901019350593, "loss": 2.2046, "step": 30517 }, { "epoch": 0.8189673679690854, "grad_norm": 0.1904296875, "learning_rate": 0.0008826653139878901, "loss": 2.2506, "step": 30518 }, { "epoch": 0.8189942035208244, "grad_norm": 0.1875, "learning_rate": 0.0008826405254206363, "loss": 2.2094, "step": 30519 }, { "epoch": 0.8190210390725633, "grad_norm": 0.1884765625, "learning_rate": 0.0008826157362333524, "loss": 2.2848, "step": 30520 }, { "epoch": 0.8190478746243023, "grad_norm": 0.1845703125, "learning_rate": 0.0008825909464260927, "loss": 2.2855, "step": 30521 }, { "epoch": 0.8190747101760412, "grad_norm": 0.185546875, "learning_rate": 0.0008825661559989116, "loss": 2.174, "step": 30522 }, { "epoch": 0.8191015457277802, "grad_norm": 0.1845703125, "learning_rate": 0.0008825413649518634, "loss": 2.1997, "step": 30523 }, { "epoch": 0.8191283812795191, "grad_norm": 0.1865234375, "learning_rate": 0.0008825165732850027, "loss": 2.1797, "step": 30524 }, { "epoch": 0.819155216831258, "grad_norm": 0.1845703125, "learning_rate": 0.0008824917809983836, "loss": 2.2715, "step": 30525 }, { "epoch": 0.819182052382997, "grad_norm": 0.185546875, "learning_rate": 0.0008824669880920607, "loss": 2.2311, "step": 30526 }, { "epoch": 0.8192088879347359, "grad_norm": 0.1884765625, "learning_rate": 0.0008824421945660883, "loss": 2.217, "step": 30527 }, { "epoch": 0.8192357234864749, "grad_norm": 0.185546875, "learning_rate": 0.000882417400420521, "loss": 2.1905, "step": 30528 }, { "epoch": 0.8192625590382139, "grad_norm": 0.1865234375, "learning_rate": 0.0008823926056554129, "loss": 2.2463, "step": 30529 }, { "epoch": 0.8192893945899528, "grad_norm": 0.189453125, "learning_rate": 0.0008823678102708184, "loss": 2.2783, "step": 30530 }, { "epoch": 0.8193162301416917, "grad_norm": 0.1875, "learning_rate": 0.0008823430142667921, "loss": 2.2685, "step": 30531 }, { "epoch": 0.8193430656934306, "grad_norm": 0.197265625, "learning_rate": 0.0008823182176433885, "loss": 2.2421, "step": 30532 }, { "epoch": 0.8193699012451696, "grad_norm": 0.18359375, "learning_rate": 0.0008822934204006615, "loss": 2.1805, "step": 30533 }, { "epoch": 0.8193967367969085, "grad_norm": 0.1884765625, "learning_rate": 0.0008822686225386659, "loss": 2.249, "step": 30534 }, { "epoch": 0.8194235723486475, "grad_norm": 0.185546875, "learning_rate": 0.000882243824057456, "loss": 2.2364, "step": 30535 }, { "epoch": 0.8194504079003865, "grad_norm": 0.1904296875, "learning_rate": 0.000882219024957086, "loss": 2.255, "step": 30536 }, { "epoch": 0.8194772434521254, "grad_norm": 0.193359375, "learning_rate": 0.0008821942252376107, "loss": 2.2669, "step": 30537 }, { "epoch": 0.8195040790038643, "grad_norm": 0.1884765625, "learning_rate": 0.0008821694248990842, "loss": 2.3119, "step": 30538 }, { "epoch": 0.8195309145556032, "grad_norm": 0.18359375, "learning_rate": 0.000882144623941561, "loss": 2.1972, "step": 30539 }, { "epoch": 0.8195577501073422, "grad_norm": 0.1875, "learning_rate": 0.0008821198223650956, "loss": 2.2422, "step": 30540 }, { "epoch": 0.8195845856590811, "grad_norm": 0.19140625, "learning_rate": 0.0008820950201697421, "loss": 2.3214, "step": 30541 }, { "epoch": 0.8196114212108201, "grad_norm": 0.1845703125, "learning_rate": 0.0008820702173555554, "loss": 2.2768, "step": 30542 }, { "epoch": 0.8196382567625591, "grad_norm": 0.185546875, "learning_rate": 0.0008820454139225895, "loss": 2.1953, "step": 30543 }, { "epoch": 0.819665092314298, "grad_norm": 0.1875, "learning_rate": 0.0008820206098708989, "loss": 2.2798, "step": 30544 }, { "epoch": 0.819691927866037, "grad_norm": 0.19140625, "learning_rate": 0.000881995805200538, "loss": 2.3333, "step": 30545 }, { "epoch": 0.8197187634177758, "grad_norm": 0.1884765625, "learning_rate": 0.0008819709999115614, "loss": 2.2347, "step": 30546 }, { "epoch": 0.8197455989695148, "grad_norm": 0.1865234375, "learning_rate": 0.0008819461940040233, "loss": 2.1766, "step": 30547 }, { "epoch": 0.8197724345212538, "grad_norm": 0.1865234375, "learning_rate": 0.0008819213874779783, "loss": 2.265, "step": 30548 }, { "epoch": 0.8197992700729927, "grad_norm": 0.1884765625, "learning_rate": 0.0008818965803334807, "loss": 2.2825, "step": 30549 }, { "epoch": 0.8198261056247317, "grad_norm": 0.1875, "learning_rate": 0.0008818717725705849, "loss": 2.2444, "step": 30550 }, { "epoch": 0.8198529411764706, "grad_norm": 0.185546875, "learning_rate": 0.0008818469641893453, "loss": 2.2932, "step": 30551 }, { "epoch": 0.8198797767282096, "grad_norm": 0.18359375, "learning_rate": 0.0008818221551898163, "loss": 2.2207, "step": 30552 }, { "epoch": 0.8199066122799484, "grad_norm": 0.1845703125, "learning_rate": 0.0008817973455720528, "loss": 2.2398, "step": 30553 }, { "epoch": 0.8199334478316874, "grad_norm": 0.19140625, "learning_rate": 0.0008817725353361084, "loss": 2.274, "step": 30554 }, { "epoch": 0.8199602833834264, "grad_norm": 0.1865234375, "learning_rate": 0.0008817477244820381, "loss": 2.1902, "step": 30555 }, { "epoch": 0.8199871189351653, "grad_norm": 0.1884765625, "learning_rate": 0.0008817229130098961, "loss": 2.2329, "step": 30556 }, { "epoch": 0.8200139544869043, "grad_norm": 0.18359375, "learning_rate": 0.0008816981009197373, "loss": 2.2588, "step": 30557 }, { "epoch": 0.8200407900386432, "grad_norm": 0.1845703125, "learning_rate": 0.0008816732882116153, "loss": 2.2297, "step": 30558 }, { "epoch": 0.8200676255903822, "grad_norm": 0.1865234375, "learning_rate": 0.0008816484748855851, "loss": 2.2186, "step": 30559 }, { "epoch": 0.820094461142121, "grad_norm": 0.1865234375, "learning_rate": 0.0008816236609417011, "loss": 2.2169, "step": 30560 }, { "epoch": 0.82012129669386, "grad_norm": 0.1845703125, "learning_rate": 0.0008815988463800177, "loss": 2.2089, "step": 30561 }, { "epoch": 0.820148132245599, "grad_norm": 0.1875, "learning_rate": 0.0008815740312005891, "loss": 2.2657, "step": 30562 }, { "epoch": 0.8201749677973379, "grad_norm": 0.1865234375, "learning_rate": 0.0008815492154034698, "loss": 2.2184, "step": 30563 }, { "epoch": 0.8202018033490769, "grad_norm": 0.185546875, "learning_rate": 0.0008815243989887147, "loss": 2.2234, "step": 30564 }, { "epoch": 0.8202286389008158, "grad_norm": 0.1884765625, "learning_rate": 0.0008814995819563777, "loss": 2.2116, "step": 30565 }, { "epoch": 0.8202554744525548, "grad_norm": 0.19140625, "learning_rate": 0.0008814747643065133, "loss": 2.2269, "step": 30566 }, { "epoch": 0.8202823100042936, "grad_norm": 0.1884765625, "learning_rate": 0.0008814499460391763, "loss": 2.2135, "step": 30567 }, { "epoch": 0.8203091455560326, "grad_norm": 0.1884765625, "learning_rate": 0.000881425127154421, "loss": 2.3028, "step": 30568 }, { "epoch": 0.8203359811077716, "grad_norm": 0.1875, "learning_rate": 0.0008814003076523016, "loss": 2.2503, "step": 30569 }, { "epoch": 0.8203628166595105, "grad_norm": 0.18359375, "learning_rate": 0.0008813754875328727, "loss": 2.1228, "step": 30570 }, { "epoch": 0.8203896522112495, "grad_norm": 0.1875, "learning_rate": 0.0008813506667961888, "loss": 2.2509, "step": 30571 }, { "epoch": 0.8204164877629884, "grad_norm": 0.1826171875, "learning_rate": 0.0008813258454423045, "loss": 2.1865, "step": 30572 }, { "epoch": 0.8204433233147274, "grad_norm": 0.18359375, "learning_rate": 0.0008813010234712737, "loss": 2.2735, "step": 30573 }, { "epoch": 0.8204701588664663, "grad_norm": 0.185546875, "learning_rate": 0.0008812762008831515, "loss": 2.2207, "step": 30574 }, { "epoch": 0.8204969944182052, "grad_norm": 0.189453125, "learning_rate": 0.0008812513776779918, "loss": 2.3027, "step": 30575 }, { "epoch": 0.8205238299699442, "grad_norm": 0.1845703125, "learning_rate": 0.0008812265538558496, "loss": 2.2253, "step": 30576 }, { "epoch": 0.8205506655216831, "grad_norm": 0.185546875, "learning_rate": 0.000881201729416779, "loss": 2.1924, "step": 30577 }, { "epoch": 0.8205775010734221, "grad_norm": 0.1884765625, "learning_rate": 0.0008811769043608345, "loss": 2.2711, "step": 30578 }, { "epoch": 0.820604336625161, "grad_norm": 0.1923828125, "learning_rate": 0.0008811520786880707, "loss": 2.2329, "step": 30579 }, { "epoch": 0.8206311721769, "grad_norm": 0.1875, "learning_rate": 0.0008811272523985417, "loss": 2.2648, "step": 30580 }, { "epoch": 0.820658007728639, "grad_norm": 0.1884765625, "learning_rate": 0.0008811024254923023, "loss": 2.2703, "step": 30581 }, { "epoch": 0.8206848432803778, "grad_norm": 0.1904296875, "learning_rate": 0.000881077597969407, "loss": 2.3529, "step": 30582 }, { "epoch": 0.8207116788321168, "grad_norm": 0.1826171875, "learning_rate": 0.0008810527698299102, "loss": 2.1837, "step": 30583 }, { "epoch": 0.8207385143838557, "grad_norm": 0.1865234375, "learning_rate": 0.0008810279410738662, "loss": 2.2874, "step": 30584 }, { "epoch": 0.8207653499355947, "grad_norm": 0.1865234375, "learning_rate": 0.0008810031117013296, "loss": 2.197, "step": 30585 }, { "epoch": 0.8207921854873336, "grad_norm": 0.19140625, "learning_rate": 0.0008809782817123548, "loss": 2.2168, "step": 30586 }, { "epoch": 0.8208190210390726, "grad_norm": 0.1865234375, "learning_rate": 0.0008809534511069962, "loss": 2.2069, "step": 30587 }, { "epoch": 0.8208458565908116, "grad_norm": 0.1845703125, "learning_rate": 0.0008809286198853087, "loss": 2.2526, "step": 30588 }, { "epoch": 0.8208726921425504, "grad_norm": 0.1875, "learning_rate": 0.0008809037880473462, "loss": 2.2701, "step": 30589 }, { "epoch": 0.8208995276942894, "grad_norm": 0.18359375, "learning_rate": 0.0008808789555931637, "loss": 2.1938, "step": 30590 }, { "epoch": 0.8209263632460283, "grad_norm": 0.189453125, "learning_rate": 0.0008808541225228151, "loss": 2.269, "step": 30591 }, { "epoch": 0.8209531987977673, "grad_norm": 0.1923828125, "learning_rate": 0.0008808292888363553, "loss": 2.2871, "step": 30592 }, { "epoch": 0.8209800343495062, "grad_norm": 0.189453125, "learning_rate": 0.0008808044545338386, "loss": 2.3128, "step": 30593 }, { "epoch": 0.8210068699012452, "grad_norm": 0.185546875, "learning_rate": 0.0008807796196153196, "loss": 2.2067, "step": 30594 }, { "epoch": 0.8210337054529842, "grad_norm": 0.1884765625, "learning_rate": 0.0008807547840808527, "loss": 2.2852, "step": 30595 }, { "epoch": 0.821060541004723, "grad_norm": 0.1904296875, "learning_rate": 0.0008807299479304926, "loss": 2.2621, "step": 30596 }, { "epoch": 0.821087376556462, "grad_norm": 0.1884765625, "learning_rate": 0.0008807051111642934, "loss": 2.3103, "step": 30597 }, { "epoch": 0.8211142121082009, "grad_norm": 0.1865234375, "learning_rate": 0.0008806802737823098, "loss": 2.2506, "step": 30598 }, { "epoch": 0.8211410476599399, "grad_norm": 0.185546875, "learning_rate": 0.0008806554357845963, "loss": 2.2348, "step": 30599 }, { "epoch": 0.8211678832116789, "grad_norm": 0.185546875, "learning_rate": 0.0008806305971712071, "loss": 2.253, "step": 30600 }, { "epoch": 0.8211947187634178, "grad_norm": 0.1884765625, "learning_rate": 0.0008806057579421974, "loss": 2.2364, "step": 30601 }, { "epoch": 0.8212215543151568, "grad_norm": 0.1845703125, "learning_rate": 0.0008805809180976209, "loss": 2.1842, "step": 30602 }, { "epoch": 0.8212483898668956, "grad_norm": 0.1884765625, "learning_rate": 0.0008805560776375323, "loss": 2.195, "step": 30603 }, { "epoch": 0.8212752254186346, "grad_norm": 0.1884765625, "learning_rate": 0.0008805312365619864, "loss": 2.2844, "step": 30604 }, { "epoch": 0.8213020609703735, "grad_norm": 0.1865234375, "learning_rate": 0.0008805063948710375, "loss": 2.2472, "step": 30605 }, { "epoch": 0.8213288965221125, "grad_norm": 0.1875, "learning_rate": 0.0008804815525647401, "loss": 2.2708, "step": 30606 }, { "epoch": 0.8213557320738515, "grad_norm": 0.1826171875, "learning_rate": 0.0008804567096431486, "loss": 2.254, "step": 30607 }, { "epoch": 0.8213825676255904, "grad_norm": 0.18359375, "learning_rate": 0.0008804318661063176, "loss": 2.2632, "step": 30608 }, { "epoch": 0.8214094031773294, "grad_norm": 0.1845703125, "learning_rate": 0.0008804070219543017, "loss": 2.2401, "step": 30609 }, { "epoch": 0.8214362387290682, "grad_norm": 0.1826171875, "learning_rate": 0.0008803821771871553, "loss": 2.2646, "step": 30610 }, { "epoch": 0.8214630742808072, "grad_norm": 0.18359375, "learning_rate": 0.0008803573318049328, "loss": 2.2294, "step": 30611 }, { "epoch": 0.8214899098325461, "grad_norm": 0.185546875, "learning_rate": 0.0008803324858076887, "loss": 2.3138, "step": 30612 }, { "epoch": 0.8215167453842851, "grad_norm": 0.1884765625, "learning_rate": 0.0008803076391954779, "loss": 2.3797, "step": 30613 }, { "epoch": 0.8215435809360241, "grad_norm": 0.1845703125, "learning_rate": 0.0008802827919683543, "loss": 2.2246, "step": 30614 }, { "epoch": 0.821570416487763, "grad_norm": 0.18359375, "learning_rate": 0.0008802579441263731, "loss": 2.2285, "step": 30615 }, { "epoch": 0.821597252039502, "grad_norm": 0.1865234375, "learning_rate": 0.000880233095669588, "loss": 2.264, "step": 30616 }, { "epoch": 0.8216240875912408, "grad_norm": 0.1806640625, "learning_rate": 0.000880208246598054, "loss": 2.255, "step": 30617 }, { "epoch": 0.8216509231429798, "grad_norm": 0.1884765625, "learning_rate": 0.0008801833969118257, "loss": 2.2732, "step": 30618 }, { "epoch": 0.8216777586947188, "grad_norm": 0.185546875, "learning_rate": 0.0008801585466109575, "loss": 2.2643, "step": 30619 }, { "epoch": 0.8217045942464577, "grad_norm": 0.1875, "learning_rate": 0.0008801336956955037, "loss": 2.2261, "step": 30620 }, { "epoch": 0.8217314297981967, "grad_norm": 0.1845703125, "learning_rate": 0.0008801088441655192, "loss": 2.2614, "step": 30621 }, { "epoch": 0.8217582653499356, "grad_norm": 0.1865234375, "learning_rate": 0.0008800839920210581, "loss": 2.217, "step": 30622 }, { "epoch": 0.8217851009016746, "grad_norm": 0.19140625, "learning_rate": 0.0008800591392621751, "loss": 2.2777, "step": 30623 }, { "epoch": 0.8218119364534134, "grad_norm": 0.1884765625, "learning_rate": 0.0008800342858889249, "loss": 2.3189, "step": 30624 }, { "epoch": 0.8218387720051524, "grad_norm": 0.185546875, "learning_rate": 0.0008800094319013616, "loss": 2.1531, "step": 30625 }, { "epoch": 0.8218656075568914, "grad_norm": 0.1865234375, "learning_rate": 0.0008799845772995404, "loss": 2.2691, "step": 30626 }, { "epoch": 0.8218924431086303, "grad_norm": 0.1865234375, "learning_rate": 0.0008799597220835152, "loss": 2.2221, "step": 30627 }, { "epoch": 0.8219192786603693, "grad_norm": 0.18359375, "learning_rate": 0.0008799348662533406, "loss": 2.1679, "step": 30628 }, { "epoch": 0.8219461142121082, "grad_norm": 0.18359375, "learning_rate": 0.0008799100098090714, "loss": 2.2035, "step": 30629 }, { "epoch": 0.8219729497638472, "grad_norm": 0.1884765625, "learning_rate": 0.0008798851527507622, "loss": 2.2536, "step": 30630 }, { "epoch": 0.821999785315586, "grad_norm": 0.1865234375, "learning_rate": 0.000879860295078467, "loss": 2.2221, "step": 30631 }, { "epoch": 0.822026620867325, "grad_norm": 0.1865234375, "learning_rate": 0.0008798354367922408, "loss": 2.2181, "step": 30632 }, { "epoch": 0.822053456419064, "grad_norm": 0.1865234375, "learning_rate": 0.000879810577892138, "loss": 2.2828, "step": 30633 }, { "epoch": 0.8220802919708029, "grad_norm": 0.1865234375, "learning_rate": 0.0008797857183782129, "loss": 2.2414, "step": 30634 }, { "epoch": 0.8221071275225419, "grad_norm": 0.1875, "learning_rate": 0.0008797608582505204, "loss": 2.2874, "step": 30635 }, { "epoch": 0.8221339630742808, "grad_norm": 0.1904296875, "learning_rate": 0.0008797359975091148, "loss": 2.287, "step": 30636 }, { "epoch": 0.8221607986260198, "grad_norm": 0.1845703125, "learning_rate": 0.000879711136154051, "loss": 2.2362, "step": 30637 }, { "epoch": 0.8221876341777586, "grad_norm": 0.1865234375, "learning_rate": 0.000879686274185383, "loss": 2.2592, "step": 30638 }, { "epoch": 0.8222144697294976, "grad_norm": 0.1884765625, "learning_rate": 0.0008796614116031656, "loss": 2.2173, "step": 30639 }, { "epoch": 0.8222413052812366, "grad_norm": 0.1845703125, "learning_rate": 0.0008796365484074533, "loss": 2.3166, "step": 30640 }, { "epoch": 0.8222681408329755, "grad_norm": 0.1865234375, "learning_rate": 0.000879611684598301, "loss": 2.2716, "step": 30641 }, { "epoch": 0.8222949763847145, "grad_norm": 0.1884765625, "learning_rate": 0.0008795868201757626, "loss": 2.2316, "step": 30642 }, { "epoch": 0.8223218119364534, "grad_norm": 0.1826171875, "learning_rate": 0.0008795619551398931, "loss": 2.2119, "step": 30643 }, { "epoch": 0.8223486474881924, "grad_norm": 0.1923828125, "learning_rate": 0.0008795370894907468, "loss": 2.2647, "step": 30644 }, { "epoch": 0.8223754830399314, "grad_norm": 0.189453125, "learning_rate": 0.0008795122232283785, "loss": 2.1621, "step": 30645 }, { "epoch": 0.8224023185916702, "grad_norm": 0.189453125, "learning_rate": 0.0008794873563528426, "loss": 2.2443, "step": 30646 }, { "epoch": 0.8224291541434092, "grad_norm": 0.1875, "learning_rate": 0.0008794624888641937, "loss": 2.1979, "step": 30647 }, { "epoch": 0.8224559896951481, "grad_norm": 0.189453125, "learning_rate": 0.0008794376207624863, "loss": 2.2531, "step": 30648 }, { "epoch": 0.8224828252468871, "grad_norm": 0.18359375, "learning_rate": 0.0008794127520477748, "loss": 2.2408, "step": 30649 }, { "epoch": 0.822509660798626, "grad_norm": 0.1845703125, "learning_rate": 0.000879387882720114, "loss": 2.2796, "step": 30650 }, { "epoch": 0.822536496350365, "grad_norm": 0.181640625, "learning_rate": 0.0008793630127795585, "loss": 2.2458, "step": 30651 }, { "epoch": 0.822563331902104, "grad_norm": 0.185546875, "learning_rate": 0.0008793381422261629, "loss": 2.272, "step": 30652 }, { "epoch": 0.8225901674538428, "grad_norm": 0.1875, "learning_rate": 0.0008793132710599813, "loss": 2.2953, "step": 30653 }, { "epoch": 0.8226170030055818, "grad_norm": 0.1875, "learning_rate": 0.0008792883992810686, "loss": 2.1689, "step": 30654 }, { "epoch": 0.8226438385573207, "grad_norm": 0.185546875, "learning_rate": 0.0008792635268894793, "loss": 2.2398, "step": 30655 }, { "epoch": 0.8226706741090597, "grad_norm": 0.1884765625, "learning_rate": 0.0008792386538852682, "loss": 2.2423, "step": 30656 }, { "epoch": 0.8226975096607986, "grad_norm": 0.197265625, "learning_rate": 0.0008792137802684895, "loss": 2.2147, "step": 30657 }, { "epoch": 0.8227243452125376, "grad_norm": 0.185546875, "learning_rate": 0.0008791889060391978, "loss": 2.265, "step": 30658 }, { "epoch": 0.8227511807642766, "grad_norm": 0.1943359375, "learning_rate": 0.0008791640311974479, "loss": 2.2196, "step": 30659 }, { "epoch": 0.8227780163160154, "grad_norm": 0.1875, "learning_rate": 0.0008791391557432944, "loss": 2.2546, "step": 30660 }, { "epoch": 0.8228048518677544, "grad_norm": 0.1904296875, "learning_rate": 0.0008791142796767913, "loss": 2.3117, "step": 30661 }, { "epoch": 0.8228316874194933, "grad_norm": 0.185546875, "learning_rate": 0.0008790894029979939, "loss": 2.199, "step": 30662 }, { "epoch": 0.8228585229712323, "grad_norm": 0.1865234375, "learning_rate": 0.0008790645257069565, "loss": 2.356, "step": 30663 }, { "epoch": 0.8228853585229712, "grad_norm": 0.1806640625, "learning_rate": 0.0008790396478037334, "loss": 2.2541, "step": 30664 }, { "epoch": 0.8229121940747102, "grad_norm": 0.1845703125, "learning_rate": 0.0008790147692883796, "loss": 2.1434, "step": 30665 }, { "epoch": 0.8229390296264492, "grad_norm": 0.1865234375, "learning_rate": 0.0008789898901609493, "loss": 2.2504, "step": 30666 }, { "epoch": 0.822965865178188, "grad_norm": 0.1826171875, "learning_rate": 0.0008789650104214974, "loss": 2.2574, "step": 30667 }, { "epoch": 0.822992700729927, "grad_norm": 0.189453125, "learning_rate": 0.0008789401300700784, "loss": 2.287, "step": 30668 }, { "epoch": 0.8230195362816659, "grad_norm": 0.189453125, "learning_rate": 0.0008789152491067468, "loss": 2.2839, "step": 30669 }, { "epoch": 0.8230463718334049, "grad_norm": 0.1904296875, "learning_rate": 0.0008788903675315571, "loss": 2.2502, "step": 30670 }, { "epoch": 0.8230732073851439, "grad_norm": 0.1904296875, "learning_rate": 0.0008788654853445641, "loss": 2.2348, "step": 30671 }, { "epoch": 0.8231000429368828, "grad_norm": 0.189453125, "learning_rate": 0.0008788406025458221, "loss": 2.3307, "step": 30672 }, { "epoch": 0.8231268784886218, "grad_norm": 0.185546875, "learning_rate": 0.000878815719135386, "loss": 2.2143, "step": 30673 }, { "epoch": 0.8231537140403606, "grad_norm": 0.181640625, "learning_rate": 0.0008787908351133102, "loss": 2.2467, "step": 30674 }, { "epoch": 0.8231805495920996, "grad_norm": 0.189453125, "learning_rate": 0.0008787659504796493, "loss": 2.2791, "step": 30675 }, { "epoch": 0.8232073851438385, "grad_norm": 0.1923828125, "learning_rate": 0.0008787410652344579, "loss": 2.3315, "step": 30676 }, { "epoch": 0.8232342206955775, "grad_norm": 0.1884765625, "learning_rate": 0.0008787161793777907, "loss": 2.2722, "step": 30677 }, { "epoch": 0.8232610562473165, "grad_norm": 0.189453125, "learning_rate": 0.000878691292909702, "loss": 2.2701, "step": 30678 }, { "epoch": 0.8232878917990554, "grad_norm": 0.1943359375, "learning_rate": 0.0008786664058302467, "loss": 2.2712, "step": 30679 }, { "epoch": 0.8233147273507944, "grad_norm": 0.1845703125, "learning_rate": 0.0008786415181394794, "loss": 2.2448, "step": 30680 }, { "epoch": 0.8233415629025332, "grad_norm": 0.189453125, "learning_rate": 0.0008786166298374546, "loss": 2.269, "step": 30681 }, { "epoch": 0.8233683984542722, "grad_norm": 0.1875, "learning_rate": 0.0008785917409242268, "loss": 2.2045, "step": 30682 }, { "epoch": 0.8233952340060111, "grad_norm": 0.1884765625, "learning_rate": 0.0008785668513998507, "loss": 2.1962, "step": 30683 }, { "epoch": 0.8234220695577501, "grad_norm": 0.1845703125, "learning_rate": 0.0008785419612643807, "loss": 2.2496, "step": 30684 }, { "epoch": 0.8234489051094891, "grad_norm": 0.1875, "learning_rate": 0.0008785170705178719, "loss": 2.3113, "step": 30685 }, { "epoch": 0.823475740661228, "grad_norm": 0.1884765625, "learning_rate": 0.0008784921791603785, "loss": 2.246, "step": 30686 }, { "epoch": 0.823502576212967, "grad_norm": 0.1865234375, "learning_rate": 0.000878467287191955, "loss": 2.2292, "step": 30687 }, { "epoch": 0.8235294117647058, "grad_norm": 0.1865234375, "learning_rate": 0.0008784423946126565, "loss": 2.2211, "step": 30688 }, { "epoch": 0.8235562473164448, "grad_norm": 0.1884765625, "learning_rate": 0.0008784175014225371, "loss": 2.2375, "step": 30689 }, { "epoch": 0.8235830828681838, "grad_norm": 0.185546875, "learning_rate": 0.0008783926076216515, "loss": 2.2471, "step": 30690 }, { "epoch": 0.8236099184199227, "grad_norm": 0.185546875, "learning_rate": 0.0008783677132100547, "loss": 2.3495, "step": 30691 }, { "epoch": 0.8236367539716617, "grad_norm": 0.1865234375, "learning_rate": 0.0008783428181878009, "loss": 2.2591, "step": 30692 }, { "epoch": 0.8236635895234006, "grad_norm": 0.1865234375, "learning_rate": 0.0008783179225549449, "loss": 2.2574, "step": 30693 }, { "epoch": 0.8236904250751396, "grad_norm": 0.1826171875, "learning_rate": 0.0008782930263115412, "loss": 2.2745, "step": 30694 }, { "epoch": 0.8237172606268784, "grad_norm": 0.1826171875, "learning_rate": 0.0008782681294576444, "loss": 2.1914, "step": 30695 }, { "epoch": 0.8237440961786174, "grad_norm": 0.18359375, "learning_rate": 0.0008782432319933095, "loss": 2.2263, "step": 30696 }, { "epoch": 0.8237709317303564, "grad_norm": 0.1953125, "learning_rate": 0.0008782183339185904, "loss": 2.2258, "step": 30697 }, { "epoch": 0.8237977672820953, "grad_norm": 0.1884765625, "learning_rate": 0.0008781934352335423, "loss": 2.2618, "step": 30698 }, { "epoch": 0.8238246028338343, "grad_norm": 0.18359375, "learning_rate": 0.0008781685359382198, "loss": 2.2277, "step": 30699 }, { "epoch": 0.8238514383855732, "grad_norm": 0.19140625, "learning_rate": 0.0008781436360326773, "loss": 2.2768, "step": 30700 }, { "epoch": 0.8238782739373122, "grad_norm": 0.1875, "learning_rate": 0.0008781187355169693, "loss": 2.2989, "step": 30701 }, { "epoch": 0.823905109489051, "grad_norm": 0.185546875, "learning_rate": 0.0008780938343911507, "loss": 2.3247, "step": 30702 }, { "epoch": 0.82393194504079, "grad_norm": 0.185546875, "learning_rate": 0.0008780689326552763, "loss": 2.248, "step": 30703 }, { "epoch": 0.823958780592529, "grad_norm": 0.18359375, "learning_rate": 0.0008780440303094002, "loss": 2.2093, "step": 30704 }, { "epoch": 0.8239856161442679, "grad_norm": 0.18359375, "learning_rate": 0.0008780191273535774, "loss": 2.2196, "step": 30705 }, { "epoch": 0.8240124516960069, "grad_norm": 0.1787109375, "learning_rate": 0.0008779942237878623, "loss": 2.1719, "step": 30706 }, { "epoch": 0.8240392872477458, "grad_norm": 0.1875, "learning_rate": 0.0008779693196123099, "loss": 2.2785, "step": 30707 }, { "epoch": 0.8240661227994848, "grad_norm": 0.1845703125, "learning_rate": 0.0008779444148269744, "loss": 2.2758, "step": 30708 }, { "epoch": 0.8240929583512236, "grad_norm": 0.1865234375, "learning_rate": 0.0008779195094319106, "loss": 2.2313, "step": 30709 }, { "epoch": 0.8241197939029626, "grad_norm": 0.181640625, "learning_rate": 0.0008778946034271733, "loss": 2.2457, "step": 30710 }, { "epoch": 0.8241466294547016, "grad_norm": 0.1845703125, "learning_rate": 0.000877869696812817, "loss": 2.2698, "step": 30711 }, { "epoch": 0.8241734650064405, "grad_norm": 0.1865234375, "learning_rate": 0.0008778447895888961, "loss": 2.2426, "step": 30712 }, { "epoch": 0.8242003005581795, "grad_norm": 0.1826171875, "learning_rate": 0.0008778198817554657, "loss": 2.2427, "step": 30713 }, { "epoch": 0.8242271361099184, "grad_norm": 0.1865234375, "learning_rate": 0.0008777949733125804, "loss": 2.2646, "step": 30714 }, { "epoch": 0.8242539716616574, "grad_norm": 0.185546875, "learning_rate": 0.0008777700642602942, "loss": 2.2244, "step": 30715 }, { "epoch": 0.8242808072133964, "grad_norm": 0.185546875, "learning_rate": 0.0008777451545986624, "loss": 2.3002, "step": 30716 }, { "epoch": 0.8243076427651352, "grad_norm": 0.181640625, "learning_rate": 0.0008777202443277395, "loss": 2.2634, "step": 30717 }, { "epoch": 0.8243344783168742, "grad_norm": 0.1875, "learning_rate": 0.00087769533344758, "loss": 2.2343, "step": 30718 }, { "epoch": 0.8243613138686131, "grad_norm": 0.1875, "learning_rate": 0.0008776704219582386, "loss": 2.1909, "step": 30719 }, { "epoch": 0.8243881494203521, "grad_norm": 0.1865234375, "learning_rate": 0.00087764550985977, "loss": 2.2141, "step": 30720 }, { "epoch": 0.824414984972091, "grad_norm": 0.1875, "learning_rate": 0.0008776205971522291, "loss": 2.3123, "step": 30721 }, { "epoch": 0.82444182052383, "grad_norm": 0.1845703125, "learning_rate": 0.0008775956838356702, "loss": 2.2586, "step": 30722 }, { "epoch": 0.824468656075569, "grad_norm": 0.1826171875, "learning_rate": 0.0008775707699101478, "loss": 2.2214, "step": 30723 }, { "epoch": 0.8244954916273078, "grad_norm": 0.185546875, "learning_rate": 0.000877545855375717, "loss": 2.2455, "step": 30724 }, { "epoch": 0.8245223271790468, "grad_norm": 0.1845703125, "learning_rate": 0.0008775209402324322, "loss": 2.2469, "step": 30725 }, { "epoch": 0.8245491627307857, "grad_norm": 0.1875, "learning_rate": 0.0008774960244803479, "loss": 2.2229, "step": 30726 }, { "epoch": 0.8245759982825247, "grad_norm": 0.1787109375, "learning_rate": 0.0008774711081195193, "loss": 2.1714, "step": 30727 }, { "epoch": 0.8246028338342636, "grad_norm": 0.1884765625, "learning_rate": 0.0008774461911500005, "loss": 2.2839, "step": 30728 }, { "epoch": 0.8246296693860026, "grad_norm": 0.185546875, "learning_rate": 0.0008774212735718466, "loss": 2.3164, "step": 30729 }, { "epoch": 0.8246565049377416, "grad_norm": 0.1826171875, "learning_rate": 0.0008773963553851119, "loss": 2.2363, "step": 30730 }, { "epoch": 0.8246833404894804, "grad_norm": 0.1865234375, "learning_rate": 0.0008773714365898511, "loss": 2.2703, "step": 30731 }, { "epoch": 0.8247101760412194, "grad_norm": 0.1904296875, "learning_rate": 0.0008773465171861192, "loss": 2.2582, "step": 30732 }, { "epoch": 0.8247370115929583, "grad_norm": 0.1904296875, "learning_rate": 0.0008773215971739705, "loss": 2.2877, "step": 30733 }, { "epoch": 0.8247638471446973, "grad_norm": 0.1904296875, "learning_rate": 0.0008772966765534597, "loss": 2.3096, "step": 30734 }, { "epoch": 0.8247906826964362, "grad_norm": 0.1826171875, "learning_rate": 0.0008772717553246419, "loss": 2.2989, "step": 30735 }, { "epoch": 0.8248175182481752, "grad_norm": 0.1845703125, "learning_rate": 0.0008772468334875712, "loss": 2.2124, "step": 30736 }, { "epoch": 0.8248443537999142, "grad_norm": 0.1865234375, "learning_rate": 0.0008772219110423024, "loss": 2.1441, "step": 30737 }, { "epoch": 0.824871189351653, "grad_norm": 0.1884765625, "learning_rate": 0.0008771969879888905, "loss": 2.2672, "step": 30738 }, { "epoch": 0.824898024903392, "grad_norm": 0.1884765625, "learning_rate": 0.0008771720643273899, "loss": 2.2103, "step": 30739 }, { "epoch": 0.8249248604551309, "grad_norm": 0.1875, "learning_rate": 0.0008771471400578553, "loss": 2.2764, "step": 30740 }, { "epoch": 0.8249516960068699, "grad_norm": 0.18359375, "learning_rate": 0.0008771222151803415, "loss": 2.2164, "step": 30741 }, { "epoch": 0.8249785315586089, "grad_norm": 0.193359375, "learning_rate": 0.000877097289694903, "loss": 2.2056, "step": 30742 }, { "epoch": 0.8250053671103478, "grad_norm": 0.1865234375, "learning_rate": 0.0008770723636015947, "loss": 2.2799, "step": 30743 }, { "epoch": 0.8250322026620868, "grad_norm": 0.1875, "learning_rate": 0.0008770474369004709, "loss": 2.218, "step": 30744 }, { "epoch": 0.8250590382138256, "grad_norm": 0.1806640625, "learning_rate": 0.0008770225095915867, "loss": 2.2143, "step": 30745 }, { "epoch": 0.8250858737655646, "grad_norm": 0.1865234375, "learning_rate": 0.0008769975816749967, "loss": 2.2617, "step": 30746 }, { "epoch": 0.8251127093173035, "grad_norm": 0.1845703125, "learning_rate": 0.0008769726531507554, "loss": 2.2881, "step": 30747 }, { "epoch": 0.8251395448690425, "grad_norm": 0.18359375, "learning_rate": 0.0008769477240189175, "loss": 2.2984, "step": 30748 }, { "epoch": 0.8251663804207815, "grad_norm": 0.181640625, "learning_rate": 0.000876922794279538, "loss": 2.2175, "step": 30749 }, { "epoch": 0.8251932159725204, "grad_norm": 0.185546875, "learning_rate": 0.0008768978639326714, "loss": 2.2792, "step": 30750 }, { "epoch": 0.8252200515242594, "grad_norm": 0.1845703125, "learning_rate": 0.0008768729329783719, "loss": 2.2575, "step": 30751 }, { "epoch": 0.8252468870759982, "grad_norm": 0.189453125, "learning_rate": 0.0008768480014166948, "loss": 2.2869, "step": 30752 }, { "epoch": 0.8252737226277372, "grad_norm": 0.1845703125, "learning_rate": 0.0008768230692476949, "loss": 2.2261, "step": 30753 }, { "epoch": 0.8253005581794761, "grad_norm": 0.185546875, "learning_rate": 0.0008767981364714265, "loss": 2.3343, "step": 30754 }, { "epoch": 0.8253273937312151, "grad_norm": 0.1845703125, "learning_rate": 0.0008767732030879443, "loss": 2.2456, "step": 30755 }, { "epoch": 0.8253542292829541, "grad_norm": 0.1826171875, "learning_rate": 0.0008767482690973034, "loss": 2.2862, "step": 30756 }, { "epoch": 0.825381064834693, "grad_norm": 0.1904296875, "learning_rate": 0.000876723334499558, "loss": 2.3012, "step": 30757 }, { "epoch": 0.825407900386432, "grad_norm": 0.1865234375, "learning_rate": 0.000876698399294763, "loss": 2.2481, "step": 30758 }, { "epoch": 0.8254347359381708, "grad_norm": 0.189453125, "learning_rate": 0.0008766734634829733, "loss": 2.2954, "step": 30759 }, { "epoch": 0.8254615714899098, "grad_norm": 0.1884765625, "learning_rate": 0.0008766485270642432, "loss": 2.3203, "step": 30760 }, { "epoch": 0.8254884070416488, "grad_norm": 0.185546875, "learning_rate": 0.0008766235900386279, "loss": 2.33, "step": 30761 }, { "epoch": 0.8255152425933877, "grad_norm": 0.1904296875, "learning_rate": 0.0008765986524061814, "loss": 2.2197, "step": 30762 }, { "epoch": 0.8255420781451267, "grad_norm": 0.18359375, "learning_rate": 0.0008765737141669593, "loss": 2.1596, "step": 30763 }, { "epoch": 0.8255689136968656, "grad_norm": 0.1875, "learning_rate": 0.0008765487753210154, "loss": 2.2696, "step": 30764 }, { "epoch": 0.8255957492486046, "grad_norm": 0.1865234375, "learning_rate": 0.0008765238358684051, "loss": 2.2437, "step": 30765 }, { "epoch": 0.8256225848003435, "grad_norm": 0.1884765625, "learning_rate": 0.000876498895809183, "loss": 2.2462, "step": 30766 }, { "epoch": 0.8256494203520824, "grad_norm": 0.1865234375, "learning_rate": 0.0008764739551434035, "loss": 2.3132, "step": 30767 }, { "epoch": 0.8256762559038214, "grad_norm": 0.181640625, "learning_rate": 0.0008764490138711213, "loss": 2.2343, "step": 30768 }, { "epoch": 0.8257030914555603, "grad_norm": 0.18359375, "learning_rate": 0.0008764240719923916, "loss": 2.1701, "step": 30769 }, { "epoch": 0.8257299270072993, "grad_norm": 0.18359375, "learning_rate": 0.0008763991295072688, "loss": 2.2033, "step": 30770 }, { "epoch": 0.8257567625590382, "grad_norm": 0.1865234375, "learning_rate": 0.0008763741864158072, "loss": 2.2388, "step": 30771 }, { "epoch": 0.8257835981107772, "grad_norm": 0.1865234375, "learning_rate": 0.0008763492427180625, "loss": 2.24, "step": 30772 }, { "epoch": 0.825810433662516, "grad_norm": 0.185546875, "learning_rate": 0.0008763242984140886, "loss": 2.1946, "step": 30773 }, { "epoch": 0.825837269214255, "grad_norm": 0.1826171875, "learning_rate": 0.0008762993535039404, "loss": 2.2561, "step": 30774 }, { "epoch": 0.825864104765994, "grad_norm": 0.1845703125, "learning_rate": 0.000876274407987673, "loss": 2.2626, "step": 30775 }, { "epoch": 0.8258909403177329, "grad_norm": 0.18359375, "learning_rate": 0.0008762494618653405, "loss": 2.178, "step": 30776 }, { "epoch": 0.8259177758694719, "grad_norm": 0.1826171875, "learning_rate": 0.0008762245151369982, "loss": 2.3143, "step": 30777 }, { "epoch": 0.8259446114212108, "grad_norm": 0.1865234375, "learning_rate": 0.0008761995678027005, "loss": 2.2874, "step": 30778 }, { "epoch": 0.8259714469729498, "grad_norm": 0.1826171875, "learning_rate": 0.0008761746198625021, "loss": 2.243, "step": 30779 }, { "epoch": 0.8259982825246887, "grad_norm": 0.1904296875, "learning_rate": 0.0008761496713164579, "loss": 2.2521, "step": 30780 }, { "epoch": 0.8260251180764276, "grad_norm": 0.18359375, "learning_rate": 0.0008761247221646226, "loss": 2.3007, "step": 30781 }, { "epoch": 0.8260519536281666, "grad_norm": 0.1875, "learning_rate": 0.0008760997724070508, "loss": 2.2132, "step": 30782 }, { "epoch": 0.8260787891799055, "grad_norm": 0.19140625, "learning_rate": 0.0008760748220437977, "loss": 2.2082, "step": 30783 }, { "epoch": 0.8261056247316445, "grad_norm": 0.1884765625, "learning_rate": 0.0008760498710749173, "loss": 2.2584, "step": 30784 }, { "epoch": 0.8261324602833834, "grad_norm": 0.18359375, "learning_rate": 0.0008760249195004646, "loss": 2.2573, "step": 30785 }, { "epoch": 0.8261592958351224, "grad_norm": 0.18359375, "learning_rate": 0.0008759999673204947, "loss": 2.1967, "step": 30786 }, { "epoch": 0.8261861313868614, "grad_norm": 0.18359375, "learning_rate": 0.000875975014535062, "loss": 2.2302, "step": 30787 }, { "epoch": 0.8262129669386002, "grad_norm": 0.1826171875, "learning_rate": 0.0008759500611442212, "loss": 2.2256, "step": 30788 }, { "epoch": 0.8262398024903392, "grad_norm": 0.1875, "learning_rate": 0.0008759251071480274, "loss": 2.3465, "step": 30789 }, { "epoch": 0.8262666380420781, "grad_norm": 0.185546875, "learning_rate": 0.000875900152546535, "loss": 2.2518, "step": 30790 }, { "epoch": 0.8262934735938171, "grad_norm": 0.1845703125, "learning_rate": 0.0008758751973397988, "loss": 2.3376, "step": 30791 }, { "epoch": 0.826320309145556, "grad_norm": 0.185546875, "learning_rate": 0.0008758502415278735, "loss": 2.2217, "step": 30792 }, { "epoch": 0.826347144697295, "grad_norm": 0.1806640625, "learning_rate": 0.000875825285110814, "loss": 2.2174, "step": 30793 }, { "epoch": 0.826373980249034, "grad_norm": 0.1806640625, "learning_rate": 0.000875800328088675, "loss": 2.2426, "step": 30794 }, { "epoch": 0.8264008158007728, "grad_norm": 0.1884765625, "learning_rate": 0.0008757753704615113, "loss": 2.3174, "step": 30795 }, { "epoch": 0.8264276513525118, "grad_norm": 0.1806640625, "learning_rate": 0.0008757504122293772, "loss": 2.2218, "step": 30796 }, { "epoch": 0.8264544869042507, "grad_norm": 0.1884765625, "learning_rate": 0.0008757254533923282, "loss": 2.2546, "step": 30797 }, { "epoch": 0.8264813224559897, "grad_norm": 0.1826171875, "learning_rate": 0.0008757004939504186, "loss": 2.1997, "step": 30798 }, { "epoch": 0.8265081580077286, "grad_norm": 0.1845703125, "learning_rate": 0.0008756755339037031, "loss": 2.2621, "step": 30799 }, { "epoch": 0.8265349935594676, "grad_norm": 0.1875, "learning_rate": 0.0008756505732522366, "loss": 2.3167, "step": 30800 }, { "epoch": 0.8265618291112066, "grad_norm": 0.185546875, "learning_rate": 0.000875625611996074, "loss": 2.2108, "step": 30801 }, { "epoch": 0.8265886646629454, "grad_norm": 0.1826171875, "learning_rate": 0.0008756006501352699, "loss": 2.2741, "step": 30802 }, { "epoch": 0.8266155002146844, "grad_norm": 0.193359375, "learning_rate": 0.0008755756876698788, "loss": 2.2668, "step": 30803 }, { "epoch": 0.8266423357664233, "grad_norm": 0.1826171875, "learning_rate": 0.0008755507245999559, "loss": 2.2761, "step": 30804 }, { "epoch": 0.8266691713181623, "grad_norm": 0.1884765625, "learning_rate": 0.0008755257609255558, "loss": 2.307, "step": 30805 }, { "epoch": 0.8266960068699012, "grad_norm": 0.189453125, "learning_rate": 0.0008755007966467333, "loss": 2.2805, "step": 30806 }, { "epoch": 0.8267228424216402, "grad_norm": 0.1865234375, "learning_rate": 0.0008754758317635427, "loss": 2.3141, "step": 30807 }, { "epoch": 0.8267496779733792, "grad_norm": 0.18359375, "learning_rate": 0.0008754508662760398, "loss": 2.255, "step": 30808 }, { "epoch": 0.826776513525118, "grad_norm": 0.1806640625, "learning_rate": 0.0008754259001842783, "loss": 2.1728, "step": 30809 }, { "epoch": 0.826803349076857, "grad_norm": 0.18359375, "learning_rate": 0.0008754009334883134, "loss": 2.2976, "step": 30810 }, { "epoch": 0.8268301846285959, "grad_norm": 0.1865234375, "learning_rate": 0.0008753759661882, "loss": 2.2506, "step": 30811 }, { "epoch": 0.8268570201803349, "grad_norm": 0.1875, "learning_rate": 0.0008753509982839928, "loss": 2.2731, "step": 30812 }, { "epoch": 0.8268838557320739, "grad_norm": 0.185546875, "learning_rate": 0.0008753260297757464, "loss": 2.2294, "step": 30813 }, { "epoch": 0.8269106912838128, "grad_norm": 0.1806640625, "learning_rate": 0.0008753010606635158, "loss": 2.248, "step": 30814 }, { "epoch": 0.8269375268355518, "grad_norm": 0.1845703125, "learning_rate": 0.0008752760909473556, "loss": 2.1823, "step": 30815 }, { "epoch": 0.8269643623872907, "grad_norm": 0.1884765625, "learning_rate": 0.0008752511206273206, "loss": 2.2977, "step": 30816 }, { "epoch": 0.8269911979390296, "grad_norm": 0.185546875, "learning_rate": 0.0008752261497034658, "loss": 2.2398, "step": 30817 }, { "epoch": 0.8270180334907685, "grad_norm": 0.1845703125, "learning_rate": 0.0008752011781758455, "loss": 2.2468, "step": 30818 }, { "epoch": 0.8270448690425075, "grad_norm": 0.185546875, "learning_rate": 0.0008751762060445152, "loss": 2.2035, "step": 30819 }, { "epoch": 0.8270717045942465, "grad_norm": 0.18359375, "learning_rate": 0.0008751512333095289, "loss": 2.3131, "step": 30820 }, { "epoch": 0.8270985401459854, "grad_norm": 0.1884765625, "learning_rate": 0.0008751262599709419, "loss": 2.2449, "step": 30821 }, { "epoch": 0.8271253756977244, "grad_norm": 0.1884765625, "learning_rate": 0.0008751012860288088, "loss": 2.2022, "step": 30822 }, { "epoch": 0.8271522112494633, "grad_norm": 0.185546875, "learning_rate": 0.0008750763114831845, "loss": 2.2484, "step": 30823 }, { "epoch": 0.8271790468012022, "grad_norm": 0.1923828125, "learning_rate": 0.0008750513363341235, "loss": 2.2674, "step": 30824 }, { "epoch": 0.8272058823529411, "grad_norm": 0.1884765625, "learning_rate": 0.0008750263605816809, "loss": 2.266, "step": 30825 }, { "epoch": 0.8272327179046801, "grad_norm": 0.1875, "learning_rate": 0.0008750013842259113, "loss": 2.2152, "step": 30826 }, { "epoch": 0.8272595534564191, "grad_norm": 0.1875, "learning_rate": 0.0008749764072668698, "loss": 2.2562, "step": 30827 }, { "epoch": 0.827286389008158, "grad_norm": 0.1845703125, "learning_rate": 0.0008749514297046108, "loss": 2.3217, "step": 30828 }, { "epoch": 0.827313224559897, "grad_norm": 0.189453125, "learning_rate": 0.0008749264515391893, "loss": 2.2968, "step": 30829 }, { "epoch": 0.8273400601116359, "grad_norm": 0.181640625, "learning_rate": 0.00087490147277066, "loss": 2.2221, "step": 30830 }, { "epoch": 0.8273668956633748, "grad_norm": 0.185546875, "learning_rate": 0.0008748764933990779, "loss": 2.2622, "step": 30831 }, { "epoch": 0.8273937312151138, "grad_norm": 0.1875, "learning_rate": 0.0008748515134244973, "loss": 2.2349, "step": 30832 }, { "epoch": 0.8274205667668527, "grad_norm": 0.1865234375, "learning_rate": 0.0008748265328469736, "loss": 2.268, "step": 30833 }, { "epoch": 0.8274474023185917, "grad_norm": 0.1826171875, "learning_rate": 0.0008748015516665614, "loss": 2.1995, "step": 30834 }, { "epoch": 0.8274742378703306, "grad_norm": 0.193359375, "learning_rate": 0.0008747765698833152, "loss": 2.307, "step": 30835 }, { "epoch": 0.8275010734220696, "grad_norm": 0.1875, "learning_rate": 0.0008747515874972902, "loss": 2.2412, "step": 30836 }, { "epoch": 0.8275279089738085, "grad_norm": 0.1806640625, "learning_rate": 0.000874726604508541, "loss": 2.2009, "step": 30837 }, { "epoch": 0.8275547445255474, "grad_norm": 0.185546875, "learning_rate": 0.0008747016209171226, "loss": 2.311, "step": 30838 }, { "epoch": 0.8275815800772864, "grad_norm": 0.1884765625, "learning_rate": 0.0008746766367230895, "loss": 2.2319, "step": 30839 }, { "epoch": 0.8276084156290253, "grad_norm": 0.1865234375, "learning_rate": 0.0008746516519264968, "loss": 2.2432, "step": 30840 }, { "epoch": 0.8276352511807643, "grad_norm": 0.1845703125, "learning_rate": 0.0008746266665273991, "loss": 2.3027, "step": 30841 }, { "epoch": 0.8276620867325032, "grad_norm": 0.18359375, "learning_rate": 0.0008746016805258512, "loss": 2.2396, "step": 30842 }, { "epoch": 0.8276889222842422, "grad_norm": 0.1845703125, "learning_rate": 0.0008745766939219081, "loss": 2.2547, "step": 30843 }, { "epoch": 0.8277157578359811, "grad_norm": 0.185546875, "learning_rate": 0.0008745517067156244, "loss": 2.2163, "step": 30844 }, { "epoch": 0.82774259338772, "grad_norm": 0.1865234375, "learning_rate": 0.0008745267189070553, "loss": 2.2259, "step": 30845 }, { "epoch": 0.827769428939459, "grad_norm": 0.1826171875, "learning_rate": 0.000874501730496255, "loss": 2.2171, "step": 30846 }, { "epoch": 0.8277962644911979, "grad_norm": 0.1875, "learning_rate": 0.000874476741483279, "loss": 2.2997, "step": 30847 }, { "epoch": 0.8278231000429369, "grad_norm": 0.1904296875, "learning_rate": 0.0008744517518681816, "loss": 2.2568, "step": 30848 }, { "epoch": 0.8278499355946758, "grad_norm": 0.185546875, "learning_rate": 0.0008744267616510178, "loss": 2.1843, "step": 30849 }, { "epoch": 0.8278767711464148, "grad_norm": 0.1884765625, "learning_rate": 0.0008744017708318426, "loss": 2.239, "step": 30850 }, { "epoch": 0.8279036066981537, "grad_norm": 0.185546875, "learning_rate": 0.0008743767794107104, "loss": 2.2497, "step": 30851 }, { "epoch": 0.8279304422498927, "grad_norm": 0.1875, "learning_rate": 0.0008743517873876764, "loss": 2.2799, "step": 30852 }, { "epoch": 0.8279572778016316, "grad_norm": 0.181640625, "learning_rate": 0.0008743267947627952, "loss": 2.1405, "step": 30853 }, { "epoch": 0.8279841133533705, "grad_norm": 0.185546875, "learning_rate": 0.0008743018015361218, "loss": 2.3318, "step": 30854 }, { "epoch": 0.8280109489051095, "grad_norm": 0.1884765625, "learning_rate": 0.0008742768077077109, "loss": 2.199, "step": 30855 }, { "epoch": 0.8280377844568484, "grad_norm": 0.185546875, "learning_rate": 0.0008742518132776176, "loss": 2.2472, "step": 30856 }, { "epoch": 0.8280646200085874, "grad_norm": 0.185546875, "learning_rate": 0.0008742268182458963, "loss": 2.2553, "step": 30857 }, { "epoch": 0.8280914555603264, "grad_norm": 0.185546875, "learning_rate": 0.0008742018226126019, "loss": 2.2271, "step": 30858 }, { "epoch": 0.8281182911120653, "grad_norm": 0.1875, "learning_rate": 0.0008741768263777896, "loss": 2.209, "step": 30859 }, { "epoch": 0.8281451266638042, "grad_norm": 0.189453125, "learning_rate": 0.0008741518295415138, "loss": 2.2205, "step": 30860 }, { "epoch": 0.8281719622155431, "grad_norm": 0.1875, "learning_rate": 0.0008741268321038298, "loss": 2.3081, "step": 30861 }, { "epoch": 0.8281987977672821, "grad_norm": 0.1865234375, "learning_rate": 0.000874101834064792, "loss": 2.2589, "step": 30862 }, { "epoch": 0.828225633319021, "grad_norm": 0.189453125, "learning_rate": 0.0008740768354244554, "loss": 2.2522, "step": 30863 }, { "epoch": 0.82825246887076, "grad_norm": 0.1875, "learning_rate": 0.000874051836182875, "loss": 2.3226, "step": 30864 }, { "epoch": 0.828279304422499, "grad_norm": 0.1875, "learning_rate": 0.0008740268363401054, "loss": 2.2814, "step": 30865 }, { "epoch": 0.8283061399742379, "grad_norm": 0.1875, "learning_rate": 0.0008740018358962015, "loss": 2.3062, "step": 30866 }, { "epoch": 0.8283329755259768, "grad_norm": 0.1865234375, "learning_rate": 0.0008739768348512184, "loss": 2.2915, "step": 30867 }, { "epoch": 0.8283598110777157, "grad_norm": 0.18359375, "learning_rate": 0.0008739518332052106, "loss": 2.285, "step": 30868 }, { "epoch": 0.8283866466294547, "grad_norm": 0.1865234375, "learning_rate": 0.0008739268309582329, "loss": 2.2787, "step": 30869 }, { "epoch": 0.8284134821811936, "grad_norm": 0.1826171875, "learning_rate": 0.0008739018281103407, "loss": 2.2105, "step": 30870 }, { "epoch": 0.8284403177329326, "grad_norm": 0.1865234375, "learning_rate": 0.0008738768246615881, "loss": 2.2107, "step": 30871 }, { "epoch": 0.8284671532846716, "grad_norm": 0.18359375, "learning_rate": 0.0008738518206120305, "loss": 2.1942, "step": 30872 }, { "epoch": 0.8284939888364105, "grad_norm": 0.1826171875, "learning_rate": 0.0008738268159617225, "loss": 2.182, "step": 30873 }, { "epoch": 0.8285208243881494, "grad_norm": 0.1845703125, "learning_rate": 0.0008738018107107192, "loss": 2.2694, "step": 30874 }, { "epoch": 0.8285476599398883, "grad_norm": 0.185546875, "learning_rate": 0.0008737768048590752, "loss": 2.3031, "step": 30875 }, { "epoch": 0.8285744954916273, "grad_norm": 0.185546875, "learning_rate": 0.0008737517984068454, "loss": 2.2489, "step": 30876 }, { "epoch": 0.8286013310433663, "grad_norm": 0.185546875, "learning_rate": 0.0008737267913540846, "loss": 2.2118, "step": 30877 }, { "epoch": 0.8286281665951052, "grad_norm": 0.18359375, "learning_rate": 0.0008737017837008479, "loss": 2.2535, "step": 30878 }, { "epoch": 0.8286550021468442, "grad_norm": 0.1884765625, "learning_rate": 0.00087367677544719, "loss": 2.318, "step": 30879 }, { "epoch": 0.8286818376985831, "grad_norm": 0.1875, "learning_rate": 0.0008736517665931656, "loss": 2.2779, "step": 30880 }, { "epoch": 0.828708673250322, "grad_norm": 0.1875, "learning_rate": 0.00087362675713883, "loss": 2.3514, "step": 30881 }, { "epoch": 0.8287355088020609, "grad_norm": 0.1826171875, "learning_rate": 0.0008736017470842376, "loss": 2.2495, "step": 30882 }, { "epoch": 0.8287623443537999, "grad_norm": 0.1923828125, "learning_rate": 0.0008735767364294434, "loss": 2.257, "step": 30883 }, { "epoch": 0.8287891799055389, "grad_norm": 0.1884765625, "learning_rate": 0.0008735517251745025, "loss": 2.2206, "step": 30884 }, { "epoch": 0.8288160154572778, "grad_norm": 0.18359375, "learning_rate": 0.0008735267133194696, "loss": 2.3039, "step": 30885 }, { "epoch": 0.8288428510090168, "grad_norm": 0.20703125, "learning_rate": 0.0008735017008643996, "loss": 2.2306, "step": 30886 }, { "epoch": 0.8288696865607557, "grad_norm": 0.185546875, "learning_rate": 0.0008734766878093471, "loss": 2.2275, "step": 30887 }, { "epoch": 0.8288965221124946, "grad_norm": 0.1796875, "learning_rate": 0.0008734516741543673, "loss": 2.2381, "step": 30888 }, { "epoch": 0.8289233576642335, "grad_norm": 0.19140625, "learning_rate": 0.0008734266598995151, "loss": 2.2891, "step": 30889 }, { "epoch": 0.8289501932159725, "grad_norm": 0.1845703125, "learning_rate": 0.0008734016450448451, "loss": 2.3175, "step": 30890 }, { "epoch": 0.8289770287677115, "grad_norm": 0.1865234375, "learning_rate": 0.0008733766295904124, "loss": 2.2463, "step": 30891 }, { "epoch": 0.8290038643194504, "grad_norm": 0.1845703125, "learning_rate": 0.0008733516135362719, "loss": 2.2325, "step": 30892 }, { "epoch": 0.8290306998711894, "grad_norm": 0.18359375, "learning_rate": 0.0008733265968824782, "loss": 2.3135, "step": 30893 }, { "epoch": 0.8290575354229283, "grad_norm": 0.1845703125, "learning_rate": 0.0008733015796290864, "loss": 2.1665, "step": 30894 }, { "epoch": 0.8290843709746673, "grad_norm": 0.1845703125, "learning_rate": 0.0008732765617761514, "loss": 2.2588, "step": 30895 }, { "epoch": 0.8291112065264061, "grad_norm": 0.1826171875, "learning_rate": 0.000873251543323728, "loss": 2.2317, "step": 30896 }, { "epoch": 0.8291380420781451, "grad_norm": 0.1806640625, "learning_rate": 0.0008732265242718711, "loss": 2.2262, "step": 30897 }, { "epoch": 0.8291648776298841, "grad_norm": 0.18359375, "learning_rate": 0.0008732015046206356, "loss": 2.2085, "step": 30898 }, { "epoch": 0.829191713181623, "grad_norm": 0.18359375, "learning_rate": 0.0008731764843700764, "loss": 2.2686, "step": 30899 }, { "epoch": 0.829218548733362, "grad_norm": 0.1826171875, "learning_rate": 0.0008731514635202483, "loss": 2.2577, "step": 30900 }, { "epoch": 0.8292453842851009, "grad_norm": 0.1826171875, "learning_rate": 0.0008731264420712062, "loss": 2.2684, "step": 30901 }, { "epoch": 0.8292722198368399, "grad_norm": 0.18359375, "learning_rate": 0.0008731014200230051, "loss": 2.2157, "step": 30902 }, { "epoch": 0.8292990553885788, "grad_norm": 0.1845703125, "learning_rate": 0.0008730763973757, "loss": 2.3369, "step": 30903 }, { "epoch": 0.8293258909403177, "grad_norm": 0.1923828125, "learning_rate": 0.0008730513741293456, "loss": 2.2614, "step": 30904 }, { "epoch": 0.8293527264920567, "grad_norm": 0.1845703125, "learning_rate": 0.0008730263502839966, "loss": 2.2694, "step": 30905 }, { "epoch": 0.8293795620437956, "grad_norm": 0.1865234375, "learning_rate": 0.0008730013258397082, "loss": 2.2957, "step": 30906 }, { "epoch": 0.8294063975955346, "grad_norm": 0.1884765625, "learning_rate": 0.0008729763007965354, "loss": 2.267, "step": 30907 }, { "epoch": 0.8294332331472735, "grad_norm": 0.1875, "learning_rate": 0.0008729512751545326, "loss": 2.2434, "step": 30908 }, { "epoch": 0.8294600686990125, "grad_norm": 0.18359375, "learning_rate": 0.0008729262489137553, "loss": 2.2597, "step": 30909 }, { "epoch": 0.8294869042507514, "grad_norm": 0.1806640625, "learning_rate": 0.0008729012220742578, "loss": 2.2318, "step": 30910 }, { "epoch": 0.8295137398024903, "grad_norm": 0.1826171875, "learning_rate": 0.0008728761946360955, "loss": 2.152, "step": 30911 }, { "epoch": 0.8295405753542293, "grad_norm": 0.185546875, "learning_rate": 0.0008728511665993231, "loss": 2.2678, "step": 30912 }, { "epoch": 0.8295674109059682, "grad_norm": 0.185546875, "learning_rate": 0.0008728261379639954, "loss": 2.2971, "step": 30913 }, { "epoch": 0.8295942464577072, "grad_norm": 0.1796875, "learning_rate": 0.0008728011087301675, "loss": 2.24, "step": 30914 }, { "epoch": 0.8296210820094461, "grad_norm": 0.185546875, "learning_rate": 0.0008727760788978943, "loss": 2.2644, "step": 30915 }, { "epoch": 0.829647917561185, "grad_norm": 0.185546875, "learning_rate": 0.0008727510484672304, "loss": 2.2388, "step": 30916 }, { "epoch": 0.829674753112924, "grad_norm": 0.185546875, "learning_rate": 0.0008727260174382313, "loss": 2.208, "step": 30917 }, { "epoch": 0.8297015886646629, "grad_norm": 0.1845703125, "learning_rate": 0.0008727009858109514, "loss": 2.26, "step": 30918 }, { "epoch": 0.8297284242164019, "grad_norm": 0.181640625, "learning_rate": 0.0008726759535854457, "loss": 2.1673, "step": 30919 }, { "epoch": 0.8297552597681408, "grad_norm": 0.19140625, "learning_rate": 0.000872650920761769, "loss": 2.2502, "step": 30920 }, { "epoch": 0.8297820953198798, "grad_norm": 0.18359375, "learning_rate": 0.0008726258873399767, "loss": 2.2153, "step": 30921 }, { "epoch": 0.8298089308716187, "grad_norm": 0.1806640625, "learning_rate": 0.0008726008533201234, "loss": 2.2776, "step": 30922 }, { "epoch": 0.8298357664233577, "grad_norm": 0.185546875, "learning_rate": 0.0008725758187022639, "loss": 2.3489, "step": 30923 }, { "epoch": 0.8298626019750966, "grad_norm": 0.1845703125, "learning_rate": 0.0008725507834864533, "loss": 2.2592, "step": 30924 }, { "epoch": 0.8298894375268355, "grad_norm": 0.18359375, "learning_rate": 0.0008725257476727464, "loss": 2.2271, "step": 30925 }, { "epoch": 0.8299162730785745, "grad_norm": 0.185546875, "learning_rate": 0.0008725007112611984, "loss": 2.2494, "step": 30926 }, { "epoch": 0.8299431086303134, "grad_norm": 0.1826171875, "learning_rate": 0.0008724756742518638, "loss": 2.3405, "step": 30927 }, { "epoch": 0.8299699441820524, "grad_norm": 0.19140625, "learning_rate": 0.0008724506366447977, "loss": 2.2508, "step": 30928 }, { "epoch": 0.8299967797337914, "grad_norm": 0.1875, "learning_rate": 0.0008724255984400551, "loss": 2.2251, "step": 30929 }, { "epoch": 0.8300236152855303, "grad_norm": 0.18359375, "learning_rate": 0.0008724005596376909, "loss": 2.2065, "step": 30930 }, { "epoch": 0.8300504508372692, "grad_norm": 0.1865234375, "learning_rate": 0.0008723755202377601, "loss": 2.3164, "step": 30931 }, { "epoch": 0.8300772863890081, "grad_norm": 0.1806640625, "learning_rate": 0.0008723504802403176, "loss": 2.2317, "step": 30932 }, { "epoch": 0.8301041219407471, "grad_norm": 0.1884765625, "learning_rate": 0.0008723254396454181, "loss": 2.2384, "step": 30933 }, { "epoch": 0.830130957492486, "grad_norm": 0.18359375, "learning_rate": 0.0008723003984531169, "loss": 2.2061, "step": 30934 }, { "epoch": 0.830157793044225, "grad_norm": 0.1865234375, "learning_rate": 0.0008722753566634686, "loss": 2.2965, "step": 30935 }, { "epoch": 0.830184628595964, "grad_norm": 0.1845703125, "learning_rate": 0.0008722503142765281, "loss": 2.2311, "step": 30936 }, { "epoch": 0.8302114641477029, "grad_norm": 0.181640625, "learning_rate": 0.0008722252712923508, "loss": 2.2722, "step": 30937 }, { "epoch": 0.8302382996994419, "grad_norm": 0.181640625, "learning_rate": 0.0008722002277109912, "loss": 2.2167, "step": 30938 }, { "epoch": 0.8302651352511807, "grad_norm": 0.18359375, "learning_rate": 0.0008721751835325044, "loss": 2.2873, "step": 30939 }, { "epoch": 0.8302919708029197, "grad_norm": 0.181640625, "learning_rate": 0.0008721501387569454, "loss": 2.241, "step": 30940 }, { "epoch": 0.8303188063546586, "grad_norm": 0.1826171875, "learning_rate": 0.000872125093384369, "loss": 2.2846, "step": 30941 }, { "epoch": 0.8303456419063976, "grad_norm": 0.18359375, "learning_rate": 0.0008721000474148301, "loss": 2.2497, "step": 30942 }, { "epoch": 0.8303724774581366, "grad_norm": 0.1796875, "learning_rate": 0.0008720750008483839, "loss": 2.2095, "step": 30943 }, { "epoch": 0.8303993130098755, "grad_norm": 0.18359375, "learning_rate": 0.0008720499536850851, "loss": 2.1823, "step": 30944 }, { "epoch": 0.8304261485616145, "grad_norm": 0.1845703125, "learning_rate": 0.0008720249059249889, "loss": 2.3015, "step": 30945 }, { "epoch": 0.8304529841133533, "grad_norm": 0.185546875, "learning_rate": 0.0008719998575681499, "loss": 2.2739, "step": 30946 }, { "epoch": 0.8304798196650923, "grad_norm": 0.1845703125, "learning_rate": 0.0008719748086146233, "loss": 2.3028, "step": 30947 }, { "epoch": 0.8305066552168313, "grad_norm": 0.1845703125, "learning_rate": 0.000871949759064464, "loss": 2.2852, "step": 30948 }, { "epoch": 0.8305334907685702, "grad_norm": 0.1826171875, "learning_rate": 0.0008719247089177269, "loss": 2.2407, "step": 30949 }, { "epoch": 0.8305603263203092, "grad_norm": 0.1884765625, "learning_rate": 0.0008718996581744671, "loss": 2.1949, "step": 30950 }, { "epoch": 0.8305871618720481, "grad_norm": 0.1875, "learning_rate": 0.0008718746068347393, "loss": 2.314, "step": 30951 }, { "epoch": 0.830613997423787, "grad_norm": 0.1806640625, "learning_rate": 0.0008718495548985987, "loss": 2.2168, "step": 30952 }, { "epoch": 0.8306408329755259, "grad_norm": 0.18359375, "learning_rate": 0.0008718245023661, "loss": 2.2045, "step": 30953 }, { "epoch": 0.8306676685272649, "grad_norm": 0.18359375, "learning_rate": 0.0008717994492372987, "loss": 2.2301, "step": 30954 }, { "epoch": 0.8306945040790039, "grad_norm": 0.1865234375, "learning_rate": 0.000871774395512249, "loss": 2.3084, "step": 30955 }, { "epoch": 0.8307213396307428, "grad_norm": 0.1806640625, "learning_rate": 0.0008717493411910062, "loss": 2.1873, "step": 30956 }, { "epoch": 0.8307481751824818, "grad_norm": 0.185546875, "learning_rate": 0.0008717242862736255, "loss": 2.1428, "step": 30957 }, { "epoch": 0.8307750107342207, "grad_norm": 0.18359375, "learning_rate": 0.0008716992307601616, "loss": 2.226, "step": 30958 }, { "epoch": 0.8308018462859597, "grad_norm": 0.189453125, "learning_rate": 0.0008716741746506696, "loss": 2.2852, "step": 30959 }, { "epoch": 0.8308286818376985, "grad_norm": 0.181640625, "learning_rate": 0.0008716491179452043, "loss": 2.2426, "step": 30960 }, { "epoch": 0.8308555173894375, "grad_norm": 0.185546875, "learning_rate": 0.0008716240606438208, "loss": 2.3155, "step": 30961 }, { "epoch": 0.8308823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.0008715990027465738, "loss": 2.2099, "step": 30962 }, { "epoch": 0.8309091884929154, "grad_norm": 0.1845703125, "learning_rate": 0.000871573944253519, "loss": 2.2254, "step": 30963 }, { "epoch": 0.8309360240446544, "grad_norm": 0.185546875, "learning_rate": 0.0008715488851647103, "loss": 2.3443, "step": 30964 }, { "epoch": 0.8309628595963933, "grad_norm": 0.1826171875, "learning_rate": 0.0008715238254802035, "loss": 2.2502, "step": 30965 }, { "epoch": 0.8309896951481323, "grad_norm": 0.1865234375, "learning_rate": 0.0008714987652000535, "loss": 2.1974, "step": 30966 }, { "epoch": 0.8310165306998711, "grad_norm": 0.1865234375, "learning_rate": 0.0008714737043243147, "loss": 2.2969, "step": 30967 }, { "epoch": 0.8310433662516101, "grad_norm": 0.181640625, "learning_rate": 0.0008714486428530425, "loss": 2.2444, "step": 30968 }, { "epoch": 0.8310702018033491, "grad_norm": 0.1875, "learning_rate": 0.0008714235807862923, "loss": 2.2565, "step": 30969 }, { "epoch": 0.831097037355088, "grad_norm": 0.1845703125, "learning_rate": 0.0008713985181241182, "loss": 2.2522, "step": 30970 }, { "epoch": 0.831123872906827, "grad_norm": 0.185546875, "learning_rate": 0.0008713734548665756, "loss": 2.2746, "step": 30971 }, { "epoch": 0.8311507084585659, "grad_norm": 0.1875, "learning_rate": 0.0008713483910137196, "loss": 2.2846, "step": 30972 }, { "epoch": 0.8311775440103049, "grad_norm": 0.1826171875, "learning_rate": 0.000871323326565605, "loss": 2.2641, "step": 30973 }, { "epoch": 0.8312043795620438, "grad_norm": 0.1884765625, "learning_rate": 0.0008712982615222869, "loss": 2.257, "step": 30974 }, { "epoch": 0.8312312151137827, "grad_norm": 0.1865234375, "learning_rate": 0.0008712731958838203, "loss": 2.2816, "step": 30975 }, { "epoch": 0.8312580506655217, "grad_norm": 0.18359375, "learning_rate": 0.0008712481296502601, "loss": 2.1742, "step": 30976 }, { "epoch": 0.8312848862172606, "grad_norm": 0.189453125, "learning_rate": 0.0008712230628216611, "loss": 2.21, "step": 30977 }, { "epoch": 0.8313117217689996, "grad_norm": 0.181640625, "learning_rate": 0.0008711979953980786, "loss": 2.2451, "step": 30978 }, { "epoch": 0.8313385573207385, "grad_norm": 0.1845703125, "learning_rate": 0.0008711729273795677, "loss": 2.2634, "step": 30979 }, { "epoch": 0.8313653928724775, "grad_norm": 0.185546875, "learning_rate": 0.0008711478587661829, "loss": 2.2984, "step": 30980 }, { "epoch": 0.8313922284242165, "grad_norm": 0.1943359375, "learning_rate": 0.0008711227895579794, "loss": 2.2867, "step": 30981 }, { "epoch": 0.8314190639759553, "grad_norm": 0.181640625, "learning_rate": 0.0008710977197550126, "loss": 2.2625, "step": 30982 }, { "epoch": 0.8314458995276943, "grad_norm": 0.1875, "learning_rate": 0.0008710726493573371, "loss": 2.2288, "step": 30983 }, { "epoch": 0.8314727350794332, "grad_norm": 0.185546875, "learning_rate": 0.0008710475783650077, "loss": 2.2782, "step": 30984 }, { "epoch": 0.8314995706311722, "grad_norm": 0.18359375, "learning_rate": 0.0008710225067780798, "loss": 2.1998, "step": 30985 }, { "epoch": 0.8315264061829111, "grad_norm": 0.18359375, "learning_rate": 0.0008709974345966084, "loss": 2.2549, "step": 30986 }, { "epoch": 0.8315532417346501, "grad_norm": 0.1923828125, "learning_rate": 0.0008709723618206481, "loss": 2.2719, "step": 30987 }, { "epoch": 0.831580077286389, "grad_norm": 0.1845703125, "learning_rate": 0.0008709472884502543, "loss": 2.2573, "step": 30988 }, { "epoch": 0.8316069128381279, "grad_norm": 0.1875, "learning_rate": 0.0008709222144854817, "loss": 2.2431, "step": 30989 }, { "epoch": 0.8316337483898669, "grad_norm": 0.1943359375, "learning_rate": 0.0008708971399263858, "loss": 2.3075, "step": 30990 }, { "epoch": 0.8316605839416058, "grad_norm": 0.1806640625, "learning_rate": 0.0008708720647730211, "loss": 2.2284, "step": 30991 }, { "epoch": 0.8316874194933448, "grad_norm": 0.181640625, "learning_rate": 0.0008708469890254426, "loss": 2.2269, "step": 30992 }, { "epoch": 0.8317142550450837, "grad_norm": 0.1826171875, "learning_rate": 0.0008708219126837057, "loss": 2.2924, "step": 30993 }, { "epoch": 0.8317410905968227, "grad_norm": 0.181640625, "learning_rate": 0.0008707968357478652, "loss": 2.2858, "step": 30994 }, { "epoch": 0.8317679261485617, "grad_norm": 0.18359375, "learning_rate": 0.000870771758217976, "loss": 2.2249, "step": 30995 }, { "epoch": 0.8317947617003005, "grad_norm": 0.1826171875, "learning_rate": 0.0008707466800940934, "loss": 2.2315, "step": 30996 }, { "epoch": 0.8318215972520395, "grad_norm": 0.185546875, "learning_rate": 0.0008707216013762721, "loss": 2.308, "step": 30997 }, { "epoch": 0.8318484328037784, "grad_norm": 0.185546875, "learning_rate": 0.0008706965220645674, "loss": 2.2939, "step": 30998 }, { "epoch": 0.8318752683555174, "grad_norm": 0.181640625, "learning_rate": 0.0008706714421590339, "loss": 2.2206, "step": 30999 }, { "epoch": 0.8319021039072564, "grad_norm": 0.181640625, "learning_rate": 0.000870646361659727, "loss": 2.3207, "step": 31000 }, { "epoch": 0.8319289394589953, "grad_norm": 0.1865234375, "learning_rate": 0.0008706212805667018, "loss": 2.2793, "step": 31001 }, { "epoch": 0.8319557750107343, "grad_norm": 0.181640625, "learning_rate": 0.0008705961988800129, "loss": 2.2335, "step": 31002 }, { "epoch": 0.8319826105624731, "grad_norm": 0.18359375, "learning_rate": 0.0008705711165997157, "loss": 2.2406, "step": 31003 }, { "epoch": 0.8320094461142121, "grad_norm": 0.185546875, "learning_rate": 0.0008705460337258649, "loss": 2.3474, "step": 31004 }, { "epoch": 0.832036281665951, "grad_norm": 0.1845703125, "learning_rate": 0.000870520950258516, "loss": 2.2588, "step": 31005 }, { "epoch": 0.83206311721769, "grad_norm": 0.18359375, "learning_rate": 0.0008704958661977233, "loss": 2.2234, "step": 31006 }, { "epoch": 0.832089952769429, "grad_norm": 0.1845703125, "learning_rate": 0.0008704707815435426, "loss": 2.3097, "step": 31007 }, { "epoch": 0.8321167883211679, "grad_norm": 0.1884765625, "learning_rate": 0.0008704456962960285, "loss": 2.2095, "step": 31008 }, { "epoch": 0.8321436238729069, "grad_norm": 0.18359375, "learning_rate": 0.0008704206104552361, "loss": 2.2079, "step": 31009 }, { "epoch": 0.8321704594246457, "grad_norm": 0.1884765625, "learning_rate": 0.0008703955240212203, "loss": 2.3576, "step": 31010 }, { "epoch": 0.8321972949763847, "grad_norm": 0.1845703125, "learning_rate": 0.0008703704369940365, "loss": 2.2052, "step": 31011 }, { "epoch": 0.8322241305281236, "grad_norm": 0.1845703125, "learning_rate": 0.0008703453493737393, "loss": 2.2597, "step": 31012 }, { "epoch": 0.8322509660798626, "grad_norm": 0.1865234375, "learning_rate": 0.000870320261160384, "loss": 2.2834, "step": 31013 }, { "epoch": 0.8322778016316016, "grad_norm": 0.1845703125, "learning_rate": 0.0008702951723540256, "loss": 2.2787, "step": 31014 }, { "epoch": 0.8323046371833405, "grad_norm": 0.1806640625, "learning_rate": 0.0008702700829547191, "loss": 2.1958, "step": 31015 }, { "epoch": 0.8323314727350795, "grad_norm": 0.18359375, "learning_rate": 0.0008702449929625197, "loss": 2.1895, "step": 31016 }, { "epoch": 0.8323583082868183, "grad_norm": 0.1826171875, "learning_rate": 0.0008702199023774821, "loss": 2.2448, "step": 31017 }, { "epoch": 0.8323851438385573, "grad_norm": 0.1826171875, "learning_rate": 0.0008701948111996615, "loss": 2.2676, "step": 31018 }, { "epoch": 0.8324119793902963, "grad_norm": 0.1826171875, "learning_rate": 0.000870169719429113, "loss": 2.2679, "step": 31019 }, { "epoch": 0.8324388149420352, "grad_norm": 0.185546875, "learning_rate": 0.0008701446270658918, "loss": 2.2594, "step": 31020 }, { "epoch": 0.8324656504937742, "grad_norm": 0.185546875, "learning_rate": 0.0008701195341100526, "loss": 2.295, "step": 31021 }, { "epoch": 0.8324924860455131, "grad_norm": 0.1904296875, "learning_rate": 0.0008700944405616507, "loss": 2.2775, "step": 31022 }, { "epoch": 0.8325193215972521, "grad_norm": 0.1796875, "learning_rate": 0.0008700693464207411, "loss": 2.2257, "step": 31023 }, { "epoch": 0.8325461571489909, "grad_norm": 0.185546875, "learning_rate": 0.0008700442516873788, "loss": 2.2849, "step": 31024 }, { "epoch": 0.8325729927007299, "grad_norm": 0.1884765625, "learning_rate": 0.0008700191563616187, "loss": 2.2289, "step": 31025 }, { "epoch": 0.8325998282524689, "grad_norm": 0.1826171875, "learning_rate": 0.0008699940604435161, "loss": 2.2485, "step": 31026 }, { "epoch": 0.8326266638042078, "grad_norm": 0.1845703125, "learning_rate": 0.0008699689639331261, "loss": 2.2391, "step": 31027 }, { "epoch": 0.8326534993559468, "grad_norm": 0.1826171875, "learning_rate": 0.0008699438668305033, "loss": 2.1892, "step": 31028 }, { "epoch": 0.8326803349076857, "grad_norm": 0.18359375, "learning_rate": 0.0008699187691357033, "loss": 2.2562, "step": 31029 }, { "epoch": 0.8327071704594247, "grad_norm": 0.18359375, "learning_rate": 0.0008698936708487809, "loss": 2.3201, "step": 31030 }, { "epoch": 0.8327340060111635, "grad_norm": 0.1845703125, "learning_rate": 0.0008698685719697912, "loss": 2.2795, "step": 31031 }, { "epoch": 0.8327608415629025, "grad_norm": 0.185546875, "learning_rate": 0.0008698434724987893, "loss": 2.1288, "step": 31032 }, { "epoch": 0.8327876771146415, "grad_norm": 0.1826171875, "learning_rate": 0.0008698183724358301, "loss": 2.1813, "step": 31033 }, { "epoch": 0.8328145126663804, "grad_norm": 0.181640625, "learning_rate": 0.0008697932717809688, "loss": 2.2551, "step": 31034 }, { "epoch": 0.8328413482181194, "grad_norm": 0.18359375, "learning_rate": 0.0008697681705342606, "loss": 2.2931, "step": 31035 }, { "epoch": 0.8328681837698583, "grad_norm": 0.1865234375, "learning_rate": 0.0008697430686957602, "loss": 2.3089, "step": 31036 }, { "epoch": 0.8328950193215973, "grad_norm": 0.1826171875, "learning_rate": 0.000869717966265523, "loss": 2.2265, "step": 31037 }, { "epoch": 0.8329218548733361, "grad_norm": 0.1884765625, "learning_rate": 0.0008696928632436038, "loss": 2.229, "step": 31038 }, { "epoch": 0.8329486904250751, "grad_norm": 0.18359375, "learning_rate": 0.0008696677596300579, "loss": 2.2205, "step": 31039 }, { "epoch": 0.8329755259768141, "grad_norm": 0.1875, "learning_rate": 0.0008696426554249401, "loss": 2.2997, "step": 31040 }, { "epoch": 0.833002361528553, "grad_norm": 0.18359375, "learning_rate": 0.0008696175506283059, "loss": 2.2706, "step": 31041 }, { "epoch": 0.833029197080292, "grad_norm": 0.1826171875, "learning_rate": 0.0008695924452402099, "loss": 2.1995, "step": 31042 }, { "epoch": 0.8330560326320309, "grad_norm": 0.18359375, "learning_rate": 0.0008695673392607075, "loss": 2.282, "step": 31043 }, { "epoch": 0.8330828681837699, "grad_norm": 0.1826171875, "learning_rate": 0.0008695422326898538, "loss": 2.2265, "step": 31044 }, { "epoch": 0.8331097037355089, "grad_norm": 0.181640625, "learning_rate": 0.0008695171255277035, "loss": 2.2719, "step": 31045 }, { "epoch": 0.8331365392872477, "grad_norm": 0.18359375, "learning_rate": 0.0008694920177743121, "loss": 2.2966, "step": 31046 }, { "epoch": 0.8331633748389867, "grad_norm": 0.1875, "learning_rate": 0.0008694669094297343, "loss": 2.3472, "step": 31047 }, { "epoch": 0.8331902103907256, "grad_norm": 0.181640625, "learning_rate": 0.0008694418004940255, "loss": 2.2248, "step": 31048 }, { "epoch": 0.8332170459424646, "grad_norm": 0.18359375, "learning_rate": 0.0008694166909672407, "loss": 2.2291, "step": 31049 }, { "epoch": 0.8332438814942035, "grad_norm": 0.185546875, "learning_rate": 0.0008693915808494348, "loss": 2.2564, "step": 31050 }, { "epoch": 0.8332707170459425, "grad_norm": 0.1845703125, "learning_rate": 0.000869366470140663, "loss": 2.3374, "step": 31051 }, { "epoch": 0.8332975525976815, "grad_norm": 0.1875, "learning_rate": 0.0008693413588409807, "loss": 2.2732, "step": 31052 }, { "epoch": 0.8333243881494203, "grad_norm": 0.18359375, "learning_rate": 0.0008693162469504427, "loss": 2.1593, "step": 31053 }, { "epoch": 0.8333512237011593, "grad_norm": 0.1826171875, "learning_rate": 0.0008692911344691037, "loss": 2.211, "step": 31054 }, { "epoch": 0.8333780592528982, "grad_norm": 0.181640625, "learning_rate": 0.0008692660213970195, "loss": 2.2499, "step": 31055 }, { "epoch": 0.8334048948046372, "grad_norm": 0.18359375, "learning_rate": 0.0008692409077342448, "loss": 2.1829, "step": 31056 }, { "epoch": 0.8334317303563761, "grad_norm": 0.18359375, "learning_rate": 0.0008692157934808348, "loss": 2.2448, "step": 31057 }, { "epoch": 0.8334585659081151, "grad_norm": 0.18359375, "learning_rate": 0.0008691906786368444, "loss": 2.2503, "step": 31058 }, { "epoch": 0.8334854014598541, "grad_norm": 0.1826171875, "learning_rate": 0.0008691655632023289, "loss": 2.245, "step": 31059 }, { "epoch": 0.8335122370115929, "grad_norm": 0.1826171875, "learning_rate": 0.0008691404471773435, "loss": 2.1859, "step": 31060 }, { "epoch": 0.8335390725633319, "grad_norm": 0.181640625, "learning_rate": 0.000869115330561943, "loss": 2.2577, "step": 31061 }, { "epoch": 0.8335659081150708, "grad_norm": 0.1806640625, "learning_rate": 0.0008690902133561825, "loss": 2.2546, "step": 31062 }, { "epoch": 0.8335927436668098, "grad_norm": 0.1826171875, "learning_rate": 0.0008690650955601176, "loss": 2.2154, "step": 31063 }, { "epoch": 0.8336195792185487, "grad_norm": 0.1787109375, "learning_rate": 0.0008690399771738028, "loss": 2.1542, "step": 31064 }, { "epoch": 0.8336464147702877, "grad_norm": 0.1826171875, "learning_rate": 0.0008690148581972935, "loss": 2.2612, "step": 31065 }, { "epoch": 0.8336732503220267, "grad_norm": 0.1845703125, "learning_rate": 0.0008689897386306446, "loss": 2.273, "step": 31066 }, { "epoch": 0.8337000858737655, "grad_norm": 0.1845703125, "learning_rate": 0.0008689646184739117, "loss": 2.253, "step": 31067 }, { "epoch": 0.8337269214255045, "grad_norm": 0.185546875, "learning_rate": 0.0008689394977271492, "loss": 2.2464, "step": 31068 }, { "epoch": 0.8337537569772434, "grad_norm": 0.181640625, "learning_rate": 0.0008689143763904127, "loss": 2.2271, "step": 31069 }, { "epoch": 0.8337805925289824, "grad_norm": 0.1806640625, "learning_rate": 0.0008688892544637572, "loss": 2.1904, "step": 31070 }, { "epoch": 0.8338074280807214, "grad_norm": 0.1787109375, "learning_rate": 0.0008688641319472378, "loss": 2.2912, "step": 31071 }, { "epoch": 0.8338342636324603, "grad_norm": 0.1845703125, "learning_rate": 0.0008688390088409095, "loss": 2.2477, "step": 31072 }, { "epoch": 0.8338610991841993, "grad_norm": 0.1865234375, "learning_rate": 0.0008688138851448274, "loss": 2.3274, "step": 31073 }, { "epoch": 0.8338879347359381, "grad_norm": 0.1826171875, "learning_rate": 0.0008687887608590471, "loss": 2.2156, "step": 31074 }, { "epoch": 0.8339147702876771, "grad_norm": 0.18359375, "learning_rate": 0.0008687636359836231, "loss": 2.2388, "step": 31075 }, { "epoch": 0.833941605839416, "grad_norm": 0.185546875, "learning_rate": 0.0008687385105186108, "loss": 2.2294, "step": 31076 }, { "epoch": 0.833968441391155, "grad_norm": 0.185546875, "learning_rate": 0.0008687133844640651, "loss": 2.2336, "step": 31077 }, { "epoch": 0.833995276942894, "grad_norm": 0.1806640625, "learning_rate": 0.0008686882578200416, "loss": 2.2855, "step": 31078 }, { "epoch": 0.8340221124946329, "grad_norm": 0.1806640625, "learning_rate": 0.0008686631305865949, "loss": 2.1979, "step": 31079 }, { "epoch": 0.8340489480463719, "grad_norm": 0.185546875, "learning_rate": 0.0008686380027637803, "loss": 2.2298, "step": 31080 }, { "epoch": 0.8340757835981107, "grad_norm": 0.1875, "learning_rate": 0.0008686128743516532, "loss": 2.2935, "step": 31081 }, { "epoch": 0.8341026191498497, "grad_norm": 0.18359375, "learning_rate": 0.0008685877453502683, "loss": 2.2602, "step": 31082 }, { "epoch": 0.8341294547015886, "grad_norm": 0.1875, "learning_rate": 0.0008685626157596809, "loss": 2.2941, "step": 31083 }, { "epoch": 0.8341562902533276, "grad_norm": 0.1845703125, "learning_rate": 0.000868537485579946, "loss": 2.2549, "step": 31084 }, { "epoch": 0.8341831258050666, "grad_norm": 0.1826171875, "learning_rate": 0.0008685123548111193, "loss": 2.2752, "step": 31085 }, { "epoch": 0.8342099613568055, "grad_norm": 0.1787109375, "learning_rate": 0.0008684872234532552, "loss": 2.2208, "step": 31086 }, { "epoch": 0.8342367969085445, "grad_norm": 0.1826171875, "learning_rate": 0.000868462091506409, "loss": 2.2516, "step": 31087 }, { "epoch": 0.8342636324602833, "grad_norm": 0.1787109375, "learning_rate": 0.0008684369589706362, "loss": 2.2763, "step": 31088 }, { "epoch": 0.8342904680120223, "grad_norm": 0.1865234375, "learning_rate": 0.0008684118258459916, "loss": 2.3372, "step": 31089 }, { "epoch": 0.8343173035637613, "grad_norm": 0.1904296875, "learning_rate": 0.0008683866921325303, "loss": 2.3053, "step": 31090 }, { "epoch": 0.8343441391155002, "grad_norm": 0.181640625, "learning_rate": 0.0008683615578303077, "loss": 2.2061, "step": 31091 }, { "epoch": 0.8343709746672392, "grad_norm": 0.1875, "learning_rate": 0.0008683364229393788, "loss": 2.2921, "step": 31092 }, { "epoch": 0.8343978102189781, "grad_norm": 0.1875, "learning_rate": 0.0008683112874597987, "loss": 2.2423, "step": 31093 }, { "epoch": 0.8344246457707171, "grad_norm": 0.185546875, "learning_rate": 0.0008682861513916225, "loss": 2.2085, "step": 31094 }, { "epoch": 0.834451481322456, "grad_norm": 0.1806640625, "learning_rate": 0.0008682610147349055, "loss": 2.2443, "step": 31095 }, { "epoch": 0.8344783168741949, "grad_norm": 0.18359375, "learning_rate": 0.0008682358774897028, "loss": 2.2854, "step": 31096 }, { "epoch": 0.8345051524259339, "grad_norm": 0.1826171875, "learning_rate": 0.0008682107396560694, "loss": 2.2973, "step": 31097 }, { "epoch": 0.8345319879776728, "grad_norm": 0.18359375, "learning_rate": 0.0008681856012340604, "loss": 2.2724, "step": 31098 }, { "epoch": 0.8345588235294118, "grad_norm": 0.185546875, "learning_rate": 0.0008681604622237315, "loss": 2.2963, "step": 31099 }, { "epoch": 0.8345856590811507, "grad_norm": 0.1796875, "learning_rate": 0.0008681353226251372, "loss": 2.1691, "step": 31100 }, { "epoch": 0.8346124946328897, "grad_norm": 0.1826171875, "learning_rate": 0.0008681101824383327, "loss": 2.2264, "step": 31101 }, { "epoch": 0.8346393301846285, "grad_norm": 0.181640625, "learning_rate": 0.0008680850416633737, "loss": 2.2744, "step": 31102 }, { "epoch": 0.8346661657363675, "grad_norm": 0.18359375, "learning_rate": 0.0008680599003003149, "loss": 2.2931, "step": 31103 }, { "epoch": 0.8346930012881065, "grad_norm": 0.1845703125, "learning_rate": 0.0008680347583492114, "loss": 2.2178, "step": 31104 }, { "epoch": 0.8347198368398454, "grad_norm": 0.1796875, "learning_rate": 0.0008680096158101185, "loss": 2.2377, "step": 31105 }, { "epoch": 0.8347466723915844, "grad_norm": 0.1796875, "learning_rate": 0.0008679844726830915, "loss": 2.203, "step": 31106 }, { "epoch": 0.8347735079433233, "grad_norm": 0.1826171875, "learning_rate": 0.0008679593289681853, "loss": 2.2305, "step": 31107 }, { "epoch": 0.8348003434950623, "grad_norm": 0.185546875, "learning_rate": 0.0008679341846654552, "loss": 2.2155, "step": 31108 }, { "epoch": 0.8348271790468011, "grad_norm": 0.1826171875, "learning_rate": 0.0008679090397749565, "loss": 2.2256, "step": 31109 }, { "epoch": 0.8348540145985401, "grad_norm": 0.18359375, "learning_rate": 0.0008678838942967439, "loss": 2.3233, "step": 31110 }, { "epoch": 0.8348808501502791, "grad_norm": 0.185546875, "learning_rate": 0.000867858748230873, "loss": 2.2309, "step": 31111 }, { "epoch": 0.834907685702018, "grad_norm": 0.18359375, "learning_rate": 0.0008678336015773986, "loss": 2.2735, "step": 31112 }, { "epoch": 0.834934521253757, "grad_norm": 0.197265625, "learning_rate": 0.0008678084543363763, "loss": 2.2096, "step": 31113 }, { "epoch": 0.8349613568054959, "grad_norm": 0.1904296875, "learning_rate": 0.0008677833065078611, "loss": 2.3486, "step": 31114 }, { "epoch": 0.8349881923572349, "grad_norm": 0.185546875, "learning_rate": 0.000867758158091908, "loss": 2.2179, "step": 31115 }, { "epoch": 0.8350150279089739, "grad_norm": 0.1884765625, "learning_rate": 0.0008677330090885722, "loss": 2.2686, "step": 31116 }, { "epoch": 0.8350418634607127, "grad_norm": 0.1845703125, "learning_rate": 0.000867707859497909, "loss": 2.2536, "step": 31117 }, { "epoch": 0.8350686990124517, "grad_norm": 0.1806640625, "learning_rate": 0.0008676827093199736, "loss": 2.2133, "step": 31118 }, { "epoch": 0.8350955345641906, "grad_norm": 0.181640625, "learning_rate": 0.0008676575585548211, "loss": 2.2394, "step": 31119 }, { "epoch": 0.8351223701159296, "grad_norm": 0.189453125, "learning_rate": 0.0008676324072025066, "loss": 2.2759, "step": 31120 }, { "epoch": 0.8351492056676685, "grad_norm": 0.1865234375, "learning_rate": 0.0008676072552630852, "loss": 2.2246, "step": 31121 }, { "epoch": 0.8351760412194075, "grad_norm": 0.1806640625, "learning_rate": 0.0008675821027366125, "loss": 2.2152, "step": 31122 }, { "epoch": 0.8352028767711465, "grad_norm": 0.1845703125, "learning_rate": 0.0008675569496231432, "loss": 2.2581, "step": 31123 }, { "epoch": 0.8352297123228853, "grad_norm": 0.1826171875, "learning_rate": 0.0008675317959227325, "loss": 2.2591, "step": 31124 }, { "epoch": 0.8352565478746243, "grad_norm": 0.1865234375, "learning_rate": 0.0008675066416354362, "loss": 2.2019, "step": 31125 }, { "epoch": 0.8352833834263632, "grad_norm": 0.18359375, "learning_rate": 0.0008674814867613087, "loss": 2.2478, "step": 31126 }, { "epoch": 0.8353102189781022, "grad_norm": 0.1826171875, "learning_rate": 0.0008674563313004056, "loss": 2.2397, "step": 31127 }, { "epoch": 0.8353370545298411, "grad_norm": 0.1826171875, "learning_rate": 0.000867431175252782, "loss": 2.2308, "step": 31128 }, { "epoch": 0.8353638900815801, "grad_norm": 0.18359375, "learning_rate": 0.000867406018618493, "loss": 2.2595, "step": 31129 }, { "epoch": 0.8353907256333191, "grad_norm": 0.181640625, "learning_rate": 0.0008673808613975939, "loss": 2.2726, "step": 31130 }, { "epoch": 0.8354175611850579, "grad_norm": 0.1884765625, "learning_rate": 0.0008673557035901399, "loss": 2.4, "step": 31131 }, { "epoch": 0.8354443967367969, "grad_norm": 0.1845703125, "learning_rate": 0.0008673305451961862, "loss": 2.2021, "step": 31132 }, { "epoch": 0.8354712322885358, "grad_norm": 0.1806640625, "learning_rate": 0.0008673053862157878, "loss": 2.2648, "step": 31133 }, { "epoch": 0.8354980678402748, "grad_norm": 0.1826171875, "learning_rate": 0.0008672802266490001, "loss": 2.1957, "step": 31134 }, { "epoch": 0.8355249033920137, "grad_norm": 0.1806640625, "learning_rate": 0.0008672550664958782, "loss": 2.2008, "step": 31135 }, { "epoch": 0.8355517389437527, "grad_norm": 0.18359375, "learning_rate": 0.0008672299057564774, "loss": 2.1998, "step": 31136 }, { "epoch": 0.8355785744954917, "grad_norm": 0.1796875, "learning_rate": 0.0008672047444308527, "loss": 2.234, "step": 31137 }, { "epoch": 0.8356054100472305, "grad_norm": 0.1806640625, "learning_rate": 0.0008671795825190595, "loss": 2.2209, "step": 31138 }, { "epoch": 0.8356322455989695, "grad_norm": 0.1845703125, "learning_rate": 0.0008671544200211528, "loss": 2.254, "step": 31139 }, { "epoch": 0.8356590811507084, "grad_norm": 0.1826171875, "learning_rate": 0.0008671292569371879, "loss": 2.2338, "step": 31140 }, { "epoch": 0.8356859167024474, "grad_norm": 0.1806640625, "learning_rate": 0.0008671040932672201, "loss": 2.3253, "step": 31141 }, { "epoch": 0.8357127522541864, "grad_norm": 0.181640625, "learning_rate": 0.0008670789290113044, "loss": 2.2753, "step": 31142 }, { "epoch": 0.8357395878059253, "grad_norm": 0.1796875, "learning_rate": 0.0008670537641694961, "loss": 2.2901, "step": 31143 }, { "epoch": 0.8357664233576643, "grad_norm": 0.18359375, "learning_rate": 0.0008670285987418505, "loss": 2.2576, "step": 31144 }, { "epoch": 0.8357932589094031, "grad_norm": 0.1787109375, "learning_rate": 0.0008670034327284226, "loss": 2.1709, "step": 31145 }, { "epoch": 0.8358200944611421, "grad_norm": 0.1806640625, "learning_rate": 0.0008669782661292677, "loss": 2.2556, "step": 31146 }, { "epoch": 0.835846930012881, "grad_norm": 0.185546875, "learning_rate": 0.0008669530989444413, "loss": 2.2424, "step": 31147 }, { "epoch": 0.83587376556462, "grad_norm": 0.1845703125, "learning_rate": 0.0008669279311739982, "loss": 2.2781, "step": 31148 }, { "epoch": 0.835900601116359, "grad_norm": 0.18359375, "learning_rate": 0.0008669027628179936, "loss": 2.2692, "step": 31149 }, { "epoch": 0.8359274366680979, "grad_norm": 0.18359375, "learning_rate": 0.000866877593876483, "loss": 2.3214, "step": 31150 }, { "epoch": 0.8359542722198369, "grad_norm": 0.1806640625, "learning_rate": 0.0008668524243495214, "loss": 2.2219, "step": 31151 }, { "epoch": 0.8359811077715757, "grad_norm": 0.18359375, "learning_rate": 0.0008668272542371643, "loss": 2.2002, "step": 31152 }, { "epoch": 0.8360079433233147, "grad_norm": 0.1806640625, "learning_rate": 0.0008668020835394664, "loss": 2.2424, "step": 31153 }, { "epoch": 0.8360347788750536, "grad_norm": 0.181640625, "learning_rate": 0.0008667769122564833, "loss": 2.2334, "step": 31154 }, { "epoch": 0.8360616144267926, "grad_norm": 0.18359375, "learning_rate": 0.0008667517403882704, "loss": 2.2662, "step": 31155 }, { "epoch": 0.8360884499785316, "grad_norm": 0.189453125, "learning_rate": 0.0008667265679348825, "loss": 2.2316, "step": 31156 }, { "epoch": 0.8361152855302705, "grad_norm": 0.1826171875, "learning_rate": 0.0008667013948963749, "loss": 2.2123, "step": 31157 }, { "epoch": 0.8361421210820095, "grad_norm": 0.1826171875, "learning_rate": 0.000866676221272803, "loss": 2.2659, "step": 31158 }, { "epoch": 0.8361689566337484, "grad_norm": 0.1796875, "learning_rate": 0.000866651047064222, "loss": 2.2293, "step": 31159 }, { "epoch": 0.8361957921854873, "grad_norm": 0.185546875, "learning_rate": 0.0008666258722706868, "loss": 2.2122, "step": 31160 }, { "epoch": 0.8362226277372263, "grad_norm": 0.1796875, "learning_rate": 0.0008666006968922531, "loss": 2.2824, "step": 31161 }, { "epoch": 0.8362494632889652, "grad_norm": 0.185546875, "learning_rate": 0.0008665755209289758, "loss": 2.2315, "step": 31162 }, { "epoch": 0.8362762988407042, "grad_norm": 0.1806640625, "learning_rate": 0.0008665503443809102, "loss": 2.2318, "step": 31163 }, { "epoch": 0.8363031343924431, "grad_norm": 0.18359375, "learning_rate": 0.0008665251672481117, "loss": 2.2041, "step": 31164 }, { "epoch": 0.8363299699441821, "grad_norm": 0.1845703125, "learning_rate": 0.0008664999895306351, "loss": 2.2513, "step": 31165 }, { "epoch": 0.836356805495921, "grad_norm": 0.181640625, "learning_rate": 0.0008664748112285363, "loss": 2.3009, "step": 31166 }, { "epoch": 0.8363836410476599, "grad_norm": 0.177734375, "learning_rate": 0.00086644963234187, "loss": 2.1454, "step": 31167 }, { "epoch": 0.8364104765993989, "grad_norm": 0.1826171875, "learning_rate": 0.0008664244528706917, "loss": 2.2374, "step": 31168 }, { "epoch": 0.8364373121511378, "grad_norm": 0.1806640625, "learning_rate": 0.0008663992728150565, "loss": 2.281, "step": 31169 }, { "epoch": 0.8364641477028768, "grad_norm": 0.1806640625, "learning_rate": 0.0008663740921750195, "loss": 2.2404, "step": 31170 }, { "epoch": 0.8364909832546157, "grad_norm": 0.1845703125, "learning_rate": 0.0008663489109506361, "loss": 2.323, "step": 31171 }, { "epoch": 0.8365178188063547, "grad_norm": 0.185546875, "learning_rate": 0.0008663237291419618, "loss": 2.2254, "step": 31172 }, { "epoch": 0.8365446543580936, "grad_norm": 0.1845703125, "learning_rate": 0.0008662985467490516, "loss": 2.2225, "step": 31173 }, { "epoch": 0.8365714899098325, "grad_norm": 0.181640625, "learning_rate": 0.0008662733637719604, "loss": 2.2645, "step": 31174 }, { "epoch": 0.8365983254615715, "grad_norm": 0.1904296875, "learning_rate": 0.0008662481802107439, "loss": 2.3324, "step": 31175 }, { "epoch": 0.8366251610133104, "grad_norm": 0.185546875, "learning_rate": 0.0008662229960654575, "loss": 2.2864, "step": 31176 }, { "epoch": 0.8366519965650494, "grad_norm": 0.1796875, "learning_rate": 0.0008661978113361559, "loss": 2.2319, "step": 31177 }, { "epoch": 0.8366788321167883, "grad_norm": 0.189453125, "learning_rate": 0.0008661726260228946, "loss": 2.2628, "step": 31178 }, { "epoch": 0.8367056676685273, "grad_norm": 0.18359375, "learning_rate": 0.000866147440125729, "loss": 2.2467, "step": 31179 }, { "epoch": 0.8367325032202662, "grad_norm": 0.18359375, "learning_rate": 0.0008661222536447143, "loss": 2.2213, "step": 31180 }, { "epoch": 0.8367593387720051, "grad_norm": 0.1826171875, "learning_rate": 0.0008660970665799055, "loss": 2.2622, "step": 31181 }, { "epoch": 0.8367861743237441, "grad_norm": 0.1845703125, "learning_rate": 0.000866071878931358, "loss": 2.2208, "step": 31182 }, { "epoch": 0.836813009875483, "grad_norm": 0.1826171875, "learning_rate": 0.0008660466906991272, "loss": 2.2321, "step": 31183 }, { "epoch": 0.836839845427222, "grad_norm": 0.18359375, "learning_rate": 0.0008660215018832683, "loss": 2.1998, "step": 31184 }, { "epoch": 0.8368666809789609, "grad_norm": 0.1845703125, "learning_rate": 0.0008659963124838362, "loss": 2.2169, "step": 31185 }, { "epoch": 0.8368935165306999, "grad_norm": 0.1806640625, "learning_rate": 0.0008659711225008866, "loss": 2.2945, "step": 31186 }, { "epoch": 0.8369203520824389, "grad_norm": 0.1826171875, "learning_rate": 0.0008659459319344748, "loss": 2.2578, "step": 31187 }, { "epoch": 0.8369471876341777, "grad_norm": 0.1875, "learning_rate": 0.0008659207407846556, "loss": 2.3192, "step": 31188 }, { "epoch": 0.8369740231859167, "grad_norm": 0.1865234375, "learning_rate": 0.0008658955490514846, "loss": 2.2658, "step": 31189 }, { "epoch": 0.8370008587376556, "grad_norm": 0.18359375, "learning_rate": 0.000865870356735017, "loss": 2.2694, "step": 31190 }, { "epoch": 0.8370276942893946, "grad_norm": 0.1904296875, "learning_rate": 0.0008658451638353081, "loss": 2.2694, "step": 31191 }, { "epoch": 0.8370545298411335, "grad_norm": 0.1806640625, "learning_rate": 0.000865819970352413, "loss": 2.2754, "step": 31192 }, { "epoch": 0.8370813653928725, "grad_norm": 0.185546875, "learning_rate": 0.0008657947762863871, "loss": 2.2367, "step": 31193 }, { "epoch": 0.8371082009446115, "grad_norm": 0.1826171875, "learning_rate": 0.0008657695816372859, "loss": 2.2532, "step": 31194 }, { "epoch": 0.8371350364963503, "grad_norm": 0.1826171875, "learning_rate": 0.0008657443864051643, "loss": 2.2153, "step": 31195 }, { "epoch": 0.8371618720480893, "grad_norm": 0.1875, "learning_rate": 0.0008657191905900775, "loss": 2.2645, "step": 31196 }, { "epoch": 0.8371887075998282, "grad_norm": 0.1884765625, "learning_rate": 0.0008656939941920813, "loss": 2.2429, "step": 31197 }, { "epoch": 0.8372155431515672, "grad_norm": 0.1865234375, "learning_rate": 0.0008656687972112305, "loss": 2.2677, "step": 31198 }, { "epoch": 0.8372423787033061, "grad_norm": 0.185546875, "learning_rate": 0.0008656435996475805, "loss": 2.219, "step": 31199 }, { "epoch": 0.8372692142550451, "grad_norm": 0.1826171875, "learning_rate": 0.0008656184015011865, "loss": 2.239, "step": 31200 }, { "epoch": 0.8372960498067841, "grad_norm": 0.189453125, "learning_rate": 0.0008655932027721041, "loss": 2.2274, "step": 31201 }, { "epoch": 0.837322885358523, "grad_norm": 0.1806640625, "learning_rate": 0.0008655680034603883, "loss": 2.2504, "step": 31202 }, { "epoch": 0.8373497209102619, "grad_norm": 0.1796875, "learning_rate": 0.0008655428035660944, "loss": 2.1935, "step": 31203 }, { "epoch": 0.8373765564620008, "grad_norm": 0.1865234375, "learning_rate": 0.0008655176030892775, "loss": 2.222, "step": 31204 }, { "epoch": 0.8374033920137398, "grad_norm": 0.1806640625, "learning_rate": 0.0008654924020299933, "loss": 2.2325, "step": 31205 }, { "epoch": 0.8374302275654787, "grad_norm": 0.1845703125, "learning_rate": 0.000865467200388297, "loss": 2.2272, "step": 31206 }, { "epoch": 0.8374570631172177, "grad_norm": 0.1865234375, "learning_rate": 0.0008654419981642434, "loss": 2.2221, "step": 31207 }, { "epoch": 0.8374838986689567, "grad_norm": 0.1884765625, "learning_rate": 0.0008654167953578884, "loss": 2.2666, "step": 31208 }, { "epoch": 0.8375107342206956, "grad_norm": 0.1826171875, "learning_rate": 0.0008653915919692871, "loss": 2.2204, "step": 31209 }, { "epoch": 0.8375375697724345, "grad_norm": 0.18359375, "learning_rate": 0.0008653663879984946, "loss": 2.2207, "step": 31210 }, { "epoch": 0.8375644053241734, "grad_norm": 0.1826171875, "learning_rate": 0.0008653411834455662, "loss": 2.2309, "step": 31211 }, { "epoch": 0.8375912408759124, "grad_norm": 0.1806640625, "learning_rate": 0.0008653159783105575, "loss": 2.2079, "step": 31212 }, { "epoch": 0.8376180764276514, "grad_norm": 0.181640625, "learning_rate": 0.0008652907725935237, "loss": 2.2755, "step": 31213 }, { "epoch": 0.8376449119793903, "grad_norm": 0.1826171875, "learning_rate": 0.0008652655662945197, "loss": 2.2288, "step": 31214 }, { "epoch": 0.8376717475311293, "grad_norm": 0.1845703125, "learning_rate": 0.0008652403594136013, "loss": 2.276, "step": 31215 }, { "epoch": 0.8376985830828682, "grad_norm": 0.1875, "learning_rate": 0.0008652151519508235, "loss": 2.2768, "step": 31216 }, { "epoch": 0.8377254186346071, "grad_norm": 0.1826171875, "learning_rate": 0.0008651899439062418, "loss": 2.2839, "step": 31217 }, { "epoch": 0.837752254186346, "grad_norm": 0.18359375, "learning_rate": 0.0008651647352799113, "loss": 2.245, "step": 31218 }, { "epoch": 0.837779089738085, "grad_norm": 0.1826171875, "learning_rate": 0.0008651395260718875, "loss": 2.2532, "step": 31219 }, { "epoch": 0.837805925289824, "grad_norm": 0.1806640625, "learning_rate": 0.0008651143162822253, "loss": 2.1825, "step": 31220 }, { "epoch": 0.8378327608415629, "grad_norm": 0.1796875, "learning_rate": 0.0008650891059109804, "loss": 2.281, "step": 31221 }, { "epoch": 0.8378595963933019, "grad_norm": 0.1806640625, "learning_rate": 0.000865063894958208, "loss": 2.1886, "step": 31222 }, { "epoch": 0.8378864319450408, "grad_norm": 0.1845703125, "learning_rate": 0.0008650386834239636, "loss": 2.3053, "step": 31223 }, { "epoch": 0.8379132674967797, "grad_norm": 0.1826171875, "learning_rate": 0.0008650134713083019, "loss": 2.2821, "step": 31224 }, { "epoch": 0.8379401030485186, "grad_norm": 0.1904296875, "learning_rate": 0.000864988258611279, "loss": 2.2441, "step": 31225 }, { "epoch": 0.8379669386002576, "grad_norm": 0.177734375, "learning_rate": 0.0008649630453329496, "loss": 2.1589, "step": 31226 }, { "epoch": 0.8379937741519966, "grad_norm": 0.185546875, "learning_rate": 0.0008649378314733694, "loss": 2.2394, "step": 31227 }, { "epoch": 0.8380206097037355, "grad_norm": 0.18359375, "learning_rate": 0.0008649126170325934, "loss": 2.1938, "step": 31228 }, { "epoch": 0.8380474452554745, "grad_norm": 0.1826171875, "learning_rate": 0.0008648874020106771, "loss": 2.2459, "step": 31229 }, { "epoch": 0.8380742808072134, "grad_norm": 0.1865234375, "learning_rate": 0.000864862186407676, "loss": 2.242, "step": 31230 }, { "epoch": 0.8381011163589523, "grad_norm": 0.177734375, "learning_rate": 0.0008648369702236448, "loss": 2.2389, "step": 31231 }, { "epoch": 0.8381279519106913, "grad_norm": 0.1865234375, "learning_rate": 0.0008648117534586395, "loss": 2.2345, "step": 31232 }, { "epoch": 0.8381547874624302, "grad_norm": 0.1806640625, "learning_rate": 0.0008647865361127148, "loss": 2.2688, "step": 31233 }, { "epoch": 0.8381816230141692, "grad_norm": 0.1826171875, "learning_rate": 0.0008647613181859266, "loss": 2.3263, "step": 31234 }, { "epoch": 0.8382084585659081, "grad_norm": 0.1826171875, "learning_rate": 0.0008647360996783298, "loss": 2.3141, "step": 31235 }, { "epoch": 0.8382352941176471, "grad_norm": 0.177734375, "learning_rate": 0.00086471088058998, "loss": 2.2484, "step": 31236 }, { "epoch": 0.838262129669386, "grad_norm": 0.1865234375, "learning_rate": 0.0008646856609209324, "loss": 2.2691, "step": 31237 }, { "epoch": 0.838288965221125, "grad_norm": 0.185546875, "learning_rate": 0.0008646604406712424, "loss": 2.3337, "step": 31238 }, { "epoch": 0.8383158007728639, "grad_norm": 0.1865234375, "learning_rate": 0.0008646352198409651, "loss": 2.2608, "step": 31239 }, { "epoch": 0.8383426363246028, "grad_norm": 0.181640625, "learning_rate": 0.0008646099984301562, "loss": 2.2565, "step": 31240 }, { "epoch": 0.8383694718763418, "grad_norm": 0.1796875, "learning_rate": 0.0008645847764388705, "loss": 2.3047, "step": 31241 }, { "epoch": 0.8383963074280807, "grad_norm": 0.1875, "learning_rate": 0.0008645595538671638, "loss": 2.2445, "step": 31242 }, { "epoch": 0.8384231429798197, "grad_norm": 0.1845703125, "learning_rate": 0.0008645343307150914, "loss": 2.2946, "step": 31243 }, { "epoch": 0.8384499785315586, "grad_norm": 0.181640625, "learning_rate": 0.0008645091069827082, "loss": 2.234, "step": 31244 }, { "epoch": 0.8384768140832976, "grad_norm": 0.1806640625, "learning_rate": 0.0008644838826700703, "loss": 2.2808, "step": 31245 }, { "epoch": 0.8385036496350365, "grad_norm": 0.181640625, "learning_rate": 0.0008644586577772322, "loss": 2.2775, "step": 31246 }, { "epoch": 0.8385304851867754, "grad_norm": 0.1748046875, "learning_rate": 0.0008644334323042496, "loss": 2.2043, "step": 31247 }, { "epoch": 0.8385573207385144, "grad_norm": 0.1826171875, "learning_rate": 0.000864408206251178, "loss": 2.18, "step": 31248 }, { "epoch": 0.8385841562902533, "grad_norm": 0.189453125, "learning_rate": 0.0008643829796180726, "loss": 2.3047, "step": 31249 }, { "epoch": 0.8386109918419923, "grad_norm": 0.1787109375, "learning_rate": 0.0008643577524049886, "loss": 2.18, "step": 31250 }, { "epoch": 0.8386378273937312, "grad_norm": 0.1796875, "learning_rate": 0.0008643325246119817, "loss": 2.2182, "step": 31251 }, { "epoch": 0.8386646629454702, "grad_norm": 0.1787109375, "learning_rate": 0.0008643072962391067, "loss": 2.2122, "step": 31252 }, { "epoch": 0.8386914984972091, "grad_norm": 0.181640625, "learning_rate": 0.0008642820672864196, "loss": 2.2515, "step": 31253 }, { "epoch": 0.838718334048948, "grad_norm": 0.1845703125, "learning_rate": 0.0008642568377539751, "loss": 2.264, "step": 31254 }, { "epoch": 0.838745169600687, "grad_norm": 0.1826171875, "learning_rate": 0.0008642316076418288, "loss": 2.208, "step": 31255 }, { "epoch": 0.8387720051524259, "grad_norm": 0.18359375, "learning_rate": 0.0008642063769500365, "loss": 2.2891, "step": 31256 }, { "epoch": 0.8387988407041649, "grad_norm": 0.1796875, "learning_rate": 0.0008641811456786527, "loss": 2.2299, "step": 31257 }, { "epoch": 0.8388256762559039, "grad_norm": 0.1845703125, "learning_rate": 0.0008641559138277333, "loss": 2.2494, "step": 31258 }, { "epoch": 0.8388525118076428, "grad_norm": 0.185546875, "learning_rate": 0.0008641306813973337, "loss": 2.2465, "step": 31259 }, { "epoch": 0.8388793473593817, "grad_norm": 0.181640625, "learning_rate": 0.0008641054483875091, "loss": 2.2885, "step": 31260 }, { "epoch": 0.8389061829111206, "grad_norm": 0.1796875, "learning_rate": 0.0008640802147983146, "loss": 2.2116, "step": 31261 }, { "epoch": 0.8389330184628596, "grad_norm": 0.1826171875, "learning_rate": 0.0008640549806298058, "loss": 2.2106, "step": 31262 }, { "epoch": 0.8389598540145985, "grad_norm": 0.1845703125, "learning_rate": 0.0008640297458820383, "loss": 2.2844, "step": 31263 }, { "epoch": 0.8389866895663375, "grad_norm": 0.1826171875, "learning_rate": 0.0008640045105550672, "loss": 2.2, "step": 31264 }, { "epoch": 0.8390135251180765, "grad_norm": 0.185546875, "learning_rate": 0.0008639792746489478, "loss": 2.2696, "step": 31265 }, { "epoch": 0.8390403606698154, "grad_norm": 0.1806640625, "learning_rate": 0.0008639540381637354, "loss": 2.1751, "step": 31266 }, { "epoch": 0.8390671962215543, "grad_norm": 0.1845703125, "learning_rate": 0.0008639288010994858, "loss": 2.2223, "step": 31267 }, { "epoch": 0.8390940317732932, "grad_norm": 0.181640625, "learning_rate": 0.0008639035634562538, "loss": 2.3276, "step": 31268 }, { "epoch": 0.8391208673250322, "grad_norm": 0.181640625, "learning_rate": 0.000863878325234095, "loss": 2.2193, "step": 31269 }, { "epoch": 0.8391477028767711, "grad_norm": 0.1845703125, "learning_rate": 0.000863853086433065, "loss": 2.281, "step": 31270 }, { "epoch": 0.8391745384285101, "grad_norm": 0.1826171875, "learning_rate": 0.000863827847053219, "loss": 2.165, "step": 31271 }, { "epoch": 0.8392013739802491, "grad_norm": 0.1826171875, "learning_rate": 0.000863802607094612, "loss": 2.24, "step": 31272 }, { "epoch": 0.839228209531988, "grad_norm": 0.1796875, "learning_rate": 0.0008637773665572998, "loss": 2.1956, "step": 31273 }, { "epoch": 0.839255045083727, "grad_norm": 0.185546875, "learning_rate": 0.000863752125441338, "loss": 2.2684, "step": 31274 }, { "epoch": 0.8392818806354658, "grad_norm": 0.1787109375, "learning_rate": 0.0008637268837467811, "loss": 2.2522, "step": 31275 }, { "epoch": 0.8393087161872048, "grad_norm": 0.1845703125, "learning_rate": 0.0008637016414736854, "loss": 2.2542, "step": 31276 }, { "epoch": 0.8393355517389438, "grad_norm": 0.1845703125, "learning_rate": 0.0008636763986221058, "loss": 2.2209, "step": 31277 }, { "epoch": 0.8393623872906827, "grad_norm": 0.1904296875, "learning_rate": 0.0008636511551920977, "loss": 2.2542, "step": 31278 }, { "epoch": 0.8393892228424217, "grad_norm": 0.189453125, "learning_rate": 0.0008636259111837164, "loss": 2.3173, "step": 31279 }, { "epoch": 0.8394160583941606, "grad_norm": 0.1796875, "learning_rate": 0.0008636006665970176, "loss": 2.2685, "step": 31280 }, { "epoch": 0.8394428939458995, "grad_norm": 0.1796875, "learning_rate": 0.0008635754214320565, "loss": 2.162, "step": 31281 }, { "epoch": 0.8394697294976384, "grad_norm": 0.1796875, "learning_rate": 0.0008635501756888884, "loss": 2.2332, "step": 31282 }, { "epoch": 0.8394965650493774, "grad_norm": 0.1826171875, "learning_rate": 0.0008635249293675688, "loss": 2.2698, "step": 31283 }, { "epoch": 0.8395234006011164, "grad_norm": 0.185546875, "learning_rate": 0.0008634996824681528, "loss": 2.2684, "step": 31284 }, { "epoch": 0.8395502361528553, "grad_norm": 0.1826171875, "learning_rate": 0.0008634744349906965, "loss": 2.1487, "step": 31285 }, { "epoch": 0.8395770717045943, "grad_norm": 0.18359375, "learning_rate": 0.0008634491869352544, "loss": 2.2555, "step": 31286 }, { "epoch": 0.8396039072563332, "grad_norm": 0.1806640625, "learning_rate": 0.0008634239383018825, "loss": 2.2178, "step": 31287 }, { "epoch": 0.8396307428080722, "grad_norm": 0.18359375, "learning_rate": 0.0008633986890906358, "loss": 2.3327, "step": 31288 }, { "epoch": 0.839657578359811, "grad_norm": 0.1865234375, "learning_rate": 0.00086337343930157, "loss": 2.3039, "step": 31289 }, { "epoch": 0.83968441391155, "grad_norm": 0.1826171875, "learning_rate": 0.0008633481889347403, "loss": 2.2659, "step": 31290 }, { "epoch": 0.839711249463289, "grad_norm": 0.181640625, "learning_rate": 0.000863322937990202, "loss": 2.2284, "step": 31291 }, { "epoch": 0.8397380850150279, "grad_norm": 0.185546875, "learning_rate": 0.0008632976864680111, "loss": 2.2803, "step": 31292 }, { "epoch": 0.8397649205667669, "grad_norm": 0.1806640625, "learning_rate": 0.0008632724343682223, "loss": 2.2662, "step": 31293 }, { "epoch": 0.8397917561185058, "grad_norm": 0.1845703125, "learning_rate": 0.000863247181690891, "loss": 2.2656, "step": 31294 }, { "epoch": 0.8398185916702448, "grad_norm": 0.1787109375, "learning_rate": 0.0008632219284360731, "loss": 2.3008, "step": 31295 }, { "epoch": 0.8398454272219836, "grad_norm": 0.1845703125, "learning_rate": 0.0008631966746038236, "loss": 2.2343, "step": 31296 }, { "epoch": 0.8398722627737226, "grad_norm": 0.181640625, "learning_rate": 0.000863171420194198, "loss": 2.2415, "step": 31297 }, { "epoch": 0.8398990983254616, "grad_norm": 0.1767578125, "learning_rate": 0.0008631461652072519, "loss": 2.2127, "step": 31298 }, { "epoch": 0.8399259338772005, "grad_norm": 0.177734375, "learning_rate": 0.0008631209096430405, "loss": 2.2064, "step": 31299 }, { "epoch": 0.8399527694289395, "grad_norm": 0.1865234375, "learning_rate": 0.0008630956535016192, "loss": 2.2632, "step": 31300 }, { "epoch": 0.8399796049806784, "grad_norm": 0.1845703125, "learning_rate": 0.0008630703967830434, "loss": 2.3327, "step": 31301 }, { "epoch": 0.8400064405324174, "grad_norm": 0.1796875, "learning_rate": 0.0008630451394873686, "loss": 2.2326, "step": 31302 }, { "epoch": 0.8400332760841563, "grad_norm": 0.1826171875, "learning_rate": 0.0008630198816146501, "loss": 2.2835, "step": 31303 }, { "epoch": 0.8400601116358952, "grad_norm": 0.1826171875, "learning_rate": 0.0008629946231649433, "loss": 2.2584, "step": 31304 }, { "epoch": 0.8400869471876342, "grad_norm": 0.18359375, "learning_rate": 0.0008629693641383037, "loss": 2.2323, "step": 31305 }, { "epoch": 0.8401137827393731, "grad_norm": 0.1826171875, "learning_rate": 0.0008629441045347869, "loss": 2.269, "step": 31306 }, { "epoch": 0.8401406182911121, "grad_norm": 0.1806640625, "learning_rate": 0.000862918844354448, "loss": 2.1955, "step": 31307 }, { "epoch": 0.840167453842851, "grad_norm": 0.1865234375, "learning_rate": 0.0008628935835973423, "loss": 2.3162, "step": 31308 }, { "epoch": 0.84019428939459, "grad_norm": 0.177734375, "learning_rate": 0.0008628683222635256, "loss": 2.2463, "step": 31309 }, { "epoch": 0.840221124946329, "grad_norm": 0.1865234375, "learning_rate": 0.0008628430603530531, "loss": 2.2581, "step": 31310 }, { "epoch": 0.8402479604980678, "grad_norm": 0.1865234375, "learning_rate": 0.0008628177978659802, "loss": 2.2926, "step": 31311 }, { "epoch": 0.8402747960498068, "grad_norm": 0.173828125, "learning_rate": 0.0008627925348023625, "loss": 2.19, "step": 31312 }, { "epoch": 0.8403016316015457, "grad_norm": 0.1845703125, "learning_rate": 0.0008627672711622553, "loss": 2.206, "step": 31313 }, { "epoch": 0.8403284671532847, "grad_norm": 0.18359375, "learning_rate": 0.0008627420069457138, "loss": 2.2451, "step": 31314 }, { "epoch": 0.8403553027050236, "grad_norm": 0.181640625, "learning_rate": 0.0008627167421527939, "loss": 2.326, "step": 31315 }, { "epoch": 0.8403821382567626, "grad_norm": 0.18359375, "learning_rate": 0.0008626914767835506, "loss": 2.2775, "step": 31316 }, { "epoch": 0.8404089738085015, "grad_norm": 0.1826171875, "learning_rate": 0.0008626662108380394, "loss": 2.3, "step": 31317 }, { "epoch": 0.8404358093602404, "grad_norm": 0.1826171875, "learning_rate": 0.0008626409443163161, "loss": 2.2088, "step": 31318 }, { "epoch": 0.8404626449119794, "grad_norm": 0.1865234375, "learning_rate": 0.0008626156772184355, "loss": 2.238, "step": 31319 }, { "epoch": 0.8404894804637183, "grad_norm": 0.189453125, "learning_rate": 0.0008625904095444536, "loss": 2.3231, "step": 31320 }, { "epoch": 0.8405163160154573, "grad_norm": 0.1875, "learning_rate": 0.0008625651412944254, "loss": 2.2726, "step": 31321 }, { "epoch": 0.8405431515671962, "grad_norm": 0.181640625, "learning_rate": 0.0008625398724684067, "loss": 2.2239, "step": 31322 }, { "epoch": 0.8405699871189352, "grad_norm": 0.1826171875, "learning_rate": 0.0008625146030664528, "loss": 2.2564, "step": 31323 }, { "epoch": 0.8405968226706741, "grad_norm": 0.181640625, "learning_rate": 0.0008624893330886191, "loss": 2.2723, "step": 31324 }, { "epoch": 0.840623658222413, "grad_norm": 0.181640625, "learning_rate": 0.0008624640625349609, "loss": 2.2333, "step": 31325 }, { "epoch": 0.840650493774152, "grad_norm": 0.1796875, "learning_rate": 0.0008624387914055336, "loss": 2.3184, "step": 31326 }, { "epoch": 0.8406773293258909, "grad_norm": 0.1865234375, "learning_rate": 0.0008624135197003931, "loss": 2.2442, "step": 31327 }, { "epoch": 0.8407041648776299, "grad_norm": 0.1845703125, "learning_rate": 0.0008623882474195944, "loss": 2.2087, "step": 31328 }, { "epoch": 0.8407310004293689, "grad_norm": 0.18359375, "learning_rate": 0.0008623629745631933, "loss": 2.2316, "step": 31329 }, { "epoch": 0.8407578359811078, "grad_norm": 0.177734375, "learning_rate": 0.0008623377011312449, "loss": 2.1807, "step": 31330 }, { "epoch": 0.8407846715328468, "grad_norm": 0.1787109375, "learning_rate": 0.0008623124271238046, "loss": 2.235, "step": 31331 }, { "epoch": 0.8408115070845856, "grad_norm": 0.1845703125, "learning_rate": 0.0008622871525409281, "loss": 2.2553, "step": 31332 }, { "epoch": 0.8408383426363246, "grad_norm": 0.185546875, "learning_rate": 0.0008622618773826708, "loss": 2.2545, "step": 31333 }, { "epoch": 0.8408651781880635, "grad_norm": 0.177734375, "learning_rate": 0.000862236601649088, "loss": 2.1553, "step": 31334 }, { "epoch": 0.8408920137398025, "grad_norm": 0.1806640625, "learning_rate": 0.0008622113253402353, "loss": 2.2708, "step": 31335 }, { "epoch": 0.8409188492915415, "grad_norm": 0.1806640625, "learning_rate": 0.0008621860484561681, "loss": 2.193, "step": 31336 }, { "epoch": 0.8409456848432804, "grad_norm": 0.181640625, "learning_rate": 0.000862160770996942, "loss": 2.2937, "step": 31337 }, { "epoch": 0.8409725203950194, "grad_norm": 0.1796875, "learning_rate": 0.0008621354929626121, "loss": 2.193, "step": 31338 }, { "epoch": 0.8409993559467582, "grad_norm": 0.19140625, "learning_rate": 0.0008621102143532341, "loss": 2.1834, "step": 31339 }, { "epoch": 0.8410261914984972, "grad_norm": 0.18359375, "learning_rate": 0.0008620849351688635, "loss": 2.2421, "step": 31340 }, { "epoch": 0.8410530270502361, "grad_norm": 0.181640625, "learning_rate": 0.0008620596554095555, "loss": 2.2702, "step": 31341 }, { "epoch": 0.8410798626019751, "grad_norm": 0.1865234375, "learning_rate": 0.0008620343750753658, "loss": 2.2572, "step": 31342 }, { "epoch": 0.8411066981537141, "grad_norm": 0.18359375, "learning_rate": 0.0008620090941663499, "loss": 2.2063, "step": 31343 }, { "epoch": 0.841133533705453, "grad_norm": 0.181640625, "learning_rate": 0.0008619838126825629, "loss": 2.252, "step": 31344 }, { "epoch": 0.841160369257192, "grad_norm": 0.18359375, "learning_rate": 0.0008619585306240605, "loss": 2.2491, "step": 31345 }, { "epoch": 0.8411872048089308, "grad_norm": 0.1796875, "learning_rate": 0.0008619332479908983, "loss": 2.1313, "step": 31346 }, { "epoch": 0.8412140403606698, "grad_norm": 0.1826171875, "learning_rate": 0.0008619079647831315, "loss": 2.2129, "step": 31347 }, { "epoch": 0.8412408759124088, "grad_norm": 0.177734375, "learning_rate": 0.0008618826810008159, "loss": 2.2026, "step": 31348 }, { "epoch": 0.8412677114641477, "grad_norm": 0.181640625, "learning_rate": 0.0008618573966440065, "loss": 2.2526, "step": 31349 }, { "epoch": 0.8412945470158867, "grad_norm": 0.1826171875, "learning_rate": 0.0008618321117127591, "loss": 2.2205, "step": 31350 }, { "epoch": 0.8413213825676256, "grad_norm": 0.18359375, "learning_rate": 0.0008618068262071292, "loss": 2.2504, "step": 31351 }, { "epoch": 0.8413482181193646, "grad_norm": 0.1826171875, "learning_rate": 0.0008617815401271719, "loss": 2.2204, "step": 31352 }, { "epoch": 0.8413750536711034, "grad_norm": 0.185546875, "learning_rate": 0.000861756253472943, "loss": 2.3003, "step": 31353 }, { "epoch": 0.8414018892228424, "grad_norm": 0.1806640625, "learning_rate": 0.0008617309662444979, "loss": 2.1934, "step": 31354 }, { "epoch": 0.8414287247745814, "grad_norm": 0.185546875, "learning_rate": 0.0008617056784418922, "loss": 2.258, "step": 31355 }, { "epoch": 0.8414555603263203, "grad_norm": 0.1826171875, "learning_rate": 0.0008616803900651811, "loss": 2.2083, "step": 31356 }, { "epoch": 0.8414823958780593, "grad_norm": 0.181640625, "learning_rate": 0.0008616551011144201, "loss": 2.2659, "step": 31357 }, { "epoch": 0.8415092314297982, "grad_norm": 0.1787109375, "learning_rate": 0.0008616298115896652, "loss": 2.2518, "step": 31358 }, { "epoch": 0.8415360669815372, "grad_norm": 0.18359375, "learning_rate": 0.000861604521490971, "loss": 2.3275, "step": 31359 }, { "epoch": 0.841562902533276, "grad_norm": 0.1845703125, "learning_rate": 0.0008615792308183936, "loss": 2.2745, "step": 31360 }, { "epoch": 0.841589738085015, "grad_norm": 0.19140625, "learning_rate": 0.0008615539395719885, "loss": 2.3469, "step": 31361 }, { "epoch": 0.841616573636754, "grad_norm": 0.1865234375, "learning_rate": 0.0008615286477518108, "loss": 2.2301, "step": 31362 }, { "epoch": 0.8416434091884929, "grad_norm": 0.1767578125, "learning_rate": 0.0008615033553579164, "loss": 2.2041, "step": 31363 }, { "epoch": 0.8416702447402319, "grad_norm": 0.1796875, "learning_rate": 0.0008614780623903603, "loss": 2.1733, "step": 31364 }, { "epoch": 0.8416970802919708, "grad_norm": 0.185546875, "learning_rate": 0.0008614527688491985, "loss": 2.2348, "step": 31365 }, { "epoch": 0.8417239158437098, "grad_norm": 0.1845703125, "learning_rate": 0.0008614274747344861, "loss": 2.1982, "step": 31366 }, { "epoch": 0.8417507513954486, "grad_norm": 0.181640625, "learning_rate": 0.0008614021800462786, "loss": 2.1831, "step": 31367 }, { "epoch": 0.8417775869471876, "grad_norm": 0.1826171875, "learning_rate": 0.0008613768847846318, "loss": 2.2215, "step": 31368 }, { "epoch": 0.8418044224989266, "grad_norm": 0.18359375, "learning_rate": 0.0008613515889496012, "loss": 2.3325, "step": 31369 }, { "epoch": 0.8418312580506655, "grad_norm": 0.1787109375, "learning_rate": 0.0008613262925412418, "loss": 2.2939, "step": 31370 }, { "epoch": 0.8418580936024045, "grad_norm": 0.1787109375, "learning_rate": 0.0008613009955596094, "loss": 2.2677, "step": 31371 }, { "epoch": 0.8418849291541434, "grad_norm": 0.1826171875, "learning_rate": 0.0008612756980047598, "loss": 2.2598, "step": 31372 }, { "epoch": 0.8419117647058824, "grad_norm": 0.185546875, "learning_rate": 0.000861250399876748, "loss": 2.2428, "step": 31373 }, { "epoch": 0.8419386002576214, "grad_norm": 0.181640625, "learning_rate": 0.0008612251011756296, "loss": 2.2458, "step": 31374 }, { "epoch": 0.8419654358093602, "grad_norm": 0.177734375, "learning_rate": 0.0008611998019014603, "loss": 2.2439, "step": 31375 }, { "epoch": 0.8419922713610992, "grad_norm": 0.1806640625, "learning_rate": 0.0008611745020542955, "loss": 2.2898, "step": 31376 }, { "epoch": 0.8420191069128381, "grad_norm": 0.181640625, "learning_rate": 0.0008611492016341905, "loss": 2.2536, "step": 31377 }, { "epoch": 0.8420459424645771, "grad_norm": 0.1796875, "learning_rate": 0.000861123900641201, "loss": 2.2599, "step": 31378 }, { "epoch": 0.842072778016316, "grad_norm": 0.177734375, "learning_rate": 0.0008610985990753828, "loss": 2.1976, "step": 31379 }, { "epoch": 0.842099613568055, "grad_norm": 0.1826171875, "learning_rate": 0.0008610732969367908, "loss": 2.192, "step": 31380 }, { "epoch": 0.842126449119794, "grad_norm": 0.1865234375, "learning_rate": 0.0008610479942254808, "loss": 2.2223, "step": 31381 }, { "epoch": 0.8421532846715328, "grad_norm": 0.185546875, "learning_rate": 0.0008610226909415084, "loss": 2.264, "step": 31382 }, { "epoch": 0.8421801202232718, "grad_norm": 0.19921875, "learning_rate": 0.000860997387084929, "loss": 2.3543, "step": 31383 }, { "epoch": 0.8422069557750107, "grad_norm": 0.1953125, "learning_rate": 0.0008609720826557981, "loss": 2.2036, "step": 31384 }, { "epoch": 0.8422337913267497, "grad_norm": 0.1796875, "learning_rate": 0.0008609467776541712, "loss": 2.2232, "step": 31385 }, { "epoch": 0.8422606268784886, "grad_norm": 0.1884765625, "learning_rate": 0.0008609214720801039, "loss": 2.2753, "step": 31386 }, { "epoch": 0.8422874624302276, "grad_norm": 0.18359375, "learning_rate": 0.0008608961659336517, "loss": 2.2402, "step": 31387 }, { "epoch": 0.8423142979819666, "grad_norm": 0.1767578125, "learning_rate": 0.00086087085921487, "loss": 2.223, "step": 31388 }, { "epoch": 0.8423411335337054, "grad_norm": 0.1826171875, "learning_rate": 0.0008608455519238142, "loss": 2.2804, "step": 31389 }, { "epoch": 0.8423679690854444, "grad_norm": 0.1875, "learning_rate": 0.0008608202440605401, "loss": 2.2603, "step": 31390 }, { "epoch": 0.8423948046371833, "grad_norm": 0.1845703125, "learning_rate": 0.0008607949356251034, "loss": 2.224, "step": 31391 }, { "epoch": 0.8424216401889223, "grad_norm": 0.181640625, "learning_rate": 0.0008607696266175591, "loss": 2.2284, "step": 31392 }, { "epoch": 0.8424484757406612, "grad_norm": 0.1865234375, "learning_rate": 0.0008607443170379629, "loss": 2.261, "step": 31393 }, { "epoch": 0.8424753112924002, "grad_norm": 0.1796875, "learning_rate": 0.0008607190068863706, "loss": 2.2288, "step": 31394 }, { "epoch": 0.8425021468441392, "grad_norm": 0.1875, "learning_rate": 0.0008606936961628373, "loss": 2.2854, "step": 31395 }, { "epoch": 0.842528982395878, "grad_norm": 0.1806640625, "learning_rate": 0.000860668384867419, "loss": 2.1834, "step": 31396 }, { "epoch": 0.842555817947617, "grad_norm": 0.1796875, "learning_rate": 0.0008606430730001706, "loss": 2.2199, "step": 31397 }, { "epoch": 0.8425826534993559, "grad_norm": 0.181640625, "learning_rate": 0.0008606177605611482, "loss": 2.2916, "step": 31398 }, { "epoch": 0.8426094890510949, "grad_norm": 0.177734375, "learning_rate": 0.0008605924475504071, "loss": 2.1984, "step": 31399 }, { "epoch": 0.8426363246028339, "grad_norm": 0.1796875, "learning_rate": 0.0008605671339680027, "loss": 2.219, "step": 31400 }, { "epoch": 0.8426631601545728, "grad_norm": 0.1796875, "learning_rate": 0.0008605418198139908, "loss": 2.2127, "step": 31401 }, { "epoch": 0.8426899957063118, "grad_norm": 0.1806640625, "learning_rate": 0.0008605165050884268, "loss": 2.2791, "step": 31402 }, { "epoch": 0.8427168312580506, "grad_norm": 0.1806640625, "learning_rate": 0.000860491189791366, "loss": 2.2923, "step": 31403 }, { "epoch": 0.8427436668097896, "grad_norm": 0.1806640625, "learning_rate": 0.0008604658739228643, "loss": 2.2313, "step": 31404 }, { "epoch": 0.8427705023615285, "grad_norm": 0.1796875, "learning_rate": 0.0008604405574829774, "loss": 2.1951, "step": 31405 }, { "epoch": 0.8427973379132675, "grad_norm": 0.1845703125, "learning_rate": 0.0008604152404717602, "loss": 2.2876, "step": 31406 }, { "epoch": 0.8428241734650065, "grad_norm": 0.1787109375, "learning_rate": 0.0008603899228892687, "loss": 2.2336, "step": 31407 }, { "epoch": 0.8428510090167454, "grad_norm": 0.1796875, "learning_rate": 0.0008603646047355583, "loss": 2.2401, "step": 31408 }, { "epoch": 0.8428778445684844, "grad_norm": 0.1806640625, "learning_rate": 0.0008603392860106845, "loss": 2.2242, "step": 31409 }, { "epoch": 0.8429046801202232, "grad_norm": 0.1826171875, "learning_rate": 0.0008603139667147031, "loss": 2.315, "step": 31410 }, { "epoch": 0.8429315156719622, "grad_norm": 0.181640625, "learning_rate": 0.0008602886468476692, "loss": 2.266, "step": 31411 }, { "epoch": 0.8429583512237011, "grad_norm": 0.185546875, "learning_rate": 0.0008602633264096387, "loss": 2.2802, "step": 31412 }, { "epoch": 0.8429851867754401, "grad_norm": 0.1767578125, "learning_rate": 0.000860238005400667, "loss": 2.275, "step": 31413 }, { "epoch": 0.8430120223271791, "grad_norm": 0.1806640625, "learning_rate": 0.0008602126838208097, "loss": 2.2895, "step": 31414 }, { "epoch": 0.843038857878918, "grad_norm": 0.1806640625, "learning_rate": 0.0008601873616701222, "loss": 2.2337, "step": 31415 }, { "epoch": 0.843065693430657, "grad_norm": 0.1787109375, "learning_rate": 0.0008601620389486605, "loss": 2.2406, "step": 31416 }, { "epoch": 0.8430925289823958, "grad_norm": 0.177734375, "learning_rate": 0.0008601367156564795, "loss": 2.1999, "step": 31417 }, { "epoch": 0.8431193645341348, "grad_norm": 0.1826171875, "learning_rate": 0.0008601113917936353, "loss": 2.2173, "step": 31418 }, { "epoch": 0.8431462000858738, "grad_norm": 0.1796875, "learning_rate": 0.000860086067360183, "loss": 2.2583, "step": 31419 }, { "epoch": 0.8431730356376127, "grad_norm": 0.185546875, "learning_rate": 0.0008600607423561785, "loss": 2.3006, "step": 31420 }, { "epoch": 0.8431998711893517, "grad_norm": 0.181640625, "learning_rate": 0.0008600354167816773, "loss": 2.249, "step": 31421 }, { "epoch": 0.8432267067410906, "grad_norm": 0.177734375, "learning_rate": 0.0008600100906367349, "loss": 2.2462, "step": 31422 }, { "epoch": 0.8432535422928296, "grad_norm": 0.1904296875, "learning_rate": 0.0008599847639214069, "loss": 2.2469, "step": 31423 }, { "epoch": 0.8432803778445684, "grad_norm": 0.181640625, "learning_rate": 0.0008599594366357487, "loss": 2.2963, "step": 31424 }, { "epoch": 0.8433072133963074, "grad_norm": 0.1904296875, "learning_rate": 0.000859934108779816, "loss": 2.2242, "step": 31425 }, { "epoch": 0.8433340489480464, "grad_norm": 0.181640625, "learning_rate": 0.0008599087803536642, "loss": 2.3006, "step": 31426 }, { "epoch": 0.8433608844997853, "grad_norm": 0.177734375, "learning_rate": 0.0008598834513573494, "loss": 2.2259, "step": 31427 }, { "epoch": 0.8433877200515243, "grad_norm": 0.1845703125, "learning_rate": 0.0008598581217909264, "loss": 2.2742, "step": 31428 }, { "epoch": 0.8434145556032632, "grad_norm": 0.1787109375, "learning_rate": 0.0008598327916544512, "loss": 2.2192, "step": 31429 }, { "epoch": 0.8434413911550022, "grad_norm": 0.1806640625, "learning_rate": 0.0008598074609479794, "loss": 2.2394, "step": 31430 }, { "epoch": 0.843468226706741, "grad_norm": 0.1826171875, "learning_rate": 0.0008597821296715664, "loss": 2.2432, "step": 31431 }, { "epoch": 0.84349506225848, "grad_norm": 0.1796875, "learning_rate": 0.0008597567978252679, "loss": 2.2161, "step": 31432 }, { "epoch": 0.843521897810219, "grad_norm": 0.1826171875, "learning_rate": 0.0008597314654091394, "loss": 2.213, "step": 31433 }, { "epoch": 0.8435487333619579, "grad_norm": 0.1826171875, "learning_rate": 0.0008597061324232363, "loss": 2.2098, "step": 31434 }, { "epoch": 0.8435755689136969, "grad_norm": 0.1787109375, "learning_rate": 0.0008596807988676146, "loss": 2.2616, "step": 31435 }, { "epoch": 0.8436024044654358, "grad_norm": 0.181640625, "learning_rate": 0.0008596554647423294, "loss": 2.2567, "step": 31436 }, { "epoch": 0.8436292400171748, "grad_norm": 0.1806640625, "learning_rate": 0.0008596301300474366, "loss": 2.2245, "step": 31437 }, { "epoch": 0.8436560755689136, "grad_norm": 0.1796875, "learning_rate": 0.0008596047947829918, "loss": 2.2371, "step": 31438 }, { "epoch": 0.8436829111206526, "grad_norm": 0.1826171875, "learning_rate": 0.0008595794589490505, "loss": 2.264, "step": 31439 }, { "epoch": 0.8437097466723916, "grad_norm": 0.177734375, "learning_rate": 0.000859554122545668, "loss": 2.2248, "step": 31440 }, { "epoch": 0.8437365822241305, "grad_norm": 0.181640625, "learning_rate": 0.0008595287855729003, "loss": 2.2079, "step": 31441 }, { "epoch": 0.8437634177758695, "grad_norm": 0.1884765625, "learning_rate": 0.0008595034480308027, "loss": 2.2863, "step": 31442 }, { "epoch": 0.8437902533276084, "grad_norm": 0.1806640625, "learning_rate": 0.0008594781099194308, "loss": 2.2192, "step": 31443 }, { "epoch": 0.8438170888793474, "grad_norm": 0.1875, "learning_rate": 0.0008594527712388403, "loss": 2.2851, "step": 31444 }, { "epoch": 0.8438439244310864, "grad_norm": 0.181640625, "learning_rate": 0.0008594274319890869, "loss": 2.2365, "step": 31445 }, { "epoch": 0.8438707599828252, "grad_norm": 0.181640625, "learning_rate": 0.000859402092170226, "loss": 2.2871, "step": 31446 }, { "epoch": 0.8438975955345642, "grad_norm": 0.185546875, "learning_rate": 0.0008593767517823132, "loss": 2.2687, "step": 31447 }, { "epoch": 0.8439244310863031, "grad_norm": 0.1787109375, "learning_rate": 0.0008593514108254039, "loss": 2.1785, "step": 31448 }, { "epoch": 0.8439512666380421, "grad_norm": 0.1796875, "learning_rate": 0.0008593260692995543, "loss": 2.2505, "step": 31449 }, { "epoch": 0.843978102189781, "grad_norm": 0.1845703125, "learning_rate": 0.0008593007272048193, "loss": 2.2497, "step": 31450 }, { "epoch": 0.84400493774152, "grad_norm": 0.1767578125, "learning_rate": 0.0008592753845412548, "loss": 2.2852, "step": 31451 }, { "epoch": 0.844031773293259, "grad_norm": 0.1796875, "learning_rate": 0.0008592500413089165, "loss": 2.2184, "step": 31452 }, { "epoch": 0.8440586088449978, "grad_norm": 0.1796875, "learning_rate": 0.0008592246975078601, "loss": 2.165, "step": 31453 }, { "epoch": 0.8440854443967368, "grad_norm": 0.1826171875, "learning_rate": 0.0008591993531381406, "loss": 2.2118, "step": 31454 }, { "epoch": 0.8441122799484757, "grad_norm": 0.1796875, "learning_rate": 0.0008591740081998142, "loss": 2.249, "step": 31455 }, { "epoch": 0.8441391155002147, "grad_norm": 0.1806640625, "learning_rate": 0.0008591486626929362, "loss": 2.3298, "step": 31456 }, { "epoch": 0.8441659510519536, "grad_norm": 0.1806640625, "learning_rate": 0.0008591233166175622, "loss": 2.2296, "step": 31457 }, { "epoch": 0.8441927866036926, "grad_norm": 0.1787109375, "learning_rate": 0.0008590979699737479, "loss": 2.1901, "step": 31458 }, { "epoch": 0.8442196221554316, "grad_norm": 0.18359375, "learning_rate": 0.0008590726227615489, "loss": 2.2775, "step": 31459 }, { "epoch": 0.8442464577071704, "grad_norm": 0.1806640625, "learning_rate": 0.000859047274981021, "loss": 2.2221, "step": 31460 }, { "epoch": 0.8442732932589094, "grad_norm": 0.1787109375, "learning_rate": 0.0008590219266322194, "loss": 2.3047, "step": 31461 }, { "epoch": 0.8443001288106483, "grad_norm": 0.18359375, "learning_rate": 0.0008589965777151997, "loss": 2.2934, "step": 31462 }, { "epoch": 0.8443269643623873, "grad_norm": 0.1806640625, "learning_rate": 0.000858971228230018, "loss": 2.1731, "step": 31463 }, { "epoch": 0.8443537999141262, "grad_norm": 0.1826171875, "learning_rate": 0.0008589458781767295, "loss": 2.3077, "step": 31464 }, { "epoch": 0.8443806354658652, "grad_norm": 0.185546875, "learning_rate": 0.00085892052755539, "loss": 2.2136, "step": 31465 }, { "epoch": 0.8444074710176042, "grad_norm": 0.185546875, "learning_rate": 0.0008588951763660549, "loss": 2.3449, "step": 31466 }, { "epoch": 0.844434306569343, "grad_norm": 0.1796875, "learning_rate": 0.0008588698246087802, "loss": 2.2867, "step": 31467 }, { "epoch": 0.844461142121082, "grad_norm": 0.1875, "learning_rate": 0.000858844472283621, "loss": 2.2945, "step": 31468 }, { "epoch": 0.8444879776728209, "grad_norm": 0.18359375, "learning_rate": 0.0008588191193906333, "loss": 2.3062, "step": 31469 }, { "epoch": 0.8445148132245599, "grad_norm": 0.1767578125, "learning_rate": 0.0008587937659298726, "loss": 2.2007, "step": 31470 }, { "epoch": 0.8445416487762989, "grad_norm": 0.177734375, "learning_rate": 0.0008587684119013946, "loss": 2.2637, "step": 31471 }, { "epoch": 0.8445684843280378, "grad_norm": 0.193359375, "learning_rate": 0.0008587430573052546, "loss": 2.2509, "step": 31472 }, { "epoch": 0.8445953198797768, "grad_norm": 0.1796875, "learning_rate": 0.0008587177021415086, "loss": 2.2205, "step": 31473 }, { "epoch": 0.8446221554315156, "grad_norm": 0.1787109375, "learning_rate": 0.0008586923464102121, "loss": 2.2492, "step": 31474 }, { "epoch": 0.8446489909832546, "grad_norm": 0.1845703125, "learning_rate": 0.0008586669901114207, "loss": 2.1778, "step": 31475 }, { "epoch": 0.8446758265349935, "grad_norm": 0.177734375, "learning_rate": 0.00085864163324519, "loss": 2.2297, "step": 31476 }, { "epoch": 0.8447026620867325, "grad_norm": 0.1787109375, "learning_rate": 0.0008586162758115756, "loss": 2.2285, "step": 31477 }, { "epoch": 0.8447294976384715, "grad_norm": 0.1806640625, "learning_rate": 0.0008585909178106334, "loss": 2.2291, "step": 31478 }, { "epoch": 0.8447563331902104, "grad_norm": 0.1875, "learning_rate": 0.0008585655592424185, "loss": 2.3168, "step": 31479 }, { "epoch": 0.8447831687419494, "grad_norm": 0.1904296875, "learning_rate": 0.000858540200106987, "loss": 2.3043, "step": 31480 }, { "epoch": 0.8448100042936882, "grad_norm": 0.1787109375, "learning_rate": 0.0008585148404043943, "loss": 2.1978, "step": 31481 }, { "epoch": 0.8448368398454272, "grad_norm": 0.181640625, "learning_rate": 0.0008584894801346961, "loss": 2.2152, "step": 31482 }, { "epoch": 0.8448636753971661, "grad_norm": 0.1796875, "learning_rate": 0.000858464119297948, "loss": 2.2529, "step": 31483 }, { "epoch": 0.8448905109489051, "grad_norm": 0.18359375, "learning_rate": 0.0008584387578942057, "loss": 2.2613, "step": 31484 }, { "epoch": 0.8449173465006441, "grad_norm": 0.19140625, "learning_rate": 0.000858413395923525, "loss": 2.2527, "step": 31485 }, { "epoch": 0.844944182052383, "grad_norm": 0.1806640625, "learning_rate": 0.0008583880333859611, "loss": 2.2484, "step": 31486 }, { "epoch": 0.844971017604122, "grad_norm": 0.1875, "learning_rate": 0.0008583626702815698, "loss": 2.3083, "step": 31487 }, { "epoch": 0.8449978531558608, "grad_norm": 0.1806640625, "learning_rate": 0.0008583373066104069, "loss": 2.2162, "step": 31488 }, { "epoch": 0.8450246887075998, "grad_norm": 0.1767578125, "learning_rate": 0.0008583119423725282, "loss": 2.2001, "step": 31489 }, { "epoch": 0.8450515242593388, "grad_norm": 0.1865234375, "learning_rate": 0.0008582865775679886, "loss": 2.2267, "step": 31490 }, { "epoch": 0.8450783598110777, "grad_norm": 0.1845703125, "learning_rate": 0.0008582612121968447, "loss": 2.2336, "step": 31491 }, { "epoch": 0.8451051953628167, "grad_norm": 0.18359375, "learning_rate": 0.0008582358462591516, "loss": 2.218, "step": 31492 }, { "epoch": 0.8451320309145556, "grad_norm": 0.177734375, "learning_rate": 0.0008582104797549649, "loss": 2.2675, "step": 31493 }, { "epoch": 0.8451588664662946, "grad_norm": 0.1845703125, "learning_rate": 0.0008581851126843403, "loss": 2.23, "step": 31494 }, { "epoch": 0.8451857020180334, "grad_norm": 0.1904296875, "learning_rate": 0.0008581597450473337, "loss": 2.2784, "step": 31495 }, { "epoch": 0.8452125375697724, "grad_norm": 0.1796875, "learning_rate": 0.0008581343768440005, "loss": 2.2564, "step": 31496 }, { "epoch": 0.8452393731215114, "grad_norm": 0.177734375, "learning_rate": 0.0008581090080743964, "loss": 2.1947, "step": 31497 }, { "epoch": 0.8452662086732503, "grad_norm": 0.181640625, "learning_rate": 0.0008580836387385772, "loss": 2.2905, "step": 31498 }, { "epoch": 0.8452930442249893, "grad_norm": 0.1796875, "learning_rate": 0.0008580582688365981, "loss": 2.2361, "step": 31499 }, { "epoch": 0.8453198797767282, "grad_norm": 0.177734375, "learning_rate": 0.0008580328983685155, "loss": 2.2902, "step": 31500 }, { "epoch": 0.8453467153284672, "grad_norm": 0.1787109375, "learning_rate": 0.0008580075273343843, "loss": 2.2411, "step": 31501 }, { "epoch": 0.845373550880206, "grad_norm": 0.1806640625, "learning_rate": 0.0008579821557342606, "loss": 2.2143, "step": 31502 }, { "epoch": 0.845400386431945, "grad_norm": 0.1806640625, "learning_rate": 0.0008579567835682, "loss": 2.2307, "step": 31503 }, { "epoch": 0.845427221983684, "grad_norm": 0.1796875, "learning_rate": 0.0008579314108362582, "loss": 2.2516, "step": 31504 }, { "epoch": 0.8454540575354229, "grad_norm": 0.1767578125, "learning_rate": 0.0008579060375384906, "loss": 2.1967, "step": 31505 }, { "epoch": 0.8454808930871619, "grad_norm": 0.18359375, "learning_rate": 0.0008578806636749532, "loss": 2.2617, "step": 31506 }, { "epoch": 0.8455077286389008, "grad_norm": 0.1787109375, "learning_rate": 0.0008578552892457012, "loss": 2.262, "step": 31507 }, { "epoch": 0.8455345641906398, "grad_norm": 0.1767578125, "learning_rate": 0.0008578299142507909, "loss": 2.2186, "step": 31508 }, { "epoch": 0.8455613997423786, "grad_norm": 0.1865234375, "learning_rate": 0.0008578045386902774, "loss": 2.274, "step": 31509 }, { "epoch": 0.8455882352941176, "grad_norm": 0.17578125, "learning_rate": 0.0008577791625642166, "loss": 2.2587, "step": 31510 }, { "epoch": 0.8456150708458566, "grad_norm": 0.177734375, "learning_rate": 0.0008577537858726643, "loss": 2.2745, "step": 31511 }, { "epoch": 0.8456419063975955, "grad_norm": 0.1845703125, "learning_rate": 0.0008577284086156759, "loss": 2.2822, "step": 31512 }, { "epoch": 0.8456687419493345, "grad_norm": 0.1806640625, "learning_rate": 0.0008577030307933072, "loss": 2.1944, "step": 31513 }, { "epoch": 0.8456955775010734, "grad_norm": 0.1806640625, "learning_rate": 0.000857677652405614, "loss": 2.2558, "step": 31514 }, { "epoch": 0.8457224130528124, "grad_norm": 0.18359375, "learning_rate": 0.0008576522734526516, "loss": 2.2361, "step": 31515 }, { "epoch": 0.8457492486045514, "grad_norm": 0.1787109375, "learning_rate": 0.000857626893934476, "loss": 2.1906, "step": 31516 }, { "epoch": 0.8457760841562902, "grad_norm": 0.1826171875, "learning_rate": 0.0008576015138511429, "loss": 2.225, "step": 31517 }, { "epoch": 0.8458029197080292, "grad_norm": 0.181640625, "learning_rate": 0.0008575761332027078, "loss": 2.2042, "step": 31518 }, { "epoch": 0.8458297552597681, "grad_norm": 0.181640625, "learning_rate": 0.0008575507519892265, "loss": 2.2243, "step": 31519 }, { "epoch": 0.8458565908115071, "grad_norm": 0.181640625, "learning_rate": 0.0008575253702107546, "loss": 2.3055, "step": 31520 }, { "epoch": 0.845883426363246, "grad_norm": 0.19140625, "learning_rate": 0.0008574999878673478, "loss": 2.2793, "step": 31521 }, { "epoch": 0.845910261914985, "grad_norm": 0.17578125, "learning_rate": 0.0008574746049590618, "loss": 2.159, "step": 31522 }, { "epoch": 0.845937097466724, "grad_norm": 0.1796875, "learning_rate": 0.0008574492214859522, "loss": 2.3156, "step": 31523 }, { "epoch": 0.8459639330184628, "grad_norm": 0.1826171875, "learning_rate": 0.0008574238374480747, "loss": 2.3016, "step": 31524 }, { "epoch": 0.8459907685702018, "grad_norm": 0.181640625, "learning_rate": 0.0008573984528454852, "loss": 2.2238, "step": 31525 }, { "epoch": 0.8460176041219407, "grad_norm": 0.1796875, "learning_rate": 0.0008573730676782392, "loss": 2.2568, "step": 31526 }, { "epoch": 0.8460444396736797, "grad_norm": 0.189453125, "learning_rate": 0.0008573476819463922, "loss": 2.2814, "step": 31527 }, { "epoch": 0.8460712752254186, "grad_norm": 0.177734375, "learning_rate": 0.0008573222956500003, "loss": 2.1746, "step": 31528 }, { "epoch": 0.8460981107771576, "grad_norm": 0.1796875, "learning_rate": 0.0008572969087891188, "loss": 2.2438, "step": 31529 }, { "epoch": 0.8461249463288966, "grad_norm": 0.181640625, "learning_rate": 0.0008572715213638037, "loss": 2.2753, "step": 31530 }, { "epoch": 0.8461517818806354, "grad_norm": 0.1826171875, "learning_rate": 0.0008572461333741107, "loss": 2.1862, "step": 31531 }, { "epoch": 0.8461786174323744, "grad_norm": 0.1845703125, "learning_rate": 0.0008572207448200953, "loss": 2.2643, "step": 31532 }, { "epoch": 0.8462054529841133, "grad_norm": 0.1826171875, "learning_rate": 0.0008571953557018133, "loss": 2.3095, "step": 31533 }, { "epoch": 0.8462322885358523, "grad_norm": 0.1787109375, "learning_rate": 0.0008571699660193202, "loss": 2.2593, "step": 31534 }, { "epoch": 0.8462591240875912, "grad_norm": 0.1826171875, "learning_rate": 0.0008571445757726718, "loss": 2.2097, "step": 31535 }, { "epoch": 0.8462859596393302, "grad_norm": 0.177734375, "learning_rate": 0.0008571191849619242, "loss": 2.2378, "step": 31536 }, { "epoch": 0.8463127951910692, "grad_norm": 0.181640625, "learning_rate": 0.0008570937935871325, "loss": 2.2722, "step": 31537 }, { "epoch": 0.846339630742808, "grad_norm": 0.1806640625, "learning_rate": 0.0008570684016483525, "loss": 2.2349, "step": 31538 }, { "epoch": 0.846366466294547, "grad_norm": 0.18359375, "learning_rate": 0.0008570430091456402, "loss": 2.2025, "step": 31539 }, { "epoch": 0.8463933018462859, "grad_norm": 0.1787109375, "learning_rate": 0.0008570176160790514, "loss": 2.2478, "step": 31540 }, { "epoch": 0.8464201373980249, "grad_norm": 0.1787109375, "learning_rate": 0.0008569922224486413, "loss": 2.2882, "step": 31541 }, { "epoch": 0.8464469729497639, "grad_norm": 0.18359375, "learning_rate": 0.000856966828254466, "loss": 2.1716, "step": 31542 }, { "epoch": 0.8464738085015028, "grad_norm": 0.1796875, "learning_rate": 0.000856941433496581, "loss": 2.1842, "step": 31543 }, { "epoch": 0.8465006440532418, "grad_norm": 0.1806640625, "learning_rate": 0.0008569160381750422, "loss": 2.2406, "step": 31544 }, { "epoch": 0.8465274796049806, "grad_norm": 0.18359375, "learning_rate": 0.0008568906422899051, "loss": 2.2003, "step": 31545 }, { "epoch": 0.8465543151567196, "grad_norm": 0.18359375, "learning_rate": 0.0008568652458412255, "loss": 2.2508, "step": 31546 }, { "epoch": 0.8465811507084585, "grad_norm": 0.1787109375, "learning_rate": 0.0008568398488290591, "loss": 2.2904, "step": 31547 }, { "epoch": 0.8466079862601975, "grad_norm": 0.18359375, "learning_rate": 0.0008568144512534618, "loss": 2.2406, "step": 31548 }, { "epoch": 0.8466348218119365, "grad_norm": 0.18359375, "learning_rate": 0.0008567890531144888, "loss": 2.2979, "step": 31549 }, { "epoch": 0.8466616573636754, "grad_norm": 0.1806640625, "learning_rate": 0.0008567636544121965, "loss": 2.2106, "step": 31550 }, { "epoch": 0.8466884929154144, "grad_norm": 0.1796875, "learning_rate": 0.0008567382551466403, "loss": 2.2066, "step": 31551 }, { "epoch": 0.8467153284671532, "grad_norm": 0.1787109375, "learning_rate": 0.0008567128553178756, "loss": 2.2868, "step": 31552 }, { "epoch": 0.8467421640188922, "grad_norm": 0.1796875, "learning_rate": 0.0008566874549259586, "loss": 2.2175, "step": 31553 }, { "epoch": 0.8467689995706311, "grad_norm": 0.1796875, "learning_rate": 0.0008566620539709448, "loss": 2.2734, "step": 31554 }, { "epoch": 0.8467958351223701, "grad_norm": 0.1826171875, "learning_rate": 0.0008566366524528901, "loss": 2.2101, "step": 31555 }, { "epoch": 0.8468226706741091, "grad_norm": 0.1787109375, "learning_rate": 0.0008566112503718501, "loss": 2.2664, "step": 31556 }, { "epoch": 0.846849506225848, "grad_norm": 0.1787109375, "learning_rate": 0.0008565858477278803, "loss": 2.2107, "step": 31557 }, { "epoch": 0.846876341777587, "grad_norm": 0.1796875, "learning_rate": 0.0008565604445210369, "loss": 2.2347, "step": 31558 }, { "epoch": 0.8469031773293259, "grad_norm": 0.177734375, "learning_rate": 0.0008565350407513751, "loss": 2.2659, "step": 31559 }, { "epoch": 0.8469300128810648, "grad_norm": 0.1826171875, "learning_rate": 0.0008565096364189508, "loss": 2.2169, "step": 31560 }, { "epoch": 0.8469568484328038, "grad_norm": 0.177734375, "learning_rate": 0.0008564842315238202, "loss": 2.1884, "step": 31561 }, { "epoch": 0.8469836839845427, "grad_norm": 0.181640625, "learning_rate": 0.0008564588260660385, "loss": 2.2488, "step": 31562 }, { "epoch": 0.8470105195362817, "grad_norm": 0.1826171875, "learning_rate": 0.0008564334200456614, "loss": 2.2468, "step": 31563 }, { "epoch": 0.8470373550880206, "grad_norm": 0.181640625, "learning_rate": 0.0008564080134627452, "loss": 2.2342, "step": 31564 }, { "epoch": 0.8470641906397596, "grad_norm": 0.1796875, "learning_rate": 0.0008563826063173449, "loss": 2.2514, "step": 31565 }, { "epoch": 0.8470910261914985, "grad_norm": 0.1806640625, "learning_rate": 0.0008563571986095169, "loss": 2.2719, "step": 31566 }, { "epoch": 0.8471178617432374, "grad_norm": 0.181640625, "learning_rate": 0.0008563317903393165, "loss": 2.2778, "step": 31567 }, { "epoch": 0.8471446972949764, "grad_norm": 0.177734375, "learning_rate": 0.0008563063815067995, "loss": 2.3019, "step": 31568 }, { "epoch": 0.8471715328467153, "grad_norm": 0.1904296875, "learning_rate": 0.0008562809721120218, "loss": 2.2491, "step": 31569 }, { "epoch": 0.8471983683984543, "grad_norm": 0.1875, "learning_rate": 0.000856255562155039, "loss": 2.3194, "step": 31570 }, { "epoch": 0.8472252039501932, "grad_norm": 0.2060546875, "learning_rate": 0.0008562301516359067, "loss": 2.192, "step": 31571 }, { "epoch": 0.8472520395019322, "grad_norm": 0.1845703125, "learning_rate": 0.000856204740554681, "loss": 2.3112, "step": 31572 }, { "epoch": 0.847278875053671, "grad_norm": 0.18359375, "learning_rate": 0.0008561793289114175, "loss": 2.2285, "step": 31573 }, { "epoch": 0.84730571060541, "grad_norm": 0.1845703125, "learning_rate": 0.0008561539167061718, "loss": 2.3068, "step": 31574 }, { "epoch": 0.847332546157149, "grad_norm": 0.1787109375, "learning_rate": 0.000856128503939, "loss": 2.3278, "step": 31575 }, { "epoch": 0.8473593817088879, "grad_norm": 0.18359375, "learning_rate": 0.0008561030906099577, "loss": 2.2895, "step": 31576 }, { "epoch": 0.8473862172606269, "grad_norm": 0.1826171875, "learning_rate": 0.0008560776767191001, "loss": 2.2842, "step": 31577 }, { "epoch": 0.8474130528123658, "grad_norm": 0.185546875, "learning_rate": 0.0008560522622664838, "loss": 2.2481, "step": 31578 }, { "epoch": 0.8474398883641048, "grad_norm": 0.1787109375, "learning_rate": 0.0008560268472521639, "loss": 2.2152, "step": 31579 }, { "epoch": 0.8474667239158437, "grad_norm": 0.1826171875, "learning_rate": 0.0008560014316761966, "loss": 2.2602, "step": 31580 }, { "epoch": 0.8474935594675826, "grad_norm": 0.1806640625, "learning_rate": 0.0008559760155386374, "loss": 2.2423, "step": 31581 }, { "epoch": 0.8475203950193216, "grad_norm": 0.181640625, "learning_rate": 0.0008559505988395423, "loss": 2.2462, "step": 31582 }, { "epoch": 0.8475472305710605, "grad_norm": 0.1787109375, "learning_rate": 0.0008559251815789667, "loss": 2.2389, "step": 31583 }, { "epoch": 0.8475740661227995, "grad_norm": 0.1806640625, "learning_rate": 0.0008558997637569666, "loss": 2.2229, "step": 31584 }, { "epoch": 0.8476009016745384, "grad_norm": 0.181640625, "learning_rate": 0.0008558743453735976, "loss": 2.2888, "step": 31585 }, { "epoch": 0.8476277372262774, "grad_norm": 0.177734375, "learning_rate": 0.0008558489264289158, "loss": 2.2257, "step": 31586 }, { "epoch": 0.8476545727780164, "grad_norm": 0.1806640625, "learning_rate": 0.0008558235069229767, "loss": 2.2458, "step": 31587 }, { "epoch": 0.8476814083297552, "grad_norm": 0.1826171875, "learning_rate": 0.0008557980868558358, "loss": 2.2558, "step": 31588 }, { "epoch": 0.8477082438814942, "grad_norm": 0.18359375, "learning_rate": 0.0008557726662275493, "loss": 2.2453, "step": 31589 }, { "epoch": 0.8477350794332331, "grad_norm": 0.181640625, "learning_rate": 0.0008557472450381728, "loss": 2.2415, "step": 31590 }, { "epoch": 0.8477619149849721, "grad_norm": 0.181640625, "learning_rate": 0.0008557218232877622, "loss": 2.2203, "step": 31591 }, { "epoch": 0.847788750536711, "grad_norm": 0.1787109375, "learning_rate": 0.0008556964009763732, "loss": 2.2667, "step": 31592 }, { "epoch": 0.84781558608845, "grad_norm": 0.181640625, "learning_rate": 0.0008556709781040614, "loss": 2.2456, "step": 31593 }, { "epoch": 0.847842421640189, "grad_norm": 0.1796875, "learning_rate": 0.0008556455546708827, "loss": 2.2926, "step": 31594 }, { "epoch": 0.8478692571919278, "grad_norm": 0.1767578125, "learning_rate": 0.0008556201306768929, "loss": 2.2264, "step": 31595 }, { "epoch": 0.8478960927436668, "grad_norm": 0.1826171875, "learning_rate": 0.0008555947061221478, "loss": 2.311, "step": 31596 }, { "epoch": 0.8479229282954057, "grad_norm": 0.1845703125, "learning_rate": 0.000855569281006703, "loss": 2.2988, "step": 31597 }, { "epoch": 0.8479497638471447, "grad_norm": 0.1767578125, "learning_rate": 0.0008555438553306143, "loss": 2.2701, "step": 31598 }, { "epoch": 0.8479765993988836, "grad_norm": 0.1796875, "learning_rate": 0.0008555184290939378, "loss": 2.2892, "step": 31599 }, { "epoch": 0.8480034349506226, "grad_norm": 0.18359375, "learning_rate": 0.0008554930022967289, "loss": 2.2717, "step": 31600 }, { "epoch": 0.8480302705023616, "grad_norm": 0.1826171875, "learning_rate": 0.0008554675749390435, "loss": 2.2403, "step": 31601 }, { "epoch": 0.8480571060541005, "grad_norm": 0.177734375, "learning_rate": 0.0008554421470209375, "loss": 2.303, "step": 31602 }, { "epoch": 0.8480839416058394, "grad_norm": 0.1787109375, "learning_rate": 0.0008554167185424664, "loss": 2.3568, "step": 31603 }, { "epoch": 0.8481107771575783, "grad_norm": 0.1748046875, "learning_rate": 0.0008553912895036865, "loss": 2.2524, "step": 31604 }, { "epoch": 0.8481376127093173, "grad_norm": 0.181640625, "learning_rate": 0.000855365859904653, "loss": 2.2082, "step": 31605 }, { "epoch": 0.8481644482610562, "grad_norm": 0.1826171875, "learning_rate": 0.000855340429745422, "loss": 2.3232, "step": 31606 }, { "epoch": 0.8481912838127952, "grad_norm": 0.1787109375, "learning_rate": 0.0008553149990260491, "loss": 2.212, "step": 31607 }, { "epoch": 0.8482181193645342, "grad_norm": 0.177734375, "learning_rate": 0.0008552895677465903, "loss": 2.2782, "step": 31608 }, { "epoch": 0.848244954916273, "grad_norm": 0.18359375, "learning_rate": 0.0008552641359071014, "loss": 2.2523, "step": 31609 }, { "epoch": 0.848271790468012, "grad_norm": 0.1806640625, "learning_rate": 0.0008552387035076379, "loss": 2.2382, "step": 31610 }, { "epoch": 0.8482986260197509, "grad_norm": 0.177734375, "learning_rate": 0.0008552132705482558, "loss": 2.205, "step": 31611 }, { "epoch": 0.8483254615714899, "grad_norm": 0.185546875, "learning_rate": 0.000855187837029011, "loss": 2.2769, "step": 31612 }, { "epoch": 0.8483522971232289, "grad_norm": 0.181640625, "learning_rate": 0.0008551624029499591, "loss": 2.2268, "step": 31613 }, { "epoch": 0.8483791326749678, "grad_norm": 0.181640625, "learning_rate": 0.0008551369683111559, "loss": 2.3371, "step": 31614 }, { "epoch": 0.8484059682267068, "grad_norm": 0.177734375, "learning_rate": 0.0008551115331126574, "loss": 2.1656, "step": 31615 }, { "epoch": 0.8484328037784457, "grad_norm": 0.1806640625, "learning_rate": 0.0008550860973545191, "loss": 2.2099, "step": 31616 }, { "epoch": 0.8484596393301846, "grad_norm": 0.18359375, "learning_rate": 0.000855060661036797, "loss": 2.2652, "step": 31617 }, { "epoch": 0.8484864748819235, "grad_norm": 0.1826171875, "learning_rate": 0.0008550352241595468, "loss": 2.2381, "step": 31618 }, { "epoch": 0.8485133104336625, "grad_norm": 0.185546875, "learning_rate": 0.0008550097867228244, "loss": 2.2783, "step": 31619 }, { "epoch": 0.8485401459854015, "grad_norm": 0.177734375, "learning_rate": 0.0008549843487266856, "loss": 2.3078, "step": 31620 }, { "epoch": 0.8485669815371404, "grad_norm": 0.1787109375, "learning_rate": 0.0008549589101711861, "loss": 2.2175, "step": 31621 }, { "epoch": 0.8485938170888794, "grad_norm": 0.1787109375, "learning_rate": 0.0008549334710563817, "loss": 2.2432, "step": 31622 }, { "epoch": 0.8486206526406183, "grad_norm": 0.1806640625, "learning_rate": 0.0008549080313823283, "loss": 2.2746, "step": 31623 }, { "epoch": 0.8486474881923572, "grad_norm": 0.1787109375, "learning_rate": 0.0008548825911490819, "loss": 2.2494, "step": 31624 }, { "epoch": 0.8486743237440961, "grad_norm": 0.177734375, "learning_rate": 0.0008548571503566978, "loss": 2.21, "step": 31625 }, { "epoch": 0.8487011592958351, "grad_norm": 0.18359375, "learning_rate": 0.0008548317090052321, "loss": 2.2722, "step": 31626 }, { "epoch": 0.8487279948475741, "grad_norm": 0.17578125, "learning_rate": 0.0008548062670947409, "loss": 2.2903, "step": 31627 }, { "epoch": 0.848754830399313, "grad_norm": 0.1796875, "learning_rate": 0.0008547808246252794, "loss": 2.2188, "step": 31628 }, { "epoch": 0.848781665951052, "grad_norm": 0.1826171875, "learning_rate": 0.0008547553815969039, "loss": 2.2256, "step": 31629 }, { "epoch": 0.8488085015027909, "grad_norm": 0.1806640625, "learning_rate": 0.0008547299380096699, "loss": 2.2861, "step": 31630 }, { "epoch": 0.8488353370545298, "grad_norm": 0.177734375, "learning_rate": 0.0008547044938636334, "loss": 2.2966, "step": 31631 }, { "epoch": 0.8488621726062688, "grad_norm": 0.1796875, "learning_rate": 0.0008546790491588503, "loss": 2.2387, "step": 31632 }, { "epoch": 0.8488890081580077, "grad_norm": 0.185546875, "learning_rate": 0.0008546536038953762, "loss": 2.2884, "step": 31633 }, { "epoch": 0.8489158437097467, "grad_norm": 0.1806640625, "learning_rate": 0.000854628158073267, "loss": 2.3217, "step": 31634 }, { "epoch": 0.8489426792614856, "grad_norm": 0.1826171875, "learning_rate": 0.0008546027116925787, "loss": 2.2062, "step": 31635 }, { "epoch": 0.8489695148132246, "grad_norm": 0.1884765625, "learning_rate": 0.0008545772647533667, "loss": 2.2687, "step": 31636 }, { "epoch": 0.8489963503649635, "grad_norm": 0.181640625, "learning_rate": 0.0008545518172556871, "loss": 2.2309, "step": 31637 }, { "epoch": 0.8490231859167024, "grad_norm": 0.1787109375, "learning_rate": 0.0008545263691995959, "loss": 2.2876, "step": 31638 }, { "epoch": 0.8490500214684414, "grad_norm": 0.1796875, "learning_rate": 0.0008545009205851485, "loss": 2.2004, "step": 31639 }, { "epoch": 0.8490768570201803, "grad_norm": 0.17578125, "learning_rate": 0.0008544754714124011, "loss": 2.2073, "step": 31640 }, { "epoch": 0.8491036925719193, "grad_norm": 0.1845703125, "learning_rate": 0.0008544500216814094, "loss": 2.3341, "step": 31641 }, { "epoch": 0.8491305281236582, "grad_norm": 0.1806640625, "learning_rate": 0.0008544245713922292, "loss": 2.3164, "step": 31642 }, { "epoch": 0.8491573636753972, "grad_norm": 0.177734375, "learning_rate": 0.0008543991205449163, "loss": 2.2059, "step": 31643 }, { "epoch": 0.8491841992271361, "grad_norm": 0.18359375, "learning_rate": 0.0008543736691395265, "loss": 2.2442, "step": 31644 }, { "epoch": 0.849211034778875, "grad_norm": 0.1796875, "learning_rate": 0.000854348217176116, "loss": 2.1781, "step": 31645 }, { "epoch": 0.849237870330614, "grad_norm": 0.1787109375, "learning_rate": 0.00085432276465474, "loss": 2.2597, "step": 31646 }, { "epoch": 0.8492647058823529, "grad_norm": 0.1796875, "learning_rate": 0.0008542973115754547, "loss": 2.2535, "step": 31647 }, { "epoch": 0.8492915414340919, "grad_norm": 0.1767578125, "learning_rate": 0.0008542718579383161, "loss": 2.1676, "step": 31648 }, { "epoch": 0.8493183769858308, "grad_norm": 0.1904296875, "learning_rate": 0.00085424640374338, "loss": 2.2723, "step": 31649 }, { "epoch": 0.8493452125375698, "grad_norm": 0.1845703125, "learning_rate": 0.0008542209489907018, "loss": 2.2187, "step": 31650 }, { "epoch": 0.8493720480893087, "grad_norm": 0.1787109375, "learning_rate": 0.0008541954936803377, "loss": 2.222, "step": 31651 }, { "epoch": 0.8493988836410477, "grad_norm": 0.1806640625, "learning_rate": 0.0008541700378123433, "loss": 2.273, "step": 31652 }, { "epoch": 0.8494257191927866, "grad_norm": 0.1884765625, "learning_rate": 0.0008541445813867749, "loss": 2.2744, "step": 31653 }, { "epoch": 0.8494525547445255, "grad_norm": 0.1923828125, "learning_rate": 0.000854119124403688, "loss": 2.2765, "step": 31654 }, { "epoch": 0.8494793902962645, "grad_norm": 0.1787109375, "learning_rate": 0.0008540936668631383, "loss": 2.2526, "step": 31655 }, { "epoch": 0.8495062258480034, "grad_norm": 0.1806640625, "learning_rate": 0.0008540682087651822, "loss": 2.2419, "step": 31656 }, { "epoch": 0.8495330613997424, "grad_norm": 0.1806640625, "learning_rate": 0.000854042750109875, "loss": 2.2732, "step": 31657 }, { "epoch": 0.8495598969514814, "grad_norm": 0.1767578125, "learning_rate": 0.0008540172908972726, "loss": 2.2068, "step": 31658 }, { "epoch": 0.8495867325032203, "grad_norm": 0.181640625, "learning_rate": 0.0008539918311274311, "loss": 2.224, "step": 31659 }, { "epoch": 0.8496135680549592, "grad_norm": 0.1806640625, "learning_rate": 0.0008539663708004065, "loss": 2.1814, "step": 31660 }, { "epoch": 0.8496404036066981, "grad_norm": 0.1806640625, "learning_rate": 0.0008539409099162542, "loss": 2.285, "step": 31661 }, { "epoch": 0.8496672391584371, "grad_norm": 0.1787109375, "learning_rate": 0.0008539154484750302, "loss": 2.2477, "step": 31662 }, { "epoch": 0.849694074710176, "grad_norm": 0.181640625, "learning_rate": 0.0008538899864767904, "loss": 2.2395, "step": 31663 }, { "epoch": 0.849720910261915, "grad_norm": 0.185546875, "learning_rate": 0.0008538645239215908, "loss": 2.1927, "step": 31664 }, { "epoch": 0.849747745813654, "grad_norm": 0.177734375, "learning_rate": 0.000853839060809487, "loss": 2.1818, "step": 31665 }, { "epoch": 0.8497745813653929, "grad_norm": 0.181640625, "learning_rate": 0.0008538135971405351, "loss": 2.2618, "step": 31666 }, { "epoch": 0.8498014169171318, "grad_norm": 0.181640625, "learning_rate": 0.0008537881329147908, "loss": 2.2902, "step": 31667 }, { "epoch": 0.8498282524688707, "grad_norm": 0.18359375, "learning_rate": 0.0008537626681323099, "loss": 2.256, "step": 31668 }, { "epoch": 0.8498550880206097, "grad_norm": 0.177734375, "learning_rate": 0.0008537372027931483, "loss": 2.208, "step": 31669 }, { "epoch": 0.8498819235723486, "grad_norm": 0.1787109375, "learning_rate": 0.0008537117368973621, "loss": 2.3474, "step": 31670 }, { "epoch": 0.8499087591240876, "grad_norm": 0.181640625, "learning_rate": 0.0008536862704450071, "loss": 2.2316, "step": 31671 }, { "epoch": 0.8499355946758266, "grad_norm": 0.1787109375, "learning_rate": 0.0008536608034361388, "loss": 2.28, "step": 31672 }, { "epoch": 0.8499624302275655, "grad_norm": 0.1806640625, "learning_rate": 0.0008536353358708134, "loss": 2.2534, "step": 31673 }, { "epoch": 0.8499892657793044, "grad_norm": 0.181640625, "learning_rate": 0.0008536098677490867, "loss": 2.195, "step": 31674 }, { "epoch": 0.8500161013310433, "grad_norm": 0.1787109375, "learning_rate": 0.0008535843990710146, "loss": 2.2097, "step": 31675 }, { "epoch": 0.8500429368827823, "grad_norm": 0.1796875, "learning_rate": 0.000853558929836653, "loss": 2.2934, "step": 31676 }, { "epoch": 0.8500697724345213, "grad_norm": 0.1826171875, "learning_rate": 0.0008535334600460577, "loss": 2.2724, "step": 31677 }, { "epoch": 0.8500966079862602, "grad_norm": 0.185546875, "learning_rate": 0.0008535079896992844, "loss": 2.2578, "step": 31678 }, { "epoch": 0.8501234435379992, "grad_norm": 0.1796875, "learning_rate": 0.0008534825187963893, "loss": 2.2496, "step": 31679 }, { "epoch": 0.8501502790897381, "grad_norm": 0.1787109375, "learning_rate": 0.0008534570473374282, "loss": 2.2317, "step": 31680 }, { "epoch": 0.850177114641477, "grad_norm": 0.181640625, "learning_rate": 0.0008534315753224566, "loss": 2.1543, "step": 31681 }, { "epoch": 0.8502039501932159, "grad_norm": 0.1787109375, "learning_rate": 0.000853406102751531, "loss": 2.2987, "step": 31682 }, { "epoch": 0.8502307857449549, "grad_norm": 0.1767578125, "learning_rate": 0.0008533806296247069, "loss": 2.2739, "step": 31683 }, { "epoch": 0.8502576212966939, "grad_norm": 0.1806640625, "learning_rate": 0.0008533551559420402, "loss": 2.2287, "step": 31684 }, { "epoch": 0.8502844568484328, "grad_norm": 0.1826171875, "learning_rate": 0.0008533296817035868, "loss": 2.2515, "step": 31685 }, { "epoch": 0.8503112924001718, "grad_norm": 0.1796875, "learning_rate": 0.0008533042069094026, "loss": 2.2514, "step": 31686 }, { "epoch": 0.8503381279519107, "grad_norm": 0.1826171875, "learning_rate": 0.0008532787315595436, "loss": 2.2114, "step": 31687 }, { "epoch": 0.8503649635036497, "grad_norm": 0.18359375, "learning_rate": 0.0008532532556540655, "loss": 2.2843, "step": 31688 }, { "epoch": 0.8503917990553885, "grad_norm": 0.1787109375, "learning_rate": 0.0008532277791930243, "loss": 2.2509, "step": 31689 }, { "epoch": 0.8504186346071275, "grad_norm": 0.1806640625, "learning_rate": 0.0008532023021764757, "loss": 2.1872, "step": 31690 }, { "epoch": 0.8504454701588665, "grad_norm": 0.1787109375, "learning_rate": 0.0008531768246044759, "loss": 2.2619, "step": 31691 }, { "epoch": 0.8504723057106054, "grad_norm": 0.1865234375, "learning_rate": 0.0008531513464770804, "loss": 2.2987, "step": 31692 }, { "epoch": 0.8504991412623444, "grad_norm": 0.17578125, "learning_rate": 0.0008531258677943458, "loss": 2.212, "step": 31693 } ], "logging_steps": 1, "max_steps": 74528, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2982, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.326593748582957e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }