{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1600, "global_step": 3199, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00031259768677711783, "grad_norm": 0.21875, "learning_rate": 2e-05, "loss": 2.2083, "step": 1 }, { "epoch": 0.00031259768677711783, "eval_loss": 2.0116653442382812, "eval_runtime": 1898.4475, "eval_samples_per_second": 4.813, "eval_steps_per_second": 2.407, "step": 1 }, { "epoch": 0.0006251953735542357, "grad_norm": 0.216796875, "learning_rate": 4e-05, "loss": 2.0017, "step": 2 }, { "epoch": 0.0009377930603313535, "grad_norm": 0.216796875, "learning_rate": 6e-05, "loss": 2.2668, "step": 3 }, { "epoch": 0.0012503907471084713, "grad_norm": 0.220703125, "learning_rate": 8e-05, "loss": 1.9291, "step": 4 }, { "epoch": 0.0015629884338855893, "grad_norm": 0.22265625, "learning_rate": 0.0001, "loss": 1.8984, "step": 5 }, { "epoch": 0.001875586120662707, "grad_norm": 0.1845703125, "learning_rate": 0.00012, "loss": 2.1924, "step": 6 }, { "epoch": 0.002188183807439825, "grad_norm": 0.1982421875, "learning_rate": 0.00014, "loss": 2.135, "step": 7 }, { "epoch": 0.0025007814942169426, "grad_norm": 0.1865234375, "learning_rate": 0.00016, "loss": 2.0434, "step": 8 }, { "epoch": 0.002813379180994061, "grad_norm": 0.1826171875, "learning_rate": 0.00018, "loss": 1.892, "step": 9 }, { "epoch": 0.0031259768677711786, "grad_norm": 0.1953125, "learning_rate": 0.0002, "loss": 2.0976, "step": 10 }, { "epoch": 0.0034385745545482964, "grad_norm": 0.2060546875, "learning_rate": 0.0001999999969814363, "loss": 2.257, "step": 11 }, { "epoch": 0.003751172241325414, "grad_norm": 0.185546875, "learning_rate": 0.00019999998792574533, "loss": 2.0452, "step": 12 }, { "epoch": 0.004063769928102532, "grad_norm": 0.1865234375, "learning_rate": 0.00019999997283292765, "loss": 1.9728, "step": 13 }, { "epoch": 0.00437636761487965, "grad_norm": 0.1806640625, "learning_rate": 0.0001999999517029842, "loss": 2.0065, "step": 14 }, { "epoch": 0.004688965301656768, "grad_norm": 0.1826171875, "learning_rate": 0.00019999992453591622, "loss": 2.0628, "step": 15 }, { "epoch": 0.005001562988433885, "grad_norm": 0.1728515625, "learning_rate": 0.00019999989133172538, "loss": 2.0292, "step": 16 }, { "epoch": 0.0053141606752110035, "grad_norm": 0.1669921875, "learning_rate": 0.00019999985209041366, "loss": 1.9125, "step": 17 }, { "epoch": 0.005626758361988122, "grad_norm": 0.1748046875, "learning_rate": 0.00019999980681198345, "loss": 1.8677, "step": 18 }, { "epoch": 0.005939356048765239, "grad_norm": 0.16796875, "learning_rate": 0.00019999975549643746, "loss": 1.984, "step": 19 }, { "epoch": 0.006251953735542357, "grad_norm": 0.177734375, "learning_rate": 0.00019999969814377878, "loss": 1.9886, "step": 20 }, { "epoch": 0.006564551422319475, "grad_norm": 0.1689453125, "learning_rate": 0.0001999996347540109, "loss": 2.0277, "step": 21 }, { "epoch": 0.006877149109096593, "grad_norm": 0.1728515625, "learning_rate": 0.0001999995653271377, "loss": 2.0216, "step": 22 }, { "epoch": 0.00718974679587371, "grad_norm": 0.1689453125, "learning_rate": 0.00019999948986316324, "loss": 1.8909, "step": 23 }, { "epoch": 0.007502344482650828, "grad_norm": 0.1748046875, "learning_rate": 0.00019999940836209215, "loss": 1.9551, "step": 24 }, { "epoch": 0.007814942169427946, "grad_norm": 0.1826171875, "learning_rate": 0.00019999932082392937, "loss": 1.9186, "step": 25 }, { "epoch": 0.008127539856205065, "grad_norm": 0.18359375, "learning_rate": 0.00019999922724868015, "loss": 2.076, "step": 26 }, { "epoch": 0.008440137542982182, "grad_norm": 0.181640625, "learning_rate": 0.00019999912763635016, "loss": 1.9682, "step": 27 }, { "epoch": 0.0087527352297593, "grad_norm": 0.1630859375, "learning_rate": 0.00019999902198694543, "loss": 1.8056, "step": 28 }, { "epoch": 0.009065332916536417, "grad_norm": 0.1728515625, "learning_rate": 0.00019999891030047227, "loss": 1.8404, "step": 29 }, { "epoch": 0.009377930603313536, "grad_norm": 0.16015625, "learning_rate": 0.0001999987925769375, "loss": 2.113, "step": 30 }, { "epoch": 0.009690528290090653, "grad_norm": 0.166015625, "learning_rate": 0.00019999866881634815, "loss": 1.8715, "step": 31 }, { "epoch": 0.01000312597686777, "grad_norm": 0.8359375, "learning_rate": 0.00019999853901871175, "loss": 3.09, "step": 32 }, { "epoch": 0.01031572366364489, "grad_norm": 0.1640625, "learning_rate": 0.00019999840318403613, "loss": 2.1366, "step": 33 }, { "epoch": 0.010628321350422007, "grad_norm": 0.1787109375, "learning_rate": 0.00019999826131232947, "loss": 1.9007, "step": 34 }, { "epoch": 0.010940919037199124, "grad_norm": 0.173828125, "learning_rate": 0.00019999811340360034, "loss": 1.9831, "step": 35 }, { "epoch": 0.011253516723976243, "grad_norm": 0.1611328125, "learning_rate": 0.0001999979594578577, "loss": 1.9656, "step": 36 }, { "epoch": 0.01156611441075336, "grad_norm": 0.48828125, "learning_rate": 0.0001999977994751108, "loss": 2.8559, "step": 37 }, { "epoch": 0.011878712097530478, "grad_norm": 0.177734375, "learning_rate": 0.00019999763345536934, "loss": 2.0553, "step": 38 }, { "epoch": 0.012191309784307595, "grad_norm": 0.18359375, "learning_rate": 0.0001999974613986433, "loss": 1.8912, "step": 39 }, { "epoch": 0.012503907471084715, "grad_norm": 0.169921875, "learning_rate": 0.0001999972833049431, "loss": 1.8342, "step": 40 }, { "epoch": 0.012816505157861832, "grad_norm": 0.1669921875, "learning_rate": 0.00019999709917427946, "loss": 1.9032, "step": 41 }, { "epoch": 0.01312910284463895, "grad_norm": 0.1806640625, "learning_rate": 0.00019999690900666353, "loss": 1.9228, "step": 42 }, { "epoch": 0.013441700531416068, "grad_norm": 0.1845703125, "learning_rate": 0.00019999671280210676, "loss": 1.9666, "step": 43 }, { "epoch": 0.013754298218193186, "grad_norm": 0.1787109375, "learning_rate": 0.00019999651056062102, "loss": 1.901, "step": 44 }, { "epoch": 0.014066895904970303, "grad_norm": 0.1806640625, "learning_rate": 0.00019999630228221852, "loss": 2.1406, "step": 45 }, { "epoch": 0.01437949359174742, "grad_norm": 0.16796875, "learning_rate": 0.0001999960879669118, "loss": 1.9389, "step": 46 }, { "epoch": 0.01469209127852454, "grad_norm": 0.1689453125, "learning_rate": 0.00019999586761471384, "loss": 1.9625, "step": 47 }, { "epoch": 0.015004688965301657, "grad_norm": 0.1767578125, "learning_rate": 0.00019999564122563795, "loss": 2.0456, "step": 48 }, { "epoch": 0.015317286652078774, "grad_norm": 0.171875, "learning_rate": 0.00019999540879969775, "loss": 1.9955, "step": 49 }, { "epoch": 0.01562988433885589, "grad_norm": 0.1669921875, "learning_rate": 0.00019999517033690727, "loss": 1.8969, "step": 50 }, { "epoch": 0.01594248202563301, "grad_norm": 0.166015625, "learning_rate": 0.00019999492583728097, "loss": 1.8544, "step": 51 }, { "epoch": 0.01625507971241013, "grad_norm": 0.1748046875, "learning_rate": 0.00019999467530083356, "loss": 1.779, "step": 52 }, { "epoch": 0.016567677399187245, "grad_norm": 0.177734375, "learning_rate": 0.00019999441872758017, "loss": 1.8127, "step": 53 }, { "epoch": 0.016880275085964364, "grad_norm": 0.1796875, "learning_rate": 0.0001999941561175363, "loss": 2.0516, "step": 54 }, { "epoch": 0.017192872772741483, "grad_norm": 0.173828125, "learning_rate": 0.0001999938874707178, "loss": 2.1641, "step": 55 }, { "epoch": 0.0175054704595186, "grad_norm": 0.1708984375, "learning_rate": 0.00019999361278714092, "loss": 2.0483, "step": 56 }, { "epoch": 0.017818068146295718, "grad_norm": 0.1748046875, "learning_rate": 0.00019999333206682218, "loss": 1.9931, "step": 57 }, { "epoch": 0.018130665833072834, "grad_norm": 0.181640625, "learning_rate": 0.00019999304530977856, "loss": 1.9795, "step": 58 }, { "epoch": 0.018443263519849953, "grad_norm": 0.1669921875, "learning_rate": 0.00019999275251602738, "loss": 2.13, "step": 59 }, { "epoch": 0.018755861206627072, "grad_norm": 0.1669921875, "learning_rate": 0.0001999924536855863, "loss": 2.0652, "step": 60 }, { "epoch": 0.019068458893404187, "grad_norm": 0.1689453125, "learning_rate": 0.00019999214881847338, "loss": 1.9731, "step": 61 }, { "epoch": 0.019381056580181306, "grad_norm": 0.1708984375, "learning_rate": 0.00019999183791470702, "loss": 1.9303, "step": 62 }, { "epoch": 0.019693654266958426, "grad_norm": 0.169921875, "learning_rate": 0.000199991520974306, "loss": 1.9115, "step": 63 }, { "epoch": 0.02000625195373554, "grad_norm": 0.1669921875, "learning_rate": 0.0001999911979972894, "loss": 2.1622, "step": 64 }, { "epoch": 0.02031884964051266, "grad_norm": 0.169921875, "learning_rate": 0.00019999086898367678, "loss": 1.9662, "step": 65 }, { "epoch": 0.02063144732728978, "grad_norm": 0.16796875, "learning_rate": 0.00019999053393348796, "loss": 1.8382, "step": 66 }, { "epoch": 0.020944045014066895, "grad_norm": 0.17578125, "learning_rate": 0.00019999019284674317, "loss": 1.9147, "step": 67 }, { "epoch": 0.021256642700844014, "grad_norm": 0.171875, "learning_rate": 0.00019998984572346308, "loss": 2.0712, "step": 68 }, { "epoch": 0.021569240387621133, "grad_norm": 0.1767578125, "learning_rate": 0.00019998949256366854, "loss": 2.0207, "step": 69 }, { "epoch": 0.02188183807439825, "grad_norm": 0.1806640625, "learning_rate": 0.00019998913336738094, "loss": 2.1334, "step": 70 }, { "epoch": 0.022194435761175368, "grad_norm": 0.7109375, "learning_rate": 0.00019998876813462192, "loss": 2.7085, "step": 71 }, { "epoch": 0.022507033447952487, "grad_norm": 0.17578125, "learning_rate": 0.00019998839686541356, "loss": 1.7364, "step": 72 }, { "epoch": 0.022819631134729602, "grad_norm": 0.18359375, "learning_rate": 0.0001999880195597783, "loss": 1.9281, "step": 73 }, { "epoch": 0.02313222882150672, "grad_norm": 0.1787109375, "learning_rate": 0.00019998763621773883, "loss": 1.9648, "step": 74 }, { "epoch": 0.023444826508283837, "grad_norm": 0.1806640625, "learning_rate": 0.00019998724683931838, "loss": 1.9874, "step": 75 }, { "epoch": 0.023757424195060956, "grad_norm": 0.171875, "learning_rate": 0.0001999868514245404, "loss": 1.785, "step": 76 }, { "epoch": 0.024070021881838075, "grad_norm": 0.181640625, "learning_rate": 0.0001999864499734288, "loss": 1.9094, "step": 77 }, { "epoch": 0.02438261956861519, "grad_norm": 0.162109375, "learning_rate": 0.00019998604248600777, "loss": 1.9723, "step": 78 }, { "epoch": 0.02469521725539231, "grad_norm": 0.1689453125, "learning_rate": 0.00019998562896230196, "loss": 1.8739, "step": 79 }, { "epoch": 0.02500781494216943, "grad_norm": 0.1845703125, "learning_rate": 0.00019998520940233636, "loss": 1.936, "step": 80 }, { "epoch": 0.025320412628946545, "grad_norm": 0.16015625, "learning_rate": 0.0001999847838061362, "loss": 1.8807, "step": 81 }, { "epoch": 0.025633010315723664, "grad_norm": 0.173828125, "learning_rate": 0.00019998435217372728, "loss": 1.7412, "step": 82 }, { "epoch": 0.025945608002500783, "grad_norm": 0.17578125, "learning_rate": 0.00019998391450513556, "loss": 1.8404, "step": 83 }, { "epoch": 0.0262582056892779, "grad_norm": 0.169921875, "learning_rate": 0.00019998347080038754, "loss": 1.8108, "step": 84 }, { "epoch": 0.026570803376055017, "grad_norm": 0.181640625, "learning_rate": 0.00019998302105950994, "loss": 2.0934, "step": 85 }, { "epoch": 0.026883401062832137, "grad_norm": 0.19140625, "learning_rate": 0.00019998256528252998, "loss": 2.0021, "step": 86 }, { "epoch": 0.027195998749609252, "grad_norm": 0.1630859375, "learning_rate": 0.00019998210346947515, "loss": 1.9675, "step": 87 }, { "epoch": 0.02750859643638637, "grad_norm": 0.1650390625, "learning_rate": 0.00019998163562037332, "loss": 1.8488, "step": 88 }, { "epoch": 0.02782119412316349, "grad_norm": 0.1806640625, "learning_rate": 0.00019998116173525272, "loss": 1.9255, "step": 89 }, { "epoch": 0.028133791809940606, "grad_norm": 0.16796875, "learning_rate": 0.000199980681814142, "loss": 2.1055, "step": 90 }, { "epoch": 0.028446389496717725, "grad_norm": 0.1748046875, "learning_rate": 0.0001999801958570701, "loss": 2.1303, "step": 91 }, { "epoch": 0.02875898718349484, "grad_norm": 0.1728515625, "learning_rate": 0.00019997970386406637, "loss": 2.0517, "step": 92 }, { "epoch": 0.02907158487027196, "grad_norm": 0.181640625, "learning_rate": 0.00019997920583516053, "loss": 1.8314, "step": 93 }, { "epoch": 0.02938418255704908, "grad_norm": 0.1845703125, "learning_rate": 0.0001999787017703826, "loss": 1.7953, "step": 94 }, { "epoch": 0.029696780243826194, "grad_norm": 0.169921875, "learning_rate": 0.00019997819166976308, "loss": 1.8238, "step": 95 }, { "epoch": 0.030009377930603313, "grad_norm": 0.173828125, "learning_rate": 0.0001999776755333327, "loss": 1.8905, "step": 96 }, { "epoch": 0.030321975617380433, "grad_norm": 0.169921875, "learning_rate": 0.00019997715336112263, "loss": 1.7594, "step": 97 }, { "epoch": 0.030634573304157548, "grad_norm": 0.171875, "learning_rate": 0.0001999766251531644, "loss": 1.9648, "step": 98 }, { "epoch": 0.030947170990934667, "grad_norm": 0.1669921875, "learning_rate": 0.00019997609090948996, "loss": 2.1577, "step": 99 }, { "epoch": 0.03125976867771178, "grad_norm": 0.1806640625, "learning_rate": 0.0001999755506301315, "loss": 2.0563, "step": 100 }, { "epoch": 0.0315723663644889, "grad_norm": 0.1669921875, "learning_rate": 0.0001999750043151216, "loss": 1.8857, "step": 101 }, { "epoch": 0.03188496405126602, "grad_norm": 0.1748046875, "learning_rate": 0.00019997445196449337, "loss": 1.8832, "step": 102 }, { "epoch": 0.03219756173804314, "grad_norm": 0.189453125, "learning_rate": 0.00019997389357828, "loss": 2.0352, "step": 103 }, { "epoch": 0.03251015942482026, "grad_norm": 0.18359375, "learning_rate": 0.00019997332915651532, "loss": 2.0126, "step": 104 }, { "epoch": 0.03282275711159737, "grad_norm": 0.1650390625, "learning_rate": 0.00019997275869923335, "loss": 2.0201, "step": 105 }, { "epoch": 0.03313535479837449, "grad_norm": 0.177734375, "learning_rate": 0.00019997218220646853, "loss": 2.3295, "step": 106 }, { "epoch": 0.03344795248515161, "grad_norm": 0.16796875, "learning_rate": 0.0001999715996782557, "loss": 1.8919, "step": 107 }, { "epoch": 0.03376055017192873, "grad_norm": 0.1806640625, "learning_rate": 0.00019997101111462998, "loss": 1.7797, "step": 108 }, { "epoch": 0.03407314785870585, "grad_norm": 0.193359375, "learning_rate": 0.00019997041651562695, "loss": 1.8956, "step": 109 }, { "epoch": 0.03438574554548297, "grad_norm": 0.1640625, "learning_rate": 0.00019996981588128244, "loss": 1.9683, "step": 110 }, { "epoch": 0.03469834323226008, "grad_norm": 0.1748046875, "learning_rate": 0.00019996920921163278, "loss": 1.727, "step": 111 }, { "epoch": 0.0350109409190372, "grad_norm": 0.1630859375, "learning_rate": 0.00019996859650671457, "loss": 1.8966, "step": 112 }, { "epoch": 0.03532353860581432, "grad_norm": 0.16796875, "learning_rate": 0.0001999679777665648, "loss": 1.8195, "step": 113 }, { "epoch": 0.035636136292591436, "grad_norm": 0.173828125, "learning_rate": 0.0001999673529912208, "loss": 1.8785, "step": 114 }, { "epoch": 0.035948733979368555, "grad_norm": 0.17578125, "learning_rate": 0.0001999667221807203, "loss": 1.9363, "step": 115 }, { "epoch": 0.03626133166614567, "grad_norm": 0.1689453125, "learning_rate": 0.00019996608533510144, "loss": 2.0314, "step": 116 }, { "epoch": 0.036573929352922786, "grad_norm": 0.17578125, "learning_rate": 0.0001999654424544026, "loss": 1.9304, "step": 117 }, { "epoch": 0.036886527039699905, "grad_norm": 0.181640625, "learning_rate": 0.0001999647935386626, "loss": 2.1456, "step": 118 }, { "epoch": 0.037199124726477024, "grad_norm": 0.1806640625, "learning_rate": 0.0001999641385879206, "loss": 1.6728, "step": 119 }, { "epoch": 0.037511722413254144, "grad_norm": 0.1650390625, "learning_rate": 0.00019996347760221624, "loss": 1.8201, "step": 120 }, { "epoch": 0.03782432010003126, "grad_norm": 0.1708984375, "learning_rate": 0.0001999628105815893, "loss": 2.096, "step": 121 }, { "epoch": 0.038136917786808375, "grad_norm": 0.171875, "learning_rate": 0.0001999621375260801, "loss": 1.9948, "step": 122 }, { "epoch": 0.038449515473585494, "grad_norm": 0.1748046875, "learning_rate": 0.0001999614584357293, "loss": 1.9562, "step": 123 }, { "epoch": 0.03876211316036261, "grad_norm": 0.173828125, "learning_rate": 0.00019996077331057788, "loss": 1.8452, "step": 124 }, { "epoch": 0.03907471084713973, "grad_norm": 0.1689453125, "learning_rate": 0.00019996008215066716, "loss": 1.7615, "step": 125 }, { "epoch": 0.03938730853391685, "grad_norm": 0.1728515625, "learning_rate": 0.00019995938495603893, "loss": 1.7628, "step": 126 }, { "epoch": 0.03969990622069397, "grad_norm": 0.1845703125, "learning_rate": 0.00019995868172673523, "loss": 2.0241, "step": 127 }, { "epoch": 0.04001250390747108, "grad_norm": 0.1689453125, "learning_rate": 0.00019995797246279856, "loss": 2.0807, "step": 128 }, { "epoch": 0.0403251015942482, "grad_norm": 0.1884765625, "learning_rate": 0.00019995725716427169, "loss": 1.8564, "step": 129 }, { "epoch": 0.04063769928102532, "grad_norm": 0.1767578125, "learning_rate": 0.00019995653583119785, "loss": 2.1278, "step": 130 }, { "epoch": 0.04095029696780244, "grad_norm": 0.1689453125, "learning_rate": 0.00019995580846362055, "loss": 2.095, "step": 131 }, { "epoch": 0.04126289465457956, "grad_norm": 0.1748046875, "learning_rate": 0.00019995507506158372, "loss": 1.6848, "step": 132 }, { "epoch": 0.04157549234135667, "grad_norm": 0.1748046875, "learning_rate": 0.00019995433562513163, "loss": 1.8979, "step": 133 }, { "epoch": 0.04188809002813379, "grad_norm": 0.427734375, "learning_rate": 0.00019995359015430894, "loss": 2.9492, "step": 134 }, { "epoch": 0.04220068771491091, "grad_norm": 0.173828125, "learning_rate": 0.0001999528386491606, "loss": 1.8951, "step": 135 }, { "epoch": 0.04251328540168803, "grad_norm": 0.17578125, "learning_rate": 0.00019995208110973206, "loss": 1.7656, "step": 136 }, { "epoch": 0.04282588308846515, "grad_norm": 0.1787109375, "learning_rate": 0.00019995131753606902, "loss": 2.0607, "step": 137 }, { "epoch": 0.043138480775242266, "grad_norm": 0.1611328125, "learning_rate": 0.00019995054792821754, "loss": 1.6803, "step": 138 }, { "epoch": 0.04345107846201938, "grad_norm": 0.1767578125, "learning_rate": 0.00019994977228622414, "loss": 2.0165, "step": 139 }, { "epoch": 0.0437636761487965, "grad_norm": 0.1787109375, "learning_rate": 0.0001999489906101356, "loss": 1.9388, "step": 140 }, { "epoch": 0.044076273835573616, "grad_norm": 0.1884765625, "learning_rate": 0.00019994820289999913, "loss": 1.6209, "step": 141 }, { "epoch": 0.044388871522350735, "grad_norm": 0.1826171875, "learning_rate": 0.0001999474091558623, "loss": 1.8157, "step": 142 }, { "epoch": 0.044701469209127855, "grad_norm": 0.171875, "learning_rate": 0.00019994660937777301, "loss": 1.7581, "step": 143 }, { "epoch": 0.045014066895904974, "grad_norm": 0.279296875, "learning_rate": 0.00019994580356577957, "loss": 2.6888, "step": 144 }, { "epoch": 0.045326664582682086, "grad_norm": 0.1796875, "learning_rate": 0.00019994499171993056, "loss": 2.0103, "step": 145 }, { "epoch": 0.045639262269459205, "grad_norm": 0.171875, "learning_rate": 0.00019994417384027507, "loss": 1.7455, "step": 146 }, { "epoch": 0.045951859956236324, "grad_norm": 0.1884765625, "learning_rate": 0.00019994334992686245, "loss": 1.9287, "step": 147 }, { "epoch": 0.04626445764301344, "grad_norm": 0.1875, "learning_rate": 0.00019994251997974241, "loss": 1.8521, "step": 148 }, { "epoch": 0.04657705532979056, "grad_norm": 0.1806640625, "learning_rate": 0.00019994168399896508, "loss": 2.0915, "step": 149 }, { "epoch": 0.046889653016567674, "grad_norm": 0.1787109375, "learning_rate": 0.00019994084198458097, "loss": 2.0972, "step": 150 }, { "epoch": 0.04720225070334479, "grad_norm": 0.1884765625, "learning_rate": 0.00019993999393664083, "loss": 2.2031, "step": 151 }, { "epoch": 0.04751484839012191, "grad_norm": 0.1748046875, "learning_rate": 0.00019993913985519592, "loss": 1.8532, "step": 152 }, { "epoch": 0.04782744607689903, "grad_norm": 0.18359375, "learning_rate": 0.0001999382797402978, "loss": 1.9278, "step": 153 }, { "epoch": 0.04814004376367615, "grad_norm": 0.1796875, "learning_rate": 0.00019993741359199834, "loss": 1.6459, "step": 154 }, { "epoch": 0.04845264145045327, "grad_norm": 0.1826171875, "learning_rate": 0.0001999365414103499, "loss": 1.8459, "step": 155 }, { "epoch": 0.04876523913723038, "grad_norm": 0.1748046875, "learning_rate": 0.0001999356631954051, "loss": 1.9833, "step": 156 }, { "epoch": 0.0490778368240075, "grad_norm": 0.177734375, "learning_rate": 0.00019993477894721698, "loss": 1.8361, "step": 157 }, { "epoch": 0.04939043451078462, "grad_norm": 0.17578125, "learning_rate": 0.0001999338886658389, "loss": 2.0878, "step": 158 }, { "epoch": 0.04970303219756174, "grad_norm": 0.1943359375, "learning_rate": 0.0001999329923513246, "loss": 1.9454, "step": 159 }, { "epoch": 0.05001562988433886, "grad_norm": 0.177734375, "learning_rate": 0.00019993209000372818, "loss": 1.982, "step": 160 }, { "epoch": 0.05032822757111598, "grad_norm": 0.1796875, "learning_rate": 0.00019993118162310415, "loss": 1.9192, "step": 161 }, { "epoch": 0.05064082525789309, "grad_norm": 0.1826171875, "learning_rate": 0.0001999302672095074, "loss": 1.8865, "step": 162 }, { "epoch": 0.05095342294467021, "grad_norm": 0.1748046875, "learning_rate": 0.00019992934676299302, "loss": 1.6733, "step": 163 }, { "epoch": 0.05126602063144733, "grad_norm": 0.169921875, "learning_rate": 0.00019992842028361665, "loss": 1.9374, "step": 164 }, { "epoch": 0.051578618318224446, "grad_norm": 0.1953125, "learning_rate": 0.0001999274877714342, "loss": 1.9537, "step": 165 }, { "epoch": 0.051891216005001566, "grad_norm": 0.17578125, "learning_rate": 0.000199926549226502, "loss": 1.8767, "step": 166 }, { "epoch": 0.05220381369177868, "grad_norm": 0.1728515625, "learning_rate": 0.00019992560464887667, "loss": 1.8994, "step": 167 }, { "epoch": 0.0525164113785558, "grad_norm": 0.185546875, "learning_rate": 0.00019992465403861524, "loss": 1.7415, "step": 168 }, { "epoch": 0.052829009065332916, "grad_norm": 0.1826171875, "learning_rate": 0.00019992369739577512, "loss": 1.7688, "step": 169 }, { "epoch": 0.053141606752110035, "grad_norm": 0.1826171875, "learning_rate": 0.00019992273472041404, "loss": 1.7507, "step": 170 }, { "epoch": 0.053454204438887154, "grad_norm": 0.1728515625, "learning_rate": 0.00019992176601259015, "loss": 1.995, "step": 171 }, { "epoch": 0.05376680212566427, "grad_norm": 0.1806640625, "learning_rate": 0.00019992079127236192, "loss": 1.9025, "step": 172 }, { "epoch": 0.054079399812441385, "grad_norm": 0.1689453125, "learning_rate": 0.0001999198104997882, "loss": 1.7634, "step": 173 }, { "epoch": 0.054391997499218504, "grad_norm": 0.1708984375, "learning_rate": 0.00019991882369492815, "loss": 1.8371, "step": 174 }, { "epoch": 0.05470459518599562, "grad_norm": 0.17578125, "learning_rate": 0.0001999178308578414, "loss": 1.7978, "step": 175 }, { "epoch": 0.05501719287277274, "grad_norm": 0.1748046875, "learning_rate": 0.0001999168319885879, "loss": 2.0066, "step": 176 }, { "epoch": 0.05532979055954986, "grad_norm": 0.17578125, "learning_rate": 0.00019991582708722792, "loss": 1.6957, "step": 177 }, { "epoch": 0.05564238824632698, "grad_norm": 0.1767578125, "learning_rate": 0.0001999148161538221, "loss": 1.8989, "step": 178 }, { "epoch": 0.05595498593310409, "grad_norm": 0.1767578125, "learning_rate": 0.00019991379918843155, "loss": 2.0687, "step": 179 }, { "epoch": 0.05626758361988121, "grad_norm": 0.1865234375, "learning_rate": 0.00019991277619111763, "loss": 1.9398, "step": 180 }, { "epoch": 0.05658018130665833, "grad_norm": 0.1904296875, "learning_rate": 0.00019991174716194203, "loss": 1.7309, "step": 181 }, { "epoch": 0.05689277899343545, "grad_norm": 0.1845703125, "learning_rate": 0.00019991071210096698, "loss": 1.8865, "step": 182 }, { "epoch": 0.05720537668021257, "grad_norm": 0.173828125, "learning_rate": 0.00019990967100825491, "loss": 1.8802, "step": 183 }, { "epoch": 0.05751797436698968, "grad_norm": 0.1826171875, "learning_rate": 0.0001999086238838687, "loss": 1.814, "step": 184 }, { "epoch": 0.0578305720537668, "grad_norm": 0.16796875, "learning_rate": 0.00019990757072787152, "loss": 1.6507, "step": 185 }, { "epoch": 0.05814316974054392, "grad_norm": 0.17578125, "learning_rate": 0.000199906511540327, "loss": 1.921, "step": 186 }, { "epoch": 0.05845576742732104, "grad_norm": 0.1923828125, "learning_rate": 0.0001999054463212991, "loss": 1.9151, "step": 187 }, { "epoch": 0.05876836511409816, "grad_norm": 0.1748046875, "learning_rate": 0.00019990437507085202, "loss": 2.0727, "step": 188 }, { "epoch": 0.05908096280087528, "grad_norm": 0.17578125, "learning_rate": 0.00019990329778905058, "loss": 2.0359, "step": 189 }, { "epoch": 0.05939356048765239, "grad_norm": 0.19921875, "learning_rate": 0.00019990221447595968, "loss": 1.9311, "step": 190 }, { "epoch": 0.05970615817442951, "grad_norm": 0.1767578125, "learning_rate": 0.00019990112513164484, "loss": 1.8018, "step": 191 }, { "epoch": 0.06001875586120663, "grad_norm": 0.1826171875, "learning_rate": 0.00019990002975617174, "loss": 1.9104, "step": 192 }, { "epoch": 0.060331353547983746, "grad_norm": 0.1806640625, "learning_rate": 0.00019989892834960656, "loss": 1.7227, "step": 193 }, { "epoch": 0.060643951234760865, "grad_norm": 0.1787109375, "learning_rate": 0.00019989782091201573, "loss": 1.7287, "step": 194 }, { "epoch": 0.06095654892153798, "grad_norm": 0.181640625, "learning_rate": 0.0001998967074434662, "loss": 1.8525, "step": 195 }, { "epoch": 0.061269146608315096, "grad_norm": 0.439453125, "learning_rate": 0.00019989558794402515, "loss": 2.4259, "step": 196 }, { "epoch": 0.061581744295092215, "grad_norm": 0.1865234375, "learning_rate": 0.0001998944624137601, "loss": 2.1134, "step": 197 }, { "epoch": 0.061894341981869334, "grad_norm": 0.169921875, "learning_rate": 0.0001998933308527391, "loss": 1.9239, "step": 198 }, { "epoch": 0.06220693966864645, "grad_norm": 0.1806640625, "learning_rate": 0.0001998921932610304, "loss": 1.7292, "step": 199 }, { "epoch": 0.06251953735542357, "grad_norm": 0.1826171875, "learning_rate": 0.0001998910496387027, "loss": 1.7629, "step": 200 }, { "epoch": 0.06283213504220068, "grad_norm": 0.1962890625, "learning_rate": 0.00019988989998582506, "loss": 2.005, "step": 201 }, { "epoch": 0.0631447327289778, "grad_norm": 0.173828125, "learning_rate": 0.00019988874430246686, "loss": 1.7605, "step": 202 }, { "epoch": 0.06345733041575492, "grad_norm": 0.1787109375, "learning_rate": 0.0001998875825886979, "loss": 1.748, "step": 203 }, { "epoch": 0.06376992810253204, "grad_norm": 0.1787109375, "learning_rate": 0.00019988641484458826, "loss": 2.1037, "step": 204 }, { "epoch": 0.06408252578930916, "grad_norm": 0.181640625, "learning_rate": 0.00019988524107020846, "loss": 1.9274, "step": 205 }, { "epoch": 0.06439512347608628, "grad_norm": 0.173828125, "learning_rate": 0.00019988406126562937, "loss": 1.7823, "step": 206 }, { "epoch": 0.0647077211628634, "grad_norm": 0.1796875, "learning_rate": 0.00019988287543092225, "loss": 2.06, "step": 207 }, { "epoch": 0.06502031884964052, "grad_norm": 0.193359375, "learning_rate": 0.00019988168356615865, "loss": 1.9327, "step": 208 }, { "epoch": 0.06533291653641764, "grad_norm": 0.17578125, "learning_rate": 0.00019988048567141052, "loss": 1.9889, "step": 209 }, { "epoch": 0.06564551422319474, "grad_norm": 0.1787109375, "learning_rate": 0.00019987928174675023, "loss": 1.6262, "step": 210 }, { "epoch": 0.06595811190997186, "grad_norm": 0.173828125, "learning_rate": 0.00019987807179225035, "loss": 1.8805, "step": 211 }, { "epoch": 0.06627070959674898, "grad_norm": 0.181640625, "learning_rate": 0.00019987685580798403, "loss": 1.7265, "step": 212 }, { "epoch": 0.0665833072835261, "grad_norm": 0.17578125, "learning_rate": 0.0001998756337940247, "loss": 1.7049, "step": 213 }, { "epoch": 0.06689590497030322, "grad_norm": 0.173828125, "learning_rate": 0.00019987440575044602, "loss": 1.7256, "step": 214 }, { "epoch": 0.06720850265708034, "grad_norm": 0.1748046875, "learning_rate": 0.00019987317167732222, "loss": 1.9469, "step": 215 }, { "epoch": 0.06752110034385746, "grad_norm": 0.177734375, "learning_rate": 0.00019987193157472777, "loss": 2.0254, "step": 216 }, { "epoch": 0.06783369803063458, "grad_norm": 0.1904296875, "learning_rate": 0.00019987068544273756, "loss": 2.1006, "step": 217 }, { "epoch": 0.0681462957174117, "grad_norm": 0.1767578125, "learning_rate": 0.00019986943328142678, "loss": 1.9486, "step": 218 }, { "epoch": 0.06845889340418881, "grad_norm": 0.181640625, "learning_rate": 0.00019986817509087107, "loss": 1.9707, "step": 219 }, { "epoch": 0.06877149109096593, "grad_norm": 0.169921875, "learning_rate": 0.00019986691087114635, "loss": 1.868, "step": 220 }, { "epoch": 0.06908408877774304, "grad_norm": 0.181640625, "learning_rate": 0.00019986564062232897, "loss": 1.9028, "step": 221 }, { "epoch": 0.06939668646452016, "grad_norm": 0.1787109375, "learning_rate": 0.0001998643643444956, "loss": 1.9136, "step": 222 }, { "epoch": 0.06970928415129728, "grad_norm": 0.181640625, "learning_rate": 0.0001998630820377233, "loss": 1.8039, "step": 223 }, { "epoch": 0.0700218818380744, "grad_norm": 0.1884765625, "learning_rate": 0.00019986179370208947, "loss": 1.7326, "step": 224 }, { "epoch": 0.07033447952485151, "grad_norm": 0.169921875, "learning_rate": 0.0001998604993376719, "loss": 1.7712, "step": 225 }, { "epoch": 0.07064707721162863, "grad_norm": 0.1826171875, "learning_rate": 0.00019985919894454875, "loss": 1.9061, "step": 226 }, { "epoch": 0.07095967489840575, "grad_norm": 0.181640625, "learning_rate": 0.00019985789252279846, "loss": 1.8444, "step": 227 }, { "epoch": 0.07127227258518287, "grad_norm": 0.1787109375, "learning_rate": 0.0001998565800725, "loss": 2.1696, "step": 228 }, { "epoch": 0.07158487027195999, "grad_norm": 0.19140625, "learning_rate": 0.00019985526159373255, "loss": 1.9888, "step": 229 }, { "epoch": 0.07189746795873711, "grad_norm": 0.1865234375, "learning_rate": 0.00019985393708657568, "loss": 2.018, "step": 230 }, { "epoch": 0.07221006564551423, "grad_norm": 0.18359375, "learning_rate": 0.0001998526065511094, "loss": 1.7847, "step": 231 }, { "epoch": 0.07252266333229133, "grad_norm": 0.1826171875, "learning_rate": 0.00019985126998741404, "loss": 1.879, "step": 232 }, { "epoch": 0.07283526101906845, "grad_norm": 0.177734375, "learning_rate": 0.00019984992739557024, "loss": 1.7065, "step": 233 }, { "epoch": 0.07314785870584557, "grad_norm": 0.1806640625, "learning_rate": 0.00019984857877565907, "loss": 1.7451, "step": 234 }, { "epoch": 0.07346045639262269, "grad_norm": 0.173828125, "learning_rate": 0.000199847224127762, "loss": 1.8228, "step": 235 }, { "epoch": 0.07377305407939981, "grad_norm": 0.1806640625, "learning_rate": 0.00019984586345196074, "loss": 1.9904, "step": 236 }, { "epoch": 0.07408565176617693, "grad_norm": 0.1767578125, "learning_rate": 0.0001998444967483375, "loss": 1.8958, "step": 237 }, { "epoch": 0.07439824945295405, "grad_norm": 0.1845703125, "learning_rate": 0.00019984312401697473, "loss": 1.8913, "step": 238 }, { "epoch": 0.07471084713973117, "grad_norm": 0.193359375, "learning_rate": 0.00019984174525795536, "loss": 1.9273, "step": 239 }, { "epoch": 0.07502344482650829, "grad_norm": 0.189453125, "learning_rate": 0.00019984036047136257, "loss": 1.8831, "step": 240 }, { "epoch": 0.0753360425132854, "grad_norm": 0.19140625, "learning_rate": 0.00019983896965728001, "loss": 1.9506, "step": 241 }, { "epoch": 0.07564864020006253, "grad_norm": 0.173828125, "learning_rate": 0.00019983757281579162, "loss": 1.971, "step": 242 }, { "epoch": 0.07596123788683964, "grad_norm": 0.1865234375, "learning_rate": 0.00019983616994698173, "loss": 1.8156, "step": 243 }, { "epoch": 0.07627383557361675, "grad_norm": 0.1826171875, "learning_rate": 0.00019983476105093505, "loss": 1.9397, "step": 244 }, { "epoch": 0.07658643326039387, "grad_norm": 0.177734375, "learning_rate": 0.00019983334612773662, "loss": 1.7567, "step": 245 }, { "epoch": 0.07689903094717099, "grad_norm": 0.1767578125, "learning_rate": 0.00019983192517747186, "loss": 1.8685, "step": 246 }, { "epoch": 0.0772116286339481, "grad_norm": 0.1767578125, "learning_rate": 0.00019983049820022656, "loss": 2.2285, "step": 247 }, { "epoch": 0.07752422632072523, "grad_norm": 0.193359375, "learning_rate": 0.00019982906519608687, "loss": 1.9532, "step": 248 }, { "epoch": 0.07783682400750234, "grad_norm": 0.1767578125, "learning_rate": 0.0001998276261651393, "loss": 1.8775, "step": 249 }, { "epoch": 0.07814942169427946, "grad_norm": 0.18359375, "learning_rate": 0.00019982618110747074, "loss": 1.892, "step": 250 }, { "epoch": 0.07846201938105658, "grad_norm": 0.1787109375, "learning_rate": 0.00019982473002316838, "loss": 2.2827, "step": 251 }, { "epoch": 0.0787746170678337, "grad_norm": 0.185546875, "learning_rate": 0.0001998232729123199, "loss": 2.1452, "step": 252 }, { "epoch": 0.07908721475461082, "grad_norm": 0.1845703125, "learning_rate": 0.00019982180977501322, "loss": 1.7888, "step": 253 }, { "epoch": 0.07939981244138794, "grad_norm": 0.1845703125, "learning_rate": 0.00019982034061133666, "loss": 1.7486, "step": 254 }, { "epoch": 0.07971241012816505, "grad_norm": 0.1728515625, "learning_rate": 0.00019981886542137892, "loss": 1.8143, "step": 255 }, { "epoch": 0.08002500781494216, "grad_norm": 0.18359375, "learning_rate": 0.00019981738420522913, "loss": 1.839, "step": 256 }, { "epoch": 0.08033760550171928, "grad_norm": 0.169921875, "learning_rate": 0.00019981589696297663, "loss": 1.918, "step": 257 }, { "epoch": 0.0806502031884964, "grad_norm": 0.19140625, "learning_rate": 0.00019981440369471124, "loss": 1.9144, "step": 258 }, { "epoch": 0.08096280087527352, "grad_norm": 0.1826171875, "learning_rate": 0.00019981290440052306, "loss": 1.7846, "step": 259 }, { "epoch": 0.08127539856205064, "grad_norm": 0.1826171875, "learning_rate": 0.0001998113990805027, "loss": 1.9837, "step": 260 }, { "epoch": 0.08158799624882776, "grad_norm": 0.1875, "learning_rate": 0.00019980988773474098, "loss": 1.9422, "step": 261 }, { "epoch": 0.08190059393560488, "grad_norm": 0.1845703125, "learning_rate": 0.00019980837036332917, "loss": 1.7637, "step": 262 }, { "epoch": 0.082213191622382, "grad_norm": 0.67578125, "learning_rate": 0.0001998068469663588, "loss": 2.5924, "step": 263 }, { "epoch": 0.08252578930915912, "grad_norm": 0.185546875, "learning_rate": 0.0001998053175439219, "loss": 1.8041, "step": 264 }, { "epoch": 0.08283838699593624, "grad_norm": 0.19921875, "learning_rate": 0.00019980378209611083, "loss": 2.139, "step": 265 }, { "epoch": 0.08315098468271334, "grad_norm": 0.1904296875, "learning_rate": 0.0001998022406230182, "loss": 1.8233, "step": 266 }, { "epoch": 0.08346358236949046, "grad_norm": 0.2021484375, "learning_rate": 0.0001998006931247372, "loss": 1.9227, "step": 267 }, { "epoch": 0.08377618005626758, "grad_norm": 0.19140625, "learning_rate": 0.00019979913960136114, "loss": 1.7389, "step": 268 }, { "epoch": 0.0840887777430447, "grad_norm": 0.1826171875, "learning_rate": 0.00019979758005298385, "loss": 1.6342, "step": 269 }, { "epoch": 0.08440137542982182, "grad_norm": 0.181640625, "learning_rate": 0.0001997960144796995, "loss": 1.9472, "step": 270 }, { "epoch": 0.08471397311659894, "grad_norm": 0.18359375, "learning_rate": 0.00019979444288160253, "loss": 1.7985, "step": 271 }, { "epoch": 0.08502657080337606, "grad_norm": 0.1806640625, "learning_rate": 0.00019979286525878792, "loss": 1.8546, "step": 272 }, { "epoch": 0.08533916849015317, "grad_norm": 0.19140625, "learning_rate": 0.00019979128161135083, "loss": 1.9697, "step": 273 }, { "epoch": 0.0856517661769303, "grad_norm": 0.2001953125, "learning_rate": 0.00019978969193938694, "loss": 2.095, "step": 274 }, { "epoch": 0.08596436386370741, "grad_norm": 0.1884765625, "learning_rate": 0.00019978809624299218, "loss": 1.9491, "step": 275 }, { "epoch": 0.08627696155048453, "grad_norm": 0.1787109375, "learning_rate": 0.00019978649452226285, "loss": 1.9463, "step": 276 }, { "epoch": 0.08658955923726164, "grad_norm": 0.1865234375, "learning_rate": 0.00019978488677729574, "loss": 1.8981, "step": 277 }, { "epoch": 0.08690215692403876, "grad_norm": 0.1923828125, "learning_rate": 0.00019978327300818784, "loss": 1.9126, "step": 278 }, { "epoch": 0.08721475461081588, "grad_norm": 0.18359375, "learning_rate": 0.0001997816532150366, "loss": 1.8987, "step": 279 }, { "epoch": 0.087527352297593, "grad_norm": 0.201171875, "learning_rate": 0.00019978002739793978, "loss": 1.7486, "step": 280 }, { "epoch": 0.08783994998437011, "grad_norm": 0.2041015625, "learning_rate": 0.00019977839555699553, "loss": 1.9603, "step": 281 }, { "epoch": 0.08815254767114723, "grad_norm": 0.19140625, "learning_rate": 0.00019977675769230246, "loss": 1.8714, "step": 282 }, { "epoch": 0.08846514535792435, "grad_norm": 0.2001953125, "learning_rate": 0.00019977511380395933, "loss": 2.0087, "step": 283 }, { "epoch": 0.08877774304470147, "grad_norm": 0.177734375, "learning_rate": 0.00019977346389206545, "loss": 2.1653, "step": 284 }, { "epoch": 0.08909034073147859, "grad_norm": 0.1845703125, "learning_rate": 0.00019977180795672044, "loss": 2.0311, "step": 285 }, { "epoch": 0.08940293841825571, "grad_norm": 0.1826171875, "learning_rate": 0.00019977014599802418, "loss": 1.8212, "step": 286 }, { "epoch": 0.08971553610503283, "grad_norm": 0.193359375, "learning_rate": 0.00019976847801607712, "loss": 2.0245, "step": 287 }, { "epoch": 0.09002813379180995, "grad_norm": 0.1806640625, "learning_rate": 0.0001997668040109799, "loss": 1.8573, "step": 288 }, { "epoch": 0.09034073147858705, "grad_norm": 0.1806640625, "learning_rate": 0.00019976512398283357, "loss": 1.7208, "step": 289 }, { "epoch": 0.09065332916536417, "grad_norm": 0.181640625, "learning_rate": 0.00019976343793173958, "loss": 1.7056, "step": 290 }, { "epoch": 0.09096592685214129, "grad_norm": 0.1806640625, "learning_rate": 0.00019976174585779972, "loss": 1.8874, "step": 291 }, { "epoch": 0.09127852453891841, "grad_norm": 0.181640625, "learning_rate": 0.00019976004776111613, "loss": 1.5886, "step": 292 }, { "epoch": 0.09159112222569553, "grad_norm": 0.181640625, "learning_rate": 0.00019975834364179134, "loss": 1.7725, "step": 293 }, { "epoch": 0.09190371991247265, "grad_norm": 0.189453125, "learning_rate": 0.0001997566334999282, "loss": 1.7855, "step": 294 }, { "epoch": 0.09221631759924977, "grad_norm": 0.1875, "learning_rate": 0.00019975491733563, "loss": 1.7919, "step": 295 }, { "epoch": 0.09252891528602689, "grad_norm": 0.185546875, "learning_rate": 0.00019975319514900028, "loss": 1.7353, "step": 296 }, { "epoch": 0.092841512972804, "grad_norm": 0.1826171875, "learning_rate": 0.00019975146694014312, "loss": 1.8983, "step": 297 }, { "epoch": 0.09315411065958112, "grad_norm": 0.185546875, "learning_rate": 0.00019974973270916273, "loss": 2.115, "step": 298 }, { "epoch": 0.09346670834635824, "grad_norm": 0.177734375, "learning_rate": 0.00019974799245616387, "loss": 1.9605, "step": 299 }, { "epoch": 0.09377930603313535, "grad_norm": 0.1953125, "learning_rate": 0.0001997462461812516, "loss": 1.9963, "step": 300 }, { "epoch": 0.09409190371991247, "grad_norm": 0.189453125, "learning_rate": 0.00019974449388453135, "loss": 1.8288, "step": 301 }, { "epoch": 0.09440450140668959, "grad_norm": 0.1904296875, "learning_rate": 0.0001997427355661089, "loss": 1.7948, "step": 302 }, { "epoch": 0.0947170990934667, "grad_norm": 0.1826171875, "learning_rate": 0.0001997409712260904, "loss": 1.868, "step": 303 }, { "epoch": 0.09502969678024382, "grad_norm": 0.1708984375, "learning_rate": 0.00019973920086458237, "loss": 1.8929, "step": 304 }, { "epoch": 0.09534229446702094, "grad_norm": 0.1796875, "learning_rate": 0.00019973742448169165, "loss": 1.6884, "step": 305 }, { "epoch": 0.09565489215379806, "grad_norm": 0.1826171875, "learning_rate": 0.00019973564207752554, "loss": 1.6901, "step": 306 }, { "epoch": 0.09596748984057518, "grad_norm": 0.1875, "learning_rate": 0.00019973385365219164, "loss": 1.7943, "step": 307 }, { "epoch": 0.0962800875273523, "grad_norm": 0.1875, "learning_rate": 0.0001997320592057979, "loss": 1.9581, "step": 308 }, { "epoch": 0.09659268521412942, "grad_norm": 0.1845703125, "learning_rate": 0.00019973025873845263, "loss": 1.6522, "step": 309 }, { "epoch": 0.09690528290090654, "grad_norm": 0.189453125, "learning_rate": 0.00019972845225026456, "loss": 1.9327, "step": 310 }, { "epoch": 0.09721788058768364, "grad_norm": 0.18359375, "learning_rate": 0.00019972663974134275, "loss": 1.9542, "step": 311 }, { "epoch": 0.09753047827446076, "grad_norm": 0.189453125, "learning_rate": 0.00019972482121179664, "loss": 2.0571, "step": 312 }, { "epoch": 0.09784307596123788, "grad_norm": 0.181640625, "learning_rate": 0.00019972299666173594, "loss": 2.2707, "step": 313 }, { "epoch": 0.098155673648015, "grad_norm": 0.185546875, "learning_rate": 0.0001997211660912709, "loss": 1.9587, "step": 314 }, { "epoch": 0.09846827133479212, "grad_norm": 0.189453125, "learning_rate": 0.00019971932950051198, "loss": 2.0126, "step": 315 }, { "epoch": 0.09878086902156924, "grad_norm": 0.1806640625, "learning_rate": 0.00019971748688957003, "loss": 1.7935, "step": 316 }, { "epoch": 0.09909346670834636, "grad_norm": 0.18359375, "learning_rate": 0.00019971563825855638, "loss": 1.8761, "step": 317 }, { "epoch": 0.09940606439512348, "grad_norm": 0.19921875, "learning_rate": 0.00019971378360758254, "loss": 2.2404, "step": 318 }, { "epoch": 0.0997186620819006, "grad_norm": 0.177734375, "learning_rate": 0.0001997119229367605, "loss": 1.8394, "step": 319 }, { "epoch": 0.10003125976867772, "grad_norm": 0.1845703125, "learning_rate": 0.00019971005624620265, "loss": 1.8923, "step": 320 }, { "epoch": 0.10034385745545484, "grad_norm": 0.1953125, "learning_rate": 0.00019970818353602163, "loss": 1.6077, "step": 321 }, { "epoch": 0.10065645514223195, "grad_norm": 0.1806640625, "learning_rate": 0.00019970630480633047, "loss": 1.8617, "step": 322 }, { "epoch": 0.10096905282900906, "grad_norm": 0.1845703125, "learning_rate": 0.0001997044200572427, "loss": 1.892, "step": 323 }, { "epoch": 0.10128165051578618, "grad_norm": 0.181640625, "learning_rate": 0.000199702529288872, "loss": 1.7457, "step": 324 }, { "epoch": 0.1015942482025633, "grad_norm": 0.173828125, "learning_rate": 0.00019970063250133256, "loss": 1.9309, "step": 325 }, { "epoch": 0.10190684588934042, "grad_norm": 0.1923828125, "learning_rate": 0.00019969872969473888, "loss": 1.905, "step": 326 }, { "epoch": 0.10221944357611754, "grad_norm": 0.1826171875, "learning_rate": 0.00019969682086920585, "loss": 1.697, "step": 327 }, { "epoch": 0.10253204126289465, "grad_norm": 0.1865234375, "learning_rate": 0.0001996949060248487, "loss": 1.8728, "step": 328 }, { "epoch": 0.10284463894967177, "grad_norm": 0.1796875, "learning_rate": 0.00019969298516178303, "loss": 1.7783, "step": 329 }, { "epoch": 0.10315723663644889, "grad_norm": 0.1806640625, "learning_rate": 0.0001996910582801248, "loss": 1.8591, "step": 330 }, { "epoch": 0.10346983432322601, "grad_norm": 0.181640625, "learning_rate": 0.00019968912537999034, "loss": 1.8009, "step": 331 }, { "epoch": 0.10378243201000313, "grad_norm": 0.177734375, "learning_rate": 0.00019968718646149635, "loss": 1.6679, "step": 332 }, { "epoch": 0.10409502969678025, "grad_norm": 0.1787109375, "learning_rate": 0.00019968524152475986, "loss": 1.9598, "step": 333 }, { "epoch": 0.10440762738355736, "grad_norm": 0.185546875, "learning_rate": 0.00019968329056989836, "loss": 1.7525, "step": 334 }, { "epoch": 0.10472022507033447, "grad_norm": 0.1875, "learning_rate": 0.00019968133359702956, "loss": 1.9891, "step": 335 }, { "epoch": 0.1050328227571116, "grad_norm": 0.27734375, "learning_rate": 0.00019967937060627163, "loss": 2.6398, "step": 336 }, { "epoch": 0.10534542044388871, "grad_norm": 0.1884765625, "learning_rate": 0.00019967740159774304, "loss": 1.8126, "step": 337 }, { "epoch": 0.10565801813066583, "grad_norm": 0.1845703125, "learning_rate": 0.0001996754265715627, "loss": 1.5844, "step": 338 }, { "epoch": 0.10597061581744295, "grad_norm": 0.443359375, "learning_rate": 0.00019967344552784987, "loss": 2.6948, "step": 339 }, { "epoch": 0.10628321350422007, "grad_norm": 0.1796875, "learning_rate": 0.00019967145846672412, "loss": 1.8124, "step": 340 }, { "epoch": 0.10659581119099719, "grad_norm": 0.17578125, "learning_rate": 0.00019966946538830537, "loss": 1.7512, "step": 341 }, { "epoch": 0.10690840887777431, "grad_norm": 0.203125, "learning_rate": 0.00019966746629271402, "loss": 1.886, "step": 342 }, { "epoch": 0.10722100656455143, "grad_norm": 0.1865234375, "learning_rate": 0.0001996654611800707, "loss": 1.8067, "step": 343 }, { "epoch": 0.10753360425132855, "grad_norm": 0.185546875, "learning_rate": 0.0001996634500504965, "loss": 1.8013, "step": 344 }, { "epoch": 0.10784620193810565, "grad_norm": 0.1904296875, "learning_rate": 0.00019966143290411282, "loss": 1.701, "step": 345 }, { "epoch": 0.10815879962488277, "grad_norm": 0.1953125, "learning_rate": 0.00019965940974104145, "loss": 1.6386, "step": 346 }, { "epoch": 0.10847139731165989, "grad_norm": 0.1962890625, "learning_rate": 0.0001996573805614045, "loss": 1.9652, "step": 347 }, { "epoch": 0.10878399499843701, "grad_norm": 0.189453125, "learning_rate": 0.0001996553453653245, "loss": 1.8178, "step": 348 }, { "epoch": 0.10909659268521413, "grad_norm": 0.177734375, "learning_rate": 0.00019965330415292428, "loss": 1.8802, "step": 349 }, { "epoch": 0.10940919037199125, "grad_norm": 0.1767578125, "learning_rate": 0.0001996512569243271, "loss": 1.6879, "step": 350 }, { "epoch": 0.10972178805876837, "grad_norm": 0.1923828125, "learning_rate": 0.0001996492036796566, "loss": 1.8288, "step": 351 }, { "epoch": 0.11003438574554548, "grad_norm": 0.1826171875, "learning_rate": 0.00019964714441903663, "loss": 1.8453, "step": 352 }, { "epoch": 0.1103469834323226, "grad_norm": 0.1904296875, "learning_rate": 0.00019964507914259157, "loss": 1.8259, "step": 353 }, { "epoch": 0.11065958111909972, "grad_norm": 0.1884765625, "learning_rate": 0.00019964300785044615, "loss": 1.9748, "step": 354 }, { "epoch": 0.11097217880587684, "grad_norm": 0.18359375, "learning_rate": 0.00019964093054272535, "loss": 2.0296, "step": 355 }, { "epoch": 0.11128477649265396, "grad_norm": 0.1806640625, "learning_rate": 0.0001996388472195546, "loss": 2.1065, "step": 356 }, { "epoch": 0.11159737417943107, "grad_norm": 0.19140625, "learning_rate": 0.00019963675788105967, "loss": 1.712, "step": 357 }, { "epoch": 0.11190997186620819, "grad_norm": 0.173828125, "learning_rate": 0.0001996346625273667, "loss": 2.178, "step": 358 }, { "epoch": 0.1122225695529853, "grad_norm": 0.2001953125, "learning_rate": 0.00019963256115860219, "loss": 1.6854, "step": 359 }, { "epoch": 0.11253516723976242, "grad_norm": 0.1845703125, "learning_rate": 0.00019963045377489297, "loss": 1.7912, "step": 360 }, { "epoch": 0.11284776492653954, "grad_norm": 0.181640625, "learning_rate": 0.00019962834037636634, "loss": 1.7385, "step": 361 }, { "epoch": 0.11316036261331666, "grad_norm": 0.19140625, "learning_rate": 0.00019962622096314983, "loss": 1.787, "step": 362 }, { "epoch": 0.11347296030009378, "grad_norm": 0.185546875, "learning_rate": 0.00019962409553537141, "loss": 1.7083, "step": 363 }, { "epoch": 0.1137855579868709, "grad_norm": 0.177734375, "learning_rate": 0.00019962196409315937, "loss": 1.7489, "step": 364 }, { "epoch": 0.11409815567364802, "grad_norm": 0.1875, "learning_rate": 0.00019961982663664244, "loss": 1.8184, "step": 365 }, { "epoch": 0.11441075336042514, "grad_norm": 0.181640625, "learning_rate": 0.0001996176831659496, "loss": 1.924, "step": 366 }, { "epoch": 0.11472335104720226, "grad_norm": 0.1904296875, "learning_rate": 0.0001996155336812103, "loss": 2.1837, "step": 367 }, { "epoch": 0.11503594873397936, "grad_norm": 0.1806640625, "learning_rate": 0.00019961337818255424, "loss": 1.9305, "step": 368 }, { "epoch": 0.11534854642075648, "grad_norm": 0.1923828125, "learning_rate": 0.00019961121667011166, "loss": 1.9867, "step": 369 }, { "epoch": 0.1156611441075336, "grad_norm": 0.1904296875, "learning_rate": 0.00019960904914401298, "loss": 1.968, "step": 370 }, { "epoch": 0.11597374179431072, "grad_norm": 0.1845703125, "learning_rate": 0.00019960687560438908, "loss": 1.6922, "step": 371 }, { "epoch": 0.11628633948108784, "grad_norm": 0.169921875, "learning_rate": 0.00019960469605137114, "loss": 1.7978, "step": 372 }, { "epoch": 0.11659893716786496, "grad_norm": 0.189453125, "learning_rate": 0.0001996025104850908, "loss": 1.8674, "step": 373 }, { "epoch": 0.11691153485464208, "grad_norm": 0.1796875, "learning_rate": 0.00019960031890567997, "loss": 1.7445, "step": 374 }, { "epoch": 0.1172241325414192, "grad_norm": 0.185546875, "learning_rate": 0.00019959812131327095, "loss": 1.7513, "step": 375 }, { "epoch": 0.11753673022819631, "grad_norm": 0.1923828125, "learning_rate": 0.00019959591770799643, "loss": 1.7463, "step": 376 }, { "epoch": 0.11784932791497343, "grad_norm": 0.189453125, "learning_rate": 0.00019959370808998945, "loss": 1.6496, "step": 377 }, { "epoch": 0.11816192560175055, "grad_norm": 0.18359375, "learning_rate": 0.0001995914924593834, "loss": 1.6407, "step": 378 }, { "epoch": 0.11847452328852766, "grad_norm": 0.19140625, "learning_rate": 0.00019958927081631205, "loss": 1.9992, "step": 379 }, { "epoch": 0.11878712097530478, "grad_norm": 0.1875, "learning_rate": 0.0001995870431609095, "loss": 1.7538, "step": 380 }, { "epoch": 0.1190997186620819, "grad_norm": 0.1845703125, "learning_rate": 0.00019958480949331024, "loss": 1.6851, "step": 381 }, { "epoch": 0.11941231634885902, "grad_norm": 0.189453125, "learning_rate": 0.00019958256981364916, "loss": 1.7887, "step": 382 }, { "epoch": 0.11972491403563613, "grad_norm": 0.181640625, "learning_rate": 0.00019958032412206142, "loss": 1.8162, "step": 383 }, { "epoch": 0.12003751172241325, "grad_norm": 0.1875, "learning_rate": 0.0001995780724186826, "loss": 1.8541, "step": 384 }, { "epoch": 0.12035010940919037, "grad_norm": 0.1875, "learning_rate": 0.00019957581470364869, "loss": 1.8194, "step": 385 }, { "epoch": 0.12066270709596749, "grad_norm": 0.20703125, "learning_rate": 0.0001995735509770959, "loss": 1.7891, "step": 386 }, { "epoch": 0.12097530478274461, "grad_norm": 0.1884765625, "learning_rate": 0.00019957128123916103, "loss": 1.992, "step": 387 }, { "epoch": 0.12128790246952173, "grad_norm": 0.1845703125, "learning_rate": 0.00019956900548998097, "loss": 1.9259, "step": 388 }, { "epoch": 0.12160050015629885, "grad_norm": 0.1845703125, "learning_rate": 0.00019956672372969315, "loss": 2.0642, "step": 389 }, { "epoch": 0.12191309784307595, "grad_norm": 0.1826171875, "learning_rate": 0.0001995644359584354, "loss": 1.6211, "step": 390 }, { "epoch": 0.12222569552985307, "grad_norm": 0.1904296875, "learning_rate": 0.00019956214217634575, "loss": 1.7604, "step": 391 }, { "epoch": 0.12253829321663019, "grad_norm": 0.177734375, "learning_rate": 0.00019955984238356268, "loss": 1.8761, "step": 392 }, { "epoch": 0.12285089090340731, "grad_norm": 0.193359375, "learning_rate": 0.0001995575365802251, "loss": 2.0069, "step": 393 }, { "epoch": 0.12316348859018443, "grad_norm": 0.1845703125, "learning_rate": 0.0001995552247664721, "loss": 1.7372, "step": 394 }, { "epoch": 0.12347608627696155, "grad_norm": 0.1923828125, "learning_rate": 0.00019955290694244338, "loss": 1.8025, "step": 395 }, { "epoch": 0.12378868396373867, "grad_norm": 0.1884765625, "learning_rate": 0.00019955058310827878, "loss": 1.8633, "step": 396 }, { "epoch": 0.12410128165051579, "grad_norm": 0.1787109375, "learning_rate": 0.00019954825326411863, "loss": 1.9765, "step": 397 }, { "epoch": 0.1244138793372929, "grad_norm": 0.197265625, "learning_rate": 0.0001995459174101036, "loss": 1.6959, "step": 398 }, { "epoch": 0.12472647702407003, "grad_norm": 0.1865234375, "learning_rate": 0.0001995435755463746, "loss": 1.6401, "step": 399 }, { "epoch": 0.12503907471084713, "grad_norm": 0.185546875, "learning_rate": 0.00019954122767307318, "loss": 2.1424, "step": 400 }, { "epoch": 0.12535167239762426, "grad_norm": 0.17578125, "learning_rate": 0.00019953887379034094, "loss": 1.9393, "step": 401 }, { "epoch": 0.12566427008440137, "grad_norm": 0.193359375, "learning_rate": 0.00019953651389832008, "loss": 1.8414, "step": 402 }, { "epoch": 0.1259768677711785, "grad_norm": 0.2001953125, "learning_rate": 0.00019953414799715304, "loss": 1.9348, "step": 403 }, { "epoch": 0.1262894654579556, "grad_norm": 0.1904296875, "learning_rate": 0.00019953177608698263, "loss": 1.6774, "step": 404 }, { "epoch": 0.12660206314473274, "grad_norm": 0.18359375, "learning_rate": 0.00019952939816795205, "loss": 1.9635, "step": 405 }, { "epoch": 0.12691466083150985, "grad_norm": 0.189453125, "learning_rate": 0.0001995270142402049, "loss": 1.788, "step": 406 }, { "epoch": 0.12722725851828695, "grad_norm": 0.177734375, "learning_rate": 0.00019952462430388506, "loss": 1.7256, "step": 407 }, { "epoch": 0.12753985620506408, "grad_norm": 0.1884765625, "learning_rate": 0.00019952222835913682, "loss": 1.8476, "step": 408 }, { "epoch": 0.1278524538918412, "grad_norm": 0.19140625, "learning_rate": 0.00019951982640610484, "loss": 1.9212, "step": 409 }, { "epoch": 0.12816505157861832, "grad_norm": 0.2001953125, "learning_rate": 0.00019951741844493413, "loss": 1.807, "step": 410 }, { "epoch": 0.12847764926539543, "grad_norm": 0.1865234375, "learning_rate": 0.00019951500447577003, "loss": 1.6015, "step": 411 }, { "epoch": 0.12879024695217256, "grad_norm": 0.1845703125, "learning_rate": 0.00019951258449875828, "loss": 1.8802, "step": 412 }, { "epoch": 0.12910284463894967, "grad_norm": 0.1884765625, "learning_rate": 0.00019951015851404504, "loss": 1.9614, "step": 413 }, { "epoch": 0.1294154423257268, "grad_norm": 0.197265625, "learning_rate": 0.0001995077265217767, "loss": 1.8907, "step": 414 }, { "epoch": 0.1297280400125039, "grad_norm": 0.197265625, "learning_rate": 0.00019950528852210014, "loss": 1.8123, "step": 415 }, { "epoch": 0.13004063769928104, "grad_norm": 0.18359375, "learning_rate": 0.00019950284451516245, "loss": 1.6966, "step": 416 }, { "epoch": 0.13035323538605814, "grad_norm": 0.1865234375, "learning_rate": 0.00019950039450111127, "loss": 2.0439, "step": 417 }, { "epoch": 0.13066583307283527, "grad_norm": 0.185546875, "learning_rate": 0.00019949793848009448, "loss": 1.9781, "step": 418 }, { "epoch": 0.13097843075961238, "grad_norm": 0.1884765625, "learning_rate": 0.00019949547645226035, "loss": 1.9264, "step": 419 }, { "epoch": 0.13129102844638948, "grad_norm": 0.197265625, "learning_rate": 0.00019949300841775753, "loss": 2.0297, "step": 420 }, { "epoch": 0.13160362613316662, "grad_norm": 0.19140625, "learning_rate": 0.000199490534376735, "loss": 1.9136, "step": 421 }, { "epoch": 0.13191622381994372, "grad_norm": 0.1904296875, "learning_rate": 0.00019948805432934213, "loss": 1.8224, "step": 422 }, { "epoch": 0.13222882150672086, "grad_norm": 0.1923828125, "learning_rate": 0.00019948556827572862, "loss": 1.7871, "step": 423 }, { "epoch": 0.13254141919349796, "grad_norm": 0.1962890625, "learning_rate": 0.00019948307621604457, "loss": 1.7048, "step": 424 }, { "epoch": 0.1328540168802751, "grad_norm": 0.1904296875, "learning_rate": 0.00019948057815044048, "loss": 1.9041, "step": 425 }, { "epoch": 0.1331666145670522, "grad_norm": 0.1796875, "learning_rate": 0.0001994780740790671, "loss": 1.7443, "step": 426 }, { "epoch": 0.13347921225382933, "grad_norm": 0.189453125, "learning_rate": 0.0001994755640020756, "loss": 1.6474, "step": 427 }, { "epoch": 0.13379180994060644, "grad_norm": 0.1962890625, "learning_rate": 0.00019947304791961758, "loss": 1.8303, "step": 428 }, { "epoch": 0.13410440762738357, "grad_norm": 0.1962890625, "learning_rate": 0.00019947052583184488, "loss": 1.64, "step": 429 }, { "epoch": 0.13441700531416068, "grad_norm": 0.189453125, "learning_rate": 0.00019946799773890974, "loss": 1.7586, "step": 430 }, { "epoch": 0.13472960300093778, "grad_norm": 0.1826171875, "learning_rate": 0.00019946546364096488, "loss": 1.8402, "step": 431 }, { "epoch": 0.13504220068771491, "grad_norm": 0.64453125, "learning_rate": 0.00019946292353816318, "loss": 2.2409, "step": 432 }, { "epoch": 0.13535479837449202, "grad_norm": 0.193359375, "learning_rate": 0.0001994603774306581, "loss": 1.8416, "step": 433 }, { "epoch": 0.13566739606126915, "grad_norm": 0.181640625, "learning_rate": 0.00019945782531860325, "loss": 1.7372, "step": 434 }, { "epoch": 0.13597999374804626, "grad_norm": 0.1923828125, "learning_rate": 0.00019945526720215273, "loss": 1.9704, "step": 435 }, { "epoch": 0.1362925914348234, "grad_norm": 0.185546875, "learning_rate": 0.00019945270308146103, "loss": 1.6651, "step": 436 }, { "epoch": 0.1366051891216005, "grad_norm": 0.19921875, "learning_rate": 0.00019945013295668288, "loss": 1.7958, "step": 437 }, { "epoch": 0.13691778680837763, "grad_norm": 0.1904296875, "learning_rate": 0.0001994475568279735, "loss": 2.0826, "step": 438 }, { "epoch": 0.13723038449515473, "grad_norm": 0.19140625, "learning_rate": 0.00019944497469548837, "loss": 1.8808, "step": 439 }, { "epoch": 0.13754298218193187, "grad_norm": 0.2041015625, "learning_rate": 0.00019944238655938339, "loss": 2.257, "step": 440 }, { "epoch": 0.13785557986870897, "grad_norm": 0.1796875, "learning_rate": 0.0001994397924198148, "loss": 2.0791, "step": 441 }, { "epoch": 0.13816817755548608, "grad_norm": 0.193359375, "learning_rate": 0.00019943719227693928, "loss": 1.8917, "step": 442 }, { "epoch": 0.1384807752422632, "grad_norm": 0.2001953125, "learning_rate": 0.0001994345861309137, "loss": 1.8261, "step": 443 }, { "epoch": 0.13879337292904032, "grad_norm": 0.189453125, "learning_rate": 0.00019943197398189546, "loss": 1.626, "step": 444 }, { "epoch": 0.13910597061581745, "grad_norm": 0.193359375, "learning_rate": 0.00019942935583004223, "loss": 1.7819, "step": 445 }, { "epoch": 0.13941856830259455, "grad_norm": 0.19921875, "learning_rate": 0.0001994267316755121, "loss": 1.8149, "step": 446 }, { "epoch": 0.1397311659893717, "grad_norm": 0.1796875, "learning_rate": 0.00019942410151846347, "loss": 1.9703, "step": 447 }, { "epoch": 0.1400437636761488, "grad_norm": 0.1884765625, "learning_rate": 0.00019942146535905514, "loss": 1.7519, "step": 448 }, { "epoch": 0.14035636136292592, "grad_norm": 0.201171875, "learning_rate": 0.00019941882319744625, "loss": 1.8088, "step": 449 }, { "epoch": 0.14066895904970303, "grad_norm": 0.1953125, "learning_rate": 0.0001994161750337963, "loss": 2.0352, "step": 450 }, { "epoch": 0.14098155673648016, "grad_norm": 0.19921875, "learning_rate": 0.0001994135208682652, "loss": 1.7832, "step": 451 }, { "epoch": 0.14129415442325727, "grad_norm": 0.1865234375, "learning_rate": 0.00019941086070101314, "loss": 1.7351, "step": 452 }, { "epoch": 0.14160675211003437, "grad_norm": 0.1845703125, "learning_rate": 0.00019940819453220074, "loss": 1.9127, "step": 453 }, { "epoch": 0.1419193497968115, "grad_norm": 0.478515625, "learning_rate": 0.00019940552236198897, "loss": 2.6953, "step": 454 }, { "epoch": 0.1422319474835886, "grad_norm": 0.1962890625, "learning_rate": 0.00019940284419053914, "loss": 2.0053, "step": 455 }, { "epoch": 0.14254454517036574, "grad_norm": 0.1865234375, "learning_rate": 0.00019940016001801294, "loss": 1.7283, "step": 456 }, { "epoch": 0.14285714285714285, "grad_norm": 0.1923828125, "learning_rate": 0.0001993974698445724, "loss": 1.7655, "step": 457 }, { "epoch": 0.14316974054391998, "grad_norm": 0.19921875, "learning_rate": 0.00019939477367037994, "loss": 1.8373, "step": 458 }, { "epoch": 0.1434823382306971, "grad_norm": 0.1962890625, "learning_rate": 0.00019939207149559835, "loss": 1.8626, "step": 459 }, { "epoch": 0.14379493591747422, "grad_norm": 0.1943359375, "learning_rate": 0.00019938936332039077, "loss": 1.6125, "step": 460 }, { "epoch": 0.14410753360425133, "grad_norm": 0.2021484375, "learning_rate": 0.00019938664914492062, "loss": 2.0307, "step": 461 }, { "epoch": 0.14442013129102846, "grad_norm": 0.193359375, "learning_rate": 0.00019938392896935183, "loss": 1.84, "step": 462 }, { "epoch": 0.14473272897780556, "grad_norm": 0.19921875, "learning_rate": 0.0001993812027938486, "loss": 1.9634, "step": 463 }, { "epoch": 0.14504532666458267, "grad_norm": 0.1953125, "learning_rate": 0.00019937847061857552, "loss": 2.0152, "step": 464 }, { "epoch": 0.1453579243513598, "grad_norm": 0.201171875, "learning_rate": 0.00019937573244369753, "loss": 1.8692, "step": 465 }, { "epoch": 0.1456705220381369, "grad_norm": 0.19140625, "learning_rate": 0.00019937298826937995, "loss": 1.7805, "step": 466 }, { "epoch": 0.14598311972491404, "grad_norm": 0.197265625, "learning_rate": 0.00019937023809578843, "loss": 1.9569, "step": 467 }, { "epoch": 0.14629571741169115, "grad_norm": 0.1865234375, "learning_rate": 0.000199367481923089, "loss": 1.9791, "step": 468 }, { "epoch": 0.14660831509846828, "grad_norm": 0.189453125, "learning_rate": 0.00019936471975144805, "loss": 1.7193, "step": 469 }, { "epoch": 0.14692091278524538, "grad_norm": 0.19140625, "learning_rate": 0.00019936195158103237, "loss": 1.7506, "step": 470 }, { "epoch": 0.14723351047202252, "grad_norm": 0.1865234375, "learning_rate": 0.00019935917741200902, "loss": 1.9867, "step": 471 }, { "epoch": 0.14754610815879962, "grad_norm": 0.1953125, "learning_rate": 0.00019935639724454556, "loss": 1.8894, "step": 472 }, { "epoch": 0.14785870584557675, "grad_norm": 0.197265625, "learning_rate": 0.00019935361107880977, "loss": 1.7917, "step": 473 }, { "epoch": 0.14817130353235386, "grad_norm": 0.1884765625, "learning_rate": 0.00019935081891496985, "loss": 1.9643, "step": 474 }, { "epoch": 0.14848390121913096, "grad_norm": 0.1962890625, "learning_rate": 0.0001993480207531944, "loss": 1.6624, "step": 475 }, { "epoch": 0.1487964989059081, "grad_norm": 0.1943359375, "learning_rate": 0.00019934521659365235, "loss": 1.5768, "step": 476 }, { "epoch": 0.1491090965926852, "grad_norm": 0.1884765625, "learning_rate": 0.00019934240643651298, "loss": 1.8556, "step": 477 }, { "epoch": 0.14942169427946234, "grad_norm": 0.189453125, "learning_rate": 0.00019933959028194592, "loss": 1.9329, "step": 478 }, { "epoch": 0.14973429196623944, "grad_norm": 0.203125, "learning_rate": 0.0001993367681301212, "loss": 1.7054, "step": 479 }, { "epoch": 0.15004688965301657, "grad_norm": 0.1904296875, "learning_rate": 0.0001993339399812092, "loss": 1.8809, "step": 480 }, { "epoch": 0.15035948733979368, "grad_norm": 0.1865234375, "learning_rate": 0.0001993311058353807, "loss": 1.5983, "step": 481 }, { "epoch": 0.1506720850265708, "grad_norm": 0.1865234375, "learning_rate": 0.00019932826569280673, "loss": 1.7169, "step": 482 }, { "epoch": 0.15098468271334792, "grad_norm": 0.1953125, "learning_rate": 0.00019932541955365883, "loss": 1.9345, "step": 483 }, { "epoch": 0.15129728040012505, "grad_norm": 0.1982421875, "learning_rate": 0.00019932256741810874, "loss": 2.1597, "step": 484 }, { "epoch": 0.15160987808690216, "grad_norm": 0.1826171875, "learning_rate": 0.0001993197092863287, "loss": 1.5661, "step": 485 }, { "epoch": 0.1519224757736793, "grad_norm": 0.19140625, "learning_rate": 0.0001993168451584912, "loss": 1.8121, "step": 486 }, { "epoch": 0.1522350734604564, "grad_norm": 0.18359375, "learning_rate": 0.00019931397503476924, "loss": 1.7365, "step": 487 }, { "epoch": 0.1525476711472335, "grad_norm": 0.1962890625, "learning_rate": 0.00019931109891533605, "loss": 1.6982, "step": 488 }, { "epoch": 0.15286026883401063, "grad_norm": 0.189453125, "learning_rate": 0.00019930821680036527, "loss": 1.9638, "step": 489 }, { "epoch": 0.15317286652078774, "grad_norm": 0.201171875, "learning_rate": 0.00019930532869003086, "loss": 2.1991, "step": 490 }, { "epoch": 0.15348546420756487, "grad_norm": 0.1923828125, "learning_rate": 0.00019930243458450724, "loss": 1.8095, "step": 491 }, { "epoch": 0.15379806189434198, "grad_norm": 0.177734375, "learning_rate": 0.0001992995344839691, "loss": 1.9021, "step": 492 }, { "epoch": 0.1541106595811191, "grad_norm": 0.19921875, "learning_rate": 0.0001992966283885915, "loss": 1.9448, "step": 493 }, { "epoch": 0.1544232572678962, "grad_norm": 0.19921875, "learning_rate": 0.00019929371629854992, "loss": 1.9806, "step": 494 }, { "epoch": 0.15473585495467335, "grad_norm": 0.1982421875, "learning_rate": 0.0001992907982140202, "loss": 1.7495, "step": 495 }, { "epoch": 0.15504845264145045, "grad_norm": 0.203125, "learning_rate": 0.00019928787413517842, "loss": 2.0022, "step": 496 }, { "epoch": 0.15536105032822758, "grad_norm": 0.193359375, "learning_rate": 0.00019928494406220115, "loss": 1.7185, "step": 497 }, { "epoch": 0.1556736480150047, "grad_norm": 0.1943359375, "learning_rate": 0.00019928200799526532, "loss": 2.0288, "step": 498 }, { "epoch": 0.1559862457017818, "grad_norm": 0.1923828125, "learning_rate": 0.00019927906593454812, "loss": 1.7969, "step": 499 }, { "epoch": 0.15629884338855893, "grad_norm": 0.1943359375, "learning_rate": 0.0001992761178802272, "loss": 2.1816, "step": 500 }, { "epoch": 0.15661144107533603, "grad_norm": 0.1953125, "learning_rate": 0.00019927316383248054, "loss": 1.8524, "step": 501 }, { "epoch": 0.15692403876211317, "grad_norm": 0.1923828125, "learning_rate": 0.00019927020379148646, "loss": 1.6543, "step": 502 }, { "epoch": 0.15723663644889027, "grad_norm": 0.203125, "learning_rate": 0.0001992672377574237, "loss": 1.7662, "step": 503 }, { "epoch": 0.1575492341356674, "grad_norm": 0.1953125, "learning_rate": 0.0001992642657304713, "loss": 1.8305, "step": 504 }, { "epoch": 0.1578618318224445, "grad_norm": 0.1845703125, "learning_rate": 0.00019926128771080868, "loss": 1.6887, "step": 505 }, { "epoch": 0.15817442950922164, "grad_norm": 0.1953125, "learning_rate": 0.00019925830369861564, "loss": 1.9668, "step": 506 }, { "epoch": 0.15848702719599875, "grad_norm": 0.2021484375, "learning_rate": 0.00019925531369407228, "loss": 1.8739, "step": 507 }, { "epoch": 0.15879962488277588, "grad_norm": 0.1962890625, "learning_rate": 0.00019925231769735917, "loss": 1.8289, "step": 508 }, { "epoch": 0.15911222256955299, "grad_norm": 0.185546875, "learning_rate": 0.0001992493157086572, "loss": 1.9057, "step": 509 }, { "epoch": 0.1594248202563301, "grad_norm": 0.1904296875, "learning_rate": 0.00019924630772814753, "loss": 1.8643, "step": 510 }, { "epoch": 0.15973741794310722, "grad_norm": 0.2080078125, "learning_rate": 0.00019924329375601177, "loss": 1.8911, "step": 511 }, { "epoch": 0.16005001562988433, "grad_norm": 0.1962890625, "learning_rate": 0.00019924027379243192, "loss": 1.6922, "step": 512 }, { "epoch": 0.16036261331666146, "grad_norm": 0.1923828125, "learning_rate": 0.0001992372478375903, "loss": 1.9621, "step": 513 }, { "epoch": 0.16067521100343857, "grad_norm": 0.1962890625, "learning_rate": 0.00019923421589166954, "loss": 1.8731, "step": 514 }, { "epoch": 0.1609878086902157, "grad_norm": 0.201171875, "learning_rate": 0.00019923117795485272, "loss": 1.6659, "step": 515 }, { "epoch": 0.1613004063769928, "grad_norm": 0.2080078125, "learning_rate": 0.00019922813402732325, "loss": 1.9896, "step": 516 }, { "epoch": 0.16161300406376994, "grad_norm": 0.1982421875, "learning_rate": 0.00019922508410926489, "loss": 1.8087, "step": 517 }, { "epoch": 0.16192560175054704, "grad_norm": 0.19921875, "learning_rate": 0.00019922202820086171, "loss": 2.0338, "step": 518 }, { "epoch": 0.16223819943732418, "grad_norm": 0.1884765625, "learning_rate": 0.00019921896630229827, "loss": 1.8984, "step": 519 }, { "epoch": 0.16255079712410128, "grad_norm": 0.205078125, "learning_rate": 0.0001992158984137594, "loss": 1.7892, "step": 520 }, { "epoch": 0.1628633948108784, "grad_norm": 0.19921875, "learning_rate": 0.00019921282453543032, "loss": 1.6763, "step": 521 }, { "epoch": 0.16317599249765552, "grad_norm": 0.185546875, "learning_rate": 0.0001992097446674966, "loss": 1.8474, "step": 522 }, { "epoch": 0.16348859018443263, "grad_norm": 0.193359375, "learning_rate": 0.00019920665881014416, "loss": 1.9876, "step": 523 }, { "epoch": 0.16380118787120976, "grad_norm": 0.1904296875, "learning_rate": 0.0001992035669635593, "loss": 1.7454, "step": 524 }, { "epoch": 0.16411378555798686, "grad_norm": 0.1904296875, "learning_rate": 0.0001992004691279287, "loss": 1.9164, "step": 525 }, { "epoch": 0.164426383244764, "grad_norm": 0.1923828125, "learning_rate": 0.00019919736530343935, "loss": 1.9096, "step": 526 }, { "epoch": 0.1647389809315411, "grad_norm": 0.1953125, "learning_rate": 0.00019919425549027865, "loss": 1.9148, "step": 527 }, { "epoch": 0.16505157861831823, "grad_norm": 0.1953125, "learning_rate": 0.00019919113968863437, "loss": 1.9967, "step": 528 }, { "epoch": 0.16536417630509534, "grad_norm": 0.2109375, "learning_rate": 0.00019918801789869453, "loss": 1.9329, "step": 529 }, { "epoch": 0.16567677399187247, "grad_norm": 0.19921875, "learning_rate": 0.00019918489012064772, "loss": 1.9399, "step": 530 }, { "epoch": 0.16598937167864958, "grad_norm": 0.1904296875, "learning_rate": 0.00019918175635468265, "loss": 1.9082, "step": 531 }, { "epoch": 0.16630196936542668, "grad_norm": 0.193359375, "learning_rate": 0.00019917861660098858, "loss": 1.9138, "step": 532 }, { "epoch": 0.16661456705220382, "grad_norm": 0.1904296875, "learning_rate": 0.00019917547085975505, "loss": 1.7534, "step": 533 }, { "epoch": 0.16692716473898092, "grad_norm": 0.181640625, "learning_rate": 0.00019917231913117197, "loss": 1.8574, "step": 534 }, { "epoch": 0.16723976242575805, "grad_norm": 0.19921875, "learning_rate": 0.0001991691614154296, "loss": 1.7967, "step": 535 }, { "epoch": 0.16755236011253516, "grad_norm": 0.1943359375, "learning_rate": 0.00019916599771271855, "loss": 1.765, "step": 536 }, { "epoch": 0.1678649577993123, "grad_norm": 0.1962890625, "learning_rate": 0.00019916282802322989, "loss": 1.9999, "step": 537 }, { "epoch": 0.1681775554860894, "grad_norm": 0.197265625, "learning_rate": 0.00019915965234715491, "loss": 1.9353, "step": 538 }, { "epoch": 0.16849015317286653, "grad_norm": 0.1962890625, "learning_rate": 0.00019915647068468538, "loss": 1.8003, "step": 539 }, { "epoch": 0.16880275085964364, "grad_norm": 0.19921875, "learning_rate": 0.00019915328303601334, "loss": 2.1542, "step": 540 }, { "epoch": 0.16911534854642077, "grad_norm": 0.2158203125, "learning_rate": 0.00019915008940133127, "loss": 1.9446, "step": 541 }, { "epoch": 0.16942794623319787, "grad_norm": 0.1982421875, "learning_rate": 0.00019914688978083192, "loss": 2.0184, "step": 542 }, { "epoch": 0.16974054391997498, "grad_norm": 0.1875, "learning_rate": 0.00019914368417470852, "loss": 1.8707, "step": 543 }, { "epoch": 0.1700531416067521, "grad_norm": 0.1982421875, "learning_rate": 0.00019914047258315457, "loss": 1.8503, "step": 544 }, { "epoch": 0.17036573929352922, "grad_norm": 0.1884765625, "learning_rate": 0.00019913725500636393, "loss": 1.9382, "step": 545 }, { "epoch": 0.17067833698030635, "grad_norm": 0.1962890625, "learning_rate": 0.00019913403144453088, "loss": 1.6436, "step": 546 }, { "epoch": 0.17099093466708346, "grad_norm": 0.197265625, "learning_rate": 0.00019913080189785002, "loss": 2.0155, "step": 547 }, { "epoch": 0.1713035323538606, "grad_norm": 0.1875, "learning_rate": 0.00019912756636651638, "loss": 1.9679, "step": 548 }, { "epoch": 0.1716161300406377, "grad_norm": 0.197265625, "learning_rate": 0.00019912432485072516, "loss": 1.619, "step": 549 }, { "epoch": 0.17192872772741483, "grad_norm": 0.1943359375, "learning_rate": 0.0001991210773506722, "loss": 1.8251, "step": 550 }, { "epoch": 0.17224132541419193, "grad_norm": 0.197265625, "learning_rate": 0.00019911782386655341, "loss": 1.9356, "step": 551 }, { "epoch": 0.17255392310096906, "grad_norm": 0.193359375, "learning_rate": 0.00019911456439856536, "loss": 1.7967, "step": 552 }, { "epoch": 0.17286652078774617, "grad_norm": 0.1953125, "learning_rate": 0.00019911129894690475, "loss": 1.7887, "step": 553 }, { "epoch": 0.17317911847452327, "grad_norm": 0.201171875, "learning_rate": 0.00019910802751176867, "loss": 1.8225, "step": 554 }, { "epoch": 0.1734917161613004, "grad_norm": 0.1943359375, "learning_rate": 0.00019910475009335472, "loss": 1.7761, "step": 555 }, { "epoch": 0.1738043138480775, "grad_norm": 0.1943359375, "learning_rate": 0.0001991014666918607, "loss": 1.917, "step": 556 }, { "epoch": 0.17411691153485465, "grad_norm": 0.1865234375, "learning_rate": 0.00019909817730748487, "loss": 1.707, "step": 557 }, { "epoch": 0.17442950922163175, "grad_norm": 0.1953125, "learning_rate": 0.00019909488194042575, "loss": 2.2473, "step": 558 }, { "epoch": 0.17474210690840888, "grad_norm": 0.1953125, "learning_rate": 0.00019909158059088235, "loss": 1.5952, "step": 559 }, { "epoch": 0.175054704595186, "grad_norm": 0.1923828125, "learning_rate": 0.000199088273259054, "loss": 1.6575, "step": 560 }, { "epoch": 0.17536730228196312, "grad_norm": 0.2001953125, "learning_rate": 0.00019908495994514026, "loss": 1.9749, "step": 561 }, { "epoch": 0.17567989996874023, "grad_norm": 0.1923828125, "learning_rate": 0.00019908164064934126, "loss": 1.681, "step": 562 }, { "epoch": 0.17599249765551736, "grad_norm": 0.1806640625, "learning_rate": 0.00019907831537185734, "loss": 1.7532, "step": 563 }, { "epoch": 0.17630509534229447, "grad_norm": 0.19140625, "learning_rate": 0.00019907498411288925, "loss": 2.0639, "step": 564 }, { "epoch": 0.1766176930290716, "grad_norm": 0.2119140625, "learning_rate": 0.00019907164687263813, "loss": 2.1285, "step": 565 }, { "epoch": 0.1769302907158487, "grad_norm": 0.189453125, "learning_rate": 0.00019906830365130546, "loss": 1.7988, "step": 566 }, { "epoch": 0.1772428884026258, "grad_norm": 0.1923828125, "learning_rate": 0.00019906495444909302, "loss": 1.6593, "step": 567 }, { "epoch": 0.17755548608940294, "grad_norm": 0.1904296875, "learning_rate": 0.00019906159926620306, "loss": 1.8094, "step": 568 }, { "epoch": 0.17786808377618005, "grad_norm": 0.1826171875, "learning_rate": 0.00019905823810283812, "loss": 1.6249, "step": 569 }, { "epoch": 0.17818068146295718, "grad_norm": 0.185546875, "learning_rate": 0.0001990548709592011, "loss": 1.6268, "step": 570 }, { "epoch": 0.17849327914973429, "grad_norm": 0.1962890625, "learning_rate": 0.00019905149783549532, "loss": 1.5067, "step": 571 }, { "epoch": 0.17880587683651142, "grad_norm": 0.19140625, "learning_rate": 0.00019904811873192437, "loss": 1.7792, "step": 572 }, { "epoch": 0.17911847452328852, "grad_norm": 0.1845703125, "learning_rate": 0.0001990447336486923, "loss": 1.7893, "step": 573 }, { "epoch": 0.17943107221006566, "grad_norm": 0.2001953125, "learning_rate": 0.0001990413425860034, "loss": 1.7304, "step": 574 }, { "epoch": 0.17974366989684276, "grad_norm": 0.193359375, "learning_rate": 0.00019903794554406248, "loss": 1.9092, "step": 575 }, { "epoch": 0.1800562675836199, "grad_norm": 0.193359375, "learning_rate": 0.00019903454252307454, "loss": 1.6916, "step": 576 }, { "epoch": 0.180368865270397, "grad_norm": 0.1943359375, "learning_rate": 0.0001990311335232451, "loss": 1.7437, "step": 577 }, { "epoch": 0.1806814629571741, "grad_norm": 0.203125, "learning_rate": 0.00019902771854477994, "loss": 1.7296, "step": 578 }, { "epoch": 0.18099406064395124, "grad_norm": 0.2001953125, "learning_rate": 0.0001990242975878852, "loss": 1.6257, "step": 579 }, { "epoch": 0.18130665833072834, "grad_norm": 0.2041015625, "learning_rate": 0.0001990208706527674, "loss": 1.6635, "step": 580 }, { "epoch": 0.18161925601750548, "grad_norm": 0.208984375, "learning_rate": 0.00019901743773963353, "loss": 1.8428, "step": 581 }, { "epoch": 0.18193185370428258, "grad_norm": 0.1953125, "learning_rate": 0.00019901399884869072, "loss": 1.7945, "step": 582 }, { "epoch": 0.18224445139105971, "grad_norm": 0.1923828125, "learning_rate": 0.00019901055398014662, "loss": 1.7858, "step": 583 }, { "epoch": 0.18255704907783682, "grad_norm": 0.19921875, "learning_rate": 0.0001990071031342092, "loss": 1.62, "step": 584 }, { "epoch": 0.18286964676461395, "grad_norm": 0.201171875, "learning_rate": 0.00019900364631108682, "loss": 1.8136, "step": 585 }, { "epoch": 0.18318224445139106, "grad_norm": 0.1962890625, "learning_rate": 0.00019900018351098813, "loss": 1.9074, "step": 586 }, { "epoch": 0.1834948421381682, "grad_norm": 0.2099609375, "learning_rate": 0.0001989967147341222, "loss": 1.8761, "step": 587 }, { "epoch": 0.1838074398249453, "grad_norm": 0.19921875, "learning_rate": 0.00019899323998069846, "loss": 1.8516, "step": 588 }, { "epoch": 0.1841200375117224, "grad_norm": 0.1865234375, "learning_rate": 0.0001989897592509267, "loss": 1.7505, "step": 589 }, { "epoch": 0.18443263519849953, "grad_norm": 0.189453125, "learning_rate": 0.00019898627254501697, "loss": 1.9066, "step": 590 }, { "epoch": 0.18474523288527664, "grad_norm": 0.1982421875, "learning_rate": 0.0001989827798631799, "loss": 1.926, "step": 591 }, { "epoch": 0.18505783057205377, "grad_norm": 0.2138671875, "learning_rate": 0.00019897928120562623, "loss": 1.9225, "step": 592 }, { "epoch": 0.18537042825883088, "grad_norm": 0.2041015625, "learning_rate": 0.00019897577657256724, "loss": 2.0965, "step": 593 }, { "epoch": 0.185683025945608, "grad_norm": 0.20703125, "learning_rate": 0.00019897226596421447, "loss": 1.7195, "step": 594 }, { "epoch": 0.18599562363238512, "grad_norm": 0.197265625, "learning_rate": 0.00019896874938077992, "loss": 1.8197, "step": 595 }, { "epoch": 0.18630822131916225, "grad_norm": 0.1884765625, "learning_rate": 0.0001989652268224758, "loss": 2.2171, "step": 596 }, { "epoch": 0.18662081900593935, "grad_norm": 0.1875, "learning_rate": 0.00019896169828951488, "loss": 1.8195, "step": 597 }, { "epoch": 0.1869334166927165, "grad_norm": 0.1826171875, "learning_rate": 0.00019895816378211008, "loss": 1.6969, "step": 598 }, { "epoch": 0.1872460143794936, "grad_norm": 0.19921875, "learning_rate": 0.00019895462330047484, "loss": 1.8099, "step": 599 }, { "epoch": 0.1875586120662707, "grad_norm": 0.189453125, "learning_rate": 0.00019895107684482293, "loss": 1.7597, "step": 600 }, { "epoch": 0.18787120975304783, "grad_norm": 0.1962890625, "learning_rate": 0.00019894752441536838, "loss": 1.7928, "step": 601 }, { "epoch": 0.18818380743982493, "grad_norm": 0.2021484375, "learning_rate": 0.00019894396601232567, "loss": 1.7385, "step": 602 }, { "epoch": 0.18849640512660207, "grad_norm": 0.1982421875, "learning_rate": 0.0001989404016359097, "loss": 1.7216, "step": 603 }, { "epoch": 0.18880900281337917, "grad_norm": 0.19140625, "learning_rate": 0.00019893683128633557, "loss": 1.749, "step": 604 }, { "epoch": 0.1891216005001563, "grad_norm": 0.189453125, "learning_rate": 0.00019893325496381884, "loss": 1.8708, "step": 605 }, { "epoch": 0.1894341981869334, "grad_norm": 0.197265625, "learning_rate": 0.00019892967266857547, "loss": 1.9852, "step": 606 }, { "epoch": 0.18974679587371054, "grad_norm": 0.203125, "learning_rate": 0.0001989260844008217, "loss": 1.7595, "step": 607 }, { "epoch": 0.19005939356048765, "grad_norm": 0.197265625, "learning_rate": 0.00019892249016077412, "loss": 1.7231, "step": 608 }, { "epoch": 0.19037199124726478, "grad_norm": 0.212890625, "learning_rate": 0.0001989188899486498, "loss": 1.7735, "step": 609 }, { "epoch": 0.1906845889340419, "grad_norm": 0.1953125, "learning_rate": 0.00019891528376466598, "loss": 1.8502, "step": 610 }, { "epoch": 0.190997186620819, "grad_norm": 0.19921875, "learning_rate": 0.00019891167160904046, "loss": 1.8522, "step": 611 }, { "epoch": 0.19130978430759613, "grad_norm": 0.19921875, "learning_rate": 0.0001989080534819913, "loss": 2.0308, "step": 612 }, { "epoch": 0.19162238199437323, "grad_norm": 0.2001953125, "learning_rate": 0.00019890442938373686, "loss": 1.7471, "step": 613 }, { "epoch": 0.19193497968115036, "grad_norm": 0.1962890625, "learning_rate": 0.000198900799314496, "loss": 1.5426, "step": 614 }, { "epoch": 0.19224757736792747, "grad_norm": 0.197265625, "learning_rate": 0.0001988971632744879, "loss": 2.0733, "step": 615 }, { "epoch": 0.1925601750547046, "grad_norm": 0.193359375, "learning_rate": 0.00019889352126393198, "loss": 1.8229, "step": 616 }, { "epoch": 0.1928727727414817, "grad_norm": 0.19921875, "learning_rate": 0.00019888987328304817, "loss": 1.9119, "step": 617 }, { "epoch": 0.19318537042825884, "grad_norm": 0.2021484375, "learning_rate": 0.0001988862193320567, "loss": 1.6569, "step": 618 }, { "epoch": 0.19349796811503595, "grad_norm": 0.189453125, "learning_rate": 0.00019888255941117816, "loss": 2.0652, "step": 619 }, { "epoch": 0.19381056580181308, "grad_norm": 0.2021484375, "learning_rate": 0.0001988788935206335, "loss": 1.6115, "step": 620 }, { "epoch": 0.19412316348859018, "grad_norm": 0.197265625, "learning_rate": 0.00019887522166064402, "loss": 1.6017, "step": 621 }, { "epoch": 0.1944357611753673, "grad_norm": 0.1875, "learning_rate": 0.00019887154383143143, "loss": 1.9108, "step": 622 }, { "epoch": 0.19474835886214442, "grad_norm": 0.1943359375, "learning_rate": 0.00019886786003321772, "loss": 1.6372, "step": 623 }, { "epoch": 0.19506095654892153, "grad_norm": 0.296875, "learning_rate": 0.0001988641702662253, "loss": 2.4569, "step": 624 }, { "epoch": 0.19537355423569866, "grad_norm": 0.1875, "learning_rate": 0.000198860474530677, "loss": 1.6954, "step": 625 }, { "epoch": 0.19568615192247577, "grad_norm": 0.2060546875, "learning_rate": 0.00019885677282679585, "loss": 1.8825, "step": 626 }, { "epoch": 0.1959987496092529, "grad_norm": 0.1943359375, "learning_rate": 0.00019885306515480533, "loss": 1.7887, "step": 627 }, { "epoch": 0.19631134729603, "grad_norm": 0.2109375, "learning_rate": 0.00019884935151492933, "loss": 1.8936, "step": 628 }, { "epoch": 0.19662394498280714, "grad_norm": 0.19921875, "learning_rate": 0.00019884563190739196, "loss": 1.7583, "step": 629 }, { "epoch": 0.19693654266958424, "grad_norm": 0.201171875, "learning_rate": 0.0001988419063324179, "loss": 1.898, "step": 630 }, { "epoch": 0.19724914035636137, "grad_norm": 0.193359375, "learning_rate": 0.0001988381747902319, "loss": 2.0045, "step": 631 }, { "epoch": 0.19756173804313848, "grad_norm": 0.1923828125, "learning_rate": 0.00019883443728105943, "loss": 1.9453, "step": 632 }, { "epoch": 0.1978743357299156, "grad_norm": 0.2001953125, "learning_rate": 0.000198830693805126, "loss": 1.8481, "step": 633 }, { "epoch": 0.19818693341669272, "grad_norm": 0.203125, "learning_rate": 0.00019882694436265764, "loss": 1.8409, "step": 634 }, { "epoch": 0.19849953110346982, "grad_norm": 0.208984375, "learning_rate": 0.00019882318895388072, "loss": 1.8232, "step": 635 }, { "epoch": 0.19881212879024696, "grad_norm": 0.193359375, "learning_rate": 0.00019881942757902197, "loss": 1.7768, "step": 636 }, { "epoch": 0.19912472647702406, "grad_norm": 0.2099609375, "learning_rate": 0.0001988156602383084, "loss": 1.7056, "step": 637 }, { "epoch": 0.1994373241638012, "grad_norm": 0.2041015625, "learning_rate": 0.00019881188693196756, "loss": 1.5243, "step": 638 }, { "epoch": 0.1997499218505783, "grad_norm": 0.2001953125, "learning_rate": 0.00019880810766022714, "loss": 2.0564, "step": 639 }, { "epoch": 0.20006251953735543, "grad_norm": 0.203125, "learning_rate": 0.00019880432242331536, "loss": 1.8789, "step": 640 }, { "epoch": 0.20037511722413254, "grad_norm": 0.1923828125, "learning_rate": 0.00019880053122146073, "loss": 1.8037, "step": 641 }, { "epoch": 0.20068771491090967, "grad_norm": 0.205078125, "learning_rate": 0.00019879673405489215, "loss": 1.7692, "step": 642 }, { "epoch": 0.20100031259768678, "grad_norm": 0.1923828125, "learning_rate": 0.00019879293092383882, "loss": 1.7066, "step": 643 }, { "epoch": 0.2013129102844639, "grad_norm": 0.197265625, "learning_rate": 0.00019878912182853036, "loss": 1.8715, "step": 644 }, { "epoch": 0.201625507971241, "grad_norm": 0.19140625, "learning_rate": 0.0001987853067691967, "loss": 1.6647, "step": 645 }, { "epoch": 0.20193810565801812, "grad_norm": 0.2001953125, "learning_rate": 0.00019878148574606824, "loss": 1.6027, "step": 646 }, { "epoch": 0.20225070334479525, "grad_norm": 0.1904296875, "learning_rate": 0.00019877765875937558, "loss": 1.6788, "step": 647 }, { "epoch": 0.20256330103157236, "grad_norm": 0.205078125, "learning_rate": 0.00019877382580934977, "loss": 1.7934, "step": 648 }, { "epoch": 0.2028758987183495, "grad_norm": 0.1904296875, "learning_rate": 0.00019876998689622225, "loss": 1.6556, "step": 649 }, { "epoch": 0.2031884964051266, "grad_norm": 0.1953125, "learning_rate": 0.00019876614202022475, "loss": 1.7103, "step": 650 }, { "epoch": 0.20350109409190373, "grad_norm": 0.193359375, "learning_rate": 0.0001987622911815894, "loss": 1.7654, "step": 651 }, { "epoch": 0.20381369177868083, "grad_norm": 0.1875, "learning_rate": 0.00019875843438054864, "loss": 1.7043, "step": 652 }, { "epoch": 0.20412628946545797, "grad_norm": 0.1943359375, "learning_rate": 0.0001987545716173354, "loss": 1.966, "step": 653 }, { "epoch": 0.20443888715223507, "grad_norm": 0.19921875, "learning_rate": 0.0001987507028921828, "loss": 1.7629, "step": 654 }, { "epoch": 0.2047514848390122, "grad_norm": 0.2060546875, "learning_rate": 0.00019874682820532444, "loss": 1.766, "step": 655 }, { "epoch": 0.2050640825257893, "grad_norm": 0.201171875, "learning_rate": 0.00019874294755699423, "loss": 1.6821, "step": 656 }, { "epoch": 0.20537668021256641, "grad_norm": 0.21875, "learning_rate": 0.00019873906094742644, "loss": 1.806, "step": 657 }, { "epoch": 0.20568927789934355, "grad_norm": 0.1962890625, "learning_rate": 0.0001987351683768557, "loss": 1.8864, "step": 658 }, { "epoch": 0.20600187558612065, "grad_norm": 0.19921875, "learning_rate": 0.00019873126984551703, "loss": 1.7406, "step": 659 }, { "epoch": 0.20631447327289779, "grad_norm": 0.201171875, "learning_rate": 0.0001987273653536458, "loss": 1.7246, "step": 660 }, { "epoch": 0.2066270709596749, "grad_norm": 0.1875, "learning_rate": 0.00019872345490147772, "loss": 1.9874, "step": 661 }, { "epoch": 0.20693966864645202, "grad_norm": 0.2001953125, "learning_rate": 0.00019871953848924886, "loss": 1.7792, "step": 662 }, { "epoch": 0.20725226633322913, "grad_norm": 0.1982421875, "learning_rate": 0.00019871561611719564, "loss": 1.8759, "step": 663 }, { "epoch": 0.20756486402000626, "grad_norm": 0.2001953125, "learning_rate": 0.00019871168778555492, "loss": 1.9906, "step": 664 }, { "epoch": 0.20787746170678337, "grad_norm": 0.2021484375, "learning_rate": 0.0001987077534945638, "loss": 1.8973, "step": 665 }, { "epoch": 0.2081900593935605, "grad_norm": 0.1982421875, "learning_rate": 0.00019870381324445978, "loss": 1.6312, "step": 666 }, { "epoch": 0.2085026570803376, "grad_norm": 0.208984375, "learning_rate": 0.0001986998670354808, "loss": 1.8406, "step": 667 }, { "epoch": 0.2088152547671147, "grad_norm": 0.2099609375, "learning_rate": 0.0001986959148678651, "loss": 1.7828, "step": 668 }, { "epoch": 0.20912785245389184, "grad_norm": 0.201171875, "learning_rate": 0.00019869195674185122, "loss": 1.9185, "step": 669 }, { "epoch": 0.20944045014066895, "grad_norm": 0.201171875, "learning_rate": 0.00019868799265767816, "loss": 1.7588, "step": 670 }, { "epoch": 0.20975304782744608, "grad_norm": 0.203125, "learning_rate": 0.00019868402261558524, "loss": 1.7387, "step": 671 }, { "epoch": 0.2100656455142232, "grad_norm": 0.1904296875, "learning_rate": 0.00019868004661581208, "loss": 1.6164, "step": 672 }, { "epoch": 0.21037824320100032, "grad_norm": 0.19140625, "learning_rate": 0.0001986760646585988, "loss": 1.8667, "step": 673 }, { "epoch": 0.21069084088777743, "grad_norm": 0.189453125, "learning_rate": 0.00019867207674418568, "loss": 1.9312, "step": 674 }, { "epoch": 0.21100343857455456, "grad_norm": 0.19921875, "learning_rate": 0.0001986680828728136, "loss": 1.7665, "step": 675 }, { "epoch": 0.21131603626133166, "grad_norm": 0.2099609375, "learning_rate": 0.00019866408304472364, "loss": 1.6056, "step": 676 }, { "epoch": 0.2116286339481088, "grad_norm": 0.2001953125, "learning_rate": 0.00019866007726015723, "loss": 1.5752, "step": 677 }, { "epoch": 0.2119412316348859, "grad_norm": 0.208984375, "learning_rate": 0.00019865606551935626, "loss": 1.8815, "step": 678 }, { "epoch": 0.212253829321663, "grad_norm": 0.203125, "learning_rate": 0.00019865204782256287, "loss": 1.7828, "step": 679 }, { "epoch": 0.21256642700844014, "grad_norm": 0.2080078125, "learning_rate": 0.0001986480241700196, "loss": 1.9457, "step": 680 }, { "epoch": 0.21287902469521724, "grad_norm": 0.1953125, "learning_rate": 0.00019864399456196946, "loss": 1.9523, "step": 681 }, { "epoch": 0.21319162238199438, "grad_norm": 0.19140625, "learning_rate": 0.00019863995899865565, "loss": 1.5974, "step": 682 }, { "epoch": 0.21350422006877148, "grad_norm": 0.2021484375, "learning_rate": 0.00019863591748032184, "loss": 1.8886, "step": 683 }, { "epoch": 0.21381681775554862, "grad_norm": 0.201171875, "learning_rate": 0.00019863187000721197, "loss": 1.8564, "step": 684 }, { "epoch": 0.21412941544232572, "grad_norm": 0.203125, "learning_rate": 0.00019862781657957045, "loss": 1.8022, "step": 685 }, { "epoch": 0.21444201312910285, "grad_norm": 0.2041015625, "learning_rate": 0.00019862375719764192, "loss": 1.855, "step": 686 }, { "epoch": 0.21475461081587996, "grad_norm": 0.208984375, "learning_rate": 0.0001986196918616715, "loss": 2.0019, "step": 687 }, { "epoch": 0.2150672085026571, "grad_norm": 0.1953125, "learning_rate": 0.00019861562057190462, "loss": 1.8597, "step": 688 }, { "epoch": 0.2153798061894342, "grad_norm": 0.1962890625, "learning_rate": 0.00019861154332858708, "loss": 1.9685, "step": 689 }, { "epoch": 0.2156924038762113, "grad_norm": 0.197265625, "learning_rate": 0.00019860746013196495, "loss": 1.8702, "step": 690 }, { "epoch": 0.21600500156298844, "grad_norm": 0.2138671875, "learning_rate": 0.00019860337098228485, "loss": 1.6556, "step": 691 }, { "epoch": 0.21631759924976554, "grad_norm": 0.2060546875, "learning_rate": 0.00019859927587979358, "loss": 2.0366, "step": 692 }, { "epoch": 0.21663019693654267, "grad_norm": 0.1982421875, "learning_rate": 0.00019859517482473838, "loss": 1.9303, "step": 693 }, { "epoch": 0.21694279462331978, "grad_norm": 0.1982421875, "learning_rate": 0.00019859106781736682, "loss": 1.6981, "step": 694 }, { "epoch": 0.2172553923100969, "grad_norm": 0.205078125, "learning_rate": 0.00019858695485792686, "loss": 1.4825, "step": 695 }, { "epoch": 0.21756798999687402, "grad_norm": 0.19921875, "learning_rate": 0.0001985828359466668, "loss": 1.779, "step": 696 }, { "epoch": 0.21788058768365115, "grad_norm": 0.2001953125, "learning_rate": 0.00019857871108383532, "loss": 1.7535, "step": 697 }, { "epoch": 0.21819318537042826, "grad_norm": 0.2060546875, "learning_rate": 0.00019857458026968143, "loss": 1.7039, "step": 698 }, { "epoch": 0.2185057830572054, "grad_norm": 0.2001953125, "learning_rate": 0.0001985704435044545, "loss": 1.7501, "step": 699 }, { "epoch": 0.2188183807439825, "grad_norm": 0.2041015625, "learning_rate": 0.0001985663007884043, "loss": 1.8791, "step": 700 }, { "epoch": 0.2191309784307596, "grad_norm": 0.20703125, "learning_rate": 0.00019856215212178094, "loss": 1.904, "step": 701 }, { "epoch": 0.21944357611753673, "grad_norm": 0.2109375, "learning_rate": 0.00019855799750483484, "loss": 1.4772, "step": 702 }, { "epoch": 0.21975617380431384, "grad_norm": 0.3984375, "learning_rate": 0.00019855383693781682, "loss": 2.4316, "step": 703 }, { "epoch": 0.22006877149109097, "grad_norm": 0.2109375, "learning_rate": 0.0001985496704209781, "loss": 1.6331, "step": 704 }, { "epoch": 0.22038136917786808, "grad_norm": 0.22265625, "learning_rate": 0.0001985454979545702, "loss": 1.7665, "step": 705 }, { "epoch": 0.2206939668646452, "grad_norm": 0.1923828125, "learning_rate": 0.00019854131953884495, "loss": 1.9052, "step": 706 }, { "epoch": 0.2210065645514223, "grad_norm": 0.1923828125, "learning_rate": 0.00019853713517405472, "loss": 1.8316, "step": 707 }, { "epoch": 0.22131916223819945, "grad_norm": 0.1962890625, "learning_rate": 0.00019853294486045208, "loss": 1.6123, "step": 708 }, { "epoch": 0.22163175992497655, "grad_norm": 0.2060546875, "learning_rate": 0.00019852874859828997, "loss": 1.8111, "step": 709 }, { "epoch": 0.22194435761175368, "grad_norm": 0.2060546875, "learning_rate": 0.00019852454638782176, "loss": 1.8234, "step": 710 }, { "epoch": 0.2222569552985308, "grad_norm": 0.2060546875, "learning_rate": 0.00019852033822930114, "loss": 1.6664, "step": 711 }, { "epoch": 0.22256955298530792, "grad_norm": 0.201171875, "learning_rate": 0.00019851612412298214, "loss": 1.9896, "step": 712 }, { "epoch": 0.22288215067208503, "grad_norm": 0.2001953125, "learning_rate": 0.0001985119040691192, "loss": 1.6152, "step": 713 }, { "epoch": 0.22319474835886213, "grad_norm": 0.19140625, "learning_rate": 0.00019850767806796707, "loss": 2.165, "step": 714 }, { "epoch": 0.22350734604563927, "grad_norm": 0.1982421875, "learning_rate": 0.00019850344611978087, "loss": 2.1852, "step": 715 }, { "epoch": 0.22381994373241637, "grad_norm": 0.2001953125, "learning_rate": 0.00019849920822481614, "loss": 1.7914, "step": 716 }, { "epoch": 0.2241325414191935, "grad_norm": 0.2158203125, "learning_rate": 0.00019849496438332866, "loss": 2.0296, "step": 717 }, { "epoch": 0.2244451391059706, "grad_norm": 0.201171875, "learning_rate": 0.0001984907145955747, "loss": 1.7981, "step": 718 }, { "epoch": 0.22475773679274774, "grad_norm": 0.2080078125, "learning_rate": 0.00019848645886181074, "loss": 1.7928, "step": 719 }, { "epoch": 0.22507033447952485, "grad_norm": 0.2080078125, "learning_rate": 0.00019848219718229378, "loss": 1.8671, "step": 720 }, { "epoch": 0.22538293216630198, "grad_norm": 0.203125, "learning_rate": 0.00019847792955728107, "loss": 1.8564, "step": 721 }, { "epoch": 0.22569552985307909, "grad_norm": 0.20703125, "learning_rate": 0.0001984736559870303, "loss": 1.6293, "step": 722 }, { "epoch": 0.22600812753985622, "grad_norm": 0.201171875, "learning_rate": 0.0001984693764717994, "loss": 1.9545, "step": 723 }, { "epoch": 0.22632072522663332, "grad_norm": 0.2021484375, "learning_rate": 0.00019846509101184679, "loss": 1.8173, "step": 724 }, { "epoch": 0.22663332291341043, "grad_norm": 0.197265625, "learning_rate": 0.00019846079960743112, "loss": 1.649, "step": 725 }, { "epoch": 0.22694592060018756, "grad_norm": 0.1923828125, "learning_rate": 0.00019845650225881154, "loss": 1.8916, "step": 726 }, { "epoch": 0.22725851828696467, "grad_norm": 0.2099609375, "learning_rate": 0.00019845219896624743, "loss": 1.7553, "step": 727 }, { "epoch": 0.2275711159737418, "grad_norm": 0.19140625, "learning_rate": 0.0001984478897299986, "loss": 1.6849, "step": 728 }, { "epoch": 0.2278837136605189, "grad_norm": 0.19921875, "learning_rate": 0.00019844357455032526, "loss": 1.8667, "step": 729 }, { "epoch": 0.22819631134729604, "grad_norm": 0.203125, "learning_rate": 0.00019843925342748783, "loss": 1.651, "step": 730 }, { "epoch": 0.22850890903407314, "grad_norm": 0.19921875, "learning_rate": 0.00019843492636174728, "loss": 1.6074, "step": 731 }, { "epoch": 0.22882150672085028, "grad_norm": 0.189453125, "learning_rate": 0.00019843059335336474, "loss": 1.8431, "step": 732 }, { "epoch": 0.22913410440762738, "grad_norm": 0.1904296875, "learning_rate": 0.00019842625440260188, "loss": 1.7872, "step": 733 }, { "epoch": 0.22944670209440451, "grad_norm": 0.197265625, "learning_rate": 0.0001984219095097206, "loss": 1.6808, "step": 734 }, { "epoch": 0.22975929978118162, "grad_norm": 0.1962890625, "learning_rate": 0.00019841755867498322, "loss": 1.7461, "step": 735 }, { "epoch": 0.23007189746795872, "grad_norm": 0.2109375, "learning_rate": 0.00019841320189865243, "loss": 1.7838, "step": 736 }, { "epoch": 0.23038449515473586, "grad_norm": 0.20703125, "learning_rate": 0.0001984088391809912, "loss": 1.9104, "step": 737 }, { "epoch": 0.23069709284151296, "grad_norm": 0.197265625, "learning_rate": 0.00019840447052226298, "loss": 1.9961, "step": 738 }, { "epoch": 0.2310096905282901, "grad_norm": 0.2060546875, "learning_rate": 0.00019840009592273143, "loss": 1.987, "step": 739 }, { "epoch": 0.2313222882150672, "grad_norm": 0.1982421875, "learning_rate": 0.00019839571538266072, "loss": 1.6382, "step": 740 }, { "epoch": 0.23163488590184433, "grad_norm": 0.19921875, "learning_rate": 0.0001983913289023153, "loss": 1.6738, "step": 741 }, { "epoch": 0.23194748358862144, "grad_norm": 0.2119140625, "learning_rate": 0.00019838693648195995, "loss": 1.8182, "step": 742 }, { "epoch": 0.23226008127539857, "grad_norm": 0.2041015625, "learning_rate": 0.00019838253812185988, "loss": 1.598, "step": 743 }, { "epoch": 0.23257267896217568, "grad_norm": 0.19140625, "learning_rate": 0.00019837813382228063, "loss": 1.7465, "step": 744 }, { "epoch": 0.2328852766489528, "grad_norm": 0.2119140625, "learning_rate": 0.00019837372358348806, "loss": 1.8831, "step": 745 }, { "epoch": 0.23319787433572992, "grad_norm": 0.1962890625, "learning_rate": 0.00019836930740574845, "loss": 1.525, "step": 746 }, { "epoch": 0.23351047202250702, "grad_norm": 0.84375, "learning_rate": 0.00019836488528932836, "loss": 3.4084, "step": 747 }, { "epoch": 0.23382306970928415, "grad_norm": 0.1982421875, "learning_rate": 0.00019836045723449483, "loss": 1.7993, "step": 748 }, { "epoch": 0.23413566739606126, "grad_norm": 0.2314453125, "learning_rate": 0.00019835602324151514, "loss": 1.8971, "step": 749 }, { "epoch": 0.2344482650828384, "grad_norm": 0.1962890625, "learning_rate": 0.00019835158331065703, "loss": 1.7286, "step": 750 }, { "epoch": 0.2347608627696155, "grad_norm": 0.1982421875, "learning_rate": 0.00019834713744218844, "loss": 1.6018, "step": 751 }, { "epoch": 0.23507346045639263, "grad_norm": 0.2109375, "learning_rate": 0.00019834268563637787, "loss": 1.8705, "step": 752 }, { "epoch": 0.23538605814316974, "grad_norm": 0.2001953125, "learning_rate": 0.00019833822789349409, "loss": 1.8121, "step": 753 }, { "epoch": 0.23569865582994687, "grad_norm": 0.1953125, "learning_rate": 0.00019833376421380612, "loss": 1.6886, "step": 754 }, { "epoch": 0.23601125351672397, "grad_norm": 0.205078125, "learning_rate": 0.00019832929459758352, "loss": 1.6922, "step": 755 }, { "epoch": 0.2363238512035011, "grad_norm": 0.208984375, "learning_rate": 0.0001983248190450961, "loss": 1.7953, "step": 756 }, { "epoch": 0.2366364488902782, "grad_norm": 0.2021484375, "learning_rate": 0.00019832033755661405, "loss": 1.7892, "step": 757 }, { "epoch": 0.23694904657705532, "grad_norm": 0.208984375, "learning_rate": 0.00019831585013240793, "loss": 1.5738, "step": 758 }, { "epoch": 0.23726164426383245, "grad_norm": 0.2041015625, "learning_rate": 0.0001983113567727487, "loss": 1.6719, "step": 759 }, { "epoch": 0.23757424195060955, "grad_norm": 0.203125, "learning_rate": 0.00019830685747790748, "loss": 1.9564, "step": 760 }, { "epoch": 0.2378868396373867, "grad_norm": 0.2138671875, "learning_rate": 0.0001983023522481561, "loss": 2.1432, "step": 761 }, { "epoch": 0.2381994373241638, "grad_norm": 0.2099609375, "learning_rate": 0.0001982978410837664, "loss": 1.8179, "step": 762 }, { "epoch": 0.23851203501094093, "grad_norm": 0.2119140625, "learning_rate": 0.0001982933239850108, "loss": 1.9499, "step": 763 }, { "epoch": 0.23882463269771803, "grad_norm": 0.2060546875, "learning_rate": 0.00019828880095216193, "loss": 1.5989, "step": 764 }, { "epoch": 0.23913723038449516, "grad_norm": 0.2109375, "learning_rate": 0.00019828427198549293, "loss": 1.8161, "step": 765 }, { "epoch": 0.23944982807127227, "grad_norm": 0.208984375, "learning_rate": 0.0001982797370852772, "loss": 1.9312, "step": 766 }, { "epoch": 0.2397624257580494, "grad_norm": 0.2041015625, "learning_rate": 0.00019827519625178846, "loss": 1.7275, "step": 767 }, { "epoch": 0.2400750234448265, "grad_norm": 0.203125, "learning_rate": 0.0001982706494853009, "loss": 2.0002, "step": 768 }, { "epoch": 0.2403876211316036, "grad_norm": 0.2080078125, "learning_rate": 0.00019826609678608902, "loss": 1.8021, "step": 769 }, { "epoch": 0.24070021881838075, "grad_norm": 0.205078125, "learning_rate": 0.00019826153815442763, "loss": 1.5546, "step": 770 }, { "epoch": 0.24101281650515785, "grad_norm": 0.212890625, "learning_rate": 0.000198256973590592, "loss": 1.5848, "step": 771 }, { "epoch": 0.24132541419193498, "grad_norm": 0.2021484375, "learning_rate": 0.00019825240309485765, "loss": 1.7763, "step": 772 }, { "epoch": 0.2416380118787121, "grad_norm": 0.2119140625, "learning_rate": 0.0001982478266675005, "loss": 1.6906, "step": 773 }, { "epoch": 0.24195060956548922, "grad_norm": 0.21484375, "learning_rate": 0.00019824324430879687, "loss": 1.9644, "step": 774 }, { "epoch": 0.24226320725226633, "grad_norm": 0.2041015625, "learning_rate": 0.00019823865601902341, "loss": 1.9122, "step": 775 }, { "epoch": 0.24257580493904346, "grad_norm": 0.2158203125, "learning_rate": 0.00019823406179845707, "loss": 1.9017, "step": 776 }, { "epoch": 0.24288840262582057, "grad_norm": 0.21875, "learning_rate": 0.00019822946164737526, "loss": 1.8361, "step": 777 }, { "epoch": 0.2432010003125977, "grad_norm": 0.1982421875, "learning_rate": 0.00019822485556605566, "loss": 1.7349, "step": 778 }, { "epoch": 0.2435135979993748, "grad_norm": 0.212890625, "learning_rate": 0.00019822024355477637, "loss": 1.6017, "step": 779 }, { "epoch": 0.2438261956861519, "grad_norm": 0.2060546875, "learning_rate": 0.0001982156256138158, "loss": 1.8296, "step": 780 }, { "epoch": 0.24413879337292904, "grad_norm": 0.2041015625, "learning_rate": 0.00019821100174345277, "loss": 1.6754, "step": 781 }, { "epoch": 0.24445139105970615, "grad_norm": 0.216796875, "learning_rate": 0.0001982063719439664, "loss": 2.0037, "step": 782 }, { "epoch": 0.24476398874648328, "grad_norm": 0.2109375, "learning_rate": 0.00019820173621563623, "loss": 1.887, "step": 783 }, { "epoch": 0.24507658643326038, "grad_norm": 0.19140625, "learning_rate": 0.0001981970945587421, "loss": 1.5708, "step": 784 }, { "epoch": 0.24538918412003752, "grad_norm": 0.68359375, "learning_rate": 0.0001981924469735642, "loss": 2.3282, "step": 785 }, { "epoch": 0.24570178180681462, "grad_norm": 0.203125, "learning_rate": 0.00019818779346038318, "loss": 1.7515, "step": 786 }, { "epoch": 0.24601437949359176, "grad_norm": 0.20703125, "learning_rate": 0.00019818313401947997, "loss": 1.7623, "step": 787 }, { "epoch": 0.24632697718036886, "grad_norm": 0.2099609375, "learning_rate": 0.00019817846865113577, "loss": 1.8036, "step": 788 }, { "epoch": 0.246639574867146, "grad_norm": 0.2001953125, "learning_rate": 0.0001981737973556324, "loss": 1.8455, "step": 789 }, { "epoch": 0.2469521725539231, "grad_norm": 0.2099609375, "learning_rate": 0.0001981691201332517, "loss": 1.7791, "step": 790 }, { "epoch": 0.24726477024070023, "grad_norm": 0.205078125, "learning_rate": 0.00019816443698427615, "loss": 2.0416, "step": 791 }, { "epoch": 0.24757736792747734, "grad_norm": 0.2265625, "learning_rate": 0.00019815974790898846, "loss": 2.2271, "step": 792 }, { "epoch": 0.24788996561425444, "grad_norm": 0.203125, "learning_rate": 0.00019815505290767172, "loss": 1.5433, "step": 793 }, { "epoch": 0.24820256330103158, "grad_norm": 0.2138671875, "learning_rate": 0.0001981503519806093, "loss": 1.7228, "step": 794 }, { "epoch": 0.24851516098780868, "grad_norm": 0.20703125, "learning_rate": 0.00019814564512808512, "loss": 1.8217, "step": 795 }, { "epoch": 0.2488277586745858, "grad_norm": 0.203125, "learning_rate": 0.00019814093235038323, "loss": 1.8205, "step": 796 }, { "epoch": 0.24914035636136292, "grad_norm": 0.2109375, "learning_rate": 0.00019813621364778817, "loss": 1.8541, "step": 797 }, { "epoch": 0.24945295404814005, "grad_norm": 0.2080078125, "learning_rate": 0.0001981314890205849, "loss": 1.8656, "step": 798 }, { "epoch": 0.24976555173491716, "grad_norm": 0.2109375, "learning_rate": 0.00019812675846905855, "loss": 1.809, "step": 799 }, { "epoch": 0.25007814942169426, "grad_norm": 0.20703125, "learning_rate": 0.00019812202199349476, "loss": 2.0585, "step": 800 }, { "epoch": 0.2503907471084714, "grad_norm": 0.2138671875, "learning_rate": 0.00019811727959417945, "loss": 1.9492, "step": 801 }, { "epoch": 0.25070334479524853, "grad_norm": 0.2080078125, "learning_rate": 0.00019811253127139896, "loss": 1.8192, "step": 802 }, { "epoch": 0.25101594248202563, "grad_norm": 0.2109375, "learning_rate": 0.0001981077770254399, "loss": 1.4981, "step": 803 }, { "epoch": 0.25132854016880274, "grad_norm": 0.205078125, "learning_rate": 0.00019810301685658935, "loss": 1.8598, "step": 804 }, { "epoch": 0.25164113785557984, "grad_norm": 0.2119140625, "learning_rate": 0.00019809825076513464, "loss": 1.7946, "step": 805 }, { "epoch": 0.251953735542357, "grad_norm": 0.23046875, "learning_rate": 0.00019809347875136352, "loss": 1.784, "step": 806 }, { "epoch": 0.2522663332291341, "grad_norm": 0.1962890625, "learning_rate": 0.00019808870081556413, "loss": 1.9401, "step": 807 }, { "epoch": 0.2525789309159112, "grad_norm": 0.2080078125, "learning_rate": 0.00019808391695802483, "loss": 2.0217, "step": 808 }, { "epoch": 0.2528915286026883, "grad_norm": 0.20703125, "learning_rate": 0.0001980791271790345, "loss": 1.7354, "step": 809 }, { "epoch": 0.2532041262894655, "grad_norm": 0.20703125, "learning_rate": 0.00019807433147888225, "loss": 2.1094, "step": 810 }, { "epoch": 0.2535167239762426, "grad_norm": 0.8125, "learning_rate": 0.00019806952985785764, "loss": 2.8019, "step": 811 }, { "epoch": 0.2538293216630197, "grad_norm": 0.193359375, "learning_rate": 0.00019806472231625056, "loss": 1.554, "step": 812 }, { "epoch": 0.2541419193497968, "grad_norm": 0.1953125, "learning_rate": 0.0001980599088543512, "loss": 1.7158, "step": 813 }, { "epoch": 0.2544545170365739, "grad_norm": 0.2060546875, "learning_rate": 0.00019805508947245021, "loss": 1.934, "step": 814 }, { "epoch": 0.25476711472335106, "grad_norm": 0.2119140625, "learning_rate": 0.0001980502641708385, "loss": 2.0267, "step": 815 }, { "epoch": 0.25507971241012817, "grad_norm": 0.20703125, "learning_rate": 0.0001980454329498074, "loss": 1.6819, "step": 816 }, { "epoch": 0.2553923100969053, "grad_norm": 0.203125, "learning_rate": 0.00019804059580964855, "loss": 1.7279, "step": 817 }, { "epoch": 0.2557049077836824, "grad_norm": 0.2109375, "learning_rate": 0.00019803575275065404, "loss": 1.6234, "step": 818 }, { "epoch": 0.25601750547045954, "grad_norm": 0.205078125, "learning_rate": 0.0001980309037731162, "loss": 1.4631, "step": 819 }, { "epoch": 0.25633010315723664, "grad_norm": 0.2109375, "learning_rate": 0.00019802604887732774, "loss": 1.7769, "step": 820 }, { "epoch": 0.25664270084401375, "grad_norm": 0.2119140625, "learning_rate": 0.00019802118806358182, "loss": 1.7928, "step": 821 }, { "epoch": 0.25695529853079085, "grad_norm": 0.2158203125, "learning_rate": 0.00019801632133217189, "loss": 1.639, "step": 822 }, { "epoch": 0.257267896217568, "grad_norm": 0.212890625, "learning_rate": 0.0001980114486833917, "loss": 1.6918, "step": 823 }, { "epoch": 0.2575804939043451, "grad_norm": 0.2138671875, "learning_rate": 0.00019800657011753548, "loss": 1.8273, "step": 824 }, { "epoch": 0.2578930915911222, "grad_norm": 0.2041015625, "learning_rate": 0.0001980016856348977, "loss": 1.9625, "step": 825 }, { "epoch": 0.25820568927789933, "grad_norm": 0.201171875, "learning_rate": 0.00019799679523577332, "loss": 1.741, "step": 826 }, { "epoch": 0.25851828696467644, "grad_norm": 0.19921875, "learning_rate": 0.00019799189892045748, "loss": 2.0397, "step": 827 }, { "epoch": 0.2588308846514536, "grad_norm": 0.2001953125, "learning_rate": 0.00019798699668924585, "loss": 1.7246, "step": 828 }, { "epoch": 0.2591434823382307, "grad_norm": 0.203125, "learning_rate": 0.00019798208854243437, "loss": 1.5622, "step": 829 }, { "epoch": 0.2594560800250078, "grad_norm": 0.1953125, "learning_rate": 0.00019797717448031936, "loss": 1.4121, "step": 830 }, { "epoch": 0.2597686777117849, "grad_norm": 0.20703125, "learning_rate": 0.00019797225450319744, "loss": 1.6693, "step": 831 }, { "epoch": 0.2600812753985621, "grad_norm": 0.208984375, "learning_rate": 0.0001979673286113657, "loss": 1.6021, "step": 832 }, { "epoch": 0.2603938730853392, "grad_norm": 0.2119140625, "learning_rate": 0.0001979623968051215, "loss": 1.9199, "step": 833 }, { "epoch": 0.2607064707721163, "grad_norm": 0.2080078125, "learning_rate": 0.00019795745908476254, "loss": 2.0403, "step": 834 }, { "epoch": 0.2610190684588934, "grad_norm": 0.2138671875, "learning_rate": 0.00019795251545058694, "loss": 1.8294, "step": 835 }, { "epoch": 0.26133166614567055, "grad_norm": 0.20703125, "learning_rate": 0.00019794756590289317, "loss": 1.8625, "step": 836 }, { "epoch": 0.26164426383244765, "grad_norm": 0.21484375, "learning_rate": 0.00019794261044198003, "loss": 1.8086, "step": 837 }, { "epoch": 0.26195686151922476, "grad_norm": 0.2119140625, "learning_rate": 0.0001979376490681467, "loss": 1.6601, "step": 838 }, { "epoch": 0.26226945920600186, "grad_norm": 0.2060546875, "learning_rate": 0.00019793268178169268, "loss": 1.5396, "step": 839 }, { "epoch": 0.26258205689277897, "grad_norm": 0.20703125, "learning_rate": 0.00019792770858291788, "loss": 1.7095, "step": 840 }, { "epoch": 0.26289465457955613, "grad_norm": 0.2177734375, "learning_rate": 0.00019792272947212253, "loss": 1.8782, "step": 841 }, { "epoch": 0.26320725226633324, "grad_norm": 0.19921875, "learning_rate": 0.00019791774444960717, "loss": 1.9358, "step": 842 }, { "epoch": 0.26351984995311034, "grad_norm": 0.2001953125, "learning_rate": 0.00019791275351567286, "loss": 1.7342, "step": 843 }, { "epoch": 0.26383244763988745, "grad_norm": 0.2001953125, "learning_rate": 0.00019790775667062081, "loss": 1.7846, "step": 844 }, { "epoch": 0.2641450453266646, "grad_norm": 0.201171875, "learning_rate": 0.0001979027539147527, "loss": 1.845, "step": 845 }, { "epoch": 0.2644576430134417, "grad_norm": 0.2060546875, "learning_rate": 0.0001978977452483706, "loss": 1.9035, "step": 846 }, { "epoch": 0.2647702407002188, "grad_norm": 0.1982421875, "learning_rate": 0.0001978927306717769, "loss": 1.5762, "step": 847 }, { "epoch": 0.2650828383869959, "grad_norm": 0.1953125, "learning_rate": 0.0001978877101852743, "loss": 2.1721, "step": 848 }, { "epoch": 0.26539543607377303, "grad_norm": 0.2080078125, "learning_rate": 0.00019788268378916586, "loss": 1.7108, "step": 849 }, { "epoch": 0.2657080337605502, "grad_norm": 1.0234375, "learning_rate": 0.00019787765148375508, "loss": 2.5699, "step": 850 }, { "epoch": 0.2660206314473273, "grad_norm": 0.203125, "learning_rate": 0.00019787261326934577, "loss": 1.6568, "step": 851 }, { "epoch": 0.2663332291341044, "grad_norm": 0.2001953125, "learning_rate": 0.00019786756914624208, "loss": 1.8594, "step": 852 }, { "epoch": 0.2666458268208815, "grad_norm": 0.2041015625, "learning_rate": 0.00019786251911474849, "loss": 1.8597, "step": 853 }, { "epoch": 0.26695842450765866, "grad_norm": 0.216796875, "learning_rate": 0.00019785746317516994, "loss": 2.0457, "step": 854 }, { "epoch": 0.26727102219443577, "grad_norm": 0.2275390625, "learning_rate": 0.00019785240132781163, "loss": 1.832, "step": 855 }, { "epoch": 0.2675836198812129, "grad_norm": 0.2080078125, "learning_rate": 0.00019784733357297915, "loss": 1.68, "step": 856 }, { "epoch": 0.26789621756799, "grad_norm": 0.2109375, "learning_rate": 0.00019784225991097848, "loss": 1.8997, "step": 857 }, { "epoch": 0.26820881525476714, "grad_norm": 0.2119140625, "learning_rate": 0.00019783718034211586, "loss": 1.7594, "step": 858 }, { "epoch": 0.26852141294154425, "grad_norm": 0.2138671875, "learning_rate": 0.000197832094866698, "loss": 1.7918, "step": 859 }, { "epoch": 0.26883401062832135, "grad_norm": 0.203125, "learning_rate": 0.00019782700348503193, "loss": 1.6616, "step": 860 }, { "epoch": 0.26914660831509846, "grad_norm": 0.201171875, "learning_rate": 0.00019782190619742495, "loss": 1.8357, "step": 861 }, { "epoch": 0.26945920600187556, "grad_norm": 0.2119140625, "learning_rate": 0.0001978168030041849, "loss": 1.74, "step": 862 }, { "epoch": 0.2697718036886527, "grad_norm": 0.203125, "learning_rate": 0.00019781169390561975, "loss": 1.4934, "step": 863 }, { "epoch": 0.27008440137542983, "grad_norm": 0.216796875, "learning_rate": 0.000197806578902038, "loss": 1.6285, "step": 864 }, { "epoch": 0.27039699906220693, "grad_norm": 0.2060546875, "learning_rate": 0.00019780145799374848, "loss": 1.5881, "step": 865 }, { "epoch": 0.27070959674898404, "grad_norm": 0.2080078125, "learning_rate": 0.00019779633118106028, "loss": 1.714, "step": 866 }, { "epoch": 0.2710221944357612, "grad_norm": 0.2119140625, "learning_rate": 0.000197791198464283, "loss": 1.9303, "step": 867 }, { "epoch": 0.2713347921225383, "grad_norm": 0.2060546875, "learning_rate": 0.0001977860598437264, "loss": 1.6095, "step": 868 }, { "epoch": 0.2716473898093154, "grad_norm": 0.2216796875, "learning_rate": 0.00019778091531970072, "loss": 1.7565, "step": 869 }, { "epoch": 0.2719599874960925, "grad_norm": 0.2099609375, "learning_rate": 0.00019777576489251664, "loss": 1.5668, "step": 870 }, { "epoch": 0.2722725851828696, "grad_norm": 0.20703125, "learning_rate": 0.00019777060856248504, "loss": 1.6762, "step": 871 }, { "epoch": 0.2725851828696468, "grad_norm": 0.2158203125, "learning_rate": 0.00019776544632991717, "loss": 1.7808, "step": 872 }, { "epoch": 0.2728977805564239, "grad_norm": 0.203125, "learning_rate": 0.00019776027819512474, "loss": 1.8983, "step": 873 }, { "epoch": 0.273210378243201, "grad_norm": 0.20703125, "learning_rate": 0.00019775510415841977, "loss": 1.837, "step": 874 }, { "epoch": 0.2735229759299781, "grad_norm": 0.20703125, "learning_rate": 0.00019774992422011452, "loss": 1.7363, "step": 875 }, { "epoch": 0.27383557361675526, "grad_norm": 0.2109375, "learning_rate": 0.00019774473838052184, "loss": 1.8509, "step": 876 }, { "epoch": 0.27414817130353236, "grad_norm": 0.23046875, "learning_rate": 0.00019773954663995476, "loss": 1.8239, "step": 877 }, { "epoch": 0.27446076899030947, "grad_norm": 0.205078125, "learning_rate": 0.00019773434899872665, "loss": 2.0633, "step": 878 }, { "epoch": 0.2747733666770866, "grad_norm": 0.21484375, "learning_rate": 0.00019772914545715135, "loss": 2.0269, "step": 879 }, { "epoch": 0.27508596436386373, "grad_norm": 0.2021484375, "learning_rate": 0.00019772393601554303, "loss": 1.7389, "step": 880 }, { "epoch": 0.27539856205064084, "grad_norm": 0.212890625, "learning_rate": 0.00019771872067421615, "loss": 2.0936, "step": 881 }, { "epoch": 0.27571115973741794, "grad_norm": 0.2080078125, "learning_rate": 0.00019771349943348558, "loss": 1.7132, "step": 882 }, { "epoch": 0.27602375742419505, "grad_norm": 0.21484375, "learning_rate": 0.00019770827229366654, "loss": 1.6179, "step": 883 }, { "epoch": 0.27633635511097215, "grad_norm": 0.2021484375, "learning_rate": 0.00019770303925507456, "loss": 1.9907, "step": 884 }, { "epoch": 0.2766489527977493, "grad_norm": 0.2080078125, "learning_rate": 0.0001976978003180256, "loss": 1.5918, "step": 885 }, { "epoch": 0.2769615504845264, "grad_norm": 0.2060546875, "learning_rate": 0.000197692555482836, "loss": 1.6069, "step": 886 }, { "epoch": 0.2772741481713035, "grad_norm": 0.2060546875, "learning_rate": 0.00019768730474982227, "loss": 1.9966, "step": 887 }, { "epoch": 0.27758674585808063, "grad_norm": 0.2236328125, "learning_rate": 0.00019768204811930148, "loss": 1.7923, "step": 888 }, { "epoch": 0.2778993435448578, "grad_norm": 0.2001953125, "learning_rate": 0.00019767678559159098, "loss": 1.6497, "step": 889 }, { "epoch": 0.2782119412316349, "grad_norm": 0.2109375, "learning_rate": 0.00019767151716700845, "loss": 1.9629, "step": 890 }, { "epoch": 0.278524538918412, "grad_norm": 0.20703125, "learning_rate": 0.00019766624284587195, "loss": 1.8348, "step": 891 }, { "epoch": 0.2788371366051891, "grad_norm": 0.2158203125, "learning_rate": 0.00019766096262849994, "loss": 1.8409, "step": 892 }, { "epoch": 0.27914973429196627, "grad_norm": 0.208984375, "learning_rate": 0.00019765567651521115, "loss": 1.7796, "step": 893 }, { "epoch": 0.2794623319787434, "grad_norm": 0.201171875, "learning_rate": 0.00019765038450632476, "loss": 1.9009, "step": 894 }, { "epoch": 0.2797749296655205, "grad_norm": 0.203125, "learning_rate": 0.00019764508660216019, "loss": 1.4491, "step": 895 }, { "epoch": 0.2800875273522976, "grad_norm": 0.2275390625, "learning_rate": 0.0001976397828030373, "loss": 1.5436, "step": 896 }, { "epoch": 0.2804001250390747, "grad_norm": 0.2109375, "learning_rate": 0.0001976344731092763, "loss": 1.8577, "step": 897 }, { "epoch": 0.28071272272585185, "grad_norm": 0.3828125, "learning_rate": 0.0001976291575211978, "loss": 2.6341, "step": 898 }, { "epoch": 0.28102532041262895, "grad_norm": 0.203125, "learning_rate": 0.00019762383603912258, "loss": 1.6624, "step": 899 }, { "epoch": 0.28133791809940606, "grad_norm": 0.205078125, "learning_rate": 0.000197618508663372, "loss": 1.6193, "step": 900 }, { "epoch": 0.28165051578618316, "grad_norm": 0.1982421875, "learning_rate": 0.00019761317539426765, "loss": 1.6416, "step": 901 }, { "epoch": 0.2819631134729603, "grad_norm": 0.2138671875, "learning_rate": 0.00019760783623213153, "loss": 1.5813, "step": 902 }, { "epoch": 0.28227571115973743, "grad_norm": 0.2119140625, "learning_rate": 0.00019760249117728592, "loss": 2.1245, "step": 903 }, { "epoch": 0.28258830884651454, "grad_norm": 0.236328125, "learning_rate": 0.00019759714023005357, "loss": 2.0305, "step": 904 }, { "epoch": 0.28290090653329164, "grad_norm": 0.2060546875, "learning_rate": 0.0001975917833907575, "loss": 1.4689, "step": 905 }, { "epoch": 0.28321350422006875, "grad_norm": 0.1962890625, "learning_rate": 0.00019758642065972112, "loss": 1.9306, "step": 906 }, { "epoch": 0.2835261019068459, "grad_norm": 0.212890625, "learning_rate": 0.0001975810520372681, "loss": 1.8309, "step": 907 }, { "epoch": 0.283838699593623, "grad_norm": 0.216796875, "learning_rate": 0.0001975756775237227, "loss": 1.732, "step": 908 }, { "epoch": 0.2841512972804001, "grad_norm": 0.208984375, "learning_rate": 0.00019757029711940923, "loss": 1.5233, "step": 909 }, { "epoch": 0.2844638949671772, "grad_norm": 0.2099609375, "learning_rate": 0.00019756491082465263, "loss": 1.6491, "step": 910 }, { "epoch": 0.2847764926539544, "grad_norm": 0.201171875, "learning_rate": 0.00019755951863977805, "loss": 2.2236, "step": 911 }, { "epoch": 0.2850890903407315, "grad_norm": 0.2060546875, "learning_rate": 0.00019755412056511097, "loss": 1.8299, "step": 912 }, { "epoch": 0.2854016880275086, "grad_norm": 0.2119140625, "learning_rate": 0.00019754871660097734, "loss": 1.5403, "step": 913 }, { "epoch": 0.2857142857142857, "grad_norm": 0.2001953125, "learning_rate": 0.00019754330674770339, "loss": 1.5712, "step": 914 }, { "epoch": 0.28602688340106286, "grad_norm": 0.2080078125, "learning_rate": 0.00019753789100561569, "loss": 1.8814, "step": 915 }, { "epoch": 0.28633948108783996, "grad_norm": 0.2080078125, "learning_rate": 0.0001975324693750412, "loss": 1.7153, "step": 916 }, { "epoch": 0.28665207877461707, "grad_norm": 0.2060546875, "learning_rate": 0.0001975270418563073, "loss": 2.0221, "step": 917 }, { "epoch": 0.2869646764613942, "grad_norm": 0.2109375, "learning_rate": 0.00019752160844974158, "loss": 1.7176, "step": 918 }, { "epoch": 0.2872772741481713, "grad_norm": 0.208984375, "learning_rate": 0.0001975161691556721, "loss": 1.8581, "step": 919 }, { "epoch": 0.28758987183494844, "grad_norm": 0.2021484375, "learning_rate": 0.00019751072397442715, "loss": 1.8127, "step": 920 }, { "epoch": 0.28790246952172555, "grad_norm": 0.2041015625, "learning_rate": 0.0001975052729063356, "loss": 1.5183, "step": 921 }, { "epoch": 0.28821506720850265, "grad_norm": 0.2060546875, "learning_rate": 0.00019749981595172647, "loss": 1.7887, "step": 922 }, { "epoch": 0.28852766489527976, "grad_norm": 0.2001953125, "learning_rate": 0.00019749435311092919, "loss": 1.7053, "step": 923 }, { "epoch": 0.2888402625820569, "grad_norm": 0.212890625, "learning_rate": 0.00019748888438427358, "loss": 1.7008, "step": 924 }, { "epoch": 0.289152860268834, "grad_norm": 0.2099609375, "learning_rate": 0.00019748340977208977, "loss": 1.888, "step": 925 }, { "epoch": 0.2894654579556111, "grad_norm": 0.2001953125, "learning_rate": 0.0001974779292747083, "loss": 1.708, "step": 926 }, { "epoch": 0.28977805564238823, "grad_norm": 0.21484375, "learning_rate": 0.00019747244289246006, "loss": 1.8244, "step": 927 }, { "epoch": 0.29009065332916534, "grad_norm": 0.1982421875, "learning_rate": 0.0001974669506256762, "loss": 1.5614, "step": 928 }, { "epoch": 0.2904032510159425, "grad_norm": 0.38671875, "learning_rate": 0.00019746145247468832, "loss": 2.2925, "step": 929 }, { "epoch": 0.2907158487027196, "grad_norm": 0.2001953125, "learning_rate": 0.00019745594843982836, "loss": 1.7933, "step": 930 }, { "epoch": 0.2910284463894967, "grad_norm": 0.2041015625, "learning_rate": 0.0001974504385214286, "loss": 1.8521, "step": 931 }, { "epoch": 0.2913410440762738, "grad_norm": 0.2119140625, "learning_rate": 0.00019744492271982168, "loss": 1.6939, "step": 932 }, { "epoch": 0.291653641763051, "grad_norm": 0.2158203125, "learning_rate": 0.00019743940103534062, "loss": 1.6783, "step": 933 }, { "epoch": 0.2919662394498281, "grad_norm": 0.203125, "learning_rate": 0.00019743387346831876, "loss": 2.0204, "step": 934 }, { "epoch": 0.2922788371366052, "grad_norm": 0.197265625, "learning_rate": 0.00019742834001908977, "loss": 1.7812, "step": 935 }, { "epoch": 0.2925914348233823, "grad_norm": 0.2041015625, "learning_rate": 0.00019742280068798775, "loss": 1.7483, "step": 936 }, { "epoch": 0.29290403251015945, "grad_norm": 0.21484375, "learning_rate": 0.00019741725547534712, "loss": 1.8223, "step": 937 }, { "epoch": 0.29321663019693656, "grad_norm": 0.20703125, "learning_rate": 0.0001974117043815026, "loss": 1.8306, "step": 938 }, { "epoch": 0.29352922788371366, "grad_norm": 0.2080078125, "learning_rate": 0.00019740614740678937, "loss": 1.9111, "step": 939 }, { "epoch": 0.29384182557049077, "grad_norm": 0.224609375, "learning_rate": 0.0001974005845515429, "loss": 1.7384, "step": 940 }, { "epoch": 0.29415442325726787, "grad_norm": 0.2197265625, "learning_rate": 0.00019739501581609903, "loss": 1.7809, "step": 941 }, { "epoch": 0.29446702094404503, "grad_norm": 0.212890625, "learning_rate": 0.00019738944120079393, "loss": 1.8266, "step": 942 }, { "epoch": 0.29477961863082214, "grad_norm": 0.203125, "learning_rate": 0.0001973838607059642, "loss": 2.0459, "step": 943 }, { "epoch": 0.29509221631759924, "grad_norm": 0.2197265625, "learning_rate": 0.00019737827433194665, "loss": 1.7519, "step": 944 }, { "epoch": 0.29540481400437635, "grad_norm": 0.2080078125, "learning_rate": 0.0001973726820790786, "loss": 1.6264, "step": 945 }, { "epoch": 0.2957174116911535, "grad_norm": 0.2041015625, "learning_rate": 0.00019736708394769764, "loss": 1.6892, "step": 946 }, { "epoch": 0.2960300093779306, "grad_norm": 0.2158203125, "learning_rate": 0.00019736147993814176, "loss": 1.9491, "step": 947 }, { "epoch": 0.2963426070647077, "grad_norm": 0.1953125, "learning_rate": 0.00019735587005074927, "loss": 1.7754, "step": 948 }, { "epoch": 0.2966552047514848, "grad_norm": 0.2236328125, "learning_rate": 0.00019735025428585886, "loss": 1.9126, "step": 949 }, { "epoch": 0.29696780243826193, "grad_norm": 0.2080078125, "learning_rate": 0.00019734463264380953, "loss": 2.071, "step": 950 }, { "epoch": 0.2972804001250391, "grad_norm": 0.2041015625, "learning_rate": 0.0001973390051249407, "loss": 1.6336, "step": 951 }, { "epoch": 0.2975929978118162, "grad_norm": 0.2080078125, "learning_rate": 0.00019733337172959204, "loss": 1.4598, "step": 952 }, { "epoch": 0.2979055954985933, "grad_norm": 0.2041015625, "learning_rate": 0.0001973277324581037, "loss": 1.5984, "step": 953 }, { "epoch": 0.2982181931853704, "grad_norm": 0.21875, "learning_rate": 0.00019732208731081615, "loss": 1.9082, "step": 954 }, { "epoch": 0.29853079087214757, "grad_norm": 0.2099609375, "learning_rate": 0.00019731643628807018, "loss": 1.6075, "step": 955 }, { "epoch": 0.29884338855892467, "grad_norm": 0.2021484375, "learning_rate": 0.00019731077939020693, "loss": 1.9933, "step": 956 }, { "epoch": 0.2991559862457018, "grad_norm": 0.20703125, "learning_rate": 0.00019730511661756792, "loss": 1.5719, "step": 957 }, { "epoch": 0.2994685839324789, "grad_norm": 0.20703125, "learning_rate": 0.00019729944797049502, "loss": 1.6318, "step": 958 }, { "epoch": 0.29978118161925604, "grad_norm": 0.1982421875, "learning_rate": 0.00019729377344933043, "loss": 1.8574, "step": 959 }, { "epoch": 0.30009377930603315, "grad_norm": 0.208984375, "learning_rate": 0.0001972880930544168, "loss": 1.9144, "step": 960 }, { "epoch": 0.30040637699281025, "grad_norm": 0.2001953125, "learning_rate": 0.000197282406786097, "loss": 1.7335, "step": 961 }, { "epoch": 0.30071897467958736, "grad_norm": 0.203125, "learning_rate": 0.00019727671464471436, "loss": 1.7289, "step": 962 }, { "epoch": 0.30103157236636446, "grad_norm": 0.2119140625, "learning_rate": 0.00019727101663061247, "loss": 1.994, "step": 963 }, { "epoch": 0.3013441700531416, "grad_norm": 0.205078125, "learning_rate": 0.00019726531274413532, "loss": 1.7233, "step": 964 }, { "epoch": 0.30165676773991873, "grad_norm": 0.2099609375, "learning_rate": 0.00019725960298562733, "loss": 1.8961, "step": 965 }, { "epoch": 0.30196936542669583, "grad_norm": 0.21484375, "learning_rate": 0.00019725388735543318, "loss": 1.6978, "step": 966 }, { "epoch": 0.30228196311347294, "grad_norm": 0.2119140625, "learning_rate": 0.0001972481658538979, "loss": 1.752, "step": 967 }, { "epoch": 0.3025945608002501, "grad_norm": 0.205078125, "learning_rate": 0.00019724243848136692, "loss": 2.0531, "step": 968 }, { "epoch": 0.3029071584870272, "grad_norm": 0.208984375, "learning_rate": 0.000197236705238186, "loss": 1.7117, "step": 969 }, { "epoch": 0.3032197561738043, "grad_norm": 0.20703125, "learning_rate": 0.00019723096612470133, "loss": 1.5911, "step": 970 }, { "epoch": 0.3035323538605814, "grad_norm": 0.20703125, "learning_rate": 0.00019722522114125929, "loss": 1.8811, "step": 971 }, { "epoch": 0.3038449515473586, "grad_norm": 0.22265625, "learning_rate": 0.00019721947028820676, "loss": 1.6444, "step": 972 }, { "epoch": 0.3041575492341357, "grad_norm": 0.2080078125, "learning_rate": 0.0001972137135658909, "loss": 1.5187, "step": 973 }, { "epoch": 0.3044701469209128, "grad_norm": 0.2138671875, "learning_rate": 0.0001972079509746593, "loss": 1.6957, "step": 974 }, { "epoch": 0.3047827446076899, "grad_norm": 0.2099609375, "learning_rate": 0.00019720218251485983, "loss": 1.5887, "step": 975 }, { "epoch": 0.305095342294467, "grad_norm": 0.216796875, "learning_rate": 0.0001971964081868407, "loss": 1.7837, "step": 976 }, { "epoch": 0.30540793998124416, "grad_norm": 0.2099609375, "learning_rate": 0.0001971906279909506, "loss": 1.8848, "step": 977 }, { "epoch": 0.30572053766802126, "grad_norm": 0.224609375, "learning_rate": 0.0001971848419275384, "loss": 1.8966, "step": 978 }, { "epoch": 0.30603313535479837, "grad_norm": 0.1982421875, "learning_rate": 0.00019717904999695348, "loss": 1.6581, "step": 979 }, { "epoch": 0.3063457330415755, "grad_norm": 0.2021484375, "learning_rate": 0.00019717325219954543, "loss": 1.6071, "step": 980 }, { "epoch": 0.30665833072835263, "grad_norm": 0.2060546875, "learning_rate": 0.00019716744853566436, "loss": 1.8169, "step": 981 }, { "epoch": 0.30697092841512974, "grad_norm": 0.197265625, "learning_rate": 0.0001971616390056606, "loss": 1.6017, "step": 982 }, { "epoch": 0.30728352610190685, "grad_norm": 0.1982421875, "learning_rate": 0.00019715582360988482, "loss": 1.6999, "step": 983 }, { "epoch": 0.30759612378868395, "grad_norm": 0.2099609375, "learning_rate": 0.00019715000234868821, "loss": 1.7758, "step": 984 }, { "epoch": 0.30790872147546106, "grad_norm": 0.2080078125, "learning_rate": 0.00019714417522242214, "loss": 1.9776, "step": 985 }, { "epoch": 0.3082213191622382, "grad_norm": 0.2265625, "learning_rate": 0.00019713834223143844, "loss": 1.7776, "step": 986 }, { "epoch": 0.3085339168490153, "grad_norm": 0.2197265625, "learning_rate": 0.00019713250337608922, "loss": 1.8847, "step": 987 }, { "epoch": 0.3088465145357924, "grad_norm": 0.2109375, "learning_rate": 0.000197126658656727, "loss": 1.8091, "step": 988 }, { "epoch": 0.30915911222256953, "grad_norm": 0.212890625, "learning_rate": 0.00019712080807370464, "loss": 1.804, "step": 989 }, { "epoch": 0.3094717099093467, "grad_norm": 0.22265625, "learning_rate": 0.00019711495162737529, "loss": 1.782, "step": 990 }, { "epoch": 0.3097843075961238, "grad_norm": 0.201171875, "learning_rate": 0.0001971090893180926, "loss": 1.5211, "step": 991 }, { "epoch": 0.3100969052829009, "grad_norm": 0.2001953125, "learning_rate": 0.0001971032211462104, "loss": 1.4168, "step": 992 }, { "epoch": 0.310409502969678, "grad_norm": 0.212890625, "learning_rate": 0.00019709734711208303, "loss": 1.5656, "step": 993 }, { "epoch": 0.31072210065645517, "grad_norm": 0.2080078125, "learning_rate": 0.00019709146721606509, "loss": 1.818, "step": 994 }, { "epoch": 0.3110346983432323, "grad_norm": 0.205078125, "learning_rate": 0.00019708558145851152, "loss": 1.7158, "step": 995 }, { "epoch": 0.3113472960300094, "grad_norm": 0.2197265625, "learning_rate": 0.0001970796898397777, "loss": 1.6944, "step": 996 }, { "epoch": 0.3116598937167865, "grad_norm": 0.2109375, "learning_rate": 0.0001970737923602193, "loss": 1.7961, "step": 997 }, { "epoch": 0.3119724914035636, "grad_norm": 0.201171875, "learning_rate": 0.00019706788902019233, "loss": 1.8871, "step": 998 }, { "epoch": 0.31228508909034075, "grad_norm": 0.205078125, "learning_rate": 0.00019706197982005322, "loss": 1.8513, "step": 999 }, { "epoch": 0.31259768677711786, "grad_norm": 0.2138671875, "learning_rate": 0.0001970560647601587, "loss": 1.6529, "step": 1000 }, { "epoch": 0.31291028446389496, "grad_norm": 0.21875, "learning_rate": 0.0001970501438408659, "loss": 1.7564, "step": 1001 }, { "epoch": 0.31322288215067207, "grad_norm": 0.2041015625, "learning_rate": 0.0001970442170625322, "loss": 1.5718, "step": 1002 }, { "epoch": 0.3135354798374492, "grad_norm": 0.201171875, "learning_rate": 0.00019703828442551547, "loss": 1.9791, "step": 1003 }, { "epoch": 0.31384807752422633, "grad_norm": 0.2158203125, "learning_rate": 0.00019703234593017386, "loss": 1.5583, "step": 1004 }, { "epoch": 0.31416067521100344, "grad_norm": 0.19921875, "learning_rate": 0.00019702640157686586, "loss": 1.8005, "step": 1005 }, { "epoch": 0.31447327289778054, "grad_norm": 0.216796875, "learning_rate": 0.00019702045136595032, "loss": 2.0622, "step": 1006 }, { "epoch": 0.31478587058455765, "grad_norm": 0.19921875, "learning_rate": 0.00019701449529778656, "loss": 1.6313, "step": 1007 }, { "epoch": 0.3150984682713348, "grad_norm": 0.2138671875, "learning_rate": 0.00019700853337273406, "loss": 1.7088, "step": 1008 }, { "epoch": 0.3154110659581119, "grad_norm": 0.2041015625, "learning_rate": 0.0001970025655911528, "loss": 1.7942, "step": 1009 }, { "epoch": 0.315723663644889, "grad_norm": 0.2041015625, "learning_rate": 0.00019699659195340303, "loss": 1.8139, "step": 1010 }, { "epoch": 0.3160362613316661, "grad_norm": 0.212890625, "learning_rate": 0.0001969906124598454, "loss": 1.6704, "step": 1011 }, { "epoch": 0.3163488590184433, "grad_norm": 0.2109375, "learning_rate": 0.00019698462711084091, "loss": 1.9731, "step": 1012 }, { "epoch": 0.3166614567052204, "grad_norm": 0.2109375, "learning_rate": 0.00019697863590675086, "loss": 1.6923, "step": 1013 }, { "epoch": 0.3169740543919975, "grad_norm": 0.21484375, "learning_rate": 0.00019697263884793702, "loss": 1.8974, "step": 1014 }, { "epoch": 0.3172866520787746, "grad_norm": 0.212890625, "learning_rate": 0.0001969666359347614, "loss": 2.0298, "step": 1015 }, { "epoch": 0.31759924976555176, "grad_norm": 0.2001953125, "learning_rate": 0.00019696062716758638, "loss": 1.6155, "step": 1016 }, { "epoch": 0.31791184745232887, "grad_norm": 0.212890625, "learning_rate": 0.00019695461254677475, "loss": 1.6622, "step": 1017 }, { "epoch": 0.31822444513910597, "grad_norm": 0.201171875, "learning_rate": 0.00019694859207268958, "loss": 2.0245, "step": 1018 }, { "epoch": 0.3185370428258831, "grad_norm": 0.205078125, "learning_rate": 0.0001969425657456944, "loss": 1.7654, "step": 1019 }, { "epoch": 0.3188496405126602, "grad_norm": 0.203125, "learning_rate": 0.00019693653356615297, "loss": 1.6629, "step": 1020 }, { "epoch": 0.31916223819943734, "grad_norm": 0.2099609375, "learning_rate": 0.00019693049553442952, "loss": 1.7823, "step": 1021 }, { "epoch": 0.31947483588621445, "grad_norm": 0.2099609375, "learning_rate": 0.0001969244516508885, "loss": 1.5993, "step": 1022 }, { "epoch": 0.31978743357299155, "grad_norm": 0.2109375, "learning_rate": 0.0001969184019158948, "loss": 1.7385, "step": 1023 }, { "epoch": 0.32010003125976866, "grad_norm": 0.220703125, "learning_rate": 0.00019691234632981372, "loss": 2.0781, "step": 1024 }, { "epoch": 0.3204126289465458, "grad_norm": 0.2080078125, "learning_rate": 0.00019690628489301077, "loss": 1.6396, "step": 1025 }, { "epoch": 0.3207252266333229, "grad_norm": 0.2099609375, "learning_rate": 0.00019690021760585192, "loss": 1.7066, "step": 1026 }, { "epoch": 0.32103782432010003, "grad_norm": 0.216796875, "learning_rate": 0.00019689414446870344, "loss": 1.6741, "step": 1027 }, { "epoch": 0.32135042200687713, "grad_norm": 0.2109375, "learning_rate": 0.000196888065481932, "loss": 1.8628, "step": 1028 }, { "epoch": 0.32166301969365424, "grad_norm": 0.234375, "learning_rate": 0.00019688198064590458, "loss": 1.8129, "step": 1029 }, { "epoch": 0.3219756173804314, "grad_norm": 0.203125, "learning_rate": 0.00019687588996098853, "loss": 1.9068, "step": 1030 }, { "epoch": 0.3222882150672085, "grad_norm": 0.2109375, "learning_rate": 0.00019686979342755154, "loss": 1.8664, "step": 1031 }, { "epoch": 0.3226008127539856, "grad_norm": 0.2236328125, "learning_rate": 0.0001968636910459617, "loss": 1.7239, "step": 1032 }, { "epoch": 0.3229134104407627, "grad_norm": 0.201171875, "learning_rate": 0.00019685758281658738, "loss": 1.9294, "step": 1033 }, { "epoch": 0.3232260081275399, "grad_norm": 0.20703125, "learning_rate": 0.00019685146873979736, "loss": 1.7469, "step": 1034 }, { "epoch": 0.323538605814317, "grad_norm": 0.208984375, "learning_rate": 0.00019684534881596078, "loss": 1.8425, "step": 1035 }, { "epoch": 0.3238512035010941, "grad_norm": 0.208984375, "learning_rate": 0.00019683922304544705, "loss": 1.5658, "step": 1036 }, { "epoch": 0.3241638011878712, "grad_norm": 0.20703125, "learning_rate": 0.000196833091428626, "loss": 1.7025, "step": 1037 }, { "epoch": 0.32447639887464835, "grad_norm": 0.20703125, "learning_rate": 0.00019682695396586785, "loss": 1.7166, "step": 1038 }, { "epoch": 0.32478899656142546, "grad_norm": 0.220703125, "learning_rate": 0.00019682081065754313, "loss": 1.8159, "step": 1039 }, { "epoch": 0.32510159424820256, "grad_norm": 0.2080078125, "learning_rate": 0.00019681466150402266, "loss": 1.7957, "step": 1040 }, { "epoch": 0.32541419193497967, "grad_norm": 0.2373046875, "learning_rate": 0.0001968085065056777, "loss": 1.6375, "step": 1041 }, { "epoch": 0.3257267896217568, "grad_norm": 0.21484375, "learning_rate": 0.00019680234566287985, "loss": 2.1855, "step": 1042 }, { "epoch": 0.32603938730853393, "grad_norm": 0.20703125, "learning_rate": 0.00019679617897600102, "loss": 1.8348, "step": 1043 }, { "epoch": 0.32635198499531104, "grad_norm": 0.2021484375, "learning_rate": 0.00019679000644541356, "loss": 1.6444, "step": 1044 }, { "epoch": 0.32666458268208814, "grad_norm": 0.205078125, "learning_rate": 0.00019678382807149003, "loss": 1.8918, "step": 1045 }, { "epoch": 0.32697718036886525, "grad_norm": 0.2080078125, "learning_rate": 0.00019677764385460348, "loss": 1.6544, "step": 1046 }, { "epoch": 0.3272897780556424, "grad_norm": 0.2099609375, "learning_rate": 0.00019677145379512723, "loss": 1.8734, "step": 1047 }, { "epoch": 0.3276023757424195, "grad_norm": 0.208984375, "learning_rate": 0.00019676525789343502, "loss": 1.8792, "step": 1048 }, { "epoch": 0.3279149734291966, "grad_norm": 0.1982421875, "learning_rate": 0.00019675905614990085, "loss": 1.8914, "step": 1049 }, { "epoch": 0.3282275711159737, "grad_norm": 0.20703125, "learning_rate": 0.0001967528485648992, "loss": 1.6186, "step": 1050 }, { "epoch": 0.3285401688027509, "grad_norm": 0.2236328125, "learning_rate": 0.00019674663513880475, "loss": 1.7937, "step": 1051 }, { "epoch": 0.328852766489528, "grad_norm": 0.203125, "learning_rate": 0.00019674041587199268, "loss": 1.7155, "step": 1052 }, { "epoch": 0.3291653641763051, "grad_norm": 0.2099609375, "learning_rate": 0.0001967341907648384, "loss": 1.8787, "step": 1053 }, { "epoch": 0.3294779618630822, "grad_norm": 0.220703125, "learning_rate": 0.00019672795981771777, "loss": 1.6195, "step": 1054 }, { "epoch": 0.3297905595498593, "grad_norm": 0.203125, "learning_rate": 0.00019672172303100696, "loss": 1.9987, "step": 1055 }, { "epoch": 0.33010315723663647, "grad_norm": 0.2099609375, "learning_rate": 0.00019671548040508244, "loss": 1.6107, "step": 1056 }, { "epoch": 0.3304157549234136, "grad_norm": 0.20703125, "learning_rate": 0.00019670923194032116, "loss": 1.6394, "step": 1057 }, { "epoch": 0.3307283526101907, "grad_norm": 0.19921875, "learning_rate": 0.00019670297763710028, "loss": 1.7142, "step": 1058 }, { "epoch": 0.3310409502969678, "grad_norm": 0.205078125, "learning_rate": 0.00019669671749579742, "loss": 1.8344, "step": 1059 }, { "epoch": 0.33135354798374494, "grad_norm": 0.212890625, "learning_rate": 0.0001966904515167905, "loss": 1.933, "step": 1060 }, { "epoch": 0.33166614567052205, "grad_norm": 0.2041015625, "learning_rate": 0.0001966841797004578, "loss": 1.763, "step": 1061 }, { "epoch": 0.33197874335729916, "grad_norm": 0.2041015625, "learning_rate": 0.000196677902047178, "loss": 1.8741, "step": 1062 }, { "epoch": 0.33229134104407626, "grad_norm": 0.2099609375, "learning_rate": 0.00019667161855733002, "loss": 1.8624, "step": 1063 }, { "epoch": 0.33260393873085337, "grad_norm": 0.216796875, "learning_rate": 0.00019666532923129327, "loss": 1.899, "step": 1064 }, { "epoch": 0.3329165364176305, "grad_norm": 0.30078125, "learning_rate": 0.00019665903406944737, "loss": 2.3084, "step": 1065 }, { "epoch": 0.33322913410440763, "grad_norm": 0.197265625, "learning_rate": 0.00019665273307217245, "loss": 1.6737, "step": 1066 }, { "epoch": 0.33354173179118474, "grad_norm": 0.216796875, "learning_rate": 0.00019664642623984886, "loss": 1.6899, "step": 1067 }, { "epoch": 0.33385432947796184, "grad_norm": 0.2060546875, "learning_rate": 0.00019664011357285735, "loss": 1.8702, "step": 1068 }, { "epoch": 0.334166927164739, "grad_norm": 0.2060546875, "learning_rate": 0.00019663379507157903, "loss": 2.0766, "step": 1069 }, { "epoch": 0.3344795248515161, "grad_norm": 0.1953125, "learning_rate": 0.00019662747073639537, "loss": 1.9336, "step": 1070 }, { "epoch": 0.3347921225382932, "grad_norm": 0.2041015625, "learning_rate": 0.00019662114056768815, "loss": 1.8872, "step": 1071 }, { "epoch": 0.3351047202250703, "grad_norm": 0.2216796875, "learning_rate": 0.00019661480456583958, "loss": 1.7719, "step": 1072 }, { "epoch": 0.3354173179118475, "grad_norm": 0.220703125, "learning_rate": 0.00019660846273123213, "loss": 1.695, "step": 1073 }, { "epoch": 0.3357299155986246, "grad_norm": 0.208984375, "learning_rate": 0.00019660211506424867, "loss": 1.8269, "step": 1074 }, { "epoch": 0.3360425132854017, "grad_norm": 0.2119140625, "learning_rate": 0.0001965957615652724, "loss": 1.8746, "step": 1075 }, { "epoch": 0.3363551109721788, "grad_norm": 0.2216796875, "learning_rate": 0.00019658940223468693, "loss": 1.5041, "step": 1076 }, { "epoch": 0.3366677086589559, "grad_norm": 0.22265625, "learning_rate": 0.00019658303707287617, "loss": 1.8079, "step": 1077 }, { "epoch": 0.33698030634573306, "grad_norm": 0.2060546875, "learning_rate": 0.00019657666608022438, "loss": 1.7644, "step": 1078 }, { "epoch": 0.33729290403251017, "grad_norm": 0.212890625, "learning_rate": 0.00019657028925711617, "loss": 1.759, "step": 1079 }, { "epoch": 0.33760550171928727, "grad_norm": 0.220703125, "learning_rate": 0.00019656390660393659, "loss": 1.9192, "step": 1080 }, { "epoch": 0.3379180994060644, "grad_norm": 0.2099609375, "learning_rate": 0.00019655751812107085, "loss": 1.9153, "step": 1081 }, { "epoch": 0.33823069709284154, "grad_norm": 0.2158203125, "learning_rate": 0.00019655112380890475, "loss": 1.688, "step": 1082 }, { "epoch": 0.33854329477961864, "grad_norm": 0.2197265625, "learning_rate": 0.00019654472366782425, "loss": 1.907, "step": 1083 }, { "epoch": 0.33885589246639575, "grad_norm": 0.212890625, "learning_rate": 0.00019653831769821575, "loss": 1.9453, "step": 1084 }, { "epoch": 0.33916849015317285, "grad_norm": 0.2099609375, "learning_rate": 0.000196531905900466, "loss": 1.6311, "step": 1085 }, { "epoch": 0.33948108783994996, "grad_norm": 0.2119140625, "learning_rate": 0.00019652548827496207, "loss": 1.9493, "step": 1086 }, { "epoch": 0.3397936855267271, "grad_norm": 0.208984375, "learning_rate": 0.0001965190648220914, "loss": 1.8175, "step": 1087 }, { "epoch": 0.3401062832135042, "grad_norm": 0.19921875, "learning_rate": 0.0001965126355422418, "loss": 1.8018, "step": 1088 }, { "epoch": 0.34041888090028133, "grad_norm": 0.2099609375, "learning_rate": 0.0001965062004358014, "loss": 1.6674, "step": 1089 }, { "epoch": 0.34073147858705843, "grad_norm": 0.2158203125, "learning_rate": 0.0001964997595031587, "loss": 1.9538, "step": 1090 }, { "epoch": 0.3410440762738356, "grad_norm": 0.203125, "learning_rate": 0.00019649331274470256, "loss": 1.8417, "step": 1091 }, { "epoch": 0.3413566739606127, "grad_norm": 0.21484375, "learning_rate": 0.00019648686016082216, "loss": 2.0019, "step": 1092 }, { "epoch": 0.3416692716473898, "grad_norm": 0.21484375, "learning_rate": 0.00019648040175190707, "loss": 1.7955, "step": 1093 }, { "epoch": 0.3419818693341669, "grad_norm": 0.22265625, "learning_rate": 0.00019647393751834718, "loss": 1.6747, "step": 1094 }, { "epoch": 0.34229446702094407, "grad_norm": 0.2177734375, "learning_rate": 0.00019646746746053274, "loss": 1.7818, "step": 1095 }, { "epoch": 0.3426070647077212, "grad_norm": 0.20703125, "learning_rate": 0.00019646099157885437, "loss": 1.7983, "step": 1096 }, { "epoch": 0.3429196623944983, "grad_norm": 0.2265625, "learning_rate": 0.00019645450987370298, "loss": 1.677, "step": 1097 }, { "epoch": 0.3432322600812754, "grad_norm": 0.2119140625, "learning_rate": 0.00019644802234546993, "loss": 1.9241, "step": 1098 }, { "epoch": 0.3435448577680525, "grad_norm": 0.212890625, "learning_rate": 0.0001964415289945469, "loss": 1.9008, "step": 1099 }, { "epoch": 0.34385745545482965, "grad_norm": 0.2021484375, "learning_rate": 0.00019643502982132581, "loss": 1.6438, "step": 1100 }, { "epoch": 0.34417005314160676, "grad_norm": 0.2177734375, "learning_rate": 0.0001964285248261991, "loss": 1.7665, "step": 1101 }, { "epoch": 0.34448265082838386, "grad_norm": 0.2109375, "learning_rate": 0.0001964220140095595, "loss": 1.7259, "step": 1102 }, { "epoch": 0.34479524851516097, "grad_norm": 0.2099609375, "learning_rate": 0.00019641549737180002, "loss": 1.7119, "step": 1103 }, { "epoch": 0.34510784620193813, "grad_norm": 0.2314453125, "learning_rate": 0.00019640897491331408, "loss": 1.6551, "step": 1104 }, { "epoch": 0.34542044388871523, "grad_norm": 0.2138671875, "learning_rate": 0.0001964024466344955, "loss": 1.9882, "step": 1105 }, { "epoch": 0.34573304157549234, "grad_norm": 0.2275390625, "learning_rate": 0.00019639591253573836, "loss": 1.7573, "step": 1106 }, { "epoch": 0.34604563926226944, "grad_norm": 0.208984375, "learning_rate": 0.00019638937261743714, "loss": 1.6814, "step": 1107 }, { "epoch": 0.34635823694904655, "grad_norm": 0.2060546875, "learning_rate": 0.00019638282687998667, "loss": 1.943, "step": 1108 }, { "epoch": 0.3466708346358237, "grad_norm": 0.220703125, "learning_rate": 0.00019637627532378212, "loss": 1.6896, "step": 1109 }, { "epoch": 0.3469834323226008, "grad_norm": 0.2197265625, "learning_rate": 0.000196369717949219, "loss": 1.8984, "step": 1110 }, { "epoch": 0.3472960300093779, "grad_norm": 0.201171875, "learning_rate": 0.00019636315475669324, "loss": 1.4845, "step": 1111 }, { "epoch": 0.347608627696155, "grad_norm": 0.2080078125, "learning_rate": 0.00019635658574660098, "loss": 1.7234, "step": 1112 }, { "epoch": 0.3479212253829322, "grad_norm": 0.201171875, "learning_rate": 0.0001963500109193389, "loss": 1.5583, "step": 1113 }, { "epoch": 0.3482338230697093, "grad_norm": 0.220703125, "learning_rate": 0.00019634343027530383, "loss": 1.8789, "step": 1114 }, { "epoch": 0.3485464207564864, "grad_norm": 0.21484375, "learning_rate": 0.00019633684381489315, "loss": 2.0262, "step": 1115 }, { "epoch": 0.3488590184432635, "grad_norm": 0.2080078125, "learning_rate": 0.00019633025153850442, "loss": 1.7877, "step": 1116 }, { "epoch": 0.34917161613004066, "grad_norm": 0.216796875, "learning_rate": 0.00019632365344653563, "loss": 1.7381, "step": 1117 }, { "epoch": 0.34948421381681777, "grad_norm": 0.2138671875, "learning_rate": 0.00019631704953938518, "loss": 1.7758, "step": 1118 }, { "epoch": 0.3497968115035949, "grad_norm": 0.212890625, "learning_rate": 0.0001963104398174517, "loss": 1.8063, "step": 1119 }, { "epoch": 0.350109409190372, "grad_norm": 0.2041015625, "learning_rate": 0.00019630382428113417, "loss": 1.8691, "step": 1120 }, { "epoch": 0.3504220068771491, "grad_norm": 0.203125, "learning_rate": 0.00019629720293083214, "loss": 1.7844, "step": 1121 }, { "epoch": 0.35073460456392624, "grad_norm": 0.2001953125, "learning_rate": 0.00019629057576694522, "loss": 1.6097, "step": 1122 }, { "epoch": 0.35104720225070335, "grad_norm": 0.21875, "learning_rate": 0.00019628394278987355, "loss": 1.9393, "step": 1123 }, { "epoch": 0.35135979993748045, "grad_norm": 0.2060546875, "learning_rate": 0.0001962773040000175, "loss": 1.7556, "step": 1124 }, { "epoch": 0.35167239762425756, "grad_norm": 0.220703125, "learning_rate": 0.000196270659397778, "loss": 1.7145, "step": 1125 }, { "epoch": 0.3519849953110347, "grad_norm": 0.220703125, "learning_rate": 0.0001962640089835561, "loss": 1.6505, "step": 1126 }, { "epoch": 0.3522975929978118, "grad_norm": 0.2197265625, "learning_rate": 0.00019625735275775327, "loss": 1.6953, "step": 1127 }, { "epoch": 0.35261019068458893, "grad_norm": 0.224609375, "learning_rate": 0.00019625069072077138, "loss": 1.7897, "step": 1128 }, { "epoch": 0.35292278837136604, "grad_norm": 0.2109375, "learning_rate": 0.0001962440228730127, "loss": 1.8916, "step": 1129 }, { "epoch": 0.3532353860581432, "grad_norm": 0.2138671875, "learning_rate": 0.00019623734921487965, "loss": 1.5444, "step": 1130 }, { "epoch": 0.3535479837449203, "grad_norm": 0.2138671875, "learning_rate": 0.00019623066974677525, "loss": 1.6391, "step": 1131 }, { "epoch": 0.3538605814316974, "grad_norm": 0.2138671875, "learning_rate": 0.00019622398446910263, "loss": 1.6171, "step": 1132 }, { "epoch": 0.3541731791184745, "grad_norm": 0.216796875, "learning_rate": 0.0001962172933822655, "loss": 1.6352, "step": 1133 }, { "epoch": 0.3544857768052516, "grad_norm": 0.220703125, "learning_rate": 0.00019621059648666772, "loss": 1.8147, "step": 1134 }, { "epoch": 0.3547983744920288, "grad_norm": 0.2099609375, "learning_rate": 0.00019620389378271366, "loss": 1.7773, "step": 1135 }, { "epoch": 0.3551109721788059, "grad_norm": 0.212890625, "learning_rate": 0.0001961971852708079, "loss": 1.7441, "step": 1136 }, { "epoch": 0.355423569865583, "grad_norm": 0.2197265625, "learning_rate": 0.00019619047095135553, "loss": 1.9931, "step": 1137 }, { "epoch": 0.3557361675523601, "grad_norm": 0.2177734375, "learning_rate": 0.00019618375082476182, "loss": 1.6723, "step": 1138 }, { "epoch": 0.35604876523913725, "grad_norm": 0.2119140625, "learning_rate": 0.0001961770248914325, "loss": 1.8312, "step": 1139 }, { "epoch": 0.35636136292591436, "grad_norm": 0.2216796875, "learning_rate": 0.00019617029315177365, "loss": 2.0553, "step": 1140 }, { "epoch": 0.35667396061269147, "grad_norm": 0.20703125, "learning_rate": 0.00019616355560619163, "loss": 1.6513, "step": 1141 }, { "epoch": 0.35698655829946857, "grad_norm": 0.2041015625, "learning_rate": 0.00019615681225509325, "loss": 1.8244, "step": 1142 }, { "epoch": 0.3572991559862457, "grad_norm": 0.20703125, "learning_rate": 0.00019615006309888552, "loss": 1.9322, "step": 1143 }, { "epoch": 0.35761175367302284, "grad_norm": 0.2138671875, "learning_rate": 0.000196143308137976, "loss": 1.7572, "step": 1144 }, { "epoch": 0.35792435135979994, "grad_norm": 0.2099609375, "learning_rate": 0.00019613654737277245, "loss": 1.5536, "step": 1145 }, { "epoch": 0.35823694904657705, "grad_norm": 0.2197265625, "learning_rate": 0.000196129780803683, "loss": 1.9036, "step": 1146 }, { "epoch": 0.35854954673335415, "grad_norm": 0.2158203125, "learning_rate": 0.00019612300843111622, "loss": 2.1856, "step": 1147 }, { "epoch": 0.3588621444201313, "grad_norm": 0.212890625, "learning_rate": 0.0001961162302554809, "loss": 1.6396, "step": 1148 }, { "epoch": 0.3591747421069084, "grad_norm": 0.2177734375, "learning_rate": 0.00019610944627718627, "loss": 1.8837, "step": 1149 }, { "epoch": 0.3594873397936855, "grad_norm": 0.2158203125, "learning_rate": 0.00019610265649664193, "loss": 1.7418, "step": 1150 }, { "epoch": 0.35979993748046263, "grad_norm": 0.2109375, "learning_rate": 0.00019609586091425774, "loss": 1.8848, "step": 1151 }, { "epoch": 0.3601125351672398, "grad_norm": 0.2099609375, "learning_rate": 0.00019608905953044396, "loss": 1.4857, "step": 1152 }, { "epoch": 0.3604251328540169, "grad_norm": 0.2119140625, "learning_rate": 0.00019608225234561121, "loss": 1.6741, "step": 1153 }, { "epoch": 0.360737730540794, "grad_norm": 0.19921875, "learning_rate": 0.00019607543936017046, "loss": 1.6363, "step": 1154 }, { "epoch": 0.3610503282275711, "grad_norm": 0.2109375, "learning_rate": 0.00019606862057453298, "loss": 1.8323, "step": 1155 }, { "epoch": 0.3613629259143482, "grad_norm": 0.21484375, "learning_rate": 0.00019606179598911049, "loss": 1.6778, "step": 1156 }, { "epoch": 0.36167552360112537, "grad_norm": 0.208984375, "learning_rate": 0.00019605496560431496, "loss": 1.8691, "step": 1157 }, { "epoch": 0.3619881212879025, "grad_norm": 0.2109375, "learning_rate": 0.00019604812942055873, "loss": 1.6175, "step": 1158 }, { "epoch": 0.3623007189746796, "grad_norm": 0.212890625, "learning_rate": 0.00019604128743825453, "loss": 1.717, "step": 1159 }, { "epoch": 0.3626133166614567, "grad_norm": 0.201171875, "learning_rate": 0.00019603443965781543, "loss": 1.773, "step": 1160 }, { "epoch": 0.36292591434823385, "grad_norm": 0.212890625, "learning_rate": 0.00019602758607965484, "loss": 1.8844, "step": 1161 }, { "epoch": 0.36323851203501095, "grad_norm": 0.2109375, "learning_rate": 0.00019602072670418647, "loss": 1.9545, "step": 1162 }, { "epoch": 0.36355110972178806, "grad_norm": 0.2021484375, "learning_rate": 0.00019601386153182451, "loss": 1.523, "step": 1163 }, { "epoch": 0.36386370740856516, "grad_norm": 0.224609375, "learning_rate": 0.00019600699056298337, "loss": 2.0468, "step": 1164 }, { "epoch": 0.36417630509534227, "grad_norm": 0.2275390625, "learning_rate": 0.00019600011379807786, "loss": 1.9032, "step": 1165 }, { "epoch": 0.36448890278211943, "grad_norm": 0.2099609375, "learning_rate": 0.00019599323123752315, "loss": 1.3631, "step": 1166 }, { "epoch": 0.36480150046889653, "grad_norm": 0.21875, "learning_rate": 0.00019598634288173474, "loss": 1.6805, "step": 1167 }, { "epoch": 0.36511409815567364, "grad_norm": 0.2158203125, "learning_rate": 0.00019597944873112852, "loss": 1.4813, "step": 1168 }, { "epoch": 0.36542669584245074, "grad_norm": 0.216796875, "learning_rate": 0.00019597254878612065, "loss": 1.7945, "step": 1169 }, { "epoch": 0.3657392935292279, "grad_norm": 0.2177734375, "learning_rate": 0.0001959656430471277, "loss": 1.5851, "step": 1170 }, { "epoch": 0.366051891216005, "grad_norm": 0.21875, "learning_rate": 0.0001959587315145666, "loss": 1.8493, "step": 1171 }, { "epoch": 0.3663644889027821, "grad_norm": 0.1982421875, "learning_rate": 0.0001959518141888546, "loss": 1.7852, "step": 1172 }, { "epoch": 0.3666770865895592, "grad_norm": 0.22265625, "learning_rate": 0.00019594489107040928, "loss": 1.9668, "step": 1173 }, { "epoch": 0.3669896842763364, "grad_norm": 0.216796875, "learning_rate": 0.00019593796215964867, "loss": 1.656, "step": 1174 }, { "epoch": 0.3673022819631135, "grad_norm": 0.20703125, "learning_rate": 0.000195931027456991, "loss": 1.5947, "step": 1175 }, { "epoch": 0.3676148796498906, "grad_norm": 0.2255859375, "learning_rate": 0.00019592408696285496, "loss": 1.7685, "step": 1176 }, { "epoch": 0.3679274773366677, "grad_norm": 0.220703125, "learning_rate": 0.00019591714067765953, "loss": 1.6027, "step": 1177 }, { "epoch": 0.3682400750234448, "grad_norm": 0.205078125, "learning_rate": 0.0001959101886018241, "loss": 2.2013, "step": 1178 }, { "epoch": 0.36855267271022196, "grad_norm": 0.208984375, "learning_rate": 0.0001959032307357684, "loss": 1.6995, "step": 1179 }, { "epoch": 0.36886527039699907, "grad_norm": 0.20703125, "learning_rate": 0.00019589626707991242, "loss": 1.7104, "step": 1180 }, { "epoch": 0.3691778680837762, "grad_norm": 0.2041015625, "learning_rate": 0.00019588929763467657, "loss": 1.6798, "step": 1181 }, { "epoch": 0.3694904657705533, "grad_norm": 0.20703125, "learning_rate": 0.00019588232240048167, "loss": 1.5464, "step": 1182 }, { "epoch": 0.36980306345733044, "grad_norm": 0.2216796875, "learning_rate": 0.0001958753413777488, "loss": 1.7789, "step": 1183 }, { "epoch": 0.37011566114410754, "grad_norm": 0.2265625, "learning_rate": 0.00019586835456689934, "loss": 1.7634, "step": 1184 }, { "epoch": 0.37042825883088465, "grad_norm": 0.2109375, "learning_rate": 0.0001958613619683552, "loss": 1.9015, "step": 1185 }, { "epoch": 0.37074085651766175, "grad_norm": 0.318359375, "learning_rate": 0.00019585436358253845, "loss": 2.3964, "step": 1186 }, { "epoch": 0.37105345420443886, "grad_norm": 0.216796875, "learning_rate": 0.00019584735940987163, "loss": 1.7068, "step": 1187 }, { "epoch": 0.371366051891216, "grad_norm": 0.2060546875, "learning_rate": 0.00019584034945077758, "loss": 1.9431, "step": 1188 }, { "epoch": 0.3716786495779931, "grad_norm": 0.2177734375, "learning_rate": 0.0001958333337056795, "loss": 1.6602, "step": 1189 }, { "epoch": 0.37199124726477023, "grad_norm": 0.2060546875, "learning_rate": 0.00019582631217500093, "loss": 1.9655, "step": 1190 }, { "epoch": 0.37230384495154734, "grad_norm": 0.2099609375, "learning_rate": 0.0001958192848591658, "loss": 1.7755, "step": 1191 }, { "epoch": 0.3726164426383245, "grad_norm": 0.2158203125, "learning_rate": 0.00019581225175859833, "loss": 1.7425, "step": 1192 }, { "epoch": 0.3729290403251016, "grad_norm": 0.2080078125, "learning_rate": 0.00019580521287372317, "loss": 1.8308, "step": 1193 }, { "epoch": 0.3732416380118787, "grad_norm": 0.2265625, "learning_rate": 0.00019579816820496516, "loss": 1.7996, "step": 1194 }, { "epoch": 0.3735542356986558, "grad_norm": 0.224609375, "learning_rate": 0.0001957911177527497, "loss": 1.8265, "step": 1195 }, { "epoch": 0.373866833385433, "grad_norm": 0.21484375, "learning_rate": 0.00019578406151750236, "loss": 1.5686, "step": 1196 }, { "epoch": 0.3741794310722101, "grad_norm": 0.212890625, "learning_rate": 0.0001957769994996492, "loss": 1.7951, "step": 1197 }, { "epoch": 0.3744920287589872, "grad_norm": 0.2314453125, "learning_rate": 0.00019576993169961653, "loss": 1.7821, "step": 1198 }, { "epoch": 0.3748046264457643, "grad_norm": 0.2158203125, "learning_rate": 0.000195762858117831, "loss": 1.7286, "step": 1199 }, { "epoch": 0.3751172241325414, "grad_norm": 0.212890625, "learning_rate": 0.00019575577875471974, "loss": 1.707, "step": 1200 }, { "epoch": 0.37542982181931855, "grad_norm": 0.2080078125, "learning_rate": 0.00019574869361071006, "loss": 1.9656, "step": 1201 }, { "epoch": 0.37574241950609566, "grad_norm": 0.2265625, "learning_rate": 0.00019574160268622976, "loss": 1.7242, "step": 1202 }, { "epoch": 0.37605501719287276, "grad_norm": 0.21484375, "learning_rate": 0.00019573450598170687, "loss": 1.7001, "step": 1203 }, { "epoch": 0.37636761487964987, "grad_norm": 0.21875, "learning_rate": 0.00019572740349756992, "loss": 1.8952, "step": 1204 }, { "epoch": 0.37668021256642703, "grad_norm": 0.2255859375, "learning_rate": 0.00019572029523424756, "loss": 1.8052, "step": 1205 }, { "epoch": 0.37699281025320414, "grad_norm": 0.21875, "learning_rate": 0.00019571318119216904, "loss": 1.8727, "step": 1206 }, { "epoch": 0.37730540793998124, "grad_norm": 0.2158203125, "learning_rate": 0.0001957060613717638, "loss": 1.6054, "step": 1207 }, { "epoch": 0.37761800562675835, "grad_norm": 0.2041015625, "learning_rate": 0.00019569893577346168, "loss": 1.8537, "step": 1208 }, { "epoch": 0.3779306033135355, "grad_norm": 0.2060546875, "learning_rate": 0.00019569180439769283, "loss": 1.6096, "step": 1209 }, { "epoch": 0.3782432010003126, "grad_norm": 0.2275390625, "learning_rate": 0.00019568466724488782, "loss": 1.9668, "step": 1210 }, { "epoch": 0.3785557986870897, "grad_norm": 0.20703125, "learning_rate": 0.00019567752431547754, "loss": 1.6992, "step": 1211 }, { "epoch": 0.3788683963738668, "grad_norm": 0.2109375, "learning_rate": 0.00019567037560989315, "loss": 1.6169, "step": 1212 }, { "epoch": 0.37918099406064393, "grad_norm": 0.21875, "learning_rate": 0.00019566322112856633, "loss": 1.7126, "step": 1213 }, { "epoch": 0.3794935917474211, "grad_norm": 0.203125, "learning_rate": 0.0001956560608719289, "loss": 1.6279, "step": 1214 }, { "epoch": 0.3798061894341982, "grad_norm": 0.2177734375, "learning_rate": 0.0001956488948404132, "loss": 2.0578, "step": 1215 }, { "epoch": 0.3801187871209753, "grad_norm": 0.2236328125, "learning_rate": 0.00019564172303445182, "loss": 1.7761, "step": 1216 }, { "epoch": 0.3804313848077524, "grad_norm": 0.2119140625, "learning_rate": 0.00019563454545447773, "loss": 1.6644, "step": 1217 }, { "epoch": 0.38074398249452956, "grad_norm": 0.2109375, "learning_rate": 0.00019562736210092428, "loss": 1.8542, "step": 1218 }, { "epoch": 0.38105658018130667, "grad_norm": 0.208984375, "learning_rate": 0.0001956201729742251, "loss": 1.7917, "step": 1219 }, { "epoch": 0.3813691778680838, "grad_norm": 0.2177734375, "learning_rate": 0.00019561297807481427, "loss": 1.8474, "step": 1220 }, { "epoch": 0.3816817755548609, "grad_norm": 0.2080078125, "learning_rate": 0.0001956057774031261, "loss": 1.627, "step": 1221 }, { "epoch": 0.381994373241638, "grad_norm": 0.2109375, "learning_rate": 0.00019559857095959528, "loss": 1.6842, "step": 1222 }, { "epoch": 0.38230697092841515, "grad_norm": 0.2197265625, "learning_rate": 0.00019559135874465695, "loss": 1.7735, "step": 1223 }, { "epoch": 0.38261956861519225, "grad_norm": 0.2158203125, "learning_rate": 0.00019558414075874646, "loss": 1.8281, "step": 1224 }, { "epoch": 0.38293216630196936, "grad_norm": 0.21484375, "learning_rate": 0.00019557691700229957, "loss": 1.5633, "step": 1225 }, { "epoch": 0.38324476398874646, "grad_norm": 0.212890625, "learning_rate": 0.00019556968747575244, "loss": 1.8649, "step": 1226 }, { "epoch": 0.3835573616755236, "grad_norm": 0.2177734375, "learning_rate": 0.00019556245217954149, "loss": 1.6938, "step": 1227 }, { "epoch": 0.38386995936230073, "grad_norm": 0.2158203125, "learning_rate": 0.0001955552111141035, "loss": 1.6866, "step": 1228 }, { "epoch": 0.38418255704907783, "grad_norm": 0.232421875, "learning_rate": 0.00019554796427987566, "loss": 1.9343, "step": 1229 }, { "epoch": 0.38449515473585494, "grad_norm": 0.2177734375, "learning_rate": 0.00019554071167729545, "loss": 1.9785, "step": 1230 }, { "epoch": 0.3848077524226321, "grad_norm": 0.2080078125, "learning_rate": 0.00019553345330680077, "loss": 1.876, "step": 1231 }, { "epoch": 0.3851203501094092, "grad_norm": 0.2236328125, "learning_rate": 0.00019552618916882973, "loss": 1.671, "step": 1232 }, { "epoch": 0.3854329477961863, "grad_norm": 0.2255859375, "learning_rate": 0.00019551891926382093, "loss": 1.6575, "step": 1233 }, { "epoch": 0.3857455454829634, "grad_norm": 0.216796875, "learning_rate": 0.00019551164359221326, "loss": 1.9775, "step": 1234 }, { "epoch": 0.3860581431697405, "grad_norm": 0.2177734375, "learning_rate": 0.00019550436215444594, "loss": 1.7329, "step": 1235 }, { "epoch": 0.3863707408565177, "grad_norm": 0.2119140625, "learning_rate": 0.0001954970749509586, "loss": 1.6745, "step": 1236 }, { "epoch": 0.3866833385432948, "grad_norm": 0.21484375, "learning_rate": 0.00019548978198219113, "loss": 1.7502, "step": 1237 }, { "epoch": 0.3869959362300719, "grad_norm": 0.228515625, "learning_rate": 0.00019548248324858386, "loss": 1.6299, "step": 1238 }, { "epoch": 0.387308533916849, "grad_norm": 0.21875, "learning_rate": 0.00019547517875057738, "loss": 1.6477, "step": 1239 }, { "epoch": 0.38762113160362616, "grad_norm": 0.2138671875, "learning_rate": 0.00019546786848861268, "loss": 1.8717, "step": 1240 }, { "epoch": 0.38793372929040326, "grad_norm": 0.2080078125, "learning_rate": 0.00019546055246313113, "loss": 1.5382, "step": 1241 }, { "epoch": 0.38824632697718037, "grad_norm": 0.2294921875, "learning_rate": 0.00019545323067457439, "loss": 2.0394, "step": 1242 }, { "epoch": 0.3885589246639575, "grad_norm": 0.2138671875, "learning_rate": 0.00019544590312338444, "loss": 1.8064, "step": 1243 }, { "epoch": 0.3888715223507346, "grad_norm": 0.2265625, "learning_rate": 0.00019543856981000371, "loss": 1.6846, "step": 1244 }, { "epoch": 0.38918412003751174, "grad_norm": 0.203125, "learning_rate": 0.0001954312307348749, "loss": 1.7834, "step": 1245 }, { "epoch": 0.38949671772428884, "grad_norm": 0.21484375, "learning_rate": 0.0001954238858984411, "loss": 1.8043, "step": 1246 }, { "epoch": 0.38980931541106595, "grad_norm": 0.2119140625, "learning_rate": 0.00019541653530114568, "loss": 1.7905, "step": 1247 }, { "epoch": 0.39012191309784305, "grad_norm": 0.2119140625, "learning_rate": 0.00019540917894343246, "loss": 1.6521, "step": 1248 }, { "epoch": 0.3904345107846202, "grad_norm": 0.2177734375, "learning_rate": 0.00019540181682574552, "loss": 1.6881, "step": 1249 }, { "epoch": 0.3907471084713973, "grad_norm": 0.2158203125, "learning_rate": 0.0001953944489485293, "loss": 2.0565, "step": 1250 }, { "epoch": 0.3910597061581744, "grad_norm": 0.2197265625, "learning_rate": 0.00019538707531222867, "loss": 1.7884, "step": 1251 }, { "epoch": 0.39137230384495153, "grad_norm": 0.2041015625, "learning_rate": 0.00019537969591728872, "loss": 1.5153, "step": 1252 }, { "epoch": 0.3916849015317287, "grad_norm": 0.220703125, "learning_rate": 0.000195372310764155, "loss": 1.8401, "step": 1253 }, { "epoch": 0.3919974992185058, "grad_norm": 0.2138671875, "learning_rate": 0.00019536491985327334, "loss": 1.5898, "step": 1254 }, { "epoch": 0.3923100969052829, "grad_norm": 0.208984375, "learning_rate": 0.00019535752318508998, "loss": 1.8118, "step": 1255 }, { "epoch": 0.39262269459206, "grad_norm": 0.2060546875, "learning_rate": 0.00019535012076005138, "loss": 1.4033, "step": 1256 }, { "epoch": 0.3929352922788371, "grad_norm": 0.2099609375, "learning_rate": 0.00019534271257860448, "loss": 1.672, "step": 1257 }, { "epoch": 0.3932478899656143, "grad_norm": 0.2197265625, "learning_rate": 0.00019533529864119658, "loss": 1.752, "step": 1258 }, { "epoch": 0.3935604876523914, "grad_norm": 0.2099609375, "learning_rate": 0.0001953278789482752, "loss": 1.3813, "step": 1259 }, { "epoch": 0.3938730853391685, "grad_norm": 0.2265625, "learning_rate": 0.00019532045350028826, "loss": 1.8827, "step": 1260 }, { "epoch": 0.3941856830259456, "grad_norm": 0.224609375, "learning_rate": 0.00019531302229768404, "loss": 1.9363, "step": 1261 }, { "epoch": 0.39449828071272275, "grad_norm": 0.2138671875, "learning_rate": 0.00019530558534091127, "loss": 1.8975, "step": 1262 }, { "epoch": 0.39481087839949985, "grad_norm": 0.2275390625, "learning_rate": 0.00019529814263041884, "loss": 1.7931, "step": 1263 }, { "epoch": 0.39512347608627696, "grad_norm": 0.2099609375, "learning_rate": 0.0001952906941666561, "loss": 1.7258, "step": 1264 }, { "epoch": 0.39543607377305406, "grad_norm": 0.2119140625, "learning_rate": 0.0001952832399500727, "loss": 1.8547, "step": 1265 }, { "epoch": 0.3957486714598312, "grad_norm": 0.2138671875, "learning_rate": 0.00019527577998111874, "loss": 1.7344, "step": 1266 }, { "epoch": 0.39606126914660833, "grad_norm": 0.2109375, "learning_rate": 0.0001952683142602445, "loss": 1.7313, "step": 1267 }, { "epoch": 0.39637386683338544, "grad_norm": 0.2119140625, "learning_rate": 0.00019526084278790074, "loss": 1.8261, "step": 1268 }, { "epoch": 0.39668646452016254, "grad_norm": 0.2060546875, "learning_rate": 0.00019525336556453852, "loss": 1.7306, "step": 1269 }, { "epoch": 0.39699906220693965, "grad_norm": 0.2021484375, "learning_rate": 0.0001952458825906092, "loss": 1.9536, "step": 1270 }, { "epoch": 0.3973116598937168, "grad_norm": 0.2138671875, "learning_rate": 0.00019523839386656458, "loss": 1.7486, "step": 1271 }, { "epoch": 0.3976242575804939, "grad_norm": 0.2099609375, "learning_rate": 0.00019523089939285675, "loss": 1.9232, "step": 1272 }, { "epoch": 0.397936855267271, "grad_norm": 0.220703125, "learning_rate": 0.0001952233991699382, "loss": 1.5959, "step": 1273 }, { "epoch": 0.3982494529540481, "grad_norm": 0.224609375, "learning_rate": 0.00019521589319826168, "loss": 1.9811, "step": 1274 }, { "epoch": 0.3985620506408253, "grad_norm": 0.2255859375, "learning_rate": 0.00019520838147828035, "loss": 1.6908, "step": 1275 }, { "epoch": 0.3988746483276024, "grad_norm": 0.208984375, "learning_rate": 0.00019520086401044772, "loss": 1.7011, "step": 1276 }, { "epoch": 0.3991872460143795, "grad_norm": 0.2197265625, "learning_rate": 0.0001951933407952176, "loss": 1.6478, "step": 1277 }, { "epoch": 0.3994998437011566, "grad_norm": 0.2255859375, "learning_rate": 0.0001951858118330442, "loss": 1.5169, "step": 1278 }, { "epoch": 0.3998124413879337, "grad_norm": 0.2216796875, "learning_rate": 0.00019517827712438207, "loss": 1.7061, "step": 1279 }, { "epoch": 0.40012503907471086, "grad_norm": 0.212890625, "learning_rate": 0.00019517073666968604, "loss": 1.7499, "step": 1280 }, { "epoch": 0.40043763676148797, "grad_norm": 0.212890625, "learning_rate": 0.00019516319046941134, "loss": 2.132, "step": 1281 }, { "epoch": 0.4007502344482651, "grad_norm": 0.20703125, "learning_rate": 0.00019515563852401358, "loss": 1.56, "step": 1282 }, { "epoch": 0.4010628321350422, "grad_norm": 0.216796875, "learning_rate": 0.00019514808083394866, "loss": 1.86, "step": 1283 }, { "epoch": 0.40137542982181934, "grad_norm": 0.22265625, "learning_rate": 0.00019514051739967286, "loss": 1.6877, "step": 1284 }, { "epoch": 0.40168802750859645, "grad_norm": 0.2099609375, "learning_rate": 0.00019513294822164277, "loss": 1.5612, "step": 1285 }, { "epoch": 0.40200062519537355, "grad_norm": 0.2099609375, "learning_rate": 0.00019512537330031537, "loss": 1.7812, "step": 1286 }, { "epoch": 0.40231322288215066, "grad_norm": 0.2119140625, "learning_rate": 0.00019511779263614798, "loss": 1.5228, "step": 1287 }, { "epoch": 0.4026258205689278, "grad_norm": 0.2119140625, "learning_rate": 0.00019511020622959823, "loss": 1.4276, "step": 1288 }, { "epoch": 0.4029384182557049, "grad_norm": 0.2080078125, "learning_rate": 0.00019510261408112414, "loss": 1.8561, "step": 1289 }, { "epoch": 0.403251015942482, "grad_norm": 0.21875, "learning_rate": 0.00019509501619118403, "loss": 1.8674, "step": 1290 }, { "epoch": 0.40356361362925913, "grad_norm": 0.20703125, "learning_rate": 0.0001950874125602366, "loss": 1.8583, "step": 1291 }, { "epoch": 0.40387621131603624, "grad_norm": 0.2099609375, "learning_rate": 0.00019507980318874096, "loss": 1.686, "step": 1292 }, { "epoch": 0.4041888090028134, "grad_norm": 0.21484375, "learning_rate": 0.00019507218807715638, "loss": 1.7897, "step": 1293 }, { "epoch": 0.4045014066895905, "grad_norm": 0.228515625, "learning_rate": 0.00019506456722594265, "loss": 1.7626, "step": 1294 }, { "epoch": 0.4048140043763676, "grad_norm": 0.212890625, "learning_rate": 0.0001950569406355599, "loss": 1.9098, "step": 1295 }, { "epoch": 0.4051266020631447, "grad_norm": 0.2080078125, "learning_rate": 0.0001950493083064685, "loss": 1.5848, "step": 1296 }, { "epoch": 0.4054391997499219, "grad_norm": 0.220703125, "learning_rate": 0.00019504167023912922, "loss": 1.6362, "step": 1297 }, { "epoch": 0.405751797436699, "grad_norm": 0.2177734375, "learning_rate": 0.0001950340264340032, "loss": 1.9604, "step": 1298 }, { "epoch": 0.4060643951234761, "grad_norm": 0.306640625, "learning_rate": 0.0001950263768915519, "loss": 2.5325, "step": 1299 }, { "epoch": 0.4063769928102532, "grad_norm": 0.21484375, "learning_rate": 0.00019501872161223712, "loss": 1.9979, "step": 1300 }, { "epoch": 0.4066895904970303, "grad_norm": 0.2177734375, "learning_rate": 0.00019501106059652108, "loss": 1.714, "step": 1301 }, { "epoch": 0.40700218818380746, "grad_norm": 0.220703125, "learning_rate": 0.0001950033938448662, "loss": 1.7827, "step": 1302 }, { "epoch": 0.40731478587058456, "grad_norm": 0.21484375, "learning_rate": 0.00019499572135773537, "loss": 1.6062, "step": 1303 }, { "epoch": 0.40762738355736167, "grad_norm": 0.21484375, "learning_rate": 0.0001949880431355918, "loss": 1.6599, "step": 1304 }, { "epoch": 0.40793998124413877, "grad_norm": 0.2197265625, "learning_rate": 0.000194980359178899, "loss": 1.5345, "step": 1305 }, { "epoch": 0.40825257893091593, "grad_norm": 0.220703125, "learning_rate": 0.0001949726694881209, "loss": 1.8149, "step": 1306 }, { "epoch": 0.40856517661769304, "grad_norm": 0.220703125, "learning_rate": 0.00019496497406372174, "loss": 1.6207, "step": 1307 }, { "epoch": 0.40887777430447014, "grad_norm": 0.2099609375, "learning_rate": 0.00019495727290616606, "loss": 1.7058, "step": 1308 }, { "epoch": 0.40919037199124725, "grad_norm": 0.216796875, "learning_rate": 0.0001949495660159188, "loss": 1.5045, "step": 1309 }, { "epoch": 0.4095029696780244, "grad_norm": 0.21875, "learning_rate": 0.00019494185339344523, "loss": 1.8221, "step": 1310 }, { "epoch": 0.4098155673648015, "grad_norm": 0.224609375, "learning_rate": 0.000194934135039211, "loss": 1.4478, "step": 1311 }, { "epoch": 0.4101281650515786, "grad_norm": 0.228515625, "learning_rate": 0.0001949264109536821, "loss": 1.4922, "step": 1312 }, { "epoch": 0.4104407627383557, "grad_norm": 0.2275390625, "learning_rate": 0.00019491868113732474, "loss": 1.8462, "step": 1313 }, { "epoch": 0.41075336042513283, "grad_norm": 0.2138671875, "learning_rate": 0.0001949109455906057, "loss": 1.831, "step": 1314 }, { "epoch": 0.41106595811191, "grad_norm": 0.2099609375, "learning_rate": 0.0001949032043139919, "loss": 1.5742, "step": 1315 }, { "epoch": 0.4113785557986871, "grad_norm": 0.2099609375, "learning_rate": 0.0001948954573079507, "loss": 1.7099, "step": 1316 }, { "epoch": 0.4116911534854642, "grad_norm": 0.21875, "learning_rate": 0.00019488770457294985, "loss": 1.8771, "step": 1317 }, { "epoch": 0.4120037511722413, "grad_norm": 0.21875, "learning_rate": 0.00019487994610945734, "loss": 1.9056, "step": 1318 }, { "epoch": 0.41231634885901847, "grad_norm": 0.20703125, "learning_rate": 0.00019487218191794158, "loss": 1.7384, "step": 1319 }, { "epoch": 0.41262894654579557, "grad_norm": 0.212890625, "learning_rate": 0.00019486441199887132, "loss": 1.9079, "step": 1320 }, { "epoch": 0.4129415442325727, "grad_norm": 0.224609375, "learning_rate": 0.00019485663635271562, "loss": 1.8313, "step": 1321 }, { "epoch": 0.4132541419193498, "grad_norm": 0.2216796875, "learning_rate": 0.00019484885497994387, "loss": 1.642, "step": 1322 }, { "epoch": 0.4135667396061269, "grad_norm": 0.2138671875, "learning_rate": 0.00019484106788102593, "loss": 1.7165, "step": 1323 }, { "epoch": 0.41387933729290405, "grad_norm": 0.2197265625, "learning_rate": 0.0001948332750564318, "loss": 1.6474, "step": 1324 }, { "epoch": 0.41419193497968115, "grad_norm": 0.2099609375, "learning_rate": 0.00019482547650663206, "loss": 1.5541, "step": 1325 }, { "epoch": 0.41450453266645826, "grad_norm": 0.20703125, "learning_rate": 0.00019481767223209745, "loss": 2.0118, "step": 1326 }, { "epoch": 0.41481713035323536, "grad_norm": 0.2138671875, "learning_rate": 0.00019480986223329913, "loss": 1.8306, "step": 1327 }, { "epoch": 0.4151297280400125, "grad_norm": 0.2314453125, "learning_rate": 0.00019480204651070864, "loss": 1.6828, "step": 1328 }, { "epoch": 0.41544232572678963, "grad_norm": 0.2177734375, "learning_rate": 0.00019479422506479775, "loss": 1.6071, "step": 1329 }, { "epoch": 0.41575492341356673, "grad_norm": 0.203125, "learning_rate": 0.00019478639789603872, "loss": 1.6847, "step": 1330 }, { "epoch": 0.41606752110034384, "grad_norm": 0.21484375, "learning_rate": 0.00019477856500490405, "loss": 1.6309, "step": 1331 }, { "epoch": 0.416380118787121, "grad_norm": 0.21484375, "learning_rate": 0.00019477072639186664, "loss": 1.9451, "step": 1332 }, { "epoch": 0.4166927164738981, "grad_norm": 0.220703125, "learning_rate": 0.0001947628820573997, "loss": 1.8675, "step": 1333 }, { "epoch": 0.4170053141606752, "grad_norm": 0.2236328125, "learning_rate": 0.00019475503200197685, "loss": 1.5601, "step": 1334 }, { "epoch": 0.4173179118474523, "grad_norm": 0.2119140625, "learning_rate": 0.00019474717622607195, "loss": 1.5294, "step": 1335 }, { "epoch": 0.4176305095342294, "grad_norm": 0.2177734375, "learning_rate": 0.00019473931473015926, "loss": 1.7433, "step": 1336 }, { "epoch": 0.4179431072210066, "grad_norm": 0.2138671875, "learning_rate": 0.00019473144751471345, "loss": 1.6771, "step": 1337 }, { "epoch": 0.4182557049077837, "grad_norm": 0.212890625, "learning_rate": 0.0001947235745802094, "loss": 1.9994, "step": 1338 }, { "epoch": 0.4185683025945608, "grad_norm": 0.2216796875, "learning_rate": 0.0001947156959271225, "loss": 1.726, "step": 1339 }, { "epoch": 0.4188809002813379, "grad_norm": 0.2080078125, "learning_rate": 0.00019470781155592827, "loss": 1.8079, "step": 1340 }, { "epoch": 0.41919349796811506, "grad_norm": 0.2099609375, "learning_rate": 0.00019469992146710282, "loss": 1.8046, "step": 1341 }, { "epoch": 0.41950609565489216, "grad_norm": 0.2177734375, "learning_rate": 0.0001946920256611224, "loss": 1.619, "step": 1342 }, { "epoch": 0.41981869334166927, "grad_norm": 0.2158203125, "learning_rate": 0.00019468412413846373, "loss": 1.6015, "step": 1343 }, { "epoch": 0.4201312910284464, "grad_norm": 0.212890625, "learning_rate": 0.00019467621689960385, "loss": 1.7538, "step": 1344 }, { "epoch": 0.42044388871522353, "grad_norm": 0.20703125, "learning_rate": 0.00019466830394502009, "loss": 1.8732, "step": 1345 }, { "epoch": 0.42075648640200064, "grad_norm": 0.2294921875, "learning_rate": 0.0001946603852751902, "loss": 1.7492, "step": 1346 }, { "epoch": 0.42106908408877775, "grad_norm": 0.224609375, "learning_rate": 0.0001946524608905922, "loss": 1.6893, "step": 1347 }, { "epoch": 0.42138168177555485, "grad_norm": 0.220703125, "learning_rate": 0.00019464453079170454, "loss": 1.5848, "step": 1348 }, { "epoch": 0.42169427946233196, "grad_norm": 0.2080078125, "learning_rate": 0.00019463659497900593, "loss": 1.5974, "step": 1349 }, { "epoch": 0.4220068771491091, "grad_norm": 0.2216796875, "learning_rate": 0.0001946286534529755, "loss": 1.9757, "step": 1350 }, { "epoch": 0.4223194748358862, "grad_norm": 0.2275390625, "learning_rate": 0.0001946207062140927, "loss": 1.9514, "step": 1351 }, { "epoch": 0.4226320725226633, "grad_norm": 0.2158203125, "learning_rate": 0.00019461275326283724, "loss": 1.894, "step": 1352 }, { "epoch": 0.42294467020944043, "grad_norm": 0.2236328125, "learning_rate": 0.00019460479459968932, "loss": 1.5872, "step": 1353 }, { "epoch": 0.4232572678962176, "grad_norm": 0.21484375, "learning_rate": 0.0001945968302251294, "loss": 1.5275, "step": 1354 }, { "epoch": 0.4235698655829947, "grad_norm": 0.220703125, "learning_rate": 0.0001945888601396383, "loss": 1.6427, "step": 1355 }, { "epoch": 0.4238824632697718, "grad_norm": 0.21875, "learning_rate": 0.00019458088434369715, "loss": 1.6407, "step": 1356 }, { "epoch": 0.4241950609565489, "grad_norm": 0.224609375, "learning_rate": 0.00019457290283778747, "loss": 1.9373, "step": 1357 }, { "epoch": 0.424507658643326, "grad_norm": 0.21484375, "learning_rate": 0.0001945649156223912, "loss": 1.7385, "step": 1358 }, { "epoch": 0.4248202563301032, "grad_norm": 0.2236328125, "learning_rate": 0.0001945569226979904, "loss": 1.8262, "step": 1359 }, { "epoch": 0.4251328540168803, "grad_norm": 0.2158203125, "learning_rate": 0.00019454892406506775, "loss": 1.6286, "step": 1360 }, { "epoch": 0.4254454517036574, "grad_norm": 0.22265625, "learning_rate": 0.00019454091972410603, "loss": 1.7992, "step": 1361 }, { "epoch": 0.4257580493904345, "grad_norm": 0.2197265625, "learning_rate": 0.0001945329096755885, "loss": 1.9609, "step": 1362 }, { "epoch": 0.42607064707721165, "grad_norm": 0.2138671875, "learning_rate": 0.00019452489391999874, "loss": 1.9051, "step": 1363 }, { "epoch": 0.42638324476398876, "grad_norm": 0.2294921875, "learning_rate": 0.00019451687245782072, "loss": 1.7331, "step": 1364 }, { "epoch": 0.42669584245076586, "grad_norm": 0.2158203125, "learning_rate": 0.00019450884528953864, "loss": 2.1455, "step": 1365 }, { "epoch": 0.42700844013754297, "grad_norm": 0.2353515625, "learning_rate": 0.00019450081241563716, "loss": 1.8298, "step": 1366 }, { "epoch": 0.4273210378243201, "grad_norm": 0.2216796875, "learning_rate": 0.00019449277383660118, "loss": 1.8084, "step": 1367 }, { "epoch": 0.42763363551109723, "grad_norm": 0.2236328125, "learning_rate": 0.00019448472955291605, "loss": 1.6876, "step": 1368 }, { "epoch": 0.42794623319787434, "grad_norm": 0.2080078125, "learning_rate": 0.0001944766795650674, "loss": 1.7431, "step": 1369 }, { "epoch": 0.42825883088465144, "grad_norm": 0.228515625, "learning_rate": 0.0001944686238735412, "loss": 1.7904, "step": 1370 }, { "epoch": 0.42857142857142855, "grad_norm": 0.2099609375, "learning_rate": 0.00019446056247882378, "loss": 1.8465, "step": 1371 }, { "epoch": 0.4288840262582057, "grad_norm": 0.2197265625, "learning_rate": 0.00019445249538140185, "loss": 1.6672, "step": 1372 }, { "epoch": 0.4291966239449828, "grad_norm": 0.2275390625, "learning_rate": 0.0001944444225817624, "loss": 1.9209, "step": 1373 }, { "epoch": 0.4295092216317599, "grad_norm": 0.220703125, "learning_rate": 0.00019443634408039282, "loss": 1.8336, "step": 1374 }, { "epoch": 0.429821819318537, "grad_norm": 0.22265625, "learning_rate": 0.0001944282598777808, "loss": 1.9261, "step": 1375 }, { "epoch": 0.4301344170053142, "grad_norm": 0.2255859375, "learning_rate": 0.0001944201699744144, "loss": 1.6371, "step": 1376 }, { "epoch": 0.4304470146920913, "grad_norm": 0.220703125, "learning_rate": 0.00019441207437078203, "loss": 1.4774, "step": 1377 }, { "epoch": 0.4307596123788684, "grad_norm": 0.2255859375, "learning_rate": 0.0001944039730673724, "loss": 1.5849, "step": 1378 }, { "epoch": 0.4310722100656455, "grad_norm": 0.2255859375, "learning_rate": 0.0001943958660646746, "loss": 1.8103, "step": 1379 }, { "epoch": 0.4313848077524226, "grad_norm": 0.2158203125, "learning_rate": 0.00019438775336317812, "loss": 1.8946, "step": 1380 }, { "epoch": 0.43169740543919977, "grad_norm": 0.20703125, "learning_rate": 0.00019437963496337266, "loss": 1.6056, "step": 1381 }, { "epoch": 0.43201000312597687, "grad_norm": 0.220703125, "learning_rate": 0.00019437151086574837, "loss": 1.6991, "step": 1382 }, { "epoch": 0.432322600812754, "grad_norm": 0.2265625, "learning_rate": 0.00019436338107079574, "loss": 1.6126, "step": 1383 }, { "epoch": 0.4326351984995311, "grad_norm": 0.216796875, "learning_rate": 0.00019435524557900551, "loss": 1.4967, "step": 1384 }, { "epoch": 0.43294779618630824, "grad_norm": 0.212890625, "learning_rate": 0.00019434710439086888, "loss": 1.5868, "step": 1385 }, { "epoch": 0.43326039387308535, "grad_norm": 0.2265625, "learning_rate": 0.00019433895750687734, "loss": 1.7528, "step": 1386 }, { "epoch": 0.43357299155986245, "grad_norm": 0.2255859375, "learning_rate": 0.00019433080492752268, "loss": 1.899, "step": 1387 }, { "epoch": 0.43388558924663956, "grad_norm": 0.2275390625, "learning_rate": 0.00019432264665329715, "loss": 2.0873, "step": 1388 }, { "epoch": 0.4341981869334167, "grad_norm": 0.216796875, "learning_rate": 0.00019431448268469325, "loss": 1.4453, "step": 1389 }, { "epoch": 0.4345107846201938, "grad_norm": 0.2177734375, "learning_rate": 0.00019430631302220385, "loss": 1.9314, "step": 1390 }, { "epoch": 0.43482338230697093, "grad_norm": 0.21875, "learning_rate": 0.0001942981376663221, "loss": 1.5989, "step": 1391 }, { "epoch": 0.43513597999374803, "grad_norm": 0.2216796875, "learning_rate": 0.00019428995661754171, "loss": 1.8037, "step": 1392 }, { "epoch": 0.43544857768052514, "grad_norm": 0.20703125, "learning_rate": 0.0001942817698763564, "loss": 1.7903, "step": 1393 }, { "epoch": 0.4357611753673023, "grad_norm": 0.2216796875, "learning_rate": 0.00019427357744326057, "loss": 1.7809, "step": 1394 }, { "epoch": 0.4360737730540794, "grad_norm": 0.2099609375, "learning_rate": 0.0001942653793187487, "loss": 1.552, "step": 1395 }, { "epoch": 0.4363863707408565, "grad_norm": 0.2138671875, "learning_rate": 0.00019425717550331572, "loss": 1.7079, "step": 1396 }, { "epoch": 0.4366989684276336, "grad_norm": 0.2119140625, "learning_rate": 0.000194248965997457, "loss": 1.8321, "step": 1397 }, { "epoch": 0.4370115661144108, "grad_norm": 0.2255859375, "learning_rate": 0.00019424075080166805, "loss": 1.6185, "step": 1398 }, { "epoch": 0.4373241638011879, "grad_norm": 0.2216796875, "learning_rate": 0.00019423252991644492, "loss": 1.7149, "step": 1399 }, { "epoch": 0.437636761487965, "grad_norm": 0.2236328125, "learning_rate": 0.00019422430334228386, "loss": 1.7048, "step": 1400 }, { "epoch": 0.4379493591747421, "grad_norm": 0.2158203125, "learning_rate": 0.00019421607107968154, "loss": 1.8062, "step": 1401 }, { "epoch": 0.4382619568615192, "grad_norm": 0.2265625, "learning_rate": 0.00019420783312913494, "loss": 1.8332, "step": 1402 }, { "epoch": 0.43857455454829636, "grad_norm": 0.306640625, "learning_rate": 0.0001941995894911414, "loss": 2.397, "step": 1403 }, { "epoch": 0.43888715223507346, "grad_norm": 0.2216796875, "learning_rate": 0.00019419134016619865, "loss": 1.6672, "step": 1404 }, { "epoch": 0.43919974992185057, "grad_norm": 0.2265625, "learning_rate": 0.0001941830851548046, "loss": 1.6112, "step": 1405 }, { "epoch": 0.4395123476086277, "grad_norm": 0.2197265625, "learning_rate": 0.0001941748244574577, "loss": 1.7182, "step": 1406 }, { "epoch": 0.43982494529540483, "grad_norm": 0.2236328125, "learning_rate": 0.00019416655807465667, "loss": 1.7438, "step": 1407 }, { "epoch": 0.44013754298218194, "grad_norm": 0.216796875, "learning_rate": 0.0001941582860069005, "loss": 1.8327, "step": 1408 }, { "epoch": 0.44045014066895904, "grad_norm": 0.224609375, "learning_rate": 0.00019415000825468863, "loss": 2.0563, "step": 1409 }, { "epoch": 0.44076273835573615, "grad_norm": 0.2158203125, "learning_rate": 0.0001941417248185208, "loss": 1.9451, "step": 1410 }, { "epoch": 0.4410753360425133, "grad_norm": 0.224609375, "learning_rate": 0.00019413343569889702, "loss": 1.8786, "step": 1411 }, { "epoch": 0.4413879337292904, "grad_norm": 0.2294921875, "learning_rate": 0.00019412514089631785, "loss": 1.7905, "step": 1412 }, { "epoch": 0.4417005314160675, "grad_norm": 0.2255859375, "learning_rate": 0.00019411684041128392, "loss": 1.7573, "step": 1413 }, { "epoch": 0.4420131291028446, "grad_norm": 0.220703125, "learning_rate": 0.00019410853424429642, "loss": 1.6898, "step": 1414 }, { "epoch": 0.44232572678962173, "grad_norm": 0.220703125, "learning_rate": 0.00019410022239585678, "loss": 1.7676, "step": 1415 }, { "epoch": 0.4426383244763989, "grad_norm": 0.2470703125, "learning_rate": 0.0001940919048664668, "loss": 1.7774, "step": 1416 }, { "epoch": 0.442950922163176, "grad_norm": 0.2119140625, "learning_rate": 0.00019408358165662866, "loss": 1.6328, "step": 1417 }, { "epoch": 0.4432635198499531, "grad_norm": 0.2265625, "learning_rate": 0.00019407525276684474, "loss": 1.7037, "step": 1418 }, { "epoch": 0.4435761175367302, "grad_norm": 0.2080078125, "learning_rate": 0.00019406691819761796, "loss": 1.81, "step": 1419 }, { "epoch": 0.44388871522350737, "grad_norm": 0.2421875, "learning_rate": 0.00019405857794945147, "loss": 1.8474, "step": 1420 }, { "epoch": 0.4442013129102845, "grad_norm": 0.2197265625, "learning_rate": 0.00019405023202284874, "loss": 1.6398, "step": 1421 }, { "epoch": 0.4445139105970616, "grad_norm": 0.22265625, "learning_rate": 0.0001940418804183137, "loss": 1.5592, "step": 1422 }, { "epoch": 0.4448265082838387, "grad_norm": 0.2099609375, "learning_rate": 0.00019403352313635046, "loss": 1.6566, "step": 1423 }, { "epoch": 0.44513910597061584, "grad_norm": 0.21484375, "learning_rate": 0.0001940251601774636, "loss": 1.6928, "step": 1424 }, { "epoch": 0.44545170365739295, "grad_norm": 0.2158203125, "learning_rate": 0.00019401679154215802, "loss": 2.029, "step": 1425 }, { "epoch": 0.44576430134417006, "grad_norm": 0.220703125, "learning_rate": 0.0001940084172309389, "loss": 1.9225, "step": 1426 }, { "epoch": 0.44607689903094716, "grad_norm": 0.2431640625, "learning_rate": 0.00019400003724431185, "loss": 1.9033, "step": 1427 }, { "epoch": 0.44638949671772427, "grad_norm": 0.2197265625, "learning_rate": 0.00019399165158278279, "loss": 1.9373, "step": 1428 }, { "epoch": 0.4467020944045014, "grad_norm": 0.2216796875, "learning_rate": 0.00019398326024685792, "loss": 1.8287, "step": 1429 }, { "epoch": 0.44701469209127853, "grad_norm": 0.2353515625, "learning_rate": 0.00019397486323704388, "loss": 1.4876, "step": 1430 }, { "epoch": 0.44732728977805564, "grad_norm": 0.2109375, "learning_rate": 0.0001939664605538476, "loss": 1.7532, "step": 1431 }, { "epoch": 0.44763988746483274, "grad_norm": 0.2216796875, "learning_rate": 0.0001939580521977763, "loss": 1.8811, "step": 1432 }, { "epoch": 0.4479524851516099, "grad_norm": 0.22265625, "learning_rate": 0.00019394963816933772, "loss": 1.8956, "step": 1433 }, { "epoch": 0.448265082838387, "grad_norm": 0.2236328125, "learning_rate": 0.00019394121846903975, "loss": 1.7634, "step": 1434 }, { "epoch": 0.4485776805251641, "grad_norm": 0.2158203125, "learning_rate": 0.0001939327930973907, "loss": 1.5284, "step": 1435 }, { "epoch": 0.4488902782119412, "grad_norm": 0.234375, "learning_rate": 0.00019392436205489924, "loss": 1.8581, "step": 1436 }, { "epoch": 0.4492028758987183, "grad_norm": 0.2216796875, "learning_rate": 0.00019391592534207436, "loss": 1.4981, "step": 1437 }, { "epoch": 0.4495154735854955, "grad_norm": 0.220703125, "learning_rate": 0.00019390748295942535, "loss": 1.6315, "step": 1438 }, { "epoch": 0.4498280712722726, "grad_norm": 0.216796875, "learning_rate": 0.00019389903490746194, "loss": 1.755, "step": 1439 }, { "epoch": 0.4501406689590497, "grad_norm": 0.2177734375, "learning_rate": 0.00019389058118669418, "loss": 1.6564, "step": 1440 }, { "epoch": 0.4504532666458268, "grad_norm": 0.232421875, "learning_rate": 0.00019388212179763235, "loss": 1.8079, "step": 1441 }, { "epoch": 0.45076586433260396, "grad_norm": 0.2197265625, "learning_rate": 0.0001938736567407872, "loss": 1.7621, "step": 1442 }, { "epoch": 0.45107846201938107, "grad_norm": 0.2216796875, "learning_rate": 0.00019386518601666977, "loss": 2.0246, "step": 1443 }, { "epoch": 0.45139105970615817, "grad_norm": 0.228515625, "learning_rate": 0.0001938567096257914, "loss": 1.7006, "step": 1444 }, { "epoch": 0.4517036573929353, "grad_norm": 0.23046875, "learning_rate": 0.00019384822756866394, "loss": 1.7433, "step": 1445 }, { "epoch": 0.45201625507971244, "grad_norm": 0.220703125, "learning_rate": 0.00019383973984579936, "loss": 1.6673, "step": 1446 }, { "epoch": 0.45232885276648954, "grad_norm": 0.20703125, "learning_rate": 0.00019383124645771008, "loss": 1.7402, "step": 1447 }, { "epoch": 0.45264145045326665, "grad_norm": 0.220703125, "learning_rate": 0.00019382274740490892, "loss": 1.7445, "step": 1448 }, { "epoch": 0.45295404814004375, "grad_norm": 0.2216796875, "learning_rate": 0.0001938142426879089, "loss": 1.752, "step": 1449 }, { "epoch": 0.45326664582682086, "grad_norm": 0.224609375, "learning_rate": 0.00019380573230722353, "loss": 1.7653, "step": 1450 }, { "epoch": 0.453579243513598, "grad_norm": 0.224609375, "learning_rate": 0.00019379721626336656, "loss": 1.4672, "step": 1451 }, { "epoch": 0.4538918412003751, "grad_norm": 0.224609375, "learning_rate": 0.0001937886945568521, "loss": 1.6907, "step": 1452 }, { "epoch": 0.45420443888715223, "grad_norm": 0.2265625, "learning_rate": 0.00019378016718819466, "loss": 1.7775, "step": 1453 }, { "epoch": 0.45451703657392933, "grad_norm": 0.2216796875, "learning_rate": 0.00019377163415790902, "loss": 1.913, "step": 1454 }, { "epoch": 0.4548296342607065, "grad_norm": 0.216796875, "learning_rate": 0.00019376309546651033, "loss": 1.8471, "step": 1455 }, { "epoch": 0.4551422319474836, "grad_norm": 0.228515625, "learning_rate": 0.00019375455111451405, "loss": 1.5682, "step": 1456 }, { "epoch": 0.4554548296342607, "grad_norm": 0.220703125, "learning_rate": 0.00019374600110243608, "loss": 1.7008, "step": 1457 }, { "epoch": 0.4557674273210378, "grad_norm": 0.21875, "learning_rate": 0.00019373744543079257, "loss": 1.7075, "step": 1458 }, { "epoch": 0.4560800250078149, "grad_norm": 0.2158203125, "learning_rate": 0.0001937288841001, "loss": 1.6143, "step": 1459 }, { "epoch": 0.4563926226945921, "grad_norm": 0.21484375, "learning_rate": 0.00019372031711087527, "loss": 1.6665, "step": 1460 }, { "epoch": 0.4567052203813692, "grad_norm": 0.2158203125, "learning_rate": 0.00019371174446363557, "loss": 1.6533, "step": 1461 }, { "epoch": 0.4570178180681463, "grad_norm": 0.2294921875, "learning_rate": 0.00019370316615889842, "loss": 1.5501, "step": 1462 }, { "epoch": 0.4573304157549234, "grad_norm": 0.232421875, "learning_rate": 0.00019369458219718175, "loss": 1.8101, "step": 1463 }, { "epoch": 0.45764301344170055, "grad_norm": 0.216796875, "learning_rate": 0.00019368599257900372, "loss": 1.6708, "step": 1464 }, { "epoch": 0.45795561112847766, "grad_norm": 0.2158203125, "learning_rate": 0.00019367739730488296, "loss": 1.6922, "step": 1465 }, { "epoch": 0.45826820881525476, "grad_norm": 0.220703125, "learning_rate": 0.00019366879637533834, "loss": 1.6808, "step": 1466 }, { "epoch": 0.45858080650203187, "grad_norm": 0.2216796875, "learning_rate": 0.00019366018979088913, "loss": 1.654, "step": 1467 }, { "epoch": 0.45889340418880903, "grad_norm": 0.2099609375, "learning_rate": 0.0001936515775520549, "loss": 1.7892, "step": 1468 }, { "epoch": 0.45920600187558613, "grad_norm": 0.2158203125, "learning_rate": 0.00019364295965935562, "loss": 1.6039, "step": 1469 }, { "epoch": 0.45951859956236324, "grad_norm": 0.2294921875, "learning_rate": 0.0001936343361133115, "loss": 1.6348, "step": 1470 }, { "epoch": 0.45983119724914034, "grad_norm": 0.2177734375, "learning_rate": 0.0001936257069144432, "loss": 2.0579, "step": 1471 }, { "epoch": 0.46014379493591745, "grad_norm": 0.2275390625, "learning_rate": 0.00019361707206327168, "loss": 1.5824, "step": 1472 }, { "epoch": 0.4604563926226946, "grad_norm": 0.2197265625, "learning_rate": 0.0001936084315603182, "loss": 1.6563, "step": 1473 }, { "epoch": 0.4607689903094717, "grad_norm": 0.21484375, "learning_rate": 0.0001935997854061044, "loss": 1.7782, "step": 1474 }, { "epoch": 0.4610815879962488, "grad_norm": 0.22265625, "learning_rate": 0.00019359113360115234, "loss": 1.7625, "step": 1475 }, { "epoch": 0.4613941856830259, "grad_norm": 0.2294921875, "learning_rate": 0.00019358247614598427, "loss": 1.5607, "step": 1476 }, { "epoch": 0.4617067833698031, "grad_norm": 0.2197265625, "learning_rate": 0.00019357381304112281, "loss": 1.6091, "step": 1477 }, { "epoch": 0.4620193810565802, "grad_norm": 0.21875, "learning_rate": 0.00019356514428709104, "loss": 1.5822, "step": 1478 }, { "epoch": 0.4623319787433573, "grad_norm": 0.21875, "learning_rate": 0.0001935564698844123, "loss": 1.8785, "step": 1479 }, { "epoch": 0.4626445764301344, "grad_norm": 0.2197265625, "learning_rate": 0.0001935477898336102, "loss": 1.4933, "step": 1480 }, { "epoch": 0.4629571741169115, "grad_norm": 0.341796875, "learning_rate": 0.00019353910413520887, "loss": 2.2543, "step": 1481 }, { "epoch": 0.46326977180368867, "grad_norm": 0.2060546875, "learning_rate": 0.0001935304127897326, "loss": 1.6022, "step": 1482 }, { "epoch": 0.4635823694904658, "grad_norm": 0.224609375, "learning_rate": 0.00019352171579770615, "loss": 1.9542, "step": 1483 }, { "epoch": 0.4638949671772429, "grad_norm": 0.2158203125, "learning_rate": 0.00019351301315965452, "loss": 1.5863, "step": 1484 }, { "epoch": 0.46420756486402, "grad_norm": 0.2216796875, "learning_rate": 0.00019350430487610312, "loss": 1.9259, "step": 1485 }, { "epoch": 0.46452016255079714, "grad_norm": 0.2197265625, "learning_rate": 0.0001934955909475777, "loss": 1.9044, "step": 1486 }, { "epoch": 0.46483276023757425, "grad_norm": 0.2255859375, "learning_rate": 0.00019348687137460432, "loss": 1.829, "step": 1487 }, { "epoch": 0.46514535792435135, "grad_norm": 0.2294921875, "learning_rate": 0.00019347814615770933, "loss": 1.5524, "step": 1488 }, { "epoch": 0.46545795561112846, "grad_norm": 0.2236328125, "learning_rate": 0.00019346941529741954, "loss": 1.683, "step": 1489 }, { "epoch": 0.4657705532979056, "grad_norm": 0.220703125, "learning_rate": 0.0001934606787942621, "loss": 1.8919, "step": 1490 }, { "epoch": 0.4660831509846827, "grad_norm": 0.2275390625, "learning_rate": 0.00019345193664876433, "loss": 1.7553, "step": 1491 }, { "epoch": 0.46639574867145983, "grad_norm": 0.21875, "learning_rate": 0.0001934431888614541, "loss": 1.9543, "step": 1492 }, { "epoch": 0.46670834635823694, "grad_norm": 0.2197265625, "learning_rate": 0.00019343443543285945, "loss": 1.6919, "step": 1493 }, { "epoch": 0.46702094404501404, "grad_norm": 0.2412109375, "learning_rate": 0.00019342567636350887, "loss": 1.6121, "step": 1494 }, { "epoch": 0.4673335417317912, "grad_norm": 0.2255859375, "learning_rate": 0.00019341691165393116, "loss": 1.5772, "step": 1495 }, { "epoch": 0.4676461394185683, "grad_norm": 0.2216796875, "learning_rate": 0.00019340814130465548, "loss": 1.9449, "step": 1496 }, { "epoch": 0.4679587371053454, "grad_norm": 0.224609375, "learning_rate": 0.00019339936531621122, "loss": 1.7063, "step": 1497 }, { "epoch": 0.4682713347921225, "grad_norm": 0.212890625, "learning_rate": 0.0001933905836891283, "loss": 1.7768, "step": 1498 }, { "epoch": 0.4685839324788997, "grad_norm": 0.21875, "learning_rate": 0.00019338179642393685, "loss": 1.7279, "step": 1499 }, { "epoch": 0.4688965301656768, "grad_norm": 0.2197265625, "learning_rate": 0.0001933730035211673, "loss": 1.7344, "step": 1500 }, { "epoch": 0.4692091278524539, "grad_norm": 0.21484375, "learning_rate": 0.00019336420498135057, "loss": 1.6349, "step": 1501 }, { "epoch": 0.469521725539231, "grad_norm": 0.2255859375, "learning_rate": 0.0001933554008050178, "loss": 1.703, "step": 1502 }, { "epoch": 0.46983432322600815, "grad_norm": 0.21875, "learning_rate": 0.00019334659099270053, "loss": 1.6039, "step": 1503 }, { "epoch": 0.47014692091278526, "grad_norm": 0.2236328125, "learning_rate": 0.0001933377755449306, "loss": 1.7018, "step": 1504 }, { "epoch": 0.47045951859956237, "grad_norm": 0.22265625, "learning_rate": 0.00019332895446224022, "loss": 1.5957, "step": 1505 }, { "epoch": 0.47077211628633947, "grad_norm": 0.2265625, "learning_rate": 0.00019332012774516191, "loss": 1.6054, "step": 1506 }, { "epoch": 0.4710847139731166, "grad_norm": 0.216796875, "learning_rate": 0.0001933112953942286, "loss": 1.6822, "step": 1507 }, { "epoch": 0.47139731165989374, "grad_norm": 0.2216796875, "learning_rate": 0.00019330245740997346, "loss": 1.6045, "step": 1508 }, { "epoch": 0.47170990934667084, "grad_norm": 0.2294921875, "learning_rate": 0.00019329361379293006, "loss": 1.6817, "step": 1509 }, { "epoch": 0.47202250703344795, "grad_norm": 0.26953125, "learning_rate": 0.00019328476454363237, "loss": 1.6334, "step": 1510 }, { "epoch": 0.47233510472022505, "grad_norm": 0.236328125, "learning_rate": 0.00019327590966261452, "loss": 1.9416, "step": 1511 }, { "epoch": 0.4726477024070022, "grad_norm": 0.21484375, "learning_rate": 0.00019326704915041115, "loss": 1.8148, "step": 1512 }, { "epoch": 0.4729603000937793, "grad_norm": 0.2158203125, "learning_rate": 0.0001932581830075572, "loss": 1.6804, "step": 1513 }, { "epoch": 0.4732728977805564, "grad_norm": 0.224609375, "learning_rate": 0.00019324931123458784, "loss": 1.6578, "step": 1514 }, { "epoch": 0.47358549546733353, "grad_norm": 0.232421875, "learning_rate": 0.00019324043383203875, "loss": 1.7513, "step": 1515 }, { "epoch": 0.47389809315411063, "grad_norm": 0.2060546875, "learning_rate": 0.00019323155080044587, "loss": 1.8009, "step": 1516 }, { "epoch": 0.4742106908408878, "grad_norm": 0.23828125, "learning_rate": 0.00019322266214034546, "loss": 1.5399, "step": 1517 }, { "epoch": 0.4745232885276649, "grad_norm": 0.2216796875, "learning_rate": 0.00019321376785227416, "loss": 1.6751, "step": 1518 }, { "epoch": 0.474835886214442, "grad_norm": 0.2236328125, "learning_rate": 0.00019320486793676889, "loss": 1.5572, "step": 1519 }, { "epoch": 0.4751484839012191, "grad_norm": 0.228515625, "learning_rate": 0.00019319596239436698, "loss": 1.6178, "step": 1520 }, { "epoch": 0.47546108158799627, "grad_norm": 0.2294921875, "learning_rate": 0.00019318705122560602, "loss": 1.5581, "step": 1521 }, { "epoch": 0.4757736792747734, "grad_norm": 0.2265625, "learning_rate": 0.00019317813443102408, "loss": 1.6904, "step": 1522 }, { "epoch": 0.4760862769615505, "grad_norm": 0.2373046875, "learning_rate": 0.0001931692120111594, "loss": 1.9162, "step": 1523 }, { "epoch": 0.4763988746483276, "grad_norm": 0.2265625, "learning_rate": 0.0001931602839665507, "loss": 1.6703, "step": 1524 }, { "epoch": 0.47671147233510475, "grad_norm": 0.2216796875, "learning_rate": 0.0001931513502977369, "loss": 1.6865, "step": 1525 }, { "epoch": 0.47702407002188185, "grad_norm": 0.2177734375, "learning_rate": 0.00019314241100525738, "loss": 1.7221, "step": 1526 }, { "epoch": 0.47733666770865896, "grad_norm": 0.2216796875, "learning_rate": 0.00019313346608965183, "loss": 1.6306, "step": 1527 }, { "epoch": 0.47764926539543606, "grad_norm": 0.224609375, "learning_rate": 0.00019312451555146022, "loss": 2.0435, "step": 1528 }, { "epoch": 0.47796186308221317, "grad_norm": 0.2265625, "learning_rate": 0.00019311555939122298, "loss": 1.4892, "step": 1529 }, { "epoch": 0.47827446076899033, "grad_norm": 0.2236328125, "learning_rate": 0.00019310659760948075, "loss": 1.7291, "step": 1530 }, { "epoch": 0.47858705845576743, "grad_norm": 0.2119140625, "learning_rate": 0.00019309763020677458, "loss": 1.7014, "step": 1531 }, { "epoch": 0.47889965614254454, "grad_norm": 0.23828125, "learning_rate": 0.00019308865718364583, "loss": 2.0065, "step": 1532 }, { "epoch": 0.47921225382932164, "grad_norm": 0.23046875, "learning_rate": 0.00019307967854063622, "loss": 1.5883, "step": 1533 }, { "epoch": 0.4795248515160988, "grad_norm": 0.236328125, "learning_rate": 0.0001930706942782878, "loss": 1.7971, "step": 1534 }, { "epoch": 0.4798374492028759, "grad_norm": 0.224609375, "learning_rate": 0.00019306170439714298, "loss": 1.6701, "step": 1535 }, { "epoch": 0.480150046889653, "grad_norm": 0.2275390625, "learning_rate": 0.00019305270889774444, "loss": 1.611, "step": 1536 }, { "epoch": 0.4804626445764301, "grad_norm": 0.240234375, "learning_rate": 0.00019304370778063534, "loss": 1.8515, "step": 1537 }, { "epoch": 0.4807752422632072, "grad_norm": 0.2216796875, "learning_rate": 0.00019303470104635898, "loss": 1.64, "step": 1538 }, { "epoch": 0.4810878399499844, "grad_norm": 0.228515625, "learning_rate": 0.0001930256886954592, "loss": 1.7283, "step": 1539 }, { "epoch": 0.4814004376367615, "grad_norm": 0.244140625, "learning_rate": 0.00019301667072848004, "loss": 1.8076, "step": 1540 }, { "epoch": 0.4817130353235386, "grad_norm": 0.2294921875, "learning_rate": 0.00019300764714596594, "loss": 1.9384, "step": 1541 }, { "epoch": 0.4820256330103157, "grad_norm": 0.2294921875, "learning_rate": 0.00019299861794846166, "loss": 1.8492, "step": 1542 }, { "epoch": 0.48233823069709286, "grad_norm": 0.21875, "learning_rate": 0.00019298958313651227, "loss": 1.744, "step": 1543 }, { "epoch": 0.48265082838386997, "grad_norm": 0.220703125, "learning_rate": 0.0001929805427106633, "loss": 1.7691, "step": 1544 }, { "epoch": 0.4829634260706471, "grad_norm": 0.2216796875, "learning_rate": 0.00019297149667146045, "loss": 1.6095, "step": 1545 }, { "epoch": 0.4832760237574242, "grad_norm": 0.2099609375, "learning_rate": 0.0001929624450194499, "loss": 1.8153, "step": 1546 }, { "epoch": 0.48358862144420134, "grad_norm": 0.2236328125, "learning_rate": 0.00019295338775517803, "loss": 1.8315, "step": 1547 }, { "epoch": 0.48390121913097844, "grad_norm": 0.208984375, "learning_rate": 0.00019294432487919173, "loss": 1.6651, "step": 1548 }, { "epoch": 0.48421381681775555, "grad_norm": 0.20703125, "learning_rate": 0.0001929352563920381, "loss": 1.632, "step": 1549 }, { "epoch": 0.48452641450453265, "grad_norm": 0.2080078125, "learning_rate": 0.0001929261822942646, "loss": 1.5682, "step": 1550 }, { "epoch": 0.48483901219130976, "grad_norm": 0.220703125, "learning_rate": 0.00019291710258641907, "loss": 1.7631, "step": 1551 }, { "epoch": 0.4851516098780869, "grad_norm": 0.212890625, "learning_rate": 0.00019290801726904962, "loss": 1.6418, "step": 1552 }, { "epoch": 0.485464207564864, "grad_norm": 0.2255859375, "learning_rate": 0.0001928989263427048, "loss": 1.4744, "step": 1553 }, { "epoch": 0.48577680525164113, "grad_norm": 0.2080078125, "learning_rate": 0.0001928898298079334, "loss": 1.7507, "step": 1554 }, { "epoch": 0.48608940293841824, "grad_norm": 0.2275390625, "learning_rate": 0.00019288072766528462, "loss": 1.5483, "step": 1555 }, { "epoch": 0.4864020006251954, "grad_norm": 0.228515625, "learning_rate": 0.00019287161991530792, "loss": 1.7318, "step": 1556 }, { "epoch": 0.4867145983119725, "grad_norm": 0.228515625, "learning_rate": 0.0001928625065585532, "loss": 1.8483, "step": 1557 }, { "epoch": 0.4870271959987496, "grad_norm": 0.21875, "learning_rate": 0.00019285338759557065, "loss": 1.6431, "step": 1558 }, { "epoch": 0.4873397936855267, "grad_norm": 0.2294921875, "learning_rate": 0.00019284426302691073, "loss": 1.6648, "step": 1559 }, { "epoch": 0.4876523913723038, "grad_norm": 0.21484375, "learning_rate": 0.00019283513285312437, "loss": 1.5061, "step": 1560 }, { "epoch": 0.487964989059081, "grad_norm": 0.310546875, "learning_rate": 0.0001928259970747627, "loss": 2.72, "step": 1561 }, { "epoch": 0.4882775867458581, "grad_norm": 0.2314453125, "learning_rate": 0.00019281685569237734, "loss": 1.6893, "step": 1562 }, { "epoch": 0.4885901844326352, "grad_norm": 0.216796875, "learning_rate": 0.0001928077087065201, "loss": 1.6951, "step": 1563 }, { "epoch": 0.4889027821194123, "grad_norm": 0.220703125, "learning_rate": 0.0001927985561177432, "loss": 1.7366, "step": 1564 }, { "epoch": 0.48921537980618945, "grad_norm": 0.2060546875, "learning_rate": 0.00019278939792659924, "loss": 1.7637, "step": 1565 }, { "epoch": 0.48952797749296656, "grad_norm": 0.212890625, "learning_rate": 0.00019278023413364106, "loss": 1.5522, "step": 1566 }, { "epoch": 0.48984057517974366, "grad_norm": 0.2373046875, "learning_rate": 0.00019277106473942194, "loss": 1.8184, "step": 1567 }, { "epoch": 0.49015317286652077, "grad_norm": 0.2255859375, "learning_rate": 0.00019276188974449543, "loss": 1.5573, "step": 1568 }, { "epoch": 0.49046577055329793, "grad_norm": 0.2255859375, "learning_rate": 0.00019275270914941538, "loss": 1.5074, "step": 1569 }, { "epoch": 0.49077836824007504, "grad_norm": 0.2275390625, "learning_rate": 0.00019274352295473612, "loss": 1.9685, "step": 1570 }, { "epoch": 0.49109096592685214, "grad_norm": 0.23046875, "learning_rate": 0.00019273433116101217, "loss": 1.8918, "step": 1571 }, { "epoch": 0.49140356361362925, "grad_norm": 0.240234375, "learning_rate": 0.00019272513376879854, "loss": 1.8173, "step": 1572 }, { "epoch": 0.49171616130040635, "grad_norm": 0.220703125, "learning_rate": 0.00019271593077865035, "loss": 1.7093, "step": 1573 }, { "epoch": 0.4920287589871835, "grad_norm": 0.2255859375, "learning_rate": 0.00019270672219112332, "loss": 1.7993, "step": 1574 }, { "epoch": 0.4923413566739606, "grad_norm": 0.2080078125, "learning_rate": 0.00019269750800677331, "loss": 1.7468, "step": 1575 }, { "epoch": 0.4926539543607377, "grad_norm": 0.208984375, "learning_rate": 0.00019268828822615661, "loss": 1.4455, "step": 1576 }, { "epoch": 0.4929665520475148, "grad_norm": 0.2392578125, "learning_rate": 0.00019267906284982985, "loss": 1.9409, "step": 1577 }, { "epoch": 0.493279149734292, "grad_norm": 0.220703125, "learning_rate": 0.00019266983187834995, "loss": 1.8848, "step": 1578 }, { "epoch": 0.4935917474210691, "grad_norm": 0.220703125, "learning_rate": 0.0001926605953122742, "loss": 1.5927, "step": 1579 }, { "epoch": 0.4939043451078462, "grad_norm": 0.2216796875, "learning_rate": 0.00019265135315216028, "loss": 1.7506, "step": 1580 }, { "epoch": 0.4942169427946233, "grad_norm": 0.22265625, "learning_rate": 0.00019264210539856607, "loss": 1.7024, "step": 1581 }, { "epoch": 0.49452954048140046, "grad_norm": 0.228515625, "learning_rate": 0.0001926328520520499, "loss": 1.8899, "step": 1582 }, { "epoch": 0.49484213816817757, "grad_norm": 0.2138671875, "learning_rate": 0.0001926235931131704, "loss": 1.7209, "step": 1583 }, { "epoch": 0.4951547358549547, "grad_norm": 0.22265625, "learning_rate": 0.00019261432858248657, "loss": 1.582, "step": 1584 }, { "epoch": 0.4954673335417318, "grad_norm": 0.216796875, "learning_rate": 0.0001926050584605577, "loss": 1.7583, "step": 1585 }, { "epoch": 0.4957799312285089, "grad_norm": 0.2353515625, "learning_rate": 0.00019259578274794344, "loss": 1.7366, "step": 1586 }, { "epoch": 0.49609252891528605, "grad_norm": 0.2294921875, "learning_rate": 0.0001925865014452038, "loss": 1.7721, "step": 1587 }, { "epoch": 0.49640512660206315, "grad_norm": 0.2236328125, "learning_rate": 0.00019257721455289906, "loss": 1.9818, "step": 1588 }, { "epoch": 0.49671772428884026, "grad_norm": 0.220703125, "learning_rate": 0.00019256792207158991, "loss": 1.719, "step": 1589 }, { "epoch": 0.49703032197561736, "grad_norm": 0.220703125, "learning_rate": 0.00019255862400183733, "loss": 1.7085, "step": 1590 }, { "epoch": 0.4973429196623945, "grad_norm": 0.2197265625, "learning_rate": 0.00019254932034420266, "loss": 1.5593, "step": 1591 }, { "epoch": 0.4976555173491716, "grad_norm": 0.220703125, "learning_rate": 0.00019254001109924763, "loss": 1.6743, "step": 1592 }, { "epoch": 0.49796811503594873, "grad_norm": 0.220703125, "learning_rate": 0.0001925306962675342, "loss": 1.5977, "step": 1593 }, { "epoch": 0.49828071272272584, "grad_norm": 0.216796875, "learning_rate": 0.00019252137584962472, "loss": 1.6007, "step": 1594 }, { "epoch": 0.49859331040950294, "grad_norm": 0.2177734375, "learning_rate": 0.00019251204984608184, "loss": 1.5078, "step": 1595 }, { "epoch": 0.4989059080962801, "grad_norm": 0.2216796875, "learning_rate": 0.00019250271825746866, "loss": 1.9624, "step": 1596 }, { "epoch": 0.4992185057830572, "grad_norm": 0.21484375, "learning_rate": 0.0001924933810843485, "loss": 1.6749, "step": 1597 }, { "epoch": 0.4995311034698343, "grad_norm": 0.23046875, "learning_rate": 0.00019248403832728504, "loss": 1.7965, "step": 1598 }, { "epoch": 0.4998437011566114, "grad_norm": 0.2255859375, "learning_rate": 0.00019247468998684233, "loss": 1.7333, "step": 1599 }, { "epoch": 0.5001562988433885, "grad_norm": 0.21484375, "learning_rate": 0.00019246533606358476, "loss": 1.9014, "step": 1600 }, { "epoch": 0.5001562988433885, "eval_loss": 1.6468836069107056, "eval_runtime": 1904.4552, "eval_samples_per_second": 4.798, "eval_steps_per_second": 2.399, "step": 1600 }, { "epoch": 0.5004688965301657, "grad_norm": 0.2060546875, "learning_rate": 0.000192455976558077, "loss": 1.8399, "step": 1601 }, { "epoch": 0.5007814942169428, "grad_norm": 0.22265625, "learning_rate": 0.00019244661147088413, "loss": 1.7516, "step": 1602 }, { "epoch": 0.5010940919037199, "grad_norm": 0.2236328125, "learning_rate": 0.00019243724080257154, "loss": 1.6023, "step": 1603 }, { "epoch": 0.5014066895904971, "grad_norm": 0.240234375, "learning_rate": 0.0001924278645537049, "loss": 1.8678, "step": 1604 }, { "epoch": 0.5017192872772741, "grad_norm": 0.2138671875, "learning_rate": 0.0001924184827248503, "loss": 1.8877, "step": 1605 }, { "epoch": 0.5020318849640513, "grad_norm": 0.2197265625, "learning_rate": 0.00019240909531657415, "loss": 1.7109, "step": 1606 }, { "epoch": 0.5023444826508284, "grad_norm": 0.21484375, "learning_rate": 0.00019239970232944314, "loss": 1.9394, "step": 1607 }, { "epoch": 0.5026570803376055, "grad_norm": 0.2265625, "learning_rate": 0.00019239030376402437, "loss": 1.6907, "step": 1608 }, { "epoch": 0.5029696780243826, "grad_norm": 0.21875, "learning_rate": 0.00019238089962088522, "loss": 1.3726, "step": 1609 }, { "epoch": 0.5032822757111597, "grad_norm": 0.2197265625, "learning_rate": 0.00019237148990059342, "loss": 1.4186, "step": 1610 }, { "epoch": 0.5035948733979368, "grad_norm": 0.232421875, "learning_rate": 0.00019236207460371707, "loss": 1.8961, "step": 1611 }, { "epoch": 0.503907471084714, "grad_norm": 0.2216796875, "learning_rate": 0.0001923526537308246, "loss": 1.5122, "step": 1612 }, { "epoch": 0.5042200687714911, "grad_norm": 0.2099609375, "learning_rate": 0.00019234322728248473, "loss": 1.6718, "step": 1613 }, { "epoch": 0.5045326664582682, "grad_norm": 0.2177734375, "learning_rate": 0.00019233379525926652, "loss": 1.5157, "step": 1614 }, { "epoch": 0.5048452641450454, "grad_norm": 0.2197265625, "learning_rate": 0.00019232435766173946, "loss": 1.8013, "step": 1615 }, { "epoch": 0.5051578618318224, "grad_norm": 0.2138671875, "learning_rate": 0.00019231491449047327, "loss": 1.6126, "step": 1616 }, { "epoch": 0.5054704595185996, "grad_norm": 0.2314453125, "learning_rate": 0.00019230546574603805, "loss": 1.9199, "step": 1617 }, { "epoch": 0.5057830572053766, "grad_norm": 0.216796875, "learning_rate": 0.00019229601142900426, "loss": 1.8629, "step": 1618 }, { "epoch": 0.5060956548921538, "grad_norm": 0.2177734375, "learning_rate": 0.0001922865515399426, "loss": 1.9572, "step": 1619 }, { "epoch": 0.506408252578931, "grad_norm": 0.212890625, "learning_rate": 0.0001922770860794243, "loss": 1.8666, "step": 1620 }, { "epoch": 0.506720850265708, "grad_norm": 0.2158203125, "learning_rate": 0.00019226761504802066, "loss": 1.6269, "step": 1621 }, { "epoch": 0.5070334479524852, "grad_norm": 0.212890625, "learning_rate": 0.00019225813844630355, "loss": 1.4542, "step": 1622 }, { "epoch": 0.5073460456392622, "grad_norm": 0.232421875, "learning_rate": 0.00019224865627484502, "loss": 1.726, "step": 1623 }, { "epoch": 0.5076586433260394, "grad_norm": 0.2197265625, "learning_rate": 0.00019223916853421756, "loss": 1.9227, "step": 1624 }, { "epoch": 0.5079712410128165, "grad_norm": 0.232421875, "learning_rate": 0.000192229675224994, "loss": 1.7876, "step": 1625 }, { "epoch": 0.5082838386995936, "grad_norm": 0.2119140625, "learning_rate": 0.0001922201763477474, "loss": 1.9213, "step": 1626 }, { "epoch": 0.5085964363863708, "grad_norm": 0.2294921875, "learning_rate": 0.00019221067190305121, "loss": 1.8536, "step": 1627 }, { "epoch": 0.5089090340731478, "grad_norm": 0.236328125, "learning_rate": 0.00019220116189147928, "loss": 1.7391, "step": 1628 }, { "epoch": 0.509221631759925, "grad_norm": 0.22265625, "learning_rate": 0.00019219164631360572, "loss": 1.5871, "step": 1629 }, { "epoch": 0.5095342294467021, "grad_norm": 0.236328125, "learning_rate": 0.00019218212517000497, "loss": 1.7358, "step": 1630 }, { "epoch": 0.5098468271334792, "grad_norm": 0.2197265625, "learning_rate": 0.00019217259846125186, "loss": 1.7538, "step": 1631 }, { "epoch": 0.5101594248202563, "grad_norm": 0.228515625, "learning_rate": 0.00019216306618792151, "loss": 2.0148, "step": 1632 }, { "epoch": 0.5104720225070335, "grad_norm": 0.2216796875, "learning_rate": 0.00019215352835058944, "loss": 1.655, "step": 1633 }, { "epoch": 0.5107846201938105, "grad_norm": 0.2353515625, "learning_rate": 0.0001921439849498314, "loss": 1.8552, "step": 1634 }, { "epoch": 0.5110972178805877, "grad_norm": 0.2333984375, "learning_rate": 0.0001921344359862236, "loss": 2.0283, "step": 1635 }, { "epoch": 0.5114098155673648, "grad_norm": 0.2158203125, "learning_rate": 0.00019212488146034247, "loss": 1.8859, "step": 1636 }, { "epoch": 0.5117224132541419, "grad_norm": 0.2041015625, "learning_rate": 0.00019211532137276485, "loss": 1.7173, "step": 1637 }, { "epoch": 0.5120350109409191, "grad_norm": 0.2119140625, "learning_rate": 0.0001921057557240679, "loss": 1.6262, "step": 1638 }, { "epoch": 0.5123476086276961, "grad_norm": 0.234375, "learning_rate": 0.00019209618451482911, "loss": 1.6141, "step": 1639 }, { "epoch": 0.5126602063144733, "grad_norm": 0.224609375, "learning_rate": 0.0001920866077456263, "loss": 1.7475, "step": 1640 }, { "epoch": 0.5129728040012503, "grad_norm": 0.23828125, "learning_rate": 0.0001920770254170376, "loss": 1.7333, "step": 1641 }, { "epoch": 0.5132854016880275, "grad_norm": 0.2314453125, "learning_rate": 0.0001920674375296416, "loss": 1.8058, "step": 1642 }, { "epoch": 0.5135979993748047, "grad_norm": 0.216796875, "learning_rate": 0.00019205784408401705, "loss": 1.5659, "step": 1643 }, { "epoch": 0.5139105970615817, "grad_norm": 0.2421875, "learning_rate": 0.00019204824508074314, "loss": 1.6922, "step": 1644 }, { "epoch": 0.5142231947483589, "grad_norm": 0.20703125, "learning_rate": 0.00019203864052039937, "loss": 1.5329, "step": 1645 }, { "epoch": 0.514535792435136, "grad_norm": 0.2265625, "learning_rate": 0.00019202903040356557, "loss": 1.5799, "step": 1646 }, { "epoch": 0.5148483901219131, "grad_norm": 0.216796875, "learning_rate": 0.00019201941473082196, "loss": 1.7131, "step": 1647 }, { "epoch": 0.5151609878086902, "grad_norm": 0.2236328125, "learning_rate": 0.00019200979350274898, "loss": 1.668, "step": 1648 }, { "epoch": 0.5154735854954673, "grad_norm": 0.22265625, "learning_rate": 0.00019200016671992755, "loss": 1.8212, "step": 1649 }, { "epoch": 0.5157861831822445, "grad_norm": 0.2353515625, "learning_rate": 0.00019199053438293884, "loss": 1.745, "step": 1650 }, { "epoch": 0.5160987808690216, "grad_norm": 0.224609375, "learning_rate": 0.0001919808964923643, "loss": 1.9392, "step": 1651 }, { "epoch": 0.5164113785557987, "grad_norm": 0.2255859375, "learning_rate": 0.00019197125304878587, "loss": 1.8001, "step": 1652 }, { "epoch": 0.5167239762425758, "grad_norm": 0.2119140625, "learning_rate": 0.00019196160405278567, "loss": 1.6449, "step": 1653 }, { "epoch": 0.5170365739293529, "grad_norm": 0.2177734375, "learning_rate": 0.00019195194950494623, "loss": 1.7974, "step": 1654 }, { "epoch": 0.51734917161613, "grad_norm": 0.2333984375, "learning_rate": 0.00019194228940585043, "loss": 1.6213, "step": 1655 }, { "epoch": 0.5176617693029072, "grad_norm": 0.2216796875, "learning_rate": 0.0001919326237560815, "loss": 1.7459, "step": 1656 }, { "epoch": 0.5179743669896842, "grad_norm": 0.23046875, "learning_rate": 0.00019192295255622286, "loss": 2.0187, "step": 1657 }, { "epoch": 0.5182869646764614, "grad_norm": 0.22265625, "learning_rate": 0.00019191327580685846, "loss": 1.465, "step": 1658 }, { "epoch": 0.5185995623632386, "grad_norm": 0.21875, "learning_rate": 0.0001919035935085725, "loss": 1.7626, "step": 1659 }, { "epoch": 0.5189121600500156, "grad_norm": 0.228515625, "learning_rate": 0.00019189390566194943, "loss": 1.6333, "step": 1660 }, { "epoch": 0.5192247577367928, "grad_norm": 0.2236328125, "learning_rate": 0.00019188421226757423, "loss": 1.6854, "step": 1661 }, { "epoch": 0.5195373554235698, "grad_norm": 0.2177734375, "learning_rate": 0.00019187451332603202, "loss": 1.5598, "step": 1662 }, { "epoch": 0.519849953110347, "grad_norm": 0.224609375, "learning_rate": 0.00019186480883790836, "loss": 1.7953, "step": 1663 }, { "epoch": 0.5201625507971241, "grad_norm": 0.22265625, "learning_rate": 0.00019185509880378912, "loss": 1.7901, "step": 1664 }, { "epoch": 0.5204751484839012, "grad_norm": 0.23046875, "learning_rate": 0.00019184538322426054, "loss": 1.6819, "step": 1665 }, { "epoch": 0.5207877461706784, "grad_norm": 0.2236328125, "learning_rate": 0.00019183566209990911, "loss": 1.8034, "step": 1666 }, { "epoch": 0.5211003438574554, "grad_norm": 0.2255859375, "learning_rate": 0.00019182593543132174, "loss": 2.0384, "step": 1667 }, { "epoch": 0.5214129415442326, "grad_norm": 0.2080078125, "learning_rate": 0.00019181620321908564, "loss": 1.9369, "step": 1668 }, { "epoch": 0.5217255392310097, "grad_norm": 0.2333984375, "learning_rate": 0.00019180646546378832, "loss": 1.8764, "step": 1669 }, { "epoch": 0.5220381369177868, "grad_norm": 0.220703125, "learning_rate": 0.00019179672216601773, "loss": 1.6419, "step": 1670 }, { "epoch": 0.5223507346045639, "grad_norm": 0.408203125, "learning_rate": 0.00019178697332636202, "loss": 2.427, "step": 1671 }, { "epoch": 0.5226633322913411, "grad_norm": 0.2314453125, "learning_rate": 0.00019177721894540975, "loss": 1.81, "step": 1672 }, { "epoch": 0.5229759299781181, "grad_norm": 0.216796875, "learning_rate": 0.0001917674590237499, "loss": 1.67, "step": 1673 }, { "epoch": 0.5232885276648953, "grad_norm": 0.2255859375, "learning_rate": 0.00019175769356197153, "loss": 1.6198, "step": 1674 }, { "epoch": 0.5236011253516724, "grad_norm": 0.2314453125, "learning_rate": 0.0001917479225606643, "loss": 1.8033, "step": 1675 }, { "epoch": 0.5239137230384495, "grad_norm": 0.220703125, "learning_rate": 0.00019173814602041803, "loss": 1.6005, "step": 1676 }, { "epoch": 0.5242263207252267, "grad_norm": 0.22265625, "learning_rate": 0.00019172836394182303, "loss": 1.6983, "step": 1677 }, { "epoch": 0.5245389184120037, "grad_norm": 0.2216796875, "learning_rate": 0.00019171857632546978, "loss": 1.8186, "step": 1678 }, { "epoch": 0.5248515160987809, "grad_norm": 0.220703125, "learning_rate": 0.00019170878317194924, "loss": 1.6052, "step": 1679 }, { "epoch": 0.5251641137855579, "grad_norm": 0.23828125, "learning_rate": 0.00019169898448185256, "loss": 1.7156, "step": 1680 }, { "epoch": 0.5254767114723351, "grad_norm": 0.2177734375, "learning_rate": 0.00019168918025577134, "loss": 1.7039, "step": 1681 }, { "epoch": 0.5257893091591123, "grad_norm": 0.2275390625, "learning_rate": 0.00019167937049429745, "loss": 1.8326, "step": 1682 }, { "epoch": 0.5261019068458893, "grad_norm": 0.228515625, "learning_rate": 0.00019166955519802316, "loss": 1.6872, "step": 1683 }, { "epoch": 0.5264145045326665, "grad_norm": 0.2138671875, "learning_rate": 0.00019165973436754098, "loss": 1.6172, "step": 1684 }, { "epoch": 0.5267271022194435, "grad_norm": 0.2255859375, "learning_rate": 0.00019164990800344387, "loss": 1.7482, "step": 1685 }, { "epoch": 0.5270396999062207, "grad_norm": 0.224609375, "learning_rate": 0.000191640076106325, "loss": 1.6177, "step": 1686 }, { "epoch": 0.5273522975929978, "grad_norm": 0.2333984375, "learning_rate": 0.00019163023867677797, "loss": 1.6793, "step": 1687 }, { "epoch": 0.5276648952797749, "grad_norm": 0.2275390625, "learning_rate": 0.00019162039571539666, "loss": 1.6634, "step": 1688 }, { "epoch": 0.527977492966552, "grad_norm": 0.2197265625, "learning_rate": 0.0001916105472227753, "loss": 1.7808, "step": 1689 }, { "epoch": 0.5282900906533292, "grad_norm": 0.228515625, "learning_rate": 0.00019160069319950845, "loss": 1.7203, "step": 1690 }, { "epoch": 0.5286026883401063, "grad_norm": 0.23046875, "learning_rate": 0.00019159083364619103, "loss": 1.6893, "step": 1691 }, { "epoch": 0.5289152860268834, "grad_norm": 0.349609375, "learning_rate": 0.0001915809685634183, "loss": 2.3232, "step": 1692 }, { "epoch": 0.5292278837136605, "grad_norm": 0.2138671875, "learning_rate": 0.0001915710979517858, "loss": 1.554, "step": 1693 }, { "epoch": 0.5295404814004376, "grad_norm": 0.234375, "learning_rate": 0.0001915612218118894, "loss": 1.6621, "step": 1694 }, { "epoch": 0.5298530790872148, "grad_norm": 0.2197265625, "learning_rate": 0.00019155134014432534, "loss": 1.8881, "step": 1695 }, { "epoch": 0.5301656767739918, "grad_norm": 0.22265625, "learning_rate": 0.00019154145294969022, "loss": 1.8313, "step": 1696 }, { "epoch": 0.530478274460769, "grad_norm": 0.2158203125, "learning_rate": 0.00019153156022858094, "loss": 1.7908, "step": 1697 }, { "epoch": 0.5307908721475461, "grad_norm": 0.224609375, "learning_rate": 0.00019152166198159476, "loss": 1.6425, "step": 1698 }, { "epoch": 0.5311034698343232, "grad_norm": 0.21484375, "learning_rate": 0.00019151175820932917, "loss": 1.7114, "step": 1699 }, { "epoch": 0.5314160675211004, "grad_norm": 0.2109375, "learning_rate": 0.00019150184891238216, "loss": 1.5121, "step": 1700 }, { "epoch": 0.5317286652078774, "grad_norm": 0.2353515625, "learning_rate": 0.00019149193409135192, "loss": 1.7762, "step": 1701 }, { "epoch": 0.5320412628946546, "grad_norm": 0.2216796875, "learning_rate": 0.00019148201374683704, "loss": 1.8021, "step": 1702 }, { "epoch": 0.5323538605814317, "grad_norm": 0.2392578125, "learning_rate": 0.00019147208787943638, "loss": 1.8559, "step": 1703 }, { "epoch": 0.5326664582682088, "grad_norm": 0.33984375, "learning_rate": 0.00019146215648974924, "loss": 2.3382, "step": 1704 }, { "epoch": 0.532979055954986, "grad_norm": 0.23046875, "learning_rate": 0.00019145221957837515, "loss": 1.6269, "step": 1705 }, { "epoch": 0.533291653641763, "grad_norm": 0.2197265625, "learning_rate": 0.00019144227714591402, "loss": 1.8329, "step": 1706 }, { "epoch": 0.5336042513285402, "grad_norm": 0.23046875, "learning_rate": 0.0001914323291929661, "loss": 1.7395, "step": 1707 }, { "epoch": 0.5339168490153173, "grad_norm": 0.2216796875, "learning_rate": 0.00019142237572013197, "loss": 1.4983, "step": 1708 }, { "epoch": 0.5342294467020944, "grad_norm": 0.220703125, "learning_rate": 0.00019141241672801247, "loss": 1.7625, "step": 1709 }, { "epoch": 0.5345420443888715, "grad_norm": 0.23046875, "learning_rate": 0.0001914024522172089, "loss": 1.8429, "step": 1710 }, { "epoch": 0.5348546420756486, "grad_norm": 0.2294921875, "learning_rate": 0.00019139248218832285, "loss": 1.9247, "step": 1711 }, { "epoch": 0.5351672397624258, "grad_norm": 0.2216796875, "learning_rate": 0.00019138250664195615, "loss": 1.6563, "step": 1712 }, { "epoch": 0.5354798374492029, "grad_norm": 0.216796875, "learning_rate": 0.0001913725255787111, "loss": 1.5108, "step": 1713 }, { "epoch": 0.53579243513598, "grad_norm": 0.2197265625, "learning_rate": 0.00019136253899919024, "loss": 1.8109, "step": 1714 }, { "epoch": 0.5361050328227571, "grad_norm": 0.21875, "learning_rate": 0.00019135254690399648, "loss": 1.7063, "step": 1715 }, { "epoch": 0.5364176305095343, "grad_norm": 0.2294921875, "learning_rate": 0.00019134254929373303, "loss": 1.7218, "step": 1716 }, { "epoch": 0.5367302281963113, "grad_norm": 0.232421875, "learning_rate": 0.00019133254616900347, "loss": 1.6555, "step": 1717 }, { "epoch": 0.5370428258830885, "grad_norm": 0.2216796875, "learning_rate": 0.00019132253753041174, "loss": 1.9246, "step": 1718 }, { "epoch": 0.5373554235698655, "grad_norm": 0.2216796875, "learning_rate": 0.00019131252337856205, "loss": 1.818, "step": 1719 }, { "epoch": 0.5376680212566427, "grad_norm": 0.2216796875, "learning_rate": 0.00019130250371405895, "loss": 1.6691, "step": 1720 }, { "epoch": 0.5379806189434199, "grad_norm": 0.2255859375, "learning_rate": 0.00019129247853750733, "loss": 1.6272, "step": 1721 }, { "epoch": 0.5382932166301969, "grad_norm": 0.2109375, "learning_rate": 0.0001912824478495125, "loss": 1.529, "step": 1722 }, { "epoch": 0.5386058143169741, "grad_norm": 0.224609375, "learning_rate": 0.00019127241165067994, "loss": 1.8957, "step": 1723 }, { "epoch": 0.5389184120037511, "grad_norm": 0.216796875, "learning_rate": 0.00019126236994161558, "loss": 1.6643, "step": 1724 }, { "epoch": 0.5392310096905283, "grad_norm": 0.2216796875, "learning_rate": 0.00019125232272292563, "loss": 1.8746, "step": 1725 }, { "epoch": 0.5395436073773054, "grad_norm": 0.2392578125, "learning_rate": 0.00019124226999521672, "loss": 1.5691, "step": 1726 }, { "epoch": 0.5398562050640825, "grad_norm": 0.2158203125, "learning_rate": 0.00019123221175909567, "loss": 1.7902, "step": 1727 }, { "epoch": 0.5401688027508597, "grad_norm": 0.2314453125, "learning_rate": 0.00019122214801516973, "loss": 1.6767, "step": 1728 }, { "epoch": 0.5404814004376368, "grad_norm": 0.2177734375, "learning_rate": 0.00019121207876404648, "loss": 1.727, "step": 1729 }, { "epoch": 0.5407939981244139, "grad_norm": 0.228515625, "learning_rate": 0.0001912020040063338, "loss": 1.6355, "step": 1730 }, { "epoch": 0.541106595811191, "grad_norm": 0.2255859375, "learning_rate": 0.00019119192374263992, "loss": 1.9062, "step": 1731 }, { "epoch": 0.5414191934979681, "grad_norm": 0.2353515625, "learning_rate": 0.00019118183797357338, "loss": 1.5986, "step": 1732 }, { "epoch": 0.5417317911847452, "grad_norm": 0.2119140625, "learning_rate": 0.00019117174669974312, "loss": 1.5961, "step": 1733 }, { "epoch": 0.5420443888715224, "grad_norm": 0.2294921875, "learning_rate": 0.00019116164992175828, "loss": 1.8585, "step": 1734 }, { "epoch": 0.5423569865582994, "grad_norm": 0.2412109375, "learning_rate": 0.00019115154764022852, "loss": 1.731, "step": 1735 }, { "epoch": 0.5426695842450766, "grad_norm": 0.2138671875, "learning_rate": 0.00019114143985576366, "loss": 1.9891, "step": 1736 }, { "epoch": 0.5429821819318537, "grad_norm": 0.23046875, "learning_rate": 0.0001911313265689739, "loss": 1.6551, "step": 1737 }, { "epoch": 0.5432947796186308, "grad_norm": 0.2158203125, "learning_rate": 0.00019112120778046987, "loss": 2.0219, "step": 1738 }, { "epoch": 0.543607377305408, "grad_norm": 0.21875, "learning_rate": 0.0001911110834908624, "loss": 1.7808, "step": 1739 }, { "epoch": 0.543919974992185, "grad_norm": 0.23046875, "learning_rate": 0.0001911009537007627, "loss": 1.7043, "step": 1740 }, { "epoch": 0.5442325726789622, "grad_norm": 0.2255859375, "learning_rate": 0.00019109081841078233, "loss": 1.7296, "step": 1741 }, { "epoch": 0.5445451703657392, "grad_norm": 0.2177734375, "learning_rate": 0.0001910806776215332, "loss": 1.6465, "step": 1742 }, { "epoch": 0.5448577680525164, "grad_norm": 0.2216796875, "learning_rate": 0.00019107053133362749, "loss": 1.8411, "step": 1743 }, { "epoch": 0.5451703657392936, "grad_norm": 0.220703125, "learning_rate": 0.00019106037954767774, "loss": 1.4522, "step": 1744 }, { "epoch": 0.5454829634260706, "grad_norm": 0.212890625, "learning_rate": 0.00019105022226429682, "loss": 1.7463, "step": 1745 }, { "epoch": 0.5457955611128478, "grad_norm": 0.22265625, "learning_rate": 0.00019104005948409797, "loss": 1.622, "step": 1746 }, { "epoch": 0.5461081587996249, "grad_norm": 0.234375, "learning_rate": 0.00019102989120769475, "loss": 1.8334, "step": 1747 }, { "epoch": 0.546420756486402, "grad_norm": 0.236328125, "learning_rate": 0.00019101971743570094, "loss": 1.6375, "step": 1748 }, { "epoch": 0.5467333541731791, "grad_norm": 0.224609375, "learning_rate": 0.00019100953816873084, "loss": 1.4945, "step": 1749 }, { "epoch": 0.5470459518599562, "grad_norm": 0.21875, "learning_rate": 0.00019099935340739893, "loss": 1.687, "step": 1750 }, { "epoch": 0.5473585495467334, "grad_norm": 0.251953125, "learning_rate": 0.0001909891631523201, "loss": 1.8769, "step": 1751 }, { "epoch": 0.5476711472335105, "grad_norm": 0.228515625, "learning_rate": 0.00019097896740410955, "loss": 1.814, "step": 1752 }, { "epoch": 0.5479837449202876, "grad_norm": 0.224609375, "learning_rate": 0.00019096876616338278, "loss": 1.8215, "step": 1753 }, { "epoch": 0.5482963426070647, "grad_norm": 0.2470703125, "learning_rate": 0.00019095855943075568, "loss": 1.6682, "step": 1754 }, { "epoch": 0.5486089402938418, "grad_norm": 0.234375, "learning_rate": 0.00019094834720684447, "loss": 1.8052, "step": 1755 }, { "epoch": 0.5489215379806189, "grad_norm": 0.2255859375, "learning_rate": 0.0001909381294922656, "loss": 1.7685, "step": 1756 }, { "epoch": 0.5492341356673961, "grad_norm": 0.2392578125, "learning_rate": 0.000190927906287636, "loss": 1.6704, "step": 1757 }, { "epoch": 0.5495467333541731, "grad_norm": 0.2197265625, "learning_rate": 0.0001909176775935728, "loss": 1.75, "step": 1758 }, { "epoch": 0.5498593310409503, "grad_norm": 0.240234375, "learning_rate": 0.00019090744341069356, "loss": 1.5139, "step": 1759 }, { "epoch": 0.5501719287277275, "grad_norm": 0.2275390625, "learning_rate": 0.00019089720373961612, "loss": 1.5844, "step": 1760 }, { "epoch": 0.5504845264145045, "grad_norm": 0.2314453125, "learning_rate": 0.00019088695858095864, "loss": 1.7899, "step": 1761 }, { "epoch": 0.5507971241012817, "grad_norm": 0.2275390625, "learning_rate": 0.00019087670793533967, "loss": 1.7717, "step": 1762 }, { "epoch": 0.5511097217880587, "grad_norm": 0.2255859375, "learning_rate": 0.00019086645180337803, "loss": 1.7754, "step": 1763 }, { "epoch": 0.5514223194748359, "grad_norm": 0.2177734375, "learning_rate": 0.0001908561901856929, "loss": 1.8412, "step": 1764 }, { "epoch": 0.551734917161613, "grad_norm": 0.2294921875, "learning_rate": 0.0001908459230829038, "loss": 1.7254, "step": 1765 }, { "epoch": 0.5520475148483901, "grad_norm": 0.2314453125, "learning_rate": 0.00019083565049563057, "loss": 1.8097, "step": 1766 }, { "epoch": 0.5523601125351673, "grad_norm": 0.2255859375, "learning_rate": 0.00019082537242449333, "loss": 1.8441, "step": 1767 }, { "epoch": 0.5526727102219443, "grad_norm": 0.328125, "learning_rate": 0.00019081508887011263, "loss": 2.4757, "step": 1768 }, { "epoch": 0.5529853079087215, "grad_norm": 0.21875, "learning_rate": 0.0001908047998331093, "loss": 1.5833, "step": 1769 }, { "epoch": 0.5532979055954986, "grad_norm": 0.359375, "learning_rate": 0.0001907945053141045, "loss": 2.4293, "step": 1770 }, { "epoch": 0.5536105032822757, "grad_norm": 0.2236328125, "learning_rate": 0.0001907842053137197, "loss": 1.9397, "step": 1771 }, { "epoch": 0.5539231009690528, "grad_norm": 0.2353515625, "learning_rate": 0.0001907738998325767, "loss": 2.0662, "step": 1772 }, { "epoch": 0.55423569865583, "grad_norm": 0.228515625, "learning_rate": 0.00019076358887129774, "loss": 1.8447, "step": 1773 }, { "epoch": 0.554548296342607, "grad_norm": 0.32421875, "learning_rate": 0.00019075327243050526, "loss": 2.3451, "step": 1774 }, { "epoch": 0.5548608940293842, "grad_norm": 0.228515625, "learning_rate": 0.00019074295051082205, "loss": 1.623, "step": 1775 }, { "epoch": 0.5551734917161613, "grad_norm": 0.240234375, "learning_rate": 0.0001907326231128713, "loss": 2.0579, "step": 1776 }, { "epoch": 0.5554860894029384, "grad_norm": 0.216796875, "learning_rate": 0.00019072229023727645, "loss": 1.6111, "step": 1777 }, { "epoch": 0.5557986870897156, "grad_norm": 0.224609375, "learning_rate": 0.00019071195188466135, "loss": 1.87, "step": 1778 }, { "epoch": 0.5561112847764926, "grad_norm": 0.2041015625, "learning_rate": 0.00019070160805565012, "loss": 1.6437, "step": 1779 }, { "epoch": 0.5564238824632698, "grad_norm": 0.2314453125, "learning_rate": 0.00019069125875086722, "loss": 1.6752, "step": 1780 }, { "epoch": 0.5567364801500468, "grad_norm": 0.236328125, "learning_rate": 0.00019068090397093745, "loss": 1.7323, "step": 1781 }, { "epoch": 0.557049077836824, "grad_norm": 0.228515625, "learning_rate": 0.000190670543716486, "loss": 1.7324, "step": 1782 }, { "epoch": 0.5573616755236012, "grad_norm": 0.22265625, "learning_rate": 0.00019066017798813825, "loss": 1.5224, "step": 1783 }, { "epoch": 0.5576742732103782, "grad_norm": 0.326171875, "learning_rate": 0.00019064980678652, "loss": 2.3167, "step": 1784 }, { "epoch": 0.5579868708971554, "grad_norm": 0.212890625, "learning_rate": 0.00019063943011225743, "loss": 1.7731, "step": 1785 }, { "epoch": 0.5582994685839325, "grad_norm": 0.23828125, "learning_rate": 0.00019062904796597697, "loss": 1.6789, "step": 1786 }, { "epoch": 0.5586120662707096, "grad_norm": 0.224609375, "learning_rate": 0.00019061866034830534, "loss": 1.7119, "step": 1787 }, { "epoch": 0.5589246639574867, "grad_norm": 0.2294921875, "learning_rate": 0.00019060826725986977, "loss": 1.6962, "step": 1788 }, { "epoch": 0.5592372616442638, "grad_norm": 0.2294921875, "learning_rate": 0.00019059786870129761, "loss": 1.6318, "step": 1789 }, { "epoch": 0.559549859331041, "grad_norm": 0.21875, "learning_rate": 0.0001905874646732167, "loss": 1.8541, "step": 1790 }, { "epoch": 0.5598624570178181, "grad_norm": 0.2265625, "learning_rate": 0.00019057705517625505, "loss": 1.8081, "step": 1791 }, { "epoch": 0.5601750547045952, "grad_norm": 0.2333984375, "learning_rate": 0.0001905666402110412, "loss": 1.4779, "step": 1792 }, { "epoch": 0.5604876523913723, "grad_norm": 0.2060546875, "learning_rate": 0.00019055621977820387, "loss": 1.6657, "step": 1793 }, { "epoch": 0.5608002500781494, "grad_norm": 0.2255859375, "learning_rate": 0.00019054579387837214, "loss": 1.5665, "step": 1794 }, { "epoch": 0.5611128477649265, "grad_norm": 0.2392578125, "learning_rate": 0.00019053536251217545, "loss": 1.5586, "step": 1795 }, { "epoch": 0.5614254454517037, "grad_norm": 0.2294921875, "learning_rate": 0.00019052492568024355, "loss": 1.5323, "step": 1796 }, { "epoch": 0.5617380431384807, "grad_norm": 0.2255859375, "learning_rate": 0.00019051448338320656, "loss": 1.7868, "step": 1797 }, { "epoch": 0.5620506408252579, "grad_norm": 0.2255859375, "learning_rate": 0.00019050403562169486, "loss": 1.6351, "step": 1798 }, { "epoch": 0.562363238512035, "grad_norm": 0.2216796875, "learning_rate": 0.00019049358239633916, "loss": 2.0889, "step": 1799 }, { "epoch": 0.5626758361988121, "grad_norm": 0.2255859375, "learning_rate": 0.00019048312370777062, "loss": 1.5398, "step": 1800 }, { "epoch": 0.5629884338855893, "grad_norm": 0.2275390625, "learning_rate": 0.00019047265955662054, "loss": 1.6967, "step": 1801 }, { "epoch": 0.5633010315723663, "grad_norm": 0.2177734375, "learning_rate": 0.00019046218994352076, "loss": 1.6917, "step": 1802 }, { "epoch": 0.5636136292591435, "grad_norm": 0.2216796875, "learning_rate": 0.0001904517148691033, "loss": 1.4587, "step": 1803 }, { "epoch": 0.5639262269459207, "grad_norm": 0.23046875, "learning_rate": 0.00019044123433400052, "loss": 1.8214, "step": 1804 }, { "epoch": 0.5642388246326977, "grad_norm": 0.2314453125, "learning_rate": 0.0001904307483388452, "loss": 1.6375, "step": 1805 }, { "epoch": 0.5645514223194749, "grad_norm": 0.2265625, "learning_rate": 0.00019042025688427035, "loss": 1.5963, "step": 1806 }, { "epoch": 0.5648640200062519, "grad_norm": 0.2314453125, "learning_rate": 0.00019040975997090936, "loss": 1.8623, "step": 1807 }, { "epoch": 0.5651766176930291, "grad_norm": 0.220703125, "learning_rate": 0.00019039925759939597, "loss": 1.6458, "step": 1808 }, { "epoch": 0.5654892153798062, "grad_norm": 0.2255859375, "learning_rate": 0.0001903887497703642, "loss": 1.6367, "step": 1809 }, { "epoch": 0.5658018130665833, "grad_norm": 0.216796875, "learning_rate": 0.00019037823648444842, "loss": 1.6211, "step": 1810 }, { "epoch": 0.5661144107533604, "grad_norm": 0.220703125, "learning_rate": 0.0001903677177422833, "loss": 1.5955, "step": 1811 }, { "epoch": 0.5664270084401375, "grad_norm": 0.2353515625, "learning_rate": 0.00019035719354450393, "loss": 1.6509, "step": 1812 }, { "epoch": 0.5667396061269147, "grad_norm": 0.2373046875, "learning_rate": 0.00019034666389174568, "loss": 1.5193, "step": 1813 }, { "epoch": 0.5670522038136918, "grad_norm": 0.23046875, "learning_rate": 0.00019033612878464412, "loss": 1.8779, "step": 1814 }, { "epoch": 0.5673648015004689, "grad_norm": 0.232421875, "learning_rate": 0.00019032558822383542, "loss": 1.746, "step": 1815 }, { "epoch": 0.567677399187246, "grad_norm": 0.21875, "learning_rate": 0.0001903150422099558, "loss": 1.6802, "step": 1816 }, { "epoch": 0.5679899968740232, "grad_norm": 0.2373046875, "learning_rate": 0.00019030449074364204, "loss": 1.8168, "step": 1817 }, { "epoch": 0.5683025945608002, "grad_norm": 0.23046875, "learning_rate": 0.00019029393382553108, "loss": 1.6261, "step": 1818 }, { "epoch": 0.5686151922475774, "grad_norm": 0.2197265625, "learning_rate": 0.00019028337145626028, "loss": 1.6126, "step": 1819 }, { "epoch": 0.5689277899343544, "grad_norm": 0.2333984375, "learning_rate": 0.00019027280363646728, "loss": 1.7607, "step": 1820 }, { "epoch": 0.5692403876211316, "grad_norm": 0.2275390625, "learning_rate": 0.0001902622303667901, "loss": 1.6267, "step": 1821 }, { "epoch": 0.5695529853079088, "grad_norm": 0.22265625, "learning_rate": 0.00019025165164786705, "loss": 1.7209, "step": 1822 }, { "epoch": 0.5698655829946858, "grad_norm": 0.2314453125, "learning_rate": 0.00019024106748033679, "loss": 1.4932, "step": 1823 }, { "epoch": 0.570178180681463, "grad_norm": 0.236328125, "learning_rate": 0.00019023047786483828, "loss": 1.4764, "step": 1824 }, { "epoch": 0.57049077836824, "grad_norm": 0.228515625, "learning_rate": 0.00019021988280201084, "loss": 1.6664, "step": 1825 }, { "epoch": 0.5708033760550172, "grad_norm": 0.2490234375, "learning_rate": 0.0001902092822924941, "loss": 1.5628, "step": 1826 }, { "epoch": 0.5711159737417943, "grad_norm": 0.2412109375, "learning_rate": 0.00019019867633692802, "loss": 1.8942, "step": 1827 }, { "epoch": 0.5714285714285714, "grad_norm": 0.2275390625, "learning_rate": 0.00019018806493595293, "loss": 1.5664, "step": 1828 }, { "epoch": 0.5717411691153486, "grad_norm": 0.2314453125, "learning_rate": 0.00019017744809020942, "loss": 1.4663, "step": 1829 }, { "epoch": 0.5720537668021257, "grad_norm": 0.22265625, "learning_rate": 0.00019016682580033848, "loss": 1.8574, "step": 1830 }, { "epoch": 0.5723663644889028, "grad_norm": 0.2216796875, "learning_rate": 0.00019015619806698135, "loss": 1.7824, "step": 1831 }, { "epoch": 0.5726789621756799, "grad_norm": 0.224609375, "learning_rate": 0.00019014556489077965, "loss": 1.5226, "step": 1832 }, { "epoch": 0.572991559862457, "grad_norm": 0.2216796875, "learning_rate": 0.00019013492627237532, "loss": 1.8333, "step": 1833 }, { "epoch": 0.5733041575492341, "grad_norm": 0.2236328125, "learning_rate": 0.00019012428221241065, "loss": 1.5824, "step": 1834 }, { "epoch": 0.5736167552360113, "grad_norm": 0.232421875, "learning_rate": 0.00019011363271152822, "loss": 1.7483, "step": 1835 }, { "epoch": 0.5739293529227883, "grad_norm": 0.2099609375, "learning_rate": 0.00019010297777037093, "loss": 1.6215, "step": 1836 }, { "epoch": 0.5742419506095655, "grad_norm": 0.2314453125, "learning_rate": 0.00019009231738958206, "loss": 1.6124, "step": 1837 }, { "epoch": 0.5745545482963426, "grad_norm": 0.234375, "learning_rate": 0.00019008165156980517, "loss": 1.8104, "step": 1838 }, { "epoch": 0.5748671459831197, "grad_norm": 0.2333984375, "learning_rate": 0.0001900709803116842, "loss": 1.7839, "step": 1839 }, { "epoch": 0.5751797436698969, "grad_norm": 0.216796875, "learning_rate": 0.0001900603036158634, "loss": 1.6926, "step": 1840 }, { "epoch": 0.5754923413566739, "grad_norm": 0.2275390625, "learning_rate": 0.00019004962148298725, "loss": 1.8372, "step": 1841 }, { "epoch": 0.5758049390434511, "grad_norm": 0.2275390625, "learning_rate": 0.0001900389339137007, "loss": 1.5496, "step": 1842 }, { "epoch": 0.5761175367302281, "grad_norm": 0.2275390625, "learning_rate": 0.000190028240908649, "loss": 1.7024, "step": 1843 }, { "epoch": 0.5764301344170053, "grad_norm": 0.236328125, "learning_rate": 0.00019001754246847767, "loss": 1.6237, "step": 1844 }, { "epoch": 0.5767427321037825, "grad_norm": 0.23046875, "learning_rate": 0.00019000683859383258, "loss": 1.6012, "step": 1845 }, { "epoch": 0.5770553297905595, "grad_norm": 0.2119140625, "learning_rate": 0.00018999612928535995, "loss": 1.7586, "step": 1846 }, { "epoch": 0.5773679274773367, "grad_norm": 0.2373046875, "learning_rate": 0.00018998541454370632, "loss": 1.4823, "step": 1847 }, { "epoch": 0.5776805251641138, "grad_norm": 0.2265625, "learning_rate": 0.00018997469436951854, "loss": 1.5688, "step": 1848 }, { "epoch": 0.5779931228508909, "grad_norm": 0.318359375, "learning_rate": 0.0001899639687634438, "loss": 2.5108, "step": 1849 }, { "epoch": 0.578305720537668, "grad_norm": 0.2490234375, "learning_rate": 0.00018995323772612964, "loss": 1.6868, "step": 1850 }, { "epoch": 0.5786183182244451, "grad_norm": 0.2275390625, "learning_rate": 0.00018994250125822386, "loss": 1.6238, "step": 1851 }, { "epoch": 0.5789309159112223, "grad_norm": 0.220703125, "learning_rate": 0.0001899317593603747, "loss": 1.5826, "step": 1852 }, { "epoch": 0.5792435135979994, "grad_norm": 0.2265625, "learning_rate": 0.0001899210120332306, "loss": 1.6792, "step": 1853 }, { "epoch": 0.5795561112847765, "grad_norm": 0.2275390625, "learning_rate": 0.00018991025927744042, "loss": 1.8574, "step": 1854 }, { "epoch": 0.5798687089715536, "grad_norm": 0.2412109375, "learning_rate": 0.0001898995010936533, "loss": 1.7686, "step": 1855 }, { "epoch": 0.5801813066583307, "grad_norm": 0.2255859375, "learning_rate": 0.00018988873748251877, "loss": 1.7198, "step": 1856 }, { "epoch": 0.5804939043451078, "grad_norm": 0.2177734375, "learning_rate": 0.00018987796844468658, "loss": 1.7134, "step": 1857 }, { "epoch": 0.580806502031885, "grad_norm": 0.212890625, "learning_rate": 0.00018986719398080695, "loss": 1.5788, "step": 1858 }, { "epoch": 0.581119099718662, "grad_norm": 0.2265625, "learning_rate": 0.00018985641409153026, "loss": 1.6557, "step": 1859 }, { "epoch": 0.5814316974054392, "grad_norm": 0.23046875, "learning_rate": 0.00018984562877750737, "loss": 1.719, "step": 1860 }, { "epoch": 0.5817442950922164, "grad_norm": 0.2265625, "learning_rate": 0.00018983483803938932, "loss": 1.7116, "step": 1861 }, { "epoch": 0.5820568927789934, "grad_norm": 0.236328125, "learning_rate": 0.0001898240418778277, "loss": 1.9006, "step": 1862 }, { "epoch": 0.5823694904657706, "grad_norm": 0.2255859375, "learning_rate": 0.00018981324029347416, "loss": 1.3191, "step": 1863 }, { "epoch": 0.5826820881525476, "grad_norm": 0.2255859375, "learning_rate": 0.00018980243328698088, "loss": 1.7602, "step": 1864 }, { "epoch": 0.5829946858393248, "grad_norm": 0.2392578125, "learning_rate": 0.00018979162085900025, "loss": 2.0473, "step": 1865 }, { "epoch": 0.583307283526102, "grad_norm": 0.234375, "learning_rate": 0.00018978080301018503, "loss": 1.7591, "step": 1866 }, { "epoch": 0.583619881212879, "grad_norm": 0.2275390625, "learning_rate": 0.00018976997974118836, "loss": 1.9532, "step": 1867 }, { "epoch": 0.5839324788996562, "grad_norm": 0.234375, "learning_rate": 0.0001897591510526636, "loss": 1.8456, "step": 1868 }, { "epoch": 0.5842450765864332, "grad_norm": 0.23046875, "learning_rate": 0.00018974831694526452, "loss": 1.7148, "step": 1869 }, { "epoch": 0.5845576742732104, "grad_norm": 0.2158203125, "learning_rate": 0.00018973747741964515, "loss": 1.6221, "step": 1870 }, { "epoch": 0.5848702719599875, "grad_norm": 0.2236328125, "learning_rate": 0.00018972663247645994, "loss": 2.0677, "step": 1871 }, { "epoch": 0.5851828696467646, "grad_norm": 0.2421875, "learning_rate": 0.00018971578211636359, "loss": 1.4428, "step": 1872 }, { "epoch": 0.5854954673335417, "grad_norm": 0.224609375, "learning_rate": 0.00018970492634001114, "loss": 1.6225, "step": 1873 }, { "epoch": 0.5858080650203189, "grad_norm": 0.2236328125, "learning_rate": 0.00018969406514805797, "loss": 1.5286, "step": 1874 }, { "epoch": 0.586120662707096, "grad_norm": 0.2421875, "learning_rate": 0.00018968319854115978, "loss": 1.7499, "step": 1875 }, { "epoch": 0.5864332603938731, "grad_norm": 0.2197265625, "learning_rate": 0.00018967232651997265, "loss": 1.9038, "step": 1876 }, { "epoch": 0.5867458580806502, "grad_norm": 0.21484375, "learning_rate": 0.00018966144908515284, "loss": 1.5464, "step": 1877 }, { "epoch": 0.5870584557674273, "grad_norm": 0.23046875, "learning_rate": 0.00018965056623735713, "loss": 1.6405, "step": 1878 }, { "epoch": 0.5873710534542045, "grad_norm": 0.2099609375, "learning_rate": 0.00018963967797724248, "loss": 1.727, "step": 1879 }, { "epoch": 0.5876836511409815, "grad_norm": 0.2333984375, "learning_rate": 0.00018962878430546626, "loss": 1.7438, "step": 1880 }, { "epoch": 0.5879962488277587, "grad_norm": 0.2314453125, "learning_rate": 0.0001896178852226861, "loss": 1.6973, "step": 1881 }, { "epoch": 0.5883088465145357, "grad_norm": 0.228515625, "learning_rate": 0.00018960698072956, "loss": 1.7813, "step": 1882 }, { "epoch": 0.5886214442013129, "grad_norm": 0.224609375, "learning_rate": 0.00018959607082674632, "loss": 1.8691, "step": 1883 }, { "epoch": 0.5889340418880901, "grad_norm": 0.2265625, "learning_rate": 0.00018958515551490364, "loss": 1.8186, "step": 1884 }, { "epoch": 0.5892466395748671, "grad_norm": 0.224609375, "learning_rate": 0.00018957423479469096, "loss": 1.6628, "step": 1885 }, { "epoch": 0.5895592372616443, "grad_norm": 0.2314453125, "learning_rate": 0.0001895633086667676, "loss": 1.8004, "step": 1886 }, { "epoch": 0.5898718349484214, "grad_norm": 0.24609375, "learning_rate": 0.00018955237713179314, "loss": 1.781, "step": 1887 }, { "epoch": 0.5901844326351985, "grad_norm": 0.2275390625, "learning_rate": 0.00018954144019042759, "loss": 1.7539, "step": 1888 }, { "epoch": 0.5904970303219756, "grad_norm": 0.23046875, "learning_rate": 0.00018953049784333116, "loss": 1.6668, "step": 1889 }, { "epoch": 0.5908096280087527, "grad_norm": 0.228515625, "learning_rate": 0.00018951955009116449, "loss": 1.954, "step": 1890 }, { "epoch": 0.5911222256955299, "grad_norm": 0.2275390625, "learning_rate": 0.0001895085969345885, "loss": 1.8232, "step": 1891 }, { "epoch": 0.591434823382307, "grad_norm": 0.2216796875, "learning_rate": 0.00018949763837426445, "loss": 1.5966, "step": 1892 }, { "epoch": 0.5917474210690841, "grad_norm": 0.2236328125, "learning_rate": 0.00018948667441085398, "loss": 1.5623, "step": 1893 }, { "epoch": 0.5920600187558612, "grad_norm": 0.2216796875, "learning_rate": 0.00018947570504501888, "loss": 1.689, "step": 1894 }, { "epoch": 0.5923726164426383, "grad_norm": 0.2216796875, "learning_rate": 0.00018946473027742146, "loss": 1.6939, "step": 1895 }, { "epoch": 0.5926852141294154, "grad_norm": 0.228515625, "learning_rate": 0.00018945375010872426, "loss": 1.7252, "step": 1896 }, { "epoch": 0.5929978118161926, "grad_norm": 0.220703125, "learning_rate": 0.0001894427645395902, "loss": 1.7894, "step": 1897 }, { "epoch": 0.5933104095029696, "grad_norm": 0.234375, "learning_rate": 0.00018943177357068244, "loss": 1.8643, "step": 1898 }, { "epoch": 0.5936230071897468, "grad_norm": 0.2158203125, "learning_rate": 0.00018942077720266454, "loss": 1.6017, "step": 1899 }, { "epoch": 0.5939356048765239, "grad_norm": 0.22265625, "learning_rate": 0.0001894097754362004, "loss": 1.514, "step": 1900 }, { "epoch": 0.594248202563301, "grad_norm": 0.2275390625, "learning_rate": 0.00018939876827195418, "loss": 1.8716, "step": 1901 }, { "epoch": 0.5945608002500782, "grad_norm": 0.232421875, "learning_rate": 0.00018938775571059039, "loss": 1.8103, "step": 1902 }, { "epoch": 0.5948733979368552, "grad_norm": 0.2216796875, "learning_rate": 0.00018937673775277388, "loss": 1.5777, "step": 1903 }, { "epoch": 0.5951859956236324, "grad_norm": 0.2314453125, "learning_rate": 0.0001893657143991698, "loss": 1.6428, "step": 1904 }, { "epoch": 0.5954985933104096, "grad_norm": 0.224609375, "learning_rate": 0.00018935468565044368, "loss": 2.0165, "step": 1905 }, { "epoch": 0.5958111909971866, "grad_norm": 0.22265625, "learning_rate": 0.00018934365150726133, "loss": 1.5724, "step": 1906 }, { "epoch": 0.5961237886839638, "grad_norm": 0.2216796875, "learning_rate": 0.00018933261197028885, "loss": 1.9301, "step": 1907 }, { "epoch": 0.5964363863707408, "grad_norm": 0.2275390625, "learning_rate": 0.0001893215670401928, "loss": 1.6571, "step": 1908 }, { "epoch": 0.596748984057518, "grad_norm": 0.2255859375, "learning_rate": 0.00018931051671763988, "loss": 1.7479, "step": 1909 }, { "epoch": 0.5970615817442951, "grad_norm": 0.228515625, "learning_rate": 0.00018929946100329725, "loss": 1.6891, "step": 1910 }, { "epoch": 0.5973741794310722, "grad_norm": 0.23046875, "learning_rate": 0.0001892883998978324, "loss": 1.646, "step": 1911 }, { "epoch": 0.5976867771178493, "grad_norm": 0.2197265625, "learning_rate": 0.00018927733340191308, "loss": 1.6963, "step": 1912 }, { "epoch": 0.5979993748046264, "grad_norm": 0.2265625, "learning_rate": 0.00018926626151620732, "loss": 1.9789, "step": 1913 }, { "epoch": 0.5983119724914036, "grad_norm": 0.2236328125, "learning_rate": 0.00018925518424138361, "loss": 1.9244, "step": 1914 }, { "epoch": 0.5986245701781807, "grad_norm": 0.2275390625, "learning_rate": 0.00018924410157811073, "loss": 1.5019, "step": 1915 }, { "epoch": 0.5989371678649578, "grad_norm": 0.2265625, "learning_rate": 0.0001892330135270577, "loss": 1.7337, "step": 1916 }, { "epoch": 0.5992497655517349, "grad_norm": 0.2236328125, "learning_rate": 0.0001892219200888939, "loss": 1.6027, "step": 1917 }, { "epoch": 0.5995623632385121, "grad_norm": 0.2470703125, "learning_rate": 0.00018921082126428912, "loss": 1.6431, "step": 1918 }, { "epoch": 0.5998749609252891, "grad_norm": 0.22265625, "learning_rate": 0.00018919971705391335, "loss": 1.822, "step": 1919 }, { "epoch": 0.6001875586120663, "grad_norm": 0.2275390625, "learning_rate": 0.00018918860745843703, "loss": 1.656, "step": 1920 }, { "epoch": 0.6005001562988433, "grad_norm": 0.2314453125, "learning_rate": 0.00018917749247853078, "loss": 1.6685, "step": 1921 }, { "epoch": 0.6008127539856205, "grad_norm": 0.21875, "learning_rate": 0.0001891663721148657, "loss": 1.8229, "step": 1922 }, { "epoch": 0.6011253516723977, "grad_norm": 0.228515625, "learning_rate": 0.0001891552463681131, "loss": 1.7224, "step": 1923 }, { "epoch": 0.6014379493591747, "grad_norm": 0.23046875, "learning_rate": 0.00018914411523894467, "loss": 1.9986, "step": 1924 }, { "epoch": 0.6017505470459519, "grad_norm": 0.22265625, "learning_rate": 0.0001891329787280324, "loss": 1.4848, "step": 1925 }, { "epoch": 0.6020631447327289, "grad_norm": 0.224609375, "learning_rate": 0.00018912183683604864, "loss": 1.7737, "step": 1926 }, { "epoch": 0.6023757424195061, "grad_norm": 0.2255859375, "learning_rate": 0.00018911068956366597, "loss": 1.7155, "step": 1927 }, { "epoch": 0.6026883401062832, "grad_norm": 0.2353515625, "learning_rate": 0.00018909953691155745, "loss": 1.7669, "step": 1928 }, { "epoch": 0.6030009377930603, "grad_norm": 0.232421875, "learning_rate": 0.00018908837888039637, "loss": 1.8628, "step": 1929 }, { "epoch": 0.6033135354798375, "grad_norm": 0.23046875, "learning_rate": 0.0001890772154708563, "loss": 1.7606, "step": 1930 }, { "epoch": 0.6036261331666146, "grad_norm": 0.2275390625, "learning_rate": 0.0001890660466836112, "loss": 1.5453, "step": 1931 }, { "epoch": 0.6039387308533917, "grad_norm": 0.236328125, "learning_rate": 0.00018905487251933542, "loss": 1.7034, "step": 1932 }, { "epoch": 0.6042513285401688, "grad_norm": 0.23046875, "learning_rate": 0.00018904369297870349, "loss": 1.6582, "step": 1933 }, { "epoch": 0.6045639262269459, "grad_norm": 0.2421875, "learning_rate": 0.0001890325080623903, "loss": 1.5893, "step": 1934 }, { "epoch": 0.604876523913723, "grad_norm": 0.220703125, "learning_rate": 0.00018902131777107117, "loss": 1.602, "step": 1935 }, { "epoch": 0.6051891216005002, "grad_norm": 0.2275390625, "learning_rate": 0.00018901012210542165, "loss": 1.636, "step": 1936 }, { "epoch": 0.6055017192872773, "grad_norm": 0.2373046875, "learning_rate": 0.00018899892106611762, "loss": 1.8495, "step": 1937 }, { "epoch": 0.6058143169740544, "grad_norm": 0.2373046875, "learning_rate": 0.00018898771465383532, "loss": 1.7294, "step": 1938 }, { "epoch": 0.6061269146608315, "grad_norm": 0.23046875, "learning_rate": 0.0001889765028692513, "loss": 1.6063, "step": 1939 }, { "epoch": 0.6064395123476086, "grad_norm": 0.23046875, "learning_rate": 0.0001889652857130424, "loss": 1.5972, "step": 1940 }, { "epoch": 0.6067521100343858, "grad_norm": 0.224609375, "learning_rate": 0.00018895406318588585, "loss": 1.9705, "step": 1941 }, { "epoch": 0.6070647077211628, "grad_norm": 0.2294921875, "learning_rate": 0.00018894283528845914, "loss": 1.9463, "step": 1942 }, { "epoch": 0.60737730540794, "grad_norm": 0.2333984375, "learning_rate": 0.00018893160202144012, "loss": 1.7365, "step": 1943 }, { "epoch": 0.6076899030947172, "grad_norm": 0.2275390625, "learning_rate": 0.00018892036338550696, "loss": 1.6313, "step": 1944 }, { "epoch": 0.6080025007814942, "grad_norm": 0.228515625, "learning_rate": 0.00018890911938133814, "loss": 1.7297, "step": 1945 }, { "epoch": 0.6083150984682714, "grad_norm": 0.2275390625, "learning_rate": 0.0001888978700096125, "loss": 1.5932, "step": 1946 }, { "epoch": 0.6086276961550484, "grad_norm": 0.255859375, "learning_rate": 0.00018888661527100914, "loss": 1.7416, "step": 1947 }, { "epoch": 0.6089402938418256, "grad_norm": 0.2177734375, "learning_rate": 0.0001888753551662076, "loss": 1.5615, "step": 1948 }, { "epoch": 0.6092528915286027, "grad_norm": 0.21875, "learning_rate": 0.00018886408969588756, "loss": 1.9525, "step": 1949 }, { "epoch": 0.6095654892153798, "grad_norm": 0.2275390625, "learning_rate": 0.0001888528188607292, "loss": 1.4709, "step": 1950 }, { "epoch": 0.609878086902157, "grad_norm": 0.2265625, "learning_rate": 0.00018884154266141296, "loss": 1.6341, "step": 1951 }, { "epoch": 0.610190684588934, "grad_norm": 0.2314453125, "learning_rate": 0.00018883026109861955, "loss": 1.6915, "step": 1952 }, { "epoch": 0.6105032822757112, "grad_norm": 0.2216796875, "learning_rate": 0.0001888189741730301, "loss": 1.7387, "step": 1953 }, { "epoch": 0.6108158799624883, "grad_norm": 0.23828125, "learning_rate": 0.000188807681885326, "loss": 1.4454, "step": 1954 }, { "epoch": 0.6111284776492654, "grad_norm": 0.22265625, "learning_rate": 0.00018879638423618893, "loss": 1.644, "step": 1955 }, { "epoch": 0.6114410753360425, "grad_norm": 0.2265625, "learning_rate": 0.00018878508122630106, "loss": 1.6955, "step": 1956 }, { "epoch": 0.6117536730228196, "grad_norm": 0.228515625, "learning_rate": 0.00018877377285634464, "loss": 1.5826, "step": 1957 }, { "epoch": 0.6120662707095967, "grad_norm": 0.2255859375, "learning_rate": 0.00018876245912700243, "loss": 1.7957, "step": 1958 }, { "epoch": 0.6123788683963739, "grad_norm": 0.23046875, "learning_rate": 0.00018875114003895748, "loss": 1.5181, "step": 1959 }, { "epoch": 0.612691466083151, "grad_norm": 0.23046875, "learning_rate": 0.00018873981559289308, "loss": 1.7115, "step": 1960 }, { "epoch": 0.6130040637699281, "grad_norm": 0.236328125, "learning_rate": 0.00018872848578949296, "loss": 1.9347, "step": 1961 }, { "epoch": 0.6133166614567053, "grad_norm": 0.23046875, "learning_rate": 0.00018871715062944108, "loss": 1.7506, "step": 1962 }, { "epoch": 0.6136292591434823, "grad_norm": 0.29296875, "learning_rate": 0.00018870581011342174, "loss": 2.3271, "step": 1963 }, { "epoch": 0.6139418568302595, "grad_norm": 0.228515625, "learning_rate": 0.00018869446424211962, "loss": 2.0109, "step": 1964 }, { "epoch": 0.6142544545170365, "grad_norm": 0.23046875, "learning_rate": 0.00018868311301621968, "loss": 1.5306, "step": 1965 }, { "epoch": 0.6145670522038137, "grad_norm": 0.224609375, "learning_rate": 0.00018867175643640717, "loss": 1.7745, "step": 1966 }, { "epoch": 0.6148796498905909, "grad_norm": 0.23046875, "learning_rate": 0.00018866039450336777, "loss": 1.7684, "step": 1967 }, { "epoch": 0.6151922475773679, "grad_norm": 0.2373046875, "learning_rate": 0.00018864902721778734, "loss": 1.738, "step": 1968 }, { "epoch": 0.6155048452641451, "grad_norm": 0.2314453125, "learning_rate": 0.00018863765458035218, "loss": 1.6707, "step": 1969 }, { "epoch": 0.6158174429509221, "grad_norm": 0.2255859375, "learning_rate": 0.00018862627659174886, "loss": 1.5577, "step": 1970 }, { "epoch": 0.6161300406376993, "grad_norm": 0.2275390625, "learning_rate": 0.00018861489325266425, "loss": 1.6428, "step": 1971 }, { "epoch": 0.6164426383244764, "grad_norm": 0.2421875, "learning_rate": 0.00018860350456378566, "loss": 1.5885, "step": 1972 }, { "epoch": 0.6167552360112535, "grad_norm": 0.21875, "learning_rate": 0.00018859211052580057, "loss": 1.3899, "step": 1973 }, { "epoch": 0.6170678336980306, "grad_norm": 0.23046875, "learning_rate": 0.0001885807111393969, "loss": 1.8002, "step": 1974 }, { "epoch": 0.6173804313848078, "grad_norm": 0.2265625, "learning_rate": 0.0001885693064052628, "loss": 1.7554, "step": 1975 }, { "epoch": 0.6176930290715849, "grad_norm": 0.22265625, "learning_rate": 0.0001885578963240868, "loss": 1.5717, "step": 1976 }, { "epoch": 0.618005626758362, "grad_norm": 0.228515625, "learning_rate": 0.00018854648089655776, "loss": 1.6693, "step": 1977 }, { "epoch": 0.6183182244451391, "grad_norm": 0.2265625, "learning_rate": 0.00018853506012336482, "loss": 1.8787, "step": 1978 }, { "epoch": 0.6186308221319162, "grad_norm": 0.220703125, "learning_rate": 0.00018852363400519745, "loss": 1.6435, "step": 1979 }, { "epoch": 0.6189434198186934, "grad_norm": 0.224609375, "learning_rate": 0.00018851220254274554, "loss": 1.7522, "step": 1980 }, { "epoch": 0.6192560175054704, "grad_norm": 0.2333984375, "learning_rate": 0.00018850076573669915, "loss": 1.5828, "step": 1981 }, { "epoch": 0.6195686151922476, "grad_norm": 0.2294921875, "learning_rate": 0.0001884893235877488, "loss": 1.457, "step": 1982 }, { "epoch": 0.6198812128790246, "grad_norm": 0.22265625, "learning_rate": 0.00018847787609658516, "loss": 1.5991, "step": 1983 }, { "epoch": 0.6201938105658018, "grad_norm": 0.2392578125, "learning_rate": 0.0001884664232638994, "loss": 1.598, "step": 1984 }, { "epoch": 0.620506408252579, "grad_norm": 0.228515625, "learning_rate": 0.00018845496509038294, "loss": 1.6774, "step": 1985 }, { "epoch": 0.620819005939356, "grad_norm": 0.220703125, "learning_rate": 0.00018844350157672755, "loss": 1.7232, "step": 1986 }, { "epoch": 0.6211316036261332, "grad_norm": 0.228515625, "learning_rate": 0.00018843203272362523, "loss": 1.7184, "step": 1987 }, { "epoch": 0.6214442013129103, "grad_norm": 0.22265625, "learning_rate": 0.00018842055853176838, "loss": 1.6561, "step": 1988 }, { "epoch": 0.6217567989996874, "grad_norm": 0.2294921875, "learning_rate": 0.0001884090790018498, "loss": 1.5792, "step": 1989 }, { "epoch": 0.6220693966864645, "grad_norm": 0.2255859375, "learning_rate": 0.0001883975941345624, "loss": 1.9449, "step": 1990 }, { "epoch": 0.6223819943732416, "grad_norm": 0.251953125, "learning_rate": 0.00018838610393059964, "loss": 2.1031, "step": 1991 }, { "epoch": 0.6226945920600188, "grad_norm": 0.228515625, "learning_rate": 0.00018837460839065515, "loss": 1.9063, "step": 1992 }, { "epoch": 0.6230071897467959, "grad_norm": 0.25390625, "learning_rate": 0.0001883631075154229, "loss": 2.1289, "step": 1993 }, { "epoch": 0.623319787433573, "grad_norm": 0.23828125, "learning_rate": 0.0001883516013055973, "loss": 2.0025, "step": 1994 }, { "epoch": 0.6236323851203501, "grad_norm": 0.2353515625, "learning_rate": 0.0001883400897618729, "loss": 1.8512, "step": 1995 }, { "epoch": 0.6239449828071272, "grad_norm": 0.236328125, "learning_rate": 0.0001883285728849447, "loss": 1.8326, "step": 1996 }, { "epoch": 0.6242575804939043, "grad_norm": 0.224609375, "learning_rate": 0.00018831705067550805, "loss": 1.6852, "step": 1997 }, { "epoch": 0.6245701781806815, "grad_norm": 0.2197265625, "learning_rate": 0.00018830552313425845, "loss": 1.8256, "step": 1998 }, { "epoch": 0.6248827758674586, "grad_norm": 0.23046875, "learning_rate": 0.0001882939902618919, "loss": 1.6083, "step": 1999 }, { "epoch": 0.6251953735542357, "grad_norm": 0.224609375, "learning_rate": 0.00018828245205910465, "loss": 1.7561, "step": 2000 }, { "epoch": 0.6255079712410128, "grad_norm": 0.2421875, "learning_rate": 0.0001882709085265933, "loss": 1.7635, "step": 2001 }, { "epoch": 0.6258205689277899, "grad_norm": 0.2333984375, "learning_rate": 0.0001882593596650547, "loss": 1.8553, "step": 2002 }, { "epoch": 0.6261331666145671, "grad_norm": 0.2353515625, "learning_rate": 0.0001882478054751861, "loss": 1.6012, "step": 2003 }, { "epoch": 0.6264457643013441, "grad_norm": 0.2333984375, "learning_rate": 0.00018823624595768498, "loss": 1.8742, "step": 2004 }, { "epoch": 0.6267583619881213, "grad_norm": 0.23828125, "learning_rate": 0.0001882246811132493, "loss": 1.2608, "step": 2005 }, { "epoch": 0.6270709596748985, "grad_norm": 0.2216796875, "learning_rate": 0.00018821311094257716, "loss": 1.5808, "step": 2006 }, { "epoch": 0.6273835573616755, "grad_norm": 0.2392578125, "learning_rate": 0.00018820153544636713, "loss": 1.6451, "step": 2007 }, { "epoch": 0.6276961550484527, "grad_norm": 0.2392578125, "learning_rate": 0.000188189954625318, "loss": 1.6479, "step": 2008 }, { "epoch": 0.6280087527352297, "grad_norm": 0.23046875, "learning_rate": 0.0001881783684801289, "loss": 1.6755, "step": 2009 }, { "epoch": 0.6283213504220069, "grad_norm": 0.228515625, "learning_rate": 0.00018816677701149939, "loss": 1.6337, "step": 2010 }, { "epoch": 0.628633948108784, "grad_norm": 0.2373046875, "learning_rate": 0.00018815518022012915, "loss": 1.648, "step": 2011 }, { "epoch": 0.6289465457955611, "grad_norm": 0.234375, "learning_rate": 0.00018814357810671833, "loss": 1.586, "step": 2012 }, { "epoch": 0.6292591434823382, "grad_norm": 0.232421875, "learning_rate": 0.0001881319706719674, "loss": 1.5722, "step": 2013 }, { "epoch": 0.6295717411691153, "grad_norm": 0.251953125, "learning_rate": 0.0001881203579165771, "loss": 1.946, "step": 2014 }, { "epoch": 0.6298843388558925, "grad_norm": 0.228515625, "learning_rate": 0.0001881087398412485, "loss": 1.7869, "step": 2015 }, { "epoch": 0.6301969365426696, "grad_norm": 0.21875, "learning_rate": 0.000188097116446683, "loss": 1.7194, "step": 2016 }, { "epoch": 0.6305095342294467, "grad_norm": 0.2353515625, "learning_rate": 0.0001880854877335823, "loss": 2.0099, "step": 2017 }, { "epoch": 0.6308221319162238, "grad_norm": 0.228515625, "learning_rate": 0.00018807385370264848, "loss": 1.8415, "step": 2018 }, { "epoch": 0.631134729603001, "grad_norm": 0.2314453125, "learning_rate": 0.00018806221435458388, "loss": 1.6398, "step": 2019 }, { "epoch": 0.631447327289778, "grad_norm": 0.2216796875, "learning_rate": 0.00018805056969009115, "loss": 1.8436, "step": 2020 }, { "epoch": 0.6317599249765552, "grad_norm": 0.22265625, "learning_rate": 0.00018803891970987333, "loss": 1.5016, "step": 2021 }, { "epoch": 0.6320725226633322, "grad_norm": 0.2275390625, "learning_rate": 0.00018802726441463375, "loss": 1.5147, "step": 2022 }, { "epoch": 0.6323851203501094, "grad_norm": 0.2294921875, "learning_rate": 0.00018801560380507604, "loss": 1.5146, "step": 2023 }, { "epoch": 0.6326977180368866, "grad_norm": 0.224609375, "learning_rate": 0.00018800393788190415, "loss": 1.8504, "step": 2024 }, { "epoch": 0.6330103157236636, "grad_norm": 0.2333984375, "learning_rate": 0.00018799226664582245, "loss": 1.6024, "step": 2025 }, { "epoch": 0.6333229134104408, "grad_norm": 0.2294921875, "learning_rate": 0.00018798059009753542, "loss": 1.8456, "step": 2026 }, { "epoch": 0.6336355110972178, "grad_norm": 0.2236328125, "learning_rate": 0.00018796890823774806, "loss": 1.5829, "step": 2027 }, { "epoch": 0.633948108783995, "grad_norm": 0.2333984375, "learning_rate": 0.00018795722106716562, "loss": 1.8332, "step": 2028 }, { "epoch": 0.6342607064707722, "grad_norm": 0.2255859375, "learning_rate": 0.00018794552858649366, "loss": 1.8867, "step": 2029 }, { "epoch": 0.6345733041575492, "grad_norm": 0.23828125, "learning_rate": 0.00018793383079643804, "loss": 1.7046, "step": 2030 }, { "epoch": 0.6348859018443264, "grad_norm": 0.2353515625, "learning_rate": 0.00018792212769770507, "loss": 1.4539, "step": 2031 }, { "epoch": 0.6351984995311035, "grad_norm": 0.224609375, "learning_rate": 0.00018791041929100115, "loss": 1.7966, "step": 2032 }, { "epoch": 0.6355110972178806, "grad_norm": 0.2373046875, "learning_rate": 0.0001878987055770332, "loss": 1.7888, "step": 2033 }, { "epoch": 0.6358236949046577, "grad_norm": 0.24609375, "learning_rate": 0.0001878869865565084, "loss": 1.5578, "step": 2034 }, { "epoch": 0.6361362925914348, "grad_norm": 0.228515625, "learning_rate": 0.0001878752622301342, "loss": 1.7211, "step": 2035 }, { "epoch": 0.6364488902782119, "grad_norm": 0.228515625, "learning_rate": 0.00018786353259861847, "loss": 1.5837, "step": 2036 }, { "epoch": 0.6367614879649891, "grad_norm": 0.2333984375, "learning_rate": 0.0001878517976626693, "loss": 1.6654, "step": 2037 }, { "epoch": 0.6370740856517662, "grad_norm": 0.2255859375, "learning_rate": 0.00018784005742299514, "loss": 1.9085, "step": 2038 }, { "epoch": 0.6373866833385433, "grad_norm": 0.275390625, "learning_rate": 0.0001878283118803048, "loss": 1.6215, "step": 2039 }, { "epoch": 0.6376992810253204, "grad_norm": 0.240234375, "learning_rate": 0.00018781656103530737, "loss": 1.9168, "step": 2040 }, { "epoch": 0.6380118787120975, "grad_norm": 0.224609375, "learning_rate": 0.0001878048048887122, "loss": 1.8944, "step": 2041 }, { "epoch": 0.6383244763988747, "grad_norm": 0.2275390625, "learning_rate": 0.00018779304344122908, "loss": 1.7528, "step": 2042 }, { "epoch": 0.6386370740856517, "grad_norm": 0.228515625, "learning_rate": 0.00018778127669356805, "loss": 1.8204, "step": 2043 }, { "epoch": 0.6389496717724289, "grad_norm": 0.2314453125, "learning_rate": 0.0001877695046464395, "loss": 1.7069, "step": 2044 }, { "epoch": 0.6392622694592061, "grad_norm": 0.2197265625, "learning_rate": 0.0001877577273005541, "loss": 1.3533, "step": 2045 }, { "epoch": 0.6395748671459831, "grad_norm": 0.22265625, "learning_rate": 0.00018774594465662288, "loss": 1.6023, "step": 2046 }, { "epoch": 0.6398874648327603, "grad_norm": 0.2255859375, "learning_rate": 0.00018773415671535714, "loss": 1.9426, "step": 2047 }, { "epoch": 0.6402000625195373, "grad_norm": 0.2216796875, "learning_rate": 0.00018772236347746856, "loss": 1.7982, "step": 2048 }, { "epoch": 0.6405126602063145, "grad_norm": 0.2255859375, "learning_rate": 0.00018771056494366913, "loss": 1.7041, "step": 2049 }, { "epoch": 0.6408252578930916, "grad_norm": 0.2216796875, "learning_rate": 0.00018769876111467113, "loss": 1.7406, "step": 2050 }, { "epoch": 0.6411378555798687, "grad_norm": 0.240234375, "learning_rate": 0.00018768695199118717, "loss": 1.6077, "step": 2051 }, { "epoch": 0.6414504532666458, "grad_norm": 0.2373046875, "learning_rate": 0.00018767513757393016, "loss": 1.7813, "step": 2052 }, { "epoch": 0.6417630509534229, "grad_norm": 0.2294921875, "learning_rate": 0.00018766331786361338, "loss": 1.6976, "step": 2053 }, { "epoch": 0.6420756486402001, "grad_norm": 0.2421875, "learning_rate": 0.00018765149286095037, "loss": 1.6368, "step": 2054 }, { "epoch": 0.6423882463269772, "grad_norm": 0.2353515625, "learning_rate": 0.00018763966256665505, "loss": 1.6045, "step": 2055 }, { "epoch": 0.6427008440137543, "grad_norm": 0.22265625, "learning_rate": 0.00018762782698144163, "loss": 1.5185, "step": 2056 }, { "epoch": 0.6430134417005314, "grad_norm": 0.23828125, "learning_rate": 0.00018761598610602463, "loss": 1.5806, "step": 2057 }, { "epoch": 0.6433260393873085, "grad_norm": 0.2470703125, "learning_rate": 0.0001876041399411189, "loss": 1.6609, "step": 2058 }, { "epoch": 0.6436386370740856, "grad_norm": 0.2197265625, "learning_rate": 0.0001875922884874396, "loss": 1.6643, "step": 2059 }, { "epoch": 0.6439512347608628, "grad_norm": 0.236328125, "learning_rate": 0.00018758043174570222, "loss": 1.5697, "step": 2060 }, { "epoch": 0.6442638324476399, "grad_norm": 0.22265625, "learning_rate": 0.00018756856971662258, "loss": 1.6761, "step": 2061 }, { "epoch": 0.644576430134417, "grad_norm": 0.2255859375, "learning_rate": 0.00018755670240091677, "loss": 1.5763, "step": 2062 }, { "epoch": 0.6448890278211942, "grad_norm": 0.240234375, "learning_rate": 0.0001875448297993013, "loss": 1.7233, "step": 2063 }, { "epoch": 0.6452016255079712, "grad_norm": 0.2353515625, "learning_rate": 0.00018753295191249286, "loss": 1.623, "step": 2064 }, { "epoch": 0.6455142231947484, "grad_norm": 0.2236328125, "learning_rate": 0.00018752106874120862, "loss": 1.5065, "step": 2065 }, { "epoch": 0.6458268208815254, "grad_norm": 0.251953125, "learning_rate": 0.0001875091802861659, "loss": 2.0689, "step": 2066 }, { "epoch": 0.6461394185683026, "grad_norm": 0.2216796875, "learning_rate": 0.00018749728654808242, "loss": 1.7316, "step": 2067 }, { "epoch": 0.6464520162550798, "grad_norm": 0.224609375, "learning_rate": 0.0001874853875276763, "loss": 1.7759, "step": 2068 }, { "epoch": 0.6467646139418568, "grad_norm": 0.2236328125, "learning_rate": 0.00018747348322566582, "loss": 1.6177, "step": 2069 }, { "epoch": 0.647077211628634, "grad_norm": 0.244140625, "learning_rate": 0.0001874615736427697, "loss": 1.8813, "step": 2070 }, { "epoch": 0.647389809315411, "grad_norm": 0.2255859375, "learning_rate": 0.00018744965877970696, "loss": 1.6428, "step": 2071 }, { "epoch": 0.6477024070021882, "grad_norm": 0.2890625, "learning_rate": 0.00018743773863719683, "loss": 2.3381, "step": 2072 }, { "epoch": 0.6480150046889653, "grad_norm": 0.2265625, "learning_rate": 0.00018742581321595902, "loss": 1.4568, "step": 2073 }, { "epoch": 0.6483276023757424, "grad_norm": 0.220703125, "learning_rate": 0.00018741388251671345, "loss": 1.5651, "step": 2074 }, { "epoch": 0.6486402000625195, "grad_norm": 0.2421875, "learning_rate": 0.0001874019465401804, "loss": 1.8459, "step": 2075 }, { "epoch": 0.6489527977492967, "grad_norm": 0.2265625, "learning_rate": 0.00018739000528708046, "loss": 1.6691, "step": 2076 }, { "epoch": 0.6492653954360738, "grad_norm": 0.2255859375, "learning_rate": 0.00018737805875813454, "loss": 1.8378, "step": 2077 }, { "epoch": 0.6495779931228509, "grad_norm": 0.2353515625, "learning_rate": 0.00018736610695406386, "loss": 1.8245, "step": 2078 }, { "epoch": 0.649890590809628, "grad_norm": 0.2373046875, "learning_rate": 0.00018735414987559, "loss": 1.7107, "step": 2079 }, { "epoch": 0.6502031884964051, "grad_norm": 0.2333984375, "learning_rate": 0.00018734218752343478, "loss": 1.7694, "step": 2080 }, { "epoch": 0.6505157861831823, "grad_norm": 0.2236328125, "learning_rate": 0.00018733021989832035, "loss": 1.7134, "step": 2081 }, { "epoch": 0.6508283838699593, "grad_norm": 0.216796875, "learning_rate": 0.00018731824700096933, "loss": 1.8064, "step": 2082 }, { "epoch": 0.6511409815567365, "grad_norm": 0.23828125, "learning_rate": 0.00018730626883210443, "loss": 1.694, "step": 2083 }, { "epoch": 0.6514535792435135, "grad_norm": 0.2158203125, "learning_rate": 0.00018729428539244884, "loss": 1.7573, "step": 2084 }, { "epoch": 0.6517661769302907, "grad_norm": 0.228515625, "learning_rate": 0.00018728229668272598, "loss": 1.6263, "step": 2085 }, { "epoch": 0.6520787746170679, "grad_norm": 0.2158203125, "learning_rate": 0.00018727030270365965, "loss": 1.846, "step": 2086 }, { "epoch": 0.6523913723038449, "grad_norm": 0.244140625, "learning_rate": 0.00018725830345597396, "loss": 1.7912, "step": 2087 }, { "epoch": 0.6527039699906221, "grad_norm": 0.2236328125, "learning_rate": 0.0001872462989403933, "loss": 1.777, "step": 2088 }, { "epoch": 0.6530165676773992, "grad_norm": 0.2216796875, "learning_rate": 0.00018723428915764237, "loss": 1.675, "step": 2089 }, { "epoch": 0.6533291653641763, "grad_norm": 0.2314453125, "learning_rate": 0.00018722227410844625, "loss": 1.5869, "step": 2090 }, { "epoch": 0.6536417630509535, "grad_norm": 0.244140625, "learning_rate": 0.00018721025379353026, "loss": 1.8295, "step": 2091 }, { "epoch": 0.6539543607377305, "grad_norm": 0.23046875, "learning_rate": 0.00018719822821362017, "loss": 1.6437, "step": 2092 }, { "epoch": 0.6542669584245077, "grad_norm": 0.2421875, "learning_rate": 0.0001871861973694419, "loss": 1.8373, "step": 2093 }, { "epoch": 0.6545795561112848, "grad_norm": 0.2353515625, "learning_rate": 0.00018717416126172177, "loss": 1.3641, "step": 2094 }, { "epoch": 0.6548921537980619, "grad_norm": 0.2373046875, "learning_rate": 0.00018716211989118646, "loss": 1.7446, "step": 2095 }, { "epoch": 0.655204751484839, "grad_norm": 0.234375, "learning_rate": 0.00018715007325856292, "loss": 1.7373, "step": 2096 }, { "epoch": 0.6555173491716161, "grad_norm": 0.23828125, "learning_rate": 0.00018713802136457837, "loss": 1.6263, "step": 2097 }, { "epoch": 0.6558299468583932, "grad_norm": 0.23046875, "learning_rate": 0.00018712596420996045, "loss": 1.7508, "step": 2098 }, { "epoch": 0.6561425445451704, "grad_norm": 0.232421875, "learning_rate": 0.00018711390179543703, "loss": 1.8481, "step": 2099 }, { "epoch": 0.6564551422319475, "grad_norm": 0.232421875, "learning_rate": 0.00018710183412173635, "loss": 1.7739, "step": 2100 }, { "epoch": 0.6567677399187246, "grad_norm": 0.2265625, "learning_rate": 0.00018708976118958693, "loss": 1.989, "step": 2101 }, { "epoch": 0.6570803376055018, "grad_norm": 0.2216796875, "learning_rate": 0.0001870776829997177, "loss": 1.8054, "step": 2102 }, { "epoch": 0.6573929352922788, "grad_norm": 0.2265625, "learning_rate": 0.00018706559955285773, "loss": 1.665, "step": 2103 }, { "epoch": 0.657705532979056, "grad_norm": 0.22265625, "learning_rate": 0.0001870535108497366, "loss": 1.703, "step": 2104 }, { "epoch": 0.658018130665833, "grad_norm": 0.2333984375, "learning_rate": 0.0001870414168910841, "loss": 1.7818, "step": 2105 }, { "epoch": 0.6583307283526102, "grad_norm": 0.2275390625, "learning_rate": 0.00018702931767763028, "loss": 1.5893, "step": 2106 }, { "epoch": 0.6586433260393874, "grad_norm": 0.2236328125, "learning_rate": 0.0001870172132101057, "loss": 1.6743, "step": 2107 }, { "epoch": 0.6589559237261644, "grad_norm": 0.2255859375, "learning_rate": 0.00018700510348924106, "loss": 1.5062, "step": 2108 }, { "epoch": 0.6592685214129416, "grad_norm": 0.2314453125, "learning_rate": 0.00018699298851576743, "loss": 1.4517, "step": 2109 }, { "epoch": 0.6595811190997186, "grad_norm": 0.23828125, "learning_rate": 0.00018698086829041627, "loss": 1.7555, "step": 2110 }, { "epoch": 0.6598937167864958, "grad_norm": 0.2333984375, "learning_rate": 0.0001869687428139192, "loss": 1.7701, "step": 2111 }, { "epoch": 0.6602063144732729, "grad_norm": 0.228515625, "learning_rate": 0.00018695661208700836, "loss": 1.5693, "step": 2112 }, { "epoch": 0.66051891216005, "grad_norm": 0.2265625, "learning_rate": 0.000186944476110416, "loss": 1.473, "step": 2113 }, { "epoch": 0.6608315098468271, "grad_norm": 0.2353515625, "learning_rate": 0.00018693233488487483, "loss": 1.4396, "step": 2114 }, { "epoch": 0.6611441075336042, "grad_norm": 0.236328125, "learning_rate": 0.00018692018841111782, "loss": 1.9964, "step": 2115 }, { "epoch": 0.6614567052203814, "grad_norm": 0.2255859375, "learning_rate": 0.00018690803668987827, "loss": 1.6639, "step": 2116 }, { "epoch": 0.6617693029071585, "grad_norm": 0.2353515625, "learning_rate": 0.0001868958797218898, "loss": 1.7607, "step": 2117 }, { "epoch": 0.6620819005939356, "grad_norm": 0.2314453125, "learning_rate": 0.00018688371750788635, "loss": 1.5137, "step": 2118 }, { "epoch": 0.6623944982807127, "grad_norm": 0.21875, "learning_rate": 0.00018687155004860215, "loss": 1.5756, "step": 2119 }, { "epoch": 0.6627070959674899, "grad_norm": 0.2314453125, "learning_rate": 0.00018685937734477177, "loss": 1.7926, "step": 2120 }, { "epoch": 0.6630196936542669, "grad_norm": 0.2333984375, "learning_rate": 0.0001868471993971301, "loss": 1.7269, "step": 2121 }, { "epoch": 0.6633322913410441, "grad_norm": 0.232421875, "learning_rate": 0.0001868350162064123, "loss": 1.6515, "step": 2122 }, { "epoch": 0.6636448890278211, "grad_norm": 0.2255859375, "learning_rate": 0.00018682282777335397, "loss": 1.5462, "step": 2123 }, { "epoch": 0.6639574867145983, "grad_norm": 0.2470703125, "learning_rate": 0.00018681063409869085, "loss": 1.7719, "step": 2124 }, { "epoch": 0.6642700844013755, "grad_norm": 0.2294921875, "learning_rate": 0.00018679843518315913, "loss": 1.9495, "step": 2125 }, { "epoch": 0.6645826820881525, "grad_norm": 0.232421875, "learning_rate": 0.0001867862310274953, "loss": 1.5323, "step": 2126 }, { "epoch": 0.6648952797749297, "grad_norm": 0.2470703125, "learning_rate": 0.00018677402163243606, "loss": 1.5997, "step": 2127 }, { "epoch": 0.6652078774617067, "grad_norm": 0.2265625, "learning_rate": 0.0001867618069987186, "loss": 1.891, "step": 2128 }, { "epoch": 0.6655204751484839, "grad_norm": 0.2275390625, "learning_rate": 0.00018674958712708027, "loss": 1.7805, "step": 2129 }, { "epoch": 0.665833072835261, "grad_norm": 0.228515625, "learning_rate": 0.00018673736201825882, "loss": 1.7896, "step": 2130 }, { "epoch": 0.6661456705220381, "grad_norm": 0.2314453125, "learning_rate": 0.0001867251316729923, "loss": 1.8483, "step": 2131 }, { "epoch": 0.6664582682088153, "grad_norm": 0.234375, "learning_rate": 0.00018671289609201907, "loss": 1.8642, "step": 2132 }, { "epoch": 0.6667708658955924, "grad_norm": 0.2294921875, "learning_rate": 0.0001867006552760778, "loss": 1.4944, "step": 2133 }, { "epoch": 0.6670834635823695, "grad_norm": 0.2265625, "learning_rate": 0.00018668840922590746, "loss": 1.4096, "step": 2134 }, { "epoch": 0.6673960612691466, "grad_norm": 0.2216796875, "learning_rate": 0.00018667615794224743, "loss": 1.8447, "step": 2135 }, { "epoch": 0.6677086589559237, "grad_norm": 0.228515625, "learning_rate": 0.00018666390142583724, "loss": 1.7672, "step": 2136 }, { "epoch": 0.6680212566427008, "grad_norm": 0.224609375, "learning_rate": 0.00018665163967741694, "loss": 1.4677, "step": 2137 }, { "epoch": 0.668333854329478, "grad_norm": 0.248046875, "learning_rate": 0.0001866393726977267, "loss": 1.9113, "step": 2138 }, { "epoch": 0.668646452016255, "grad_norm": 0.2294921875, "learning_rate": 0.00018662710048750712, "loss": 1.6074, "step": 2139 }, { "epoch": 0.6689590497030322, "grad_norm": 0.2412109375, "learning_rate": 0.00018661482304749915, "loss": 1.9865, "step": 2140 }, { "epoch": 0.6692716473898093, "grad_norm": 0.234375, "learning_rate": 0.00018660254037844388, "loss": 1.5433, "step": 2141 }, { "epoch": 0.6695842450765864, "grad_norm": 0.2412109375, "learning_rate": 0.00018659025248108288, "loss": 1.7213, "step": 2142 }, { "epoch": 0.6698968427633636, "grad_norm": 0.220703125, "learning_rate": 0.00018657795935615802, "loss": 1.7668, "step": 2143 }, { "epoch": 0.6702094404501406, "grad_norm": 0.240234375, "learning_rate": 0.00018656566100441144, "loss": 1.7344, "step": 2144 }, { "epoch": 0.6705220381369178, "grad_norm": 0.21875, "learning_rate": 0.00018655335742658556, "loss": 1.6451, "step": 2145 }, { "epoch": 0.670834635823695, "grad_norm": 0.224609375, "learning_rate": 0.00018654104862342324, "loss": 1.6888, "step": 2146 }, { "epoch": 0.671147233510472, "grad_norm": 0.2294921875, "learning_rate": 0.00018652873459566749, "loss": 1.426, "step": 2147 }, { "epoch": 0.6714598311972492, "grad_norm": 0.2353515625, "learning_rate": 0.00018651641534406178, "loss": 1.6177, "step": 2148 }, { "epoch": 0.6717724288840262, "grad_norm": 0.240234375, "learning_rate": 0.00018650409086934985, "loss": 1.6962, "step": 2149 }, { "epoch": 0.6720850265708034, "grad_norm": 0.2275390625, "learning_rate": 0.0001864917611722757, "loss": 1.6879, "step": 2150 }, { "epoch": 0.6723976242575805, "grad_norm": 0.22265625, "learning_rate": 0.0001864794262535837, "loss": 1.9992, "step": 2151 }, { "epoch": 0.6727102219443576, "grad_norm": 0.23046875, "learning_rate": 0.0001864670861140186, "loss": 1.9401, "step": 2152 }, { "epoch": 0.6730228196311347, "grad_norm": 0.2421875, "learning_rate": 0.00018645474075432524, "loss": 1.8057, "step": 2153 }, { "epoch": 0.6733354173179118, "grad_norm": 0.2412109375, "learning_rate": 0.00018644239017524906, "loss": 2.0631, "step": 2154 }, { "epoch": 0.673648015004689, "grad_norm": 0.234375, "learning_rate": 0.00018643003437753558, "loss": 1.6794, "step": 2155 }, { "epoch": 0.6739606126914661, "grad_norm": 0.2236328125, "learning_rate": 0.00018641767336193086, "loss": 1.7738, "step": 2156 }, { "epoch": 0.6742732103782432, "grad_norm": 0.240234375, "learning_rate": 0.000186405307129181, "loss": 1.8517, "step": 2157 }, { "epoch": 0.6745858080650203, "grad_norm": 0.2353515625, "learning_rate": 0.00018639293568003268, "loss": 1.5776, "step": 2158 }, { "epoch": 0.6748984057517975, "grad_norm": 0.2236328125, "learning_rate": 0.00018638055901523277, "loss": 1.6955, "step": 2159 }, { "epoch": 0.6752110034385745, "grad_norm": 0.23828125, "learning_rate": 0.00018636817713552837, "loss": 1.6111, "step": 2160 }, { "epoch": 0.6755236011253517, "grad_norm": 0.2421875, "learning_rate": 0.00018635579004166712, "loss": 1.8155, "step": 2161 }, { "epoch": 0.6758361988121288, "grad_norm": 0.2177734375, "learning_rate": 0.00018634339773439674, "loss": 1.6656, "step": 2162 }, { "epoch": 0.6761487964989059, "grad_norm": 0.2314453125, "learning_rate": 0.0001863310002144654, "loss": 1.5922, "step": 2163 }, { "epoch": 0.6764613941856831, "grad_norm": 0.220703125, "learning_rate": 0.0001863185974826216, "loss": 1.7238, "step": 2164 }, { "epoch": 0.6767739918724601, "grad_norm": 0.2431640625, "learning_rate": 0.00018630618953961408, "loss": 1.6582, "step": 2165 }, { "epoch": 0.6770865895592373, "grad_norm": 0.283203125, "learning_rate": 0.0001862937763861919, "loss": 2.3931, "step": 2166 }, { "epoch": 0.6773991872460143, "grad_norm": 0.2294921875, "learning_rate": 0.00018628135802310446, "loss": 1.7434, "step": 2167 }, { "epoch": 0.6777117849327915, "grad_norm": 0.251953125, "learning_rate": 0.0001862689344511015, "loss": 2.0366, "step": 2168 }, { "epoch": 0.6780243826195687, "grad_norm": 0.232421875, "learning_rate": 0.000186256505670933, "loss": 1.6197, "step": 2169 }, { "epoch": 0.6783369803063457, "grad_norm": 0.22265625, "learning_rate": 0.0001862440716833494, "loss": 1.5561, "step": 2170 }, { "epoch": 0.6786495779931229, "grad_norm": 0.248046875, "learning_rate": 0.00018623163248910127, "loss": 1.8304, "step": 2171 }, { "epoch": 0.6789621756798999, "grad_norm": 0.2255859375, "learning_rate": 0.00018621918808893958, "loss": 1.3873, "step": 2172 }, { "epoch": 0.6792747733666771, "grad_norm": 0.23046875, "learning_rate": 0.00018620673848361566, "loss": 1.4493, "step": 2173 }, { "epoch": 0.6795873710534542, "grad_norm": 0.25390625, "learning_rate": 0.00018619428367388103, "loss": 1.7057, "step": 2174 }, { "epoch": 0.6798999687402313, "grad_norm": 0.232421875, "learning_rate": 0.0001861818236604877, "loss": 1.5443, "step": 2175 }, { "epoch": 0.6802125664270084, "grad_norm": 0.228515625, "learning_rate": 0.00018616935844418785, "loss": 1.651, "step": 2176 }, { "epoch": 0.6805251641137856, "grad_norm": 0.2470703125, "learning_rate": 0.000186156888025734, "loss": 1.7987, "step": 2177 }, { "epoch": 0.6808377618005627, "grad_norm": 0.234375, "learning_rate": 0.00018614441240587907, "loss": 1.8154, "step": 2178 }, { "epoch": 0.6811503594873398, "grad_norm": 0.232421875, "learning_rate": 0.0001861319315853762, "loss": 1.7168, "step": 2179 }, { "epoch": 0.6814629571741169, "grad_norm": 0.234375, "learning_rate": 0.0001861194455649788, "loss": 1.4816, "step": 2180 }, { "epoch": 0.681775554860894, "grad_norm": 0.2236328125, "learning_rate": 0.00018610695434544074, "loss": 1.5243, "step": 2181 }, { "epoch": 0.6820881525476712, "grad_norm": 0.2255859375, "learning_rate": 0.00018609445792751618, "loss": 1.7344, "step": 2182 }, { "epoch": 0.6824007502344482, "grad_norm": 0.228515625, "learning_rate": 0.00018608195631195939, "loss": 1.8136, "step": 2183 }, { "epoch": 0.6827133479212254, "grad_norm": 0.2353515625, "learning_rate": 0.00018606944949952524, "loss": 1.7538, "step": 2184 }, { "epoch": 0.6830259456080024, "grad_norm": 0.236328125, "learning_rate": 0.00018605693749096876, "loss": 1.8747, "step": 2185 }, { "epoch": 0.6833385432947796, "grad_norm": 0.2353515625, "learning_rate": 0.00018604442028704533, "loss": 1.6926, "step": 2186 }, { "epoch": 0.6836511409815568, "grad_norm": 0.228515625, "learning_rate": 0.00018603189788851055, "loss": 1.7869, "step": 2187 }, { "epoch": 0.6839637386683338, "grad_norm": 0.2236328125, "learning_rate": 0.00018601937029612048, "loss": 1.6719, "step": 2188 }, { "epoch": 0.684276336355111, "grad_norm": 0.23828125, "learning_rate": 0.0001860068375106314, "loss": 1.7719, "step": 2189 }, { "epoch": 0.6845889340418881, "grad_norm": 0.2265625, "learning_rate": 0.00018599429953279994, "loss": 1.618, "step": 2190 }, { "epoch": 0.6849015317286652, "grad_norm": 0.2294921875, "learning_rate": 0.00018598175636338305, "loss": 1.7768, "step": 2191 }, { "epoch": 0.6852141294154424, "grad_norm": 0.234375, "learning_rate": 0.00018596920800313798, "loss": 1.9978, "step": 2192 }, { "epoch": 0.6855267271022194, "grad_norm": 0.22265625, "learning_rate": 0.0001859566544528222, "loss": 1.3867, "step": 2193 }, { "epoch": 0.6858393247889966, "grad_norm": 0.2294921875, "learning_rate": 0.0001859440957131937, "loss": 1.5844, "step": 2194 }, { "epoch": 0.6861519224757737, "grad_norm": 0.234375, "learning_rate": 0.00018593153178501063, "loss": 1.7227, "step": 2195 }, { "epoch": 0.6864645201625508, "grad_norm": 0.25390625, "learning_rate": 0.0001859189626690315, "loss": 1.8812, "step": 2196 }, { "epoch": 0.6867771178493279, "grad_norm": 0.2236328125, "learning_rate": 0.00018590638836601505, "loss": 1.5477, "step": 2197 }, { "epoch": 0.687089715536105, "grad_norm": 0.2255859375, "learning_rate": 0.0001858938088767205, "loss": 1.8684, "step": 2198 }, { "epoch": 0.6874023132228821, "grad_norm": 0.236328125, "learning_rate": 0.00018588122420190722, "loss": 1.8864, "step": 2199 }, { "epoch": 0.6877149109096593, "grad_norm": 0.2333984375, "learning_rate": 0.00018586863434233504, "loss": 1.7888, "step": 2200 }, { "epoch": 0.6880275085964364, "grad_norm": 0.2353515625, "learning_rate": 0.00018585603929876395, "loss": 1.6452, "step": 2201 }, { "epoch": 0.6883401062832135, "grad_norm": 0.2373046875, "learning_rate": 0.00018584343907195437, "loss": 1.585, "step": 2202 }, { "epoch": 0.6886527039699907, "grad_norm": 0.23828125, "learning_rate": 0.000185830833662667, "loss": 1.7144, "step": 2203 }, { "epoch": 0.6889653016567677, "grad_norm": 0.2275390625, "learning_rate": 0.00018581822307166281, "loss": 1.7379, "step": 2204 }, { "epoch": 0.6892778993435449, "grad_norm": 0.2333984375, "learning_rate": 0.00018580560729970313, "loss": 1.777, "step": 2205 }, { "epoch": 0.6895904970303219, "grad_norm": 0.2451171875, "learning_rate": 0.00018579298634754962, "loss": 1.902, "step": 2206 }, { "epoch": 0.6899030947170991, "grad_norm": 0.220703125, "learning_rate": 0.00018578036021596415, "loss": 1.6602, "step": 2207 }, { "epoch": 0.6902156924038763, "grad_norm": 0.2412109375, "learning_rate": 0.00018576772890570905, "loss": 1.8837, "step": 2208 }, { "epoch": 0.6905282900906533, "grad_norm": 0.251953125, "learning_rate": 0.00018575509241754685, "loss": 1.6694, "step": 2209 }, { "epoch": 0.6908408877774305, "grad_norm": 0.2294921875, "learning_rate": 0.00018574245075224046, "loss": 1.7201, "step": 2210 }, { "epoch": 0.6911534854642075, "grad_norm": 0.2392578125, "learning_rate": 0.00018572980391055305, "loss": 1.4998, "step": 2211 }, { "epoch": 0.6914660831509847, "grad_norm": 0.2255859375, "learning_rate": 0.00018571715189324813, "loss": 1.4607, "step": 2212 }, { "epoch": 0.6917786808377618, "grad_norm": 0.2158203125, "learning_rate": 0.00018570449470108952, "loss": 1.8028, "step": 2213 }, { "epoch": 0.6920912785245389, "grad_norm": 0.234375, "learning_rate": 0.00018569183233484133, "loss": 1.5558, "step": 2214 }, { "epoch": 0.692403876211316, "grad_norm": 0.2275390625, "learning_rate": 0.00018567916479526804, "loss": 1.5834, "step": 2215 }, { "epoch": 0.6927164738980931, "grad_norm": 0.232421875, "learning_rate": 0.0001856664920831344, "loss": 1.6607, "step": 2216 }, { "epoch": 0.6930290715848703, "grad_norm": 0.236328125, "learning_rate": 0.00018565381419920546, "loss": 1.5378, "step": 2217 }, { "epoch": 0.6933416692716474, "grad_norm": 0.2412109375, "learning_rate": 0.00018564113114424662, "loss": 1.8949, "step": 2218 }, { "epoch": 0.6936542669584245, "grad_norm": 0.234375, "learning_rate": 0.00018562844291902353, "loss": 1.9261, "step": 2219 }, { "epoch": 0.6939668646452016, "grad_norm": 0.2353515625, "learning_rate": 0.00018561574952430222, "loss": 2.0413, "step": 2220 }, { "epoch": 0.6942794623319788, "grad_norm": 0.2216796875, "learning_rate": 0.00018560305096084904, "loss": 1.7628, "step": 2221 }, { "epoch": 0.6945920600187558, "grad_norm": 0.240234375, "learning_rate": 0.00018559034722943056, "loss": 1.6226, "step": 2222 }, { "epoch": 0.694904657705533, "grad_norm": 0.22265625, "learning_rate": 0.00018557763833081377, "loss": 1.8693, "step": 2223 }, { "epoch": 0.69521725539231, "grad_norm": 0.2216796875, "learning_rate": 0.0001855649242657659, "loss": 1.4996, "step": 2224 }, { "epoch": 0.6955298530790872, "grad_norm": 0.25390625, "learning_rate": 0.00018555220503505452, "loss": 2.2346, "step": 2225 }, { "epoch": 0.6958424507658644, "grad_norm": 0.2255859375, "learning_rate": 0.00018553948063944749, "loss": 1.773, "step": 2226 }, { "epoch": 0.6961550484526414, "grad_norm": 0.2373046875, "learning_rate": 0.000185526751079713, "loss": 1.8362, "step": 2227 }, { "epoch": 0.6964676461394186, "grad_norm": 0.2373046875, "learning_rate": 0.00018551401635661958, "loss": 1.6007, "step": 2228 }, { "epoch": 0.6967802438261956, "grad_norm": 0.234375, "learning_rate": 0.00018550127647093601, "loss": 1.5875, "step": 2229 }, { "epoch": 0.6970928415129728, "grad_norm": 0.228515625, "learning_rate": 0.00018548853142343142, "loss": 1.7156, "step": 2230 }, { "epoch": 0.69740543919975, "grad_norm": 0.2412109375, "learning_rate": 0.00018547578121487528, "loss": 1.784, "step": 2231 }, { "epoch": 0.697718036886527, "grad_norm": 0.2333984375, "learning_rate": 0.00018546302584603727, "loss": 1.6756, "step": 2232 }, { "epoch": 0.6980306345733042, "grad_norm": 0.25, "learning_rate": 0.0001854502653176875, "loss": 1.8622, "step": 2233 }, { "epoch": 0.6983432322600813, "grad_norm": 0.2197265625, "learning_rate": 0.0001854374996305963, "loss": 1.383, "step": 2234 }, { "epoch": 0.6986558299468584, "grad_norm": 0.2314453125, "learning_rate": 0.0001854247287855344, "loss": 1.516, "step": 2235 }, { "epoch": 0.6989684276336355, "grad_norm": 0.2294921875, "learning_rate": 0.00018541195278327276, "loss": 1.5284, "step": 2236 }, { "epoch": 0.6992810253204126, "grad_norm": 0.2255859375, "learning_rate": 0.0001853991716245827, "loss": 1.4208, "step": 2237 }, { "epoch": 0.6995936230071897, "grad_norm": 0.2314453125, "learning_rate": 0.0001853863853102358, "loss": 1.8169, "step": 2238 }, { "epoch": 0.6999062206939669, "grad_norm": 0.23046875, "learning_rate": 0.000185373593841004, "loss": 1.686, "step": 2239 }, { "epoch": 0.700218818380744, "grad_norm": 0.234375, "learning_rate": 0.00018536079721765956, "loss": 1.4067, "step": 2240 }, { "epoch": 0.7005314160675211, "grad_norm": 0.228515625, "learning_rate": 0.00018534799544097505, "loss": 1.7239, "step": 2241 }, { "epoch": 0.7008440137542982, "grad_norm": 0.2255859375, "learning_rate": 0.00018533518851172325, "loss": 1.6176, "step": 2242 }, { "epoch": 0.7011566114410753, "grad_norm": 0.2333984375, "learning_rate": 0.0001853223764306774, "loss": 1.6086, "step": 2243 }, { "epoch": 0.7014692091278525, "grad_norm": 0.2255859375, "learning_rate": 0.00018530955919861096, "loss": 1.5131, "step": 2244 }, { "epoch": 0.7017818068146295, "grad_norm": 0.224609375, "learning_rate": 0.0001852967368162977, "loss": 1.685, "step": 2245 }, { "epoch": 0.7020944045014067, "grad_norm": 0.232421875, "learning_rate": 0.00018528390928451173, "loss": 1.8137, "step": 2246 }, { "epoch": 0.7024070021881839, "grad_norm": 0.2333984375, "learning_rate": 0.00018527107660402752, "loss": 1.7175, "step": 2247 }, { "epoch": 0.7027195998749609, "grad_norm": 0.2255859375, "learning_rate": 0.00018525823877561974, "loss": 1.6921, "step": 2248 }, { "epoch": 0.7030321975617381, "grad_norm": 0.23046875, "learning_rate": 0.0001852453958000634, "loss": 1.9215, "step": 2249 }, { "epoch": 0.7033447952485151, "grad_norm": 0.228515625, "learning_rate": 0.00018523254767813393, "loss": 1.5655, "step": 2250 }, { "epoch": 0.7036573929352923, "grad_norm": 0.2177734375, "learning_rate": 0.00018521969441060695, "loss": 1.6418, "step": 2251 }, { "epoch": 0.7039699906220694, "grad_norm": 0.240234375, "learning_rate": 0.0001852068359982584, "loss": 1.8771, "step": 2252 }, { "epoch": 0.7042825883088465, "grad_norm": 0.2412109375, "learning_rate": 0.00018519397244186458, "loss": 1.7217, "step": 2253 }, { "epoch": 0.7045951859956237, "grad_norm": 0.228515625, "learning_rate": 0.0001851811037422021, "loss": 1.8586, "step": 2254 }, { "epoch": 0.7049077836824007, "grad_norm": 0.2236328125, "learning_rate": 0.00018516822990004782, "loss": 1.5904, "step": 2255 }, { "epoch": 0.7052203813691779, "grad_norm": 0.2255859375, "learning_rate": 0.00018515535091617898, "loss": 1.6428, "step": 2256 }, { "epoch": 0.705532979055955, "grad_norm": 0.234375, "learning_rate": 0.0001851424667913731, "loss": 1.7164, "step": 2257 }, { "epoch": 0.7058455767427321, "grad_norm": 0.23046875, "learning_rate": 0.00018512957752640799, "loss": 1.7193, "step": 2258 }, { "epoch": 0.7061581744295092, "grad_norm": 0.2294921875, "learning_rate": 0.00018511668312206177, "loss": 1.5025, "step": 2259 }, { "epoch": 0.7064707721162864, "grad_norm": 0.2294921875, "learning_rate": 0.00018510378357911296, "loss": 1.612, "step": 2260 }, { "epoch": 0.7067833698030634, "grad_norm": 0.2412109375, "learning_rate": 0.00018509087889834031, "loss": 1.5849, "step": 2261 }, { "epoch": 0.7070959674898406, "grad_norm": 0.25, "learning_rate": 0.00018507796908052285, "loss": 1.6807, "step": 2262 }, { "epoch": 0.7074085651766177, "grad_norm": 0.228515625, "learning_rate": 0.00018506505412643995, "loss": 1.6728, "step": 2263 }, { "epoch": 0.7077211628633948, "grad_norm": 0.234375, "learning_rate": 0.00018505213403687137, "loss": 1.7322, "step": 2264 }, { "epoch": 0.708033760550172, "grad_norm": 0.2265625, "learning_rate": 0.00018503920881259703, "loss": 1.6204, "step": 2265 }, { "epoch": 0.708346358236949, "grad_norm": 0.228515625, "learning_rate": 0.00018502627845439732, "loss": 1.5918, "step": 2266 }, { "epoch": 0.7086589559237262, "grad_norm": 0.2421875, "learning_rate": 0.00018501334296305285, "loss": 1.8249, "step": 2267 }, { "epoch": 0.7089715536105032, "grad_norm": 0.2431640625, "learning_rate": 0.00018500040233934454, "loss": 1.974, "step": 2268 }, { "epoch": 0.7092841512972804, "grad_norm": 0.2255859375, "learning_rate": 0.00018498745658405356, "loss": 1.6999, "step": 2269 }, { "epoch": 0.7095967489840576, "grad_norm": 0.23046875, "learning_rate": 0.00018497450569796158, "loss": 1.9307, "step": 2270 }, { "epoch": 0.7099093466708346, "grad_norm": 0.240234375, "learning_rate": 0.00018496154968185036, "loss": 1.7392, "step": 2271 }, { "epoch": 0.7102219443576118, "grad_norm": 0.2353515625, "learning_rate": 0.00018494858853650213, "loss": 1.7068, "step": 2272 }, { "epoch": 0.7105345420443888, "grad_norm": 0.232421875, "learning_rate": 0.0001849356222626994, "loss": 1.8758, "step": 2273 }, { "epoch": 0.710847139731166, "grad_norm": 0.236328125, "learning_rate": 0.00018492265086122488, "loss": 1.6345, "step": 2274 }, { "epoch": 0.7111597374179431, "grad_norm": 0.24609375, "learning_rate": 0.0001849096743328617, "loss": 1.7491, "step": 2275 }, { "epoch": 0.7114723351047202, "grad_norm": 0.2265625, "learning_rate": 0.0001848966926783933, "loss": 1.5166, "step": 2276 }, { "epoch": 0.7117849327914973, "grad_norm": 0.234375, "learning_rate": 0.0001848837058986034, "loss": 1.7068, "step": 2277 }, { "epoch": 0.7120975304782745, "grad_norm": 0.23046875, "learning_rate": 0.00018487071399427599, "loss": 1.7652, "step": 2278 }, { "epoch": 0.7124101281650516, "grad_norm": 0.2392578125, "learning_rate": 0.00018485771696619542, "loss": 1.7871, "step": 2279 }, { "epoch": 0.7127227258518287, "grad_norm": 0.228515625, "learning_rate": 0.00018484471481514635, "loss": 1.9055, "step": 2280 }, { "epoch": 0.7130353235386058, "grad_norm": 0.2275390625, "learning_rate": 0.0001848317075419137, "loss": 1.8693, "step": 2281 }, { "epoch": 0.7133479212253829, "grad_norm": 0.2294921875, "learning_rate": 0.00018481869514728279, "loss": 1.548, "step": 2282 }, { "epoch": 0.7136605189121601, "grad_norm": 0.2392578125, "learning_rate": 0.00018480567763203918, "loss": 1.614, "step": 2283 }, { "epoch": 0.7139731165989371, "grad_norm": 0.2314453125, "learning_rate": 0.0001847926549969687, "loss": 1.4828, "step": 2284 }, { "epoch": 0.7142857142857143, "grad_norm": 0.2294921875, "learning_rate": 0.00018477962724285763, "loss": 1.8229, "step": 2285 }, { "epoch": 0.7145983119724914, "grad_norm": 0.2314453125, "learning_rate": 0.00018476659437049238, "loss": 1.877, "step": 2286 }, { "epoch": 0.7149109096592685, "grad_norm": 0.23828125, "learning_rate": 0.00018475355638065984, "loss": 1.5996, "step": 2287 }, { "epoch": 0.7152235073460457, "grad_norm": 0.2373046875, "learning_rate": 0.00018474051327414709, "loss": 1.6033, "step": 2288 }, { "epoch": 0.7155361050328227, "grad_norm": 0.248046875, "learning_rate": 0.00018472746505174156, "loss": 1.6509, "step": 2289 }, { "epoch": 0.7158487027195999, "grad_norm": 0.2353515625, "learning_rate": 0.00018471441171423103, "loss": 1.8609, "step": 2290 }, { "epoch": 0.716161300406377, "grad_norm": 0.224609375, "learning_rate": 0.00018470135326240347, "loss": 1.8864, "step": 2291 }, { "epoch": 0.7164738980931541, "grad_norm": 0.2412109375, "learning_rate": 0.0001846882896970473, "loss": 1.5743, "step": 2292 }, { "epoch": 0.7167864957799313, "grad_norm": 0.2314453125, "learning_rate": 0.00018467522101895116, "loss": 1.8124, "step": 2293 }, { "epoch": 0.7170990934667083, "grad_norm": 0.2373046875, "learning_rate": 0.00018466214722890402, "loss": 1.4247, "step": 2294 }, { "epoch": 0.7174116911534855, "grad_norm": 0.2275390625, "learning_rate": 0.00018464906832769517, "loss": 1.5627, "step": 2295 }, { "epoch": 0.7177242888402626, "grad_norm": 0.2314453125, "learning_rate": 0.0001846359843161142, "loss": 1.8247, "step": 2296 }, { "epoch": 0.7180368865270397, "grad_norm": 0.234375, "learning_rate": 0.000184622895194951, "loss": 1.6003, "step": 2297 }, { "epoch": 0.7183494842138168, "grad_norm": 0.236328125, "learning_rate": 0.0001846098009649958, "loss": 1.6546, "step": 2298 }, { "epoch": 0.7186620819005939, "grad_norm": 0.23046875, "learning_rate": 0.00018459670162703905, "loss": 1.8521, "step": 2299 }, { "epoch": 0.718974679587371, "grad_norm": 0.2421875, "learning_rate": 0.00018458359718187165, "loss": 1.7397, "step": 2300 }, { "epoch": 0.7192872772741482, "grad_norm": 0.232421875, "learning_rate": 0.0001845704876302847, "loss": 1.7336, "step": 2301 }, { "epoch": 0.7195998749609253, "grad_norm": 0.2490234375, "learning_rate": 0.00018455737297306963, "loss": 1.6112, "step": 2302 }, { "epoch": 0.7199124726477024, "grad_norm": 0.2275390625, "learning_rate": 0.00018454425321101826, "loss": 1.8522, "step": 2303 }, { "epoch": 0.7202250703344796, "grad_norm": 0.236328125, "learning_rate": 0.0001845311283449225, "loss": 1.6348, "step": 2304 }, { "epoch": 0.7205376680212566, "grad_norm": 0.2275390625, "learning_rate": 0.00018451799837557485, "loss": 1.7101, "step": 2305 }, { "epoch": 0.7208502657080338, "grad_norm": 0.2392578125, "learning_rate": 0.00018450486330376793, "loss": 1.4738, "step": 2306 }, { "epoch": 0.7211628633948108, "grad_norm": 0.23828125, "learning_rate": 0.00018449172313029472, "loss": 1.6334, "step": 2307 }, { "epoch": 0.721475461081588, "grad_norm": 0.2373046875, "learning_rate": 0.00018447857785594852, "loss": 1.5218, "step": 2308 }, { "epoch": 0.7217880587683652, "grad_norm": 0.2421875, "learning_rate": 0.00018446542748152292, "loss": 1.8324, "step": 2309 }, { "epoch": 0.7221006564551422, "grad_norm": 0.234375, "learning_rate": 0.00018445227200781185, "loss": 1.8051, "step": 2310 }, { "epoch": 0.7224132541419194, "grad_norm": 0.228515625, "learning_rate": 0.0001844391114356095, "loss": 1.65, "step": 2311 }, { "epoch": 0.7227258518286964, "grad_norm": 0.2373046875, "learning_rate": 0.00018442594576571035, "loss": 1.8499, "step": 2312 }, { "epoch": 0.7230384495154736, "grad_norm": 0.2353515625, "learning_rate": 0.0001844127749989093, "loss": 1.671, "step": 2313 }, { "epoch": 0.7233510472022507, "grad_norm": 0.22265625, "learning_rate": 0.0001843995991360014, "loss": 1.7405, "step": 2314 }, { "epoch": 0.7236636448890278, "grad_norm": 0.2451171875, "learning_rate": 0.0001843864181777822, "loss": 1.8025, "step": 2315 }, { "epoch": 0.723976242575805, "grad_norm": 0.244140625, "learning_rate": 0.00018437323212504742, "loss": 1.5695, "step": 2316 }, { "epoch": 0.7242888402625821, "grad_norm": 0.22265625, "learning_rate": 0.00018436004097859308, "loss": 1.2384, "step": 2317 }, { "epoch": 0.7246014379493592, "grad_norm": 0.2265625, "learning_rate": 0.00018434684473921556, "loss": 1.6555, "step": 2318 }, { "epoch": 0.7249140356361363, "grad_norm": 0.2294921875, "learning_rate": 0.00018433364340771153, "loss": 1.6447, "step": 2319 }, { "epoch": 0.7252266333229134, "grad_norm": 0.232421875, "learning_rate": 0.00018432043698487797, "loss": 1.6859, "step": 2320 }, { "epoch": 0.7255392310096905, "grad_norm": 0.232421875, "learning_rate": 0.0001843072254715122, "loss": 1.7087, "step": 2321 }, { "epoch": 0.7258518286964677, "grad_norm": 0.2353515625, "learning_rate": 0.0001842940088684118, "loss": 1.7149, "step": 2322 }, { "epoch": 0.7261644263832447, "grad_norm": 0.2294921875, "learning_rate": 0.00018428078717637467, "loss": 1.8408, "step": 2323 }, { "epoch": 0.7264770240700219, "grad_norm": 0.2294921875, "learning_rate": 0.000184267560396199, "loss": 1.7943, "step": 2324 }, { "epoch": 0.726789621756799, "grad_norm": 0.2333984375, "learning_rate": 0.00018425432852868333, "loss": 1.7252, "step": 2325 }, { "epoch": 0.7271022194435761, "grad_norm": 0.2392578125, "learning_rate": 0.0001842410915746265, "loss": 1.6914, "step": 2326 }, { "epoch": 0.7274148171303533, "grad_norm": 0.232421875, "learning_rate": 0.0001842278495348276, "loss": 1.9011, "step": 2327 }, { "epoch": 0.7277274148171303, "grad_norm": 0.236328125, "learning_rate": 0.00018421460241008607, "loss": 1.8245, "step": 2328 }, { "epoch": 0.7280400125039075, "grad_norm": 0.2451171875, "learning_rate": 0.00018420135020120172, "loss": 1.8638, "step": 2329 }, { "epoch": 0.7283526101906845, "grad_norm": 0.2294921875, "learning_rate": 0.00018418809290897455, "loss": 1.7493, "step": 2330 }, { "epoch": 0.7286652078774617, "grad_norm": 0.2353515625, "learning_rate": 0.0001841748305342049, "loss": 1.5843, "step": 2331 }, { "epoch": 0.7289778055642389, "grad_norm": 0.236328125, "learning_rate": 0.0001841615630776935, "loss": 1.5289, "step": 2332 }, { "epoch": 0.7292904032510159, "grad_norm": 0.240234375, "learning_rate": 0.00018414829054024128, "loss": 1.6851, "step": 2333 }, { "epoch": 0.7296030009377931, "grad_norm": 0.21875, "learning_rate": 0.0001841350129226495, "loss": 1.3236, "step": 2334 }, { "epoch": 0.7299155986245702, "grad_norm": 0.2431640625, "learning_rate": 0.00018412173022571982, "loss": 1.9465, "step": 2335 }, { "epoch": 0.7302281963113473, "grad_norm": 0.2412109375, "learning_rate": 0.00018410844245025408, "loss": 1.7362, "step": 2336 }, { "epoch": 0.7305407939981244, "grad_norm": 0.228515625, "learning_rate": 0.00018409514959705448, "loss": 1.7688, "step": 2337 }, { "epoch": 0.7308533916849015, "grad_norm": 0.24609375, "learning_rate": 0.0001840818516669235, "loss": 1.658, "step": 2338 }, { "epoch": 0.7311659893716786, "grad_norm": 0.2373046875, "learning_rate": 0.00018406854866066403, "loss": 1.6786, "step": 2339 }, { "epoch": 0.7314785870584558, "grad_norm": 0.23828125, "learning_rate": 0.00018405524057907915, "loss": 1.6658, "step": 2340 }, { "epoch": 0.7317911847452329, "grad_norm": 0.2578125, "learning_rate": 0.0001840419274229723, "loss": 1.6022, "step": 2341 }, { "epoch": 0.73210378243201, "grad_norm": 0.2294921875, "learning_rate": 0.00018402860919314713, "loss": 1.7735, "step": 2342 }, { "epoch": 0.7324163801187871, "grad_norm": 0.2275390625, "learning_rate": 0.0001840152858904078, "loss": 1.4977, "step": 2343 }, { "epoch": 0.7327289778055642, "grad_norm": 0.236328125, "learning_rate": 0.00018400195751555858, "loss": 1.7735, "step": 2344 }, { "epoch": 0.7330415754923414, "grad_norm": 0.2421875, "learning_rate": 0.00018398862406940412, "loss": 1.5705, "step": 2345 }, { "epoch": 0.7333541731791184, "grad_norm": 0.228515625, "learning_rate": 0.00018397528555274943, "loss": 1.9914, "step": 2346 }, { "epoch": 0.7336667708658956, "grad_norm": 0.224609375, "learning_rate": 0.00018396194196639972, "loss": 1.6567, "step": 2347 }, { "epoch": 0.7339793685526728, "grad_norm": 0.2255859375, "learning_rate": 0.0001839485933111606, "loss": 1.5779, "step": 2348 }, { "epoch": 0.7342919662394498, "grad_norm": 0.2294921875, "learning_rate": 0.00018393523958783788, "loss": 1.6902, "step": 2349 }, { "epoch": 0.734604563926227, "grad_norm": 0.23046875, "learning_rate": 0.00018392188079723786, "loss": 1.8415, "step": 2350 }, { "epoch": 0.734917161613004, "grad_norm": 0.2421875, "learning_rate": 0.0001839085169401669, "loss": 1.7724, "step": 2351 }, { "epoch": 0.7352297592997812, "grad_norm": 0.23046875, "learning_rate": 0.00018389514801743186, "loss": 1.4619, "step": 2352 }, { "epoch": 0.7355423569865583, "grad_norm": 0.2392578125, "learning_rate": 0.00018388177402983984, "loss": 1.7035, "step": 2353 }, { "epoch": 0.7358549546733354, "grad_norm": 0.23828125, "learning_rate": 0.00018386839497819821, "loss": 1.6311, "step": 2354 }, { "epoch": 0.7361675523601126, "grad_norm": 0.2353515625, "learning_rate": 0.00018385501086331472, "loss": 1.4891, "step": 2355 }, { "epoch": 0.7364801500468896, "grad_norm": 0.23046875, "learning_rate": 0.00018384162168599735, "loss": 1.7706, "step": 2356 }, { "epoch": 0.7367927477336668, "grad_norm": 0.259765625, "learning_rate": 0.00018382822744705444, "loss": 1.7342, "step": 2357 }, { "epoch": 0.7371053454204439, "grad_norm": 0.2451171875, "learning_rate": 0.0001838148281472946, "loss": 1.7338, "step": 2358 }, { "epoch": 0.737417943107221, "grad_norm": 0.244140625, "learning_rate": 0.0001838014237875268, "loss": 1.6715, "step": 2359 }, { "epoch": 0.7377305407939981, "grad_norm": 0.2294921875, "learning_rate": 0.00018378801436856027, "loss": 1.8231, "step": 2360 }, { "epoch": 0.7380431384807753, "grad_norm": 0.25, "learning_rate": 0.00018377459989120452, "loss": 1.6681, "step": 2361 }, { "epoch": 0.7383557361675523, "grad_norm": 0.2265625, "learning_rate": 0.00018376118035626942, "loss": 1.6599, "step": 2362 }, { "epoch": 0.7386683338543295, "grad_norm": 0.265625, "learning_rate": 0.00018374775576456513, "loss": 1.8036, "step": 2363 }, { "epoch": 0.7389809315411066, "grad_norm": 0.232421875, "learning_rate": 0.00018373432611690208, "loss": 1.8082, "step": 2364 }, { "epoch": 0.7392935292278837, "grad_norm": 0.2216796875, "learning_rate": 0.0001837208914140911, "loss": 1.4781, "step": 2365 }, { "epoch": 0.7396061269146609, "grad_norm": 0.2236328125, "learning_rate": 0.00018370745165694318, "loss": 1.3993, "step": 2366 }, { "epoch": 0.7399187246014379, "grad_norm": 0.23046875, "learning_rate": 0.00018369400684626976, "loss": 1.5936, "step": 2367 }, { "epoch": 0.7402313222882151, "grad_norm": 0.2265625, "learning_rate": 0.00018368055698288248, "loss": 1.4418, "step": 2368 }, { "epoch": 0.7405439199749921, "grad_norm": 0.2412109375, "learning_rate": 0.00018366710206759335, "loss": 1.5162, "step": 2369 }, { "epoch": 0.7408565176617693, "grad_norm": 0.2353515625, "learning_rate": 0.00018365364210121466, "loss": 1.9776, "step": 2370 }, { "epoch": 0.7411691153485465, "grad_norm": 0.2265625, "learning_rate": 0.00018364017708455895, "loss": 1.3729, "step": 2371 }, { "epoch": 0.7414817130353235, "grad_norm": 0.2353515625, "learning_rate": 0.0001836267070184392, "loss": 1.6031, "step": 2372 }, { "epoch": 0.7417943107221007, "grad_norm": 0.251953125, "learning_rate": 0.0001836132319036686, "loss": 1.5944, "step": 2373 }, { "epoch": 0.7421069084088777, "grad_norm": 0.2353515625, "learning_rate": 0.0001835997517410606, "loss": 1.3653, "step": 2374 }, { "epoch": 0.7424195060956549, "grad_norm": 0.232421875, "learning_rate": 0.0001835862665314291, "loss": 1.7253, "step": 2375 }, { "epoch": 0.742732103782432, "grad_norm": 0.234375, "learning_rate": 0.00018357277627558815, "loss": 1.326, "step": 2376 }, { "epoch": 0.7430447014692091, "grad_norm": 0.2294921875, "learning_rate": 0.00018355928097435218, "loss": 1.8161, "step": 2377 }, { "epoch": 0.7433572991559863, "grad_norm": 0.2216796875, "learning_rate": 0.00018354578062853595, "loss": 1.8656, "step": 2378 }, { "epoch": 0.7436698968427634, "grad_norm": 0.23046875, "learning_rate": 0.0001835322752389545, "loss": 1.8657, "step": 2379 }, { "epoch": 0.7439824945295405, "grad_norm": 0.2275390625, "learning_rate": 0.0001835187648064231, "loss": 1.5715, "step": 2380 }, { "epoch": 0.7442950922163176, "grad_norm": 0.2392578125, "learning_rate": 0.0001835052493317575, "loss": 1.7185, "step": 2381 }, { "epoch": 0.7446076899030947, "grad_norm": 0.234375, "learning_rate": 0.00018349172881577356, "loss": 1.7779, "step": 2382 }, { "epoch": 0.7449202875898718, "grad_norm": 0.2275390625, "learning_rate": 0.00018347820325928754, "loss": 1.9479, "step": 2383 }, { "epoch": 0.745232885276649, "grad_norm": 0.234375, "learning_rate": 0.00018346467266311604, "loss": 1.7667, "step": 2384 }, { "epoch": 0.745545482963426, "grad_norm": 0.248046875, "learning_rate": 0.00018345113702807585, "loss": 1.4014, "step": 2385 }, { "epoch": 0.7458580806502032, "grad_norm": 0.240234375, "learning_rate": 0.00018343759635498422, "loss": 1.8576, "step": 2386 }, { "epoch": 0.7461706783369803, "grad_norm": 0.23828125, "learning_rate": 0.00018342405064465856, "loss": 1.6006, "step": 2387 }, { "epoch": 0.7464832760237574, "grad_norm": 0.2333984375, "learning_rate": 0.00018341049989791666, "loss": 1.5874, "step": 2388 }, { "epoch": 0.7467958737105346, "grad_norm": 0.251953125, "learning_rate": 0.00018339694411557655, "loss": 1.6729, "step": 2389 }, { "epoch": 0.7471084713973116, "grad_norm": 0.2333984375, "learning_rate": 0.00018338338329845668, "loss": 1.5282, "step": 2390 }, { "epoch": 0.7474210690840888, "grad_norm": 0.2373046875, "learning_rate": 0.00018336981744737573, "loss": 1.5829, "step": 2391 }, { "epoch": 0.747733666770866, "grad_norm": 0.2373046875, "learning_rate": 0.0001833562465631526, "loss": 1.6278, "step": 2392 }, { "epoch": 0.748046264457643, "grad_norm": 0.2412109375, "learning_rate": 0.00018334267064660668, "loss": 1.6944, "step": 2393 }, { "epoch": 0.7483588621444202, "grad_norm": 0.2353515625, "learning_rate": 0.00018332908969855753, "loss": 1.8641, "step": 2394 }, { "epoch": 0.7486714598311972, "grad_norm": 0.2421875, "learning_rate": 0.00018331550371982505, "loss": 1.6727, "step": 2395 }, { "epoch": 0.7489840575179744, "grad_norm": 0.2314453125, "learning_rate": 0.00018330191271122943, "loss": 1.6077, "step": 2396 }, { "epoch": 0.7492966552047515, "grad_norm": 0.244140625, "learning_rate": 0.0001832883166735912, "loss": 1.4713, "step": 2397 }, { "epoch": 0.7496092528915286, "grad_norm": 0.248046875, "learning_rate": 0.00018327471560773112, "loss": 1.9724, "step": 2398 }, { "epoch": 0.7499218505783057, "grad_norm": 0.2470703125, "learning_rate": 0.00018326110951447037, "loss": 1.852, "step": 2399 }, { "epoch": 0.7502344482650828, "grad_norm": 0.2353515625, "learning_rate": 0.00018324749839463035, "loss": 1.7013, "step": 2400 }, { "epoch": 0.75054704595186, "grad_norm": 0.234375, "learning_rate": 0.00018323388224903274, "loss": 2.0012, "step": 2401 }, { "epoch": 0.7508596436386371, "grad_norm": 0.2421875, "learning_rate": 0.0001832202610784996, "loss": 1.7133, "step": 2402 }, { "epoch": 0.7511722413254142, "grad_norm": 0.2392578125, "learning_rate": 0.00018320663488385327, "loss": 1.7841, "step": 2403 }, { "epoch": 0.7514848390121913, "grad_norm": 0.2275390625, "learning_rate": 0.00018319300366591637, "loss": 1.8134, "step": 2404 }, { "epoch": 0.7517974366989685, "grad_norm": 0.23046875, "learning_rate": 0.00018317936742551178, "loss": 1.5865, "step": 2405 }, { "epoch": 0.7521100343857455, "grad_norm": 0.2333984375, "learning_rate": 0.0001831657261634628, "loss": 1.6447, "step": 2406 }, { "epoch": 0.7524226320725227, "grad_norm": 0.2451171875, "learning_rate": 0.00018315207988059298, "loss": 1.4747, "step": 2407 }, { "epoch": 0.7527352297592997, "grad_norm": 0.2451171875, "learning_rate": 0.0001831384285777261, "loss": 1.7538, "step": 2408 }, { "epoch": 0.7530478274460769, "grad_norm": 0.2275390625, "learning_rate": 0.00018312477225568635, "loss": 1.6004, "step": 2409 }, { "epoch": 0.7533604251328541, "grad_norm": 0.25, "learning_rate": 0.00018311111091529818, "loss": 1.6864, "step": 2410 }, { "epoch": 0.7536730228196311, "grad_norm": 0.2412109375, "learning_rate": 0.00018309744455738633, "loss": 1.8215, "step": 2411 }, { "epoch": 0.7539856205064083, "grad_norm": 0.236328125, "learning_rate": 0.00018308377318277587, "loss": 1.672, "step": 2412 }, { "epoch": 0.7542982181931853, "grad_norm": 0.2451171875, "learning_rate": 0.0001830700967922921, "loss": 1.7247, "step": 2413 }, { "epoch": 0.7546108158799625, "grad_norm": 0.24609375, "learning_rate": 0.00018305641538676079, "loss": 1.6188, "step": 2414 }, { "epoch": 0.7549234135667396, "grad_norm": 0.251953125, "learning_rate": 0.00018304272896700784, "loss": 1.8593, "step": 2415 }, { "epoch": 0.7552360112535167, "grad_norm": 0.2490234375, "learning_rate": 0.0001830290375338595, "loss": 1.5332, "step": 2416 }, { "epoch": 0.7555486089402939, "grad_norm": 0.224609375, "learning_rate": 0.00018301534108814234, "loss": 1.5756, "step": 2417 }, { "epoch": 0.755861206627071, "grad_norm": 0.244140625, "learning_rate": 0.0001830016396306833, "loss": 2.0714, "step": 2418 }, { "epoch": 0.7561738043138481, "grad_norm": 0.2451171875, "learning_rate": 0.00018298793316230948, "loss": 1.64, "step": 2419 }, { "epoch": 0.7564864020006252, "grad_norm": 0.220703125, "learning_rate": 0.00018297422168384836, "loss": 1.5317, "step": 2420 }, { "epoch": 0.7567989996874023, "grad_norm": 0.2451171875, "learning_rate": 0.00018296050519612777, "loss": 1.8879, "step": 2421 }, { "epoch": 0.7571115973741794, "grad_norm": 0.2421875, "learning_rate": 0.00018294678369997578, "loss": 1.7005, "step": 2422 }, { "epoch": 0.7574241950609566, "grad_norm": 0.376953125, "learning_rate": 0.00018293305719622072, "loss": 2.244, "step": 2423 }, { "epoch": 0.7577367927477336, "grad_norm": 0.2431640625, "learning_rate": 0.00018291932568569134, "loss": 1.5323, "step": 2424 }, { "epoch": 0.7580493904345108, "grad_norm": 0.2451171875, "learning_rate": 0.00018290558916921659, "loss": 1.6395, "step": 2425 }, { "epoch": 0.7583619881212879, "grad_norm": 0.25, "learning_rate": 0.00018289184764762575, "loss": 1.648, "step": 2426 }, { "epoch": 0.758674585808065, "grad_norm": 0.234375, "learning_rate": 0.0001828781011217485, "loss": 1.5622, "step": 2427 }, { "epoch": 0.7589871834948422, "grad_norm": 0.2275390625, "learning_rate": 0.00018286434959241462, "loss": 1.5481, "step": 2428 }, { "epoch": 0.7592997811816192, "grad_norm": 0.2294921875, "learning_rate": 0.00018285059306045437, "loss": 1.77, "step": 2429 }, { "epoch": 0.7596123788683964, "grad_norm": 0.2314453125, "learning_rate": 0.00018283683152669824, "loss": 1.4071, "step": 2430 }, { "epoch": 0.7599249765551734, "grad_norm": 0.2490234375, "learning_rate": 0.00018282306499197703, "loss": 2.0644, "step": 2431 }, { "epoch": 0.7602375742419506, "grad_norm": 0.2421875, "learning_rate": 0.00018280929345712186, "loss": 1.7075, "step": 2432 }, { "epoch": 0.7605501719287278, "grad_norm": 0.2353515625, "learning_rate": 0.0001827955169229641, "loss": 1.3107, "step": 2433 }, { "epoch": 0.7608627696155048, "grad_norm": 0.23046875, "learning_rate": 0.00018278173539033548, "loss": 1.7646, "step": 2434 }, { "epoch": 0.761175367302282, "grad_norm": 0.23828125, "learning_rate": 0.00018276794886006804, "loss": 2.0252, "step": 2435 }, { "epoch": 0.7614879649890591, "grad_norm": 0.2294921875, "learning_rate": 0.00018275415733299402, "loss": 1.5208, "step": 2436 }, { "epoch": 0.7618005626758362, "grad_norm": 0.2412109375, "learning_rate": 0.00018274036080994605, "loss": 1.8906, "step": 2437 }, { "epoch": 0.7621131603626133, "grad_norm": 0.2275390625, "learning_rate": 0.00018272655929175708, "loss": 1.8472, "step": 2438 }, { "epoch": 0.7624257580493904, "grad_norm": 0.2392578125, "learning_rate": 0.0001827127527792603, "loss": 1.7364, "step": 2439 }, { "epoch": 0.7627383557361676, "grad_norm": 0.2373046875, "learning_rate": 0.00018269894127328926, "loss": 1.8149, "step": 2440 }, { "epoch": 0.7630509534229447, "grad_norm": 0.232421875, "learning_rate": 0.00018268512477467774, "loss": 1.8335, "step": 2441 }, { "epoch": 0.7633635511097218, "grad_norm": 0.2353515625, "learning_rate": 0.00018267130328425985, "loss": 1.7762, "step": 2442 }, { "epoch": 0.7636761487964989, "grad_norm": 0.2265625, "learning_rate": 0.00018265747680287008, "loss": 1.5251, "step": 2443 }, { "epoch": 0.763988746483276, "grad_norm": 0.2333984375, "learning_rate": 0.00018264364533134304, "loss": 1.5232, "step": 2444 }, { "epoch": 0.7643013441700531, "grad_norm": 0.2333984375, "learning_rate": 0.00018262980887051385, "loss": 1.5101, "step": 2445 }, { "epoch": 0.7646139418568303, "grad_norm": 0.2275390625, "learning_rate": 0.00018261596742121777, "loss": 1.6831, "step": 2446 }, { "epoch": 0.7649265395436073, "grad_norm": 0.24609375, "learning_rate": 0.00018260212098429054, "loss": 1.8748, "step": 2447 }, { "epoch": 0.7652391372303845, "grad_norm": 0.2451171875, "learning_rate": 0.00018258826956056793, "loss": 1.7539, "step": 2448 }, { "epoch": 0.7655517349171617, "grad_norm": 0.2412109375, "learning_rate": 0.00018257441315088627, "loss": 1.5779, "step": 2449 }, { "epoch": 0.7658643326039387, "grad_norm": 0.2421875, "learning_rate": 0.00018256055175608205, "loss": 1.7147, "step": 2450 }, { "epoch": 0.7661769302907159, "grad_norm": 0.2265625, "learning_rate": 0.00018254668537699212, "loss": 1.682, "step": 2451 }, { "epoch": 0.7664895279774929, "grad_norm": 0.2412109375, "learning_rate": 0.0001825328140144536, "loss": 1.8002, "step": 2452 }, { "epoch": 0.7668021256642701, "grad_norm": 0.234375, "learning_rate": 0.0001825189376693039, "loss": 1.3419, "step": 2453 }, { "epoch": 0.7671147233510472, "grad_norm": 0.24609375, "learning_rate": 0.0001825050563423808, "loss": 1.7038, "step": 2454 }, { "epoch": 0.7674273210378243, "grad_norm": 0.232421875, "learning_rate": 0.00018249117003452234, "loss": 1.6925, "step": 2455 }, { "epoch": 0.7677399187246015, "grad_norm": 0.2333984375, "learning_rate": 0.00018247727874656683, "loss": 1.7601, "step": 2456 }, { "epoch": 0.7680525164113785, "grad_norm": 0.23828125, "learning_rate": 0.00018246338247935285, "loss": 1.6095, "step": 2457 }, { "epoch": 0.7683651140981557, "grad_norm": 0.2431640625, "learning_rate": 0.0001824494812337194, "loss": 1.4805, "step": 2458 }, { "epoch": 0.7686777117849328, "grad_norm": 0.23046875, "learning_rate": 0.00018243557501050573, "loss": 1.6642, "step": 2459 }, { "epoch": 0.7689903094717099, "grad_norm": 0.2431640625, "learning_rate": 0.00018242166381055133, "loss": 1.4541, "step": 2460 }, { "epoch": 0.769302907158487, "grad_norm": 0.2470703125, "learning_rate": 0.00018240774763469606, "loss": 1.5884, "step": 2461 }, { "epoch": 0.7696155048452642, "grad_norm": 0.224609375, "learning_rate": 0.00018239382648378006, "loss": 1.6074, "step": 2462 }, { "epoch": 0.7699281025320412, "grad_norm": 0.232421875, "learning_rate": 0.00018237990035864372, "loss": 1.7759, "step": 2463 }, { "epoch": 0.7702407002188184, "grad_norm": 0.2236328125, "learning_rate": 0.00018236596926012787, "loss": 1.6379, "step": 2464 }, { "epoch": 0.7705532979055955, "grad_norm": 0.248046875, "learning_rate": 0.00018235203318907347, "loss": 1.7159, "step": 2465 }, { "epoch": 0.7708658955923726, "grad_norm": 0.251953125, "learning_rate": 0.00018233809214632184, "loss": 1.6911, "step": 2466 }, { "epoch": 0.7711784932791498, "grad_norm": 0.2392578125, "learning_rate": 0.00018232414613271475, "loss": 1.422, "step": 2467 }, { "epoch": 0.7714910909659268, "grad_norm": 0.2314453125, "learning_rate": 0.00018231019514909397, "loss": 1.551, "step": 2468 }, { "epoch": 0.771803688652704, "grad_norm": 0.2412109375, "learning_rate": 0.00018229623919630188, "loss": 1.8121, "step": 2469 }, { "epoch": 0.772116286339481, "grad_norm": 0.2265625, "learning_rate": 0.00018228227827518095, "loss": 1.9086, "step": 2470 }, { "epoch": 0.7724288840262582, "grad_norm": 0.330078125, "learning_rate": 0.000182268312386574, "loss": 2.5265, "step": 2471 }, { "epoch": 0.7727414817130354, "grad_norm": 0.234375, "learning_rate": 0.0001822543415313242, "loss": 1.8133, "step": 2472 }, { "epoch": 0.7730540793998124, "grad_norm": 0.255859375, "learning_rate": 0.00018224036571027501, "loss": 1.9486, "step": 2473 }, { "epoch": 0.7733666770865896, "grad_norm": 0.240234375, "learning_rate": 0.0001822263849242701, "loss": 1.7464, "step": 2474 }, { "epoch": 0.7736792747733667, "grad_norm": 0.2431640625, "learning_rate": 0.0001822123991741536, "loss": 1.743, "step": 2475 }, { "epoch": 0.7739918724601438, "grad_norm": 0.2353515625, "learning_rate": 0.00018219840846076977, "loss": 1.4856, "step": 2476 }, { "epoch": 0.7743044701469209, "grad_norm": 0.2451171875, "learning_rate": 0.00018218441278496328, "loss": 1.7813, "step": 2477 }, { "epoch": 0.774617067833698, "grad_norm": 0.234375, "learning_rate": 0.00018217041214757903, "loss": 1.7274, "step": 2478 }, { "epoch": 0.7749296655204752, "grad_norm": 0.2333984375, "learning_rate": 0.00018215640654946233, "loss": 1.5569, "step": 2479 }, { "epoch": 0.7752422632072523, "grad_norm": 0.2353515625, "learning_rate": 0.00018214239599145866, "loss": 1.5575, "step": 2480 }, { "epoch": 0.7755548608940294, "grad_norm": 0.2294921875, "learning_rate": 0.00018212838047441387, "loss": 1.5972, "step": 2481 }, { "epoch": 0.7758674585808065, "grad_norm": 0.2275390625, "learning_rate": 0.0001821143599991741, "loss": 1.668, "step": 2482 }, { "epoch": 0.7761800562675836, "grad_norm": 0.2373046875, "learning_rate": 0.00018210033456658576, "loss": 1.646, "step": 2483 }, { "epoch": 0.7764926539543607, "grad_norm": 0.26171875, "learning_rate": 0.00018208630417749561, "loss": 2.3322, "step": 2484 }, { "epoch": 0.7768052516411379, "grad_norm": 0.2353515625, "learning_rate": 0.00018207226883275069, "loss": 1.5657, "step": 2485 }, { "epoch": 0.777117849327915, "grad_norm": 0.2275390625, "learning_rate": 0.0001820582285331983, "loss": 1.4964, "step": 2486 }, { "epoch": 0.7774304470146921, "grad_norm": 0.2333984375, "learning_rate": 0.00018204418327968607, "loss": 1.5711, "step": 2487 }, { "epoch": 0.7777430447014692, "grad_norm": 0.26171875, "learning_rate": 0.00018203013307306195, "loss": 1.999, "step": 2488 }, { "epoch": 0.7780556423882463, "grad_norm": 0.244140625, "learning_rate": 0.00018201607791417418, "loss": 1.5581, "step": 2489 }, { "epoch": 0.7783682400750235, "grad_norm": 0.23046875, "learning_rate": 0.00018200201780387126, "loss": 1.5618, "step": 2490 }, { "epoch": 0.7786808377618005, "grad_norm": 0.251953125, "learning_rate": 0.00018198795274300205, "loss": 1.6855, "step": 2491 }, { "epoch": 0.7789934354485777, "grad_norm": 0.23046875, "learning_rate": 0.00018197388273241563, "loss": 1.4388, "step": 2492 }, { "epoch": 0.7793060331353548, "grad_norm": 0.23046875, "learning_rate": 0.00018195980777296146, "loss": 1.3961, "step": 2493 }, { "epoch": 0.7796186308221319, "grad_norm": 0.2373046875, "learning_rate": 0.00018194572786548924, "loss": 1.3543, "step": 2494 }, { "epoch": 0.7799312285089091, "grad_norm": 0.2451171875, "learning_rate": 0.00018193164301084905, "loss": 1.6291, "step": 2495 }, { "epoch": 0.7802438261956861, "grad_norm": 0.244140625, "learning_rate": 0.00018191755320989112, "loss": 1.8612, "step": 2496 }, { "epoch": 0.7805564238824633, "grad_norm": 0.2431640625, "learning_rate": 0.00018190345846346613, "loss": 1.507, "step": 2497 }, { "epoch": 0.7808690215692404, "grad_norm": 0.2392578125, "learning_rate": 0.00018188935877242496, "loss": 1.4034, "step": 2498 }, { "epoch": 0.7811816192560175, "grad_norm": 0.2431640625, "learning_rate": 0.00018187525413761887, "loss": 1.3682, "step": 2499 }, { "epoch": 0.7814942169427946, "grad_norm": 0.25, "learning_rate": 0.00018186114455989936, "loss": 1.3907, "step": 2500 }, { "epoch": 0.7818068146295717, "grad_norm": 0.251953125, "learning_rate": 0.00018184703004011822, "loss": 1.506, "step": 2501 }, { "epoch": 0.7821194123163488, "grad_norm": 0.2294921875, "learning_rate": 0.00018183291057912758, "loss": 1.6376, "step": 2502 }, { "epoch": 0.782432010003126, "grad_norm": 0.23046875, "learning_rate": 0.00018181878617777985, "loss": 1.6524, "step": 2503 }, { "epoch": 0.7827446076899031, "grad_norm": 0.251953125, "learning_rate": 0.00018180465683692774, "loss": 1.6575, "step": 2504 }, { "epoch": 0.7830572053766802, "grad_norm": 0.2353515625, "learning_rate": 0.00018179052255742423, "loss": 1.6608, "step": 2505 }, { "epoch": 0.7833698030634574, "grad_norm": 0.2255859375, "learning_rate": 0.00018177638334012267, "loss": 1.7274, "step": 2506 }, { "epoch": 0.7836824007502344, "grad_norm": 0.2451171875, "learning_rate": 0.00018176223918587664, "loss": 1.7459, "step": 2507 }, { "epoch": 0.7839949984370116, "grad_norm": 0.232421875, "learning_rate": 0.00018174809009554005, "loss": 1.366, "step": 2508 }, { "epoch": 0.7843075961237886, "grad_norm": 0.234375, "learning_rate": 0.00018173393606996707, "loss": 1.7907, "step": 2509 }, { "epoch": 0.7846201938105658, "grad_norm": 0.240234375, "learning_rate": 0.0001817197771100122, "loss": 1.7705, "step": 2510 }, { "epoch": 0.784932791497343, "grad_norm": 0.248046875, "learning_rate": 0.00018170561321653026, "loss": 1.4995, "step": 2511 }, { "epoch": 0.78524538918412, "grad_norm": 0.24609375, "learning_rate": 0.00018169144439037632, "loss": 1.6226, "step": 2512 }, { "epoch": 0.7855579868708972, "grad_norm": 0.240234375, "learning_rate": 0.00018167727063240582, "loss": 1.619, "step": 2513 }, { "epoch": 0.7858705845576742, "grad_norm": 0.2275390625, "learning_rate": 0.00018166309194347438, "loss": 1.9021, "step": 2514 }, { "epoch": 0.7861831822444514, "grad_norm": 0.2275390625, "learning_rate": 0.000181648908324438, "loss": 1.9489, "step": 2515 }, { "epoch": 0.7864957799312285, "grad_norm": 0.2421875, "learning_rate": 0.00018163471977615303, "loss": 1.5399, "step": 2516 }, { "epoch": 0.7868083776180056, "grad_norm": 0.236328125, "learning_rate": 0.000181620526299476, "loss": 1.5515, "step": 2517 }, { "epoch": 0.7871209753047828, "grad_norm": 0.240234375, "learning_rate": 0.00018160632789526374, "loss": 1.4493, "step": 2518 }, { "epoch": 0.7874335729915599, "grad_norm": 0.2392578125, "learning_rate": 0.00018159212456437347, "loss": 1.6494, "step": 2519 }, { "epoch": 0.787746170678337, "grad_norm": 0.23828125, "learning_rate": 0.0001815779163076627, "loss": 1.7547, "step": 2520 }, { "epoch": 0.7880587683651141, "grad_norm": 0.2294921875, "learning_rate": 0.00018156370312598914, "loss": 1.7275, "step": 2521 }, { "epoch": 0.7883713660518912, "grad_norm": 0.2294921875, "learning_rate": 0.0001815494850202109, "loss": 1.3418, "step": 2522 }, { "epoch": 0.7886839637386683, "grad_norm": 0.2431640625, "learning_rate": 0.00018153526199118634, "loss": 1.5102, "step": 2523 }, { "epoch": 0.7889965614254455, "grad_norm": 0.248046875, "learning_rate": 0.0001815210340397741, "loss": 2.0452, "step": 2524 }, { "epoch": 0.7893091591122225, "grad_norm": 0.25, "learning_rate": 0.00018150680116683313, "loss": 1.5017, "step": 2525 }, { "epoch": 0.7896217567989997, "grad_norm": 0.2373046875, "learning_rate": 0.00018149256337322275, "loss": 2.0215, "step": 2526 }, { "epoch": 0.7899343544857768, "grad_norm": 0.2392578125, "learning_rate": 0.00018147832065980245, "loss": 1.7694, "step": 2527 }, { "epoch": 0.7902469521725539, "grad_norm": 0.240234375, "learning_rate": 0.00018146407302743208, "loss": 1.6186, "step": 2528 }, { "epoch": 0.7905595498593311, "grad_norm": 0.232421875, "learning_rate": 0.00018144982047697185, "loss": 1.7227, "step": 2529 }, { "epoch": 0.7908721475461081, "grad_norm": 0.2470703125, "learning_rate": 0.00018143556300928215, "loss": 1.6313, "step": 2530 }, { "epoch": 0.7911847452328853, "grad_norm": 0.232421875, "learning_rate": 0.00018142130062522377, "loss": 1.4294, "step": 2531 }, { "epoch": 0.7914973429196624, "grad_norm": 0.23828125, "learning_rate": 0.00018140703332565768, "loss": 1.5747, "step": 2532 }, { "epoch": 0.7918099406064395, "grad_norm": 0.25390625, "learning_rate": 0.00018139276111144525, "loss": 1.6087, "step": 2533 }, { "epoch": 0.7921225382932167, "grad_norm": 0.248046875, "learning_rate": 0.0001813784839834481, "loss": 1.6986, "step": 2534 }, { "epoch": 0.7924351359799937, "grad_norm": 0.251953125, "learning_rate": 0.00018136420194252818, "loss": 1.5952, "step": 2535 }, { "epoch": 0.7927477336667709, "grad_norm": 0.2392578125, "learning_rate": 0.00018134991498954773, "loss": 1.7808, "step": 2536 }, { "epoch": 0.793060331353548, "grad_norm": 0.2333984375, "learning_rate": 0.0001813356231253692, "loss": 1.518, "step": 2537 }, { "epoch": 0.7933729290403251, "grad_norm": 0.2421875, "learning_rate": 0.0001813213263508555, "loss": 1.82, "step": 2538 }, { "epoch": 0.7936855267271022, "grad_norm": 0.2353515625, "learning_rate": 0.0001813070246668697, "loss": 1.5595, "step": 2539 }, { "epoch": 0.7939981244138793, "grad_norm": 0.2333984375, "learning_rate": 0.00018129271807427517, "loss": 1.8371, "step": 2540 }, { "epoch": 0.7943107221006565, "grad_norm": 0.2265625, "learning_rate": 0.0001812784065739357, "loss": 1.5297, "step": 2541 }, { "epoch": 0.7946233197874336, "grad_norm": 0.236328125, "learning_rate": 0.0001812640901667152, "loss": 1.6262, "step": 2542 }, { "epoch": 0.7949359174742107, "grad_norm": 0.2373046875, "learning_rate": 0.00018124976885347806, "loss": 1.7128, "step": 2543 }, { "epoch": 0.7952485151609878, "grad_norm": 0.2314453125, "learning_rate": 0.00018123544263508884, "loss": 1.9219, "step": 2544 }, { "epoch": 0.7955611128477649, "grad_norm": 0.228515625, "learning_rate": 0.00018122111151241241, "loss": 1.5844, "step": 2545 }, { "epoch": 0.795873710534542, "grad_norm": 0.2333984375, "learning_rate": 0.000181206775486314, "loss": 1.806, "step": 2546 }, { "epoch": 0.7961863082213192, "grad_norm": 0.2470703125, "learning_rate": 0.00018119243455765903, "loss": 1.648, "step": 2547 }, { "epoch": 0.7964989059080962, "grad_norm": 0.251953125, "learning_rate": 0.00018117808872731336, "loss": 1.5256, "step": 2548 }, { "epoch": 0.7968115035948734, "grad_norm": 0.2421875, "learning_rate": 0.000181163737996143, "loss": 1.491, "step": 2549 }, { "epoch": 0.7971241012816506, "grad_norm": 0.2294921875, "learning_rate": 0.00018114938236501438, "loss": 1.8205, "step": 2550 }, { "epoch": 0.7974366989684276, "grad_norm": 0.234375, "learning_rate": 0.0001811350218347941, "loss": 1.6017, "step": 2551 }, { "epoch": 0.7977492966552048, "grad_norm": 0.240234375, "learning_rate": 0.0001811206564063492, "loss": 1.4423, "step": 2552 }, { "epoch": 0.7980618943419818, "grad_norm": 0.255859375, "learning_rate": 0.00018110628608054686, "loss": 1.8525, "step": 2553 }, { "epoch": 0.798374492028759, "grad_norm": 0.2373046875, "learning_rate": 0.0001810919108582547, "loss": 1.7098, "step": 2554 }, { "epoch": 0.7986870897155361, "grad_norm": 0.2275390625, "learning_rate": 0.00018107753074034054, "loss": 1.7347, "step": 2555 }, { "epoch": 0.7989996874023132, "grad_norm": 0.244140625, "learning_rate": 0.00018106314572767252, "loss": 1.6353, "step": 2556 }, { "epoch": 0.7993122850890904, "grad_norm": 0.244140625, "learning_rate": 0.00018104875582111913, "loss": 1.7014, "step": 2557 }, { "epoch": 0.7996248827758674, "grad_norm": 0.23046875, "learning_rate": 0.00018103436102154903, "loss": 1.5313, "step": 2558 }, { "epoch": 0.7999374804626446, "grad_norm": 0.24609375, "learning_rate": 0.0001810199613298313, "loss": 1.671, "step": 2559 }, { "epoch": 0.8002500781494217, "grad_norm": 0.240234375, "learning_rate": 0.00018100555674683527, "loss": 1.5859, "step": 2560 }, { "epoch": 0.8005626758361988, "grad_norm": 0.232421875, "learning_rate": 0.00018099114727343057, "loss": 1.4992, "step": 2561 }, { "epoch": 0.8008752735229759, "grad_norm": 0.232421875, "learning_rate": 0.00018097673291048706, "loss": 1.6654, "step": 2562 }, { "epoch": 0.8011878712097531, "grad_norm": 0.236328125, "learning_rate": 0.000180962313658875, "loss": 1.6192, "step": 2563 }, { "epoch": 0.8015004688965301, "grad_norm": 0.2333984375, "learning_rate": 0.0001809478895194649, "loss": 1.7311, "step": 2564 }, { "epoch": 0.8018130665833073, "grad_norm": 0.2373046875, "learning_rate": 0.00018093346049312758, "loss": 1.5685, "step": 2565 }, { "epoch": 0.8021256642700844, "grad_norm": 0.2412109375, "learning_rate": 0.0001809190265807341, "loss": 1.9562, "step": 2566 }, { "epoch": 0.8024382619568615, "grad_norm": 0.251953125, "learning_rate": 0.00018090458778315588, "loss": 1.662, "step": 2567 }, { "epoch": 0.8027508596436387, "grad_norm": 0.251953125, "learning_rate": 0.00018089014410126457, "loss": 1.611, "step": 2568 }, { "epoch": 0.8030634573304157, "grad_norm": 0.2265625, "learning_rate": 0.0001808756955359322, "loss": 1.7113, "step": 2569 }, { "epoch": 0.8033760550171929, "grad_norm": 0.234375, "learning_rate": 0.00018086124208803103, "loss": 1.3589, "step": 2570 }, { "epoch": 0.8036886527039699, "grad_norm": 0.23828125, "learning_rate": 0.00018084678375843364, "loss": 1.819, "step": 2571 }, { "epoch": 0.8040012503907471, "grad_norm": 0.2412109375, "learning_rate": 0.00018083232054801288, "loss": 1.6764, "step": 2572 }, { "epoch": 0.8043138480775243, "grad_norm": 0.2451171875, "learning_rate": 0.0001808178524576419, "loss": 1.5922, "step": 2573 }, { "epoch": 0.8046264457643013, "grad_norm": 0.251953125, "learning_rate": 0.0001808033794881942, "loss": 1.5336, "step": 2574 }, { "epoch": 0.8049390434510785, "grad_norm": 0.2275390625, "learning_rate": 0.0001807889016405435, "loss": 1.443, "step": 2575 }, { "epoch": 0.8052516411378556, "grad_norm": 0.2431640625, "learning_rate": 0.0001807744189155639, "loss": 1.7123, "step": 2576 }, { "epoch": 0.8055642388246327, "grad_norm": 0.24609375, "learning_rate": 0.00018075993131412966, "loss": 1.9127, "step": 2577 }, { "epoch": 0.8058768365114098, "grad_norm": 0.2412109375, "learning_rate": 0.00018074543883711547, "loss": 1.7716, "step": 2578 }, { "epoch": 0.8061894341981869, "grad_norm": 0.2470703125, "learning_rate": 0.00018073094148539625, "loss": 1.7905, "step": 2579 }, { "epoch": 0.806502031884964, "grad_norm": 0.236328125, "learning_rate": 0.00018071643925984717, "loss": 1.5217, "step": 2580 }, { "epoch": 0.8068146295717412, "grad_norm": 0.2412109375, "learning_rate": 0.00018070193216134384, "loss": 1.6451, "step": 2581 }, { "epoch": 0.8071272272585183, "grad_norm": 0.2353515625, "learning_rate": 0.00018068742019076203, "loss": 1.7439, "step": 2582 }, { "epoch": 0.8074398249452954, "grad_norm": 0.25, "learning_rate": 0.0001806729033489778, "loss": 2.0439, "step": 2583 }, { "epoch": 0.8077524226320725, "grad_norm": 0.263671875, "learning_rate": 0.0001806583816368676, "loss": 1.7726, "step": 2584 }, { "epoch": 0.8080650203188496, "grad_norm": 0.2333984375, "learning_rate": 0.00018064385505530813, "loss": 1.8142, "step": 2585 }, { "epoch": 0.8083776180056268, "grad_norm": 0.234375, "learning_rate": 0.00018062932360517637, "loss": 1.8507, "step": 2586 }, { "epoch": 0.8086902156924038, "grad_norm": 0.2412109375, "learning_rate": 0.0001806147872873496, "loss": 1.8861, "step": 2587 }, { "epoch": 0.809002813379181, "grad_norm": 0.24609375, "learning_rate": 0.00018060024610270538, "loss": 2.04, "step": 2588 }, { "epoch": 0.8093154110659581, "grad_norm": 0.23828125, "learning_rate": 0.0001805857000521216, "loss": 1.5433, "step": 2589 }, { "epoch": 0.8096280087527352, "grad_norm": 0.23828125, "learning_rate": 0.00018057114913647642, "loss": 1.5803, "step": 2590 }, { "epoch": 0.8099406064395124, "grad_norm": 0.244140625, "learning_rate": 0.0001805565933566483, "loss": 1.7928, "step": 2591 }, { "epoch": 0.8102532041262894, "grad_norm": 0.25390625, "learning_rate": 0.00018054203271351599, "loss": 1.8568, "step": 2592 }, { "epoch": 0.8105658018130666, "grad_norm": 0.2353515625, "learning_rate": 0.00018052746720795848, "loss": 1.5727, "step": 2593 }, { "epoch": 0.8108783994998437, "grad_norm": 0.251953125, "learning_rate": 0.00018051289684085518, "loss": 1.543, "step": 2594 }, { "epoch": 0.8111909971866208, "grad_norm": 0.2421875, "learning_rate": 0.00018049832161308574, "loss": 1.5196, "step": 2595 }, { "epoch": 0.811503594873398, "grad_norm": 0.2412109375, "learning_rate": 0.00018048374152553, "loss": 1.592, "step": 2596 }, { "epoch": 0.811816192560175, "grad_norm": 0.2421875, "learning_rate": 0.00018046915657906826, "loss": 1.6238, "step": 2597 }, { "epoch": 0.8121287902469522, "grad_norm": 0.2421875, "learning_rate": 0.00018045456677458094, "loss": 1.6494, "step": 2598 }, { "epoch": 0.8124413879337293, "grad_norm": 0.2373046875, "learning_rate": 0.00018043997211294896, "loss": 1.7159, "step": 2599 }, { "epoch": 0.8127539856205064, "grad_norm": 0.244140625, "learning_rate": 0.00018042537259505332, "loss": 1.7333, "step": 2600 }, { "epoch": 0.8130665833072835, "grad_norm": 0.2275390625, "learning_rate": 0.00018041076822177546, "loss": 1.7428, "step": 2601 }, { "epoch": 0.8133791809940606, "grad_norm": 0.244140625, "learning_rate": 0.00018039615899399704, "loss": 1.5266, "step": 2602 }, { "epoch": 0.8136917786808378, "grad_norm": 0.2451171875, "learning_rate": 0.00018038154491260006, "loss": 1.4482, "step": 2603 }, { "epoch": 0.8140043763676149, "grad_norm": 0.25, "learning_rate": 0.0001803669259784668, "loss": 1.8164, "step": 2604 }, { "epoch": 0.814316974054392, "grad_norm": 0.24609375, "learning_rate": 0.00018035230219247978, "loss": 1.7801, "step": 2605 }, { "epoch": 0.8146295717411691, "grad_norm": 0.224609375, "learning_rate": 0.0001803376735555219, "loss": 1.5818, "step": 2606 }, { "epoch": 0.8149421694279463, "grad_norm": 0.236328125, "learning_rate": 0.0001803230400684763, "loss": 2.0025, "step": 2607 }, { "epoch": 0.8152547671147233, "grad_norm": 0.240234375, "learning_rate": 0.0001803084017322264, "loss": 1.6328, "step": 2608 }, { "epoch": 0.8155673648015005, "grad_norm": 0.30859375, "learning_rate": 0.00018029375854765597, "loss": 2.289, "step": 2609 }, { "epoch": 0.8158799624882775, "grad_norm": 0.2333984375, "learning_rate": 0.00018027911051564897, "loss": 1.4681, "step": 2610 }, { "epoch": 0.8161925601750547, "grad_norm": 0.2373046875, "learning_rate": 0.0001802644576370898, "loss": 1.7437, "step": 2611 }, { "epoch": 0.8165051578618319, "grad_norm": 0.228515625, "learning_rate": 0.00018024979991286303, "loss": 2.0136, "step": 2612 }, { "epoch": 0.8168177555486089, "grad_norm": 0.240234375, "learning_rate": 0.0001802351373438536, "loss": 1.6401, "step": 2613 }, { "epoch": 0.8171303532353861, "grad_norm": 0.2373046875, "learning_rate": 0.00018022046993094665, "loss": 1.5986, "step": 2614 }, { "epoch": 0.8174429509221631, "grad_norm": 0.228515625, "learning_rate": 0.00018020579767502774, "loss": 1.7392, "step": 2615 }, { "epoch": 0.8177555486089403, "grad_norm": 0.2333984375, "learning_rate": 0.0001801911205769826, "loss": 1.6622, "step": 2616 }, { "epoch": 0.8180681462957174, "grad_norm": 0.232421875, "learning_rate": 0.0001801764386376973, "loss": 1.6786, "step": 2617 }, { "epoch": 0.8183807439824945, "grad_norm": 0.2490234375, "learning_rate": 0.0001801617518580583, "loss": 1.6723, "step": 2618 }, { "epoch": 0.8186933416692717, "grad_norm": 0.232421875, "learning_rate": 0.0001801470602389521, "loss": 1.6344, "step": 2619 }, { "epoch": 0.8190059393560488, "grad_norm": 0.2431640625, "learning_rate": 0.0001801323637812658, "loss": 1.8773, "step": 2620 }, { "epoch": 0.8193185370428259, "grad_norm": 0.2412109375, "learning_rate": 0.00018011766248588655, "loss": 1.7633, "step": 2621 }, { "epoch": 0.819631134729603, "grad_norm": 0.24609375, "learning_rate": 0.00018010295635370192, "loss": 1.7818, "step": 2622 }, { "epoch": 0.8199437324163801, "grad_norm": 0.2314453125, "learning_rate": 0.00018008824538559977, "loss": 1.5338, "step": 2623 }, { "epoch": 0.8202563301031572, "grad_norm": 0.2890625, "learning_rate": 0.00018007352958246818, "loss": 2.1521, "step": 2624 }, { "epoch": 0.8205689277899344, "grad_norm": 0.2421875, "learning_rate": 0.00018005880894519555, "loss": 1.6819, "step": 2625 }, { "epoch": 0.8208815254767114, "grad_norm": 0.2314453125, "learning_rate": 0.00018004408347467062, "loss": 1.7966, "step": 2626 }, { "epoch": 0.8211941231634886, "grad_norm": 0.2275390625, "learning_rate": 0.00018002935317178235, "loss": 1.5681, "step": 2627 }, { "epoch": 0.8215067208502657, "grad_norm": 0.2490234375, "learning_rate": 0.00018001461803742008, "loss": 1.8119, "step": 2628 }, { "epoch": 0.8218193185370428, "grad_norm": 0.259765625, "learning_rate": 0.00017999987807247334, "loss": 2.2241, "step": 2629 }, { "epoch": 0.82213191622382, "grad_norm": 0.2470703125, "learning_rate": 0.00017998513327783199, "loss": 1.5033, "step": 2630 }, { "epoch": 0.822444513910597, "grad_norm": 0.2431640625, "learning_rate": 0.00017997038365438628, "loss": 1.481, "step": 2631 }, { "epoch": 0.8227571115973742, "grad_norm": 0.2578125, "learning_rate": 0.00017995562920302652, "loss": 1.7684, "step": 2632 }, { "epoch": 0.8230697092841514, "grad_norm": 0.251953125, "learning_rate": 0.0001799408699246436, "loss": 1.6599, "step": 2633 }, { "epoch": 0.8233823069709284, "grad_norm": 0.255859375, "learning_rate": 0.00017992610582012847, "loss": 1.3327, "step": 2634 }, { "epoch": 0.8236949046577056, "grad_norm": 0.236328125, "learning_rate": 0.0001799113368903725, "loss": 1.7121, "step": 2635 }, { "epoch": 0.8240075023444826, "grad_norm": 0.22265625, "learning_rate": 0.00017989656313626727, "loss": 1.766, "step": 2636 }, { "epoch": 0.8243201000312598, "grad_norm": 0.2265625, "learning_rate": 0.0001798817845587047, "loss": 1.85, "step": 2637 }, { "epoch": 0.8246326977180369, "grad_norm": 0.240234375, "learning_rate": 0.000179867001158577, "loss": 1.8962, "step": 2638 }, { "epoch": 0.824945295404814, "grad_norm": 0.2236328125, "learning_rate": 0.0001798522129367767, "loss": 1.4497, "step": 2639 }, { "epoch": 0.8252578930915911, "grad_norm": 0.2421875, "learning_rate": 0.00017983741989419655, "loss": 1.6794, "step": 2640 }, { "epoch": 0.8255704907783682, "grad_norm": 0.2490234375, "learning_rate": 0.0001798226220317296, "loss": 1.718, "step": 2641 }, { "epoch": 0.8258830884651454, "grad_norm": 0.23046875, "learning_rate": 0.00017980781935026925, "loss": 1.7489, "step": 2642 }, { "epoch": 0.8261956861519225, "grad_norm": 0.25390625, "learning_rate": 0.0001797930118507091, "loss": 1.7344, "step": 2643 }, { "epoch": 0.8265082838386996, "grad_norm": 0.25, "learning_rate": 0.0001797781995339432, "loss": 1.7674, "step": 2644 }, { "epoch": 0.8268208815254767, "grad_norm": 0.2373046875, "learning_rate": 0.0001797633824008657, "loss": 2.0352, "step": 2645 }, { "epoch": 0.8271334792122538, "grad_norm": 0.2265625, "learning_rate": 0.00017974856045237117, "loss": 1.6354, "step": 2646 }, { "epoch": 0.8274460768990309, "grad_norm": 0.2294921875, "learning_rate": 0.00017973373368935445, "loss": 1.737, "step": 2647 }, { "epoch": 0.8277586745858081, "grad_norm": 0.25, "learning_rate": 0.00017971890211271059, "loss": 1.7081, "step": 2648 }, { "epoch": 0.8280712722725851, "grad_norm": 0.251953125, "learning_rate": 0.000179704065723335, "loss": 1.3865, "step": 2649 }, { "epoch": 0.8283838699593623, "grad_norm": 0.2373046875, "learning_rate": 0.00017968922452212343, "loss": 1.5347, "step": 2650 }, { "epoch": 0.8286964676461395, "grad_norm": 0.2255859375, "learning_rate": 0.00017967437850997185, "loss": 1.7372, "step": 2651 }, { "epoch": 0.8290090653329165, "grad_norm": 0.2392578125, "learning_rate": 0.00017965952768777649, "loss": 1.5994, "step": 2652 }, { "epoch": 0.8293216630196937, "grad_norm": 0.2431640625, "learning_rate": 0.0001796446720564339, "loss": 1.8905, "step": 2653 }, { "epoch": 0.8296342607064707, "grad_norm": 0.3359375, "learning_rate": 0.00017962981161684098, "loss": 2.5074, "step": 2654 }, { "epoch": 0.8299468583932479, "grad_norm": 0.2421875, "learning_rate": 0.00017961494636989486, "loss": 1.9347, "step": 2655 }, { "epoch": 0.830259456080025, "grad_norm": 0.24609375, "learning_rate": 0.00017960007631649298, "loss": 1.8819, "step": 2656 }, { "epoch": 0.8305720537668021, "grad_norm": 0.240234375, "learning_rate": 0.00017958520145753307, "loss": 1.6299, "step": 2657 }, { "epoch": 0.8308846514535793, "grad_norm": 0.25390625, "learning_rate": 0.00017957032179391312, "loss": 1.7028, "step": 2658 }, { "epoch": 0.8311972491403563, "grad_norm": 0.23046875, "learning_rate": 0.00017955543732653143, "loss": 1.8788, "step": 2659 }, { "epoch": 0.8315098468271335, "grad_norm": 0.228515625, "learning_rate": 0.0001795405480562866, "loss": 1.7432, "step": 2660 }, { "epoch": 0.8318224445139106, "grad_norm": 0.24609375, "learning_rate": 0.00017952565398407757, "loss": 1.583, "step": 2661 }, { "epoch": 0.8321350422006877, "grad_norm": 0.240234375, "learning_rate": 0.00017951075511080347, "loss": 1.7078, "step": 2662 }, { "epoch": 0.8324476398874648, "grad_norm": 0.234375, "learning_rate": 0.0001794958514373637, "loss": 1.4488, "step": 2663 }, { "epoch": 0.832760237574242, "grad_norm": 0.2421875, "learning_rate": 0.00017948094296465814, "loss": 1.6082, "step": 2664 }, { "epoch": 0.833072835261019, "grad_norm": 0.244140625, "learning_rate": 0.00017946602969358673, "loss": 1.6088, "step": 2665 }, { "epoch": 0.8333854329477962, "grad_norm": 0.248046875, "learning_rate": 0.00017945111162504987, "loss": 1.7525, "step": 2666 }, { "epoch": 0.8336980306345733, "grad_norm": 0.2470703125, "learning_rate": 0.00017943618875994815, "loss": 1.8168, "step": 2667 }, { "epoch": 0.8340106283213504, "grad_norm": 0.244140625, "learning_rate": 0.00017942126109918248, "loss": 1.7631, "step": 2668 }, { "epoch": 0.8343232260081276, "grad_norm": 0.234375, "learning_rate": 0.00017940632864365408, "loss": 1.665, "step": 2669 }, { "epoch": 0.8346358236949046, "grad_norm": 0.2265625, "learning_rate": 0.00017939139139426443, "loss": 1.7743, "step": 2670 }, { "epoch": 0.8349484213816818, "grad_norm": 0.2353515625, "learning_rate": 0.0001793764493519153, "loss": 1.6251, "step": 2671 }, { "epoch": 0.8352610190684588, "grad_norm": 0.2294921875, "learning_rate": 0.00017936150251750876, "loss": 1.5676, "step": 2672 }, { "epoch": 0.835573616755236, "grad_norm": 0.244140625, "learning_rate": 0.0001793465508919472, "loss": 1.9198, "step": 2673 }, { "epoch": 0.8358862144420132, "grad_norm": 0.2333984375, "learning_rate": 0.00017933159447613325, "loss": 1.8999, "step": 2674 }, { "epoch": 0.8361988121287902, "grad_norm": 0.232421875, "learning_rate": 0.00017931663327096985, "loss": 1.5773, "step": 2675 }, { "epoch": 0.8365114098155674, "grad_norm": 0.251953125, "learning_rate": 0.00017930166727736022, "loss": 1.5615, "step": 2676 }, { "epoch": 0.8368240075023445, "grad_norm": 0.2265625, "learning_rate": 0.0001792866964962079, "loss": 1.7466, "step": 2677 }, { "epoch": 0.8371366051891216, "grad_norm": 0.236328125, "learning_rate": 0.00017927172092841665, "loss": 1.5719, "step": 2678 }, { "epoch": 0.8374492028758987, "grad_norm": 0.236328125, "learning_rate": 0.00017925674057489062, "loss": 1.8351, "step": 2679 }, { "epoch": 0.8377618005626758, "grad_norm": 0.2421875, "learning_rate": 0.00017924175543653412, "loss": 1.3423, "step": 2680 }, { "epoch": 0.838074398249453, "grad_norm": 0.2314453125, "learning_rate": 0.0001792267655142519, "loss": 1.8691, "step": 2681 }, { "epoch": 0.8383869959362301, "grad_norm": 0.23046875, "learning_rate": 0.00017921177080894887, "loss": 1.5727, "step": 2682 }, { "epoch": 0.8386995936230072, "grad_norm": 0.2236328125, "learning_rate": 0.0001791967713215303, "loss": 1.5138, "step": 2683 }, { "epoch": 0.8390121913097843, "grad_norm": 0.244140625, "learning_rate": 0.00017918176705290174, "loss": 1.7783, "step": 2684 }, { "epoch": 0.8393247889965614, "grad_norm": 0.23828125, "learning_rate": 0.00017916675800396897, "loss": 1.8948, "step": 2685 }, { "epoch": 0.8396373866833385, "grad_norm": 0.240234375, "learning_rate": 0.00017915174417563816, "loss": 1.6654, "step": 2686 }, { "epoch": 0.8399499843701157, "grad_norm": 0.2451171875, "learning_rate": 0.00017913672556881566, "loss": 1.8393, "step": 2687 }, { "epoch": 0.8402625820568927, "grad_norm": 0.232421875, "learning_rate": 0.00017912170218440822, "loss": 1.5724, "step": 2688 }, { "epoch": 0.8405751797436699, "grad_norm": 0.236328125, "learning_rate": 0.0001791066740233228, "loss": 1.5801, "step": 2689 }, { "epoch": 0.8408877774304471, "grad_norm": 0.259765625, "learning_rate": 0.00017909164108646667, "loss": 1.6645, "step": 2690 }, { "epoch": 0.8412003751172241, "grad_norm": 0.2255859375, "learning_rate": 0.00017907660337474735, "loss": 1.6794, "step": 2691 }, { "epoch": 0.8415129728040013, "grad_norm": 0.2392578125, "learning_rate": 0.0001790615608890727, "loss": 1.6382, "step": 2692 }, { "epoch": 0.8418255704907783, "grad_norm": 0.24609375, "learning_rate": 0.00017904651363035093, "loss": 1.6977, "step": 2693 }, { "epoch": 0.8421381681775555, "grad_norm": 0.23828125, "learning_rate": 0.00017903146159949036, "loss": 1.4432, "step": 2694 }, { "epoch": 0.8424507658643327, "grad_norm": 0.22265625, "learning_rate": 0.00017901640479739975, "loss": 1.7628, "step": 2695 }, { "epoch": 0.8427633635511097, "grad_norm": 0.2470703125, "learning_rate": 0.0001790013432249881, "loss": 1.6406, "step": 2696 }, { "epoch": 0.8430759612378869, "grad_norm": 0.37890625, "learning_rate": 0.00017898627688316468, "loss": 2.2605, "step": 2697 }, { "epoch": 0.8433885589246639, "grad_norm": 0.255859375, "learning_rate": 0.00017897120577283908, "loss": 1.6559, "step": 2698 }, { "epoch": 0.8437011566114411, "grad_norm": 0.2373046875, "learning_rate": 0.00017895612989492113, "loss": 1.7878, "step": 2699 }, { "epoch": 0.8440137542982182, "grad_norm": 0.25, "learning_rate": 0.000178941049250321, "loss": 1.7082, "step": 2700 }, { "epoch": 0.8443263519849953, "grad_norm": 0.228515625, "learning_rate": 0.00017892596383994915, "loss": 1.6265, "step": 2701 }, { "epoch": 0.8446389496717724, "grad_norm": 0.2421875, "learning_rate": 0.00017891087366471632, "loss": 1.6036, "step": 2702 }, { "epoch": 0.8449515473585495, "grad_norm": 0.2470703125, "learning_rate": 0.00017889577872553343, "loss": 1.4701, "step": 2703 }, { "epoch": 0.8452641450453267, "grad_norm": 0.236328125, "learning_rate": 0.00017888067902331186, "loss": 1.7345, "step": 2704 }, { "epoch": 0.8455767427321038, "grad_norm": 0.2373046875, "learning_rate": 0.0001788655745589632, "loss": 1.7042, "step": 2705 }, { "epoch": 0.8458893404188809, "grad_norm": 0.2392578125, "learning_rate": 0.0001788504653333993, "loss": 1.9033, "step": 2706 }, { "epoch": 0.846201938105658, "grad_norm": 0.216796875, "learning_rate": 0.0001788353513475323, "loss": 1.6525, "step": 2707 }, { "epoch": 0.8465145357924352, "grad_norm": 0.251953125, "learning_rate": 0.0001788202326022747, "loss": 1.6119, "step": 2708 }, { "epoch": 0.8468271334792122, "grad_norm": 0.2392578125, "learning_rate": 0.0001788051090985392, "loss": 1.7473, "step": 2709 }, { "epoch": 0.8471397311659894, "grad_norm": 0.244140625, "learning_rate": 0.00017878998083723885, "loss": 1.8992, "step": 2710 }, { "epoch": 0.8474523288527664, "grad_norm": 0.2353515625, "learning_rate": 0.00017877484781928698, "loss": 1.6285, "step": 2711 }, { "epoch": 0.8477649265395436, "grad_norm": 0.24609375, "learning_rate": 0.00017875971004559712, "loss": 1.671, "step": 2712 }, { "epoch": 0.8480775242263208, "grad_norm": 0.2314453125, "learning_rate": 0.0001787445675170832, "loss": 1.639, "step": 2713 }, { "epoch": 0.8483901219130978, "grad_norm": 0.263671875, "learning_rate": 0.00017872942023465944, "loss": 2.2887, "step": 2714 }, { "epoch": 0.848702719599875, "grad_norm": 0.2392578125, "learning_rate": 0.00017871426819924025, "loss": 1.6424, "step": 2715 }, { "epoch": 0.849015317286652, "grad_norm": 0.2373046875, "learning_rate": 0.00017869911141174034, "loss": 1.6615, "step": 2716 }, { "epoch": 0.8493279149734292, "grad_norm": 0.251953125, "learning_rate": 0.00017868394987307482, "loss": 1.8865, "step": 2717 }, { "epoch": 0.8496405126602063, "grad_norm": 0.251953125, "learning_rate": 0.00017866878358415895, "loss": 1.4584, "step": 2718 }, { "epoch": 0.8499531103469834, "grad_norm": 0.236328125, "learning_rate": 0.0001786536125459084, "loss": 1.7852, "step": 2719 }, { "epoch": 0.8502657080337606, "grad_norm": 0.2392578125, "learning_rate": 0.000178638436759239, "loss": 1.5773, "step": 2720 }, { "epoch": 0.8505783057205377, "grad_norm": 0.255859375, "learning_rate": 0.00017862325622506698, "loss": 1.5571, "step": 2721 }, { "epoch": 0.8508909034073148, "grad_norm": 0.2490234375, "learning_rate": 0.00017860807094430877, "loss": 1.6325, "step": 2722 }, { "epoch": 0.8512035010940919, "grad_norm": 0.2421875, "learning_rate": 0.0001785928809178812, "loss": 2.1872, "step": 2723 }, { "epoch": 0.851516098780869, "grad_norm": 0.2421875, "learning_rate": 0.0001785776861467012, "loss": 1.7218, "step": 2724 }, { "epoch": 0.8518286964676461, "grad_norm": 0.232421875, "learning_rate": 0.00017856248663168618, "loss": 1.8967, "step": 2725 }, { "epoch": 0.8521412941544233, "grad_norm": 0.234375, "learning_rate": 0.00017854728237375373, "loss": 1.412, "step": 2726 }, { "epoch": 0.8524538918412004, "grad_norm": 0.2490234375, "learning_rate": 0.00017853207337382174, "loss": 1.5824, "step": 2727 }, { "epoch": 0.8527664895279775, "grad_norm": 0.2392578125, "learning_rate": 0.0001785168596328084, "loss": 1.6068, "step": 2728 }, { "epoch": 0.8530790872147546, "grad_norm": 0.2314453125, "learning_rate": 0.0001785016411516322, "loss": 1.5164, "step": 2729 }, { "epoch": 0.8533916849015317, "grad_norm": 0.2412109375, "learning_rate": 0.00017848641793121188, "loss": 1.8491, "step": 2730 }, { "epoch": 0.8537042825883089, "grad_norm": 0.24609375, "learning_rate": 0.0001784711899724665, "loss": 1.6247, "step": 2731 }, { "epoch": 0.8540168802750859, "grad_norm": 0.25390625, "learning_rate": 0.0001784559572763154, "loss": 1.4966, "step": 2732 }, { "epoch": 0.8543294779618631, "grad_norm": 0.2275390625, "learning_rate": 0.00017844071984367816, "loss": 1.5311, "step": 2733 }, { "epoch": 0.8546420756486403, "grad_norm": 0.2255859375, "learning_rate": 0.0001784254776754747, "loss": 1.521, "step": 2734 }, { "epoch": 0.8549546733354173, "grad_norm": 0.2353515625, "learning_rate": 0.00017841023077262523, "loss": 1.7637, "step": 2735 }, { "epoch": 0.8552672710221945, "grad_norm": 0.2353515625, "learning_rate": 0.0001783949791360502, "loss": 1.3663, "step": 2736 }, { "epoch": 0.8555798687089715, "grad_norm": 0.2431640625, "learning_rate": 0.0001783797227666704, "loss": 1.6854, "step": 2737 }, { "epoch": 0.8558924663957487, "grad_norm": 0.232421875, "learning_rate": 0.00017836446166540683, "loss": 1.7461, "step": 2738 }, { "epoch": 0.8562050640825258, "grad_norm": 0.2373046875, "learning_rate": 0.00017834919583318087, "loss": 1.5579, "step": 2739 }, { "epoch": 0.8565176617693029, "grad_norm": 0.2333984375, "learning_rate": 0.00017833392527091412, "loss": 1.8503, "step": 2740 }, { "epoch": 0.85683025945608, "grad_norm": 0.2373046875, "learning_rate": 0.00017831864997952846, "loss": 1.7036, "step": 2741 }, { "epoch": 0.8571428571428571, "grad_norm": 0.2392578125, "learning_rate": 0.00017830336995994608, "loss": 1.546, "step": 2742 }, { "epoch": 0.8574554548296343, "grad_norm": 0.2421875, "learning_rate": 0.00017828808521308949, "loss": 1.7367, "step": 2743 }, { "epoch": 0.8577680525164114, "grad_norm": 0.2294921875, "learning_rate": 0.00017827279573988145, "loss": 1.6342, "step": 2744 }, { "epoch": 0.8580806502031885, "grad_norm": 0.2421875, "learning_rate": 0.00017825750154124497, "loss": 1.4992, "step": 2745 }, { "epoch": 0.8583932478899656, "grad_norm": 0.2333984375, "learning_rate": 0.00017824220261810337, "loss": 1.6274, "step": 2746 }, { "epoch": 0.8587058455767427, "grad_norm": 0.25, "learning_rate": 0.00017822689897138035, "loss": 1.4625, "step": 2747 }, { "epoch": 0.8590184432635198, "grad_norm": 0.232421875, "learning_rate": 0.00017821159060199974, "loss": 1.4388, "step": 2748 }, { "epoch": 0.859331040950297, "grad_norm": 0.23828125, "learning_rate": 0.00017819627751088573, "loss": 1.4505, "step": 2749 }, { "epoch": 0.859643638637074, "grad_norm": 0.2255859375, "learning_rate": 0.0001781809596989628, "loss": 1.4593, "step": 2750 }, { "epoch": 0.8599562363238512, "grad_norm": 0.224609375, "learning_rate": 0.0001781656371671557, "loss": 1.5498, "step": 2751 }, { "epoch": 0.8602688340106284, "grad_norm": 0.30859375, "learning_rate": 0.00017815030991638947, "loss": 2.1876, "step": 2752 }, { "epoch": 0.8605814316974054, "grad_norm": 0.2392578125, "learning_rate": 0.00017813497794758946, "loss": 1.4955, "step": 2753 }, { "epoch": 0.8608940293841826, "grad_norm": 0.236328125, "learning_rate": 0.00017811964126168123, "loss": 1.6525, "step": 2754 }, { "epoch": 0.8612066270709596, "grad_norm": 0.24609375, "learning_rate": 0.00017810429985959077, "loss": 1.7273, "step": 2755 }, { "epoch": 0.8615192247577368, "grad_norm": 0.26171875, "learning_rate": 0.00017808895374224414, "loss": 1.6337, "step": 2756 }, { "epoch": 0.861831822444514, "grad_norm": 0.232421875, "learning_rate": 0.0001780736029105679, "loss": 1.572, "step": 2757 }, { "epoch": 0.862144420131291, "grad_norm": 0.2421875, "learning_rate": 0.00017805824736548872, "loss": 1.7677, "step": 2758 }, { "epoch": 0.8624570178180682, "grad_norm": 0.2353515625, "learning_rate": 0.00017804288710793374, "loss": 1.4813, "step": 2759 }, { "epoch": 0.8627696155048452, "grad_norm": 0.255859375, "learning_rate": 0.00017802752213883017, "loss": 1.863, "step": 2760 }, { "epoch": 0.8630822131916224, "grad_norm": 0.232421875, "learning_rate": 0.00017801215245910569, "loss": 1.7106, "step": 2761 }, { "epoch": 0.8633948108783995, "grad_norm": 0.2392578125, "learning_rate": 0.00017799677806968811, "loss": 1.5748, "step": 2762 }, { "epoch": 0.8637074085651766, "grad_norm": 0.263671875, "learning_rate": 0.00017798139897150564, "loss": 1.7248, "step": 2763 }, { "epoch": 0.8640200062519537, "grad_norm": 0.2451171875, "learning_rate": 0.00017796601516548676, "loss": 1.7132, "step": 2764 }, { "epoch": 0.8643326039387309, "grad_norm": 0.2373046875, "learning_rate": 0.0001779506266525602, "loss": 1.742, "step": 2765 }, { "epoch": 0.864645201625508, "grad_norm": 0.2431640625, "learning_rate": 0.000177935233433655, "loss": 1.8706, "step": 2766 }, { "epoch": 0.8649577993122851, "grad_norm": 0.2412109375, "learning_rate": 0.0001779198355097004, "loss": 1.5686, "step": 2767 }, { "epoch": 0.8652703969990622, "grad_norm": 0.234375, "learning_rate": 0.00017790443288162605, "loss": 1.7863, "step": 2768 }, { "epoch": 0.8655829946858393, "grad_norm": 0.248046875, "learning_rate": 0.00017788902555036182, "loss": 1.6466, "step": 2769 }, { "epoch": 0.8658955923726165, "grad_norm": 0.26171875, "learning_rate": 0.00017787361351683786, "loss": 1.7133, "step": 2770 }, { "epoch": 0.8662081900593935, "grad_norm": 0.2314453125, "learning_rate": 0.00017785819678198462, "loss": 1.7669, "step": 2771 }, { "epoch": 0.8665207877461707, "grad_norm": 0.23046875, "learning_rate": 0.0001778427753467328, "loss": 1.7054, "step": 2772 }, { "epoch": 0.8668333854329477, "grad_norm": 0.240234375, "learning_rate": 0.00017782734921201348, "loss": 1.5878, "step": 2773 }, { "epoch": 0.8671459831197249, "grad_norm": 0.2490234375, "learning_rate": 0.00017781191837875788, "loss": 1.5847, "step": 2774 }, { "epoch": 0.8674585808065021, "grad_norm": 0.240234375, "learning_rate": 0.0001777964828478976, "loss": 1.556, "step": 2775 }, { "epoch": 0.8677711784932791, "grad_norm": 0.236328125, "learning_rate": 0.00017778104262036455, "loss": 1.481, "step": 2776 }, { "epoch": 0.8680837761800563, "grad_norm": 0.2373046875, "learning_rate": 0.0001777655976970908, "loss": 1.5842, "step": 2777 }, { "epoch": 0.8683963738668334, "grad_norm": 0.251953125, "learning_rate": 0.00017775014807900884, "loss": 1.6188, "step": 2778 }, { "epoch": 0.8687089715536105, "grad_norm": 0.2431640625, "learning_rate": 0.00017773469376705138, "loss": 1.7405, "step": 2779 }, { "epoch": 0.8690215692403876, "grad_norm": 0.234375, "learning_rate": 0.00017771923476215138, "loss": 2.009, "step": 2780 }, { "epoch": 0.8693341669271647, "grad_norm": 0.2373046875, "learning_rate": 0.00017770377106524215, "loss": 1.5022, "step": 2781 }, { "epoch": 0.8696467646139419, "grad_norm": 0.298828125, "learning_rate": 0.0001776883026772572, "loss": 2.3243, "step": 2782 }, { "epoch": 0.869959362300719, "grad_norm": 0.25, "learning_rate": 0.00017767282959913047, "loss": 1.5778, "step": 2783 }, { "epoch": 0.8702719599874961, "grad_norm": 0.244140625, "learning_rate": 0.00017765735183179602, "loss": 1.648, "step": 2784 }, { "epoch": 0.8705845576742732, "grad_norm": 0.23828125, "learning_rate": 0.00017764186937618828, "loss": 1.9461, "step": 2785 }, { "epoch": 0.8708971553610503, "grad_norm": 0.23828125, "learning_rate": 0.00017762638223324192, "loss": 1.6331, "step": 2786 }, { "epoch": 0.8712097530478274, "grad_norm": 0.23046875, "learning_rate": 0.00017761089040389198, "loss": 1.5506, "step": 2787 }, { "epoch": 0.8715223507346046, "grad_norm": 0.236328125, "learning_rate": 0.00017759539388907366, "loss": 1.4817, "step": 2788 }, { "epoch": 0.8718349484213817, "grad_norm": 0.2412109375, "learning_rate": 0.00017757989268972257, "loss": 1.4606, "step": 2789 }, { "epoch": 0.8721475461081588, "grad_norm": 0.2373046875, "learning_rate": 0.00017756438680677445, "loss": 1.4484, "step": 2790 }, { "epoch": 0.872460143794936, "grad_norm": 0.234375, "learning_rate": 0.00017754887624116548, "loss": 1.5865, "step": 2791 }, { "epoch": 0.872772741481713, "grad_norm": 0.2353515625, "learning_rate": 0.00017753336099383203, "loss": 1.514, "step": 2792 }, { "epoch": 0.8730853391684902, "grad_norm": 0.2373046875, "learning_rate": 0.00017751784106571079, "loss": 1.3963, "step": 2793 }, { "epoch": 0.8733979368552672, "grad_norm": 0.2392578125, "learning_rate": 0.00017750231645773869, "loss": 1.8982, "step": 2794 }, { "epoch": 0.8737105345420444, "grad_norm": 0.232421875, "learning_rate": 0.00017748678717085297, "loss": 1.7107, "step": 2795 }, { "epoch": 0.8740231322288216, "grad_norm": 0.2431640625, "learning_rate": 0.00017747125320599118, "loss": 1.5219, "step": 2796 }, { "epoch": 0.8743357299155986, "grad_norm": 0.236328125, "learning_rate": 0.0001774557145640911, "loss": 1.6148, "step": 2797 }, { "epoch": 0.8746483276023758, "grad_norm": 0.2412109375, "learning_rate": 0.00017744017124609083, "loss": 1.4968, "step": 2798 }, { "epoch": 0.8749609252891528, "grad_norm": 0.24609375, "learning_rate": 0.00017742462325292873, "loss": 1.6438, "step": 2799 }, { "epoch": 0.87527352297593, "grad_norm": 0.2431640625, "learning_rate": 0.0001774090705855435, "loss": 1.8157, "step": 2800 }, { "epoch": 0.8755861206627071, "grad_norm": 0.2314453125, "learning_rate": 0.000177393513244874, "loss": 1.8969, "step": 2801 }, { "epoch": 0.8758987183494842, "grad_norm": 0.2353515625, "learning_rate": 0.0001773779512318595, "loss": 1.7561, "step": 2802 }, { "epoch": 0.8762113160362613, "grad_norm": 0.2421875, "learning_rate": 0.00017736238454743946, "loss": 1.8387, "step": 2803 }, { "epoch": 0.8765239137230384, "grad_norm": 0.2421875, "learning_rate": 0.0001773468131925537, "loss": 1.8426, "step": 2804 }, { "epoch": 0.8768365114098156, "grad_norm": 0.25, "learning_rate": 0.00017733123716814225, "loss": 1.5613, "step": 2805 }, { "epoch": 0.8771491090965927, "grad_norm": 0.255859375, "learning_rate": 0.0001773156564751455, "loss": 1.9907, "step": 2806 }, { "epoch": 0.8774617067833698, "grad_norm": 0.232421875, "learning_rate": 0.00017730007111450402, "loss": 1.3814, "step": 2807 }, { "epoch": 0.8777743044701469, "grad_norm": 0.23046875, "learning_rate": 0.00017728448108715874, "loss": 1.459, "step": 2808 }, { "epoch": 0.8780869021569241, "grad_norm": 0.2451171875, "learning_rate": 0.00017726888639405086, "loss": 1.6541, "step": 2809 }, { "epoch": 0.8783994998437011, "grad_norm": 0.2294921875, "learning_rate": 0.00017725328703612183, "loss": 1.6136, "step": 2810 }, { "epoch": 0.8787120975304783, "grad_norm": 0.2392578125, "learning_rate": 0.00017723768301431344, "loss": 1.9023, "step": 2811 }, { "epoch": 0.8790246952172553, "grad_norm": 0.3203125, "learning_rate": 0.00017722207432956767, "loss": 2.4062, "step": 2812 }, { "epoch": 0.8793372929040325, "grad_norm": 0.232421875, "learning_rate": 0.00017720646098282687, "loss": 1.6481, "step": 2813 }, { "epoch": 0.8796498905908097, "grad_norm": 0.232421875, "learning_rate": 0.00017719084297503367, "loss": 1.7955, "step": 2814 }, { "epoch": 0.8799624882775867, "grad_norm": 0.2333984375, "learning_rate": 0.0001771752203071309, "loss": 1.7442, "step": 2815 }, { "epoch": 0.8802750859643639, "grad_norm": 0.2451171875, "learning_rate": 0.0001771595929800617, "loss": 1.9734, "step": 2816 }, { "epoch": 0.8805876836511409, "grad_norm": 0.244140625, "learning_rate": 0.0001771439609947696, "loss": 1.651, "step": 2817 }, { "epoch": 0.8809002813379181, "grad_norm": 0.240234375, "learning_rate": 0.00017712832435219823, "loss": 1.6914, "step": 2818 }, { "epoch": 0.8812128790246953, "grad_norm": 0.2490234375, "learning_rate": 0.00017711268305329166, "loss": 1.9028, "step": 2819 }, { "epoch": 0.8815254767114723, "grad_norm": 0.2412109375, "learning_rate": 0.00017709703709899413, "loss": 1.7345, "step": 2820 }, { "epoch": 0.8818380743982495, "grad_norm": 0.2255859375, "learning_rate": 0.00017708138649025023, "loss": 1.8512, "step": 2821 }, { "epoch": 0.8821506720850266, "grad_norm": 0.2412109375, "learning_rate": 0.0001770657312280048, "loss": 1.6781, "step": 2822 }, { "epoch": 0.8824632697718037, "grad_norm": 0.23828125, "learning_rate": 0.00017705007131320298, "loss": 1.5084, "step": 2823 }, { "epoch": 0.8827758674585808, "grad_norm": 0.2431640625, "learning_rate": 0.00017703440674679015, "loss": 1.5801, "step": 2824 }, { "epoch": 0.8830884651453579, "grad_norm": 0.2265625, "learning_rate": 0.00017701873752971206, "loss": 1.7738, "step": 2825 }, { "epoch": 0.883401062832135, "grad_norm": 0.232421875, "learning_rate": 0.00017700306366291458, "loss": 1.7093, "step": 2826 }, { "epoch": 0.8837136605189122, "grad_norm": 0.23046875, "learning_rate": 0.00017698738514734406, "loss": 1.7994, "step": 2827 }, { "epoch": 0.8840262582056893, "grad_norm": 0.2216796875, "learning_rate": 0.00017697170198394696, "loss": 1.7524, "step": 2828 }, { "epoch": 0.8843388558924664, "grad_norm": 0.2470703125, "learning_rate": 0.0001769560141736702, "loss": 1.4667, "step": 2829 }, { "epoch": 0.8846514535792435, "grad_norm": 0.23828125, "learning_rate": 0.00017694032171746072, "loss": 1.4843, "step": 2830 }, { "epoch": 0.8849640512660206, "grad_norm": 0.240234375, "learning_rate": 0.000176924624616266, "loss": 1.4988, "step": 2831 }, { "epoch": 0.8852766489527978, "grad_norm": 0.2412109375, "learning_rate": 0.00017690892287103367, "loss": 1.5816, "step": 2832 }, { "epoch": 0.8855892466395748, "grad_norm": 0.2333984375, "learning_rate": 0.00017689321648271166, "loss": 1.7245, "step": 2833 }, { "epoch": 0.885901844326352, "grad_norm": 0.2216796875, "learning_rate": 0.00017687750545224815, "loss": 1.7804, "step": 2834 }, { "epoch": 0.8862144420131292, "grad_norm": 0.251953125, "learning_rate": 0.0001768617897805917, "loss": 1.5097, "step": 2835 }, { "epoch": 0.8865270396999062, "grad_norm": 0.2333984375, "learning_rate": 0.00017684606946869106, "loss": 1.5496, "step": 2836 }, { "epoch": 0.8868396373866834, "grad_norm": 0.236328125, "learning_rate": 0.00017683034451749526, "loss": 1.829, "step": 2837 }, { "epoch": 0.8871522350734604, "grad_norm": 0.251953125, "learning_rate": 0.0001768146149279537, "loss": 1.4844, "step": 2838 }, { "epoch": 0.8874648327602376, "grad_norm": 0.2333984375, "learning_rate": 0.00017679888070101592, "loss": 1.7066, "step": 2839 }, { "epoch": 0.8877774304470147, "grad_norm": 0.2333984375, "learning_rate": 0.00017678314183763183, "loss": 1.5307, "step": 2840 }, { "epoch": 0.8880900281337918, "grad_norm": 0.240234375, "learning_rate": 0.00017676739833875164, "loss": 1.4304, "step": 2841 }, { "epoch": 0.888402625820569, "grad_norm": 0.22265625, "learning_rate": 0.00017675165020532578, "loss": 1.6068, "step": 2842 }, { "epoch": 0.888715223507346, "grad_norm": 0.244140625, "learning_rate": 0.000176735897438305, "loss": 1.4709, "step": 2843 }, { "epoch": 0.8890278211941232, "grad_norm": 0.23828125, "learning_rate": 0.00017672014003864033, "loss": 1.6562, "step": 2844 }, { "epoch": 0.8893404188809003, "grad_norm": 0.2265625, "learning_rate": 0.000176704378007283, "loss": 1.8352, "step": 2845 }, { "epoch": 0.8896530165676774, "grad_norm": 0.259765625, "learning_rate": 0.0001766886113451846, "loss": 1.8639, "step": 2846 }, { "epoch": 0.8899656142544545, "grad_norm": 0.2353515625, "learning_rate": 0.00017667284005329708, "loss": 1.6163, "step": 2847 }, { "epoch": 0.8902782119412317, "grad_norm": 0.2412109375, "learning_rate": 0.00017665706413257245, "loss": 1.7933, "step": 2848 }, { "epoch": 0.8905908096280087, "grad_norm": 0.232421875, "learning_rate": 0.0001766412835839632, "loss": 1.6013, "step": 2849 }, { "epoch": 0.8909034073147859, "grad_norm": 0.248046875, "learning_rate": 0.000176625498408422, "loss": 1.6694, "step": 2850 }, { "epoch": 0.891216005001563, "grad_norm": 0.25390625, "learning_rate": 0.0001766097086069018, "loss": 1.6816, "step": 2851 }, { "epoch": 0.8915286026883401, "grad_norm": 0.2451171875, "learning_rate": 0.00017659391418035588, "loss": 1.7289, "step": 2852 }, { "epoch": 0.8918412003751173, "grad_norm": 0.2353515625, "learning_rate": 0.0001765781151297377, "loss": 1.4146, "step": 2853 }, { "epoch": 0.8921537980618943, "grad_norm": 0.2373046875, "learning_rate": 0.0001765623114560012, "loss": 1.6338, "step": 2854 }, { "epoch": 0.8924663957486715, "grad_norm": 0.2412109375, "learning_rate": 0.00017654650316010036, "loss": 1.623, "step": 2855 }, { "epoch": 0.8927789934354485, "grad_norm": 0.24609375, "learning_rate": 0.00017653069024298957, "loss": 1.6547, "step": 2856 }, { "epoch": 0.8930915911222257, "grad_norm": 0.2314453125, "learning_rate": 0.0001765148727056235, "loss": 1.7697, "step": 2857 }, { "epoch": 0.8934041888090029, "grad_norm": 0.2412109375, "learning_rate": 0.00017649905054895705, "loss": 1.7488, "step": 2858 }, { "epoch": 0.8937167864957799, "grad_norm": 0.3046875, "learning_rate": 0.00017648322377394546, "loss": 2.1237, "step": 2859 }, { "epoch": 0.8940293841825571, "grad_norm": 0.2373046875, "learning_rate": 0.00017646739238154417, "loss": 1.6839, "step": 2860 }, { "epoch": 0.8943419818693341, "grad_norm": 0.2373046875, "learning_rate": 0.00017645155637270897, "loss": 1.6423, "step": 2861 }, { "epoch": 0.8946545795561113, "grad_norm": 0.2294921875, "learning_rate": 0.00017643571574839587, "loss": 1.7184, "step": 2862 }, { "epoch": 0.8949671772428884, "grad_norm": 0.251953125, "learning_rate": 0.00017641987050956122, "loss": 1.8355, "step": 2863 }, { "epoch": 0.8952797749296655, "grad_norm": 0.2333984375, "learning_rate": 0.0001764040206571616, "loss": 1.6686, "step": 2864 }, { "epoch": 0.8955923726164426, "grad_norm": 0.23046875, "learning_rate": 0.00017638816619215388, "loss": 1.7545, "step": 2865 }, { "epoch": 0.8959049703032198, "grad_norm": 0.2392578125, "learning_rate": 0.00017637230711549525, "loss": 1.7738, "step": 2866 }, { "epoch": 0.8962175679899969, "grad_norm": 0.2431640625, "learning_rate": 0.0001763564434281431, "loss": 1.7099, "step": 2867 }, { "epoch": 0.896530165676774, "grad_norm": 0.23046875, "learning_rate": 0.00017634057513105515, "loss": 1.6731, "step": 2868 }, { "epoch": 0.8968427633635511, "grad_norm": 0.244140625, "learning_rate": 0.0001763247022251894, "loss": 1.4654, "step": 2869 }, { "epoch": 0.8971553610503282, "grad_norm": 0.2470703125, "learning_rate": 0.00017630882471150413, "loss": 1.7359, "step": 2870 }, { "epoch": 0.8974679587371054, "grad_norm": 0.2421875, "learning_rate": 0.00017629294259095785, "loss": 1.5702, "step": 2871 }, { "epoch": 0.8977805564238824, "grad_norm": 0.26171875, "learning_rate": 0.00017627705586450944, "loss": 2.429, "step": 2872 }, { "epoch": 0.8980931541106596, "grad_norm": 0.25, "learning_rate": 0.00017626116453311794, "loss": 1.8714, "step": 2873 }, { "epoch": 0.8984057517974366, "grad_norm": 0.2421875, "learning_rate": 0.00017624526859774274, "loss": 1.592, "step": 2874 }, { "epoch": 0.8987183494842138, "grad_norm": 0.234375, "learning_rate": 0.00017622936805934355, "loss": 1.9351, "step": 2875 }, { "epoch": 0.899030947170991, "grad_norm": 0.244140625, "learning_rate": 0.00017621346291888025, "loss": 1.5676, "step": 2876 }, { "epoch": 0.899343544857768, "grad_norm": 0.2490234375, "learning_rate": 0.0001761975531773131, "loss": 2.0676, "step": 2877 }, { "epoch": 0.8996561425445452, "grad_norm": 0.234375, "learning_rate": 0.00017618163883560255, "loss": 1.8676, "step": 2878 }, { "epoch": 0.8999687402313223, "grad_norm": 0.2412109375, "learning_rate": 0.00017616571989470937, "loss": 1.6823, "step": 2879 }, { "epoch": 0.9002813379180994, "grad_norm": 0.24609375, "learning_rate": 0.00017614979635559462, "loss": 1.6829, "step": 2880 }, { "epoch": 0.9005939356048765, "grad_norm": 0.2333984375, "learning_rate": 0.00017613386821921964, "loss": 1.3811, "step": 2881 }, { "epoch": 0.9009065332916536, "grad_norm": 0.259765625, "learning_rate": 0.00017611793548654602, "loss": 1.3734, "step": 2882 }, { "epoch": 0.9012191309784308, "grad_norm": 0.220703125, "learning_rate": 0.00017610199815853563, "loss": 1.8464, "step": 2883 }, { "epoch": 0.9015317286652079, "grad_norm": 0.2392578125, "learning_rate": 0.00017608605623615063, "loss": 1.4275, "step": 2884 }, { "epoch": 0.901844326351985, "grad_norm": 0.232421875, "learning_rate": 0.00017607010972035348, "loss": 1.5875, "step": 2885 }, { "epoch": 0.9021569240387621, "grad_norm": 0.236328125, "learning_rate": 0.00017605415861210685, "loss": 1.8575, "step": 2886 }, { "epoch": 0.9024695217255392, "grad_norm": 0.2451171875, "learning_rate": 0.00017603820291237375, "loss": 1.8156, "step": 2887 }, { "epoch": 0.9027821194123163, "grad_norm": 0.240234375, "learning_rate": 0.00017602224262211743, "loss": 1.4908, "step": 2888 }, { "epoch": 0.9030947170990935, "grad_norm": 0.244140625, "learning_rate": 0.00017600627774230144, "loss": 1.7584, "step": 2889 }, { "epoch": 0.9034073147858706, "grad_norm": 0.25, "learning_rate": 0.00017599030827388965, "loss": 1.7706, "step": 2890 }, { "epoch": 0.9037199124726477, "grad_norm": 0.2431640625, "learning_rate": 0.0001759743342178461, "loss": 1.6771, "step": 2891 }, { "epoch": 0.9040325101594249, "grad_norm": 0.251953125, "learning_rate": 0.00017595835557513516, "loss": 1.838, "step": 2892 }, { "epoch": 0.9043451078462019, "grad_norm": 0.2373046875, "learning_rate": 0.00017594237234672152, "loss": 1.7833, "step": 2893 }, { "epoch": 0.9046577055329791, "grad_norm": 0.244140625, "learning_rate": 0.00017592638453357005, "loss": 1.8564, "step": 2894 }, { "epoch": 0.9049703032197561, "grad_norm": 0.236328125, "learning_rate": 0.000175910392136646, "loss": 1.4054, "step": 2895 }, { "epoch": 0.9052829009065333, "grad_norm": 0.234375, "learning_rate": 0.00017589439515691487, "loss": 1.7344, "step": 2896 }, { "epoch": 0.9055954985933105, "grad_norm": 0.2412109375, "learning_rate": 0.0001758783935953424, "loss": 1.6391, "step": 2897 }, { "epoch": 0.9059080962800875, "grad_norm": 0.236328125, "learning_rate": 0.00017586238745289457, "loss": 1.6244, "step": 2898 }, { "epoch": 0.9062206939668647, "grad_norm": 0.2451171875, "learning_rate": 0.00017584637673053778, "loss": 1.6056, "step": 2899 }, { "epoch": 0.9065332916536417, "grad_norm": 0.2333984375, "learning_rate": 0.00017583036142923856, "loss": 1.7858, "step": 2900 }, { "epoch": 0.9068458893404189, "grad_norm": 0.2373046875, "learning_rate": 0.0001758143415499638, "loss": 1.6028, "step": 2901 }, { "epoch": 0.907158487027196, "grad_norm": 0.23828125, "learning_rate": 0.0001757983170936806, "loss": 1.6918, "step": 2902 }, { "epoch": 0.9074710847139731, "grad_norm": 0.2490234375, "learning_rate": 0.00017578228806135643, "loss": 1.9901, "step": 2903 }, { "epoch": 0.9077836824007502, "grad_norm": 0.2392578125, "learning_rate": 0.00017576625445395893, "loss": 1.5383, "step": 2904 }, { "epoch": 0.9080962800875274, "grad_norm": 0.2412109375, "learning_rate": 0.00017575021627245612, "loss": 1.5068, "step": 2905 }, { "epoch": 0.9084088777743045, "grad_norm": 0.2333984375, "learning_rate": 0.00017573417351781625, "loss": 1.8062, "step": 2906 }, { "epoch": 0.9087214754610816, "grad_norm": 0.2275390625, "learning_rate": 0.00017571812619100778, "loss": 1.4791, "step": 2907 }, { "epoch": 0.9090340731478587, "grad_norm": 0.255859375, "learning_rate": 0.00017570207429299956, "loss": 1.7496, "step": 2908 }, { "epoch": 0.9093466708346358, "grad_norm": 0.244140625, "learning_rate": 0.00017568601782476064, "loss": 1.5202, "step": 2909 }, { "epoch": 0.909659268521413, "grad_norm": 0.2451171875, "learning_rate": 0.00017566995678726038, "loss": 1.6579, "step": 2910 }, { "epoch": 0.90997186620819, "grad_norm": 0.251953125, "learning_rate": 0.0001756538911814684, "loss": 1.606, "step": 2911 }, { "epoch": 0.9102844638949672, "grad_norm": 0.244140625, "learning_rate": 0.0001756378210083546, "loss": 1.6417, "step": 2912 }, { "epoch": 0.9105970615817442, "grad_norm": 0.232421875, "learning_rate": 0.00017562174626888918, "loss": 1.6654, "step": 2913 }, { "epoch": 0.9109096592685214, "grad_norm": 0.2392578125, "learning_rate": 0.00017560566696404254, "loss": 1.676, "step": 2914 }, { "epoch": 0.9112222569552986, "grad_norm": 0.240234375, "learning_rate": 0.00017558958309478543, "loss": 1.5845, "step": 2915 }, { "epoch": 0.9115348546420756, "grad_norm": 0.2412109375, "learning_rate": 0.0001755734946620889, "loss": 1.5907, "step": 2916 }, { "epoch": 0.9118474523288528, "grad_norm": 0.23828125, "learning_rate": 0.00017555740166692418, "loss": 1.8526, "step": 2917 }, { "epoch": 0.9121600500156298, "grad_norm": 0.255859375, "learning_rate": 0.00017554130411026283, "loss": 1.4743, "step": 2918 }, { "epoch": 0.912472647702407, "grad_norm": 0.2421875, "learning_rate": 0.0001755252019930767, "loss": 1.4929, "step": 2919 }, { "epoch": 0.9127852453891842, "grad_norm": 0.2392578125, "learning_rate": 0.0001755090953163379, "loss": 1.4583, "step": 2920 }, { "epoch": 0.9130978430759612, "grad_norm": 0.2451171875, "learning_rate": 0.00017549298408101876, "loss": 1.7967, "step": 2921 }, { "epoch": 0.9134104407627384, "grad_norm": 0.244140625, "learning_rate": 0.00017547686828809196, "loss": 1.9172, "step": 2922 }, { "epoch": 0.9137230384495155, "grad_norm": 0.244140625, "learning_rate": 0.00017546074793853048, "loss": 1.5975, "step": 2923 }, { "epoch": 0.9140356361362926, "grad_norm": 0.2255859375, "learning_rate": 0.00017544462303330748, "loss": 1.8838, "step": 2924 }, { "epoch": 0.9143482338230697, "grad_norm": 0.2421875, "learning_rate": 0.00017542849357339644, "loss": 1.8619, "step": 2925 }, { "epoch": 0.9146608315098468, "grad_norm": 0.2333984375, "learning_rate": 0.00017541235955977112, "loss": 1.6366, "step": 2926 }, { "epoch": 0.9149734291966239, "grad_norm": 0.2431640625, "learning_rate": 0.00017539622099340554, "loss": 1.5817, "step": 2927 }, { "epoch": 0.9152860268834011, "grad_norm": 0.248046875, "learning_rate": 0.000175380077875274, "loss": 1.5323, "step": 2928 }, { "epoch": 0.9155986245701782, "grad_norm": 0.240234375, "learning_rate": 0.00017536393020635118, "loss": 1.762, "step": 2929 }, { "epoch": 0.9159112222569553, "grad_norm": 0.2373046875, "learning_rate": 0.0001753477779876118, "loss": 1.5217, "step": 2930 }, { "epoch": 0.9162238199437324, "grad_norm": 0.2392578125, "learning_rate": 0.00017533162122003107, "loss": 1.6377, "step": 2931 }, { "epoch": 0.9165364176305095, "grad_norm": 0.25, "learning_rate": 0.00017531545990458436, "loss": 1.5614, "step": 2932 }, { "epoch": 0.9168490153172867, "grad_norm": 0.25390625, "learning_rate": 0.00017529929404224733, "loss": 1.9785, "step": 2933 }, { "epoch": 0.9171616130040637, "grad_norm": 0.2333984375, "learning_rate": 0.00017528312363399598, "loss": 1.6278, "step": 2934 }, { "epoch": 0.9174742106908409, "grad_norm": 0.2421875, "learning_rate": 0.00017526694868080656, "loss": 1.62, "step": 2935 }, { "epoch": 0.9177868083776181, "grad_norm": 0.244140625, "learning_rate": 0.0001752507691836555, "loss": 1.66, "step": 2936 }, { "epoch": 0.9180994060643951, "grad_norm": 0.234375, "learning_rate": 0.00017523458514351963, "loss": 1.711, "step": 2937 }, { "epoch": 0.9184120037511723, "grad_norm": 0.236328125, "learning_rate": 0.00017521839656137598, "loss": 1.606, "step": 2938 }, { "epoch": 0.9187246014379493, "grad_norm": 0.2451171875, "learning_rate": 0.00017520220343820184, "loss": 1.8548, "step": 2939 }, { "epoch": 0.9190371991247265, "grad_norm": 0.26171875, "learning_rate": 0.00017518600577497487, "loss": 1.6217, "step": 2940 }, { "epoch": 0.9193497968115036, "grad_norm": 0.326171875, "learning_rate": 0.00017516980357267295, "loss": 2.4887, "step": 2941 }, { "epoch": 0.9196623944982807, "grad_norm": 0.2431640625, "learning_rate": 0.00017515359683227416, "loss": 1.7841, "step": 2942 }, { "epoch": 0.9199749921850578, "grad_norm": 0.2314453125, "learning_rate": 0.00017513738555475697, "loss": 1.7065, "step": 2943 }, { "epoch": 0.9202875898718349, "grad_norm": 0.2373046875, "learning_rate": 0.0001751211697411001, "loss": 1.7469, "step": 2944 }, { "epoch": 0.9206001875586121, "grad_norm": 0.228515625, "learning_rate": 0.00017510494939228246, "loss": 1.5839, "step": 2945 }, { "epoch": 0.9209127852453892, "grad_norm": 0.24609375, "learning_rate": 0.0001750887245092833, "loss": 1.7413, "step": 2946 }, { "epoch": 0.9212253829321663, "grad_norm": 0.2470703125, "learning_rate": 0.00017507249509308217, "loss": 1.433, "step": 2947 }, { "epoch": 0.9215379806189434, "grad_norm": 0.244140625, "learning_rate": 0.00017505626114465886, "loss": 1.5907, "step": 2948 }, { "epoch": 0.9218505783057206, "grad_norm": 0.25, "learning_rate": 0.0001750400226649934, "loss": 1.6737, "step": 2949 }, { "epoch": 0.9221631759924976, "grad_norm": 0.25390625, "learning_rate": 0.00017502377965506613, "loss": 1.5084, "step": 2950 }, { "epoch": 0.9224757736792748, "grad_norm": 0.2392578125, "learning_rate": 0.00017500753211585772, "loss": 1.4999, "step": 2951 }, { "epoch": 0.9227883713660519, "grad_norm": 0.2451171875, "learning_rate": 0.000174991280048349, "loss": 1.6843, "step": 2952 }, { "epoch": 0.923100969052829, "grad_norm": 0.2421875, "learning_rate": 0.00017497502345352112, "loss": 1.6222, "step": 2953 }, { "epoch": 0.9234135667396062, "grad_norm": 0.23828125, "learning_rate": 0.00017495876233235554, "loss": 1.5935, "step": 2954 }, { "epoch": 0.9237261644263832, "grad_norm": 0.25390625, "learning_rate": 0.000174942496685834, "loss": 1.9363, "step": 2955 }, { "epoch": 0.9240387621131604, "grad_norm": 0.2431640625, "learning_rate": 0.00017492622651493837, "loss": 1.8212, "step": 2956 }, { "epoch": 0.9243513597999374, "grad_norm": 0.255859375, "learning_rate": 0.000174909951820651, "loss": 1.8014, "step": 2957 }, { "epoch": 0.9246639574867146, "grad_norm": 0.251953125, "learning_rate": 0.00017489367260395438, "loss": 1.7982, "step": 2958 }, { "epoch": 0.9249765551734918, "grad_norm": 0.240234375, "learning_rate": 0.0001748773888658313, "loss": 1.6039, "step": 2959 }, { "epoch": 0.9252891528602688, "grad_norm": 0.24609375, "learning_rate": 0.00017486110060726485, "loss": 1.8941, "step": 2960 }, { "epoch": 0.925601750547046, "grad_norm": 0.25, "learning_rate": 0.00017484480782923835, "loss": 2.0574, "step": 2961 }, { "epoch": 0.925914348233823, "grad_norm": 0.244140625, "learning_rate": 0.00017482851053273542, "loss": 1.404, "step": 2962 }, { "epoch": 0.9262269459206002, "grad_norm": 0.23046875, "learning_rate": 0.00017481220871873996, "loss": 1.6843, "step": 2963 }, { "epoch": 0.9265395436073773, "grad_norm": 0.263671875, "learning_rate": 0.00017479590238823613, "loss": 1.61, "step": 2964 }, { "epoch": 0.9268521412941544, "grad_norm": 0.388671875, "learning_rate": 0.00017477959154220835, "loss": 2.4723, "step": 2965 }, { "epoch": 0.9271647389809315, "grad_norm": 0.240234375, "learning_rate": 0.0001747632761816413, "loss": 1.6597, "step": 2966 }, { "epoch": 0.9274773366677087, "grad_norm": 0.2412109375, "learning_rate": 0.00017474695630752008, "loss": 1.5784, "step": 2967 }, { "epoch": 0.9277899343544858, "grad_norm": 0.2275390625, "learning_rate": 0.00017473063192082982, "loss": 1.8403, "step": 2968 }, { "epoch": 0.9281025320412629, "grad_norm": 0.25, "learning_rate": 0.00017471430302255604, "loss": 1.8024, "step": 2969 }, { "epoch": 0.92841512972804, "grad_norm": 0.2431640625, "learning_rate": 0.00017469796961368462, "loss": 1.714, "step": 2970 }, { "epoch": 0.9287277274148171, "grad_norm": 0.251953125, "learning_rate": 0.00017468163169520156, "loss": 1.4359, "step": 2971 }, { "epoch": 0.9290403251015943, "grad_norm": 0.23828125, "learning_rate": 0.00017466528926809324, "loss": 1.6177, "step": 2972 }, { "epoch": 0.9293529227883713, "grad_norm": 0.2578125, "learning_rate": 0.00017464894233334627, "loss": 1.9172, "step": 2973 }, { "epoch": 0.9296655204751485, "grad_norm": 0.2353515625, "learning_rate": 0.00017463259089194752, "loss": 2.023, "step": 2974 }, { "epoch": 0.9299781181619255, "grad_norm": 0.2392578125, "learning_rate": 0.00017461623494488416, "loss": 1.3345, "step": 2975 }, { "epoch": 0.9302907158487027, "grad_norm": 0.2373046875, "learning_rate": 0.0001745998744931436, "loss": 1.6451, "step": 2976 }, { "epoch": 0.9306033135354799, "grad_norm": 0.2431640625, "learning_rate": 0.00017458350953771355, "loss": 1.4398, "step": 2977 }, { "epoch": 0.9309159112222569, "grad_norm": 0.236328125, "learning_rate": 0.000174567140079582, "loss": 1.4698, "step": 2978 }, { "epoch": 0.9312285089090341, "grad_norm": 0.2421875, "learning_rate": 0.00017455076611973716, "loss": 1.586, "step": 2979 }, { "epoch": 0.9315411065958112, "grad_norm": 0.2412109375, "learning_rate": 0.00017453438765916758, "loss": 1.4608, "step": 2980 }, { "epoch": 0.9318537042825883, "grad_norm": 0.2392578125, "learning_rate": 0.00017451800469886207, "loss": 1.7327, "step": 2981 }, { "epoch": 0.9321663019693655, "grad_norm": 0.232421875, "learning_rate": 0.0001745016172398096, "loss": 1.7701, "step": 2982 }, { "epoch": 0.9324788996561425, "grad_norm": 0.2421875, "learning_rate": 0.0001744852252829996, "loss": 1.6054, "step": 2983 }, { "epoch": 0.9327914973429197, "grad_norm": 0.2392578125, "learning_rate": 0.00017446882882942162, "loss": 1.7484, "step": 2984 }, { "epoch": 0.9331040950296968, "grad_norm": 0.2373046875, "learning_rate": 0.00017445242788006552, "loss": 1.6647, "step": 2985 }, { "epoch": 0.9334166927164739, "grad_norm": 0.248046875, "learning_rate": 0.0001744360224359215, "loss": 1.6536, "step": 2986 }, { "epoch": 0.933729290403251, "grad_norm": 0.25, "learning_rate": 0.00017441961249797995, "loss": 1.9033, "step": 2987 }, { "epoch": 0.9340418880900281, "grad_norm": 0.24609375, "learning_rate": 0.00017440319806723157, "loss": 1.5145, "step": 2988 }, { "epoch": 0.9343544857768052, "grad_norm": 0.25390625, "learning_rate": 0.0001743867791446673, "loss": 1.6766, "step": 2989 }, { "epoch": 0.9346670834635824, "grad_norm": 0.232421875, "learning_rate": 0.00017437035573127836, "loss": 1.5665, "step": 2990 }, { "epoch": 0.9349796811503595, "grad_norm": 0.2353515625, "learning_rate": 0.00017435392782805628, "loss": 1.7932, "step": 2991 }, { "epoch": 0.9352922788371366, "grad_norm": 0.2490234375, "learning_rate": 0.00017433749543599287, "loss": 1.595, "step": 2992 }, { "epoch": 0.9356048765239138, "grad_norm": 0.228515625, "learning_rate": 0.00017432105855608008, "loss": 1.7333, "step": 2993 }, { "epoch": 0.9359174742106908, "grad_norm": 0.240234375, "learning_rate": 0.0001743046171893103, "loss": 1.6385, "step": 2994 }, { "epoch": 0.936230071897468, "grad_norm": 0.25390625, "learning_rate": 0.0001742881713366761, "loss": 1.7989, "step": 2995 }, { "epoch": 0.936542669584245, "grad_norm": 0.2353515625, "learning_rate": 0.00017427172099917032, "loss": 1.5065, "step": 2996 }, { "epoch": 0.9368552672710222, "grad_norm": 0.244140625, "learning_rate": 0.0001742552661777861, "loss": 1.6564, "step": 2997 }, { "epoch": 0.9371678649577994, "grad_norm": 0.2373046875, "learning_rate": 0.00017423880687351685, "loss": 1.5779, "step": 2998 }, { "epoch": 0.9374804626445764, "grad_norm": 0.2451171875, "learning_rate": 0.0001742223430873562, "loss": 1.7974, "step": 2999 }, { "epoch": 0.9377930603313536, "grad_norm": 0.234375, "learning_rate": 0.0001742058748202981, "loss": 1.4744, "step": 3000 }, { "epoch": 0.9381056580181306, "grad_norm": 0.236328125, "learning_rate": 0.0001741894020733368, "loss": 1.6008, "step": 3001 }, { "epoch": 0.9384182557049078, "grad_norm": 0.248046875, "learning_rate": 0.00017417292484746676, "loss": 1.5435, "step": 3002 }, { "epoch": 0.9387308533916849, "grad_norm": 0.2470703125, "learning_rate": 0.00017415644314368274, "loss": 1.6641, "step": 3003 }, { "epoch": 0.939043451078462, "grad_norm": 0.244140625, "learning_rate": 0.00017413995696297972, "loss": 1.661, "step": 3004 }, { "epoch": 0.9393560487652391, "grad_norm": 0.248046875, "learning_rate": 0.00017412346630635303, "loss": 1.5462, "step": 3005 }, { "epoch": 0.9396686464520163, "grad_norm": 0.263671875, "learning_rate": 0.00017410697117479823, "loss": 1.7804, "step": 3006 }, { "epoch": 0.9399812441387934, "grad_norm": 0.2333984375, "learning_rate": 0.00017409047156931114, "loss": 1.8893, "step": 3007 }, { "epoch": 0.9402938418255705, "grad_norm": 0.248046875, "learning_rate": 0.00017407396749088787, "loss": 1.5371, "step": 3008 }, { "epoch": 0.9406064395123476, "grad_norm": 0.2392578125, "learning_rate": 0.00017405745894052477, "loss": 1.5866, "step": 3009 }, { "epoch": 0.9409190371991247, "grad_norm": 0.24609375, "learning_rate": 0.00017404094591921853, "loss": 1.5388, "step": 3010 }, { "epoch": 0.9412316348859019, "grad_norm": 0.25390625, "learning_rate": 0.00017402442842796604, "loss": 1.438, "step": 3011 }, { "epoch": 0.9415442325726789, "grad_norm": 0.251953125, "learning_rate": 0.00017400790646776443, "loss": 1.892, "step": 3012 }, { "epoch": 0.9418568302594561, "grad_norm": 0.24609375, "learning_rate": 0.00017399138003961124, "loss": 1.4763, "step": 3013 }, { "epoch": 0.9421694279462332, "grad_norm": 0.25, "learning_rate": 0.0001739748491445041, "loss": 1.6418, "step": 3014 }, { "epoch": 0.9424820256330103, "grad_norm": 0.240234375, "learning_rate": 0.00017395831378344112, "loss": 1.7746, "step": 3015 }, { "epoch": 0.9427946233197875, "grad_norm": 0.236328125, "learning_rate": 0.00017394177395742047, "loss": 1.8002, "step": 3016 }, { "epoch": 0.9431072210065645, "grad_norm": 0.2412109375, "learning_rate": 0.00017392522966744068, "loss": 1.686, "step": 3017 }, { "epoch": 0.9434198186933417, "grad_norm": 0.2392578125, "learning_rate": 0.00017390868091450055, "loss": 1.6964, "step": 3018 }, { "epoch": 0.9437324163801187, "grad_norm": 0.240234375, "learning_rate": 0.00017389212769959922, "loss": 1.656, "step": 3019 }, { "epoch": 0.9440450140668959, "grad_norm": 0.240234375, "learning_rate": 0.00017387557002373596, "loss": 1.6357, "step": 3020 }, { "epoch": 0.944357611753673, "grad_norm": 0.236328125, "learning_rate": 0.00017385900788791038, "loss": 1.8136, "step": 3021 }, { "epoch": 0.9446702094404501, "grad_norm": 0.236328125, "learning_rate": 0.00017384244129312239, "loss": 1.5841, "step": 3022 }, { "epoch": 0.9449828071272273, "grad_norm": 0.23828125, "learning_rate": 0.00017382587024037212, "loss": 1.5595, "step": 3023 }, { "epoch": 0.9452954048140044, "grad_norm": 0.248046875, "learning_rate": 0.00017380929473066, "loss": 1.6447, "step": 3024 }, { "epoch": 0.9456080025007815, "grad_norm": 0.2421875, "learning_rate": 0.00017379271476498665, "loss": 1.6323, "step": 3025 }, { "epoch": 0.9459206001875586, "grad_norm": 0.25, "learning_rate": 0.00017377613034435315, "loss": 1.62, "step": 3026 }, { "epoch": 0.9462331978743357, "grad_norm": 0.2451171875, "learning_rate": 0.00017375954146976058, "loss": 1.4751, "step": 3027 }, { "epoch": 0.9465457955611128, "grad_norm": 0.2431640625, "learning_rate": 0.00017374294814221055, "loss": 2.2368, "step": 3028 }, { "epoch": 0.94685839324789, "grad_norm": 0.2412109375, "learning_rate": 0.00017372635036270472, "loss": 1.7495, "step": 3029 }, { "epoch": 0.9471709909346671, "grad_norm": 0.2373046875, "learning_rate": 0.0001737097481322452, "loss": 1.9299, "step": 3030 }, { "epoch": 0.9474835886214442, "grad_norm": 0.2392578125, "learning_rate": 0.00017369314145183426, "loss": 1.5842, "step": 3031 }, { "epoch": 0.9477961863082213, "grad_norm": 0.240234375, "learning_rate": 0.00017367653032247446, "loss": 1.6439, "step": 3032 }, { "epoch": 0.9481087839949984, "grad_norm": 0.24609375, "learning_rate": 0.0001736599147451686, "loss": 1.7489, "step": 3033 }, { "epoch": 0.9484213816817756, "grad_norm": 0.2431640625, "learning_rate": 0.00017364329472091986, "loss": 1.6981, "step": 3034 }, { "epoch": 0.9487339793685526, "grad_norm": 0.24609375, "learning_rate": 0.0001736266702507316, "loss": 1.98, "step": 3035 }, { "epoch": 0.9490465770553298, "grad_norm": 0.24609375, "learning_rate": 0.0001736100413356074, "loss": 1.5686, "step": 3036 }, { "epoch": 0.949359174742107, "grad_norm": 0.2421875, "learning_rate": 0.00017359340797655116, "loss": 1.6756, "step": 3037 }, { "epoch": 0.949671772428884, "grad_norm": 0.2431640625, "learning_rate": 0.00017357677017456715, "loss": 1.6345, "step": 3038 }, { "epoch": 0.9499843701156612, "grad_norm": 0.2451171875, "learning_rate": 0.00017356012793065976, "loss": 1.6958, "step": 3039 }, { "epoch": 0.9502969678024382, "grad_norm": 0.234375, "learning_rate": 0.0001735434812458337, "loss": 1.6856, "step": 3040 }, { "epoch": 0.9506095654892154, "grad_norm": 0.2412109375, "learning_rate": 0.00017352683012109395, "loss": 1.6888, "step": 3041 }, { "epoch": 0.9509221631759925, "grad_norm": 0.25, "learning_rate": 0.0001735101745574458, "loss": 1.7944, "step": 3042 }, { "epoch": 0.9512347608627696, "grad_norm": 0.244140625, "learning_rate": 0.0001734935145558947, "loss": 1.4633, "step": 3043 }, { "epoch": 0.9515473585495468, "grad_norm": 0.251953125, "learning_rate": 0.0001734768501174465, "loss": 1.5549, "step": 3044 }, { "epoch": 0.9518599562363238, "grad_norm": 0.24609375, "learning_rate": 0.00017346018124310723, "loss": 1.6942, "step": 3045 }, { "epoch": 0.952172553923101, "grad_norm": 0.232421875, "learning_rate": 0.0001734435079338832, "loss": 1.8094, "step": 3046 }, { "epoch": 0.9524851516098781, "grad_norm": 0.244140625, "learning_rate": 0.00017342683019078102, "loss": 1.6422, "step": 3047 }, { "epoch": 0.9527977492966552, "grad_norm": 0.2470703125, "learning_rate": 0.00017341014801480748, "loss": 1.4798, "step": 3048 }, { "epoch": 0.9531103469834323, "grad_norm": 0.2353515625, "learning_rate": 0.0001733934614069698, "loss": 1.6282, "step": 3049 }, { "epoch": 0.9534229446702095, "grad_norm": 0.23828125, "learning_rate": 0.00017337677036827534, "loss": 1.5165, "step": 3050 }, { "epoch": 0.9537355423569865, "grad_norm": 0.248046875, "learning_rate": 0.00017336007489973171, "loss": 1.6635, "step": 3051 }, { "epoch": 0.9540481400437637, "grad_norm": 0.2578125, "learning_rate": 0.00017334337500234687, "loss": 1.7504, "step": 3052 }, { "epoch": 0.9543607377305408, "grad_norm": 0.25, "learning_rate": 0.00017332667067712905, "loss": 1.8412, "step": 3053 }, { "epoch": 0.9546733354173179, "grad_norm": 0.2421875, "learning_rate": 0.0001733099619250867, "loss": 1.616, "step": 3054 }, { "epoch": 0.9549859331040951, "grad_norm": 0.2353515625, "learning_rate": 0.00017329324874722847, "loss": 1.7954, "step": 3055 }, { "epoch": 0.9552985307908721, "grad_norm": 0.2421875, "learning_rate": 0.00017327653114456343, "loss": 1.6591, "step": 3056 }, { "epoch": 0.9556111284776493, "grad_norm": 0.240234375, "learning_rate": 0.00017325980911810085, "loss": 1.6327, "step": 3057 }, { "epoch": 0.9559237261644263, "grad_norm": 0.24609375, "learning_rate": 0.00017324308266885026, "loss": 1.5621, "step": 3058 }, { "epoch": 0.9562363238512035, "grad_norm": 0.2578125, "learning_rate": 0.00017322635179782138, "loss": 2.0408, "step": 3059 }, { "epoch": 0.9565489215379807, "grad_norm": 0.23828125, "learning_rate": 0.00017320961650602436, "loss": 1.5293, "step": 3060 }, { "epoch": 0.9568615192247577, "grad_norm": 0.2373046875, "learning_rate": 0.00017319287679446949, "loss": 1.5787, "step": 3061 }, { "epoch": 0.9571741169115349, "grad_norm": 0.23828125, "learning_rate": 0.0001731761326641674, "loss": 1.6182, "step": 3062 }, { "epoch": 0.957486714598312, "grad_norm": 0.244140625, "learning_rate": 0.0001731593841161289, "loss": 1.6671, "step": 3063 }, { "epoch": 0.9577993122850891, "grad_norm": 0.23828125, "learning_rate": 0.00017314263115136516, "loss": 1.6618, "step": 3064 }, { "epoch": 0.9581119099718662, "grad_norm": 0.25, "learning_rate": 0.00017312587377088756, "loss": 1.6887, "step": 3065 }, { "epoch": 0.9584245076586433, "grad_norm": 0.2490234375, "learning_rate": 0.00017310911197570777, "loss": 1.6217, "step": 3066 }, { "epoch": 0.9587371053454204, "grad_norm": 0.240234375, "learning_rate": 0.00017309234576683778, "loss": 1.7303, "step": 3067 }, { "epoch": 0.9590497030321976, "grad_norm": 0.25, "learning_rate": 0.0001730755751452897, "loss": 1.6497, "step": 3068 }, { "epoch": 0.9593623007189747, "grad_norm": 0.228515625, "learning_rate": 0.000173058800112076, "loss": 1.8203, "step": 3069 }, { "epoch": 0.9596748984057518, "grad_norm": 0.255859375, "learning_rate": 0.00017304202066820948, "loss": 2.1236, "step": 3070 }, { "epoch": 0.9599874960925289, "grad_norm": 0.2392578125, "learning_rate": 0.0001730252368147031, "loss": 1.7534, "step": 3071 }, { "epoch": 0.960300093779306, "grad_norm": 0.25390625, "learning_rate": 0.00017300844855257008, "loss": 1.6816, "step": 3072 }, { "epoch": 0.9606126914660832, "grad_norm": 0.2373046875, "learning_rate": 0.000172991655882824, "loss": 1.5992, "step": 3073 }, { "epoch": 0.9609252891528602, "grad_norm": 0.24609375, "learning_rate": 0.00017297485880647862, "loss": 1.8889, "step": 3074 }, { "epoch": 0.9612378868396374, "grad_norm": 0.240234375, "learning_rate": 0.00017295805732454804, "loss": 1.6511, "step": 3075 }, { "epoch": 0.9615504845264145, "grad_norm": 0.265625, "learning_rate": 0.00017294125143804657, "loss": 1.7686, "step": 3076 }, { "epoch": 0.9618630822131916, "grad_norm": 0.26953125, "learning_rate": 0.0001729244411479888, "loss": 1.7564, "step": 3077 }, { "epoch": 0.9621756798999688, "grad_norm": 0.23046875, "learning_rate": 0.0001729076264553896, "loss": 1.6458, "step": 3078 }, { "epoch": 0.9624882775867458, "grad_norm": 0.2412109375, "learning_rate": 0.00017289080736126409, "loss": 1.698, "step": 3079 }, { "epoch": 0.962800875273523, "grad_norm": 0.25, "learning_rate": 0.00017287398386662764, "loss": 1.684, "step": 3080 }, { "epoch": 0.9631134729603001, "grad_norm": 0.234375, "learning_rate": 0.0001728571559724959, "loss": 1.7003, "step": 3081 }, { "epoch": 0.9634260706470772, "grad_norm": 0.2373046875, "learning_rate": 0.00017284032367988482, "loss": 1.5827, "step": 3082 }, { "epoch": 0.9637386683338544, "grad_norm": 0.240234375, "learning_rate": 0.0001728234869898106, "loss": 1.7952, "step": 3083 }, { "epoch": 0.9640512660206314, "grad_norm": 0.2470703125, "learning_rate": 0.00017280664590328966, "loss": 1.5528, "step": 3084 }, { "epoch": 0.9643638637074086, "grad_norm": 0.25, "learning_rate": 0.0001727898004213387, "loss": 1.8732, "step": 3085 }, { "epoch": 0.9646764613941857, "grad_norm": 0.234375, "learning_rate": 0.00017277295054497478, "loss": 1.5453, "step": 3086 }, { "epoch": 0.9649890590809628, "grad_norm": 0.25, "learning_rate": 0.00017275609627521508, "loss": 1.8652, "step": 3087 }, { "epoch": 0.9653016567677399, "grad_norm": 0.23828125, "learning_rate": 0.00017273923761307712, "loss": 1.5761, "step": 3088 }, { "epoch": 0.965614254454517, "grad_norm": 0.232421875, "learning_rate": 0.00017272237455957868, "loss": 1.3679, "step": 3089 }, { "epoch": 0.9659268521412941, "grad_norm": 0.365234375, "learning_rate": 0.00017270550711573788, "loss": 2.1864, "step": 3090 }, { "epoch": 0.9662394498280713, "grad_norm": 0.25390625, "learning_rate": 0.0001726886352825729, "loss": 1.8203, "step": 3091 }, { "epoch": 0.9665520475148484, "grad_norm": 0.236328125, "learning_rate": 0.0001726717590611024, "loss": 1.6397, "step": 3092 }, { "epoch": 0.9668646452016255, "grad_norm": 0.2412109375, "learning_rate": 0.00017265487845234524, "loss": 1.7298, "step": 3093 }, { "epoch": 0.9671772428884027, "grad_norm": 0.2392578125, "learning_rate": 0.00017263799345732043, "loss": 1.4412, "step": 3094 }, { "epoch": 0.9674898405751797, "grad_norm": 0.2578125, "learning_rate": 0.0001726211040770474, "loss": 1.6235, "step": 3095 }, { "epoch": 0.9678024382619569, "grad_norm": 0.2392578125, "learning_rate": 0.0001726042103125458, "loss": 1.4866, "step": 3096 }, { "epoch": 0.9681150359487339, "grad_norm": 0.25, "learning_rate": 0.0001725873121648355, "loss": 1.8129, "step": 3097 }, { "epoch": 0.9684276336355111, "grad_norm": 0.244140625, "learning_rate": 0.00017257040963493663, "loss": 1.7193, "step": 3098 }, { "epoch": 0.9687402313222883, "grad_norm": 0.2275390625, "learning_rate": 0.00017255350272386968, "loss": 1.6863, "step": 3099 }, { "epoch": 0.9690528290090653, "grad_norm": 0.2353515625, "learning_rate": 0.00017253659143265534, "loss": 1.5868, "step": 3100 }, { "epoch": 0.9693654266958425, "grad_norm": 0.26171875, "learning_rate": 0.00017251967576231448, "loss": 1.9038, "step": 3101 }, { "epoch": 0.9696780243826195, "grad_norm": 0.2490234375, "learning_rate": 0.0001725027557138684, "loss": 1.5963, "step": 3102 }, { "epoch": 0.9699906220693967, "grad_norm": 0.25390625, "learning_rate": 0.0001724858312883386, "loss": 1.9158, "step": 3103 }, { "epoch": 0.9703032197561738, "grad_norm": 0.2431640625, "learning_rate": 0.0001724689024867468, "loss": 1.7879, "step": 3104 }, { "epoch": 0.9706158174429509, "grad_norm": 0.349609375, "learning_rate": 0.00017245196931011495, "loss": 2.2104, "step": 3105 }, { "epoch": 0.970928415129728, "grad_norm": 0.25390625, "learning_rate": 0.00017243503175946542, "loss": 1.3733, "step": 3106 }, { "epoch": 0.9712410128165052, "grad_norm": 0.26171875, "learning_rate": 0.0001724180898358207, "loss": 1.8072, "step": 3107 }, { "epoch": 0.9715536105032823, "grad_norm": 0.25, "learning_rate": 0.00017240114354020368, "loss": 1.6554, "step": 3108 }, { "epoch": 0.9718662081900594, "grad_norm": 0.232421875, "learning_rate": 0.0001723841928736373, "loss": 1.8434, "step": 3109 }, { "epoch": 0.9721788058768365, "grad_norm": 0.251953125, "learning_rate": 0.00017236723783714496, "loss": 1.7078, "step": 3110 }, { "epoch": 0.9724914035636136, "grad_norm": 0.248046875, "learning_rate": 0.00017235027843175027, "loss": 1.4973, "step": 3111 }, { "epoch": 0.9728040012503908, "grad_norm": 0.23828125, "learning_rate": 0.00017233331465847705, "loss": 2.0236, "step": 3112 }, { "epoch": 0.9731165989371678, "grad_norm": 0.248046875, "learning_rate": 0.00017231634651834946, "loss": 1.55, "step": 3113 }, { "epoch": 0.973429196623945, "grad_norm": 0.240234375, "learning_rate": 0.00017229937401239188, "loss": 1.5074, "step": 3114 }, { "epoch": 0.973741794310722, "grad_norm": 0.232421875, "learning_rate": 0.00017228239714162896, "loss": 1.4308, "step": 3115 }, { "epoch": 0.9740543919974992, "grad_norm": 0.2412109375, "learning_rate": 0.00017226541590708566, "loss": 1.8249, "step": 3116 }, { "epoch": 0.9743669896842764, "grad_norm": 0.2451171875, "learning_rate": 0.00017224843030978705, "loss": 1.8337, "step": 3117 }, { "epoch": 0.9746795873710534, "grad_norm": 0.26171875, "learning_rate": 0.00017223144035075864, "loss": 1.7211, "step": 3118 }, { "epoch": 0.9749921850578306, "grad_norm": 0.25, "learning_rate": 0.00017221444603102617, "loss": 1.7391, "step": 3119 }, { "epoch": 0.9753047827446076, "grad_norm": 0.2470703125, "learning_rate": 0.00017219744735161554, "loss": 2.0078, "step": 3120 }, { "epoch": 0.9756173804313848, "grad_norm": 0.2412109375, "learning_rate": 0.000172180444313553, "loss": 1.7833, "step": 3121 }, { "epoch": 0.975929978118162, "grad_norm": 0.251953125, "learning_rate": 0.00017216343691786509, "loss": 1.508, "step": 3122 }, { "epoch": 0.976242575804939, "grad_norm": 0.251953125, "learning_rate": 0.0001721464251655785, "loss": 2.0652, "step": 3123 }, { "epoch": 0.9765551734917162, "grad_norm": 0.2421875, "learning_rate": 0.0001721294090577203, "loss": 1.5267, "step": 3124 }, { "epoch": 0.9768677711784933, "grad_norm": 0.2373046875, "learning_rate": 0.00017211238859531774, "loss": 1.838, "step": 3125 }, { "epoch": 0.9771803688652704, "grad_norm": 0.234375, "learning_rate": 0.00017209536377939846, "loss": 1.7286, "step": 3126 }, { "epoch": 0.9774929665520475, "grad_norm": 0.2412109375, "learning_rate": 0.0001720783346109901, "loss": 1.8045, "step": 3127 }, { "epoch": 0.9778055642388246, "grad_norm": 0.2353515625, "learning_rate": 0.0001720613010911209, "loss": 1.712, "step": 3128 }, { "epoch": 0.9781181619256017, "grad_norm": 0.234375, "learning_rate": 0.0001720442632208191, "loss": 1.5521, "step": 3129 }, { "epoch": 0.9784307596123789, "grad_norm": 0.23828125, "learning_rate": 0.0001720272210011133, "loss": 1.7718, "step": 3130 }, { "epoch": 0.978743357299156, "grad_norm": 0.2578125, "learning_rate": 0.00017201017443303242, "loss": 1.4686, "step": 3131 }, { "epoch": 0.9790559549859331, "grad_norm": 0.251953125, "learning_rate": 0.00017199312351760555, "loss": 1.6478, "step": 3132 }, { "epoch": 0.9793685526727102, "grad_norm": 0.228515625, "learning_rate": 0.00017197606825586204, "loss": 1.4012, "step": 3133 }, { "epoch": 0.9796811503594873, "grad_norm": 0.24609375, "learning_rate": 0.00017195900864883158, "loss": 1.6166, "step": 3134 }, { "epoch": 0.9799937480462645, "grad_norm": 0.255859375, "learning_rate": 0.00017194194469754407, "loss": 1.7632, "step": 3135 }, { "epoch": 0.9803063457330415, "grad_norm": 0.248046875, "learning_rate": 0.00017192487640302969, "loss": 1.497, "step": 3136 }, { "epoch": 0.9806189434198187, "grad_norm": 0.25, "learning_rate": 0.00017190780376631886, "loss": 1.756, "step": 3137 }, { "epoch": 0.9809315411065959, "grad_norm": 0.2451171875, "learning_rate": 0.0001718907267884423, "loss": 1.5489, "step": 3138 }, { "epoch": 0.9812441387933729, "grad_norm": 0.26171875, "learning_rate": 0.00017187364547043091, "loss": 1.5929, "step": 3139 }, { "epoch": 0.9815567364801501, "grad_norm": 0.244140625, "learning_rate": 0.000171856559813316, "loss": 1.7889, "step": 3140 }, { "epoch": 0.9818693341669271, "grad_norm": 0.234375, "learning_rate": 0.00017183946981812897, "loss": 1.4263, "step": 3141 }, { "epoch": 0.9821819318537043, "grad_norm": 0.259765625, "learning_rate": 0.00017182237548590162, "loss": 1.8588, "step": 3142 }, { "epoch": 0.9824945295404814, "grad_norm": 0.2431640625, "learning_rate": 0.00017180527681766593, "loss": 1.7062, "step": 3143 }, { "epoch": 0.9828071272272585, "grad_norm": 0.2373046875, "learning_rate": 0.00017178817381445418, "loss": 1.5145, "step": 3144 }, { "epoch": 0.9831197249140357, "grad_norm": 0.2431640625, "learning_rate": 0.0001717710664772989, "loss": 1.6806, "step": 3145 }, { "epoch": 0.9834323226008127, "grad_norm": 0.2373046875, "learning_rate": 0.00017175395480723286, "loss": 1.9361, "step": 3146 }, { "epoch": 0.9837449202875899, "grad_norm": 0.23828125, "learning_rate": 0.00017173683880528917, "loss": 1.5781, "step": 3147 }, { "epoch": 0.984057517974367, "grad_norm": 0.2451171875, "learning_rate": 0.00017171971847250106, "loss": 1.5337, "step": 3148 }, { "epoch": 0.9843701156611441, "grad_norm": 0.24609375, "learning_rate": 0.00017170259380990216, "loss": 1.8557, "step": 3149 }, { "epoch": 0.9846827133479212, "grad_norm": 0.2470703125, "learning_rate": 0.00017168546481852634, "loss": 1.735, "step": 3150 }, { "epoch": 0.9849953110346984, "grad_norm": 0.234375, "learning_rate": 0.00017166833149940763, "loss": 1.6696, "step": 3151 }, { "epoch": 0.9853079087214754, "grad_norm": 0.2392578125, "learning_rate": 0.00017165119385358045, "loss": 1.5103, "step": 3152 }, { "epoch": 0.9856205064082526, "grad_norm": 0.232421875, "learning_rate": 0.00017163405188207932, "loss": 1.3137, "step": 3153 }, { "epoch": 0.9859331040950297, "grad_norm": 0.3125, "learning_rate": 0.00017161690558593925, "loss": 2.1945, "step": 3154 }, { "epoch": 0.9862457017818068, "grad_norm": 0.251953125, "learning_rate": 0.0001715997549661953, "loss": 1.8129, "step": 3155 }, { "epoch": 0.986558299468584, "grad_norm": 0.2431640625, "learning_rate": 0.00017158260002388294, "loss": 1.7308, "step": 3156 }, { "epoch": 0.986870897155361, "grad_norm": 0.240234375, "learning_rate": 0.00017156544076003778, "loss": 1.7969, "step": 3157 }, { "epoch": 0.9871834948421382, "grad_norm": 0.25, "learning_rate": 0.00017154827717569577, "loss": 1.5541, "step": 3158 }, { "epoch": 0.9874960925289152, "grad_norm": 0.2431640625, "learning_rate": 0.00017153110927189307, "loss": 1.6279, "step": 3159 }, { "epoch": 0.9878086902156924, "grad_norm": 0.2578125, "learning_rate": 0.00017151393704966617, "loss": 1.7777, "step": 3160 }, { "epoch": 0.9881212879024696, "grad_norm": 0.240234375, "learning_rate": 0.00017149676051005176, "loss": 1.7864, "step": 3161 }, { "epoch": 0.9884338855892466, "grad_norm": 0.263671875, "learning_rate": 0.0001714795796540868, "loss": 1.8507, "step": 3162 }, { "epoch": 0.9887464832760238, "grad_norm": 0.2373046875, "learning_rate": 0.00017146239448280853, "loss": 1.5787, "step": 3163 }, { "epoch": 0.9890590809628009, "grad_norm": 0.25, "learning_rate": 0.00017144520499725444, "loss": 1.6532, "step": 3164 }, { "epoch": 0.989371678649578, "grad_norm": 0.265625, "learning_rate": 0.00017142801119846227, "loss": 1.4543, "step": 3165 }, { "epoch": 0.9896842763363551, "grad_norm": 0.240234375, "learning_rate": 0.00017141081308747003, "loss": 1.639, "step": 3166 }, { "epoch": 0.9899968740231322, "grad_norm": 0.26171875, "learning_rate": 0.00017139361066531605, "loss": 1.6788, "step": 3167 }, { "epoch": 0.9903094717099094, "grad_norm": 0.25390625, "learning_rate": 0.00017137640393303878, "loss": 1.5768, "step": 3168 }, { "epoch": 0.9906220693966865, "grad_norm": 0.2294921875, "learning_rate": 0.00017135919289167707, "loss": 1.6102, "step": 3169 }, { "epoch": 0.9909346670834636, "grad_norm": 0.255859375, "learning_rate": 0.00017134197754226996, "loss": 1.5106, "step": 3170 }, { "epoch": 0.9912472647702407, "grad_norm": 0.24609375, "learning_rate": 0.00017132475788585674, "loss": 1.4294, "step": 3171 }, { "epoch": 0.9915598624570178, "grad_norm": 0.2490234375, "learning_rate": 0.00017130753392347698, "loss": 1.552, "step": 3172 }, { "epoch": 0.9918724601437949, "grad_norm": 0.2421875, "learning_rate": 0.00017129030565617053, "loss": 1.4553, "step": 3173 }, { "epoch": 0.9921850578305721, "grad_norm": 0.2333984375, "learning_rate": 0.00017127307308497752, "loss": 1.6594, "step": 3174 }, { "epoch": 0.9924976555173491, "grad_norm": 0.2373046875, "learning_rate": 0.0001712558362109382, "loss": 1.7315, "step": 3175 }, { "epoch": 0.9928102532041263, "grad_norm": 0.248046875, "learning_rate": 0.0001712385950350933, "loss": 1.5794, "step": 3176 }, { "epoch": 0.9931228508909034, "grad_norm": 0.240234375, "learning_rate": 0.0001712213495584836, "loss": 1.7619, "step": 3177 }, { "epoch": 0.9934354485776805, "grad_norm": 0.228515625, "learning_rate": 0.00017120409978215034, "loss": 1.6773, "step": 3178 }, { "epoch": 0.9937480462644577, "grad_norm": 0.2294921875, "learning_rate": 0.00017118684570713476, "loss": 1.5635, "step": 3179 }, { "epoch": 0.9940606439512347, "grad_norm": 0.25390625, "learning_rate": 0.00017116958733447862, "loss": 1.8061, "step": 3180 }, { "epoch": 0.9943732416380119, "grad_norm": 0.2451171875, "learning_rate": 0.00017115232466522379, "loss": 1.496, "step": 3181 }, { "epoch": 0.994685839324789, "grad_norm": 0.283203125, "learning_rate": 0.0001711350577004125, "loss": 1.9932, "step": 3182 }, { "epoch": 0.9949984370115661, "grad_norm": 0.2373046875, "learning_rate": 0.00017111778644108707, "loss": 1.7719, "step": 3183 }, { "epoch": 0.9953110346983433, "grad_norm": 0.234375, "learning_rate": 0.00017110051088829023, "loss": 1.9202, "step": 3184 }, { "epoch": 0.9956236323851203, "grad_norm": 0.2392578125, "learning_rate": 0.000171083231043065, "loss": 1.7274, "step": 3185 }, { "epoch": 0.9959362300718975, "grad_norm": 0.2412109375, "learning_rate": 0.00017106594690645454, "loss": 1.6006, "step": 3186 }, { "epoch": 0.9962488277586746, "grad_norm": 0.25, "learning_rate": 0.00017104865847950224, "loss": 1.8627, "step": 3187 }, { "epoch": 0.9965614254454517, "grad_norm": 0.234375, "learning_rate": 0.00017103136576325194, "loss": 1.6147, "step": 3188 }, { "epoch": 0.9968740231322288, "grad_norm": 0.2412109375, "learning_rate": 0.00017101406875874754, "loss": 1.8255, "step": 3189 }, { "epoch": 0.9971866208190059, "grad_norm": 0.25390625, "learning_rate": 0.0001709967674670333, "loss": 1.6937, "step": 3190 }, { "epoch": 0.997499218505783, "grad_norm": 0.2431640625, "learning_rate": 0.0001709794618891538, "loss": 1.7125, "step": 3191 }, { "epoch": 0.9978118161925602, "grad_norm": 0.2470703125, "learning_rate": 0.0001709621520261537, "loss": 1.7602, "step": 3192 }, { "epoch": 0.9981244138793373, "grad_norm": 0.2412109375, "learning_rate": 0.00017094483787907804, "loss": 1.8293, "step": 3193 }, { "epoch": 0.9984370115661144, "grad_norm": 0.2314453125, "learning_rate": 0.00017092751944897214, "loss": 2.0362, "step": 3194 }, { "epoch": 0.9987496092528916, "grad_norm": 0.25, "learning_rate": 0.00017091019673688148, "loss": 1.8003, "step": 3195 }, { "epoch": 0.9990622069396686, "grad_norm": 0.2353515625, "learning_rate": 0.0001708928697438519, "loss": 1.6969, "step": 3196 }, { "epoch": 0.9993748046264458, "grad_norm": 0.2470703125, "learning_rate": 0.00017087553847092943, "loss": 1.4631, "step": 3197 }, { "epoch": 0.9996874023132228, "grad_norm": 0.248046875, "learning_rate": 0.0001708582029191604, "loss": 2.0063, "step": 3198 }, { "epoch": 1.0, "grad_norm": 0.2255859375, "learning_rate": 0.00017084086308959132, "loss": 1.4657, "step": 3199 } ], "logging_steps": 1, "max_steps": 12796, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 3199, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.538493341728768e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }