{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9936938658513281, "eval_steps": 1000, "global_step": 6500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001528759793617428, "grad_norm": 3.21875, "learning_rate": 7.733526299433335e-05, "loss": 2.4364, "step": 1 }, { "epoch": 0.0003057519587234856, "grad_norm": 2.125, "learning_rate": 7.733091498767206e-05, "loss": 2.0134, "step": 2 }, { "epoch": 0.00045862793808522834, "grad_norm": 1.4609375, "learning_rate": 7.732656702618828e-05, "loss": 1.9252, "step": 3 }, { "epoch": 0.0006115039174469712, "grad_norm": 1.5625, "learning_rate": 7.732221910989054e-05, "loss": 1.8263, "step": 4 }, { "epoch": 0.0007643798968087139, "grad_norm": 1.6328125, "learning_rate": 7.731787123878758e-05, "loss": 1.9568, "step": 5 }, { "epoch": 0.0009172558761704567, "grad_norm": 1.59375, "learning_rate": 7.73135234128881e-05, "loss": 1.8759, "step": 6 }, { "epoch": 0.0010701318555321995, "grad_norm": 1.546875, "learning_rate": 7.730917563220064e-05, "loss": 1.8277, "step": 7 }, { "epoch": 0.0012230078348939423, "grad_norm": 1.390625, "learning_rate": 7.730482789673401e-05, "loss": 1.8652, "step": 8 }, { "epoch": 0.001375883814255685, "grad_norm": 1.5234375, "learning_rate": 7.730048020649677e-05, "loss": 1.9619, "step": 9 }, { "epoch": 0.0015287597936174278, "grad_norm": 1.46875, "learning_rate": 7.729613256149764e-05, "loss": 2.1459, "step": 10 }, { "epoch": 0.0016816357729791706, "grad_norm": 1.6484375, "learning_rate": 7.729178496174525e-05, "loss": 2.1392, "step": 11 }, { "epoch": 0.0018345117523409134, "grad_norm": 1.5390625, "learning_rate": 7.72874374072483e-05, "loss": 1.9957, "step": 12 }, { "epoch": 0.001987387731702656, "grad_norm": 1.4609375, "learning_rate": 7.728308989801545e-05, "loss": 1.7992, "step": 13 }, { "epoch": 0.002140263711064399, "grad_norm": 1.625, "learning_rate": 7.72787424340553e-05, "loss": 2.0279, "step": 14 }, { "epoch": 0.0022931396904261416, "grad_norm": 1.4140625, "learning_rate": 7.727439501537662e-05, "loss": 1.9713, "step": 15 }, { "epoch": 0.0024460156697878846, "grad_norm": 1.625, "learning_rate": 7.727004764198797e-05, "loss": 2.0608, "step": 16 }, { "epoch": 0.002598891649149627, "grad_norm": 1.40625, "learning_rate": 7.72657003138981e-05, "loss": 2.0371, "step": 17 }, { "epoch": 0.00275176762851137, "grad_norm": 1.5546875, "learning_rate": 7.726135303111558e-05, "loss": 1.9959, "step": 18 }, { "epoch": 0.002904643607873113, "grad_norm": 1.484375, "learning_rate": 7.72570057936492e-05, "loss": 2.0808, "step": 19 }, { "epoch": 0.0030575195872348557, "grad_norm": 1.890625, "learning_rate": 7.725265860150752e-05, "loss": 2.1174, "step": 20 }, { "epoch": 0.0032103955665965986, "grad_norm": 1.6015625, "learning_rate": 7.724831145469922e-05, "loss": 2.1022, "step": 21 }, { "epoch": 0.003363271545958341, "grad_norm": 1.546875, "learning_rate": 7.724396435323302e-05, "loss": 2.0764, "step": 22 }, { "epoch": 0.003516147525320084, "grad_norm": 1.4765625, "learning_rate": 7.723961729711749e-05, "loss": 1.9716, "step": 23 }, { "epoch": 0.0036690235046818267, "grad_norm": 1.46875, "learning_rate": 7.723527028636141e-05, "loss": 1.9012, "step": 24 }, { "epoch": 0.0038218994840435697, "grad_norm": 1.6015625, "learning_rate": 7.723092332097333e-05, "loss": 2.2086, "step": 25 }, { "epoch": 0.003974775463405312, "grad_norm": 1.34375, "learning_rate": 7.722657640096198e-05, "loss": 1.6896, "step": 26 }, { "epoch": 0.004127651442767055, "grad_norm": 1.59375, "learning_rate": 7.722222952633603e-05, "loss": 1.9199, "step": 27 }, { "epoch": 0.004280527422128798, "grad_norm": 1.6171875, "learning_rate": 7.721788269710408e-05, "loss": 1.8723, "step": 28 }, { "epoch": 0.004433403401490541, "grad_norm": 1.3828125, "learning_rate": 7.721353591327487e-05, "loss": 1.6465, "step": 29 }, { "epoch": 0.004586279380852283, "grad_norm": 1.453125, "learning_rate": 7.720918917485697e-05, "loss": 1.6678, "step": 30 }, { "epoch": 0.004739155360214026, "grad_norm": 1.6484375, "learning_rate": 7.720484248185914e-05, "loss": 2.0498, "step": 31 }, { "epoch": 0.004892031339575769, "grad_norm": 1.5234375, "learning_rate": 7.720049583428996e-05, "loss": 1.8288, "step": 32 }, { "epoch": 0.005044907318937512, "grad_norm": 1.4609375, "learning_rate": 7.71961492321582e-05, "loss": 1.9382, "step": 33 }, { "epoch": 0.005197783298299254, "grad_norm": 1.3359375, "learning_rate": 7.719180267547242e-05, "loss": 1.6422, "step": 34 }, { "epoch": 0.005350659277660997, "grad_norm": 1.484375, "learning_rate": 7.71874561642413e-05, "loss": 2.0776, "step": 35 }, { "epoch": 0.00550353525702274, "grad_norm": 1.453125, "learning_rate": 7.718310969847353e-05, "loss": 1.9679, "step": 36 }, { "epoch": 0.005656411236384483, "grad_norm": 1.5078125, "learning_rate": 7.717876327817776e-05, "loss": 1.7803, "step": 37 }, { "epoch": 0.005809287215746226, "grad_norm": 1.515625, "learning_rate": 7.717441690336268e-05, "loss": 2.1271, "step": 38 }, { "epoch": 0.005962163195107968, "grad_norm": 1.7890625, "learning_rate": 7.717007057403691e-05, "loss": 2.0013, "step": 39 }, { "epoch": 0.006115039174469711, "grad_norm": 1.46875, "learning_rate": 7.716572429020909e-05, "loss": 1.8068, "step": 40 }, { "epoch": 0.006267915153831454, "grad_norm": 1.53125, "learning_rate": 7.716137805188796e-05, "loss": 1.7694, "step": 41 }, { "epoch": 0.006420791133193197, "grad_norm": 1.3671875, "learning_rate": 7.715703185908212e-05, "loss": 1.9443, "step": 42 }, { "epoch": 0.006573667112554939, "grad_norm": 1.5625, "learning_rate": 7.715268571180028e-05, "loss": 2.0892, "step": 43 }, { "epoch": 0.006726543091916682, "grad_norm": 1.3203125, "learning_rate": 7.714833961005105e-05, "loss": 1.8182, "step": 44 }, { "epoch": 0.006879419071278425, "grad_norm": 1.5078125, "learning_rate": 7.714399355384312e-05, "loss": 1.954, "step": 45 }, { "epoch": 0.007032295050640168, "grad_norm": 1.3515625, "learning_rate": 7.713964754318516e-05, "loss": 1.7942, "step": 46 }, { "epoch": 0.007185171030001911, "grad_norm": 1.3984375, "learning_rate": 7.713530157808577e-05, "loss": 1.8396, "step": 47 }, { "epoch": 0.007338047009363653, "grad_norm": 1.5078125, "learning_rate": 7.713095565855373e-05, "loss": 1.891, "step": 48 }, { "epoch": 0.007490922988725396, "grad_norm": 1.40625, "learning_rate": 7.712660978459757e-05, "loss": 1.8765, "step": 49 }, { "epoch": 0.007643798968087139, "grad_norm": 1.328125, "learning_rate": 7.712226395622604e-05, "loss": 1.5395, "step": 50 }, { "epoch": 0.007796674947448882, "grad_norm": 1.59375, "learning_rate": 7.711791817344774e-05, "loss": 1.8311, "step": 51 }, { "epoch": 0.007949550926810624, "grad_norm": 1.3671875, "learning_rate": 7.71135724362714e-05, "loss": 1.6623, "step": 52 }, { "epoch": 0.008102426906172367, "grad_norm": 1.328125, "learning_rate": 7.710922674470565e-05, "loss": 1.6881, "step": 53 }, { "epoch": 0.00825530288553411, "grad_norm": 1.515625, "learning_rate": 7.710488109875911e-05, "loss": 1.9919, "step": 54 }, { "epoch": 0.008408178864895853, "grad_norm": 1.359375, "learning_rate": 7.71005354984405e-05, "loss": 1.5439, "step": 55 }, { "epoch": 0.008561054844257596, "grad_norm": 1.3984375, "learning_rate": 7.709618994375842e-05, "loss": 1.9672, "step": 56 }, { "epoch": 0.00871393082361934, "grad_norm": 1.515625, "learning_rate": 7.709184443472161e-05, "loss": 1.9181, "step": 57 }, { "epoch": 0.008866806802981082, "grad_norm": 1.375, "learning_rate": 7.708749897133863e-05, "loss": 1.7991, "step": 58 }, { "epoch": 0.009019682782342824, "grad_norm": 1.484375, "learning_rate": 7.708315355361823e-05, "loss": 2.0441, "step": 59 }, { "epoch": 0.009172558761704567, "grad_norm": 1.4140625, "learning_rate": 7.707880818156904e-05, "loss": 1.8041, "step": 60 }, { "epoch": 0.00932543474106631, "grad_norm": 1.484375, "learning_rate": 7.70744628551997e-05, "loss": 1.8657, "step": 61 }, { "epoch": 0.009478310720428052, "grad_norm": 1.25, "learning_rate": 7.70701175745189e-05, "loss": 1.5892, "step": 62 }, { "epoch": 0.009631186699789795, "grad_norm": 1.4765625, "learning_rate": 7.706577233953524e-05, "loss": 1.941, "step": 63 }, { "epoch": 0.009784062679151538, "grad_norm": 1.4453125, "learning_rate": 7.706142715025748e-05, "loss": 1.8151, "step": 64 }, { "epoch": 0.009936938658513281, "grad_norm": 1.4765625, "learning_rate": 7.705708200669422e-05, "loss": 1.9894, "step": 65 }, { "epoch": 0.010089814637875024, "grad_norm": 1.3828125, "learning_rate": 7.705273690885408e-05, "loss": 1.8463, "step": 66 }, { "epoch": 0.010242690617236767, "grad_norm": 1.3359375, "learning_rate": 7.704839185674582e-05, "loss": 1.7026, "step": 67 }, { "epoch": 0.010395566596598509, "grad_norm": 1.5703125, "learning_rate": 7.704404685037798e-05, "loss": 2.226, "step": 68 }, { "epoch": 0.010548442575960252, "grad_norm": 1.5078125, "learning_rate": 7.703970188975932e-05, "loss": 1.803, "step": 69 }, { "epoch": 0.010701318555321995, "grad_norm": 1.4140625, "learning_rate": 7.703535697489844e-05, "loss": 1.655, "step": 70 }, { "epoch": 0.010854194534683738, "grad_norm": 1.625, "learning_rate": 7.703101210580403e-05, "loss": 1.823, "step": 71 }, { "epoch": 0.01100707051404548, "grad_norm": 1.4921875, "learning_rate": 7.702666728248476e-05, "loss": 1.7719, "step": 72 }, { "epoch": 0.011159946493407224, "grad_norm": 1.421875, "learning_rate": 7.702232250494923e-05, "loss": 1.6856, "step": 73 }, { "epoch": 0.011312822472768966, "grad_norm": 1.4140625, "learning_rate": 7.701797777320615e-05, "loss": 1.7057, "step": 74 }, { "epoch": 0.01146569845213071, "grad_norm": 1.3984375, "learning_rate": 7.701363308726415e-05, "loss": 1.7273, "step": 75 }, { "epoch": 0.011618574431492452, "grad_norm": 1.421875, "learning_rate": 7.700928844713192e-05, "loss": 1.7551, "step": 76 }, { "epoch": 0.011771450410854194, "grad_norm": 1.5703125, "learning_rate": 7.700494385281808e-05, "loss": 1.7993, "step": 77 }, { "epoch": 0.011924326390215937, "grad_norm": 1.4609375, "learning_rate": 7.700059930433137e-05, "loss": 2.07, "step": 78 }, { "epoch": 0.01207720236957768, "grad_norm": 1.3984375, "learning_rate": 7.699625480168034e-05, "loss": 1.8182, "step": 79 }, { "epoch": 0.012230078348939423, "grad_norm": 1.3203125, "learning_rate": 7.699191034487368e-05, "loss": 1.9628, "step": 80 }, { "epoch": 0.012382954328301166, "grad_norm": 1.390625, "learning_rate": 7.69875659339201e-05, "loss": 1.8127, "step": 81 }, { "epoch": 0.012535830307662909, "grad_norm": 1.6484375, "learning_rate": 7.698322156882819e-05, "loss": 2.1261, "step": 82 }, { "epoch": 0.012688706287024652, "grad_norm": 1.421875, "learning_rate": 7.697887724960669e-05, "loss": 1.8611, "step": 83 }, { "epoch": 0.012841582266386395, "grad_norm": 1.4765625, "learning_rate": 7.697453297626415e-05, "loss": 2.0288, "step": 84 }, { "epoch": 0.012994458245748138, "grad_norm": 1.5703125, "learning_rate": 7.697018874880932e-05, "loss": 1.9947, "step": 85 }, { "epoch": 0.013147334225109879, "grad_norm": 1.46875, "learning_rate": 7.696584456725083e-05, "loss": 1.9583, "step": 86 }, { "epoch": 0.013300210204471622, "grad_norm": 1.46875, "learning_rate": 7.69615004315973e-05, "loss": 1.9215, "step": 87 }, { "epoch": 0.013453086183833365, "grad_norm": 1.3359375, "learning_rate": 7.695715634185747e-05, "loss": 1.695, "step": 88 }, { "epoch": 0.013605962163195108, "grad_norm": 1.4453125, "learning_rate": 7.695281229803988e-05, "loss": 1.6786, "step": 89 }, { "epoch": 0.01375883814255685, "grad_norm": 1.375, "learning_rate": 7.69484683001533e-05, "loss": 1.705, "step": 90 }, { "epoch": 0.013911714121918594, "grad_norm": 1.4453125, "learning_rate": 7.694412434820632e-05, "loss": 1.9683, "step": 91 }, { "epoch": 0.014064590101280337, "grad_norm": 1.4609375, "learning_rate": 7.693978044220762e-05, "loss": 1.8605, "step": 92 }, { "epoch": 0.01421746608064208, "grad_norm": 1.5, "learning_rate": 7.693543658216588e-05, "loss": 1.8922, "step": 93 }, { "epoch": 0.014370342060003823, "grad_norm": 1.328125, "learning_rate": 7.693109276808969e-05, "loss": 1.6288, "step": 94 }, { "epoch": 0.014523218039365564, "grad_norm": 1.375, "learning_rate": 7.692674899998778e-05, "loss": 1.7456, "step": 95 }, { "epoch": 0.014676094018727307, "grad_norm": 1.3203125, "learning_rate": 7.692240527786875e-05, "loss": 1.6306, "step": 96 }, { "epoch": 0.01482896999808905, "grad_norm": 1.390625, "learning_rate": 7.69180616017413e-05, "loss": 1.9051, "step": 97 }, { "epoch": 0.014981845977450793, "grad_norm": 1.4375, "learning_rate": 7.691371797161409e-05, "loss": 1.8884, "step": 98 }, { "epoch": 0.015134721956812536, "grad_norm": 1.4765625, "learning_rate": 7.690937438749571e-05, "loss": 1.9103, "step": 99 }, { "epoch": 0.015287597936174279, "grad_norm": 1.3984375, "learning_rate": 7.69050308493949e-05, "loss": 1.7369, "step": 100 }, { "epoch": 0.015440473915536022, "grad_norm": 1.3671875, "learning_rate": 7.690068735732023e-05, "loss": 1.8392, "step": 101 }, { "epoch": 0.015593349894897765, "grad_norm": 1.3515625, "learning_rate": 7.689634391128045e-05, "loss": 1.7701, "step": 102 }, { "epoch": 0.015746225874259508, "grad_norm": 1.484375, "learning_rate": 7.689200051128411e-05, "loss": 2.172, "step": 103 }, { "epoch": 0.01589910185362125, "grad_norm": 1.2578125, "learning_rate": 7.688765715734e-05, "loss": 1.6241, "step": 104 }, { "epoch": 0.016051977832982994, "grad_norm": 1.4375, "learning_rate": 7.688331384945668e-05, "loss": 1.8052, "step": 105 }, { "epoch": 0.016204853812344735, "grad_norm": 1.2890625, "learning_rate": 7.687897058764279e-05, "loss": 1.6121, "step": 106 }, { "epoch": 0.01635772979170648, "grad_norm": 1.515625, "learning_rate": 7.687462737190707e-05, "loss": 1.7721, "step": 107 }, { "epoch": 0.01651060577106822, "grad_norm": 1.515625, "learning_rate": 7.687028420225807e-05, "loss": 1.9341, "step": 108 }, { "epoch": 0.016663481750429962, "grad_norm": 1.5546875, "learning_rate": 7.686594107870458e-05, "loss": 1.8355, "step": 109 }, { "epoch": 0.016816357729791707, "grad_norm": 1.4296875, "learning_rate": 7.68615980012551e-05, "loss": 1.8009, "step": 110 }, { "epoch": 0.016969233709153448, "grad_norm": 1.390625, "learning_rate": 7.685725496991843e-05, "loss": 1.6981, "step": 111 }, { "epoch": 0.017122109688515193, "grad_norm": 1.3671875, "learning_rate": 7.685291198470314e-05, "loss": 1.52, "step": 112 }, { "epoch": 0.017274985667876934, "grad_norm": 1.40625, "learning_rate": 7.68485690456179e-05, "loss": 1.6599, "step": 113 }, { "epoch": 0.01742786164723868, "grad_norm": 1.3671875, "learning_rate": 7.684422615267138e-05, "loss": 1.6889, "step": 114 }, { "epoch": 0.01758073762660042, "grad_norm": 1.3125, "learning_rate": 7.68398833058722e-05, "loss": 1.5154, "step": 115 }, { "epoch": 0.017733613605962165, "grad_norm": 1.390625, "learning_rate": 7.683554050522905e-05, "loss": 1.8143, "step": 116 }, { "epoch": 0.017886489585323906, "grad_norm": 1.203125, "learning_rate": 7.683119775075057e-05, "loss": 1.6657, "step": 117 }, { "epoch": 0.018039365564685647, "grad_norm": 1.3125, "learning_rate": 7.682685504244547e-05, "loss": 1.5498, "step": 118 }, { "epoch": 0.018192241544047392, "grad_norm": 1.3359375, "learning_rate": 7.682251238032232e-05, "loss": 1.6381, "step": 119 }, { "epoch": 0.018345117523409133, "grad_norm": 1.5234375, "learning_rate": 7.68181697643898e-05, "loss": 1.9357, "step": 120 }, { "epoch": 0.018497993502770878, "grad_norm": 1.515625, "learning_rate": 7.681382719465657e-05, "loss": 1.674, "step": 121 }, { "epoch": 0.01865086948213262, "grad_norm": 1.4375, "learning_rate": 7.680948467113129e-05, "loss": 1.7567, "step": 122 }, { "epoch": 0.018803745461494364, "grad_norm": 1.640625, "learning_rate": 7.680514219382264e-05, "loss": 1.9016, "step": 123 }, { "epoch": 0.018956621440856105, "grad_norm": 1.421875, "learning_rate": 7.680079976273921e-05, "loss": 1.9875, "step": 124 }, { "epoch": 0.01910949742021785, "grad_norm": 1.390625, "learning_rate": 7.679645737788971e-05, "loss": 1.7281, "step": 125 }, { "epoch": 0.01926237339957959, "grad_norm": 1.3203125, "learning_rate": 7.679211503928278e-05, "loss": 1.5126, "step": 126 }, { "epoch": 0.019415249378941332, "grad_norm": 1.203125, "learning_rate": 7.678777274692706e-05, "loss": 1.4126, "step": 127 }, { "epoch": 0.019568125358303077, "grad_norm": 1.5, "learning_rate": 7.678343050083123e-05, "loss": 1.5887, "step": 128 }, { "epoch": 0.019721001337664818, "grad_norm": 1.515625, "learning_rate": 7.677908830100388e-05, "loss": 1.71, "step": 129 }, { "epoch": 0.019873877317026563, "grad_norm": 1.296875, "learning_rate": 7.677474614745375e-05, "loss": 1.6243, "step": 130 }, { "epoch": 0.020026753296388304, "grad_norm": 1.25, "learning_rate": 7.677040404018947e-05, "loss": 1.4603, "step": 131 }, { "epoch": 0.02017962927575005, "grad_norm": 1.4140625, "learning_rate": 7.676606197921961e-05, "loss": 1.6627, "step": 132 }, { "epoch": 0.02033250525511179, "grad_norm": 1.3828125, "learning_rate": 7.676171996455296e-05, "loss": 1.6433, "step": 133 }, { "epoch": 0.020485381234473535, "grad_norm": 1.4921875, "learning_rate": 7.675737799619806e-05, "loss": 1.5223, "step": 134 }, { "epoch": 0.020638257213835276, "grad_norm": 1.4453125, "learning_rate": 7.675303607416362e-05, "loss": 1.4486, "step": 135 }, { "epoch": 0.020791133193197017, "grad_norm": 1.390625, "learning_rate": 7.674869419845824e-05, "loss": 1.9975, "step": 136 }, { "epoch": 0.020944009172558762, "grad_norm": 1.4765625, "learning_rate": 7.674435236909066e-05, "loss": 2.1466, "step": 137 }, { "epoch": 0.021096885151920503, "grad_norm": 1.3671875, "learning_rate": 7.674001058606951e-05, "loss": 1.5801, "step": 138 }, { "epoch": 0.021249761131282248, "grad_norm": 1.359375, "learning_rate": 7.673566884940337e-05, "loss": 1.666, "step": 139 }, { "epoch": 0.02140263711064399, "grad_norm": 1.40625, "learning_rate": 7.673132715910095e-05, "loss": 1.9784, "step": 140 }, { "epoch": 0.021555513090005734, "grad_norm": 1.375, "learning_rate": 7.672698551517088e-05, "loss": 1.6468, "step": 141 }, { "epoch": 0.021708389069367475, "grad_norm": 1.34375, "learning_rate": 7.672264391762186e-05, "loss": 1.6938, "step": 142 }, { "epoch": 0.02186126504872922, "grad_norm": 1.453125, "learning_rate": 7.671830236646244e-05, "loss": 1.5912, "step": 143 }, { "epoch": 0.02201414102809096, "grad_norm": 1.28125, "learning_rate": 7.671396086170143e-05, "loss": 1.5612, "step": 144 }, { "epoch": 0.022167017007452702, "grad_norm": 1.4296875, "learning_rate": 7.670961940334735e-05, "loss": 1.838, "step": 145 }, { "epoch": 0.022319892986814447, "grad_norm": 1.2421875, "learning_rate": 7.670527799140888e-05, "loss": 1.6883, "step": 146 }, { "epoch": 0.02247276896617619, "grad_norm": 1.3984375, "learning_rate": 7.670093662589474e-05, "loss": 1.803, "step": 147 }, { "epoch": 0.022625644945537933, "grad_norm": 1.4609375, "learning_rate": 7.669659530681345e-05, "loss": 1.8183, "step": 148 }, { "epoch": 0.022778520924899674, "grad_norm": 1.40625, "learning_rate": 7.66922540341738e-05, "loss": 1.6909, "step": 149 }, { "epoch": 0.02293139690426142, "grad_norm": 1.4296875, "learning_rate": 7.668791280798436e-05, "loss": 1.7191, "step": 150 }, { "epoch": 0.02308427288362316, "grad_norm": 1.3359375, "learning_rate": 7.668357162825382e-05, "loss": 1.8378, "step": 151 }, { "epoch": 0.023237148862984905, "grad_norm": 1.328125, "learning_rate": 7.667923049499081e-05, "loss": 1.5402, "step": 152 }, { "epoch": 0.023390024842346646, "grad_norm": 1.34375, "learning_rate": 7.667488940820398e-05, "loss": 1.789, "step": 153 }, { "epoch": 0.023542900821708387, "grad_norm": 1.40625, "learning_rate": 7.667054836790199e-05, "loss": 1.6694, "step": 154 }, { "epoch": 0.023695776801070132, "grad_norm": 1.28125, "learning_rate": 7.666620737409347e-05, "loss": 1.533, "step": 155 }, { "epoch": 0.023848652780431873, "grad_norm": 1.53125, "learning_rate": 7.666186642678712e-05, "loss": 1.8829, "step": 156 }, { "epoch": 0.024001528759793618, "grad_norm": 1.5546875, "learning_rate": 7.665752552599157e-05, "loss": 1.9101, "step": 157 }, { "epoch": 0.02415440473915536, "grad_norm": 1.2734375, "learning_rate": 7.665318467171543e-05, "loss": 1.6173, "step": 158 }, { "epoch": 0.024307280718517104, "grad_norm": 1.265625, "learning_rate": 7.664884386396741e-05, "loss": 1.6837, "step": 159 }, { "epoch": 0.024460156697878845, "grad_norm": 1.4453125, "learning_rate": 7.66445031027561e-05, "loss": 1.739, "step": 160 }, { "epoch": 0.02461303267724059, "grad_norm": 1.40625, "learning_rate": 7.664016238809022e-05, "loss": 1.8792, "step": 161 }, { "epoch": 0.02476590865660233, "grad_norm": 1.359375, "learning_rate": 7.663582171997836e-05, "loss": 1.5685, "step": 162 }, { "epoch": 0.024918784635964072, "grad_norm": 1.4453125, "learning_rate": 7.663148109842923e-05, "loss": 1.9937, "step": 163 }, { "epoch": 0.025071660615325817, "grad_norm": 1.546875, "learning_rate": 7.662714052345145e-05, "loss": 1.8666, "step": 164 }, { "epoch": 0.02522453659468756, "grad_norm": 1.4375, "learning_rate": 7.662279999505363e-05, "loss": 1.6336, "step": 165 }, { "epoch": 0.025377412574049303, "grad_norm": 1.40625, "learning_rate": 7.661845951324448e-05, "loss": 1.8163, "step": 166 }, { "epoch": 0.025530288553411044, "grad_norm": 1.421875, "learning_rate": 7.661411907803261e-05, "loss": 1.6666, "step": 167 }, { "epoch": 0.02568316453277279, "grad_norm": 1.4921875, "learning_rate": 7.660977868942673e-05, "loss": 1.4695, "step": 168 }, { "epoch": 0.02583604051213453, "grad_norm": 1.5, "learning_rate": 7.660543834743539e-05, "loss": 2.052, "step": 169 }, { "epoch": 0.025988916491496275, "grad_norm": 1.234375, "learning_rate": 7.660109805206734e-05, "loss": 1.5322, "step": 170 }, { "epoch": 0.026141792470858016, "grad_norm": 1.3515625, "learning_rate": 7.65967578033312e-05, "loss": 1.5751, "step": 171 }, { "epoch": 0.026294668450219758, "grad_norm": 1.421875, "learning_rate": 7.659241760123556e-05, "loss": 1.7505, "step": 172 }, { "epoch": 0.026447544429581502, "grad_norm": 1.3515625, "learning_rate": 7.658807744578917e-05, "loss": 1.7259, "step": 173 }, { "epoch": 0.026600420408943243, "grad_norm": 1.5546875, "learning_rate": 7.658373733700057e-05, "loss": 1.7639, "step": 174 }, { "epoch": 0.026753296388304988, "grad_norm": 1.2421875, "learning_rate": 7.657939727487851e-05, "loss": 1.5219, "step": 175 }, { "epoch": 0.02690617236766673, "grad_norm": 1.515625, "learning_rate": 7.657505725943156e-05, "loss": 1.775, "step": 176 }, { "epoch": 0.027059048347028474, "grad_norm": 1.3515625, "learning_rate": 7.657071729066842e-05, "loss": 1.5823, "step": 177 }, { "epoch": 0.027211924326390215, "grad_norm": 1.3203125, "learning_rate": 7.656637736859777e-05, "loss": 1.5862, "step": 178 }, { "epoch": 0.02736480030575196, "grad_norm": 1.4609375, "learning_rate": 7.656203749322814e-05, "loss": 1.9193, "step": 179 }, { "epoch": 0.0275176762851137, "grad_norm": 1.390625, "learning_rate": 7.655769766456831e-05, "loss": 1.5792, "step": 180 }, { "epoch": 0.027670552264475443, "grad_norm": 1.421875, "learning_rate": 7.655335788262682e-05, "loss": 1.7371, "step": 181 }, { "epoch": 0.027823428243837187, "grad_norm": 1.421875, "learning_rate": 7.654901814741242e-05, "loss": 1.6666, "step": 182 }, { "epoch": 0.02797630422319893, "grad_norm": 1.34375, "learning_rate": 7.654467845893365e-05, "loss": 1.7729, "step": 183 }, { "epoch": 0.028129180202560673, "grad_norm": 1.4609375, "learning_rate": 7.654033881719928e-05, "loss": 1.7278, "step": 184 }, { "epoch": 0.028282056181922414, "grad_norm": 1.4140625, "learning_rate": 7.653599922221786e-05, "loss": 1.6902, "step": 185 }, { "epoch": 0.02843493216128416, "grad_norm": 1.390625, "learning_rate": 7.653165967399807e-05, "loss": 1.6353, "step": 186 }, { "epoch": 0.0285878081406459, "grad_norm": 1.3359375, "learning_rate": 7.65273201725486e-05, "loss": 1.6643, "step": 187 }, { "epoch": 0.028740684120007645, "grad_norm": 1.4296875, "learning_rate": 7.6522980717878e-05, "loss": 1.8957, "step": 188 }, { "epoch": 0.028893560099369386, "grad_norm": 1.40625, "learning_rate": 7.651864130999505e-05, "loss": 1.8488, "step": 189 }, { "epoch": 0.029046436078731128, "grad_norm": 1.515625, "learning_rate": 7.651430194890828e-05, "loss": 1.9257, "step": 190 }, { "epoch": 0.029199312058092872, "grad_norm": 1.4921875, "learning_rate": 7.650996263462637e-05, "loss": 1.8177, "step": 191 }, { "epoch": 0.029352188037454614, "grad_norm": 1.296875, "learning_rate": 7.650562336715804e-05, "loss": 1.3916, "step": 192 }, { "epoch": 0.02950506401681636, "grad_norm": 1.484375, "learning_rate": 7.650128414651183e-05, "loss": 1.7392, "step": 193 }, { "epoch": 0.0296579399961781, "grad_norm": 1.328125, "learning_rate": 7.649694497269645e-05, "loss": 1.747, "step": 194 }, { "epoch": 0.029810815975539844, "grad_norm": 1.3671875, "learning_rate": 7.649260584572054e-05, "loss": 1.8003, "step": 195 }, { "epoch": 0.029963691954901586, "grad_norm": 1.3125, "learning_rate": 7.648826676559275e-05, "loss": 1.4417, "step": 196 }, { "epoch": 0.03011656793426333, "grad_norm": 1.3671875, "learning_rate": 7.648392773232173e-05, "loss": 1.5865, "step": 197 }, { "epoch": 0.03026944391362507, "grad_norm": 1.4140625, "learning_rate": 7.64795887459161e-05, "loss": 2.0582, "step": 198 }, { "epoch": 0.030422319892986813, "grad_norm": 1.3828125, "learning_rate": 7.647524980638452e-05, "loss": 1.8155, "step": 199 }, { "epoch": 0.030575195872348557, "grad_norm": 1.40625, "learning_rate": 7.647091091373564e-05, "loss": 1.6892, "step": 200 }, { "epoch": 0.0307280718517103, "grad_norm": 1.4375, "learning_rate": 7.646657206797812e-05, "loss": 1.7709, "step": 201 }, { "epoch": 0.030880947831072043, "grad_norm": 1.3671875, "learning_rate": 7.646223326912059e-05, "loss": 1.7936, "step": 202 }, { "epoch": 0.031033823810433785, "grad_norm": 1.3515625, "learning_rate": 7.645789451717174e-05, "loss": 1.4854, "step": 203 }, { "epoch": 0.03118669978979553, "grad_norm": 1.3515625, "learning_rate": 7.645355581214015e-05, "loss": 1.5896, "step": 204 }, { "epoch": 0.031339575769157274, "grad_norm": 1.5078125, "learning_rate": 7.644921715403447e-05, "loss": 1.774, "step": 205 }, { "epoch": 0.031492451748519015, "grad_norm": 1.3671875, "learning_rate": 7.644487854286339e-05, "loss": 1.6169, "step": 206 }, { "epoch": 0.03164532772788076, "grad_norm": 1.3515625, "learning_rate": 7.644053997863554e-05, "loss": 1.5632, "step": 207 }, { "epoch": 0.0317982037072425, "grad_norm": 1.34375, "learning_rate": 7.64362014613596e-05, "loss": 1.8794, "step": 208 }, { "epoch": 0.03195107968660424, "grad_norm": 1.40625, "learning_rate": 7.643186299104415e-05, "loss": 1.7138, "step": 209 }, { "epoch": 0.03210395566596599, "grad_norm": 1.34375, "learning_rate": 7.642752456769786e-05, "loss": 1.7406, "step": 210 }, { "epoch": 0.03225683164532773, "grad_norm": 1.4921875, "learning_rate": 7.642318619132941e-05, "loss": 1.766, "step": 211 }, { "epoch": 0.03240970762468947, "grad_norm": 1.265625, "learning_rate": 7.641884786194738e-05, "loss": 1.856, "step": 212 }, { "epoch": 0.03256258360405121, "grad_norm": 1.34375, "learning_rate": 7.641450957956052e-05, "loss": 1.9467, "step": 213 }, { "epoch": 0.03271545958341296, "grad_norm": 1.3828125, "learning_rate": 7.641017134417737e-05, "loss": 1.928, "step": 214 }, { "epoch": 0.0328683355627747, "grad_norm": 1.4453125, "learning_rate": 7.64058331558066e-05, "loss": 1.8897, "step": 215 }, { "epoch": 0.03302121154213644, "grad_norm": 1.2890625, "learning_rate": 7.640149501445689e-05, "loss": 1.4264, "step": 216 }, { "epoch": 0.03317408752149818, "grad_norm": 1.65625, "learning_rate": 7.639715692013689e-05, "loss": 1.492, "step": 217 }, { "epoch": 0.033326963500859924, "grad_norm": 1.3125, "learning_rate": 7.639281887285524e-05, "loss": 1.5941, "step": 218 }, { "epoch": 0.03347983948022167, "grad_norm": 1.5546875, "learning_rate": 7.63884808726205e-05, "loss": 1.9161, "step": 219 }, { "epoch": 0.033632715459583414, "grad_norm": 1.3046875, "learning_rate": 7.638414291944144e-05, "loss": 1.6613, "step": 220 }, { "epoch": 0.033785591438945155, "grad_norm": 1.453125, "learning_rate": 7.637980501332661e-05, "loss": 1.8987, "step": 221 }, { "epoch": 0.033938467418306896, "grad_norm": 1.3828125, "learning_rate": 7.637546715428472e-05, "loss": 1.5288, "step": 222 }, { "epoch": 0.034091343397668644, "grad_norm": 1.4609375, "learning_rate": 7.637112934232443e-05, "loss": 1.4259, "step": 223 }, { "epoch": 0.034244219377030385, "grad_norm": 1.3046875, "learning_rate": 7.636679157745428e-05, "loss": 1.7919, "step": 224 }, { "epoch": 0.03439709535639213, "grad_norm": 1.234375, "learning_rate": 7.6362453859683e-05, "loss": 1.3501, "step": 225 }, { "epoch": 0.03454997133575387, "grad_norm": 1.59375, "learning_rate": 7.63581161890192e-05, "loss": 1.8335, "step": 226 }, { "epoch": 0.03470284731511561, "grad_norm": 1.265625, "learning_rate": 7.635377856547159e-05, "loss": 1.595, "step": 227 }, { "epoch": 0.03485572329447736, "grad_norm": 1.3828125, "learning_rate": 7.634944098904869e-05, "loss": 1.6013, "step": 228 }, { "epoch": 0.0350085992738391, "grad_norm": 1.421875, "learning_rate": 7.634510345975928e-05, "loss": 1.4886, "step": 229 }, { "epoch": 0.03516147525320084, "grad_norm": 1.2890625, "learning_rate": 7.634076597761192e-05, "loss": 1.5723, "step": 230 }, { "epoch": 0.03531435123256258, "grad_norm": 1.25, "learning_rate": 7.633642854261526e-05, "loss": 1.7023, "step": 231 }, { "epoch": 0.03546722721192433, "grad_norm": 1.4609375, "learning_rate": 7.633209115477801e-05, "loss": 1.7373, "step": 232 }, { "epoch": 0.03562010319128607, "grad_norm": 1.4765625, "learning_rate": 7.632775381410868e-05, "loss": 2.0208, "step": 233 }, { "epoch": 0.03577297917064781, "grad_norm": 1.40625, "learning_rate": 7.632341652061607e-05, "loss": 1.6179, "step": 234 }, { "epoch": 0.03592585515000955, "grad_norm": 1.421875, "learning_rate": 7.631907927430873e-05, "loss": 1.6706, "step": 235 }, { "epoch": 0.036078731129371294, "grad_norm": 1.4296875, "learning_rate": 7.631474207519532e-05, "loss": 1.5106, "step": 236 }, { "epoch": 0.03623160710873304, "grad_norm": 1.4765625, "learning_rate": 7.631040492328452e-05, "loss": 1.6725, "step": 237 }, { "epoch": 0.036384483088094784, "grad_norm": 1.359375, "learning_rate": 7.630606781858492e-05, "loss": 1.3183, "step": 238 }, { "epoch": 0.036537359067456525, "grad_norm": 1.4296875, "learning_rate": 7.630173076110518e-05, "loss": 1.9727, "step": 239 }, { "epoch": 0.036690235046818266, "grad_norm": 1.28125, "learning_rate": 7.629739375085393e-05, "loss": 1.6341, "step": 240 }, { "epoch": 0.036843111026180014, "grad_norm": 1.375, "learning_rate": 7.629305678783986e-05, "loss": 1.5485, "step": 241 }, { "epoch": 0.036995987005541756, "grad_norm": 1.4609375, "learning_rate": 7.628871987207158e-05, "loss": 1.8564, "step": 242 }, { "epoch": 0.0371488629849035, "grad_norm": 1.265625, "learning_rate": 7.628438300355777e-05, "loss": 1.7702, "step": 243 }, { "epoch": 0.03730173896426524, "grad_norm": 1.4921875, "learning_rate": 7.628004618230702e-05, "loss": 1.7599, "step": 244 }, { "epoch": 0.03745461494362698, "grad_norm": 1.2578125, "learning_rate": 7.627570940832798e-05, "loss": 1.5367, "step": 245 }, { "epoch": 0.03760749092298873, "grad_norm": 1.203125, "learning_rate": 7.627137268162933e-05, "loss": 1.5021, "step": 246 }, { "epoch": 0.03776036690235047, "grad_norm": 1.2578125, "learning_rate": 7.626703600221966e-05, "loss": 1.4095, "step": 247 }, { "epoch": 0.03791324288171221, "grad_norm": 1.5078125, "learning_rate": 7.62626993701077e-05, "loss": 1.5902, "step": 248 }, { "epoch": 0.03806611886107395, "grad_norm": 1.3125, "learning_rate": 7.625836278530198e-05, "loss": 1.6867, "step": 249 }, { "epoch": 0.0382189948404357, "grad_norm": 1.375, "learning_rate": 7.625402624781122e-05, "loss": 1.8188, "step": 250 }, { "epoch": 0.03837187081979744, "grad_norm": 1.4375, "learning_rate": 7.624968975764404e-05, "loss": 1.7959, "step": 251 }, { "epoch": 0.03852474679915918, "grad_norm": 1.328125, "learning_rate": 7.624535331480906e-05, "loss": 1.5351, "step": 252 }, { "epoch": 0.03867762277852092, "grad_norm": 1.53125, "learning_rate": 7.6241016919315e-05, "loss": 1.8152, "step": 253 }, { "epoch": 0.038830498757882664, "grad_norm": 1.4609375, "learning_rate": 7.62366805711704e-05, "loss": 1.7508, "step": 254 }, { "epoch": 0.03898337473724441, "grad_norm": 1.3515625, "learning_rate": 7.623234427038397e-05, "loss": 1.8802, "step": 255 }, { "epoch": 0.039136250716606154, "grad_norm": 1.4140625, "learning_rate": 7.622800801696435e-05, "loss": 1.476, "step": 256 }, { "epoch": 0.039289126695967895, "grad_norm": 1.4375, "learning_rate": 7.62236718109201e-05, "loss": 1.8304, "step": 257 }, { "epoch": 0.039442002675329636, "grad_norm": 1.5625, "learning_rate": 7.621933565226e-05, "loss": 1.6279, "step": 258 }, { "epoch": 0.039594878654691384, "grad_norm": 1.4609375, "learning_rate": 7.621499954099256e-05, "loss": 1.6979, "step": 259 }, { "epoch": 0.039747754634053126, "grad_norm": 1.46875, "learning_rate": 7.62106634771265e-05, "loss": 1.6914, "step": 260 }, { "epoch": 0.03990063061341487, "grad_norm": 1.3984375, "learning_rate": 7.620632746067042e-05, "loss": 1.8665, "step": 261 }, { "epoch": 0.04005350659277661, "grad_norm": 1.1875, "learning_rate": 7.620199149163299e-05, "loss": 1.4178, "step": 262 }, { "epoch": 0.04020638257213835, "grad_norm": 1.515625, "learning_rate": 7.619765557002287e-05, "loss": 2.2844, "step": 263 }, { "epoch": 0.0403592585515001, "grad_norm": 1.265625, "learning_rate": 7.619331969584865e-05, "loss": 1.5223, "step": 264 }, { "epoch": 0.04051213453086184, "grad_norm": 1.59375, "learning_rate": 7.618898386911898e-05, "loss": 1.7807, "step": 265 }, { "epoch": 0.04066501051022358, "grad_norm": 1.2890625, "learning_rate": 7.618464808984251e-05, "loss": 1.582, "step": 266 }, { "epoch": 0.04081788648958532, "grad_norm": 1.390625, "learning_rate": 7.618031235802793e-05, "loss": 1.9585, "step": 267 }, { "epoch": 0.04097076246894707, "grad_norm": 1.3359375, "learning_rate": 7.617597667368376e-05, "loss": 1.7677, "step": 268 }, { "epoch": 0.04112363844830881, "grad_norm": 1.25, "learning_rate": 7.61716410368188e-05, "loss": 1.2806, "step": 269 }, { "epoch": 0.04127651442767055, "grad_norm": 1.3828125, "learning_rate": 7.616730544744157e-05, "loss": 1.5726, "step": 270 }, { "epoch": 0.04142939040703229, "grad_norm": 1.4609375, "learning_rate": 7.616296990556073e-05, "loss": 1.7461, "step": 271 }, { "epoch": 0.041582266386394034, "grad_norm": 1.4140625, "learning_rate": 7.615863441118499e-05, "loss": 1.7075, "step": 272 }, { "epoch": 0.04173514236575578, "grad_norm": 1.3828125, "learning_rate": 7.615429896432286e-05, "loss": 1.4386, "step": 273 }, { "epoch": 0.041888018345117524, "grad_norm": 1.4140625, "learning_rate": 7.614996356498313e-05, "loss": 1.7119, "step": 274 }, { "epoch": 0.042040894324479265, "grad_norm": 1.3125, "learning_rate": 7.614562821317432e-05, "loss": 1.7007, "step": 275 }, { "epoch": 0.042193770303841006, "grad_norm": 1.40625, "learning_rate": 7.614129290890515e-05, "loss": 1.8719, "step": 276 }, { "epoch": 0.042346646283202755, "grad_norm": 1.40625, "learning_rate": 7.613695765218424e-05, "loss": 1.5604, "step": 277 }, { "epoch": 0.042499522262564496, "grad_norm": 1.4375, "learning_rate": 7.613262244302018e-05, "loss": 1.7615, "step": 278 }, { "epoch": 0.04265239824192624, "grad_norm": 1.3515625, "learning_rate": 7.612828728142166e-05, "loss": 1.6464, "step": 279 }, { "epoch": 0.04280527422128798, "grad_norm": 1.4453125, "learning_rate": 7.61239521673973e-05, "loss": 1.7305, "step": 280 }, { "epoch": 0.04295815020064972, "grad_norm": 1.4140625, "learning_rate": 7.611961710095575e-05, "loss": 1.7684, "step": 281 }, { "epoch": 0.04311102618001147, "grad_norm": 1.296875, "learning_rate": 7.611528208210569e-05, "loss": 1.5681, "step": 282 }, { "epoch": 0.04326390215937321, "grad_norm": 1.40625, "learning_rate": 7.611094711085566e-05, "loss": 1.6698, "step": 283 }, { "epoch": 0.04341677813873495, "grad_norm": 1.421875, "learning_rate": 7.610661218721438e-05, "loss": 1.6734, "step": 284 }, { "epoch": 0.04356965411809669, "grad_norm": 1.484375, "learning_rate": 7.610227731119043e-05, "loss": 1.8016, "step": 285 }, { "epoch": 0.04372253009745844, "grad_norm": 1.1875, "learning_rate": 7.609794248279252e-05, "loss": 1.6299, "step": 286 }, { "epoch": 0.04387540607682018, "grad_norm": 1.4296875, "learning_rate": 7.60936077020292e-05, "loss": 1.7338, "step": 287 }, { "epoch": 0.04402828205618192, "grad_norm": 1.515625, "learning_rate": 7.608927296890925e-05, "loss": 1.7572, "step": 288 }, { "epoch": 0.04418115803554366, "grad_norm": 1.3984375, "learning_rate": 7.608493828344116e-05, "loss": 1.583, "step": 289 }, { "epoch": 0.044334034014905405, "grad_norm": 1.265625, "learning_rate": 7.608060364563362e-05, "loss": 1.3819, "step": 290 }, { "epoch": 0.04448690999426715, "grad_norm": 1.40625, "learning_rate": 7.60762690554953e-05, "loss": 1.6836, "step": 291 }, { "epoch": 0.044639785973628894, "grad_norm": 1.3046875, "learning_rate": 7.60719345130348e-05, "loss": 1.7593, "step": 292 }, { "epoch": 0.044792661952990635, "grad_norm": 1.2578125, "learning_rate": 7.60676000182608e-05, "loss": 1.5006, "step": 293 }, { "epoch": 0.04494553793235238, "grad_norm": 2.21875, "learning_rate": 7.606326557118187e-05, "loss": 1.6288, "step": 294 }, { "epoch": 0.045098413911714125, "grad_norm": 1.3046875, "learning_rate": 7.605893117180672e-05, "loss": 1.6673, "step": 295 }, { "epoch": 0.045251289891075866, "grad_norm": 1.34375, "learning_rate": 7.605459682014395e-05, "loss": 1.5027, "step": 296 }, { "epoch": 0.04540416587043761, "grad_norm": 1.4921875, "learning_rate": 7.60502625162022e-05, "loss": 2.0042, "step": 297 }, { "epoch": 0.04555704184979935, "grad_norm": 1.296875, "learning_rate": 7.604592825999014e-05, "loss": 1.5219, "step": 298 }, { "epoch": 0.04570991782916109, "grad_norm": 1.3671875, "learning_rate": 7.604159405151634e-05, "loss": 1.7094, "step": 299 }, { "epoch": 0.04586279380852284, "grad_norm": 1.3203125, "learning_rate": 7.60372598907895e-05, "loss": 1.6382, "step": 300 }, { "epoch": 0.04601566978788458, "grad_norm": 1.3359375, "learning_rate": 7.603292577781822e-05, "loss": 1.5856, "step": 301 }, { "epoch": 0.04616854576724632, "grad_norm": 1.3046875, "learning_rate": 7.602859171261117e-05, "loss": 1.4879, "step": 302 }, { "epoch": 0.04632142174660806, "grad_norm": 1.3359375, "learning_rate": 7.602425769517701e-05, "loss": 1.4916, "step": 303 }, { "epoch": 0.04647429772596981, "grad_norm": 1.3984375, "learning_rate": 7.601992372552427e-05, "loss": 1.8084, "step": 304 }, { "epoch": 0.04662717370533155, "grad_norm": 1.359375, "learning_rate": 7.601558980366169e-05, "loss": 1.6258, "step": 305 }, { "epoch": 0.04678004968469329, "grad_norm": 1.4296875, "learning_rate": 7.601125592959786e-05, "loss": 1.4319, "step": 306 }, { "epoch": 0.046932925664055034, "grad_norm": 1.4140625, "learning_rate": 7.600692210334145e-05, "loss": 1.9355, "step": 307 }, { "epoch": 0.047085801643416775, "grad_norm": 1.3671875, "learning_rate": 7.600258832490104e-05, "loss": 1.8701, "step": 308 }, { "epoch": 0.04723867762277852, "grad_norm": 1.4765625, "learning_rate": 7.599825459428535e-05, "loss": 1.7254, "step": 309 }, { "epoch": 0.047391553602140264, "grad_norm": 1.3671875, "learning_rate": 7.599392091150294e-05, "loss": 1.7473, "step": 310 }, { "epoch": 0.047544429581502005, "grad_norm": 1.390625, "learning_rate": 7.598958727656248e-05, "loss": 1.8313, "step": 311 }, { "epoch": 0.04769730556086375, "grad_norm": 1.390625, "learning_rate": 7.598525368947265e-05, "loss": 1.694, "step": 312 }, { "epoch": 0.047850181540225495, "grad_norm": 1.2578125, "learning_rate": 7.598092015024196e-05, "loss": 1.8997, "step": 313 }, { "epoch": 0.048003057519587236, "grad_norm": 1.25, "learning_rate": 7.59765866588792e-05, "loss": 1.5215, "step": 314 }, { "epoch": 0.04815593349894898, "grad_norm": 1.375, "learning_rate": 7.597225321539289e-05, "loss": 1.9556, "step": 315 }, { "epoch": 0.04830880947831072, "grad_norm": 1.3046875, "learning_rate": 7.59679198197917e-05, "loss": 1.7225, "step": 316 }, { "epoch": 0.04846168545767246, "grad_norm": 1.3125, "learning_rate": 7.596358647208432e-05, "loss": 1.8058, "step": 317 }, { "epoch": 0.04861456143703421, "grad_norm": 1.421875, "learning_rate": 7.595925317227929e-05, "loss": 1.6199, "step": 318 }, { "epoch": 0.04876743741639595, "grad_norm": 1.359375, "learning_rate": 7.595491992038534e-05, "loss": 1.4974, "step": 319 }, { "epoch": 0.04892031339575769, "grad_norm": 1.4453125, "learning_rate": 7.595058671641103e-05, "loss": 1.9369, "step": 320 }, { "epoch": 0.04907318937511943, "grad_norm": 1.5625, "learning_rate": 7.594625356036504e-05, "loss": 1.7068, "step": 321 }, { "epoch": 0.04922606535448118, "grad_norm": 1.3828125, "learning_rate": 7.594192045225602e-05, "loss": 1.7689, "step": 322 }, { "epoch": 0.04937894133384292, "grad_norm": 1.484375, "learning_rate": 7.593758739209254e-05, "loss": 1.8858, "step": 323 }, { "epoch": 0.04953181731320466, "grad_norm": 1.3828125, "learning_rate": 7.59332543798833e-05, "loss": 1.6995, "step": 324 }, { "epoch": 0.049684693292566404, "grad_norm": 1.3984375, "learning_rate": 7.592892141563687e-05, "loss": 1.6645, "step": 325 }, { "epoch": 0.049837569271928145, "grad_norm": 1.328125, "learning_rate": 7.592458849936197e-05, "loss": 1.5667, "step": 326 }, { "epoch": 0.04999044525128989, "grad_norm": 1.2890625, "learning_rate": 7.592025563106715e-05, "loss": 1.6425, "step": 327 }, { "epoch": 0.050143321230651634, "grad_norm": 1.4296875, "learning_rate": 7.591592281076114e-05, "loss": 1.4712, "step": 328 }, { "epoch": 0.050296197210013376, "grad_norm": 1.328125, "learning_rate": 7.591159003845249e-05, "loss": 1.6742, "step": 329 }, { "epoch": 0.05044907318937512, "grad_norm": 1.28125, "learning_rate": 7.590725731414986e-05, "loss": 1.5318, "step": 330 }, { "epoch": 0.050601949168736865, "grad_norm": 1.3515625, "learning_rate": 7.59029246378619e-05, "loss": 1.7086, "step": 331 }, { "epoch": 0.050754825148098606, "grad_norm": 1.46875, "learning_rate": 7.589859200959721e-05, "loss": 1.6468, "step": 332 }, { "epoch": 0.05090770112746035, "grad_norm": 1.3046875, "learning_rate": 7.58942594293645e-05, "loss": 1.8487, "step": 333 }, { "epoch": 0.05106057710682209, "grad_norm": 1.390625, "learning_rate": 7.588992689717231e-05, "loss": 1.5745, "step": 334 }, { "epoch": 0.05121345308618383, "grad_norm": 1.484375, "learning_rate": 7.588559441302933e-05, "loss": 1.5427, "step": 335 }, { "epoch": 0.05136632906554558, "grad_norm": 1.421875, "learning_rate": 7.58812619769442e-05, "loss": 1.7206, "step": 336 }, { "epoch": 0.05151920504490732, "grad_norm": 1.53125, "learning_rate": 7.587692958892549e-05, "loss": 1.7791, "step": 337 }, { "epoch": 0.05167208102426906, "grad_norm": 1.3671875, "learning_rate": 7.587259724898194e-05, "loss": 1.812, "step": 338 }, { "epoch": 0.0518249570036308, "grad_norm": 1.4921875, "learning_rate": 7.586826495712208e-05, "loss": 1.8337, "step": 339 }, { "epoch": 0.05197783298299255, "grad_norm": 1.265625, "learning_rate": 7.586393271335462e-05, "loss": 1.552, "step": 340 }, { "epoch": 0.05213070896235429, "grad_norm": 1.1328125, "learning_rate": 7.585960051768813e-05, "loss": 1.3929, "step": 341 }, { "epoch": 0.05228358494171603, "grad_norm": 1.546875, "learning_rate": 7.585526837013128e-05, "loss": 1.7328, "step": 342 }, { "epoch": 0.052436460921077774, "grad_norm": 1.359375, "learning_rate": 7.585093627069274e-05, "loss": 1.6829, "step": 343 }, { "epoch": 0.052589336900439515, "grad_norm": 1.2421875, "learning_rate": 7.584660421938106e-05, "loss": 1.2771, "step": 344 }, { "epoch": 0.05274221287980126, "grad_norm": 1.3984375, "learning_rate": 7.584227221620492e-05, "loss": 1.6753, "step": 345 }, { "epoch": 0.052895088859163004, "grad_norm": 1.34375, "learning_rate": 7.583794026117294e-05, "loss": 1.5456, "step": 346 }, { "epoch": 0.053047964838524746, "grad_norm": 1.3671875, "learning_rate": 7.58336083542938e-05, "loss": 1.3722, "step": 347 }, { "epoch": 0.05320084081788649, "grad_norm": 1.3515625, "learning_rate": 7.582927649557609e-05, "loss": 1.599, "step": 348 }, { "epoch": 0.053353716797248235, "grad_norm": 1.4921875, "learning_rate": 7.582494468502842e-05, "loss": 1.7832, "step": 349 }, { "epoch": 0.053506592776609976, "grad_norm": 1.5, "learning_rate": 7.582061292265947e-05, "loss": 1.7412, "step": 350 }, { "epoch": 0.05365946875597172, "grad_norm": 1.3046875, "learning_rate": 7.581628120847783e-05, "loss": 1.611, "step": 351 }, { "epoch": 0.05381234473533346, "grad_norm": 1.2890625, "learning_rate": 7.58119495424922e-05, "loss": 1.5453, "step": 352 }, { "epoch": 0.0539652207146952, "grad_norm": 1.3515625, "learning_rate": 7.580761792471112e-05, "loss": 1.6444, "step": 353 }, { "epoch": 0.05411809669405695, "grad_norm": 1.4921875, "learning_rate": 7.580328635514333e-05, "loss": 1.8233, "step": 354 }, { "epoch": 0.05427097267341869, "grad_norm": 1.46875, "learning_rate": 7.579895483379736e-05, "loss": 1.6909, "step": 355 }, { "epoch": 0.05442384865278043, "grad_norm": 1.3515625, "learning_rate": 7.579462336068189e-05, "loss": 1.7915, "step": 356 }, { "epoch": 0.05457672463214217, "grad_norm": 1.2578125, "learning_rate": 7.579029193580559e-05, "loss": 1.4485, "step": 357 }, { "epoch": 0.05472960061150392, "grad_norm": 1.359375, "learning_rate": 7.578596055917698e-05, "loss": 1.2863, "step": 358 }, { "epoch": 0.05488247659086566, "grad_norm": 1.3671875, "learning_rate": 7.578162923080483e-05, "loss": 1.8312, "step": 359 }, { "epoch": 0.0550353525702274, "grad_norm": 1.453125, "learning_rate": 7.577729795069766e-05, "loss": 1.7282, "step": 360 }, { "epoch": 0.055188228549589144, "grad_norm": 1.4453125, "learning_rate": 7.577296671886419e-05, "loss": 1.6906, "step": 361 }, { "epoch": 0.055341104528950885, "grad_norm": 1.4296875, "learning_rate": 7.5768635535313e-05, "loss": 1.5891, "step": 362 }, { "epoch": 0.05549398050831263, "grad_norm": 1.3515625, "learning_rate": 7.576430440005269e-05, "loss": 1.477, "step": 363 }, { "epoch": 0.055646856487674375, "grad_norm": 1.3515625, "learning_rate": 7.575997331309197e-05, "loss": 1.5279, "step": 364 }, { "epoch": 0.055799732467036116, "grad_norm": 1.34375, "learning_rate": 7.575564227443941e-05, "loss": 1.6677, "step": 365 }, { "epoch": 0.05595260844639786, "grad_norm": 1.2734375, "learning_rate": 7.575131128410368e-05, "loss": 1.3879, "step": 366 }, { "epoch": 0.056105484425759605, "grad_norm": 1.4140625, "learning_rate": 7.574698034209338e-05, "loss": 1.6717, "step": 367 }, { "epoch": 0.056258360405121347, "grad_norm": 1.3515625, "learning_rate": 7.574264944841719e-05, "loss": 1.5248, "step": 368 }, { "epoch": 0.05641123638448309, "grad_norm": 1.234375, "learning_rate": 7.573831860308369e-05, "loss": 1.5937, "step": 369 }, { "epoch": 0.05656411236384483, "grad_norm": 1.328125, "learning_rate": 7.573398780610151e-05, "loss": 1.6599, "step": 370 }, { "epoch": 0.05671698834320657, "grad_norm": 1.28125, "learning_rate": 7.572965705747931e-05, "loss": 1.6293, "step": 371 }, { "epoch": 0.05686986432256832, "grad_norm": 1.5234375, "learning_rate": 7.572532635722572e-05, "loss": 1.6768, "step": 372 }, { "epoch": 0.05702274030193006, "grad_norm": 1.5546875, "learning_rate": 7.572099570534939e-05, "loss": 1.6991, "step": 373 }, { "epoch": 0.0571756162812918, "grad_norm": 1.3984375, "learning_rate": 7.57166651018589e-05, "loss": 1.6637, "step": 374 }, { "epoch": 0.05732849226065354, "grad_norm": 1.375, "learning_rate": 7.571233454676287e-05, "loss": 1.4707, "step": 375 }, { "epoch": 0.05748136824001529, "grad_norm": 1.34375, "learning_rate": 7.570800404007e-05, "loss": 1.6907, "step": 376 }, { "epoch": 0.05763424421937703, "grad_norm": 1.453125, "learning_rate": 7.570367358178886e-05, "loss": 1.5609, "step": 377 }, { "epoch": 0.05778712019873877, "grad_norm": 1.3046875, "learning_rate": 7.569934317192815e-05, "loss": 1.6028, "step": 378 }, { "epoch": 0.057939996178100514, "grad_norm": 1.3828125, "learning_rate": 7.56950128104964e-05, "loss": 1.6141, "step": 379 }, { "epoch": 0.058092872157462255, "grad_norm": 1.375, "learning_rate": 7.569068249750232e-05, "loss": 1.6556, "step": 380 }, { "epoch": 0.058245748136824003, "grad_norm": 1.4140625, "learning_rate": 7.568635223295453e-05, "loss": 1.7473, "step": 381 }, { "epoch": 0.058398624116185745, "grad_norm": 1.4765625, "learning_rate": 7.568202201686158e-05, "loss": 1.7954, "step": 382 }, { "epoch": 0.058551500095547486, "grad_norm": 1.5078125, "learning_rate": 7.567769184923224e-05, "loss": 1.8888, "step": 383 }, { "epoch": 0.05870437607490923, "grad_norm": 1.3828125, "learning_rate": 7.567336173007502e-05, "loss": 1.6651, "step": 384 }, { "epoch": 0.058857252054270975, "grad_norm": 1.4375, "learning_rate": 7.566903165939861e-05, "loss": 1.7419, "step": 385 }, { "epoch": 0.05901012803363272, "grad_norm": 1.421875, "learning_rate": 7.56647016372116e-05, "loss": 1.5326, "step": 386 }, { "epoch": 0.05916300401299446, "grad_norm": 1.359375, "learning_rate": 7.566037166352266e-05, "loss": 1.8968, "step": 387 }, { "epoch": 0.0593158799923562, "grad_norm": 1.4140625, "learning_rate": 7.565604173834043e-05, "loss": 1.7008, "step": 388 }, { "epoch": 0.05946875597171794, "grad_norm": 1.5, "learning_rate": 7.565171186167345e-05, "loss": 1.6372, "step": 389 }, { "epoch": 0.05962163195107969, "grad_norm": 1.515625, "learning_rate": 7.564738203353044e-05, "loss": 1.3953, "step": 390 }, { "epoch": 0.05977450793044143, "grad_norm": 1.359375, "learning_rate": 7.564305225391998e-05, "loss": 1.7438, "step": 391 }, { "epoch": 0.05992738390980317, "grad_norm": 1.3515625, "learning_rate": 7.563872252285076e-05, "loss": 1.6682, "step": 392 }, { "epoch": 0.06008025988916491, "grad_norm": 1.65625, "learning_rate": 7.56343928403313e-05, "loss": 1.4495, "step": 393 }, { "epoch": 0.06023313586852666, "grad_norm": 1.234375, "learning_rate": 7.563006320637035e-05, "loss": 1.6719, "step": 394 }, { "epoch": 0.0603860118478884, "grad_norm": 1.3828125, "learning_rate": 7.562573362097646e-05, "loss": 1.5679, "step": 395 }, { "epoch": 0.06053888782725014, "grad_norm": 1.625, "learning_rate": 7.562140408415828e-05, "loss": 1.4409, "step": 396 }, { "epoch": 0.060691763806611884, "grad_norm": 1.5390625, "learning_rate": 7.561707459592447e-05, "loss": 1.969, "step": 397 }, { "epoch": 0.060844639785973625, "grad_norm": 1.3828125, "learning_rate": 7.561274515628355e-05, "loss": 1.6576, "step": 398 }, { "epoch": 0.060997515765335374, "grad_norm": 1.3125, "learning_rate": 7.560841576524431e-05, "loss": 1.7343, "step": 399 }, { "epoch": 0.061150391744697115, "grad_norm": 1.3203125, "learning_rate": 7.560408642281525e-05, "loss": 1.5168, "step": 400 }, { "epoch": 0.061303267724058856, "grad_norm": 1.609375, "learning_rate": 7.559975712900506e-05, "loss": 1.5196, "step": 401 }, { "epoch": 0.0614561437034206, "grad_norm": 1.265625, "learning_rate": 7.559542788382236e-05, "loss": 1.4259, "step": 402 }, { "epoch": 0.061609019682782346, "grad_norm": 1.328125, "learning_rate": 7.559109868727575e-05, "loss": 1.4675, "step": 403 }, { "epoch": 0.06176189566214409, "grad_norm": 1.3125, "learning_rate": 7.558676953937387e-05, "loss": 1.5129, "step": 404 }, { "epoch": 0.06191477164150583, "grad_norm": 1.5390625, "learning_rate": 7.558244044012535e-05, "loss": 1.5844, "step": 405 }, { "epoch": 0.06206764762086757, "grad_norm": 1.390625, "learning_rate": 7.557811138953883e-05, "loss": 1.8403, "step": 406 }, { "epoch": 0.06222052360022931, "grad_norm": 1.4296875, "learning_rate": 7.557378238762295e-05, "loss": 1.9096, "step": 407 }, { "epoch": 0.06237339957959106, "grad_norm": 1.4140625, "learning_rate": 7.556945343438628e-05, "loss": 1.6867, "step": 408 }, { "epoch": 0.06252627555895279, "grad_norm": 1.34375, "learning_rate": 7.55651245298375e-05, "loss": 1.9038, "step": 409 }, { "epoch": 0.06267915153831455, "grad_norm": 1.40625, "learning_rate": 7.55607956739852e-05, "loss": 1.4205, "step": 410 }, { "epoch": 0.06283202751767629, "grad_norm": 1.3515625, "learning_rate": 7.555646686683806e-05, "loss": 1.5537, "step": 411 }, { "epoch": 0.06298490349703803, "grad_norm": 1.65625, "learning_rate": 7.555213810840462e-05, "loss": 1.8605, "step": 412 }, { "epoch": 0.06313777947639977, "grad_norm": 1.328125, "learning_rate": 7.554780939869363e-05, "loss": 1.8208, "step": 413 }, { "epoch": 0.06329065545576151, "grad_norm": 1.484375, "learning_rate": 7.554348073771361e-05, "loss": 1.5755, "step": 414 }, { "epoch": 0.06344353143512325, "grad_norm": 1.3984375, "learning_rate": 7.553915212547323e-05, "loss": 1.8187, "step": 415 }, { "epoch": 0.063596407414485, "grad_norm": 1.40625, "learning_rate": 7.553482356198109e-05, "loss": 1.4131, "step": 416 }, { "epoch": 0.06374928339384674, "grad_norm": 1.46875, "learning_rate": 7.553049504724585e-05, "loss": 1.4356, "step": 417 }, { "epoch": 0.06390215937320848, "grad_norm": 1.3203125, "learning_rate": 7.552616658127615e-05, "loss": 1.6172, "step": 418 }, { "epoch": 0.06405503535257023, "grad_norm": 1.5625, "learning_rate": 7.552183816408055e-05, "loss": 1.6839, "step": 419 }, { "epoch": 0.06420791133193197, "grad_norm": 1.390625, "learning_rate": 7.551750979566775e-05, "loss": 1.659, "step": 420 }, { "epoch": 0.06436078731129372, "grad_norm": 1.265625, "learning_rate": 7.551318147604631e-05, "loss": 1.645, "step": 421 }, { "epoch": 0.06451366329065546, "grad_norm": 1.6015625, "learning_rate": 7.550885320522489e-05, "loss": 1.6857, "step": 422 }, { "epoch": 0.0646665392700172, "grad_norm": 1.53125, "learning_rate": 7.550452498321215e-05, "loss": 1.5365, "step": 423 }, { "epoch": 0.06481941524937894, "grad_norm": 1.421875, "learning_rate": 7.550019681001663e-05, "loss": 1.6458, "step": 424 }, { "epoch": 0.06497229122874068, "grad_norm": 1.484375, "learning_rate": 7.549586868564704e-05, "loss": 1.6842, "step": 425 }, { "epoch": 0.06512516720810242, "grad_norm": 1.4609375, "learning_rate": 7.549154061011195e-05, "loss": 1.456, "step": 426 }, { "epoch": 0.06527804318746416, "grad_norm": 1.3828125, "learning_rate": 7.548721258342001e-05, "loss": 1.4817, "step": 427 }, { "epoch": 0.06543091916682592, "grad_norm": 1.421875, "learning_rate": 7.548288460557987e-05, "loss": 1.6555, "step": 428 }, { "epoch": 0.06558379514618766, "grad_norm": 1.40625, "learning_rate": 7.547855667660009e-05, "loss": 1.4259, "step": 429 }, { "epoch": 0.0657366711255494, "grad_norm": 1.28125, "learning_rate": 7.547422879648934e-05, "loss": 1.7158, "step": 430 }, { "epoch": 0.06588954710491114, "grad_norm": 1.25, "learning_rate": 7.546990096525622e-05, "loss": 1.4867, "step": 431 }, { "epoch": 0.06604242308427288, "grad_norm": 1.328125, "learning_rate": 7.546557318290942e-05, "loss": 1.6745, "step": 432 }, { "epoch": 0.06619529906363462, "grad_norm": 1.421875, "learning_rate": 7.546124544945745e-05, "loss": 1.7607, "step": 433 }, { "epoch": 0.06634817504299637, "grad_norm": 1.359375, "learning_rate": 7.545691776490906e-05, "loss": 1.422, "step": 434 }, { "epoch": 0.06650105102235811, "grad_norm": 1.34375, "learning_rate": 7.545259012927281e-05, "loss": 1.7902, "step": 435 }, { "epoch": 0.06665392700171985, "grad_norm": 1.453125, "learning_rate": 7.544826254255729e-05, "loss": 1.6402, "step": 436 }, { "epoch": 0.0668068029810816, "grad_norm": 1.3984375, "learning_rate": 7.544393500477123e-05, "loss": 1.6185, "step": 437 }, { "epoch": 0.06695967896044334, "grad_norm": 1.375, "learning_rate": 7.54396075159231e-05, "loss": 1.3882, "step": 438 }, { "epoch": 0.06711255493980509, "grad_norm": 1.2890625, "learning_rate": 7.54352800760217e-05, "loss": 1.3521, "step": 439 }, { "epoch": 0.06726543091916683, "grad_norm": 1.4453125, "learning_rate": 7.543095268507552e-05, "loss": 2.0944, "step": 440 }, { "epoch": 0.06741830689852857, "grad_norm": 1.265625, "learning_rate": 7.542662534309323e-05, "loss": 1.4641, "step": 441 }, { "epoch": 0.06757118287789031, "grad_norm": 1.421875, "learning_rate": 7.54222980500835e-05, "loss": 1.8043, "step": 442 }, { "epoch": 0.06772405885725205, "grad_norm": 1.53125, "learning_rate": 7.541797080605486e-05, "loss": 1.96, "step": 443 }, { "epoch": 0.06787693483661379, "grad_norm": 1.4296875, "learning_rate": 7.541364361101602e-05, "loss": 1.6413, "step": 444 }, { "epoch": 0.06802981081597553, "grad_norm": 1.3515625, "learning_rate": 7.540931646497552e-05, "loss": 1.49, "step": 445 }, { "epoch": 0.06818268679533729, "grad_norm": 1.2578125, "learning_rate": 7.540498936794206e-05, "loss": 1.5277, "step": 446 }, { "epoch": 0.06833556277469903, "grad_norm": 1.28125, "learning_rate": 7.540066231992426e-05, "loss": 1.5206, "step": 447 }, { "epoch": 0.06848843875406077, "grad_norm": 1.28125, "learning_rate": 7.539633532093068e-05, "loss": 1.6879, "step": 448 }, { "epoch": 0.06864131473342251, "grad_norm": 1.2578125, "learning_rate": 7.539200837097e-05, "loss": 1.4458, "step": 449 }, { "epoch": 0.06879419071278425, "grad_norm": 1.25, "learning_rate": 7.538768147005082e-05, "loss": 1.4525, "step": 450 }, { "epoch": 0.068947066692146, "grad_norm": 1.5234375, "learning_rate": 7.538335461818175e-05, "loss": 1.7564, "step": 451 }, { "epoch": 0.06909994267150774, "grad_norm": 1.2578125, "learning_rate": 7.537902781537145e-05, "loss": 1.4728, "step": 452 }, { "epoch": 0.06925281865086948, "grad_norm": 1.390625, "learning_rate": 7.537470106162854e-05, "loss": 1.6093, "step": 453 }, { "epoch": 0.06940569463023122, "grad_norm": 1.53125, "learning_rate": 7.537037435696161e-05, "loss": 1.5562, "step": 454 }, { "epoch": 0.06955857060959297, "grad_norm": 1.28125, "learning_rate": 7.536604770137929e-05, "loss": 1.6118, "step": 455 }, { "epoch": 0.06971144658895471, "grad_norm": 1.3046875, "learning_rate": 7.536172109489022e-05, "loss": 1.5445, "step": 456 }, { "epoch": 0.06986432256831646, "grad_norm": 1.40625, "learning_rate": 7.535739453750299e-05, "loss": 1.8728, "step": 457 }, { "epoch": 0.0700171985476782, "grad_norm": 1.328125, "learning_rate": 7.53530680292263e-05, "loss": 1.6043, "step": 458 }, { "epoch": 0.07017007452703994, "grad_norm": 1.40625, "learning_rate": 7.534874157006867e-05, "loss": 1.7737, "step": 459 }, { "epoch": 0.07032295050640168, "grad_norm": 1.3125, "learning_rate": 7.53444151600388e-05, "loss": 1.5325, "step": 460 }, { "epoch": 0.07047582648576342, "grad_norm": 1.3984375, "learning_rate": 7.534008879914526e-05, "loss": 1.6283, "step": 461 }, { "epoch": 0.07062870246512516, "grad_norm": 1.3984375, "learning_rate": 7.53357624873967e-05, "loss": 1.5568, "step": 462 }, { "epoch": 0.0707815784444869, "grad_norm": 1.4296875, "learning_rate": 7.533143622480178e-05, "loss": 1.5722, "step": 463 }, { "epoch": 0.07093445442384866, "grad_norm": 1.3359375, "learning_rate": 7.532711001136903e-05, "loss": 1.7401, "step": 464 }, { "epoch": 0.0710873304032104, "grad_norm": 1.4140625, "learning_rate": 7.532278384710713e-05, "loss": 1.5632, "step": 465 }, { "epoch": 0.07124020638257214, "grad_norm": 1.2890625, "learning_rate": 7.53184577320247e-05, "loss": 1.5182, "step": 466 }, { "epoch": 0.07139308236193388, "grad_norm": 1.5, "learning_rate": 7.531413166613035e-05, "loss": 1.6272, "step": 467 }, { "epoch": 0.07154595834129562, "grad_norm": 1.203125, "learning_rate": 7.530980564943273e-05, "loss": 1.5503, "step": 468 }, { "epoch": 0.07169883432065736, "grad_norm": 1.5, "learning_rate": 7.530547968194041e-05, "loss": 1.9388, "step": 469 }, { "epoch": 0.0718517103000191, "grad_norm": 1.421875, "learning_rate": 7.530115376366204e-05, "loss": 1.8093, "step": 470 }, { "epoch": 0.07200458627938085, "grad_norm": 1.3515625, "learning_rate": 7.529682789460624e-05, "loss": 1.59, "step": 471 }, { "epoch": 0.07215746225874259, "grad_norm": 1.3046875, "learning_rate": 7.529250207478164e-05, "loss": 1.4901, "step": 472 }, { "epoch": 0.07231033823810434, "grad_norm": 1.3125, "learning_rate": 7.528817630419687e-05, "loss": 1.517, "step": 473 }, { "epoch": 0.07246321421746608, "grad_norm": 1.4453125, "learning_rate": 7.52838505828605e-05, "loss": 1.7424, "step": 474 }, { "epoch": 0.07261609019682783, "grad_norm": 1.4609375, "learning_rate": 7.527952491078119e-05, "loss": 1.7269, "step": 475 }, { "epoch": 0.07276896617618957, "grad_norm": 1.4609375, "learning_rate": 7.527519928796753e-05, "loss": 1.657, "step": 476 }, { "epoch": 0.07292184215555131, "grad_norm": 1.3203125, "learning_rate": 7.527087371442823e-05, "loss": 1.7882, "step": 477 }, { "epoch": 0.07307471813491305, "grad_norm": 1.3828125, "learning_rate": 7.526654819017176e-05, "loss": 1.6357, "step": 478 }, { "epoch": 0.07322759411427479, "grad_norm": 1.328125, "learning_rate": 7.526222271520688e-05, "loss": 1.8054, "step": 479 }, { "epoch": 0.07338047009363653, "grad_norm": 1.3671875, "learning_rate": 7.525789728954216e-05, "loss": 1.4906, "step": 480 }, { "epoch": 0.07353334607299827, "grad_norm": 1.3515625, "learning_rate": 7.525357191318618e-05, "loss": 1.6107, "step": 481 }, { "epoch": 0.07368622205236003, "grad_norm": 1.390625, "learning_rate": 7.524924658614764e-05, "loss": 1.7762, "step": 482 }, { "epoch": 0.07383909803172177, "grad_norm": 1.3828125, "learning_rate": 7.524492130843507e-05, "loss": 1.6763, "step": 483 }, { "epoch": 0.07399197401108351, "grad_norm": 1.3046875, "learning_rate": 7.524059608005719e-05, "loss": 1.5168, "step": 484 }, { "epoch": 0.07414484999044525, "grad_norm": 1.3203125, "learning_rate": 7.52362709010225e-05, "loss": 1.7284, "step": 485 }, { "epoch": 0.074297725969807, "grad_norm": 1.296875, "learning_rate": 7.523194577133974e-05, "loss": 1.4623, "step": 486 }, { "epoch": 0.07445060194916873, "grad_norm": 1.328125, "learning_rate": 7.522762069101748e-05, "loss": 1.6405, "step": 487 }, { "epoch": 0.07460347792853048, "grad_norm": 1.421875, "learning_rate": 7.52232956600643e-05, "loss": 1.5834, "step": 488 }, { "epoch": 0.07475635390789222, "grad_norm": 1.2734375, "learning_rate": 7.521897067848886e-05, "loss": 1.7262, "step": 489 }, { "epoch": 0.07490922988725396, "grad_norm": 1.453125, "learning_rate": 7.521464574629976e-05, "loss": 1.6856, "step": 490 }, { "epoch": 0.07506210586661571, "grad_norm": 1.453125, "learning_rate": 7.521032086350564e-05, "loss": 1.7428, "step": 491 }, { "epoch": 0.07521498184597745, "grad_norm": 1.34375, "learning_rate": 7.520599603011511e-05, "loss": 1.6745, "step": 492 }, { "epoch": 0.0753678578253392, "grad_norm": 1.375, "learning_rate": 7.520167124613683e-05, "loss": 1.5194, "step": 493 }, { "epoch": 0.07552073380470094, "grad_norm": 1.296875, "learning_rate": 7.519734651157935e-05, "loss": 1.7513, "step": 494 }, { "epoch": 0.07567360978406268, "grad_norm": 1.3203125, "learning_rate": 7.51930218264513e-05, "loss": 1.5291, "step": 495 }, { "epoch": 0.07582648576342442, "grad_norm": 1.234375, "learning_rate": 7.518869719076134e-05, "loss": 1.4288, "step": 496 }, { "epoch": 0.07597936174278616, "grad_norm": 1.5078125, "learning_rate": 7.518437260451804e-05, "loss": 1.7536, "step": 497 }, { "epoch": 0.0761322377221479, "grad_norm": 1.5, "learning_rate": 7.518004806773009e-05, "loss": 2.0232, "step": 498 }, { "epoch": 0.07628511370150964, "grad_norm": 1.3125, "learning_rate": 7.517572358040604e-05, "loss": 1.545, "step": 499 }, { "epoch": 0.0764379896808714, "grad_norm": 1.2890625, "learning_rate": 7.517139914255451e-05, "loss": 1.5503, "step": 500 }, { "epoch": 0.07659086566023314, "grad_norm": 1.3671875, "learning_rate": 7.516707475418415e-05, "loss": 1.4439, "step": 501 }, { "epoch": 0.07674374163959488, "grad_norm": 1.359375, "learning_rate": 7.516275041530357e-05, "loss": 1.4305, "step": 502 }, { "epoch": 0.07689661761895662, "grad_norm": 1.375, "learning_rate": 7.51584261259214e-05, "loss": 1.5342, "step": 503 }, { "epoch": 0.07704949359831836, "grad_norm": 1.28125, "learning_rate": 7.515410188604621e-05, "loss": 1.6278, "step": 504 }, { "epoch": 0.0772023695776801, "grad_norm": 1.4921875, "learning_rate": 7.514977769568668e-05, "loss": 1.8436, "step": 505 }, { "epoch": 0.07735524555704185, "grad_norm": 1.5234375, "learning_rate": 7.51454535548514e-05, "loss": 1.8123, "step": 506 }, { "epoch": 0.07750812153640359, "grad_norm": 1.3203125, "learning_rate": 7.514112946354895e-05, "loss": 1.6947, "step": 507 }, { "epoch": 0.07766099751576533, "grad_norm": 1.3515625, "learning_rate": 7.513680542178803e-05, "loss": 1.6668, "step": 508 }, { "epoch": 0.07781387349512708, "grad_norm": 1.359375, "learning_rate": 7.513248142957718e-05, "loss": 1.5481, "step": 509 }, { "epoch": 0.07796674947448883, "grad_norm": 1.1953125, "learning_rate": 7.512815748692506e-05, "loss": 1.5952, "step": 510 }, { "epoch": 0.07811962545385057, "grad_norm": 1.390625, "learning_rate": 7.512383359384025e-05, "loss": 1.5906, "step": 511 }, { "epoch": 0.07827250143321231, "grad_norm": 1.5390625, "learning_rate": 7.511950975033142e-05, "loss": 1.8126, "step": 512 }, { "epoch": 0.07842537741257405, "grad_norm": 1.6015625, "learning_rate": 7.511518595640718e-05, "loss": 1.8838, "step": 513 }, { "epoch": 0.07857825339193579, "grad_norm": 1.4296875, "learning_rate": 7.511086221207609e-05, "loss": 1.8617, "step": 514 }, { "epoch": 0.07873112937129753, "grad_norm": 1.2578125, "learning_rate": 7.510653851734681e-05, "loss": 1.4363, "step": 515 }, { "epoch": 0.07888400535065927, "grad_norm": 1.515625, "learning_rate": 7.510221487222793e-05, "loss": 1.6697, "step": 516 }, { "epoch": 0.07903688133002101, "grad_norm": 1.3984375, "learning_rate": 7.509789127672814e-05, "loss": 1.8408, "step": 517 }, { "epoch": 0.07918975730938277, "grad_norm": 1.171875, "learning_rate": 7.509356773085595e-05, "loss": 1.1575, "step": 518 }, { "epoch": 0.07934263328874451, "grad_norm": 1.4609375, "learning_rate": 7.508924423462008e-05, "loss": 1.6487, "step": 519 }, { "epoch": 0.07949550926810625, "grad_norm": 1.3984375, "learning_rate": 7.508492078802906e-05, "loss": 1.574, "step": 520 }, { "epoch": 0.07964838524746799, "grad_norm": 1.3359375, "learning_rate": 7.508059739109153e-05, "loss": 1.7686, "step": 521 }, { "epoch": 0.07980126122682973, "grad_norm": 1.4609375, "learning_rate": 7.507627404381617e-05, "loss": 1.6941, "step": 522 }, { "epoch": 0.07995413720619148, "grad_norm": 1.3515625, "learning_rate": 7.507195074621148e-05, "loss": 1.4242, "step": 523 }, { "epoch": 0.08010701318555322, "grad_norm": 1.5234375, "learning_rate": 7.506762749828619e-05, "loss": 1.9002, "step": 524 }, { "epoch": 0.08025988916491496, "grad_norm": 1.40625, "learning_rate": 7.506330430004885e-05, "loss": 1.8121, "step": 525 }, { "epoch": 0.0804127651442767, "grad_norm": 1.5078125, "learning_rate": 7.505898115150809e-05, "loss": 1.7459, "step": 526 }, { "epoch": 0.08056564112363845, "grad_norm": 1.3359375, "learning_rate": 7.505465805267256e-05, "loss": 1.5928, "step": 527 }, { "epoch": 0.0807185171030002, "grad_norm": 1.453125, "learning_rate": 7.50503350035508e-05, "loss": 1.835, "step": 528 }, { "epoch": 0.08087139308236194, "grad_norm": 1.3203125, "learning_rate": 7.504601200415148e-05, "loss": 1.7292, "step": 529 }, { "epoch": 0.08102426906172368, "grad_norm": 1.4453125, "learning_rate": 7.50416890544832e-05, "loss": 1.7575, "step": 530 }, { "epoch": 0.08117714504108542, "grad_norm": 1.515625, "learning_rate": 7.503736615455459e-05, "loss": 1.7007, "step": 531 }, { "epoch": 0.08133002102044716, "grad_norm": 1.8671875, "learning_rate": 7.503304330437427e-05, "loss": 1.7317, "step": 532 }, { "epoch": 0.0814828969998089, "grad_norm": 1.40625, "learning_rate": 7.502872050395081e-05, "loss": 1.7503, "step": 533 }, { "epoch": 0.08163577297917064, "grad_norm": 1.3359375, "learning_rate": 7.502439775329286e-05, "loss": 1.6945, "step": 534 }, { "epoch": 0.08178864895853238, "grad_norm": 1.4140625, "learning_rate": 7.502007505240903e-05, "loss": 1.6012, "step": 535 }, { "epoch": 0.08194152493789414, "grad_norm": 1.359375, "learning_rate": 7.501575240130794e-05, "loss": 1.5688, "step": 536 }, { "epoch": 0.08209440091725588, "grad_norm": 1.5078125, "learning_rate": 7.501142979999818e-05, "loss": 1.7243, "step": 537 }, { "epoch": 0.08224727689661762, "grad_norm": 1.171875, "learning_rate": 7.500710724848843e-05, "loss": 1.4018, "step": 538 }, { "epoch": 0.08240015287597936, "grad_norm": 1.375, "learning_rate": 7.500278474678723e-05, "loss": 1.4993, "step": 539 }, { "epoch": 0.0825530288553411, "grad_norm": 1.40625, "learning_rate": 7.49984622949032e-05, "loss": 1.4173, "step": 540 }, { "epoch": 0.08270590483470285, "grad_norm": 1.40625, "learning_rate": 7.499413989284501e-05, "loss": 1.5303, "step": 541 }, { "epoch": 0.08285878081406459, "grad_norm": 1.359375, "learning_rate": 7.49898175406212e-05, "loss": 1.5208, "step": 542 }, { "epoch": 0.08301165679342633, "grad_norm": 1.5078125, "learning_rate": 7.498549523824047e-05, "loss": 1.7946, "step": 543 }, { "epoch": 0.08316453277278807, "grad_norm": 1.3828125, "learning_rate": 7.498117298571136e-05, "loss": 1.6319, "step": 544 }, { "epoch": 0.08331740875214982, "grad_norm": 1.4921875, "learning_rate": 7.497685078304254e-05, "loss": 1.8693, "step": 545 }, { "epoch": 0.08347028473151157, "grad_norm": 1.28125, "learning_rate": 7.497252863024257e-05, "loss": 1.6307, "step": 546 }, { "epoch": 0.0836231607108733, "grad_norm": 1.3515625, "learning_rate": 7.496820652732007e-05, "loss": 1.7615, "step": 547 }, { "epoch": 0.08377603669023505, "grad_norm": 1.46875, "learning_rate": 7.496388447428372e-05, "loss": 1.6493, "step": 548 }, { "epoch": 0.08392891266959679, "grad_norm": 1.4296875, "learning_rate": 7.495956247114206e-05, "loss": 1.6219, "step": 549 }, { "epoch": 0.08408178864895853, "grad_norm": 1.140625, "learning_rate": 7.495524051790375e-05, "loss": 1.1307, "step": 550 }, { "epoch": 0.08423466462832027, "grad_norm": 1.3671875, "learning_rate": 7.495091861457734e-05, "loss": 1.4602, "step": 551 }, { "epoch": 0.08438754060768201, "grad_norm": 1.3203125, "learning_rate": 7.494659676117152e-05, "loss": 1.4968, "step": 552 }, { "epoch": 0.08454041658704375, "grad_norm": 1.140625, "learning_rate": 7.49422749576949e-05, "loss": 1.4048, "step": 553 }, { "epoch": 0.08469329256640551, "grad_norm": 1.625, "learning_rate": 7.493795320415602e-05, "loss": 1.6272, "step": 554 }, { "epoch": 0.08484616854576725, "grad_norm": 1.375, "learning_rate": 7.493363150056353e-05, "loss": 1.4651, "step": 555 }, { "epoch": 0.08499904452512899, "grad_norm": 1.2890625, "learning_rate": 7.492930984692606e-05, "loss": 1.6343, "step": 556 }, { "epoch": 0.08515192050449073, "grad_norm": 1.390625, "learning_rate": 7.492498824325224e-05, "loss": 1.7195, "step": 557 }, { "epoch": 0.08530479648385247, "grad_norm": 1.5, "learning_rate": 7.492066668955059e-05, "loss": 1.4111, "step": 558 }, { "epoch": 0.08545767246321422, "grad_norm": 1.34375, "learning_rate": 7.491634518582984e-05, "loss": 1.6412, "step": 559 }, { "epoch": 0.08561054844257596, "grad_norm": 1.4140625, "learning_rate": 7.491202373209854e-05, "loss": 1.8956, "step": 560 }, { "epoch": 0.0857634244219377, "grad_norm": 1.4296875, "learning_rate": 7.490770232836528e-05, "loss": 1.6138, "step": 561 }, { "epoch": 0.08591630040129944, "grad_norm": 1.3828125, "learning_rate": 7.490338097463876e-05, "loss": 1.6817, "step": 562 }, { "epoch": 0.0860691763806612, "grad_norm": 1.4140625, "learning_rate": 7.489905967092745e-05, "loss": 1.6793, "step": 563 }, { "epoch": 0.08622205236002294, "grad_norm": 1.2734375, "learning_rate": 7.489473841724013e-05, "loss": 1.5416, "step": 564 }, { "epoch": 0.08637492833938468, "grad_norm": 1.328125, "learning_rate": 7.489041721358529e-05, "loss": 1.5069, "step": 565 }, { "epoch": 0.08652780431874642, "grad_norm": 1.34375, "learning_rate": 7.488609605997158e-05, "loss": 1.4982, "step": 566 }, { "epoch": 0.08668068029810816, "grad_norm": 1.421875, "learning_rate": 7.488177495640764e-05, "loss": 1.9756, "step": 567 }, { "epoch": 0.0868335562774699, "grad_norm": 1.3984375, "learning_rate": 7.487745390290202e-05, "loss": 1.5725, "step": 568 }, { "epoch": 0.08698643225683164, "grad_norm": 1.4765625, "learning_rate": 7.487313289946337e-05, "loss": 1.5494, "step": 569 }, { "epoch": 0.08713930823619338, "grad_norm": 1.3515625, "learning_rate": 7.48688119461003e-05, "loss": 1.5646, "step": 570 }, { "epoch": 0.08729218421555512, "grad_norm": 1.21875, "learning_rate": 7.486449104282142e-05, "loss": 1.4814, "step": 571 }, { "epoch": 0.08744506019491688, "grad_norm": 1.390625, "learning_rate": 7.486017018963536e-05, "loss": 1.65, "step": 572 }, { "epoch": 0.08759793617427862, "grad_norm": 1.453125, "learning_rate": 7.485584938655066e-05, "loss": 1.7221, "step": 573 }, { "epoch": 0.08775081215364036, "grad_norm": 1.3359375, "learning_rate": 7.485152863357603e-05, "loss": 1.5373, "step": 574 }, { "epoch": 0.0879036881330021, "grad_norm": 1.34375, "learning_rate": 7.484720793071999e-05, "loss": 1.5998, "step": 575 }, { "epoch": 0.08805656411236384, "grad_norm": 1.2734375, "learning_rate": 7.484288727799122e-05, "loss": 1.3342, "step": 576 }, { "epoch": 0.08820944009172559, "grad_norm": 1.3359375, "learning_rate": 7.483856667539827e-05, "loss": 1.5786, "step": 577 }, { "epoch": 0.08836231607108733, "grad_norm": 1.3359375, "learning_rate": 7.483424612294984e-05, "loss": 1.5638, "step": 578 }, { "epoch": 0.08851519205044907, "grad_norm": 1.5234375, "learning_rate": 7.482992562065446e-05, "loss": 1.8078, "step": 579 }, { "epoch": 0.08866806802981081, "grad_norm": 1.4609375, "learning_rate": 7.482560516852075e-05, "loss": 1.4175, "step": 580 }, { "epoch": 0.08882094400917256, "grad_norm": 1.4140625, "learning_rate": 7.482128476655735e-05, "loss": 1.6962, "step": 581 }, { "epoch": 0.0889738199885343, "grad_norm": 1.4453125, "learning_rate": 7.481696441477282e-05, "loss": 1.6369, "step": 582 }, { "epoch": 0.08912669596789605, "grad_norm": 1.3203125, "learning_rate": 7.481264411317587e-05, "loss": 1.6102, "step": 583 }, { "epoch": 0.08927957194725779, "grad_norm": 1.46875, "learning_rate": 7.480832386177498e-05, "loss": 1.6475, "step": 584 }, { "epoch": 0.08943244792661953, "grad_norm": 1.296875, "learning_rate": 7.480400366057886e-05, "loss": 1.606, "step": 585 }, { "epoch": 0.08958532390598127, "grad_norm": 1.2734375, "learning_rate": 7.479968350959609e-05, "loss": 1.4476, "step": 586 }, { "epoch": 0.08973819988534301, "grad_norm": 1.3515625, "learning_rate": 7.479536340883526e-05, "loss": 1.5446, "step": 587 }, { "epoch": 0.08989107586470475, "grad_norm": 1.3828125, "learning_rate": 7.479104335830502e-05, "loss": 1.7525, "step": 588 }, { "epoch": 0.0900439518440665, "grad_norm": 1.609375, "learning_rate": 7.478672335801391e-05, "loss": 1.6138, "step": 589 }, { "epoch": 0.09019682782342825, "grad_norm": 1.3203125, "learning_rate": 7.47824034079706e-05, "loss": 1.3105, "step": 590 }, { "epoch": 0.09034970380278999, "grad_norm": 1.4609375, "learning_rate": 7.477808350818367e-05, "loss": 1.6795, "step": 591 }, { "epoch": 0.09050257978215173, "grad_norm": 1.2734375, "learning_rate": 7.477376365866178e-05, "loss": 1.5501, "step": 592 }, { "epoch": 0.09065545576151347, "grad_norm": 1.3359375, "learning_rate": 7.476944385941349e-05, "loss": 1.5484, "step": 593 }, { "epoch": 0.09080833174087521, "grad_norm": 1.265625, "learning_rate": 7.47651241104474e-05, "loss": 1.3678, "step": 594 }, { "epoch": 0.09096120772023696, "grad_norm": 1.375, "learning_rate": 7.476080441177215e-05, "loss": 1.6617, "step": 595 }, { "epoch": 0.0911140836995987, "grad_norm": 1.375, "learning_rate": 7.475648476339632e-05, "loss": 1.8723, "step": 596 }, { "epoch": 0.09126695967896044, "grad_norm": 1.3359375, "learning_rate": 7.475216516532855e-05, "loss": 1.6882, "step": 597 }, { "epoch": 0.09141983565832218, "grad_norm": 1.3984375, "learning_rate": 7.474784561757746e-05, "loss": 1.8536, "step": 598 }, { "epoch": 0.09157271163768393, "grad_norm": 1.265625, "learning_rate": 7.474352612015158e-05, "loss": 1.3962, "step": 599 }, { "epoch": 0.09172558761704568, "grad_norm": 1.3671875, "learning_rate": 7.47392066730596e-05, "loss": 1.6517, "step": 600 }, { "epoch": 0.09187846359640742, "grad_norm": 1.3359375, "learning_rate": 7.473488727631009e-05, "loss": 1.4394, "step": 601 }, { "epoch": 0.09203133957576916, "grad_norm": 1.5859375, "learning_rate": 7.473056792991169e-05, "loss": 1.4979, "step": 602 }, { "epoch": 0.0921842155551309, "grad_norm": 1.4765625, "learning_rate": 7.472624863387293e-05, "loss": 1.7684, "step": 603 }, { "epoch": 0.09233709153449264, "grad_norm": 1.3515625, "learning_rate": 7.472192938820254e-05, "loss": 1.6043, "step": 604 }, { "epoch": 0.09248996751385438, "grad_norm": 1.5, "learning_rate": 7.471761019290904e-05, "loss": 1.6785, "step": 605 }, { "epoch": 0.09264284349321612, "grad_norm": 1.484375, "learning_rate": 7.471329104800104e-05, "loss": 1.7636, "step": 606 }, { "epoch": 0.09279571947257786, "grad_norm": 1.4453125, "learning_rate": 7.470897195348721e-05, "loss": 1.7538, "step": 607 }, { "epoch": 0.09294859545193962, "grad_norm": 1.203125, "learning_rate": 7.470465290937603e-05, "loss": 1.2673, "step": 608 }, { "epoch": 0.09310147143130136, "grad_norm": 1.3359375, "learning_rate": 7.470033391567628e-05, "loss": 1.5344, "step": 609 }, { "epoch": 0.0932543474106631, "grad_norm": 1.375, "learning_rate": 7.469601497239643e-05, "loss": 1.6963, "step": 610 }, { "epoch": 0.09340722339002484, "grad_norm": 1.2890625, "learning_rate": 7.469169607954515e-05, "loss": 1.7322, "step": 611 }, { "epoch": 0.09356009936938658, "grad_norm": 1.2421875, "learning_rate": 7.468737723713106e-05, "loss": 1.2427, "step": 612 }, { "epoch": 0.09371297534874833, "grad_norm": 1.265625, "learning_rate": 7.468305844516271e-05, "loss": 1.7182, "step": 613 }, { "epoch": 0.09386585132811007, "grad_norm": 1.21875, "learning_rate": 7.467873970364874e-05, "loss": 1.5205, "step": 614 }, { "epoch": 0.09401872730747181, "grad_norm": 1.46875, "learning_rate": 7.467442101259773e-05, "loss": 1.7948, "step": 615 }, { "epoch": 0.09417160328683355, "grad_norm": 1.265625, "learning_rate": 7.467010237201835e-05, "loss": 1.3304, "step": 616 }, { "epoch": 0.0943244792661953, "grad_norm": 1.3515625, "learning_rate": 7.466578378191915e-05, "loss": 1.801, "step": 617 }, { "epoch": 0.09447735524555705, "grad_norm": 1.3515625, "learning_rate": 7.466146524230877e-05, "loss": 1.5355, "step": 618 }, { "epoch": 0.09463023122491879, "grad_norm": 1.4140625, "learning_rate": 7.46571467531958e-05, "loss": 1.802, "step": 619 }, { "epoch": 0.09478310720428053, "grad_norm": 1.4453125, "learning_rate": 7.465282831458882e-05, "loss": 1.396, "step": 620 }, { "epoch": 0.09493598318364227, "grad_norm": 1.3046875, "learning_rate": 7.46485099264965e-05, "loss": 1.4791, "step": 621 }, { "epoch": 0.09508885916300401, "grad_norm": 1.4140625, "learning_rate": 7.464419158892737e-05, "loss": 1.4932, "step": 622 }, { "epoch": 0.09524173514236575, "grad_norm": 1.421875, "learning_rate": 7.463987330189013e-05, "loss": 1.776, "step": 623 }, { "epoch": 0.0953946111217275, "grad_norm": 1.3671875, "learning_rate": 7.46355550653933e-05, "loss": 1.6195, "step": 624 }, { "epoch": 0.09554748710108923, "grad_norm": 1.4765625, "learning_rate": 7.46312368794455e-05, "loss": 1.6062, "step": 625 }, { "epoch": 0.09570036308045099, "grad_norm": 1.4140625, "learning_rate": 7.462691874405538e-05, "loss": 1.7152, "step": 626 }, { "epoch": 0.09585323905981273, "grad_norm": 1.421875, "learning_rate": 7.462260065923149e-05, "loss": 1.7491, "step": 627 }, { "epoch": 0.09600611503917447, "grad_norm": 1.390625, "learning_rate": 7.461828262498252e-05, "loss": 1.6858, "step": 628 }, { "epoch": 0.09615899101853621, "grad_norm": 1.5078125, "learning_rate": 7.461396464131695e-05, "loss": 1.7936, "step": 629 }, { "epoch": 0.09631186699789795, "grad_norm": 1.3515625, "learning_rate": 7.46096467082435e-05, "loss": 1.5857, "step": 630 }, { "epoch": 0.0964647429772597, "grad_norm": 1.3359375, "learning_rate": 7.460532882577075e-05, "loss": 1.4122, "step": 631 }, { "epoch": 0.09661761895662144, "grad_norm": 1.375, "learning_rate": 7.460101099390721e-05, "loss": 1.56, "step": 632 }, { "epoch": 0.09677049493598318, "grad_norm": 1.4453125, "learning_rate": 7.459669321266165e-05, "loss": 1.473, "step": 633 }, { "epoch": 0.09692337091534492, "grad_norm": 1.46875, "learning_rate": 7.459237548204252e-05, "loss": 1.5487, "step": 634 }, { "epoch": 0.09707624689470667, "grad_norm": 1.4140625, "learning_rate": 7.458805780205852e-05, "loss": 1.8456, "step": 635 }, { "epoch": 0.09722912287406842, "grad_norm": 1.484375, "learning_rate": 7.45837401727182e-05, "loss": 1.6469, "step": 636 }, { "epoch": 0.09738199885343016, "grad_norm": 1.4765625, "learning_rate": 7.457942259403021e-05, "loss": 1.7031, "step": 637 }, { "epoch": 0.0975348748327919, "grad_norm": 1.421875, "learning_rate": 7.457510506600316e-05, "loss": 1.6744, "step": 638 }, { "epoch": 0.09768775081215364, "grad_norm": 1.5859375, "learning_rate": 7.45707875886456e-05, "loss": 1.3874, "step": 639 }, { "epoch": 0.09784062679151538, "grad_norm": 1.5546875, "learning_rate": 7.456647016196617e-05, "loss": 1.4711, "step": 640 }, { "epoch": 0.09799350277087712, "grad_norm": 1.484375, "learning_rate": 7.456215278597344e-05, "loss": 1.7531, "step": 641 }, { "epoch": 0.09814637875023886, "grad_norm": 1.2890625, "learning_rate": 7.455783546067609e-05, "loss": 1.4346, "step": 642 }, { "epoch": 0.0982992547296006, "grad_norm": 1.421875, "learning_rate": 7.45535181860826e-05, "loss": 1.4172, "step": 643 }, { "epoch": 0.09845213070896236, "grad_norm": 1.3671875, "learning_rate": 7.454920096220173e-05, "loss": 1.559, "step": 644 }, { "epoch": 0.0986050066883241, "grad_norm": 1.4375, "learning_rate": 7.454488378904199e-05, "loss": 1.4967, "step": 645 }, { "epoch": 0.09875788266768584, "grad_norm": 1.703125, "learning_rate": 7.454056666661195e-05, "loss": 1.8167, "step": 646 }, { "epoch": 0.09891075864704758, "grad_norm": 1.4140625, "learning_rate": 7.453624959492033e-05, "loss": 1.4076, "step": 647 }, { "epoch": 0.09906363462640932, "grad_norm": 1.3125, "learning_rate": 7.45319325739756e-05, "loss": 1.8349, "step": 648 }, { "epoch": 0.09921651060577107, "grad_norm": 1.1328125, "learning_rate": 7.452761560378647e-05, "loss": 1.4006, "step": 649 }, { "epoch": 0.09936938658513281, "grad_norm": 1.34375, "learning_rate": 7.452329868436147e-05, "loss": 1.712, "step": 650 }, { "epoch": 0.09952226256449455, "grad_norm": 1.34375, "learning_rate": 7.451898181570924e-05, "loss": 1.6563, "step": 651 }, { "epoch": 0.09967513854385629, "grad_norm": 1.2734375, "learning_rate": 7.451466499783842e-05, "loss": 1.5625, "step": 652 }, { "epoch": 0.09982801452321804, "grad_norm": 1.421875, "learning_rate": 7.451034823075753e-05, "loss": 1.5267, "step": 653 }, { "epoch": 0.09998089050257979, "grad_norm": 1.265625, "learning_rate": 7.450603151447521e-05, "loss": 1.4459, "step": 654 }, { "epoch": 0.10013376648194153, "grad_norm": 1.3046875, "learning_rate": 7.450171484900009e-05, "loss": 1.4684, "step": 655 }, { "epoch": 0.10028664246130327, "grad_norm": 1.5234375, "learning_rate": 7.449739823434072e-05, "loss": 1.7515, "step": 656 }, { "epoch": 0.10043951844066501, "grad_norm": 1.40625, "learning_rate": 7.449308167050579e-05, "loss": 1.6061, "step": 657 }, { "epoch": 0.10059239442002675, "grad_norm": 1.4609375, "learning_rate": 7.448876515750379e-05, "loss": 1.7082, "step": 658 }, { "epoch": 0.10074527039938849, "grad_norm": 1.453125, "learning_rate": 7.448444869534339e-05, "loss": 1.3174, "step": 659 }, { "epoch": 0.10089814637875023, "grad_norm": 1.3828125, "learning_rate": 7.448013228403315e-05, "loss": 1.6986, "step": 660 }, { "epoch": 0.10105102235811197, "grad_norm": 1.4140625, "learning_rate": 7.447581592358174e-05, "loss": 1.8218, "step": 661 }, { "epoch": 0.10120389833747373, "grad_norm": 1.21875, "learning_rate": 7.447149961399769e-05, "loss": 1.4149, "step": 662 }, { "epoch": 0.10135677431683547, "grad_norm": 1.265625, "learning_rate": 7.446718335528968e-05, "loss": 1.5247, "step": 663 }, { "epoch": 0.10150965029619721, "grad_norm": 1.4140625, "learning_rate": 7.446286714746627e-05, "loss": 1.8784, "step": 664 }, { "epoch": 0.10166252627555895, "grad_norm": 1.4609375, "learning_rate": 7.4458550990536e-05, "loss": 1.818, "step": 665 }, { "epoch": 0.1018154022549207, "grad_norm": 1.4765625, "learning_rate": 7.445423488450758e-05, "loss": 1.6006, "step": 666 }, { "epoch": 0.10196827823428244, "grad_norm": 1.3671875, "learning_rate": 7.444991882938952e-05, "loss": 1.4143, "step": 667 }, { "epoch": 0.10212115421364418, "grad_norm": 1.4453125, "learning_rate": 7.444560282519053e-05, "loss": 1.6562, "step": 668 }, { "epoch": 0.10227403019300592, "grad_norm": 1.34375, "learning_rate": 7.44412868719191e-05, "loss": 1.8574, "step": 669 }, { "epoch": 0.10242690617236766, "grad_norm": 1.40625, "learning_rate": 7.443697096958387e-05, "loss": 1.5635, "step": 670 }, { "epoch": 0.10257978215172942, "grad_norm": 1.34375, "learning_rate": 7.443265511819347e-05, "loss": 1.7669, "step": 671 }, { "epoch": 0.10273265813109116, "grad_norm": 1.28125, "learning_rate": 7.442833931775646e-05, "loss": 1.5751, "step": 672 }, { "epoch": 0.1028855341104529, "grad_norm": 1.5, "learning_rate": 7.442402356828149e-05, "loss": 1.628, "step": 673 }, { "epoch": 0.10303841008981464, "grad_norm": 1.6328125, "learning_rate": 7.44197078697771e-05, "loss": 1.6068, "step": 674 }, { "epoch": 0.10319128606917638, "grad_norm": 1.3828125, "learning_rate": 7.441539222225194e-05, "loss": 1.5298, "step": 675 }, { "epoch": 0.10334416204853812, "grad_norm": 1.3359375, "learning_rate": 7.441107662571457e-05, "loss": 1.6455, "step": 676 }, { "epoch": 0.10349703802789986, "grad_norm": 1.3515625, "learning_rate": 7.440676108017362e-05, "loss": 1.5865, "step": 677 }, { "epoch": 0.1036499140072616, "grad_norm": 1.3359375, "learning_rate": 7.440244558563772e-05, "loss": 1.4073, "step": 678 }, { "epoch": 0.10380278998662334, "grad_norm": 1.40625, "learning_rate": 7.439813014211539e-05, "loss": 1.7093, "step": 679 }, { "epoch": 0.1039556659659851, "grad_norm": 1.3828125, "learning_rate": 7.439381474961529e-05, "loss": 1.3777, "step": 680 }, { "epoch": 0.10410854194534684, "grad_norm": 1.234375, "learning_rate": 7.438949940814598e-05, "loss": 1.4502, "step": 681 }, { "epoch": 0.10426141792470858, "grad_norm": 1.3046875, "learning_rate": 7.438518411771611e-05, "loss": 1.3523, "step": 682 }, { "epoch": 0.10441429390407032, "grad_norm": 1.3984375, "learning_rate": 7.438086887833425e-05, "loss": 1.554, "step": 683 }, { "epoch": 0.10456716988343207, "grad_norm": 1.3515625, "learning_rate": 7.437655369000902e-05, "loss": 1.8488, "step": 684 }, { "epoch": 0.1047200458627938, "grad_norm": 1.2890625, "learning_rate": 7.437223855274899e-05, "loss": 1.6711, "step": 685 }, { "epoch": 0.10487292184215555, "grad_norm": 1.390625, "learning_rate": 7.436792346656276e-05, "loss": 1.5246, "step": 686 }, { "epoch": 0.10502579782151729, "grad_norm": 1.3125, "learning_rate": 7.436360843145898e-05, "loss": 1.5601, "step": 687 }, { "epoch": 0.10517867380087903, "grad_norm": 1.3125, "learning_rate": 7.435929344744615e-05, "loss": 1.3121, "step": 688 }, { "epoch": 0.10533154978024079, "grad_norm": 1.2578125, "learning_rate": 7.435497851453299e-05, "loss": 1.3004, "step": 689 }, { "epoch": 0.10548442575960253, "grad_norm": 1.2734375, "learning_rate": 7.435066363272804e-05, "loss": 1.3957, "step": 690 }, { "epoch": 0.10563730173896427, "grad_norm": 1.3671875, "learning_rate": 7.434634880203986e-05, "loss": 1.7876, "step": 691 }, { "epoch": 0.10579017771832601, "grad_norm": 1.3671875, "learning_rate": 7.434203402247714e-05, "loss": 1.7296, "step": 692 }, { "epoch": 0.10594305369768775, "grad_norm": 1.359375, "learning_rate": 7.433771929404839e-05, "loss": 1.5095, "step": 693 }, { "epoch": 0.10609592967704949, "grad_norm": 1.2890625, "learning_rate": 7.433340461676226e-05, "loss": 1.5, "step": 694 }, { "epoch": 0.10624880565641123, "grad_norm": 1.3046875, "learning_rate": 7.432908999062731e-05, "loss": 1.5797, "step": 695 }, { "epoch": 0.10640168163577297, "grad_norm": 1.2734375, "learning_rate": 7.43247754156522e-05, "loss": 1.4124, "step": 696 }, { "epoch": 0.10655455761513472, "grad_norm": 1.296875, "learning_rate": 7.43204608918455e-05, "loss": 1.6787, "step": 697 }, { "epoch": 0.10670743359449647, "grad_norm": 1.3984375, "learning_rate": 7.431614641921577e-05, "loss": 1.6206, "step": 698 }, { "epoch": 0.10686030957385821, "grad_norm": 1.3671875, "learning_rate": 7.431183199777165e-05, "loss": 1.7964, "step": 699 }, { "epoch": 0.10701318555321995, "grad_norm": 1.375, "learning_rate": 7.430751762752173e-05, "loss": 1.4055, "step": 700 }, { "epoch": 0.1071660615325817, "grad_norm": 1.4375, "learning_rate": 7.430320330847461e-05, "loss": 1.6822, "step": 701 }, { "epoch": 0.10731893751194344, "grad_norm": 1.3515625, "learning_rate": 7.429888904063886e-05, "loss": 1.6875, "step": 702 }, { "epoch": 0.10747181349130518, "grad_norm": 1.359375, "learning_rate": 7.429457482402316e-05, "loss": 1.6019, "step": 703 }, { "epoch": 0.10762468947066692, "grad_norm": 1.3203125, "learning_rate": 7.429026065863603e-05, "loss": 1.5175, "step": 704 }, { "epoch": 0.10777756545002866, "grad_norm": 1.40625, "learning_rate": 7.428594654448605e-05, "loss": 1.7767, "step": 705 }, { "epoch": 0.1079304414293904, "grad_norm": 1.1796875, "learning_rate": 7.428163248158188e-05, "loss": 1.2472, "step": 706 }, { "epoch": 0.10808331740875216, "grad_norm": 1.375, "learning_rate": 7.427731846993209e-05, "loss": 1.6367, "step": 707 }, { "epoch": 0.1082361933881139, "grad_norm": 1.375, "learning_rate": 7.427300450954531e-05, "loss": 1.4679, "step": 708 }, { "epoch": 0.10838906936747564, "grad_norm": 1.2734375, "learning_rate": 7.426869060043006e-05, "loss": 1.5852, "step": 709 }, { "epoch": 0.10854194534683738, "grad_norm": 1.3125, "learning_rate": 7.426437674259501e-05, "loss": 1.4772, "step": 710 }, { "epoch": 0.10869482132619912, "grad_norm": 1.390625, "learning_rate": 7.426006293604873e-05, "loss": 1.7247, "step": 711 }, { "epoch": 0.10884769730556086, "grad_norm": 1.4296875, "learning_rate": 7.425574918079981e-05, "loss": 1.5486, "step": 712 }, { "epoch": 0.1090005732849226, "grad_norm": 1.4921875, "learning_rate": 7.425143547685689e-05, "loss": 1.9266, "step": 713 }, { "epoch": 0.10915344926428434, "grad_norm": 1.3828125, "learning_rate": 7.42471218242285e-05, "loss": 1.448, "step": 714 }, { "epoch": 0.10930632524364609, "grad_norm": 1.4375, "learning_rate": 7.424280822292329e-05, "loss": 1.7104, "step": 715 }, { "epoch": 0.10945920122300784, "grad_norm": 1.3828125, "learning_rate": 7.423849467294985e-05, "loss": 1.5072, "step": 716 }, { "epoch": 0.10961207720236958, "grad_norm": 1.515625, "learning_rate": 7.42341811743167e-05, "loss": 1.5711, "step": 717 }, { "epoch": 0.10976495318173132, "grad_norm": 1.421875, "learning_rate": 7.422986772703256e-05, "loss": 1.5705, "step": 718 }, { "epoch": 0.10991782916109306, "grad_norm": 1.421875, "learning_rate": 7.422555433110594e-05, "loss": 1.5371, "step": 719 }, { "epoch": 0.1100707051404548, "grad_norm": 1.5, "learning_rate": 7.422124098654546e-05, "loss": 1.713, "step": 720 }, { "epoch": 0.11022358111981655, "grad_norm": 1.375, "learning_rate": 7.421692769335971e-05, "loss": 1.6442, "step": 721 }, { "epoch": 0.11037645709917829, "grad_norm": 1.3046875, "learning_rate": 7.421261445155732e-05, "loss": 1.5471, "step": 722 }, { "epoch": 0.11052933307854003, "grad_norm": 1.453125, "learning_rate": 7.420830126114687e-05, "loss": 1.4124, "step": 723 }, { "epoch": 0.11068220905790177, "grad_norm": 1.359375, "learning_rate": 7.420398812213691e-05, "loss": 1.4362, "step": 724 }, { "epoch": 0.11083508503726353, "grad_norm": 1.5625, "learning_rate": 7.41996750345361e-05, "loss": 1.6735, "step": 725 }, { "epoch": 0.11098796101662527, "grad_norm": 1.296875, "learning_rate": 7.419536199835297e-05, "loss": 1.4218, "step": 726 }, { "epoch": 0.11114083699598701, "grad_norm": 1.46875, "learning_rate": 7.41910490135962e-05, "loss": 1.52, "step": 727 }, { "epoch": 0.11129371297534875, "grad_norm": 1.28125, "learning_rate": 7.418673608027427e-05, "loss": 1.5631, "step": 728 }, { "epoch": 0.11144658895471049, "grad_norm": 1.3359375, "learning_rate": 7.418242319839593e-05, "loss": 1.5953, "step": 729 }, { "epoch": 0.11159946493407223, "grad_norm": 1.25, "learning_rate": 7.417811036796964e-05, "loss": 1.4046, "step": 730 }, { "epoch": 0.11175234091343397, "grad_norm": 1.2890625, "learning_rate": 7.417379758900402e-05, "loss": 1.5629, "step": 731 }, { "epoch": 0.11190521689279571, "grad_norm": 1.1796875, "learning_rate": 7.416948486150776e-05, "loss": 1.4944, "step": 732 }, { "epoch": 0.11205809287215746, "grad_norm": 1.2734375, "learning_rate": 7.41651721854893e-05, "loss": 1.3186, "step": 733 }, { "epoch": 0.11221096885151921, "grad_norm": 1.4140625, "learning_rate": 7.416085956095739e-05, "loss": 1.6547, "step": 734 }, { "epoch": 0.11236384483088095, "grad_norm": 1.3125, "learning_rate": 7.415654698792049e-05, "loss": 1.4667, "step": 735 }, { "epoch": 0.11251672081024269, "grad_norm": 1.453125, "learning_rate": 7.415223446638731e-05, "loss": 1.6733, "step": 736 }, { "epoch": 0.11266959678960443, "grad_norm": 1.46875, "learning_rate": 7.41479219963664e-05, "loss": 1.7965, "step": 737 }, { "epoch": 0.11282247276896618, "grad_norm": 1.390625, "learning_rate": 7.41436095778663e-05, "loss": 1.6446, "step": 738 }, { "epoch": 0.11297534874832792, "grad_norm": 1.4765625, "learning_rate": 7.413929721089568e-05, "loss": 1.5593, "step": 739 }, { "epoch": 0.11312822472768966, "grad_norm": 1.40625, "learning_rate": 7.413498489546307e-05, "loss": 1.684, "step": 740 }, { "epoch": 0.1132811007070514, "grad_norm": 1.6015625, "learning_rate": 7.413067263157712e-05, "loss": 1.7661, "step": 741 }, { "epoch": 0.11343397668641314, "grad_norm": 1.328125, "learning_rate": 7.412636041924638e-05, "loss": 1.4222, "step": 742 }, { "epoch": 0.1135868526657749, "grad_norm": 1.28125, "learning_rate": 7.412204825847952e-05, "loss": 1.3188, "step": 743 }, { "epoch": 0.11373972864513664, "grad_norm": 1.4921875, "learning_rate": 7.411773614928505e-05, "loss": 1.6069, "step": 744 }, { "epoch": 0.11389260462449838, "grad_norm": 1.5546875, "learning_rate": 7.411342409167158e-05, "loss": 1.5895, "step": 745 }, { "epoch": 0.11404548060386012, "grad_norm": 1.609375, "learning_rate": 7.410911208564775e-05, "loss": 1.5249, "step": 746 }, { "epoch": 0.11419835658322186, "grad_norm": 1.3671875, "learning_rate": 7.410480013122208e-05, "loss": 1.5505, "step": 747 }, { "epoch": 0.1143512325625836, "grad_norm": 1.3515625, "learning_rate": 7.410048822840325e-05, "loss": 1.7686, "step": 748 }, { "epoch": 0.11450410854194534, "grad_norm": 1.3515625, "learning_rate": 7.40961763771998e-05, "loss": 1.693, "step": 749 }, { "epoch": 0.11465698452130708, "grad_norm": 1.3125, "learning_rate": 7.409186457762029e-05, "loss": 1.6948, "step": 750 }, { "epoch": 0.11480986050066883, "grad_norm": 1.3359375, "learning_rate": 7.40875528296734e-05, "loss": 1.6913, "step": 751 }, { "epoch": 0.11496273648003058, "grad_norm": 1.4296875, "learning_rate": 7.408324113336763e-05, "loss": 1.7104, "step": 752 }, { "epoch": 0.11511561245939232, "grad_norm": 1.2890625, "learning_rate": 7.407892948871167e-05, "loss": 1.6499, "step": 753 }, { "epoch": 0.11526848843875406, "grad_norm": 1.265625, "learning_rate": 7.407461789571403e-05, "loss": 1.3841, "step": 754 }, { "epoch": 0.1154213644181158, "grad_norm": 1.2421875, "learning_rate": 7.407030635438335e-05, "loss": 1.1945, "step": 755 }, { "epoch": 0.11557424039747755, "grad_norm": 1.3046875, "learning_rate": 7.406599486472823e-05, "loss": 1.461, "step": 756 }, { "epoch": 0.11572711637683929, "grad_norm": 1.4375, "learning_rate": 7.406168342675717e-05, "loss": 1.8621, "step": 757 }, { "epoch": 0.11587999235620103, "grad_norm": 1.375, "learning_rate": 7.40573720404789e-05, "loss": 1.4371, "step": 758 }, { "epoch": 0.11603286833556277, "grad_norm": 1.71875, "learning_rate": 7.405306070590188e-05, "loss": 1.7377, "step": 759 }, { "epoch": 0.11618574431492451, "grad_norm": 1.453125, "learning_rate": 7.404874942303481e-05, "loss": 1.5283, "step": 760 }, { "epoch": 0.11633862029428627, "grad_norm": 1.296875, "learning_rate": 7.404443819188622e-05, "loss": 1.438, "step": 761 }, { "epoch": 0.11649149627364801, "grad_norm": 1.328125, "learning_rate": 7.404012701246473e-05, "loss": 1.4227, "step": 762 }, { "epoch": 0.11664437225300975, "grad_norm": 1.3046875, "learning_rate": 7.403581588477895e-05, "loss": 1.6117, "step": 763 }, { "epoch": 0.11679724823237149, "grad_norm": 1.453125, "learning_rate": 7.40315048088374e-05, "loss": 1.5284, "step": 764 }, { "epoch": 0.11695012421173323, "grad_norm": 1.3515625, "learning_rate": 7.402719378464873e-05, "loss": 1.721, "step": 765 }, { "epoch": 0.11710300019109497, "grad_norm": 1.359375, "learning_rate": 7.402288281222151e-05, "loss": 1.5306, "step": 766 }, { "epoch": 0.11725587617045671, "grad_norm": 1.2265625, "learning_rate": 7.401857189156436e-05, "loss": 1.4214, "step": 767 }, { "epoch": 0.11740875214981845, "grad_norm": 1.25, "learning_rate": 7.40142610226858e-05, "loss": 1.3466, "step": 768 }, { "epoch": 0.1175616281291802, "grad_norm": 1.2578125, "learning_rate": 7.400995020559454e-05, "loss": 1.5145, "step": 769 }, { "epoch": 0.11771450410854195, "grad_norm": 1.3984375, "learning_rate": 7.400563944029906e-05, "loss": 1.4839, "step": 770 }, { "epoch": 0.11786738008790369, "grad_norm": 1.3828125, "learning_rate": 7.400132872680798e-05, "loss": 1.7468, "step": 771 }, { "epoch": 0.11802025606726543, "grad_norm": 1.328125, "learning_rate": 7.399701806512995e-05, "loss": 1.6265, "step": 772 }, { "epoch": 0.11817313204662717, "grad_norm": 1.4453125, "learning_rate": 7.399270745527346e-05, "loss": 1.8659, "step": 773 }, { "epoch": 0.11832600802598892, "grad_norm": 1.375, "learning_rate": 7.39883968972472e-05, "loss": 1.7373, "step": 774 }, { "epoch": 0.11847888400535066, "grad_norm": 1.2734375, "learning_rate": 7.398408639105969e-05, "loss": 1.6637, "step": 775 }, { "epoch": 0.1186317599847124, "grad_norm": 1.296875, "learning_rate": 7.397977593671956e-05, "loss": 1.415, "step": 776 }, { "epoch": 0.11878463596407414, "grad_norm": 1.296875, "learning_rate": 7.39754655342354e-05, "loss": 1.4546, "step": 777 }, { "epoch": 0.11893751194343588, "grad_norm": 1.3828125, "learning_rate": 7.397115518361575e-05, "loss": 1.7973, "step": 778 }, { "epoch": 0.11909038792279764, "grad_norm": 1.2890625, "learning_rate": 7.396684488486925e-05, "loss": 1.477, "step": 779 }, { "epoch": 0.11924326390215938, "grad_norm": 1.3984375, "learning_rate": 7.396253463800446e-05, "loss": 1.5758, "step": 780 }, { "epoch": 0.11939613988152112, "grad_norm": 1.3203125, "learning_rate": 7.395822444303002e-05, "loss": 1.6277, "step": 781 }, { "epoch": 0.11954901586088286, "grad_norm": 1.5, "learning_rate": 7.395391429995449e-05, "loss": 1.6788, "step": 782 }, { "epoch": 0.1197018918402446, "grad_norm": 1.2890625, "learning_rate": 7.394960420878643e-05, "loss": 1.4811, "step": 783 }, { "epoch": 0.11985476781960634, "grad_norm": 1.234375, "learning_rate": 7.394529416953446e-05, "loss": 1.4826, "step": 784 }, { "epoch": 0.12000764379896808, "grad_norm": 1.40625, "learning_rate": 7.394098418220715e-05, "loss": 1.8428, "step": 785 }, { "epoch": 0.12016051977832982, "grad_norm": 1.40625, "learning_rate": 7.393667424681313e-05, "loss": 1.784, "step": 786 }, { "epoch": 0.12031339575769157, "grad_norm": 1.3203125, "learning_rate": 7.393236436336094e-05, "loss": 1.5182, "step": 787 }, { "epoch": 0.12046627173705332, "grad_norm": 1.5234375, "learning_rate": 7.392805453185924e-05, "loss": 1.6872, "step": 788 }, { "epoch": 0.12061914771641506, "grad_norm": 1.3046875, "learning_rate": 7.392374475231654e-05, "loss": 1.4486, "step": 789 }, { "epoch": 0.1207720236957768, "grad_norm": 1.2578125, "learning_rate": 7.391943502474146e-05, "loss": 1.6246, "step": 790 }, { "epoch": 0.12092489967513854, "grad_norm": 1.3046875, "learning_rate": 7.391512534914259e-05, "loss": 1.3678, "step": 791 }, { "epoch": 0.12107777565450029, "grad_norm": 1.3046875, "learning_rate": 7.391081572552851e-05, "loss": 1.6069, "step": 792 }, { "epoch": 0.12123065163386203, "grad_norm": 1.359375, "learning_rate": 7.390650615390786e-05, "loss": 1.3358, "step": 793 }, { "epoch": 0.12138352761322377, "grad_norm": 1.6484375, "learning_rate": 7.390219663428913e-05, "loss": 1.9145, "step": 794 }, { "epoch": 0.12153640359258551, "grad_norm": 1.4296875, "learning_rate": 7.389788716668101e-05, "loss": 1.8205, "step": 795 }, { "epoch": 0.12168927957194725, "grad_norm": 1.2890625, "learning_rate": 7.389357775109201e-05, "loss": 1.4304, "step": 796 }, { "epoch": 0.121842155551309, "grad_norm": 1.546875, "learning_rate": 7.388926838753076e-05, "loss": 1.7296, "step": 797 }, { "epoch": 0.12199503153067075, "grad_norm": 1.34375, "learning_rate": 7.388495907600587e-05, "loss": 1.5933, "step": 798 }, { "epoch": 0.12214790751003249, "grad_norm": 1.25, "learning_rate": 7.388064981652584e-05, "loss": 1.5068, "step": 799 }, { "epoch": 0.12230078348939423, "grad_norm": 1.4921875, "learning_rate": 7.387634060909937e-05, "loss": 1.5238, "step": 800 }, { "epoch": 0.12245365946875597, "grad_norm": 1.4609375, "learning_rate": 7.387203145373496e-05, "loss": 1.7306, "step": 801 }, { "epoch": 0.12260653544811771, "grad_norm": 1.453125, "learning_rate": 7.386772235044124e-05, "loss": 1.7407, "step": 802 }, { "epoch": 0.12275941142747945, "grad_norm": 1.453125, "learning_rate": 7.386341329922681e-05, "loss": 1.5748, "step": 803 }, { "epoch": 0.1229122874068412, "grad_norm": 1.5, "learning_rate": 7.385910430010021e-05, "loss": 1.6665, "step": 804 }, { "epoch": 0.12306516338620294, "grad_norm": 1.3515625, "learning_rate": 7.385479535307007e-05, "loss": 1.5577, "step": 805 }, { "epoch": 0.12321803936556469, "grad_norm": 1.390625, "learning_rate": 7.385048645814493e-05, "loss": 1.6422, "step": 806 }, { "epoch": 0.12337091534492643, "grad_norm": 1.453125, "learning_rate": 7.384617761533344e-05, "loss": 1.5102, "step": 807 }, { "epoch": 0.12352379132428817, "grad_norm": 1.3515625, "learning_rate": 7.384186882464412e-05, "loss": 1.5083, "step": 808 }, { "epoch": 0.12367666730364991, "grad_norm": 1.328125, "learning_rate": 7.383756008608566e-05, "loss": 1.5893, "step": 809 }, { "epoch": 0.12382954328301166, "grad_norm": 1.2421875, "learning_rate": 7.383325139966654e-05, "loss": 1.5394, "step": 810 }, { "epoch": 0.1239824192623734, "grad_norm": 1.3671875, "learning_rate": 7.382894276539536e-05, "loss": 1.6392, "step": 811 }, { "epoch": 0.12413529524173514, "grad_norm": 1.375, "learning_rate": 7.382463418328079e-05, "loss": 1.6955, "step": 812 }, { "epoch": 0.12428817122109688, "grad_norm": 1.4453125, "learning_rate": 7.38203256533313e-05, "loss": 1.4497, "step": 813 }, { "epoch": 0.12444104720045862, "grad_norm": 1.3125, "learning_rate": 7.381601717555559e-05, "loss": 1.4173, "step": 814 }, { "epoch": 0.12459392317982038, "grad_norm": 1.3515625, "learning_rate": 7.381170874996218e-05, "loss": 1.5089, "step": 815 }, { "epoch": 0.12474679915918212, "grad_norm": 1.3671875, "learning_rate": 7.380740037655962e-05, "loss": 1.5638, "step": 816 }, { "epoch": 0.12489967513854386, "grad_norm": 1.3671875, "learning_rate": 7.380309205535661e-05, "loss": 1.6005, "step": 817 }, { "epoch": 0.12505255111790559, "grad_norm": 1.578125, "learning_rate": 7.379878378636162e-05, "loss": 1.498, "step": 818 }, { "epoch": 0.12520542709726734, "grad_norm": 1.3984375, "learning_rate": 7.379447556958333e-05, "loss": 1.673, "step": 819 }, { "epoch": 0.1253583030766291, "grad_norm": 1.28125, "learning_rate": 7.379016740503024e-05, "loss": 1.4259, "step": 820 }, { "epoch": 0.12551117905599082, "grad_norm": 1.2578125, "learning_rate": 7.3785859292711e-05, "loss": 1.4551, "step": 821 }, { "epoch": 0.12566405503535258, "grad_norm": 1.359375, "learning_rate": 7.378155123263421e-05, "loss": 1.6413, "step": 822 }, { "epoch": 0.1258169310147143, "grad_norm": 1.2734375, "learning_rate": 7.377724322480837e-05, "loss": 1.604, "step": 823 }, { "epoch": 0.12596980699407606, "grad_norm": 1.421875, "learning_rate": 7.377293526924212e-05, "loss": 1.7262, "step": 824 }, { "epoch": 0.1261226829734378, "grad_norm": 1.34375, "learning_rate": 7.376862736594404e-05, "loss": 1.598, "step": 825 }, { "epoch": 0.12627555895279954, "grad_norm": 1.359375, "learning_rate": 7.376431951492273e-05, "loss": 1.7562, "step": 826 }, { "epoch": 0.12642843493216127, "grad_norm": 1.3203125, "learning_rate": 7.376001171618675e-05, "loss": 1.6082, "step": 827 }, { "epoch": 0.12658131091152303, "grad_norm": 1.234375, "learning_rate": 7.375570396974474e-05, "loss": 1.2191, "step": 828 }, { "epoch": 0.12673418689088478, "grad_norm": 1.3984375, "learning_rate": 7.375139627560522e-05, "loss": 1.4812, "step": 829 }, { "epoch": 0.1268870628702465, "grad_norm": 1.4296875, "learning_rate": 7.374708863377675e-05, "loss": 1.4135, "step": 830 }, { "epoch": 0.12703993884960826, "grad_norm": 1.34375, "learning_rate": 7.374278104426801e-05, "loss": 1.5661, "step": 831 }, { "epoch": 0.12719281482897, "grad_norm": 1.4296875, "learning_rate": 7.37384735070875e-05, "loss": 1.7511, "step": 832 }, { "epoch": 0.12734569080833175, "grad_norm": 1.4296875, "learning_rate": 7.373416602224388e-05, "loss": 1.7522, "step": 833 }, { "epoch": 0.12749856678769347, "grad_norm": 1.2734375, "learning_rate": 7.372985858974566e-05, "loss": 1.4519, "step": 834 }, { "epoch": 0.12765144276705523, "grad_norm": 1.25, "learning_rate": 7.372555120960147e-05, "loss": 1.4102, "step": 835 }, { "epoch": 0.12780431874641696, "grad_norm": 1.578125, "learning_rate": 7.37212438818199e-05, "loss": 1.7784, "step": 836 }, { "epoch": 0.1279571947257787, "grad_norm": 1.40625, "learning_rate": 7.371693660640949e-05, "loss": 1.5575, "step": 837 }, { "epoch": 0.12811007070514047, "grad_norm": 1.2734375, "learning_rate": 7.371262938337888e-05, "loss": 1.4585, "step": 838 }, { "epoch": 0.1282629466845022, "grad_norm": 1.3359375, "learning_rate": 7.37083222127366e-05, "loss": 1.3849, "step": 839 }, { "epoch": 0.12841582266386395, "grad_norm": 1.453125, "learning_rate": 7.370401509449127e-05, "loss": 1.6871, "step": 840 }, { "epoch": 0.12856869864322568, "grad_norm": 1.3671875, "learning_rate": 7.36997080286515e-05, "loss": 1.4083, "step": 841 }, { "epoch": 0.12872157462258743, "grad_norm": 1.1328125, "learning_rate": 7.369540101522576e-05, "loss": 1.2235, "step": 842 }, { "epoch": 0.12887445060194916, "grad_norm": 1.34375, "learning_rate": 7.369109405422277e-05, "loss": 1.472, "step": 843 }, { "epoch": 0.1290273265813109, "grad_norm": 1.328125, "learning_rate": 7.368678714565101e-05, "loss": 1.4488, "step": 844 }, { "epoch": 0.12918020256067264, "grad_norm": 1.234375, "learning_rate": 7.368248028951914e-05, "loss": 1.284, "step": 845 }, { "epoch": 0.1293330785400344, "grad_norm": 1.2734375, "learning_rate": 7.367817348583568e-05, "loss": 1.3722, "step": 846 }, { "epoch": 0.12948595451939615, "grad_norm": 1.2265625, "learning_rate": 7.367386673460927e-05, "loss": 1.4052, "step": 847 }, { "epoch": 0.12963883049875788, "grad_norm": 1.265625, "learning_rate": 7.366956003584847e-05, "loss": 1.5652, "step": 848 }, { "epoch": 0.12979170647811963, "grad_norm": 1.4296875, "learning_rate": 7.366525338956182e-05, "loss": 1.2413, "step": 849 }, { "epoch": 0.12994458245748136, "grad_norm": 1.3203125, "learning_rate": 7.366094679575797e-05, "loss": 1.4426, "step": 850 }, { "epoch": 0.13009745843684312, "grad_norm": 1.3984375, "learning_rate": 7.365664025444546e-05, "loss": 1.4873, "step": 851 }, { "epoch": 0.13025033441620484, "grad_norm": 1.3515625, "learning_rate": 7.365233376563292e-05, "loss": 1.3905, "step": 852 }, { "epoch": 0.1304032103955666, "grad_norm": 1.359375, "learning_rate": 7.364802732932883e-05, "loss": 1.5992, "step": 853 }, { "epoch": 0.13055608637492833, "grad_norm": 1.2890625, "learning_rate": 7.364372094554191e-05, "loss": 1.6813, "step": 854 }, { "epoch": 0.13070896235429008, "grad_norm": 1.328125, "learning_rate": 7.363941461428066e-05, "loss": 1.4061, "step": 855 }, { "epoch": 0.13086183833365184, "grad_norm": 1.4609375, "learning_rate": 7.363510833555364e-05, "loss": 1.6379, "step": 856 }, { "epoch": 0.13101471431301356, "grad_norm": 1.4609375, "learning_rate": 7.363080210936952e-05, "loss": 1.6296, "step": 857 }, { "epoch": 0.13116759029237532, "grad_norm": 1.296875, "learning_rate": 7.362649593573676e-05, "loss": 1.5324, "step": 858 }, { "epoch": 0.13132046627173705, "grad_norm": 1.3671875, "learning_rate": 7.362218981466409e-05, "loss": 1.8453, "step": 859 }, { "epoch": 0.1314733422510988, "grad_norm": 1.40625, "learning_rate": 7.361788374615995e-05, "loss": 1.7159, "step": 860 }, { "epoch": 0.13162621823046053, "grad_norm": 1.421875, "learning_rate": 7.361357773023301e-05, "loss": 1.7014, "step": 861 }, { "epoch": 0.13177909420982228, "grad_norm": 1.3828125, "learning_rate": 7.360927176689186e-05, "loss": 1.2875, "step": 862 }, { "epoch": 0.131931970189184, "grad_norm": 1.4453125, "learning_rate": 7.3604965856145e-05, "loss": 1.5124, "step": 863 }, { "epoch": 0.13208484616854577, "grad_norm": 1.359375, "learning_rate": 7.360065999800108e-05, "loss": 1.6611, "step": 864 }, { "epoch": 0.13223772214790752, "grad_norm": 1.328125, "learning_rate": 7.359635419246863e-05, "loss": 1.4686, "step": 865 }, { "epoch": 0.13239059812726925, "grad_norm": 1.34375, "learning_rate": 7.359204843955629e-05, "loss": 1.494, "step": 866 }, { "epoch": 0.132543474106631, "grad_norm": 1.3125, "learning_rate": 7.358774273927258e-05, "loss": 1.5032, "step": 867 }, { "epoch": 0.13269635008599273, "grad_norm": 1.3828125, "learning_rate": 7.358343709162618e-05, "loss": 1.4693, "step": 868 }, { "epoch": 0.1328492260653545, "grad_norm": 1.3515625, "learning_rate": 7.357913149662557e-05, "loss": 1.5102, "step": 869 }, { "epoch": 0.13300210204471621, "grad_norm": 1.2578125, "learning_rate": 7.357482595427933e-05, "loss": 1.2933, "step": 870 }, { "epoch": 0.13315497802407797, "grad_norm": 1.3515625, "learning_rate": 7.35705204645961e-05, "loss": 1.4524, "step": 871 }, { "epoch": 0.1333078540034397, "grad_norm": 1.3203125, "learning_rate": 7.356621502758442e-05, "loss": 1.4949, "step": 872 }, { "epoch": 0.13346072998280145, "grad_norm": 1.4140625, "learning_rate": 7.356190964325293e-05, "loss": 1.6192, "step": 873 }, { "epoch": 0.1336136059621632, "grad_norm": 1.4453125, "learning_rate": 7.355760431161014e-05, "loss": 1.5864, "step": 874 }, { "epoch": 0.13376648194152493, "grad_norm": 1.296875, "learning_rate": 7.355329903266463e-05, "loss": 1.2616, "step": 875 }, { "epoch": 0.1339193579208867, "grad_norm": 1.3359375, "learning_rate": 7.354899380642505e-05, "loss": 1.63, "step": 876 }, { "epoch": 0.13407223390024842, "grad_norm": 1.2578125, "learning_rate": 7.354468863289989e-05, "loss": 1.3069, "step": 877 }, { "epoch": 0.13422510987961017, "grad_norm": 1.1875, "learning_rate": 7.354038351209783e-05, "loss": 1.3044, "step": 878 }, { "epoch": 0.1343779858589719, "grad_norm": 1.34375, "learning_rate": 7.353607844402735e-05, "loss": 1.656, "step": 879 }, { "epoch": 0.13453086183833365, "grad_norm": 1.296875, "learning_rate": 7.35317734286971e-05, "loss": 1.5933, "step": 880 }, { "epoch": 0.13468373781769538, "grad_norm": 1.34375, "learning_rate": 7.352746846611566e-05, "loss": 1.6018, "step": 881 }, { "epoch": 0.13483661379705714, "grad_norm": 1.328125, "learning_rate": 7.352316355629152e-05, "loss": 1.5389, "step": 882 }, { "epoch": 0.1349894897764189, "grad_norm": 1.28125, "learning_rate": 7.351885869923338e-05, "loss": 1.4399, "step": 883 }, { "epoch": 0.13514236575578062, "grad_norm": 1.421875, "learning_rate": 7.351455389494971e-05, "loss": 1.6378, "step": 884 }, { "epoch": 0.13529524173514237, "grad_norm": 1.34375, "learning_rate": 7.351024914344918e-05, "loss": 1.344, "step": 885 }, { "epoch": 0.1354481177145041, "grad_norm": 1.328125, "learning_rate": 7.35059444447403e-05, "loss": 1.5426, "step": 886 }, { "epoch": 0.13560099369386586, "grad_norm": 1.4609375, "learning_rate": 7.350163979883171e-05, "loss": 1.7824, "step": 887 }, { "epoch": 0.13575386967322758, "grad_norm": 1.578125, "learning_rate": 7.349733520573196e-05, "loss": 1.6467, "step": 888 }, { "epoch": 0.13590674565258934, "grad_norm": 1.625, "learning_rate": 7.349303066544961e-05, "loss": 1.4906, "step": 889 }, { "epoch": 0.13605962163195107, "grad_norm": 1.2734375, "learning_rate": 7.348872617799327e-05, "loss": 1.5553, "step": 890 }, { "epoch": 0.13621249761131282, "grad_norm": 1.1953125, "learning_rate": 7.348442174337147e-05, "loss": 1.3085, "step": 891 }, { "epoch": 0.13636537359067458, "grad_norm": 1.2734375, "learning_rate": 7.348011736159286e-05, "loss": 1.5134, "step": 892 }, { "epoch": 0.1365182495700363, "grad_norm": 1.4296875, "learning_rate": 7.347581303266594e-05, "loss": 1.7926, "step": 893 }, { "epoch": 0.13667112554939806, "grad_norm": 1.34375, "learning_rate": 7.347150875659938e-05, "loss": 1.3017, "step": 894 }, { "epoch": 0.1368240015287598, "grad_norm": 1.4453125, "learning_rate": 7.34672045334017e-05, "loss": 1.391, "step": 895 }, { "epoch": 0.13697687750812154, "grad_norm": 1.390625, "learning_rate": 7.346290036308144e-05, "loss": 1.4397, "step": 896 }, { "epoch": 0.13712975348748327, "grad_norm": 1.296875, "learning_rate": 7.345859624564728e-05, "loss": 1.5665, "step": 897 }, { "epoch": 0.13728262946684502, "grad_norm": 1.4375, "learning_rate": 7.345429218110767e-05, "loss": 1.4666, "step": 898 }, { "epoch": 0.13743550544620675, "grad_norm": 1.3046875, "learning_rate": 7.344998816947133e-05, "loss": 1.6284, "step": 899 }, { "epoch": 0.1375883814255685, "grad_norm": 1.375, "learning_rate": 7.34456842107467e-05, "loss": 1.5055, "step": 900 }, { "epoch": 0.13774125740493026, "grad_norm": 1.3828125, "learning_rate": 7.344138030494248e-05, "loss": 1.6299, "step": 901 }, { "epoch": 0.137894133384292, "grad_norm": 1.234375, "learning_rate": 7.343707645206719e-05, "loss": 1.5425, "step": 902 }, { "epoch": 0.13804700936365374, "grad_norm": 1.3984375, "learning_rate": 7.343277265212935e-05, "loss": 1.6048, "step": 903 }, { "epoch": 0.13819988534301547, "grad_norm": 1.4921875, "learning_rate": 7.342846890513763e-05, "loss": 1.8669, "step": 904 }, { "epoch": 0.13835276132237723, "grad_norm": 1.3984375, "learning_rate": 7.342416521110056e-05, "loss": 1.5981, "step": 905 }, { "epoch": 0.13850563730173895, "grad_norm": 1.3828125, "learning_rate": 7.341986157002674e-05, "loss": 1.3524, "step": 906 }, { "epoch": 0.1386585132811007, "grad_norm": 1.375, "learning_rate": 7.341555798192477e-05, "loss": 1.479, "step": 907 }, { "epoch": 0.13881138926046244, "grad_norm": 1.4296875, "learning_rate": 7.341125444680312e-05, "loss": 1.7528, "step": 908 }, { "epoch": 0.1389642652398242, "grad_norm": 1.3046875, "learning_rate": 7.340695096467049e-05, "loss": 1.6428, "step": 909 }, { "epoch": 0.13911714121918595, "grad_norm": 1.2890625, "learning_rate": 7.340264753553536e-05, "loss": 1.4669, "step": 910 }, { "epoch": 0.13927001719854767, "grad_norm": 1.34375, "learning_rate": 7.339834415940638e-05, "loss": 1.4692, "step": 911 }, { "epoch": 0.13942289317790943, "grad_norm": 1.4140625, "learning_rate": 7.33940408362921e-05, "loss": 1.7261, "step": 912 }, { "epoch": 0.13957576915727116, "grad_norm": 1.3515625, "learning_rate": 7.338973756620109e-05, "loss": 1.6994, "step": 913 }, { "epoch": 0.1397286451366329, "grad_norm": 1.3359375, "learning_rate": 7.338543434914193e-05, "loss": 1.694, "step": 914 }, { "epoch": 0.13988152111599464, "grad_norm": 1.3203125, "learning_rate": 7.338113118512317e-05, "loss": 1.5405, "step": 915 }, { "epoch": 0.1400343970953564, "grad_norm": 1.2265625, "learning_rate": 7.337682807415345e-05, "loss": 1.4237, "step": 916 }, { "epoch": 0.14018727307471812, "grad_norm": 1.328125, "learning_rate": 7.337252501624127e-05, "loss": 1.4861, "step": 917 }, { "epoch": 0.14034014905407988, "grad_norm": 1.3359375, "learning_rate": 7.336822201139529e-05, "loss": 1.7584, "step": 918 }, { "epoch": 0.14049302503344163, "grad_norm": 1.296875, "learning_rate": 7.336391905962399e-05, "loss": 1.5174, "step": 919 }, { "epoch": 0.14064590101280336, "grad_norm": 1.390625, "learning_rate": 7.335961616093603e-05, "loss": 1.7881, "step": 920 }, { "epoch": 0.14079877699216511, "grad_norm": 1.453125, "learning_rate": 7.335531331533993e-05, "loss": 1.7619, "step": 921 }, { "epoch": 0.14095165297152684, "grad_norm": 1.4140625, "learning_rate": 7.335101052284427e-05, "loss": 1.7835, "step": 922 }, { "epoch": 0.1411045289508886, "grad_norm": 1.390625, "learning_rate": 7.334670778345768e-05, "loss": 1.3444, "step": 923 }, { "epoch": 0.14125740493025032, "grad_norm": 1.359375, "learning_rate": 7.334240509718865e-05, "loss": 1.7681, "step": 924 }, { "epoch": 0.14141028090961208, "grad_norm": 1.2578125, "learning_rate": 7.333810246404581e-05, "loss": 1.3318, "step": 925 }, { "epoch": 0.1415631568889738, "grad_norm": 1.2890625, "learning_rate": 7.333379988403772e-05, "loss": 1.532, "step": 926 }, { "epoch": 0.14171603286833556, "grad_norm": 1.359375, "learning_rate": 7.332949735717299e-05, "loss": 1.4427, "step": 927 }, { "epoch": 0.14186890884769732, "grad_norm": 1.2265625, "learning_rate": 7.332519488346015e-05, "loss": 1.4955, "step": 928 }, { "epoch": 0.14202178482705904, "grad_norm": 1.40625, "learning_rate": 7.332089246290777e-05, "loss": 1.4184, "step": 929 }, { "epoch": 0.1421746608064208, "grad_norm": 1.2578125, "learning_rate": 7.331659009552445e-05, "loss": 1.5282, "step": 930 }, { "epoch": 0.14232753678578253, "grad_norm": 1.5, "learning_rate": 7.331228778131873e-05, "loss": 1.5833, "step": 931 }, { "epoch": 0.14248041276514428, "grad_norm": 1.3984375, "learning_rate": 7.330798552029925e-05, "loss": 1.374, "step": 932 }, { "epoch": 0.142633288744506, "grad_norm": 1.3046875, "learning_rate": 7.330368331247455e-05, "loss": 1.3556, "step": 933 }, { "epoch": 0.14278616472386776, "grad_norm": 1.4375, "learning_rate": 7.329938115785317e-05, "loss": 1.74, "step": 934 }, { "epoch": 0.1429390407032295, "grad_norm": 1.2890625, "learning_rate": 7.329507905644373e-05, "loss": 1.5757, "step": 935 }, { "epoch": 0.14309191668259125, "grad_norm": 1.40625, "learning_rate": 7.329077700825475e-05, "loss": 1.6718, "step": 936 }, { "epoch": 0.143244792661953, "grad_norm": 1.5078125, "learning_rate": 7.328647501329488e-05, "loss": 1.531, "step": 937 }, { "epoch": 0.14339766864131473, "grad_norm": 1.265625, "learning_rate": 7.32821730715726e-05, "loss": 1.5924, "step": 938 }, { "epoch": 0.14355054462067648, "grad_norm": 1.4296875, "learning_rate": 7.327787118309659e-05, "loss": 1.6627, "step": 939 }, { "epoch": 0.1437034206000382, "grad_norm": 1.375, "learning_rate": 7.327356934787537e-05, "loss": 1.4076, "step": 940 }, { "epoch": 0.14385629657939997, "grad_norm": 1.421875, "learning_rate": 7.326926756591747e-05, "loss": 1.5313, "step": 941 }, { "epoch": 0.1440091725587617, "grad_norm": 1.3984375, "learning_rate": 7.326496583723154e-05, "loss": 1.5525, "step": 942 }, { "epoch": 0.14416204853812345, "grad_norm": 1.390625, "learning_rate": 7.326066416182608e-05, "loss": 1.2831, "step": 943 }, { "epoch": 0.14431492451748518, "grad_norm": 1.265625, "learning_rate": 7.325636253970974e-05, "loss": 1.431, "step": 944 }, { "epoch": 0.14446780049684693, "grad_norm": 1.328125, "learning_rate": 7.325206097089104e-05, "loss": 1.5066, "step": 945 }, { "epoch": 0.1446206764762087, "grad_norm": 1.3125, "learning_rate": 7.324775945537856e-05, "loss": 1.4725, "step": 946 }, { "epoch": 0.14477355245557041, "grad_norm": 1.453125, "learning_rate": 7.324345799318091e-05, "loss": 1.6519, "step": 947 }, { "epoch": 0.14492642843493217, "grad_norm": 1.390625, "learning_rate": 7.323915658430658e-05, "loss": 1.6698, "step": 948 }, { "epoch": 0.1450793044142939, "grad_norm": 1.21875, "learning_rate": 7.323485522876423e-05, "loss": 1.1209, "step": 949 }, { "epoch": 0.14523218039365565, "grad_norm": 1.265625, "learning_rate": 7.323055392656238e-05, "loss": 1.6071, "step": 950 }, { "epoch": 0.14538505637301738, "grad_norm": 1.25, "learning_rate": 7.322625267770961e-05, "loss": 1.3299, "step": 951 }, { "epoch": 0.14553793235237913, "grad_norm": 1.2734375, "learning_rate": 7.32219514822145e-05, "loss": 1.3408, "step": 952 }, { "epoch": 0.14569080833174086, "grad_norm": 1.265625, "learning_rate": 7.321765034008566e-05, "loss": 1.5088, "step": 953 }, { "epoch": 0.14584368431110262, "grad_norm": 1.359375, "learning_rate": 7.321334925133161e-05, "loss": 1.4341, "step": 954 }, { "epoch": 0.14599656029046437, "grad_norm": 1.234375, "learning_rate": 7.320904821596091e-05, "loss": 1.3801, "step": 955 }, { "epoch": 0.1461494362698261, "grad_norm": 1.3359375, "learning_rate": 7.320474723398217e-05, "loss": 1.7789, "step": 956 }, { "epoch": 0.14630231224918785, "grad_norm": 1.3359375, "learning_rate": 7.320044630540392e-05, "loss": 1.5744, "step": 957 }, { "epoch": 0.14645518822854958, "grad_norm": 1.2109375, "learning_rate": 7.319614543023483e-05, "loss": 1.4899, "step": 958 }, { "epoch": 0.14660806420791134, "grad_norm": 1.4375, "learning_rate": 7.319184460848334e-05, "loss": 1.6339, "step": 959 }, { "epoch": 0.14676094018727306, "grad_norm": 1.5546875, "learning_rate": 7.318754384015812e-05, "loss": 1.5823, "step": 960 }, { "epoch": 0.14691381616663482, "grad_norm": 1.4296875, "learning_rate": 7.318324312526767e-05, "loss": 1.8797, "step": 961 }, { "epoch": 0.14706669214599655, "grad_norm": 1.34375, "learning_rate": 7.31789424638206e-05, "loss": 1.4564, "step": 962 }, { "epoch": 0.1472195681253583, "grad_norm": 1.5390625, "learning_rate": 7.317464185582552e-05, "loss": 1.6584, "step": 963 }, { "epoch": 0.14737244410472006, "grad_norm": 1.421875, "learning_rate": 7.317034130129089e-05, "loss": 1.6279, "step": 964 }, { "epoch": 0.14752532008408178, "grad_norm": 1.2734375, "learning_rate": 7.316604080022538e-05, "loss": 1.4354, "step": 965 }, { "epoch": 0.14767819606344354, "grad_norm": 1.4921875, "learning_rate": 7.316174035263754e-05, "loss": 1.5908, "step": 966 }, { "epoch": 0.14783107204280527, "grad_norm": 1.2421875, "learning_rate": 7.315743995853588e-05, "loss": 1.2698, "step": 967 }, { "epoch": 0.14798394802216702, "grad_norm": 1.3046875, "learning_rate": 7.315313961792907e-05, "loss": 1.5305, "step": 968 }, { "epoch": 0.14813682400152875, "grad_norm": 1.328125, "learning_rate": 7.31488393308256e-05, "loss": 1.6999, "step": 969 }, { "epoch": 0.1482896999808905, "grad_norm": 1.3203125, "learning_rate": 7.314453909723406e-05, "loss": 1.5862, "step": 970 }, { "epoch": 0.14844257596025223, "grad_norm": 1.1796875, "learning_rate": 7.314023891716304e-05, "loss": 1.2584, "step": 971 }, { "epoch": 0.148595451939614, "grad_norm": 1.4453125, "learning_rate": 7.313593879062111e-05, "loss": 1.8597, "step": 972 }, { "epoch": 0.14874832791897574, "grad_norm": 1.3828125, "learning_rate": 7.313163871761683e-05, "loss": 1.7383, "step": 973 }, { "epoch": 0.14890120389833747, "grad_norm": 1.2890625, "learning_rate": 7.312733869815873e-05, "loss": 1.4162, "step": 974 }, { "epoch": 0.14905407987769922, "grad_norm": 1.328125, "learning_rate": 7.312303873225545e-05, "loss": 1.3516, "step": 975 }, { "epoch": 0.14920695585706095, "grad_norm": 1.4140625, "learning_rate": 7.31187388199155e-05, "loss": 1.627, "step": 976 }, { "epoch": 0.1493598318364227, "grad_norm": 1.234375, "learning_rate": 7.311443896114752e-05, "loss": 1.3844, "step": 977 }, { "epoch": 0.14951270781578443, "grad_norm": 1.2890625, "learning_rate": 7.311013915595997e-05, "loss": 1.6174, "step": 978 }, { "epoch": 0.1496655837951462, "grad_norm": 1.3203125, "learning_rate": 7.310583940436153e-05, "loss": 1.4493, "step": 979 }, { "epoch": 0.14981845977450792, "grad_norm": 1.234375, "learning_rate": 7.310153970636073e-05, "loss": 1.4532, "step": 980 }, { "epoch": 0.14997133575386967, "grad_norm": 1.46875, "learning_rate": 7.309724006196608e-05, "loss": 1.5377, "step": 981 }, { "epoch": 0.15012421173323143, "grad_norm": 1.25, "learning_rate": 7.309294047118626e-05, "loss": 1.455, "step": 982 }, { "epoch": 0.15027708771259315, "grad_norm": 1.2421875, "learning_rate": 7.308864093402972e-05, "loss": 1.2145, "step": 983 }, { "epoch": 0.1504299636919549, "grad_norm": 1.3828125, "learning_rate": 7.308434145050515e-05, "loss": 1.7393, "step": 984 }, { "epoch": 0.15058283967131664, "grad_norm": 1.3359375, "learning_rate": 7.3080042020621e-05, "loss": 1.3646, "step": 985 }, { "epoch": 0.1507357156506784, "grad_norm": 1.7734375, "learning_rate": 7.307574264438593e-05, "loss": 1.5894, "step": 986 }, { "epoch": 0.15088859163004012, "grad_norm": 1.625, "learning_rate": 7.30714433218085e-05, "loss": 1.6166, "step": 987 }, { "epoch": 0.15104146760940187, "grad_norm": 1.3984375, "learning_rate": 7.30671440528972e-05, "loss": 1.7811, "step": 988 }, { "epoch": 0.1511943435887636, "grad_norm": 1.3828125, "learning_rate": 7.306284483766066e-05, "loss": 1.404, "step": 989 }, { "epoch": 0.15134721956812536, "grad_norm": 1.234375, "learning_rate": 7.305854567610743e-05, "loss": 1.6881, "step": 990 }, { "epoch": 0.1515000955474871, "grad_norm": 1.3984375, "learning_rate": 7.30542465682461e-05, "loss": 1.6933, "step": 991 }, { "epoch": 0.15165297152684884, "grad_norm": 1.2109375, "learning_rate": 7.30499475140852e-05, "loss": 1.2095, "step": 992 }, { "epoch": 0.1518058475062106, "grad_norm": 1.34375, "learning_rate": 7.304564851363335e-05, "loss": 1.4457, "step": 993 }, { "epoch": 0.15195872348557232, "grad_norm": 1.3671875, "learning_rate": 7.30413495668991e-05, "loss": 1.6733, "step": 994 }, { "epoch": 0.15211159946493408, "grad_norm": 1.453125, "learning_rate": 7.303705067389096e-05, "loss": 1.9913, "step": 995 }, { "epoch": 0.1522644754442958, "grad_norm": 1.3359375, "learning_rate": 7.303275183461757e-05, "loss": 1.5614, "step": 996 }, { "epoch": 0.15241735142365756, "grad_norm": 1.25, "learning_rate": 7.302845304908746e-05, "loss": 1.4253, "step": 997 }, { "epoch": 0.1525702274030193, "grad_norm": 1.453125, "learning_rate": 7.302415431730923e-05, "loss": 1.5346, "step": 998 }, { "epoch": 0.15272310338238104, "grad_norm": 1.1953125, "learning_rate": 7.301985563929141e-05, "loss": 1.2678, "step": 999 }, { "epoch": 0.1528759793617428, "grad_norm": 1.4453125, "learning_rate": 7.301555701504255e-05, "loss": 1.8388, "step": 1000 }, { "epoch": 0.1528759793617428, "eval_loss": 1.5122984647750854, "eval_model_preparation_time": 0.0034, "eval_runtime": 114.6619, "eval_samples_per_second": 87.213, "eval_steps_per_second": 2.73, "step": 1000 }, { "epoch": 0.15302885534110452, "grad_norm": 1.359375, "learning_rate": 7.301125844457128e-05, "loss": 1.3578, "step": 1001 }, { "epoch": 0.15318173132046628, "grad_norm": 1.234375, "learning_rate": 7.30069599278861e-05, "loss": 1.3505, "step": 1002 }, { "epoch": 0.153334607299828, "grad_norm": 1.3359375, "learning_rate": 7.300266146499568e-05, "loss": 1.4252, "step": 1003 }, { "epoch": 0.15348748327918976, "grad_norm": 1.46875, "learning_rate": 7.299836305590845e-05, "loss": 1.4454, "step": 1004 }, { "epoch": 0.1536403592585515, "grad_norm": 1.453125, "learning_rate": 7.299406470063307e-05, "loss": 1.4067, "step": 1005 }, { "epoch": 0.15379323523791325, "grad_norm": 1.390625, "learning_rate": 7.29897663991781e-05, "loss": 1.2849, "step": 1006 }, { "epoch": 0.15394611121727497, "grad_norm": 1.5078125, "learning_rate": 7.298546815155202e-05, "loss": 1.5075, "step": 1007 }, { "epoch": 0.15409898719663673, "grad_norm": 1.34375, "learning_rate": 7.298116995776355e-05, "loss": 1.4855, "step": 1008 }, { "epoch": 0.15425186317599848, "grad_norm": 1.1875, "learning_rate": 7.297687181782109e-05, "loss": 1.3102, "step": 1009 }, { "epoch": 0.1544047391553602, "grad_norm": 1.3046875, "learning_rate": 7.297257373173332e-05, "loss": 1.5318, "step": 1010 }, { "epoch": 0.15455761513472197, "grad_norm": 1.2890625, "learning_rate": 7.296827569950875e-05, "loss": 1.6276, "step": 1011 }, { "epoch": 0.1547104911140837, "grad_norm": 1.3203125, "learning_rate": 7.2963977721156e-05, "loss": 1.4639, "step": 1012 }, { "epoch": 0.15486336709344545, "grad_norm": 1.2265625, "learning_rate": 7.295967979668359e-05, "loss": 1.4991, "step": 1013 }, { "epoch": 0.15501624307280717, "grad_norm": 1.296875, "learning_rate": 7.295538192610008e-05, "loss": 1.3227, "step": 1014 }, { "epoch": 0.15516911905216893, "grad_norm": 1.5625, "learning_rate": 7.295108410941404e-05, "loss": 1.9634, "step": 1015 }, { "epoch": 0.15532199503153066, "grad_norm": 1.28125, "learning_rate": 7.294678634663405e-05, "loss": 1.4197, "step": 1016 }, { "epoch": 0.1554748710108924, "grad_norm": 1.5625, "learning_rate": 7.294248863776871e-05, "loss": 1.8426, "step": 1017 }, { "epoch": 0.15562774699025417, "grad_norm": 1.4375, "learning_rate": 7.293819098282649e-05, "loss": 1.5392, "step": 1018 }, { "epoch": 0.1557806229696159, "grad_norm": 1.2265625, "learning_rate": 7.293389338181606e-05, "loss": 1.3573, "step": 1019 }, { "epoch": 0.15593349894897765, "grad_norm": 1.5078125, "learning_rate": 7.292959583474593e-05, "loss": 1.5538, "step": 1020 }, { "epoch": 0.15608637492833938, "grad_norm": 1.390625, "learning_rate": 7.292529834162464e-05, "loss": 1.5297, "step": 1021 }, { "epoch": 0.15623925090770113, "grad_norm": 1.3046875, "learning_rate": 7.292100090246082e-05, "loss": 1.6719, "step": 1022 }, { "epoch": 0.15639212688706286, "grad_norm": 1.390625, "learning_rate": 7.291670351726294e-05, "loss": 1.4131, "step": 1023 }, { "epoch": 0.15654500286642462, "grad_norm": 1.2265625, "learning_rate": 7.29124061860397e-05, "loss": 1.2559, "step": 1024 }, { "epoch": 0.15669787884578634, "grad_norm": 1.3515625, "learning_rate": 7.290810890879953e-05, "loss": 1.4208, "step": 1025 }, { "epoch": 0.1568507548251481, "grad_norm": 1.3203125, "learning_rate": 7.290381168555108e-05, "loss": 1.2829, "step": 1026 }, { "epoch": 0.15700363080450985, "grad_norm": 1.3671875, "learning_rate": 7.28995145163029e-05, "loss": 1.5801, "step": 1027 }, { "epoch": 0.15715650678387158, "grad_norm": 1.2734375, "learning_rate": 7.289521740106351e-05, "loss": 1.5522, "step": 1028 }, { "epoch": 0.15730938276323334, "grad_norm": 1.640625, "learning_rate": 7.28909203398415e-05, "loss": 1.8502, "step": 1029 }, { "epoch": 0.15746225874259506, "grad_norm": 1.3046875, "learning_rate": 7.288662333264545e-05, "loss": 1.5291, "step": 1030 }, { "epoch": 0.15761513472195682, "grad_norm": 1.3125, "learning_rate": 7.288232637948392e-05, "loss": 1.4906, "step": 1031 }, { "epoch": 0.15776801070131855, "grad_norm": 1.2421875, "learning_rate": 7.287802948036546e-05, "loss": 1.4633, "step": 1032 }, { "epoch": 0.1579208866806803, "grad_norm": 1.40625, "learning_rate": 7.287373263529862e-05, "loss": 1.4257, "step": 1033 }, { "epoch": 0.15807376266004203, "grad_norm": 1.3203125, "learning_rate": 7.2869435844292e-05, "loss": 1.4687, "step": 1034 }, { "epoch": 0.15822663863940378, "grad_norm": 1.2265625, "learning_rate": 7.286513910735412e-05, "loss": 1.3513, "step": 1035 }, { "epoch": 0.15837951461876554, "grad_norm": 1.40625, "learning_rate": 7.286084242449358e-05, "loss": 1.5266, "step": 1036 }, { "epoch": 0.15853239059812727, "grad_norm": 1.34375, "learning_rate": 7.285654579571892e-05, "loss": 1.562, "step": 1037 }, { "epoch": 0.15868526657748902, "grad_norm": 1.4375, "learning_rate": 7.285224922103874e-05, "loss": 1.7358, "step": 1038 }, { "epoch": 0.15883814255685075, "grad_norm": 1.3828125, "learning_rate": 7.284795270046156e-05, "loss": 1.4283, "step": 1039 }, { "epoch": 0.1589910185362125, "grad_norm": 1.4765625, "learning_rate": 7.284365623399594e-05, "loss": 1.7591, "step": 1040 }, { "epoch": 0.15914389451557423, "grad_norm": 1.515625, "learning_rate": 7.283935982165049e-05, "loss": 1.5086, "step": 1041 }, { "epoch": 0.15929677049493599, "grad_norm": 1.390625, "learning_rate": 7.283506346343372e-05, "loss": 1.5567, "step": 1042 }, { "epoch": 0.1594496464742977, "grad_norm": 1.34375, "learning_rate": 7.283076715935424e-05, "loss": 1.5602, "step": 1043 }, { "epoch": 0.15960252245365947, "grad_norm": 1.3125, "learning_rate": 7.282647090942056e-05, "loss": 1.6463, "step": 1044 }, { "epoch": 0.15975539843302122, "grad_norm": 1.65625, "learning_rate": 7.282217471364127e-05, "loss": 1.7599, "step": 1045 }, { "epoch": 0.15990827441238295, "grad_norm": 1.453125, "learning_rate": 7.281787857202495e-05, "loss": 1.5516, "step": 1046 }, { "epoch": 0.1600611503917447, "grad_norm": 1.328125, "learning_rate": 7.281358248458012e-05, "loss": 1.3311, "step": 1047 }, { "epoch": 0.16021402637110643, "grad_norm": 1.375, "learning_rate": 7.28092864513154e-05, "loss": 1.588, "step": 1048 }, { "epoch": 0.1603669023504682, "grad_norm": 1.3828125, "learning_rate": 7.280499047223928e-05, "loss": 1.8091, "step": 1049 }, { "epoch": 0.16051977832982992, "grad_norm": 1.359375, "learning_rate": 7.280069454736039e-05, "loss": 1.4443, "step": 1050 }, { "epoch": 0.16067265430919167, "grad_norm": 1.390625, "learning_rate": 7.279639867668722e-05, "loss": 1.5059, "step": 1051 }, { "epoch": 0.1608255302885534, "grad_norm": 1.3671875, "learning_rate": 7.279210286022838e-05, "loss": 1.4601, "step": 1052 }, { "epoch": 0.16097840626791515, "grad_norm": 1.359375, "learning_rate": 7.278780709799246e-05, "loss": 1.432, "step": 1053 }, { "epoch": 0.1611312822472769, "grad_norm": 1.2265625, "learning_rate": 7.278351138998793e-05, "loss": 1.7537, "step": 1054 }, { "epoch": 0.16128415822663864, "grad_norm": 1.3359375, "learning_rate": 7.277921573622344e-05, "loss": 1.9609, "step": 1055 }, { "epoch": 0.1614370342060004, "grad_norm": 1.4375, "learning_rate": 7.277492013670749e-05, "loss": 1.5097, "step": 1056 }, { "epoch": 0.16158991018536212, "grad_norm": 1.234375, "learning_rate": 7.277062459144868e-05, "loss": 1.5757, "step": 1057 }, { "epoch": 0.16174278616472387, "grad_norm": 1.359375, "learning_rate": 7.276632910045557e-05, "loss": 1.2694, "step": 1058 }, { "epoch": 0.1618956621440856, "grad_norm": 1.421875, "learning_rate": 7.276203366373667e-05, "loss": 1.8519, "step": 1059 }, { "epoch": 0.16204853812344736, "grad_norm": 1.3203125, "learning_rate": 7.27577382813006e-05, "loss": 1.4413, "step": 1060 }, { "epoch": 0.16220141410280908, "grad_norm": 1.375, "learning_rate": 7.275344295315587e-05, "loss": 1.6073, "step": 1061 }, { "epoch": 0.16235429008217084, "grad_norm": 1.25, "learning_rate": 7.274914767931111e-05, "loss": 1.3746, "step": 1062 }, { "epoch": 0.1625071660615326, "grad_norm": 1.40625, "learning_rate": 7.274485245977479e-05, "loss": 1.8862, "step": 1063 }, { "epoch": 0.16266004204089432, "grad_norm": 1.25, "learning_rate": 7.274055729455555e-05, "loss": 1.3614, "step": 1064 }, { "epoch": 0.16281291802025608, "grad_norm": 1.390625, "learning_rate": 7.273626218366192e-05, "loss": 1.5238, "step": 1065 }, { "epoch": 0.1629657939996178, "grad_norm": 1.421875, "learning_rate": 7.273196712710243e-05, "loss": 1.7296, "step": 1066 }, { "epoch": 0.16311866997897956, "grad_norm": 1.3984375, "learning_rate": 7.272767212488572e-05, "loss": 1.5026, "step": 1067 }, { "epoch": 0.16327154595834129, "grad_norm": 1.34375, "learning_rate": 7.272337717702021e-05, "loss": 1.4061, "step": 1068 }, { "epoch": 0.16342442193770304, "grad_norm": 1.359375, "learning_rate": 7.271908228351463e-05, "loss": 1.7785, "step": 1069 }, { "epoch": 0.16357729791706477, "grad_norm": 1.296875, "learning_rate": 7.27147874443774e-05, "loss": 1.4514, "step": 1070 }, { "epoch": 0.16373017389642652, "grad_norm": 1.328125, "learning_rate": 7.271049265961716e-05, "loss": 1.35, "step": 1071 }, { "epoch": 0.16388304987578828, "grad_norm": 1.625, "learning_rate": 7.270619792924245e-05, "loss": 1.4055, "step": 1072 }, { "epoch": 0.16403592585515, "grad_norm": 1.296875, "learning_rate": 7.27019032532618e-05, "loss": 1.3779, "step": 1073 }, { "epoch": 0.16418880183451176, "grad_norm": 1.3984375, "learning_rate": 7.26976086316838e-05, "loss": 1.5699, "step": 1074 }, { "epoch": 0.1643416778138735, "grad_norm": 1.3125, "learning_rate": 7.269331406451698e-05, "loss": 1.3249, "step": 1075 }, { "epoch": 0.16449455379323524, "grad_norm": 1.3359375, "learning_rate": 7.268901955176995e-05, "loss": 1.646, "step": 1076 }, { "epoch": 0.16464742977259697, "grad_norm": 1.359375, "learning_rate": 7.26847250934512e-05, "loss": 1.6425, "step": 1077 }, { "epoch": 0.16480030575195873, "grad_norm": 1.2734375, "learning_rate": 7.268043068956938e-05, "loss": 1.4045, "step": 1078 }, { "epoch": 0.16495318173132045, "grad_norm": 1.6015625, "learning_rate": 7.267613634013296e-05, "loss": 1.7385, "step": 1079 }, { "epoch": 0.1651060577106822, "grad_norm": 1.484375, "learning_rate": 7.267184204515052e-05, "loss": 1.7981, "step": 1080 }, { "epoch": 0.16525893369004396, "grad_norm": 1.265625, "learning_rate": 7.266754780463065e-05, "loss": 1.3144, "step": 1081 }, { "epoch": 0.1654118096694057, "grad_norm": 1.3046875, "learning_rate": 7.266325361858186e-05, "loss": 1.4442, "step": 1082 }, { "epoch": 0.16556468564876745, "grad_norm": 1.3671875, "learning_rate": 7.26589594870128e-05, "loss": 1.744, "step": 1083 }, { "epoch": 0.16571756162812917, "grad_norm": 1.2890625, "learning_rate": 7.265466540993189e-05, "loss": 1.6681, "step": 1084 }, { "epoch": 0.16587043760749093, "grad_norm": 1.2734375, "learning_rate": 7.26503713873478e-05, "loss": 1.466, "step": 1085 }, { "epoch": 0.16602331358685266, "grad_norm": 1.3515625, "learning_rate": 7.264607741926906e-05, "loss": 1.5903, "step": 1086 }, { "epoch": 0.1661761895662144, "grad_norm": 1.46875, "learning_rate": 7.264178350570419e-05, "loss": 1.6502, "step": 1087 }, { "epoch": 0.16632906554557614, "grad_norm": 1.359375, "learning_rate": 7.263748964666182e-05, "loss": 1.4722, "step": 1088 }, { "epoch": 0.1664819415249379, "grad_norm": 1.34375, "learning_rate": 7.26331958421504e-05, "loss": 1.6528, "step": 1089 }, { "epoch": 0.16663481750429965, "grad_norm": 1.96875, "learning_rate": 7.262890209217858e-05, "loss": 1.5864, "step": 1090 }, { "epoch": 0.16678769348366138, "grad_norm": 1.3359375, "learning_rate": 7.26246083967549e-05, "loss": 1.1968, "step": 1091 }, { "epoch": 0.16694056946302313, "grad_norm": 1.296875, "learning_rate": 7.262031475588785e-05, "loss": 1.5584, "step": 1092 }, { "epoch": 0.16709344544238486, "grad_norm": 1.3984375, "learning_rate": 7.26160211695861e-05, "loss": 1.4423, "step": 1093 }, { "epoch": 0.1672463214217466, "grad_norm": 1.1953125, "learning_rate": 7.26117276378581e-05, "loss": 1.3607, "step": 1094 }, { "epoch": 0.16739919740110834, "grad_norm": 1.3828125, "learning_rate": 7.260743416071249e-05, "loss": 1.6679, "step": 1095 }, { "epoch": 0.1675520733804701, "grad_norm": 1.234375, "learning_rate": 7.260314073815775e-05, "loss": 1.3782, "step": 1096 }, { "epoch": 0.16770494935983182, "grad_norm": 1.359375, "learning_rate": 7.25988473702025e-05, "loss": 1.3631, "step": 1097 }, { "epoch": 0.16785782533919358, "grad_norm": 1.4375, "learning_rate": 7.25945540568553e-05, "loss": 1.7966, "step": 1098 }, { "epoch": 0.16801070131855533, "grad_norm": 1.3359375, "learning_rate": 7.259026079812463e-05, "loss": 1.6014, "step": 1099 }, { "epoch": 0.16816357729791706, "grad_norm": 1.3125, "learning_rate": 7.258596759401912e-05, "loss": 1.4576, "step": 1100 }, { "epoch": 0.16831645327727882, "grad_norm": 1.53125, "learning_rate": 7.258167444454728e-05, "loss": 1.6577, "step": 1101 }, { "epoch": 0.16846932925664054, "grad_norm": 1.5078125, "learning_rate": 7.257738134971772e-05, "loss": 1.6154, "step": 1102 }, { "epoch": 0.1686222052360023, "grad_norm": 1.4921875, "learning_rate": 7.25730883095389e-05, "loss": 1.7608, "step": 1103 }, { "epoch": 0.16877508121536403, "grad_norm": 1.2265625, "learning_rate": 7.25687953240195e-05, "loss": 1.3991, "step": 1104 }, { "epoch": 0.16892795719472578, "grad_norm": 1.265625, "learning_rate": 7.2564502393168e-05, "loss": 1.6815, "step": 1105 }, { "epoch": 0.1690808331740875, "grad_norm": 1.3828125, "learning_rate": 7.256020951699294e-05, "loss": 1.4937, "step": 1106 }, { "epoch": 0.16923370915344926, "grad_norm": 1.2890625, "learning_rate": 7.255591669550297e-05, "loss": 1.6217, "step": 1107 }, { "epoch": 0.16938658513281102, "grad_norm": 1.296875, "learning_rate": 7.255162392870649e-05, "loss": 1.3756, "step": 1108 }, { "epoch": 0.16953946111217275, "grad_norm": 1.4140625, "learning_rate": 7.254733121661221e-05, "loss": 1.5365, "step": 1109 }, { "epoch": 0.1696923370915345, "grad_norm": 1.34375, "learning_rate": 7.254303855922858e-05, "loss": 1.3127, "step": 1110 }, { "epoch": 0.16984521307089623, "grad_norm": 1.3671875, "learning_rate": 7.253874595656421e-05, "loss": 1.7607, "step": 1111 }, { "epoch": 0.16999808905025798, "grad_norm": 1.2265625, "learning_rate": 7.253445340862768e-05, "loss": 1.3536, "step": 1112 }, { "epoch": 0.1701509650296197, "grad_norm": 1.3203125, "learning_rate": 7.253016091542744e-05, "loss": 1.6606, "step": 1113 }, { "epoch": 0.17030384100898147, "grad_norm": 1.4921875, "learning_rate": 7.252586847697214e-05, "loss": 1.5833, "step": 1114 }, { "epoch": 0.1704567169883432, "grad_norm": 1.3125, "learning_rate": 7.252157609327026e-05, "loss": 1.5554, "step": 1115 }, { "epoch": 0.17060959296770495, "grad_norm": 1.2421875, "learning_rate": 7.251728376433044e-05, "loss": 1.8461, "step": 1116 }, { "epoch": 0.1707624689470667, "grad_norm": 1.34375, "learning_rate": 7.251299149016116e-05, "loss": 1.6122, "step": 1117 }, { "epoch": 0.17091534492642843, "grad_norm": 1.296875, "learning_rate": 7.250869927077107e-05, "loss": 1.5268, "step": 1118 }, { "epoch": 0.17106822090579019, "grad_norm": 1.3359375, "learning_rate": 7.250440710616861e-05, "loss": 1.2551, "step": 1119 }, { "epoch": 0.1712210968851519, "grad_norm": 1.3984375, "learning_rate": 7.250011499636237e-05, "loss": 1.7084, "step": 1120 }, { "epoch": 0.17137397286451367, "grad_norm": 1.3984375, "learning_rate": 7.249582294136093e-05, "loss": 1.7169, "step": 1121 }, { "epoch": 0.1715268488438754, "grad_norm": 1.40625, "learning_rate": 7.249153094117283e-05, "loss": 1.4796, "step": 1122 }, { "epoch": 0.17167972482323715, "grad_norm": 1.3515625, "learning_rate": 7.248723899580665e-05, "loss": 1.6672, "step": 1123 }, { "epoch": 0.17183260080259888, "grad_norm": 1.359375, "learning_rate": 7.248294710527091e-05, "loss": 1.3927, "step": 1124 }, { "epoch": 0.17198547678196063, "grad_norm": 1.1328125, "learning_rate": 7.247865526957414e-05, "loss": 1.2574, "step": 1125 }, { "epoch": 0.1721383527613224, "grad_norm": 1.328125, "learning_rate": 7.247436348872494e-05, "loss": 1.5171, "step": 1126 }, { "epoch": 0.17229122874068412, "grad_norm": 1.3671875, "learning_rate": 7.247007176273183e-05, "loss": 1.657, "step": 1127 }, { "epoch": 0.17244410472004587, "grad_norm": 1.3359375, "learning_rate": 7.246578009160343e-05, "loss": 1.633, "step": 1128 }, { "epoch": 0.1725969806994076, "grad_norm": 1.3203125, "learning_rate": 7.246148847534819e-05, "loss": 1.422, "step": 1129 }, { "epoch": 0.17274985667876935, "grad_norm": 1.4375, "learning_rate": 7.245719691397475e-05, "loss": 1.8312, "step": 1130 }, { "epoch": 0.17290273265813108, "grad_norm": 1.40625, "learning_rate": 7.245290540749163e-05, "loss": 1.7791, "step": 1131 }, { "epoch": 0.17305560863749284, "grad_norm": 1.203125, "learning_rate": 7.244861395590734e-05, "loss": 1.3157, "step": 1132 }, { "epoch": 0.17320848461685456, "grad_norm": 1.3828125, "learning_rate": 7.244432255923053e-05, "loss": 1.4326, "step": 1133 }, { "epoch": 0.17336136059621632, "grad_norm": 1.421875, "learning_rate": 7.244003121746964e-05, "loss": 1.549, "step": 1134 }, { "epoch": 0.17351423657557807, "grad_norm": 1.2734375, "learning_rate": 7.24357399306333e-05, "loss": 1.3517, "step": 1135 }, { "epoch": 0.1736671125549398, "grad_norm": 1.3671875, "learning_rate": 7.243144869873004e-05, "loss": 1.3534, "step": 1136 }, { "epoch": 0.17381998853430156, "grad_norm": 1.375, "learning_rate": 7.242715752176843e-05, "loss": 1.5799, "step": 1137 }, { "epoch": 0.17397286451366328, "grad_norm": 1.265625, "learning_rate": 7.242286639975701e-05, "loss": 1.4545, "step": 1138 }, { "epoch": 0.17412574049302504, "grad_norm": 1.375, "learning_rate": 7.24185753327043e-05, "loss": 1.7667, "step": 1139 }, { "epoch": 0.17427861647238677, "grad_norm": 1.3125, "learning_rate": 7.241428432061889e-05, "loss": 1.4194, "step": 1140 }, { "epoch": 0.17443149245174852, "grad_norm": 1.390625, "learning_rate": 7.240999336350929e-05, "loss": 1.5031, "step": 1141 }, { "epoch": 0.17458436843111025, "grad_norm": 1.3984375, "learning_rate": 7.240570246138414e-05, "loss": 1.4809, "step": 1142 }, { "epoch": 0.174737244410472, "grad_norm": 1.421875, "learning_rate": 7.240141161425187e-05, "loss": 1.5922, "step": 1143 }, { "epoch": 0.17489012038983376, "grad_norm": 1.25, "learning_rate": 7.239712082212114e-05, "loss": 1.3338, "step": 1144 }, { "epoch": 0.17504299636919549, "grad_norm": 1.171875, "learning_rate": 7.239283008500045e-05, "loss": 1.4723, "step": 1145 }, { "epoch": 0.17519587234855724, "grad_norm": 1.375, "learning_rate": 7.238853940289832e-05, "loss": 1.3482, "step": 1146 }, { "epoch": 0.17534874832791897, "grad_norm": 1.3671875, "learning_rate": 7.238424877582339e-05, "loss": 1.5941, "step": 1147 }, { "epoch": 0.17550162430728072, "grad_norm": 1.2578125, "learning_rate": 7.237995820378409e-05, "loss": 1.3997, "step": 1148 }, { "epoch": 0.17565450028664245, "grad_norm": 1.2890625, "learning_rate": 7.23756676867891e-05, "loss": 1.4583, "step": 1149 }, { "epoch": 0.1758073762660042, "grad_norm": 1.3984375, "learning_rate": 7.237137722484687e-05, "loss": 1.5731, "step": 1150 }, { "epoch": 0.17596025224536593, "grad_norm": 1.4921875, "learning_rate": 7.236708681796602e-05, "loss": 1.5601, "step": 1151 }, { "epoch": 0.1761131282247277, "grad_norm": 1.375, "learning_rate": 7.236279646615507e-05, "loss": 1.4283, "step": 1152 }, { "epoch": 0.17626600420408944, "grad_norm": 1.4921875, "learning_rate": 7.235850616942255e-05, "loss": 1.655, "step": 1153 }, { "epoch": 0.17641888018345117, "grad_norm": 1.3828125, "learning_rate": 7.235421592777703e-05, "loss": 1.5313, "step": 1154 }, { "epoch": 0.17657175616281293, "grad_norm": 1.2421875, "learning_rate": 7.234992574122705e-05, "loss": 1.4897, "step": 1155 }, { "epoch": 0.17672463214217465, "grad_norm": 1.2734375, "learning_rate": 7.23456356097812e-05, "loss": 1.4881, "step": 1156 }, { "epoch": 0.1768775081215364, "grad_norm": 1.3984375, "learning_rate": 7.234134553344801e-05, "loss": 1.4707, "step": 1157 }, { "epoch": 0.17703038410089814, "grad_norm": 1.359375, "learning_rate": 7.233705551223598e-05, "loss": 1.6343, "step": 1158 }, { "epoch": 0.1771832600802599, "grad_norm": 1.3046875, "learning_rate": 7.233276554615373e-05, "loss": 1.4104, "step": 1159 }, { "epoch": 0.17733613605962162, "grad_norm": 1.5546875, "learning_rate": 7.232847563520974e-05, "loss": 1.5973, "step": 1160 }, { "epoch": 0.17748901203898337, "grad_norm": 1.3984375, "learning_rate": 7.232418577941264e-05, "loss": 1.4782, "step": 1161 }, { "epoch": 0.17764188801834513, "grad_norm": 1.328125, "learning_rate": 7.23198959787709e-05, "loss": 1.7098, "step": 1162 }, { "epoch": 0.17779476399770686, "grad_norm": 1.3359375, "learning_rate": 7.231560623329315e-05, "loss": 1.549, "step": 1163 }, { "epoch": 0.1779476399770686, "grad_norm": 1.2265625, "learning_rate": 7.231131654298789e-05, "loss": 1.3115, "step": 1164 }, { "epoch": 0.17810051595643034, "grad_norm": 1.3125, "learning_rate": 7.230702690786364e-05, "loss": 1.428, "step": 1165 }, { "epoch": 0.1782533919357921, "grad_norm": 1.4296875, "learning_rate": 7.2302737327929e-05, "loss": 1.5309, "step": 1166 }, { "epoch": 0.17840626791515382, "grad_norm": 1.3203125, "learning_rate": 7.229844780319251e-05, "loss": 1.5136, "step": 1167 }, { "epoch": 0.17855914389451558, "grad_norm": 1.40625, "learning_rate": 7.229415833366272e-05, "loss": 1.5129, "step": 1168 }, { "epoch": 0.1787120198738773, "grad_norm": 1.4609375, "learning_rate": 7.228986891934815e-05, "loss": 1.5337, "step": 1169 }, { "epoch": 0.17886489585323906, "grad_norm": 1.3671875, "learning_rate": 7.228557956025736e-05, "loss": 1.3055, "step": 1170 }, { "epoch": 0.1790177718326008, "grad_norm": 1.34375, "learning_rate": 7.228129025639893e-05, "loss": 1.656, "step": 1171 }, { "epoch": 0.17917064781196254, "grad_norm": 1.2265625, "learning_rate": 7.227700100778137e-05, "loss": 1.4279, "step": 1172 }, { "epoch": 0.1793235237913243, "grad_norm": 1.296875, "learning_rate": 7.227271181441327e-05, "loss": 1.3114, "step": 1173 }, { "epoch": 0.17947639977068602, "grad_norm": 1.4375, "learning_rate": 7.226842267630312e-05, "loss": 1.5825, "step": 1174 }, { "epoch": 0.17962927575004778, "grad_norm": 1.4453125, "learning_rate": 7.226413359345952e-05, "loss": 1.4775, "step": 1175 }, { "epoch": 0.1797821517294095, "grad_norm": 1.3203125, "learning_rate": 7.225984456589096e-05, "loss": 1.4249, "step": 1176 }, { "epoch": 0.17993502770877126, "grad_norm": 1.2109375, "learning_rate": 7.225555559360607e-05, "loss": 1.4077, "step": 1177 }, { "epoch": 0.180087903688133, "grad_norm": 1.296875, "learning_rate": 7.225126667661335e-05, "loss": 1.3618, "step": 1178 }, { "epoch": 0.18024077966749474, "grad_norm": 1.390625, "learning_rate": 7.224697781492132e-05, "loss": 1.5735, "step": 1179 }, { "epoch": 0.1803936556468565, "grad_norm": 1.3515625, "learning_rate": 7.224268900853857e-05, "loss": 1.4676, "step": 1180 }, { "epoch": 0.18054653162621823, "grad_norm": 1.359375, "learning_rate": 7.22384002574736e-05, "loss": 1.4691, "step": 1181 }, { "epoch": 0.18069940760557998, "grad_norm": 1.28125, "learning_rate": 7.223411156173503e-05, "loss": 1.483, "step": 1182 }, { "epoch": 0.1808522835849417, "grad_norm": 1.3515625, "learning_rate": 7.222982292133138e-05, "loss": 1.6079, "step": 1183 }, { "epoch": 0.18100515956430346, "grad_norm": 1.53125, "learning_rate": 7.222553433627115e-05, "loss": 1.4924, "step": 1184 }, { "epoch": 0.1811580355436652, "grad_norm": 1.375, "learning_rate": 7.222124580656293e-05, "loss": 1.448, "step": 1185 }, { "epoch": 0.18131091152302695, "grad_norm": 1.53125, "learning_rate": 7.221695733221525e-05, "loss": 1.5898, "step": 1186 }, { "epoch": 0.18146378750238867, "grad_norm": 1.3984375, "learning_rate": 7.22126689132367e-05, "loss": 1.3287, "step": 1187 }, { "epoch": 0.18161666348175043, "grad_norm": 1.3046875, "learning_rate": 7.220838054963573e-05, "loss": 1.598, "step": 1188 }, { "epoch": 0.18176953946111218, "grad_norm": 1.421875, "learning_rate": 7.220409224142102e-05, "loss": 1.4717, "step": 1189 }, { "epoch": 0.1819224154404739, "grad_norm": 1.2890625, "learning_rate": 7.219980398860101e-05, "loss": 1.4728, "step": 1190 }, { "epoch": 0.18207529141983567, "grad_norm": 1.2265625, "learning_rate": 7.219551579118427e-05, "loss": 1.2526, "step": 1191 }, { "epoch": 0.1822281673991974, "grad_norm": 1.3671875, "learning_rate": 7.219122764917939e-05, "loss": 1.5365, "step": 1192 }, { "epoch": 0.18238104337855915, "grad_norm": 1.1875, "learning_rate": 7.21869395625948e-05, "loss": 1.3084, "step": 1193 }, { "epoch": 0.18253391935792088, "grad_norm": 1.234375, "learning_rate": 7.218265153143921e-05, "loss": 1.4742, "step": 1194 }, { "epoch": 0.18268679533728263, "grad_norm": 1.4609375, "learning_rate": 7.217836355572104e-05, "loss": 1.6622, "step": 1195 }, { "epoch": 0.18283967131664436, "grad_norm": 1.3828125, "learning_rate": 7.217407563544889e-05, "loss": 1.5436, "step": 1196 }, { "epoch": 0.18299254729600611, "grad_norm": 1.171875, "learning_rate": 7.216978777063133e-05, "loss": 1.3735, "step": 1197 }, { "epoch": 0.18314542327536787, "grad_norm": 1.3125, "learning_rate": 7.216549996127682e-05, "loss": 1.5789, "step": 1198 }, { "epoch": 0.1832982992547296, "grad_norm": 1.3828125, "learning_rate": 7.216121220739397e-05, "loss": 1.6176, "step": 1199 }, { "epoch": 0.18345117523409135, "grad_norm": 1.4140625, "learning_rate": 7.21569245089913e-05, "loss": 1.5995, "step": 1200 }, { "epoch": 0.18360405121345308, "grad_norm": 1.28125, "learning_rate": 7.215263686607735e-05, "loss": 1.6088, "step": 1201 }, { "epoch": 0.18375692719281483, "grad_norm": 1.3125, "learning_rate": 7.21483492786607e-05, "loss": 1.3494, "step": 1202 }, { "epoch": 0.18390980317217656, "grad_norm": 1.265625, "learning_rate": 7.21440617467499e-05, "loss": 1.3422, "step": 1203 }, { "epoch": 0.18406267915153832, "grad_norm": 1.4375, "learning_rate": 7.213977427035346e-05, "loss": 1.5243, "step": 1204 }, { "epoch": 0.18421555513090004, "grad_norm": 1.234375, "learning_rate": 7.21354868494799e-05, "loss": 1.3722, "step": 1205 }, { "epoch": 0.1843684311102618, "grad_norm": 1.265625, "learning_rate": 7.213119948413782e-05, "loss": 1.2887, "step": 1206 }, { "epoch": 0.18452130708962355, "grad_norm": 1.4375, "learning_rate": 7.212691217433572e-05, "loss": 1.4735, "step": 1207 }, { "epoch": 0.18467418306898528, "grad_norm": 1.359375, "learning_rate": 7.212262492008222e-05, "loss": 1.5786, "step": 1208 }, { "epoch": 0.18482705904834704, "grad_norm": 1.359375, "learning_rate": 7.211833772138575e-05, "loss": 1.4389, "step": 1209 }, { "epoch": 0.18497993502770876, "grad_norm": 1.2890625, "learning_rate": 7.211405057825494e-05, "loss": 1.4223, "step": 1210 }, { "epoch": 0.18513281100707052, "grad_norm": 1.3828125, "learning_rate": 7.210976349069831e-05, "loss": 1.6716, "step": 1211 }, { "epoch": 0.18528568698643225, "grad_norm": 1.1640625, "learning_rate": 7.210547645872439e-05, "loss": 1.3328, "step": 1212 }, { "epoch": 0.185438562965794, "grad_norm": 1.484375, "learning_rate": 7.210118948234177e-05, "loss": 1.6571, "step": 1213 }, { "epoch": 0.18559143894515573, "grad_norm": 1.3046875, "learning_rate": 7.209690256155892e-05, "loss": 1.6748, "step": 1214 }, { "epoch": 0.18574431492451748, "grad_norm": 1.4296875, "learning_rate": 7.209261569638443e-05, "loss": 1.6231, "step": 1215 }, { "epoch": 0.18589719090387924, "grad_norm": 1.3671875, "learning_rate": 7.208832888682689e-05, "loss": 1.5585, "step": 1216 }, { "epoch": 0.18605006688324097, "grad_norm": 1.328125, "learning_rate": 7.208404213289469e-05, "loss": 1.4855, "step": 1217 }, { "epoch": 0.18620294286260272, "grad_norm": 1.3046875, "learning_rate": 7.207975543459655e-05, "loss": 1.1566, "step": 1218 }, { "epoch": 0.18635581884196445, "grad_norm": 1.1796875, "learning_rate": 7.20754687919409e-05, "loss": 1.2992, "step": 1219 }, { "epoch": 0.1865086948213262, "grad_norm": 1.40625, "learning_rate": 7.207118220493634e-05, "loss": 1.6527, "step": 1220 }, { "epoch": 0.18666157080068793, "grad_norm": 1.3515625, "learning_rate": 7.206689567359135e-05, "loss": 1.5173, "step": 1221 }, { "epoch": 0.1868144467800497, "grad_norm": 1.3046875, "learning_rate": 7.206260919791454e-05, "loss": 1.3856, "step": 1222 }, { "epoch": 0.18696732275941141, "grad_norm": 1.234375, "learning_rate": 7.205832277791447e-05, "loss": 1.1564, "step": 1223 }, { "epoch": 0.18712019873877317, "grad_norm": 1.34375, "learning_rate": 7.205403641359958e-05, "loss": 1.5422, "step": 1224 }, { "epoch": 0.18727307471813492, "grad_norm": 1.28125, "learning_rate": 7.20497501049785e-05, "loss": 1.3873, "step": 1225 }, { "epoch": 0.18742595069749665, "grad_norm": 1.3984375, "learning_rate": 7.20454638520597e-05, "loss": 1.2334, "step": 1226 }, { "epoch": 0.1875788266768584, "grad_norm": 1.59375, "learning_rate": 7.204117765485183e-05, "loss": 1.6141, "step": 1227 }, { "epoch": 0.18773170265622013, "grad_norm": 1.546875, "learning_rate": 7.203689151336331e-05, "loss": 1.7485, "step": 1228 }, { "epoch": 0.1878845786355819, "grad_norm": 1.59375, "learning_rate": 7.203260542760279e-05, "loss": 1.8516, "step": 1229 }, { "epoch": 0.18803745461494362, "grad_norm": 1.3359375, "learning_rate": 7.202831939757872e-05, "loss": 1.6088, "step": 1230 }, { "epoch": 0.18819033059430537, "grad_norm": 1.2265625, "learning_rate": 7.202403342329969e-05, "loss": 1.3002, "step": 1231 }, { "epoch": 0.1883432065736671, "grad_norm": 1.65625, "learning_rate": 7.201974750477427e-05, "loss": 1.5733, "step": 1232 }, { "epoch": 0.18849608255302885, "grad_norm": 1.3671875, "learning_rate": 7.201546164201091e-05, "loss": 1.5515, "step": 1233 }, { "epoch": 0.1886489585323906, "grad_norm": 1.296875, "learning_rate": 7.201117583501826e-05, "loss": 1.35, "step": 1234 }, { "epoch": 0.18880183451175234, "grad_norm": 1.265625, "learning_rate": 7.200689008380477e-05, "loss": 1.4845, "step": 1235 }, { "epoch": 0.1889547104911141, "grad_norm": 1.1875, "learning_rate": 7.200260438837903e-05, "loss": 1.1849, "step": 1236 }, { "epoch": 0.18910758647047582, "grad_norm": 1.3359375, "learning_rate": 7.19983187487496e-05, "loss": 1.5402, "step": 1237 }, { "epoch": 0.18926046244983757, "grad_norm": 1.34375, "learning_rate": 7.199403316492495e-05, "loss": 1.5141, "step": 1238 }, { "epoch": 0.1894133384291993, "grad_norm": 1.296875, "learning_rate": 7.198974763691369e-05, "loss": 1.5899, "step": 1239 }, { "epoch": 0.18956621440856106, "grad_norm": 1.4609375, "learning_rate": 7.19854621647243e-05, "loss": 1.7225, "step": 1240 }, { "epoch": 0.18971909038792278, "grad_norm": 1.515625, "learning_rate": 7.198117674836537e-05, "loss": 1.4534, "step": 1241 }, { "epoch": 0.18987196636728454, "grad_norm": 1.296875, "learning_rate": 7.197689138784543e-05, "loss": 1.4041, "step": 1242 }, { "epoch": 0.1900248423466463, "grad_norm": 1.3359375, "learning_rate": 7.197260608317302e-05, "loss": 1.3383, "step": 1243 }, { "epoch": 0.19017771832600802, "grad_norm": 1.328125, "learning_rate": 7.196832083435667e-05, "loss": 1.6337, "step": 1244 }, { "epoch": 0.19033059430536978, "grad_norm": 1.28125, "learning_rate": 7.196403564140491e-05, "loss": 1.4851, "step": 1245 }, { "epoch": 0.1904834702847315, "grad_norm": 1.375, "learning_rate": 7.195975050432631e-05, "loss": 1.3924, "step": 1246 }, { "epoch": 0.19063634626409326, "grad_norm": 1.4609375, "learning_rate": 7.195546542312937e-05, "loss": 1.6634, "step": 1247 }, { "epoch": 0.190789222243455, "grad_norm": 1.4921875, "learning_rate": 7.19511803978227e-05, "loss": 1.6199, "step": 1248 }, { "epoch": 0.19094209822281674, "grad_norm": 1.328125, "learning_rate": 7.194689542841477e-05, "loss": 1.499, "step": 1249 }, { "epoch": 0.19109497420217847, "grad_norm": 1.34375, "learning_rate": 7.194261051491413e-05, "loss": 1.4181, "step": 1250 }, { "epoch": 0.19124785018154022, "grad_norm": 1.25, "learning_rate": 7.193832565732935e-05, "loss": 1.6497, "step": 1251 }, { "epoch": 0.19140072616090198, "grad_norm": 1.3984375, "learning_rate": 7.193404085566892e-05, "loss": 1.6223, "step": 1252 }, { "epoch": 0.1915536021402637, "grad_norm": 1.3359375, "learning_rate": 7.192975610994149e-05, "loss": 1.3309, "step": 1253 }, { "epoch": 0.19170647811962546, "grad_norm": 1.2578125, "learning_rate": 7.192547142015544e-05, "loss": 1.7185, "step": 1254 }, { "epoch": 0.1918593540989872, "grad_norm": 1.4609375, "learning_rate": 7.192118678631942e-05, "loss": 1.7686, "step": 1255 }, { "epoch": 0.19201223007834894, "grad_norm": 1.2734375, "learning_rate": 7.191690220844194e-05, "loss": 1.557, "step": 1256 }, { "epoch": 0.19216510605771067, "grad_norm": 1.359375, "learning_rate": 7.191261768653152e-05, "loss": 1.4299, "step": 1257 }, { "epoch": 0.19231798203707243, "grad_norm": 1.40625, "learning_rate": 7.190833322059675e-05, "loss": 1.4571, "step": 1258 }, { "epoch": 0.19247085801643415, "grad_norm": 1.6953125, "learning_rate": 7.19040488106461e-05, "loss": 1.4971, "step": 1259 }, { "epoch": 0.1926237339957959, "grad_norm": 1.3671875, "learning_rate": 7.189976445668816e-05, "loss": 1.4532, "step": 1260 }, { "epoch": 0.19277660997515766, "grad_norm": 1.4453125, "learning_rate": 7.189548015873142e-05, "loss": 1.5399, "step": 1261 }, { "epoch": 0.1929294859545194, "grad_norm": 1.375, "learning_rate": 7.189119591678448e-05, "loss": 1.3798, "step": 1262 }, { "epoch": 0.19308236193388115, "grad_norm": 1.4609375, "learning_rate": 7.188691173085587e-05, "loss": 1.7449, "step": 1263 }, { "epoch": 0.19323523791324287, "grad_norm": 1.2109375, "learning_rate": 7.188262760095408e-05, "loss": 1.2065, "step": 1264 }, { "epoch": 0.19338811389260463, "grad_norm": 1.2265625, "learning_rate": 7.187834352708767e-05, "loss": 1.2585, "step": 1265 }, { "epoch": 0.19354098987196636, "grad_norm": 1.265625, "learning_rate": 7.187405950926517e-05, "loss": 1.4675, "step": 1266 }, { "epoch": 0.1936938658513281, "grad_norm": 1.46875, "learning_rate": 7.186977554749516e-05, "loss": 1.6249, "step": 1267 }, { "epoch": 0.19384674183068984, "grad_norm": 1.3828125, "learning_rate": 7.18654916417861e-05, "loss": 1.6359, "step": 1268 }, { "epoch": 0.1939996178100516, "grad_norm": 1.2578125, "learning_rate": 7.186120779214662e-05, "loss": 1.2571, "step": 1269 }, { "epoch": 0.19415249378941335, "grad_norm": 1.3671875, "learning_rate": 7.18569239985852e-05, "loss": 1.4259, "step": 1270 }, { "epoch": 0.19430536976877508, "grad_norm": 1.40625, "learning_rate": 7.185264026111037e-05, "loss": 1.3059, "step": 1271 }, { "epoch": 0.19445824574813683, "grad_norm": 1.34375, "learning_rate": 7.184835657973073e-05, "loss": 1.5785, "step": 1272 }, { "epoch": 0.19461112172749856, "grad_norm": 1.3515625, "learning_rate": 7.18440729544547e-05, "loss": 1.4217, "step": 1273 }, { "epoch": 0.19476399770686031, "grad_norm": 1.40625, "learning_rate": 7.183978938529094e-05, "loss": 1.4414, "step": 1274 }, { "epoch": 0.19491687368622204, "grad_norm": 1.2265625, "learning_rate": 7.183550587224795e-05, "loss": 1.382, "step": 1275 }, { "epoch": 0.1950697496655838, "grad_norm": 1.3359375, "learning_rate": 7.18312224153342e-05, "loss": 1.6008, "step": 1276 }, { "epoch": 0.19522262564494552, "grad_norm": 1.359375, "learning_rate": 7.182693901455834e-05, "loss": 1.4223, "step": 1277 }, { "epoch": 0.19537550162430728, "grad_norm": 1.546875, "learning_rate": 7.182265566992878e-05, "loss": 1.6979, "step": 1278 }, { "epoch": 0.19552837760366903, "grad_norm": 1.296875, "learning_rate": 7.181837238145417e-05, "loss": 1.3558, "step": 1279 }, { "epoch": 0.19568125358303076, "grad_norm": 1.2578125, "learning_rate": 7.181408914914297e-05, "loss": 1.4607, "step": 1280 }, { "epoch": 0.19583412956239252, "grad_norm": 1.2734375, "learning_rate": 7.180980597300377e-05, "loss": 1.3251, "step": 1281 }, { "epoch": 0.19598700554175424, "grad_norm": 1.359375, "learning_rate": 7.180552285304509e-05, "loss": 1.4241, "step": 1282 }, { "epoch": 0.196139881521116, "grad_norm": 1.2421875, "learning_rate": 7.180123978927542e-05, "loss": 1.223, "step": 1283 }, { "epoch": 0.19629275750047773, "grad_norm": 1.390625, "learning_rate": 7.179695678170335e-05, "loss": 1.3792, "step": 1284 }, { "epoch": 0.19644563347983948, "grad_norm": 1.265625, "learning_rate": 7.179267383033736e-05, "loss": 1.5657, "step": 1285 }, { "epoch": 0.1965985094592012, "grad_norm": 1.390625, "learning_rate": 7.178839093518608e-05, "loss": 1.4217, "step": 1286 }, { "epoch": 0.19675138543856296, "grad_norm": 1.3828125, "learning_rate": 7.178410809625794e-05, "loss": 1.3663, "step": 1287 }, { "epoch": 0.19690426141792472, "grad_norm": 1.4609375, "learning_rate": 7.177982531356158e-05, "loss": 1.7429, "step": 1288 }, { "epoch": 0.19705713739728645, "grad_norm": 1.3046875, "learning_rate": 7.177554258710545e-05, "loss": 1.5311, "step": 1289 }, { "epoch": 0.1972100133766482, "grad_norm": 1.21875, "learning_rate": 7.177125991689809e-05, "loss": 1.3002, "step": 1290 }, { "epoch": 0.19736288935600993, "grad_norm": 1.3203125, "learning_rate": 7.176697730294809e-05, "loss": 1.6536, "step": 1291 }, { "epoch": 0.19751576533537168, "grad_norm": 1.359375, "learning_rate": 7.176269474526393e-05, "loss": 1.4768, "step": 1292 }, { "epoch": 0.1976686413147334, "grad_norm": 1.203125, "learning_rate": 7.17584122438542e-05, "loss": 1.2082, "step": 1293 }, { "epoch": 0.19782151729409517, "grad_norm": 1.1640625, "learning_rate": 7.175412979872738e-05, "loss": 1.3947, "step": 1294 }, { "epoch": 0.1979743932734569, "grad_norm": 1.4453125, "learning_rate": 7.174984740989203e-05, "loss": 1.3589, "step": 1295 }, { "epoch": 0.19812726925281865, "grad_norm": 1.2578125, "learning_rate": 7.17455650773567e-05, "loss": 1.3818, "step": 1296 }, { "epoch": 0.1982801452321804, "grad_norm": 1.3984375, "learning_rate": 7.174128280112989e-05, "loss": 1.5811, "step": 1297 }, { "epoch": 0.19843302121154213, "grad_norm": 1.3359375, "learning_rate": 7.173700058122017e-05, "loss": 1.4036, "step": 1298 }, { "epoch": 0.1985858971909039, "grad_norm": 1.5078125, "learning_rate": 7.173271841763602e-05, "loss": 1.5554, "step": 1299 }, { "epoch": 0.19873877317026561, "grad_norm": 1.2265625, "learning_rate": 7.172843631038604e-05, "loss": 1.4323, "step": 1300 }, { "epoch": 0.19889164914962737, "grad_norm": 1.4765625, "learning_rate": 7.172415425947869e-05, "loss": 1.6679, "step": 1301 }, { "epoch": 0.1990445251289891, "grad_norm": 1.4140625, "learning_rate": 7.17198722649226e-05, "loss": 1.6384, "step": 1302 }, { "epoch": 0.19919740110835085, "grad_norm": 1.265625, "learning_rate": 7.171559032672625e-05, "loss": 1.3327, "step": 1303 }, { "epoch": 0.19935027708771258, "grad_norm": 1.2890625, "learning_rate": 7.171130844489813e-05, "loss": 1.432, "step": 1304 }, { "epoch": 0.19950315306707433, "grad_norm": 1.359375, "learning_rate": 7.170702661944684e-05, "loss": 1.6095, "step": 1305 }, { "epoch": 0.1996560290464361, "grad_norm": 1.2109375, "learning_rate": 7.170274485038088e-05, "loss": 1.5294, "step": 1306 }, { "epoch": 0.19980890502579782, "grad_norm": 1.3125, "learning_rate": 7.169846313770881e-05, "loss": 1.4679, "step": 1307 }, { "epoch": 0.19996178100515957, "grad_norm": 1.203125, "learning_rate": 7.169418148143917e-05, "loss": 1.3049, "step": 1308 }, { "epoch": 0.2001146569845213, "grad_norm": 1.515625, "learning_rate": 7.168989988158041e-05, "loss": 1.7629, "step": 1309 }, { "epoch": 0.20026753296388305, "grad_norm": 1.28125, "learning_rate": 7.168561833814116e-05, "loss": 1.4754, "step": 1310 }, { "epoch": 0.20042040894324478, "grad_norm": 1.4296875, "learning_rate": 7.168133685112988e-05, "loss": 1.8118, "step": 1311 }, { "epoch": 0.20057328492260654, "grad_norm": 1.3046875, "learning_rate": 7.167705542055519e-05, "loss": 1.5194, "step": 1312 }, { "epoch": 0.20072616090196826, "grad_norm": 1.2109375, "learning_rate": 7.167277404642551e-05, "loss": 1.2952, "step": 1313 }, { "epoch": 0.20087903688133002, "grad_norm": 1.4140625, "learning_rate": 7.166849272874949e-05, "loss": 1.5722, "step": 1314 }, { "epoch": 0.20103191286069177, "grad_norm": 1.265625, "learning_rate": 7.166421146753558e-05, "loss": 1.4163, "step": 1315 }, { "epoch": 0.2011847888400535, "grad_norm": 1.3203125, "learning_rate": 7.165993026279232e-05, "loss": 1.41, "step": 1316 }, { "epoch": 0.20133766481941526, "grad_norm": 1.3203125, "learning_rate": 7.165564911452831e-05, "loss": 1.7381, "step": 1317 }, { "epoch": 0.20149054079877698, "grad_norm": 1.2578125, "learning_rate": 7.165136802275196e-05, "loss": 1.4838, "step": 1318 }, { "epoch": 0.20164341677813874, "grad_norm": 1.2109375, "learning_rate": 7.164708698747194e-05, "loss": 1.164, "step": 1319 }, { "epoch": 0.20179629275750047, "grad_norm": 1.4609375, "learning_rate": 7.164280600869666e-05, "loss": 1.7092, "step": 1320 }, { "epoch": 0.20194916873686222, "grad_norm": 1.234375, "learning_rate": 7.163852508643474e-05, "loss": 1.2434, "step": 1321 }, { "epoch": 0.20210204471622395, "grad_norm": 1.34375, "learning_rate": 7.163424422069469e-05, "loss": 1.467, "step": 1322 }, { "epoch": 0.2022549206955857, "grad_norm": 1.453125, "learning_rate": 7.162996341148498e-05, "loss": 1.5608, "step": 1323 }, { "epoch": 0.20240779667494746, "grad_norm": 1.265625, "learning_rate": 7.162568265881424e-05, "loss": 1.5165, "step": 1324 }, { "epoch": 0.2025606726543092, "grad_norm": 1.46875, "learning_rate": 7.16214019626909e-05, "loss": 1.4863, "step": 1325 }, { "epoch": 0.20271354863367094, "grad_norm": 1.40625, "learning_rate": 7.161712132312357e-05, "loss": 1.3779, "step": 1326 }, { "epoch": 0.20286642461303267, "grad_norm": 1.359375, "learning_rate": 7.161284074012074e-05, "loss": 1.6221, "step": 1327 }, { "epoch": 0.20301930059239442, "grad_norm": 1.390625, "learning_rate": 7.1608560213691e-05, "loss": 1.3004, "step": 1328 }, { "epoch": 0.20317217657175615, "grad_norm": 1.515625, "learning_rate": 7.160427974384281e-05, "loss": 1.7222, "step": 1329 }, { "epoch": 0.2033250525511179, "grad_norm": 1.375, "learning_rate": 7.15999993305847e-05, "loss": 1.5675, "step": 1330 }, { "epoch": 0.20347792853047963, "grad_norm": 1.4375, "learning_rate": 7.159571897392526e-05, "loss": 1.6204, "step": 1331 }, { "epoch": 0.2036308045098414, "grad_norm": 1.21875, "learning_rate": 7.159143867387294e-05, "loss": 1.3672, "step": 1332 }, { "epoch": 0.20378368048920315, "grad_norm": 1.453125, "learning_rate": 7.158715843043639e-05, "loss": 1.6924, "step": 1333 }, { "epoch": 0.20393655646856487, "grad_norm": 1.453125, "learning_rate": 7.158287824362399e-05, "loss": 1.409, "step": 1334 }, { "epoch": 0.20408943244792663, "grad_norm": 1.4453125, "learning_rate": 7.15785981134444e-05, "loss": 1.903, "step": 1335 }, { "epoch": 0.20424230842728835, "grad_norm": 1.421875, "learning_rate": 7.157431803990609e-05, "loss": 1.358, "step": 1336 }, { "epoch": 0.2043951844066501, "grad_norm": 1.25, "learning_rate": 7.157003802301756e-05, "loss": 1.2911, "step": 1337 }, { "epoch": 0.20454806038601184, "grad_norm": 1.375, "learning_rate": 7.156575806278743e-05, "loss": 1.4547, "step": 1338 }, { "epoch": 0.2047009363653736, "grad_norm": 1.4609375, "learning_rate": 7.156147815922413e-05, "loss": 1.5554, "step": 1339 }, { "epoch": 0.20485381234473532, "grad_norm": 1.296875, "learning_rate": 7.155719831233624e-05, "loss": 1.6137, "step": 1340 }, { "epoch": 0.20500668832409707, "grad_norm": 1.390625, "learning_rate": 7.155291852213235e-05, "loss": 1.6015, "step": 1341 }, { "epoch": 0.20515956430345883, "grad_norm": 1.359375, "learning_rate": 7.154863878862081e-05, "loss": 1.6731, "step": 1342 }, { "epoch": 0.20531244028282056, "grad_norm": 1.1875, "learning_rate": 7.154435911181036e-05, "loss": 1.3144, "step": 1343 }, { "epoch": 0.2054653162621823, "grad_norm": 1.3828125, "learning_rate": 7.154007949170938e-05, "loss": 1.6549, "step": 1344 }, { "epoch": 0.20561819224154404, "grad_norm": 1.28125, "learning_rate": 7.153579992832646e-05, "loss": 1.4028, "step": 1345 }, { "epoch": 0.2057710682209058, "grad_norm": 1.4296875, "learning_rate": 7.153152042167012e-05, "loss": 1.4579, "step": 1346 }, { "epoch": 0.20592394420026752, "grad_norm": 1.40625, "learning_rate": 7.15272409717489e-05, "loss": 1.4194, "step": 1347 }, { "epoch": 0.20607682017962928, "grad_norm": 1.3984375, "learning_rate": 7.152296157857134e-05, "loss": 1.4872, "step": 1348 }, { "epoch": 0.206229696158991, "grad_norm": 1.2578125, "learning_rate": 7.151868224214589e-05, "loss": 1.5636, "step": 1349 }, { "epoch": 0.20638257213835276, "grad_norm": 1.4921875, "learning_rate": 7.151440296248117e-05, "loss": 1.4108, "step": 1350 }, { "epoch": 0.20653544811771452, "grad_norm": 1.265625, "learning_rate": 7.151012373958563e-05, "loss": 1.539, "step": 1351 }, { "epoch": 0.20668832409707624, "grad_norm": 1.2109375, "learning_rate": 7.150584457346791e-05, "loss": 1.4829, "step": 1352 }, { "epoch": 0.206841200076438, "grad_norm": 1.296875, "learning_rate": 7.150156546413639e-05, "loss": 1.6532, "step": 1353 }, { "epoch": 0.20699407605579973, "grad_norm": 1.4296875, "learning_rate": 7.149728641159974e-05, "loss": 1.32, "step": 1354 }, { "epoch": 0.20714695203516148, "grad_norm": 1.171875, "learning_rate": 7.14930074158664e-05, "loss": 1.3375, "step": 1355 }, { "epoch": 0.2072998280145232, "grad_norm": 1.4921875, "learning_rate": 7.14887284769449e-05, "loss": 1.4912, "step": 1356 }, { "epoch": 0.20745270399388496, "grad_norm": 1.359375, "learning_rate": 7.148444959484384e-05, "loss": 1.6436, "step": 1357 }, { "epoch": 0.2076055799732467, "grad_norm": 1.2421875, "learning_rate": 7.148017076957163e-05, "loss": 1.2142, "step": 1358 }, { "epoch": 0.20775845595260845, "grad_norm": 1.5078125, "learning_rate": 7.147589200113691e-05, "loss": 1.6684, "step": 1359 }, { "epoch": 0.2079113319319702, "grad_norm": 1.2890625, "learning_rate": 7.147161328954814e-05, "loss": 1.5237, "step": 1360 }, { "epoch": 0.20806420791133193, "grad_norm": 1.4140625, "learning_rate": 7.146733463481388e-05, "loss": 1.5089, "step": 1361 }, { "epoch": 0.20821708389069368, "grad_norm": 1.28125, "learning_rate": 7.146305603694268e-05, "loss": 1.3367, "step": 1362 }, { "epoch": 0.2083699598700554, "grad_norm": 1.3515625, "learning_rate": 7.145877749594296e-05, "loss": 1.5213, "step": 1363 }, { "epoch": 0.20852283584941717, "grad_norm": 1.484375, "learning_rate": 7.145449901182336e-05, "loss": 1.9016, "step": 1364 }, { "epoch": 0.2086757118287789, "grad_norm": 1.296875, "learning_rate": 7.145022058459234e-05, "loss": 1.3901, "step": 1365 }, { "epoch": 0.20882858780814065, "grad_norm": 1.578125, "learning_rate": 7.144594221425847e-05, "loss": 1.8253, "step": 1366 }, { "epoch": 0.20898146378750238, "grad_norm": 1.234375, "learning_rate": 7.144166390083025e-05, "loss": 1.3843, "step": 1367 }, { "epoch": 0.20913433976686413, "grad_norm": 1.1484375, "learning_rate": 7.143738564431623e-05, "loss": 1.256, "step": 1368 }, { "epoch": 0.20928721574622589, "grad_norm": 1.375, "learning_rate": 7.143310744472492e-05, "loss": 1.5612, "step": 1369 }, { "epoch": 0.2094400917255876, "grad_norm": 1.3359375, "learning_rate": 7.142882930206483e-05, "loss": 1.6262, "step": 1370 }, { "epoch": 0.20959296770494937, "grad_norm": 1.375, "learning_rate": 7.14245512163445e-05, "loss": 1.458, "step": 1371 }, { "epoch": 0.2097458436843111, "grad_norm": 1.34375, "learning_rate": 7.142027318757245e-05, "loss": 1.6873, "step": 1372 }, { "epoch": 0.20989871966367285, "grad_norm": 1.4140625, "learning_rate": 7.141599521575726e-05, "loss": 1.6595, "step": 1373 }, { "epoch": 0.21005159564303458, "grad_norm": 1.2890625, "learning_rate": 7.141171730090739e-05, "loss": 1.3203, "step": 1374 }, { "epoch": 0.21020447162239633, "grad_norm": 1.28125, "learning_rate": 7.140743944303136e-05, "loss": 1.4667, "step": 1375 }, { "epoch": 0.21035734760175806, "grad_norm": 1.375, "learning_rate": 7.140316164213774e-05, "loss": 1.4278, "step": 1376 }, { "epoch": 0.21051022358111982, "grad_norm": 1.34375, "learning_rate": 7.139888389823501e-05, "loss": 1.4878, "step": 1377 }, { "epoch": 0.21066309956048157, "grad_norm": 1.375, "learning_rate": 7.139460621133177e-05, "loss": 1.5393, "step": 1378 }, { "epoch": 0.2108159755398433, "grad_norm": 1.484375, "learning_rate": 7.139032858143647e-05, "loss": 1.6977, "step": 1379 }, { "epoch": 0.21096885151920505, "grad_norm": 1.3515625, "learning_rate": 7.138605100855765e-05, "loss": 1.4178, "step": 1380 }, { "epoch": 0.21112172749856678, "grad_norm": 1.25, "learning_rate": 7.138177349270385e-05, "loss": 1.3983, "step": 1381 }, { "epoch": 0.21127460347792854, "grad_norm": 1.359375, "learning_rate": 7.137749603388358e-05, "loss": 1.4785, "step": 1382 }, { "epoch": 0.21142747945729026, "grad_norm": 1.4375, "learning_rate": 7.137321863210542e-05, "loss": 1.6085, "step": 1383 }, { "epoch": 0.21158035543665202, "grad_norm": 1.359375, "learning_rate": 7.136894128737781e-05, "loss": 1.5428, "step": 1384 }, { "epoch": 0.21173323141601375, "grad_norm": 1.21875, "learning_rate": 7.136466399970932e-05, "loss": 1.3472, "step": 1385 }, { "epoch": 0.2118861073953755, "grad_norm": 1.2421875, "learning_rate": 7.136038676910846e-05, "loss": 1.431, "step": 1386 }, { "epoch": 0.21203898337473726, "grad_norm": 1.4453125, "learning_rate": 7.135610959558377e-05, "loss": 1.7729, "step": 1387 }, { "epoch": 0.21219185935409898, "grad_norm": 1.421875, "learning_rate": 7.13518324791438e-05, "loss": 1.4453, "step": 1388 }, { "epoch": 0.21234473533346074, "grad_norm": 1.3359375, "learning_rate": 7.1347555419797e-05, "loss": 1.5428, "step": 1389 }, { "epoch": 0.21249761131282247, "grad_norm": 1.4140625, "learning_rate": 7.134327841755195e-05, "loss": 1.6438, "step": 1390 }, { "epoch": 0.21265048729218422, "grad_norm": 1.3671875, "learning_rate": 7.133900147241712e-05, "loss": 1.6016, "step": 1391 }, { "epoch": 0.21280336327154595, "grad_norm": 1.2421875, "learning_rate": 7.133472458440114e-05, "loss": 1.4099, "step": 1392 }, { "epoch": 0.2129562392509077, "grad_norm": 1.484375, "learning_rate": 7.133044775351239e-05, "loss": 1.7095, "step": 1393 }, { "epoch": 0.21310911523026943, "grad_norm": 1.2734375, "learning_rate": 7.132617097975953e-05, "loss": 1.414, "step": 1394 }, { "epoch": 0.21326199120963119, "grad_norm": 1.3359375, "learning_rate": 7.132189426315099e-05, "loss": 1.3477, "step": 1395 }, { "epoch": 0.21341486718899294, "grad_norm": 1.5078125, "learning_rate": 7.131761760369531e-05, "loss": 1.7697, "step": 1396 }, { "epoch": 0.21356774316835467, "grad_norm": 1.3203125, "learning_rate": 7.131334100140108e-05, "loss": 1.434, "step": 1397 }, { "epoch": 0.21372061914771642, "grad_norm": 1.234375, "learning_rate": 7.130906445627669e-05, "loss": 1.292, "step": 1398 }, { "epoch": 0.21387349512707815, "grad_norm": 1.328125, "learning_rate": 7.130478796833084e-05, "loss": 1.6295, "step": 1399 }, { "epoch": 0.2140263711064399, "grad_norm": 1.375, "learning_rate": 7.13005115375719e-05, "loss": 1.5023, "step": 1400 }, { "epoch": 0.21417924708580163, "grad_norm": 1.3515625, "learning_rate": 7.129623516400844e-05, "loss": 1.2472, "step": 1401 }, { "epoch": 0.2143321230651634, "grad_norm": 1.3671875, "learning_rate": 7.129195884764903e-05, "loss": 1.5157, "step": 1402 }, { "epoch": 0.21448499904452512, "grad_norm": 1.4921875, "learning_rate": 7.128768258850212e-05, "loss": 1.4904, "step": 1403 }, { "epoch": 0.21463787502388687, "grad_norm": 1.3828125, "learning_rate": 7.128340638657627e-05, "loss": 1.8814, "step": 1404 }, { "epoch": 0.21479075100324863, "grad_norm": 1.3515625, "learning_rate": 7.127913024187998e-05, "loss": 1.5305, "step": 1405 }, { "epoch": 0.21494362698261035, "grad_norm": 1.25, "learning_rate": 7.127485415442182e-05, "loss": 1.5918, "step": 1406 }, { "epoch": 0.2150965029619721, "grad_norm": 1.265625, "learning_rate": 7.12705781242103e-05, "loss": 1.3808, "step": 1407 }, { "epoch": 0.21524937894133384, "grad_norm": 1.1875, "learning_rate": 7.126630215125388e-05, "loss": 1.3335, "step": 1408 }, { "epoch": 0.2154022549206956, "grad_norm": 1.2578125, "learning_rate": 7.126202623556112e-05, "loss": 1.3821, "step": 1409 }, { "epoch": 0.21555513090005732, "grad_norm": 1.46875, "learning_rate": 7.125775037714055e-05, "loss": 1.6315, "step": 1410 }, { "epoch": 0.21570800687941907, "grad_norm": 1.3125, "learning_rate": 7.125347457600071e-05, "loss": 1.3276, "step": 1411 }, { "epoch": 0.2158608828587808, "grad_norm": 1.421875, "learning_rate": 7.124919883215007e-05, "loss": 1.5268, "step": 1412 }, { "epoch": 0.21601375883814256, "grad_norm": 1.3671875, "learning_rate": 7.124492314559723e-05, "loss": 1.3663, "step": 1413 }, { "epoch": 0.2161666348175043, "grad_norm": 1.4375, "learning_rate": 7.124064751635063e-05, "loss": 1.5366, "step": 1414 }, { "epoch": 0.21631951079686604, "grad_norm": 1.265625, "learning_rate": 7.123637194441881e-05, "loss": 1.4835, "step": 1415 }, { "epoch": 0.2164723867762278, "grad_norm": 1.421875, "learning_rate": 7.12320964298103e-05, "loss": 1.4964, "step": 1416 }, { "epoch": 0.21662526275558952, "grad_norm": 1.234375, "learning_rate": 7.122782097253362e-05, "loss": 1.1674, "step": 1417 }, { "epoch": 0.21677813873495128, "grad_norm": 1.2578125, "learning_rate": 7.122354557259735e-05, "loss": 1.3672, "step": 1418 }, { "epoch": 0.216931014714313, "grad_norm": 1.4921875, "learning_rate": 7.121927023000989e-05, "loss": 1.8929, "step": 1419 }, { "epoch": 0.21708389069367476, "grad_norm": 1.3984375, "learning_rate": 7.121499494477986e-05, "loss": 1.6751, "step": 1420 }, { "epoch": 0.21723676667303649, "grad_norm": 1.4140625, "learning_rate": 7.121071971691573e-05, "loss": 1.6119, "step": 1421 }, { "epoch": 0.21738964265239824, "grad_norm": 1.2890625, "learning_rate": 7.120644454642602e-05, "loss": 1.6929, "step": 1422 }, { "epoch": 0.21754251863176, "grad_norm": 1.5, "learning_rate": 7.120216943331932e-05, "loss": 1.5637, "step": 1423 }, { "epoch": 0.21769539461112172, "grad_norm": 1.265625, "learning_rate": 7.119789437760404e-05, "loss": 1.2257, "step": 1424 }, { "epoch": 0.21784827059048348, "grad_norm": 1.4609375, "learning_rate": 7.119361937928878e-05, "loss": 1.6247, "step": 1425 }, { "epoch": 0.2180011465698452, "grad_norm": 1.2578125, "learning_rate": 7.118934443838202e-05, "loss": 1.2395, "step": 1426 }, { "epoch": 0.21815402254920696, "grad_norm": 1.3828125, "learning_rate": 7.11850695548923e-05, "loss": 1.4492, "step": 1427 }, { "epoch": 0.2183068985285687, "grad_norm": 1.40625, "learning_rate": 7.118079472882816e-05, "loss": 1.7695, "step": 1428 }, { "epoch": 0.21845977450793044, "grad_norm": 1.171875, "learning_rate": 7.117651996019805e-05, "loss": 1.1376, "step": 1429 }, { "epoch": 0.21861265048729217, "grad_norm": 1.4296875, "learning_rate": 7.117224524901056e-05, "loss": 1.4578, "step": 1430 }, { "epoch": 0.21876552646665393, "grad_norm": 1.390625, "learning_rate": 7.116797059527416e-05, "loss": 1.4018, "step": 1431 }, { "epoch": 0.21891840244601568, "grad_norm": 1.359375, "learning_rate": 7.116369599899739e-05, "loss": 1.6358, "step": 1432 }, { "epoch": 0.2190712784253774, "grad_norm": 1.3203125, "learning_rate": 7.115942146018881e-05, "loss": 1.4666, "step": 1433 }, { "epoch": 0.21922415440473916, "grad_norm": 1.359375, "learning_rate": 7.115514697885685e-05, "loss": 1.5555, "step": 1434 }, { "epoch": 0.2193770303841009, "grad_norm": 1.3984375, "learning_rate": 7.115087255501012e-05, "loss": 1.6411, "step": 1435 }, { "epoch": 0.21952990636346265, "grad_norm": 1.3125, "learning_rate": 7.114659818865704e-05, "loss": 1.5454, "step": 1436 }, { "epoch": 0.21968278234282437, "grad_norm": 1.4375, "learning_rate": 7.114232387980624e-05, "loss": 1.4562, "step": 1437 }, { "epoch": 0.21983565832218613, "grad_norm": 1.2734375, "learning_rate": 7.113804962846613e-05, "loss": 1.3841, "step": 1438 }, { "epoch": 0.21998853430154786, "grad_norm": 1.3125, "learning_rate": 7.113377543464533e-05, "loss": 1.5197, "step": 1439 }, { "epoch": 0.2201414102809096, "grad_norm": 1.5, "learning_rate": 7.112950129835229e-05, "loss": 1.4657, "step": 1440 }, { "epoch": 0.22029428626027137, "grad_norm": 1.40625, "learning_rate": 7.112522721959551e-05, "loss": 1.5116, "step": 1441 }, { "epoch": 0.2204471622396331, "grad_norm": 1.3828125, "learning_rate": 7.112095319838361e-05, "loss": 1.4436, "step": 1442 }, { "epoch": 0.22060003821899485, "grad_norm": 1.5546875, "learning_rate": 7.111667923472497e-05, "loss": 1.5547, "step": 1443 }, { "epoch": 0.22075291419835658, "grad_norm": 1.359375, "learning_rate": 7.111240532862824e-05, "loss": 1.5903, "step": 1444 }, { "epoch": 0.22090579017771833, "grad_norm": 1.3828125, "learning_rate": 7.110813148010184e-05, "loss": 1.8591, "step": 1445 }, { "epoch": 0.22105866615708006, "grad_norm": 1.28125, "learning_rate": 7.110385768915433e-05, "loss": 1.678, "step": 1446 }, { "epoch": 0.2212115421364418, "grad_norm": 1.2265625, "learning_rate": 7.109958395579426e-05, "loss": 1.1696, "step": 1447 }, { "epoch": 0.22136441811580354, "grad_norm": 1.4609375, "learning_rate": 7.109531028003006e-05, "loss": 1.6613, "step": 1448 }, { "epoch": 0.2215172940951653, "grad_norm": 1.4765625, "learning_rate": 7.109103666187032e-05, "loss": 1.6052, "step": 1449 }, { "epoch": 0.22167017007452705, "grad_norm": 1.5078125, "learning_rate": 7.10867631013235e-05, "loss": 1.4898, "step": 1450 }, { "epoch": 0.22182304605388878, "grad_norm": 1.265625, "learning_rate": 7.108248959839818e-05, "loss": 1.5036, "step": 1451 }, { "epoch": 0.22197592203325053, "grad_norm": 1.6015625, "learning_rate": 7.107821615310282e-05, "loss": 1.8412, "step": 1452 }, { "epoch": 0.22212879801261226, "grad_norm": 1.3046875, "learning_rate": 7.107394276544601e-05, "loss": 1.6183, "step": 1453 }, { "epoch": 0.22228167399197402, "grad_norm": 1.421875, "learning_rate": 7.106966943543621e-05, "loss": 1.454, "step": 1454 }, { "epoch": 0.22243454997133574, "grad_norm": 1.5390625, "learning_rate": 7.10653961630819e-05, "loss": 1.7577, "step": 1455 }, { "epoch": 0.2225874259506975, "grad_norm": 1.484375, "learning_rate": 7.106112294839166e-05, "loss": 1.6572, "step": 1456 }, { "epoch": 0.22274030193005923, "grad_norm": 1.40625, "learning_rate": 7.1056849791374e-05, "loss": 1.4778, "step": 1457 }, { "epoch": 0.22289317790942098, "grad_norm": 1.2578125, "learning_rate": 7.105257669203744e-05, "loss": 1.453, "step": 1458 }, { "epoch": 0.22304605388878274, "grad_norm": 1.3515625, "learning_rate": 7.104830365039045e-05, "loss": 1.2404, "step": 1459 }, { "epoch": 0.22319892986814446, "grad_norm": 1.46875, "learning_rate": 7.104403066644159e-05, "loss": 1.6077, "step": 1460 }, { "epoch": 0.22335180584750622, "grad_norm": 1.4140625, "learning_rate": 7.103975774019936e-05, "loss": 1.4769, "step": 1461 }, { "epoch": 0.22350468182686795, "grad_norm": 1.4921875, "learning_rate": 7.103548487167226e-05, "loss": 1.361, "step": 1462 }, { "epoch": 0.2236575578062297, "grad_norm": 1.2421875, "learning_rate": 7.103121206086887e-05, "loss": 1.5263, "step": 1463 }, { "epoch": 0.22381043378559143, "grad_norm": 1.453125, "learning_rate": 7.10269393077976e-05, "loss": 1.5718, "step": 1464 }, { "epoch": 0.22396330976495318, "grad_norm": 1.3125, "learning_rate": 7.102266661246706e-05, "loss": 1.4062, "step": 1465 }, { "epoch": 0.2241161857443149, "grad_norm": 1.5625, "learning_rate": 7.101839397488573e-05, "loss": 1.4319, "step": 1466 }, { "epoch": 0.22426906172367667, "grad_norm": 1.390625, "learning_rate": 7.101412139506207e-05, "loss": 1.5983, "step": 1467 }, { "epoch": 0.22442193770303842, "grad_norm": 1.4609375, "learning_rate": 7.100984887300472e-05, "loss": 1.6772, "step": 1468 }, { "epoch": 0.22457481368240015, "grad_norm": 1.5078125, "learning_rate": 7.100557640872208e-05, "loss": 1.6605, "step": 1469 }, { "epoch": 0.2247276896617619, "grad_norm": 1.375, "learning_rate": 7.100130400222271e-05, "loss": 1.4976, "step": 1470 }, { "epoch": 0.22488056564112363, "grad_norm": 1.375, "learning_rate": 7.099703165351511e-05, "loss": 1.5065, "step": 1471 }, { "epoch": 0.22503344162048539, "grad_norm": 1.484375, "learning_rate": 7.099275936260783e-05, "loss": 1.7586, "step": 1472 }, { "epoch": 0.2251863175998471, "grad_norm": 1.4765625, "learning_rate": 7.098848712950938e-05, "loss": 1.5838, "step": 1473 }, { "epoch": 0.22533919357920887, "grad_norm": 1.359375, "learning_rate": 7.098421495422821e-05, "loss": 1.5305, "step": 1474 }, { "epoch": 0.2254920695585706, "grad_norm": 1.2734375, "learning_rate": 7.09799428367729e-05, "loss": 1.3885, "step": 1475 }, { "epoch": 0.22564494553793235, "grad_norm": 1.359375, "learning_rate": 7.097567077715193e-05, "loss": 1.6439, "step": 1476 }, { "epoch": 0.2257978215172941, "grad_norm": 1.4453125, "learning_rate": 7.097139877537387e-05, "loss": 1.5327, "step": 1477 }, { "epoch": 0.22595069749665583, "grad_norm": 1.4375, "learning_rate": 7.09671268314471e-05, "loss": 1.5136, "step": 1478 }, { "epoch": 0.2261035734760176, "grad_norm": 1.4140625, "learning_rate": 7.096285494538031e-05, "loss": 1.3857, "step": 1479 }, { "epoch": 0.22625644945537932, "grad_norm": 1.4375, "learning_rate": 7.095858311718191e-05, "loss": 1.5658, "step": 1480 }, { "epoch": 0.22640932543474107, "grad_norm": 1.2421875, "learning_rate": 7.095431134686039e-05, "loss": 1.2826, "step": 1481 }, { "epoch": 0.2265622014141028, "grad_norm": 1.328125, "learning_rate": 7.095003963442437e-05, "loss": 1.4333, "step": 1482 }, { "epoch": 0.22671507739346455, "grad_norm": 1.359375, "learning_rate": 7.094576797988222e-05, "loss": 1.3609, "step": 1483 }, { "epoch": 0.22686795337282628, "grad_norm": 1.34375, "learning_rate": 7.094149638324259e-05, "loss": 1.4976, "step": 1484 }, { "epoch": 0.22702082935218804, "grad_norm": 1.3671875, "learning_rate": 7.09372248445139e-05, "loss": 1.4213, "step": 1485 }, { "epoch": 0.2271737053315498, "grad_norm": 1.4609375, "learning_rate": 7.09329533637047e-05, "loss": 1.4922, "step": 1486 }, { "epoch": 0.22732658131091152, "grad_norm": 1.375, "learning_rate": 7.092868194082354e-05, "loss": 1.4087, "step": 1487 }, { "epoch": 0.22747945729027327, "grad_norm": 1.484375, "learning_rate": 7.092441057587884e-05, "loss": 1.6371, "step": 1488 }, { "epoch": 0.227632333269635, "grad_norm": 1.3515625, "learning_rate": 7.092013926887918e-05, "loss": 1.6181, "step": 1489 }, { "epoch": 0.22778520924899676, "grad_norm": 1.2734375, "learning_rate": 7.091586801983303e-05, "loss": 1.4218, "step": 1490 }, { "epoch": 0.22793808522835848, "grad_norm": 1.390625, "learning_rate": 7.091159682874898e-05, "loss": 1.4793, "step": 1491 }, { "epoch": 0.22809096120772024, "grad_norm": 1.3515625, "learning_rate": 7.090732569563548e-05, "loss": 1.4424, "step": 1492 }, { "epoch": 0.22824383718708197, "grad_norm": 1.2109375, "learning_rate": 7.090305462050102e-05, "loss": 1.3342, "step": 1493 }, { "epoch": 0.22839671316644372, "grad_norm": 1.640625, "learning_rate": 7.089878360335416e-05, "loss": 1.7358, "step": 1494 }, { "epoch": 0.22854958914580548, "grad_norm": 1.3125, "learning_rate": 7.089451264420338e-05, "loss": 1.4043, "step": 1495 }, { "epoch": 0.2287024651251672, "grad_norm": 1.2421875, "learning_rate": 7.089024174305722e-05, "loss": 1.5541, "step": 1496 }, { "epoch": 0.22885534110452896, "grad_norm": 1.59375, "learning_rate": 7.088597089992417e-05, "loss": 1.7429, "step": 1497 }, { "epoch": 0.22900821708389069, "grad_norm": 1.453125, "learning_rate": 7.088170011481279e-05, "loss": 1.499, "step": 1498 }, { "epoch": 0.22916109306325244, "grad_norm": 1.53125, "learning_rate": 7.087742938773153e-05, "loss": 1.4879, "step": 1499 }, { "epoch": 0.22931396904261417, "grad_norm": 1.2734375, "learning_rate": 7.087315871868889e-05, "loss": 1.4627, "step": 1500 }, { "epoch": 0.22946684502197592, "grad_norm": 1.4453125, "learning_rate": 7.086888810769344e-05, "loss": 1.615, "step": 1501 }, { "epoch": 0.22961972100133765, "grad_norm": 1.328125, "learning_rate": 7.086461755475364e-05, "loss": 1.3315, "step": 1502 }, { "epoch": 0.2297725969806994, "grad_norm": 1.484375, "learning_rate": 7.086034705987808e-05, "loss": 1.5015, "step": 1503 }, { "epoch": 0.22992547296006116, "grad_norm": 1.4921875, "learning_rate": 7.085607662307517e-05, "loss": 1.6255, "step": 1504 }, { "epoch": 0.2300783489394229, "grad_norm": 1.390625, "learning_rate": 7.085180624435348e-05, "loss": 1.7215, "step": 1505 }, { "epoch": 0.23023122491878464, "grad_norm": 1.3203125, "learning_rate": 7.08475359237215e-05, "loss": 1.3492, "step": 1506 }, { "epoch": 0.23038410089814637, "grad_norm": 1.3046875, "learning_rate": 7.084326566118775e-05, "loss": 1.3794, "step": 1507 }, { "epoch": 0.23053697687750813, "grad_norm": 1.453125, "learning_rate": 7.083899545676077e-05, "loss": 1.684, "step": 1508 }, { "epoch": 0.23068985285686985, "grad_norm": 1.2890625, "learning_rate": 7.0834725310449e-05, "loss": 1.5644, "step": 1509 }, { "epoch": 0.2308427288362316, "grad_norm": 1.359375, "learning_rate": 7.0830455222261e-05, "loss": 1.4607, "step": 1510 }, { "epoch": 0.23099560481559334, "grad_norm": 1.5546875, "learning_rate": 7.082618519220526e-05, "loss": 1.4598, "step": 1511 }, { "epoch": 0.2311484807949551, "grad_norm": 1.4375, "learning_rate": 7.08219152202903e-05, "loss": 1.5731, "step": 1512 }, { "epoch": 0.23130135677431685, "grad_norm": 1.3125, "learning_rate": 7.081764530652466e-05, "loss": 1.5043, "step": 1513 }, { "epoch": 0.23145423275367857, "grad_norm": 1.375, "learning_rate": 7.081337545091678e-05, "loss": 1.4537, "step": 1514 }, { "epoch": 0.23160710873304033, "grad_norm": 1.3671875, "learning_rate": 7.08091056534752e-05, "loss": 1.5614, "step": 1515 }, { "epoch": 0.23175998471240206, "grad_norm": 1.390625, "learning_rate": 7.080483591420844e-05, "loss": 1.3568, "step": 1516 }, { "epoch": 0.2319128606917638, "grad_norm": 1.453125, "learning_rate": 7.080056623312503e-05, "loss": 1.5271, "step": 1517 }, { "epoch": 0.23206573667112554, "grad_norm": 1.4296875, "learning_rate": 7.079629661023341e-05, "loss": 1.7042, "step": 1518 }, { "epoch": 0.2322186126504873, "grad_norm": 1.296875, "learning_rate": 7.079202704554219e-05, "loss": 1.7501, "step": 1519 }, { "epoch": 0.23237148862984902, "grad_norm": 1.3828125, "learning_rate": 7.078775753905981e-05, "loss": 1.4957, "step": 1520 }, { "epoch": 0.23252436460921078, "grad_norm": 1.4453125, "learning_rate": 7.078348809079475e-05, "loss": 1.4789, "step": 1521 }, { "epoch": 0.23267724058857253, "grad_norm": 1.2421875, "learning_rate": 7.07792187007556e-05, "loss": 1.3067, "step": 1522 }, { "epoch": 0.23283011656793426, "grad_norm": 1.421875, "learning_rate": 7.077494936895079e-05, "loss": 1.7608, "step": 1523 }, { "epoch": 0.23298299254729601, "grad_norm": 1.3671875, "learning_rate": 7.077068009538892e-05, "loss": 1.3477, "step": 1524 }, { "epoch": 0.23313586852665774, "grad_norm": 1.28125, "learning_rate": 7.076641088007842e-05, "loss": 1.4563, "step": 1525 }, { "epoch": 0.2332887445060195, "grad_norm": 1.4453125, "learning_rate": 7.07621417230278e-05, "loss": 1.7172, "step": 1526 }, { "epoch": 0.23344162048538122, "grad_norm": 1.5, "learning_rate": 7.075787262424562e-05, "loss": 1.6279, "step": 1527 }, { "epoch": 0.23359449646474298, "grad_norm": 1.28125, "learning_rate": 7.075360358374033e-05, "loss": 1.4217, "step": 1528 }, { "epoch": 0.2337473724441047, "grad_norm": 1.390625, "learning_rate": 7.074933460152049e-05, "loss": 1.399, "step": 1529 }, { "epoch": 0.23390024842346646, "grad_norm": 1.3671875, "learning_rate": 7.074506567759456e-05, "loss": 1.8604, "step": 1530 }, { "epoch": 0.23405312440282822, "grad_norm": 1.484375, "learning_rate": 7.074079681197108e-05, "loss": 1.7644, "step": 1531 }, { "epoch": 0.23420600038218994, "grad_norm": 1.265625, "learning_rate": 7.073652800465857e-05, "loss": 1.1034, "step": 1532 }, { "epoch": 0.2343588763615517, "grad_norm": 1.296875, "learning_rate": 7.073225925566549e-05, "loss": 1.3709, "step": 1533 }, { "epoch": 0.23451175234091343, "grad_norm": 1.3046875, "learning_rate": 7.072799056500039e-05, "loss": 1.335, "step": 1534 }, { "epoch": 0.23466462832027518, "grad_norm": 1.1875, "learning_rate": 7.072372193267173e-05, "loss": 1.4285, "step": 1535 }, { "epoch": 0.2348175042996369, "grad_norm": 1.34375, "learning_rate": 7.071945335868809e-05, "loss": 1.5445, "step": 1536 }, { "epoch": 0.23497038027899866, "grad_norm": 1.3125, "learning_rate": 7.07151848430579e-05, "loss": 1.3067, "step": 1537 }, { "epoch": 0.2351232562583604, "grad_norm": 1.2578125, "learning_rate": 7.071091638578975e-05, "loss": 1.4368, "step": 1538 }, { "epoch": 0.23527613223772215, "grad_norm": 1.3828125, "learning_rate": 7.070664798689206e-05, "loss": 1.4905, "step": 1539 }, { "epoch": 0.2354290082170839, "grad_norm": 1.1796875, "learning_rate": 7.070237964637337e-05, "loss": 1.2846, "step": 1540 }, { "epoch": 0.23558188419644563, "grad_norm": 1.2734375, "learning_rate": 7.06981113642422e-05, "loss": 1.4535, "step": 1541 }, { "epoch": 0.23573476017580738, "grad_norm": 1.453125, "learning_rate": 7.069384314050705e-05, "loss": 1.7261, "step": 1542 }, { "epoch": 0.2358876361551691, "grad_norm": 1.359375, "learning_rate": 7.068957497517643e-05, "loss": 1.5291, "step": 1543 }, { "epoch": 0.23604051213453087, "grad_norm": 1.265625, "learning_rate": 7.068530686825882e-05, "loss": 1.2181, "step": 1544 }, { "epoch": 0.2361933881138926, "grad_norm": 1.5, "learning_rate": 7.068103881976276e-05, "loss": 1.7527, "step": 1545 }, { "epoch": 0.23634626409325435, "grad_norm": 1.234375, "learning_rate": 7.067677082969674e-05, "loss": 1.4071, "step": 1546 }, { "epoch": 0.23649914007261608, "grad_norm": 1.421875, "learning_rate": 7.067250289806926e-05, "loss": 1.6761, "step": 1547 }, { "epoch": 0.23665201605197783, "grad_norm": 1.359375, "learning_rate": 7.066823502488887e-05, "loss": 1.5254, "step": 1548 }, { "epoch": 0.2368048920313396, "grad_norm": 1.28125, "learning_rate": 7.066396721016397e-05, "loss": 1.3407, "step": 1549 }, { "epoch": 0.23695776801070131, "grad_norm": 1.2265625, "learning_rate": 7.065969945390318e-05, "loss": 1.2598, "step": 1550 }, { "epoch": 0.23711064399006307, "grad_norm": 1.2734375, "learning_rate": 7.065543175611493e-05, "loss": 1.4687, "step": 1551 }, { "epoch": 0.2372635199694248, "grad_norm": 1.4296875, "learning_rate": 7.065116411680779e-05, "loss": 1.7711, "step": 1552 }, { "epoch": 0.23741639594878655, "grad_norm": 1.53125, "learning_rate": 7.064689653599022e-05, "loss": 1.6101, "step": 1553 }, { "epoch": 0.23756927192814828, "grad_norm": 1.4765625, "learning_rate": 7.064262901367072e-05, "loss": 1.6722, "step": 1554 }, { "epoch": 0.23772214790751003, "grad_norm": 1.5234375, "learning_rate": 7.063836154985781e-05, "loss": 1.4756, "step": 1555 }, { "epoch": 0.23787502388687176, "grad_norm": 1.390625, "learning_rate": 7.063409414455999e-05, "loss": 1.5038, "step": 1556 }, { "epoch": 0.23802789986623352, "grad_norm": 1.2578125, "learning_rate": 7.062982679778578e-05, "loss": 1.3784, "step": 1557 }, { "epoch": 0.23818077584559527, "grad_norm": 1.296875, "learning_rate": 7.062555950954369e-05, "loss": 1.6468, "step": 1558 }, { "epoch": 0.238333651824957, "grad_norm": 1.453125, "learning_rate": 7.062129227984216e-05, "loss": 1.4806, "step": 1559 }, { "epoch": 0.23848652780431875, "grad_norm": 1.2890625, "learning_rate": 7.061702510868978e-05, "loss": 1.5251, "step": 1560 }, { "epoch": 0.23863940378368048, "grad_norm": 1.2734375, "learning_rate": 7.061275799609498e-05, "loss": 1.2925, "step": 1561 }, { "epoch": 0.23879227976304224, "grad_norm": 1.5234375, "learning_rate": 7.060849094206634e-05, "loss": 1.5742, "step": 1562 }, { "epoch": 0.23894515574240396, "grad_norm": 1.3125, "learning_rate": 7.060422394661226e-05, "loss": 1.4171, "step": 1563 }, { "epoch": 0.23909803172176572, "grad_norm": 1.2421875, "learning_rate": 7.059995700974137e-05, "loss": 1.1985, "step": 1564 }, { "epoch": 0.23925090770112745, "grad_norm": 1.25, "learning_rate": 7.05956901314621e-05, "loss": 1.5138, "step": 1565 }, { "epoch": 0.2394037836804892, "grad_norm": 1.3125, "learning_rate": 7.059142331178293e-05, "loss": 1.6128, "step": 1566 }, { "epoch": 0.23955665965985096, "grad_norm": 1.390625, "learning_rate": 7.058715655071244e-05, "loss": 1.5754, "step": 1567 }, { "epoch": 0.23970953563921268, "grad_norm": 1.2734375, "learning_rate": 7.058288984825903e-05, "loss": 1.2247, "step": 1568 }, { "epoch": 0.23986241161857444, "grad_norm": 1.265625, "learning_rate": 7.057862320443131e-05, "loss": 1.5768, "step": 1569 }, { "epoch": 0.24001528759793617, "grad_norm": 1.390625, "learning_rate": 7.057435661923773e-05, "loss": 1.3493, "step": 1570 }, { "epoch": 0.24016816357729792, "grad_norm": 1.375, "learning_rate": 7.05700900926868e-05, "loss": 1.4452, "step": 1571 }, { "epoch": 0.24032103955665965, "grad_norm": 1.375, "learning_rate": 7.056582362478704e-05, "loss": 1.4294, "step": 1572 }, { "epoch": 0.2404739155360214, "grad_norm": 1.4140625, "learning_rate": 7.056155721554689e-05, "loss": 1.5566, "step": 1573 }, { "epoch": 0.24062679151538313, "grad_norm": 1.5234375, "learning_rate": 7.055729086497492e-05, "loss": 1.9338, "step": 1574 }, { "epoch": 0.2407796674947449, "grad_norm": 1.3984375, "learning_rate": 7.05530245730796e-05, "loss": 1.5442, "step": 1575 }, { "epoch": 0.24093254347410664, "grad_norm": 1.2734375, "learning_rate": 7.054875833986945e-05, "loss": 1.51, "step": 1576 }, { "epoch": 0.24108541945346837, "grad_norm": 1.2421875, "learning_rate": 7.054449216535295e-05, "loss": 1.4149, "step": 1577 }, { "epoch": 0.24123829543283012, "grad_norm": 1.4453125, "learning_rate": 7.054022604953866e-05, "loss": 1.6081, "step": 1578 }, { "epoch": 0.24139117141219185, "grad_norm": 1.5390625, "learning_rate": 7.053595999243501e-05, "loss": 1.8564, "step": 1579 }, { "epoch": 0.2415440473915536, "grad_norm": 1.2890625, "learning_rate": 7.053169399405052e-05, "loss": 1.5318, "step": 1580 }, { "epoch": 0.24169692337091533, "grad_norm": 1.25, "learning_rate": 7.052742805439371e-05, "loss": 1.3951, "step": 1581 }, { "epoch": 0.2418497993502771, "grad_norm": 1.4921875, "learning_rate": 7.052316217347307e-05, "loss": 1.5236, "step": 1582 }, { "epoch": 0.24200267532963882, "grad_norm": 1.5, "learning_rate": 7.051889635129713e-05, "loss": 1.6543, "step": 1583 }, { "epoch": 0.24215555130900057, "grad_norm": 1.4453125, "learning_rate": 7.051463058787433e-05, "loss": 1.5507, "step": 1584 }, { "epoch": 0.24230842728836233, "grad_norm": 1.1953125, "learning_rate": 7.051036488321323e-05, "loss": 1.1866, "step": 1585 }, { "epoch": 0.24246130326772405, "grad_norm": 1.4765625, "learning_rate": 7.050609923732232e-05, "loss": 1.6024, "step": 1586 }, { "epoch": 0.2426141792470858, "grad_norm": 1.4609375, "learning_rate": 7.050183365021006e-05, "loss": 1.8052, "step": 1587 }, { "epoch": 0.24276705522644754, "grad_norm": 1.515625, "learning_rate": 7.049756812188501e-05, "loss": 1.4643, "step": 1588 }, { "epoch": 0.2429199312058093, "grad_norm": 1.296875, "learning_rate": 7.049330265235561e-05, "loss": 1.663, "step": 1589 }, { "epoch": 0.24307280718517102, "grad_norm": 1.25, "learning_rate": 7.048903724163043e-05, "loss": 1.1964, "step": 1590 }, { "epoch": 0.24322568316453277, "grad_norm": 1.375, "learning_rate": 7.048477188971794e-05, "loss": 1.7147, "step": 1591 }, { "epoch": 0.2433785591438945, "grad_norm": 1.2109375, "learning_rate": 7.048050659662656e-05, "loss": 1.227, "step": 1592 }, { "epoch": 0.24353143512325626, "grad_norm": 1.359375, "learning_rate": 7.047624136236494e-05, "loss": 1.3895, "step": 1593 }, { "epoch": 0.243684311102618, "grad_norm": 1.4453125, "learning_rate": 7.047197618694146e-05, "loss": 1.6284, "step": 1594 }, { "epoch": 0.24383718708197974, "grad_norm": 1.421875, "learning_rate": 7.046771107036469e-05, "loss": 1.3616, "step": 1595 }, { "epoch": 0.2439900630613415, "grad_norm": 1.4921875, "learning_rate": 7.046344601264307e-05, "loss": 1.7336, "step": 1596 }, { "epoch": 0.24414293904070322, "grad_norm": 1.40625, "learning_rate": 7.045918101378516e-05, "loss": 1.3876, "step": 1597 }, { "epoch": 0.24429581502006498, "grad_norm": 1.3203125, "learning_rate": 7.045491607379946e-05, "loss": 1.3047, "step": 1598 }, { "epoch": 0.2444486909994267, "grad_norm": 1.40625, "learning_rate": 7.045065119269439e-05, "loss": 1.7368, "step": 1599 }, { "epoch": 0.24460156697878846, "grad_norm": 1.3671875, "learning_rate": 7.044638637047854e-05, "loss": 1.4671, "step": 1600 }, { "epoch": 0.2447544429581502, "grad_norm": 1.2890625, "learning_rate": 7.044212160716035e-05, "loss": 1.4192, "step": 1601 }, { "epoch": 0.24490731893751194, "grad_norm": 1.2890625, "learning_rate": 7.043785690274836e-05, "loss": 1.39, "step": 1602 }, { "epoch": 0.2450601949168737, "grad_norm": 1.34375, "learning_rate": 7.043359225725101e-05, "loss": 1.5266, "step": 1603 }, { "epoch": 0.24521307089623542, "grad_norm": 1.3671875, "learning_rate": 7.04293276706769e-05, "loss": 1.4366, "step": 1604 }, { "epoch": 0.24536594687559718, "grad_norm": 1.25, "learning_rate": 7.042506314303445e-05, "loss": 1.4609, "step": 1605 }, { "epoch": 0.2455188228549589, "grad_norm": 1.2421875, "learning_rate": 7.042079867433216e-05, "loss": 1.365, "step": 1606 }, { "epoch": 0.24567169883432066, "grad_norm": 1.328125, "learning_rate": 7.041653426457857e-05, "loss": 1.5229, "step": 1607 }, { "epoch": 0.2458245748136824, "grad_norm": 1.2265625, "learning_rate": 7.04122699137821e-05, "loss": 1.3442, "step": 1608 }, { "epoch": 0.24597745079304414, "grad_norm": 1.3515625, "learning_rate": 7.040800562195138e-05, "loss": 1.4084, "step": 1609 }, { "epoch": 0.24613032677240587, "grad_norm": 1.1328125, "learning_rate": 7.040374138909478e-05, "loss": 1.3918, "step": 1610 }, { "epoch": 0.24628320275176763, "grad_norm": 1.4296875, "learning_rate": 7.039947721522086e-05, "loss": 1.4391, "step": 1611 }, { "epoch": 0.24643607873112938, "grad_norm": 1.5390625, "learning_rate": 7.039521310033814e-05, "loss": 1.5682, "step": 1612 }, { "epoch": 0.2465889547104911, "grad_norm": 1.3046875, "learning_rate": 7.039094904445505e-05, "loss": 1.3743, "step": 1613 }, { "epoch": 0.24674183068985286, "grad_norm": 1.3984375, "learning_rate": 7.038668504758013e-05, "loss": 1.6528, "step": 1614 }, { "epoch": 0.2468947066692146, "grad_norm": 1.4140625, "learning_rate": 7.038242110972186e-05, "loss": 1.4226, "step": 1615 }, { "epoch": 0.24704758264857635, "grad_norm": 1.3671875, "learning_rate": 7.037815723088877e-05, "loss": 1.5238, "step": 1616 }, { "epoch": 0.24720045862793807, "grad_norm": 1.328125, "learning_rate": 7.037389341108937e-05, "loss": 1.3918, "step": 1617 }, { "epoch": 0.24735333460729983, "grad_norm": 1.421875, "learning_rate": 7.036962965033205e-05, "loss": 1.5198, "step": 1618 }, { "epoch": 0.24750621058666156, "grad_norm": 1.3046875, "learning_rate": 7.036536594862544e-05, "loss": 1.5365, "step": 1619 }, { "epoch": 0.2476590865660233, "grad_norm": 1.4296875, "learning_rate": 7.036110230597794e-05, "loss": 1.5462, "step": 1620 }, { "epoch": 0.24781196254538507, "grad_norm": 2.15625, "learning_rate": 7.03568387223981e-05, "loss": 1.4217, "step": 1621 }, { "epoch": 0.2479648385247468, "grad_norm": 1.1953125, "learning_rate": 7.03525751978944e-05, "loss": 1.2489, "step": 1622 }, { "epoch": 0.24811771450410855, "grad_norm": 1.46875, "learning_rate": 7.034831173247536e-05, "loss": 1.5917, "step": 1623 }, { "epoch": 0.24827059048347028, "grad_norm": 1.3046875, "learning_rate": 7.034404832614945e-05, "loss": 1.5074, "step": 1624 }, { "epoch": 0.24842346646283203, "grad_norm": 1.4296875, "learning_rate": 7.033978497892516e-05, "loss": 1.4686, "step": 1625 }, { "epoch": 0.24857634244219376, "grad_norm": 1.2578125, "learning_rate": 7.033552169081099e-05, "loss": 1.2658, "step": 1626 }, { "epoch": 0.24872921842155551, "grad_norm": 1.421875, "learning_rate": 7.033125846181544e-05, "loss": 1.498, "step": 1627 }, { "epoch": 0.24888209440091724, "grad_norm": 1.1640625, "learning_rate": 7.032699529194706e-05, "loss": 1.2055, "step": 1628 }, { "epoch": 0.249034970380279, "grad_norm": 1.2265625, "learning_rate": 7.032273218121425e-05, "loss": 1.3709, "step": 1629 }, { "epoch": 0.24918784635964075, "grad_norm": 1.46875, "learning_rate": 7.031846912962558e-05, "loss": 1.4894, "step": 1630 }, { "epoch": 0.24934072233900248, "grad_norm": 1.4375, "learning_rate": 7.03142061371895e-05, "loss": 1.5204, "step": 1631 }, { "epoch": 0.24949359831836423, "grad_norm": 1.390625, "learning_rate": 7.030994320391454e-05, "loss": 1.2634, "step": 1632 }, { "epoch": 0.24964647429772596, "grad_norm": 1.4453125, "learning_rate": 7.03056803298092e-05, "loss": 1.4834, "step": 1633 }, { "epoch": 0.24979935027708772, "grad_norm": 1.390625, "learning_rate": 7.030141751488192e-05, "loss": 1.5508, "step": 1634 }, { "epoch": 0.24995222625644944, "grad_norm": 1.3671875, "learning_rate": 7.029715475914126e-05, "loss": 1.6034, "step": 1635 }, { "epoch": 0.25010510223581117, "grad_norm": 1.359375, "learning_rate": 7.029289206259566e-05, "loss": 1.5939, "step": 1636 }, { "epoch": 0.2502579782151729, "grad_norm": 1.40625, "learning_rate": 7.028862942525366e-05, "loss": 1.5446, "step": 1637 }, { "epoch": 0.2504108541945347, "grad_norm": 1.265625, "learning_rate": 7.028436684712376e-05, "loss": 1.4142, "step": 1638 }, { "epoch": 0.25056373017389644, "grad_norm": 1.296875, "learning_rate": 7.028010432821439e-05, "loss": 1.3694, "step": 1639 }, { "epoch": 0.2507166061532582, "grad_norm": 1.296875, "learning_rate": 7.027584186853411e-05, "loss": 1.4854, "step": 1640 }, { "epoch": 0.2508694821326199, "grad_norm": 1.21875, "learning_rate": 7.027157946809138e-05, "loss": 1.4291, "step": 1641 }, { "epoch": 0.25102235811198165, "grad_norm": 1.3203125, "learning_rate": 7.026731712689471e-05, "loss": 1.4224, "step": 1642 }, { "epoch": 0.2511752340913434, "grad_norm": 1.359375, "learning_rate": 7.02630548449526e-05, "loss": 1.3811, "step": 1643 }, { "epoch": 0.25132811007070516, "grad_norm": 1.3984375, "learning_rate": 7.025879262227355e-05, "loss": 1.5781, "step": 1644 }, { "epoch": 0.25148098605006686, "grad_norm": 1.3203125, "learning_rate": 7.025453045886604e-05, "loss": 1.5252, "step": 1645 }, { "epoch": 0.2516338620294286, "grad_norm": 1.40625, "learning_rate": 7.025026835473851e-05, "loss": 1.5452, "step": 1646 }, { "epoch": 0.25178673800879037, "grad_norm": 1.421875, "learning_rate": 7.024600630989957e-05, "loss": 1.6979, "step": 1647 }, { "epoch": 0.2519396139881521, "grad_norm": 1.5234375, "learning_rate": 7.02417443243576e-05, "loss": 1.5456, "step": 1648 }, { "epoch": 0.2520924899675139, "grad_norm": 1.359375, "learning_rate": 7.023748239812119e-05, "loss": 1.4286, "step": 1649 }, { "epoch": 0.2522453659468756, "grad_norm": 1.3515625, "learning_rate": 7.023322053119879e-05, "loss": 1.425, "step": 1650 }, { "epoch": 0.25239824192623733, "grad_norm": 1.4453125, "learning_rate": 7.022895872359885e-05, "loss": 1.6111, "step": 1651 }, { "epoch": 0.2525511179055991, "grad_norm": 1.296875, "learning_rate": 7.022469697532995e-05, "loss": 1.6046, "step": 1652 }, { "epoch": 0.25270399388496084, "grad_norm": 1.390625, "learning_rate": 7.02204352864005e-05, "loss": 1.5269, "step": 1653 }, { "epoch": 0.25285686986432254, "grad_norm": 1.4375, "learning_rate": 7.021617365681905e-05, "loss": 1.3472, "step": 1654 }, { "epoch": 0.2530097458436843, "grad_norm": 1.34375, "learning_rate": 7.021191208659407e-05, "loss": 1.6016, "step": 1655 }, { "epoch": 0.25316262182304605, "grad_norm": 1.484375, "learning_rate": 7.020765057573406e-05, "loss": 1.5198, "step": 1656 }, { "epoch": 0.2533154978024078, "grad_norm": 1.3046875, "learning_rate": 7.020338912424756e-05, "loss": 1.4021, "step": 1657 }, { "epoch": 0.25346837378176956, "grad_norm": 1.5078125, "learning_rate": 7.019912773214295e-05, "loss": 1.6291, "step": 1658 }, { "epoch": 0.25362124976113126, "grad_norm": 1.3828125, "learning_rate": 7.01948663994288e-05, "loss": 1.4622, "step": 1659 }, { "epoch": 0.253774125740493, "grad_norm": 1.328125, "learning_rate": 7.019060512611358e-05, "loss": 1.607, "step": 1660 }, { "epoch": 0.2539270017198548, "grad_norm": 1.265625, "learning_rate": 7.018634391220582e-05, "loss": 1.5034, "step": 1661 }, { "epoch": 0.2540798776992165, "grad_norm": 1.2578125, "learning_rate": 7.018208275771396e-05, "loss": 1.4283, "step": 1662 }, { "epoch": 0.2542327536785782, "grad_norm": 1.4296875, "learning_rate": 7.017782166264655e-05, "loss": 1.4462, "step": 1663 }, { "epoch": 0.25438562965794, "grad_norm": 1.3515625, "learning_rate": 7.017356062701201e-05, "loss": 1.42, "step": 1664 }, { "epoch": 0.25453850563730174, "grad_norm": 1.2578125, "learning_rate": 7.016929965081889e-05, "loss": 1.351, "step": 1665 }, { "epoch": 0.2546913816166635, "grad_norm": 1.25, "learning_rate": 7.016503873407566e-05, "loss": 1.4472, "step": 1666 }, { "epoch": 0.25484425759602525, "grad_norm": 1.4375, "learning_rate": 7.016077787679078e-05, "loss": 1.3188, "step": 1667 }, { "epoch": 0.25499713357538695, "grad_norm": 1.2265625, "learning_rate": 7.015651707897283e-05, "loss": 1.3774, "step": 1668 }, { "epoch": 0.2551500095547487, "grad_norm": 1.1875, "learning_rate": 7.01522563406302e-05, "loss": 1.0604, "step": 1669 }, { "epoch": 0.25530288553411046, "grad_norm": 1.8359375, "learning_rate": 7.014799566177145e-05, "loss": 1.5571, "step": 1670 }, { "epoch": 0.2554557615134722, "grad_norm": 1.3515625, "learning_rate": 7.014373504240504e-05, "loss": 1.7846, "step": 1671 }, { "epoch": 0.2556086374928339, "grad_norm": 1.75, "learning_rate": 7.013947448253945e-05, "loss": 1.7147, "step": 1672 }, { "epoch": 0.25576151347219567, "grad_norm": 1.2890625, "learning_rate": 7.013521398218324e-05, "loss": 1.4475, "step": 1673 }, { "epoch": 0.2559143894515574, "grad_norm": 1.3671875, "learning_rate": 7.013095354134481e-05, "loss": 1.3542, "step": 1674 }, { "epoch": 0.2560672654309192, "grad_norm": 1.375, "learning_rate": 7.012669316003271e-05, "loss": 1.5669, "step": 1675 }, { "epoch": 0.25622014141028093, "grad_norm": 1.421875, "learning_rate": 7.01224328382554e-05, "loss": 1.5809, "step": 1676 }, { "epoch": 0.25637301738964263, "grad_norm": 1.2890625, "learning_rate": 7.011817257602138e-05, "loss": 1.3683, "step": 1677 }, { "epoch": 0.2565258933690044, "grad_norm": 1.3984375, "learning_rate": 7.011391237333919e-05, "loss": 1.491, "step": 1678 }, { "epoch": 0.25667876934836614, "grad_norm": 1.40625, "learning_rate": 7.010965223021721e-05, "loss": 1.5288, "step": 1679 }, { "epoch": 0.2568316453277279, "grad_norm": 1.2734375, "learning_rate": 7.010539214666403e-05, "loss": 1.1408, "step": 1680 }, { "epoch": 0.2569845213070896, "grad_norm": 1.3984375, "learning_rate": 7.010113212268807e-05, "loss": 1.5371, "step": 1681 }, { "epoch": 0.25713739728645135, "grad_norm": 1.4296875, "learning_rate": 7.009687215829788e-05, "loss": 1.8059, "step": 1682 }, { "epoch": 0.2572902732658131, "grad_norm": 1.359375, "learning_rate": 7.009261225350194e-05, "loss": 1.741, "step": 1683 }, { "epoch": 0.25744314924517486, "grad_norm": 1.46875, "learning_rate": 7.00883524083087e-05, "loss": 1.4891, "step": 1684 }, { "epoch": 0.2575960252245366, "grad_norm": 1.359375, "learning_rate": 7.008409262272666e-05, "loss": 1.6341, "step": 1685 }, { "epoch": 0.2577489012038983, "grad_norm": 1.390625, "learning_rate": 7.007983289676433e-05, "loss": 1.3046, "step": 1686 }, { "epoch": 0.2579017771832601, "grad_norm": 1.4140625, "learning_rate": 7.007557323043019e-05, "loss": 1.6938, "step": 1687 }, { "epoch": 0.2580546531626218, "grad_norm": 1.3671875, "learning_rate": 7.00713136237327e-05, "loss": 1.4352, "step": 1688 }, { "epoch": 0.2582075291419836, "grad_norm": 1.4921875, "learning_rate": 7.006705407668044e-05, "loss": 1.5407, "step": 1689 }, { "epoch": 0.2583604051213453, "grad_norm": 1.9140625, "learning_rate": 7.006279458928182e-05, "loss": 1.51, "step": 1690 }, { "epoch": 0.25851328110070704, "grad_norm": 1.46875, "learning_rate": 7.00585351615453e-05, "loss": 1.4514, "step": 1691 }, { "epoch": 0.2586661570800688, "grad_norm": 1.3125, "learning_rate": 7.005427579347949e-05, "loss": 1.2179, "step": 1692 }, { "epoch": 0.25881903305943055, "grad_norm": 1.2734375, "learning_rate": 7.005001648509272e-05, "loss": 1.3294, "step": 1693 }, { "epoch": 0.2589719090387923, "grad_norm": 1.3828125, "learning_rate": 7.004575723639362e-05, "loss": 1.4427, "step": 1694 }, { "epoch": 0.259124785018154, "grad_norm": 1.3984375, "learning_rate": 7.004149804739059e-05, "loss": 1.4011, "step": 1695 }, { "epoch": 0.25927766099751576, "grad_norm": 1.3671875, "learning_rate": 7.003723891809217e-05, "loss": 1.5464, "step": 1696 }, { "epoch": 0.2594305369768775, "grad_norm": 1.203125, "learning_rate": 7.003297984850684e-05, "loss": 1.3244, "step": 1697 }, { "epoch": 0.25958341295623927, "grad_norm": 1.2890625, "learning_rate": 7.002872083864304e-05, "loss": 1.3343, "step": 1698 }, { "epoch": 0.25973628893560097, "grad_norm": 1.34375, "learning_rate": 7.002446188850931e-05, "loss": 1.4058, "step": 1699 }, { "epoch": 0.2598891649149627, "grad_norm": 1.421875, "learning_rate": 7.00202029981141e-05, "loss": 1.4449, "step": 1700 }, { "epoch": 0.2600420408943245, "grad_norm": 1.484375, "learning_rate": 7.001594416746593e-05, "loss": 1.7897, "step": 1701 }, { "epoch": 0.26019491687368623, "grad_norm": 1.359375, "learning_rate": 7.001168539657325e-05, "loss": 1.6254, "step": 1702 }, { "epoch": 0.260347792853048, "grad_norm": 1.6875, "learning_rate": 7.000742668544465e-05, "loss": 1.3763, "step": 1703 }, { "epoch": 0.2605006688324097, "grad_norm": 1.453125, "learning_rate": 7.000316803408849e-05, "loss": 1.349, "step": 1704 }, { "epoch": 0.26065354481177144, "grad_norm": 1.515625, "learning_rate": 6.999890944251328e-05, "loss": 1.6902, "step": 1705 }, { "epoch": 0.2608064207911332, "grad_norm": 1.6015625, "learning_rate": 6.999465091072756e-05, "loss": 1.5551, "step": 1706 }, { "epoch": 0.26095929677049495, "grad_norm": 1.3359375, "learning_rate": 6.999039243873977e-05, "loss": 1.4934, "step": 1707 }, { "epoch": 0.26111217274985665, "grad_norm": 1.484375, "learning_rate": 6.998613402655846e-05, "loss": 1.6402, "step": 1708 }, { "epoch": 0.2612650487292184, "grad_norm": 1.390625, "learning_rate": 6.998187567419206e-05, "loss": 1.4356, "step": 1709 }, { "epoch": 0.26141792470858016, "grad_norm": 1.390625, "learning_rate": 6.997761738164904e-05, "loss": 1.3609, "step": 1710 }, { "epoch": 0.2615708006879419, "grad_norm": 1.3359375, "learning_rate": 6.997335914893794e-05, "loss": 1.2795, "step": 1711 }, { "epoch": 0.2617236766673037, "grad_norm": 1.2265625, "learning_rate": 6.996910097606721e-05, "loss": 1.1373, "step": 1712 }, { "epoch": 0.2618765526466654, "grad_norm": 1.953125, "learning_rate": 6.996484286304537e-05, "loss": 1.5648, "step": 1713 }, { "epoch": 0.2620294286260271, "grad_norm": 1.3125, "learning_rate": 6.996058480988086e-05, "loss": 1.4214, "step": 1714 }, { "epoch": 0.2621823046053889, "grad_norm": 1.21875, "learning_rate": 6.995632681658221e-05, "loss": 1.5432, "step": 1715 }, { "epoch": 0.26233518058475064, "grad_norm": 1.2578125, "learning_rate": 6.995206888315791e-05, "loss": 1.3059, "step": 1716 }, { "epoch": 0.26248805656411234, "grad_norm": 1.25, "learning_rate": 6.994781100961636e-05, "loss": 1.5841, "step": 1717 }, { "epoch": 0.2626409325434741, "grad_norm": 1.3046875, "learning_rate": 6.994355319596616e-05, "loss": 1.3904, "step": 1718 }, { "epoch": 0.26279380852283585, "grad_norm": 1.34375, "learning_rate": 6.993929544221572e-05, "loss": 1.4175, "step": 1719 }, { "epoch": 0.2629466845021976, "grad_norm": 1.484375, "learning_rate": 6.993503774837356e-05, "loss": 1.4299, "step": 1720 }, { "epoch": 0.26309956048155936, "grad_norm": 1.3828125, "learning_rate": 6.993078011444811e-05, "loss": 1.569, "step": 1721 }, { "epoch": 0.26325243646092106, "grad_norm": 1.2734375, "learning_rate": 6.992652254044796e-05, "loss": 1.5003, "step": 1722 }, { "epoch": 0.2634053124402828, "grad_norm": 1.2265625, "learning_rate": 6.992226502638152e-05, "loss": 0.9971, "step": 1723 }, { "epoch": 0.26355818841964457, "grad_norm": 1.328125, "learning_rate": 6.991800757225728e-05, "loss": 1.3149, "step": 1724 }, { "epoch": 0.2637110643990063, "grad_norm": 1.390625, "learning_rate": 6.991375017808372e-05, "loss": 1.4036, "step": 1725 }, { "epoch": 0.263863940378368, "grad_norm": 1.265625, "learning_rate": 6.990949284386934e-05, "loss": 1.4504, "step": 1726 }, { "epoch": 0.2640168163577298, "grad_norm": 1.3359375, "learning_rate": 6.990523556962268e-05, "loss": 1.5973, "step": 1727 }, { "epoch": 0.26416969233709153, "grad_norm": 1.453125, "learning_rate": 6.990097835535207e-05, "loss": 1.6048, "step": 1728 }, { "epoch": 0.2643225683164533, "grad_norm": 1.28125, "learning_rate": 6.989672120106616e-05, "loss": 1.4267, "step": 1729 }, { "epoch": 0.26447544429581504, "grad_norm": 1.34375, "learning_rate": 6.989246410677337e-05, "loss": 1.2753, "step": 1730 }, { "epoch": 0.26462832027517674, "grad_norm": 1.3125, "learning_rate": 6.988820707248213e-05, "loss": 1.4932, "step": 1731 }, { "epoch": 0.2647811962545385, "grad_norm": 1.296875, "learning_rate": 6.988395009820104e-05, "loss": 1.8535, "step": 1732 }, { "epoch": 0.26493407223390025, "grad_norm": 1.359375, "learning_rate": 6.987969318393843e-05, "loss": 1.7427, "step": 1733 }, { "epoch": 0.265086948213262, "grad_norm": 1.296875, "learning_rate": 6.987543632970295e-05, "loss": 1.272, "step": 1734 }, { "epoch": 0.2652398241926237, "grad_norm": 1.328125, "learning_rate": 6.987117953550296e-05, "loss": 1.4004, "step": 1735 }, { "epoch": 0.26539270017198546, "grad_norm": 1.234375, "learning_rate": 6.986692280134699e-05, "loss": 1.2024, "step": 1736 }, { "epoch": 0.2655455761513472, "grad_norm": 1.3515625, "learning_rate": 6.986266612724356e-05, "loss": 1.3795, "step": 1737 }, { "epoch": 0.265698452130709, "grad_norm": 1.390625, "learning_rate": 6.985840951320107e-05, "loss": 1.5405, "step": 1738 }, { "epoch": 0.26585132811007073, "grad_norm": 1.421875, "learning_rate": 6.985415295922807e-05, "loss": 1.5563, "step": 1739 }, { "epoch": 0.26600420408943243, "grad_norm": 1.328125, "learning_rate": 6.9849896465333e-05, "loss": 1.2893, "step": 1740 }, { "epoch": 0.2661570800687942, "grad_norm": 1.3203125, "learning_rate": 6.984564003152438e-05, "loss": 1.3379, "step": 1741 }, { "epoch": 0.26630995604815594, "grad_norm": 1.3984375, "learning_rate": 6.98413836578107e-05, "loss": 1.5672, "step": 1742 }, { "epoch": 0.2664628320275177, "grad_norm": 1.2890625, "learning_rate": 6.983712734420037e-05, "loss": 1.4546, "step": 1743 }, { "epoch": 0.2666157080068794, "grad_norm": 1.5546875, "learning_rate": 6.983287109070195e-05, "loss": 1.7046, "step": 1744 }, { "epoch": 0.26676858398624115, "grad_norm": 1.2578125, "learning_rate": 6.982861489732386e-05, "loss": 1.5094, "step": 1745 }, { "epoch": 0.2669214599656029, "grad_norm": 1.265625, "learning_rate": 6.982435876407464e-05, "loss": 1.3587, "step": 1746 }, { "epoch": 0.26707433594496466, "grad_norm": 1.3359375, "learning_rate": 6.982010269096273e-05, "loss": 1.5666, "step": 1747 }, { "epoch": 0.2672272119243264, "grad_norm": 1.3828125, "learning_rate": 6.981584667799668e-05, "loss": 1.3919, "step": 1748 }, { "epoch": 0.2673800879036881, "grad_norm": 1.3671875, "learning_rate": 6.981159072518489e-05, "loss": 1.6081, "step": 1749 }, { "epoch": 0.26753296388304987, "grad_norm": 1.3515625, "learning_rate": 6.980733483253585e-05, "loss": 1.6999, "step": 1750 }, { "epoch": 0.2676858398624116, "grad_norm": 1.421875, "learning_rate": 6.980307900005809e-05, "loss": 1.4394, "step": 1751 }, { "epoch": 0.2678387158417734, "grad_norm": 1.3359375, "learning_rate": 6.979882322776003e-05, "loss": 1.4768, "step": 1752 }, { "epoch": 0.2679915918211351, "grad_norm": 1.4453125, "learning_rate": 6.979456751565025e-05, "loss": 1.5672, "step": 1753 }, { "epoch": 0.26814446780049683, "grad_norm": 1.234375, "learning_rate": 6.979031186373713e-05, "loss": 1.2531, "step": 1754 }, { "epoch": 0.2682973437798586, "grad_norm": 1.3359375, "learning_rate": 6.978605627202919e-05, "loss": 1.3642, "step": 1755 }, { "epoch": 0.26845021975922034, "grad_norm": 1.4296875, "learning_rate": 6.97818007405349e-05, "loss": 1.5111, "step": 1756 }, { "epoch": 0.2686030957385821, "grad_norm": 1.46875, "learning_rate": 6.977754526926277e-05, "loss": 1.5885, "step": 1757 }, { "epoch": 0.2687559717179438, "grad_norm": 1.5, "learning_rate": 6.977328985822129e-05, "loss": 1.5064, "step": 1758 }, { "epoch": 0.26890884769730555, "grad_norm": 1.28125, "learning_rate": 6.976903450741886e-05, "loss": 1.4475, "step": 1759 }, { "epoch": 0.2690617236766673, "grad_norm": 1.6015625, "learning_rate": 6.976477921686404e-05, "loss": 1.5098, "step": 1760 }, { "epoch": 0.26921459965602906, "grad_norm": 1.1796875, "learning_rate": 6.976052398656525e-05, "loss": 1.0902, "step": 1761 }, { "epoch": 0.26936747563539076, "grad_norm": 1.359375, "learning_rate": 6.975626881653105e-05, "loss": 1.4813, "step": 1762 }, { "epoch": 0.2695203516147525, "grad_norm": 1.4453125, "learning_rate": 6.975201370676988e-05, "loss": 1.5305, "step": 1763 }, { "epoch": 0.2696732275941143, "grad_norm": 1.4765625, "learning_rate": 6.974775865729017e-05, "loss": 1.4861, "step": 1764 }, { "epoch": 0.26982610357347603, "grad_norm": 1.328125, "learning_rate": 6.974350366810047e-05, "loss": 1.5139, "step": 1765 }, { "epoch": 0.2699789795528378, "grad_norm": 1.3125, "learning_rate": 6.973924873920922e-05, "loss": 1.3922, "step": 1766 }, { "epoch": 0.2701318555321995, "grad_norm": 1.3671875, "learning_rate": 6.973499387062493e-05, "loss": 1.5415, "step": 1767 }, { "epoch": 0.27028473151156124, "grad_norm": 1.234375, "learning_rate": 6.973073906235603e-05, "loss": 1.4646, "step": 1768 }, { "epoch": 0.270437607490923, "grad_norm": 1.3359375, "learning_rate": 6.972648431441109e-05, "loss": 1.4586, "step": 1769 }, { "epoch": 0.27059048347028475, "grad_norm": 1.5, "learning_rate": 6.972222962679851e-05, "loss": 1.6601, "step": 1770 }, { "epoch": 0.27074335944964645, "grad_norm": 1.4921875, "learning_rate": 6.971797499952676e-05, "loss": 1.7179, "step": 1771 }, { "epoch": 0.2708962354290082, "grad_norm": 1.453125, "learning_rate": 6.971372043260442e-05, "loss": 1.6422, "step": 1772 }, { "epoch": 0.27104911140836996, "grad_norm": 1.25, "learning_rate": 6.970946592603981e-05, "loss": 1.1941, "step": 1773 }, { "epoch": 0.2712019873877317, "grad_norm": 1.40625, "learning_rate": 6.970521147984157e-05, "loss": 1.4128, "step": 1774 }, { "epoch": 0.27135486336709347, "grad_norm": 1.3203125, "learning_rate": 6.970095709401807e-05, "loss": 1.2496, "step": 1775 }, { "epoch": 0.27150773934645517, "grad_norm": 1.3828125, "learning_rate": 6.969670276857782e-05, "loss": 1.5598, "step": 1776 }, { "epoch": 0.2716606153258169, "grad_norm": 1.453125, "learning_rate": 6.969244850352936e-05, "loss": 1.7402, "step": 1777 }, { "epoch": 0.2718134913051787, "grad_norm": 1.34375, "learning_rate": 6.968819429888105e-05, "loss": 1.5741, "step": 1778 }, { "epoch": 0.27196636728454043, "grad_norm": 1.5, "learning_rate": 6.968394015464146e-05, "loss": 1.374, "step": 1779 }, { "epoch": 0.27211924326390213, "grad_norm": 1.453125, "learning_rate": 6.9679686070819e-05, "loss": 1.7662, "step": 1780 }, { "epoch": 0.2722721192432639, "grad_norm": 1.328125, "learning_rate": 6.967543204742223e-05, "loss": 1.3953, "step": 1781 }, { "epoch": 0.27242499522262564, "grad_norm": 1.421875, "learning_rate": 6.96711780844596e-05, "loss": 1.4928, "step": 1782 }, { "epoch": 0.2725778712019874, "grad_norm": 1.5, "learning_rate": 6.966692418193952e-05, "loss": 1.4222, "step": 1783 }, { "epoch": 0.27273074718134915, "grad_norm": 1.4140625, "learning_rate": 6.966267033987055e-05, "loss": 1.5163, "step": 1784 }, { "epoch": 0.27288362316071085, "grad_norm": 1.296875, "learning_rate": 6.965841655826113e-05, "loss": 1.4727, "step": 1785 }, { "epoch": 0.2730364991400726, "grad_norm": 1.5, "learning_rate": 6.965416283711975e-05, "loss": 1.4396, "step": 1786 }, { "epoch": 0.27318937511943436, "grad_norm": 1.3203125, "learning_rate": 6.964990917645486e-05, "loss": 1.6777, "step": 1787 }, { "epoch": 0.2733422510987961, "grad_norm": 1.2578125, "learning_rate": 6.9645655576275e-05, "loss": 1.5295, "step": 1788 }, { "epoch": 0.2734951270781578, "grad_norm": 1.2421875, "learning_rate": 6.96414020365886e-05, "loss": 1.5176, "step": 1789 }, { "epoch": 0.2736480030575196, "grad_norm": 1.3125, "learning_rate": 6.963714855740411e-05, "loss": 1.3174, "step": 1790 }, { "epoch": 0.27380087903688133, "grad_norm": 1.4296875, "learning_rate": 6.963289513873005e-05, "loss": 1.5044, "step": 1791 }, { "epoch": 0.2739537550162431, "grad_norm": 1.4609375, "learning_rate": 6.962864178057488e-05, "loss": 1.6241, "step": 1792 }, { "epoch": 0.27410663099560484, "grad_norm": 1.4375, "learning_rate": 6.962438848294714e-05, "loss": 1.449, "step": 1793 }, { "epoch": 0.27425950697496654, "grad_norm": 1.3125, "learning_rate": 6.96201352458552e-05, "loss": 1.3616, "step": 1794 }, { "epoch": 0.2744123829543283, "grad_norm": 1.4140625, "learning_rate": 6.961588206930759e-05, "loss": 1.3798, "step": 1795 }, { "epoch": 0.27456525893369005, "grad_norm": 1.421875, "learning_rate": 6.961162895331279e-05, "loss": 1.4199, "step": 1796 }, { "epoch": 0.2747181349130518, "grad_norm": 1.375, "learning_rate": 6.960737589787926e-05, "loss": 1.6507, "step": 1797 }, { "epoch": 0.2748710108924135, "grad_norm": 1.328125, "learning_rate": 6.960312290301551e-05, "loss": 1.3076, "step": 1798 }, { "epoch": 0.27502388687177526, "grad_norm": 1.6953125, "learning_rate": 6.959886996872997e-05, "loss": 1.7338, "step": 1799 }, { "epoch": 0.275176762851137, "grad_norm": 1.3984375, "learning_rate": 6.959461709503113e-05, "loss": 1.602, "step": 1800 }, { "epoch": 0.27532963883049877, "grad_norm": 1.3203125, "learning_rate": 6.959036428192746e-05, "loss": 1.4047, "step": 1801 }, { "epoch": 0.2754825148098605, "grad_norm": 1.265625, "learning_rate": 6.958611152942748e-05, "loss": 1.2124, "step": 1802 }, { "epoch": 0.2756353907892222, "grad_norm": 1.1484375, "learning_rate": 6.958185883753964e-05, "loss": 1.4107, "step": 1803 }, { "epoch": 0.275788266768584, "grad_norm": 1.53125, "learning_rate": 6.957760620627238e-05, "loss": 1.5097, "step": 1804 }, { "epoch": 0.27594114274794573, "grad_norm": 1.421875, "learning_rate": 6.957335363563421e-05, "loss": 1.5217, "step": 1805 }, { "epoch": 0.2760940187273075, "grad_norm": 1.3359375, "learning_rate": 6.956910112563358e-05, "loss": 1.4553, "step": 1806 }, { "epoch": 0.2762468947066692, "grad_norm": 1.3828125, "learning_rate": 6.9564848676279e-05, "loss": 1.4685, "step": 1807 }, { "epoch": 0.27639977068603094, "grad_norm": 1.390625, "learning_rate": 6.956059628757896e-05, "loss": 1.4898, "step": 1808 }, { "epoch": 0.2765526466653927, "grad_norm": 1.3828125, "learning_rate": 6.955634395954185e-05, "loss": 1.6149, "step": 1809 }, { "epoch": 0.27670552264475445, "grad_norm": 1.390625, "learning_rate": 6.955209169217622e-05, "loss": 1.6707, "step": 1810 }, { "epoch": 0.2768583986241162, "grad_norm": 1.359375, "learning_rate": 6.954783948549051e-05, "loss": 1.5015, "step": 1811 }, { "epoch": 0.2770112746034779, "grad_norm": 1.328125, "learning_rate": 6.954358733949324e-05, "loss": 1.4061, "step": 1812 }, { "epoch": 0.27716415058283966, "grad_norm": 1.3203125, "learning_rate": 6.953933525419279e-05, "loss": 1.5442, "step": 1813 }, { "epoch": 0.2773170265622014, "grad_norm": 1.3515625, "learning_rate": 6.953508322959774e-05, "loss": 1.3915, "step": 1814 }, { "epoch": 0.2774699025415632, "grad_norm": 1.28125, "learning_rate": 6.95308312657165e-05, "loss": 1.2326, "step": 1815 }, { "epoch": 0.2776227785209249, "grad_norm": 1.5, "learning_rate": 6.952657936255755e-05, "loss": 1.4904, "step": 1816 }, { "epoch": 0.27777565450028663, "grad_norm": 1.2578125, "learning_rate": 6.952232752012941e-05, "loss": 1.5187, "step": 1817 }, { "epoch": 0.2779285304796484, "grad_norm": 1.359375, "learning_rate": 6.951807573844047e-05, "loss": 1.5065, "step": 1818 }, { "epoch": 0.27808140645901014, "grad_norm": 1.3984375, "learning_rate": 6.951382401749929e-05, "loss": 1.7077, "step": 1819 }, { "epoch": 0.2782342824383719, "grad_norm": 1.4921875, "learning_rate": 6.950957235731428e-05, "loss": 1.5363, "step": 1820 }, { "epoch": 0.2783871584177336, "grad_norm": 1.3359375, "learning_rate": 6.950532075789395e-05, "loss": 1.5239, "step": 1821 }, { "epoch": 0.27854003439709535, "grad_norm": 1.2265625, "learning_rate": 6.95010692192468e-05, "loss": 1.4331, "step": 1822 }, { "epoch": 0.2786929103764571, "grad_norm": 1.453125, "learning_rate": 6.949681774138121e-05, "loss": 1.4164, "step": 1823 }, { "epoch": 0.27884578635581886, "grad_norm": 1.359375, "learning_rate": 6.949256632430572e-05, "loss": 1.6392, "step": 1824 }, { "epoch": 0.27899866233518056, "grad_norm": 1.421875, "learning_rate": 6.948831496802879e-05, "loss": 1.7208, "step": 1825 }, { "epoch": 0.2791515383145423, "grad_norm": 1.3984375, "learning_rate": 6.948406367255888e-05, "loss": 1.3206, "step": 1826 }, { "epoch": 0.27930441429390407, "grad_norm": 1.3671875, "learning_rate": 6.947981243790448e-05, "loss": 1.5092, "step": 1827 }, { "epoch": 0.2794572902732658, "grad_norm": 1.375, "learning_rate": 6.94755612640741e-05, "loss": 1.5382, "step": 1828 }, { "epoch": 0.2796101662526276, "grad_norm": 1.4921875, "learning_rate": 6.947131015107613e-05, "loss": 1.5601, "step": 1829 }, { "epoch": 0.2797630422319893, "grad_norm": 1.5078125, "learning_rate": 6.946705909891907e-05, "loss": 1.517, "step": 1830 }, { "epoch": 0.27991591821135103, "grad_norm": 1.40625, "learning_rate": 6.946280810761143e-05, "loss": 1.5299, "step": 1831 }, { "epoch": 0.2800687941907128, "grad_norm": 1.359375, "learning_rate": 6.945855717716163e-05, "loss": 1.2375, "step": 1832 }, { "epoch": 0.28022167017007454, "grad_norm": 1.390625, "learning_rate": 6.945430630757821e-05, "loss": 1.5014, "step": 1833 }, { "epoch": 0.28037454614943624, "grad_norm": 1.4375, "learning_rate": 6.945005549886957e-05, "loss": 1.553, "step": 1834 }, { "epoch": 0.280527422128798, "grad_norm": 1.2890625, "learning_rate": 6.94458047510442e-05, "loss": 1.217, "step": 1835 }, { "epoch": 0.28068029810815975, "grad_norm": 1.40625, "learning_rate": 6.944155406411059e-05, "loss": 1.3369, "step": 1836 }, { "epoch": 0.2808331740875215, "grad_norm": 1.234375, "learning_rate": 6.943730343807719e-05, "loss": 1.5227, "step": 1837 }, { "epoch": 0.28098605006688326, "grad_norm": 1.328125, "learning_rate": 6.943305287295251e-05, "loss": 1.3123, "step": 1838 }, { "epoch": 0.28113892604624496, "grad_norm": 1.296875, "learning_rate": 6.942880236874499e-05, "loss": 1.2505, "step": 1839 }, { "epoch": 0.2812918020256067, "grad_norm": 1.3515625, "learning_rate": 6.942455192546309e-05, "loss": 1.3804, "step": 1840 }, { "epoch": 0.2814446780049685, "grad_norm": 1.5, "learning_rate": 6.942030154311532e-05, "loss": 1.4128, "step": 1841 }, { "epoch": 0.28159755398433023, "grad_norm": 1.3515625, "learning_rate": 6.941605122171007e-05, "loss": 1.2787, "step": 1842 }, { "epoch": 0.28175042996369193, "grad_norm": 1.390625, "learning_rate": 6.941180096125594e-05, "loss": 1.1692, "step": 1843 }, { "epoch": 0.2819033059430537, "grad_norm": 1.3125, "learning_rate": 6.940755076176129e-05, "loss": 1.4377, "step": 1844 }, { "epoch": 0.28205618192241544, "grad_norm": 1.65625, "learning_rate": 6.940330062323463e-05, "loss": 1.6879, "step": 1845 }, { "epoch": 0.2822090579017772, "grad_norm": 1.34375, "learning_rate": 6.939905054568441e-05, "loss": 1.545, "step": 1846 }, { "epoch": 0.28236193388113895, "grad_norm": 1.3515625, "learning_rate": 6.939480052911914e-05, "loss": 1.6049, "step": 1847 }, { "epoch": 0.28251480986050065, "grad_norm": 1.3203125, "learning_rate": 6.93905505735473e-05, "loss": 1.6769, "step": 1848 }, { "epoch": 0.2826676858398624, "grad_norm": 1.2265625, "learning_rate": 6.938630067897727e-05, "loss": 1.3372, "step": 1849 }, { "epoch": 0.28282056181922416, "grad_norm": 1.4453125, "learning_rate": 6.93820508454176e-05, "loss": 1.4187, "step": 1850 }, { "epoch": 0.2829734377985859, "grad_norm": 1.3828125, "learning_rate": 6.937780107287673e-05, "loss": 1.4725, "step": 1851 }, { "epoch": 0.2831263137779476, "grad_norm": 1.3671875, "learning_rate": 6.937355136136317e-05, "loss": 1.6687, "step": 1852 }, { "epoch": 0.28327918975730937, "grad_norm": 1.53125, "learning_rate": 6.93693017108853e-05, "loss": 1.5832, "step": 1853 }, { "epoch": 0.2834320657366711, "grad_norm": 1.359375, "learning_rate": 6.93650521214517e-05, "loss": 1.1951, "step": 1854 }, { "epoch": 0.2835849417160329, "grad_norm": 1.2578125, "learning_rate": 6.936080259307077e-05, "loss": 1.5566, "step": 1855 }, { "epoch": 0.28373781769539463, "grad_norm": 1.234375, "learning_rate": 6.935655312575096e-05, "loss": 1.2404, "step": 1856 }, { "epoch": 0.28389069367475633, "grad_norm": 1.34375, "learning_rate": 6.935230371950083e-05, "loss": 1.4269, "step": 1857 }, { "epoch": 0.2840435696541181, "grad_norm": 1.3828125, "learning_rate": 6.93480543743287e-05, "loss": 1.3868, "step": 1858 }, { "epoch": 0.28419644563347984, "grad_norm": 1.40625, "learning_rate": 6.934380509024321e-05, "loss": 1.5624, "step": 1859 }, { "epoch": 0.2843493216128416, "grad_norm": 1.34375, "learning_rate": 6.933955586725272e-05, "loss": 1.3422, "step": 1860 }, { "epoch": 0.2845021975922033, "grad_norm": 1.390625, "learning_rate": 6.933530670536572e-05, "loss": 1.4214, "step": 1861 }, { "epoch": 0.28465507357156505, "grad_norm": 1.328125, "learning_rate": 6.933105760459071e-05, "loss": 1.5519, "step": 1862 }, { "epoch": 0.2848079495509268, "grad_norm": 1.203125, "learning_rate": 6.93268085649361e-05, "loss": 1.3551, "step": 1863 }, { "epoch": 0.28496082553028856, "grad_norm": 1.171875, "learning_rate": 6.932255958641041e-05, "loss": 1.1047, "step": 1864 }, { "epoch": 0.2851137015096503, "grad_norm": 1.453125, "learning_rate": 6.931831066902206e-05, "loss": 1.4785, "step": 1865 }, { "epoch": 0.285266577489012, "grad_norm": 1.1953125, "learning_rate": 6.931406181277958e-05, "loss": 1.3015, "step": 1866 }, { "epoch": 0.2854194534683738, "grad_norm": 1.4921875, "learning_rate": 6.93098130176914e-05, "loss": 1.83, "step": 1867 }, { "epoch": 0.28557232944773553, "grad_norm": 1.265625, "learning_rate": 6.930556428376597e-05, "loss": 1.3438, "step": 1868 }, { "epoch": 0.2857252054270973, "grad_norm": 1.2890625, "learning_rate": 6.930131561101179e-05, "loss": 1.5122, "step": 1869 }, { "epoch": 0.285878081406459, "grad_norm": 1.3046875, "learning_rate": 6.929706699943729e-05, "loss": 1.3857, "step": 1870 }, { "epoch": 0.28603095738582074, "grad_norm": 1.4140625, "learning_rate": 6.929281844905099e-05, "loss": 1.5384, "step": 1871 }, { "epoch": 0.2861838333651825, "grad_norm": 1.2578125, "learning_rate": 6.928856995986131e-05, "loss": 1.4534, "step": 1872 }, { "epoch": 0.28633670934454425, "grad_norm": 1.3125, "learning_rate": 6.928432153187676e-05, "loss": 1.4655, "step": 1873 }, { "epoch": 0.286489585323906, "grad_norm": 1.3515625, "learning_rate": 6.928007316510578e-05, "loss": 1.4358, "step": 1874 }, { "epoch": 0.2866424613032677, "grad_norm": 1.4609375, "learning_rate": 6.927582485955681e-05, "loss": 1.6396, "step": 1875 }, { "epoch": 0.28679533728262946, "grad_norm": 1.4453125, "learning_rate": 6.927157661523838e-05, "loss": 1.441, "step": 1876 }, { "epoch": 0.2869482132619912, "grad_norm": 1.4765625, "learning_rate": 6.926732843215888e-05, "loss": 1.5474, "step": 1877 }, { "epoch": 0.28710108924135297, "grad_norm": 1.3515625, "learning_rate": 6.926308031032687e-05, "loss": 1.5888, "step": 1878 }, { "epoch": 0.28725396522071467, "grad_norm": 1.2421875, "learning_rate": 6.925883224975074e-05, "loss": 1.2906, "step": 1879 }, { "epoch": 0.2874068412000764, "grad_norm": 1.265625, "learning_rate": 6.925458425043897e-05, "loss": 1.1278, "step": 1880 }, { "epoch": 0.2875597171794382, "grad_norm": 1.296875, "learning_rate": 6.925033631240005e-05, "loss": 1.4826, "step": 1881 }, { "epoch": 0.28771259315879993, "grad_norm": 1.546875, "learning_rate": 6.924608843564242e-05, "loss": 1.5962, "step": 1882 }, { "epoch": 0.2878654691381617, "grad_norm": 1.3984375, "learning_rate": 6.924184062017461e-05, "loss": 1.417, "step": 1883 }, { "epoch": 0.2880183451175234, "grad_norm": 1.3125, "learning_rate": 6.923759286600495e-05, "loss": 1.3124, "step": 1884 }, { "epoch": 0.28817122109688514, "grad_norm": 1.3203125, "learning_rate": 6.923334517314204e-05, "loss": 1.3578, "step": 1885 }, { "epoch": 0.2883240970762469, "grad_norm": 1.421875, "learning_rate": 6.922909754159428e-05, "loss": 1.8285, "step": 1886 }, { "epoch": 0.28847697305560865, "grad_norm": 1.4140625, "learning_rate": 6.922484997137016e-05, "loss": 1.4773, "step": 1887 }, { "epoch": 0.28862984903497035, "grad_norm": 1.328125, "learning_rate": 6.922060246247814e-05, "loss": 1.5032, "step": 1888 }, { "epoch": 0.2887827250143321, "grad_norm": 1.40625, "learning_rate": 6.921635501492665e-05, "loss": 1.537, "step": 1889 }, { "epoch": 0.28893560099369386, "grad_norm": 1.2421875, "learning_rate": 6.921210762872421e-05, "loss": 1.5239, "step": 1890 }, { "epoch": 0.2890884769730556, "grad_norm": 1.3046875, "learning_rate": 6.920786030387922e-05, "loss": 1.5401, "step": 1891 }, { "epoch": 0.2892413529524174, "grad_norm": 1.3671875, "learning_rate": 6.920361304040022e-05, "loss": 1.3044, "step": 1892 }, { "epoch": 0.2893942289317791, "grad_norm": 1.4453125, "learning_rate": 6.919936583829561e-05, "loss": 1.5118, "step": 1893 }, { "epoch": 0.28954710491114083, "grad_norm": 1.296875, "learning_rate": 6.919511869757392e-05, "loss": 1.3464, "step": 1894 }, { "epoch": 0.2896999808905026, "grad_norm": 1.265625, "learning_rate": 6.919087161824356e-05, "loss": 1.343, "step": 1895 }, { "epoch": 0.28985285686986434, "grad_norm": 1.28125, "learning_rate": 6.9186624600313e-05, "loss": 1.3114, "step": 1896 }, { "epoch": 0.29000573284922604, "grad_norm": 1.6171875, "learning_rate": 6.918237764379074e-05, "loss": 1.4048, "step": 1897 }, { "epoch": 0.2901586088285878, "grad_norm": 1.3125, "learning_rate": 6.917813074868516e-05, "loss": 1.3725, "step": 1898 }, { "epoch": 0.29031148480794955, "grad_norm": 1.328125, "learning_rate": 6.917388391500484e-05, "loss": 1.644, "step": 1899 }, { "epoch": 0.2904643607873113, "grad_norm": 1.34375, "learning_rate": 6.916963714275816e-05, "loss": 1.4748, "step": 1900 }, { "epoch": 0.29061723676667306, "grad_norm": 1.28125, "learning_rate": 6.91653904319536e-05, "loss": 1.2348, "step": 1901 }, { "epoch": 0.29077011274603476, "grad_norm": 1.3828125, "learning_rate": 6.916114378259967e-05, "loss": 1.4212, "step": 1902 }, { "epoch": 0.2909229887253965, "grad_norm": 1.1953125, "learning_rate": 6.915689719470473e-05, "loss": 1.2909, "step": 1903 }, { "epoch": 0.29107586470475827, "grad_norm": 1.5859375, "learning_rate": 6.915265066827737e-05, "loss": 1.5083, "step": 1904 }, { "epoch": 0.29122874068412, "grad_norm": 1.625, "learning_rate": 6.914840420332596e-05, "loss": 1.6984, "step": 1905 }, { "epoch": 0.2913816166634817, "grad_norm": 1.40625, "learning_rate": 6.9144157799859e-05, "loss": 1.4423, "step": 1906 }, { "epoch": 0.2915344926428435, "grad_norm": 1.3515625, "learning_rate": 6.913991145788496e-05, "loss": 1.8085, "step": 1907 }, { "epoch": 0.29168736862220523, "grad_norm": 1.453125, "learning_rate": 6.913566517741226e-05, "loss": 1.529, "step": 1908 }, { "epoch": 0.291840244601567, "grad_norm": 1.2734375, "learning_rate": 6.913141895844941e-05, "loss": 1.3419, "step": 1909 }, { "epoch": 0.29199312058092874, "grad_norm": 1.421875, "learning_rate": 6.912717280100485e-05, "loss": 1.6775, "step": 1910 }, { "epoch": 0.29214599656029044, "grad_norm": 1.3203125, "learning_rate": 6.912292670508704e-05, "loss": 1.353, "step": 1911 }, { "epoch": 0.2922988725396522, "grad_norm": 1.3984375, "learning_rate": 6.911868067070444e-05, "loss": 1.5013, "step": 1912 }, { "epoch": 0.29245174851901395, "grad_norm": 1.2578125, "learning_rate": 6.911443469786556e-05, "loss": 1.5137, "step": 1913 }, { "epoch": 0.2926046244983757, "grad_norm": 1.234375, "learning_rate": 6.91101887865788e-05, "loss": 1.5313, "step": 1914 }, { "epoch": 0.2927575004777374, "grad_norm": 1.4296875, "learning_rate": 6.910594293685262e-05, "loss": 1.5245, "step": 1915 }, { "epoch": 0.29291037645709916, "grad_norm": 1.3125, "learning_rate": 6.910169714869553e-05, "loss": 1.4753, "step": 1916 }, { "epoch": 0.2930632524364609, "grad_norm": 1.40625, "learning_rate": 6.909745142211595e-05, "loss": 1.6131, "step": 1917 }, { "epoch": 0.2932161284158227, "grad_norm": 1.234375, "learning_rate": 6.90932057571224e-05, "loss": 1.317, "step": 1918 }, { "epoch": 0.29336900439518443, "grad_norm": 1.34375, "learning_rate": 6.908896015372324e-05, "loss": 1.4441, "step": 1919 }, { "epoch": 0.29352188037454613, "grad_norm": 1.328125, "learning_rate": 6.908471461192702e-05, "loss": 1.5165, "step": 1920 }, { "epoch": 0.2936747563539079, "grad_norm": 1.34375, "learning_rate": 6.908046913174217e-05, "loss": 1.44, "step": 1921 }, { "epoch": 0.29382763233326964, "grad_norm": 1.3828125, "learning_rate": 6.907622371317715e-05, "loss": 1.5219, "step": 1922 }, { "epoch": 0.2939805083126314, "grad_norm": 1.375, "learning_rate": 6.907197835624044e-05, "loss": 1.7112, "step": 1923 }, { "epoch": 0.2941333842919931, "grad_norm": 1.2578125, "learning_rate": 6.906773306094045e-05, "loss": 1.468, "step": 1924 }, { "epoch": 0.29428626027135485, "grad_norm": 1.546875, "learning_rate": 6.90634878272857e-05, "loss": 1.4871, "step": 1925 }, { "epoch": 0.2944391362507166, "grad_norm": 1.21875, "learning_rate": 6.905924265528461e-05, "loss": 1.3379, "step": 1926 }, { "epoch": 0.29459201223007836, "grad_norm": 1.203125, "learning_rate": 6.905499754494566e-05, "loss": 1.3464, "step": 1927 }, { "epoch": 0.2947448882094401, "grad_norm": 1.375, "learning_rate": 6.905075249627732e-05, "loss": 1.4201, "step": 1928 }, { "epoch": 0.2948977641888018, "grad_norm": 1.4296875, "learning_rate": 6.9046507509288e-05, "loss": 1.5626, "step": 1929 }, { "epoch": 0.29505064016816357, "grad_norm": 1.515625, "learning_rate": 6.904226258398623e-05, "loss": 1.7508, "step": 1930 }, { "epoch": 0.2952035161475253, "grad_norm": 1.2421875, "learning_rate": 6.90380177203804e-05, "loss": 1.3606, "step": 1931 }, { "epoch": 0.2953563921268871, "grad_norm": 1.34375, "learning_rate": 6.903377291847902e-05, "loss": 1.5511, "step": 1932 }, { "epoch": 0.2955092681062488, "grad_norm": 1.3984375, "learning_rate": 6.902952817829055e-05, "loss": 1.5738, "step": 1933 }, { "epoch": 0.29566214408561053, "grad_norm": 1.5078125, "learning_rate": 6.902528349982343e-05, "loss": 1.6081, "step": 1934 }, { "epoch": 0.2958150200649723, "grad_norm": 1.2890625, "learning_rate": 6.902103888308611e-05, "loss": 1.1895, "step": 1935 }, { "epoch": 0.29596789604433404, "grad_norm": 1.2734375, "learning_rate": 6.901679432808703e-05, "loss": 1.2966, "step": 1936 }, { "epoch": 0.2961207720236958, "grad_norm": 1.390625, "learning_rate": 6.901254983483474e-05, "loss": 1.3764, "step": 1937 }, { "epoch": 0.2962736480030575, "grad_norm": 1.265625, "learning_rate": 6.900830540333758e-05, "loss": 1.3548, "step": 1938 }, { "epoch": 0.29642652398241925, "grad_norm": 1.203125, "learning_rate": 6.900406103360413e-05, "loss": 1.0266, "step": 1939 }, { "epoch": 0.296579399961781, "grad_norm": 1.3125, "learning_rate": 6.899981672564276e-05, "loss": 1.4991, "step": 1940 }, { "epoch": 0.29673227594114276, "grad_norm": 1.3828125, "learning_rate": 6.899557247946192e-05, "loss": 1.3737, "step": 1941 }, { "epoch": 0.29688515192050446, "grad_norm": 1.375, "learning_rate": 6.899132829507016e-05, "loss": 1.5473, "step": 1942 }, { "epoch": 0.2970380278998662, "grad_norm": 1.421875, "learning_rate": 6.898708417247583e-05, "loss": 1.7097, "step": 1943 }, { "epoch": 0.297190903879228, "grad_norm": 1.2890625, "learning_rate": 6.898284011168749e-05, "loss": 1.4457, "step": 1944 }, { "epoch": 0.29734377985858973, "grad_norm": 1.609375, "learning_rate": 6.89785961127135e-05, "loss": 1.5083, "step": 1945 }, { "epoch": 0.2974966558379515, "grad_norm": 1.296875, "learning_rate": 6.89743521755624e-05, "loss": 1.522, "step": 1946 }, { "epoch": 0.2976495318173132, "grad_norm": 1.359375, "learning_rate": 6.897010830024262e-05, "loss": 1.403, "step": 1947 }, { "epoch": 0.29780240779667494, "grad_norm": 1.3359375, "learning_rate": 6.896586448676257e-05, "loss": 1.5984, "step": 1948 }, { "epoch": 0.2979552837760367, "grad_norm": 1.40625, "learning_rate": 6.89616207351308e-05, "loss": 1.6304, "step": 1949 }, { "epoch": 0.29810815975539845, "grad_norm": 1.2734375, "learning_rate": 6.895737704535566e-05, "loss": 1.4131, "step": 1950 }, { "epoch": 0.29826103573476015, "grad_norm": 1.375, "learning_rate": 6.895313341744569e-05, "loss": 1.315, "step": 1951 }, { "epoch": 0.2984139117141219, "grad_norm": 1.3515625, "learning_rate": 6.89488898514093e-05, "loss": 1.2759, "step": 1952 }, { "epoch": 0.29856678769348366, "grad_norm": 1.4765625, "learning_rate": 6.894464634725502e-05, "loss": 1.621, "step": 1953 }, { "epoch": 0.2987196636728454, "grad_norm": 1.4140625, "learning_rate": 6.894040290499124e-05, "loss": 1.5853, "step": 1954 }, { "epoch": 0.29887253965220717, "grad_norm": 1.53125, "learning_rate": 6.89361595246264e-05, "loss": 1.6722, "step": 1955 }, { "epoch": 0.29902541563156887, "grad_norm": 1.5546875, "learning_rate": 6.8931916206169e-05, "loss": 1.3582, "step": 1956 }, { "epoch": 0.2991782916109306, "grad_norm": 1.484375, "learning_rate": 6.892767294962747e-05, "loss": 1.6685, "step": 1957 }, { "epoch": 0.2993311675902924, "grad_norm": 1.3203125, "learning_rate": 6.892342975501034e-05, "loss": 1.312, "step": 1958 }, { "epoch": 0.29948404356965413, "grad_norm": 1.390625, "learning_rate": 6.891918662232597e-05, "loss": 1.59, "step": 1959 }, { "epoch": 0.29963691954901583, "grad_norm": 1.34375, "learning_rate": 6.891494355158284e-05, "loss": 1.6266, "step": 1960 }, { "epoch": 0.2997897955283776, "grad_norm": 1.265625, "learning_rate": 6.891070054278943e-05, "loss": 1.1521, "step": 1961 }, { "epoch": 0.29994267150773934, "grad_norm": 1.453125, "learning_rate": 6.890645759595416e-05, "loss": 1.596, "step": 1962 }, { "epoch": 0.3000955474871011, "grad_norm": 1.296875, "learning_rate": 6.890221471108558e-05, "loss": 1.273, "step": 1963 }, { "epoch": 0.30024842346646285, "grad_norm": 1.4921875, "learning_rate": 6.8897971888192e-05, "loss": 1.3668, "step": 1964 }, { "epoch": 0.30040129944582455, "grad_norm": 1.3046875, "learning_rate": 6.889372912728201e-05, "loss": 1.2746, "step": 1965 }, { "epoch": 0.3005541754251863, "grad_norm": 1.53125, "learning_rate": 6.8889486428364e-05, "loss": 1.5199, "step": 1966 }, { "epoch": 0.30070705140454806, "grad_norm": 1.2421875, "learning_rate": 6.888524379144638e-05, "loss": 1.2887, "step": 1967 }, { "epoch": 0.3008599273839098, "grad_norm": 1.3515625, "learning_rate": 6.888100121653773e-05, "loss": 1.2471, "step": 1968 }, { "epoch": 0.3010128033632715, "grad_norm": 1.2890625, "learning_rate": 6.887675870364639e-05, "loss": 1.6896, "step": 1969 }, { "epoch": 0.3011656793426333, "grad_norm": 1.3203125, "learning_rate": 6.887251625278085e-05, "loss": 1.3981, "step": 1970 }, { "epoch": 0.30131855532199503, "grad_norm": 1.34375, "learning_rate": 6.886827386394959e-05, "loss": 1.4513, "step": 1971 }, { "epoch": 0.3014714313013568, "grad_norm": 1.390625, "learning_rate": 6.886403153716105e-05, "loss": 1.5055, "step": 1972 }, { "epoch": 0.30162430728071854, "grad_norm": 1.3515625, "learning_rate": 6.885978927242372e-05, "loss": 1.5559, "step": 1973 }, { "epoch": 0.30177718326008024, "grad_norm": 1.484375, "learning_rate": 6.885554706974597e-05, "loss": 1.578, "step": 1974 }, { "epoch": 0.301930059239442, "grad_norm": 1.34375, "learning_rate": 6.885130492913631e-05, "loss": 1.4094, "step": 1975 }, { "epoch": 0.30208293521880375, "grad_norm": 1.3125, "learning_rate": 6.884706285060317e-05, "loss": 1.3288, "step": 1976 }, { "epoch": 0.3022358111981655, "grad_norm": 1.3203125, "learning_rate": 6.884282083415507e-05, "loss": 1.4615, "step": 1977 }, { "epoch": 0.3023886871775272, "grad_norm": 1.4375, "learning_rate": 6.883857887980034e-05, "loss": 1.6963, "step": 1978 }, { "epoch": 0.30254156315688896, "grad_norm": 1.5078125, "learning_rate": 6.883433698754758e-05, "loss": 1.5243, "step": 1979 }, { "epoch": 0.3026944391362507, "grad_norm": 1.390625, "learning_rate": 6.883009515740513e-05, "loss": 1.5793, "step": 1980 }, { "epoch": 0.30284731511561247, "grad_norm": 1.4609375, "learning_rate": 6.882585338938148e-05, "loss": 1.6399, "step": 1981 }, { "epoch": 0.3030001910949742, "grad_norm": 1.359375, "learning_rate": 6.882161168348514e-05, "loss": 1.3726, "step": 1982 }, { "epoch": 0.3031530670743359, "grad_norm": 1.4375, "learning_rate": 6.881737003972442e-05, "loss": 1.8675, "step": 1983 }, { "epoch": 0.3033059430536977, "grad_norm": 1.671875, "learning_rate": 6.881312845810794e-05, "loss": 1.5201, "step": 1984 }, { "epoch": 0.30345881903305943, "grad_norm": 1.4921875, "learning_rate": 6.880888693864405e-05, "loss": 1.6278, "step": 1985 }, { "epoch": 0.3036116950124212, "grad_norm": 1.515625, "learning_rate": 6.880464548134124e-05, "loss": 1.8604, "step": 1986 }, { "epoch": 0.3037645709917829, "grad_norm": 1.40625, "learning_rate": 6.880040408620798e-05, "loss": 1.3837, "step": 1987 }, { "epoch": 0.30391744697114464, "grad_norm": 1.4140625, "learning_rate": 6.879616275325264e-05, "loss": 1.2533, "step": 1988 }, { "epoch": 0.3040703229505064, "grad_norm": 1.34375, "learning_rate": 6.879192148248377e-05, "loss": 1.4059, "step": 1989 }, { "epoch": 0.30422319892986815, "grad_norm": 1.3984375, "learning_rate": 6.878768027390976e-05, "loss": 1.4609, "step": 1990 }, { "epoch": 0.3043760749092299, "grad_norm": 1.296875, "learning_rate": 6.87834391275391e-05, "loss": 1.3679, "step": 1991 }, { "epoch": 0.3045289508885916, "grad_norm": 1.3984375, "learning_rate": 6.877919804338024e-05, "loss": 1.4371, "step": 1992 }, { "epoch": 0.30468182686795336, "grad_norm": 1.3046875, "learning_rate": 6.877495702144158e-05, "loss": 1.3835, "step": 1993 }, { "epoch": 0.3048347028473151, "grad_norm": 1.4375, "learning_rate": 6.877071606173162e-05, "loss": 1.3819, "step": 1994 }, { "epoch": 0.3049875788266769, "grad_norm": 1.3671875, "learning_rate": 6.876647516425881e-05, "loss": 1.42, "step": 1995 }, { "epoch": 0.3051404548060386, "grad_norm": 1.46875, "learning_rate": 6.876223432903159e-05, "loss": 1.2865, "step": 1996 }, { "epoch": 0.30529333078540033, "grad_norm": 1.453125, "learning_rate": 6.87579935560584e-05, "loss": 1.3109, "step": 1997 }, { "epoch": 0.3054462067647621, "grad_norm": 1.3984375, "learning_rate": 6.875375284534775e-05, "loss": 1.5427, "step": 1998 }, { "epoch": 0.30559908274412384, "grad_norm": 1.203125, "learning_rate": 6.874951219690803e-05, "loss": 1.2659, "step": 1999 }, { "epoch": 0.3057519587234856, "grad_norm": 1.3203125, "learning_rate": 6.874527161074768e-05, "loss": 1.3346, "step": 2000 }, { "epoch": 0.3057519587234856, "eval_loss": 1.4422107934951782, "eval_model_preparation_time": 0.0034, "eval_runtime": 111.752, "eval_samples_per_second": 89.484, "eval_steps_per_second": 2.801, "step": 2000 }, { "epoch": 0.3059048347028473, "grad_norm": 1.3515625, "learning_rate": 6.87410310868752e-05, "loss": 1.3909, "step": 2001 }, { "epoch": 0.30605771068220905, "grad_norm": 1.4140625, "learning_rate": 6.873679062529901e-05, "loss": 1.5792, "step": 2002 }, { "epoch": 0.3062105866615708, "grad_norm": 1.3515625, "learning_rate": 6.873255022602761e-05, "loss": 1.2816, "step": 2003 }, { "epoch": 0.30636346264093256, "grad_norm": 1.3125, "learning_rate": 6.872830988906937e-05, "loss": 1.4062, "step": 2004 }, { "epoch": 0.30651633862029426, "grad_norm": 1.3984375, "learning_rate": 6.872406961443282e-05, "loss": 1.4407, "step": 2005 }, { "epoch": 0.306669214599656, "grad_norm": 1.1875, "learning_rate": 6.871982940212636e-05, "loss": 1.2595, "step": 2006 }, { "epoch": 0.30682209057901777, "grad_norm": 1.4140625, "learning_rate": 6.871558925215843e-05, "loss": 1.3819, "step": 2007 }, { "epoch": 0.3069749665583795, "grad_norm": 1.390625, "learning_rate": 6.871134916453755e-05, "loss": 1.5579, "step": 2008 }, { "epoch": 0.3071278425377413, "grad_norm": 1.328125, "learning_rate": 6.870710913927209e-05, "loss": 1.3987, "step": 2009 }, { "epoch": 0.307280718517103, "grad_norm": 1.4609375, "learning_rate": 6.870286917637055e-05, "loss": 1.6541, "step": 2010 }, { "epoch": 0.30743359449646473, "grad_norm": 1.3125, "learning_rate": 6.869862927584134e-05, "loss": 1.4139, "step": 2011 }, { "epoch": 0.3075864704758265, "grad_norm": 1.3125, "learning_rate": 6.869438943769297e-05, "loss": 1.4538, "step": 2012 }, { "epoch": 0.30773934645518825, "grad_norm": 1.171875, "learning_rate": 6.869014966193385e-05, "loss": 1.1121, "step": 2013 }, { "epoch": 0.30789222243454994, "grad_norm": 1.3828125, "learning_rate": 6.868590994857241e-05, "loss": 1.5544, "step": 2014 }, { "epoch": 0.3080450984139117, "grad_norm": 1.3671875, "learning_rate": 6.868167029761714e-05, "loss": 1.6288, "step": 2015 }, { "epoch": 0.30819797439327346, "grad_norm": 1.46875, "learning_rate": 6.867743070907646e-05, "loss": 1.6175, "step": 2016 }, { "epoch": 0.3083508503726352, "grad_norm": 1.3125, "learning_rate": 6.867319118295883e-05, "loss": 1.4325, "step": 2017 }, { "epoch": 0.30850372635199697, "grad_norm": 1.2578125, "learning_rate": 6.86689517192727e-05, "loss": 1.4186, "step": 2018 }, { "epoch": 0.30865660233135866, "grad_norm": 1.390625, "learning_rate": 6.866471231802655e-05, "loss": 1.299, "step": 2019 }, { "epoch": 0.3088094783107204, "grad_norm": 1.4609375, "learning_rate": 6.866047297922878e-05, "loss": 1.4717, "step": 2020 }, { "epoch": 0.3089623542900822, "grad_norm": 1.2890625, "learning_rate": 6.865623370288783e-05, "loss": 1.2336, "step": 2021 }, { "epoch": 0.30911523026944393, "grad_norm": 1.28125, "learning_rate": 6.865199448901222e-05, "loss": 1.3947, "step": 2022 }, { "epoch": 0.30926810624880563, "grad_norm": 1.4296875, "learning_rate": 6.86477553376103e-05, "loss": 1.4575, "step": 2023 }, { "epoch": 0.3094209822281674, "grad_norm": 1.25, "learning_rate": 6.864351624869063e-05, "loss": 1.3093, "step": 2024 }, { "epoch": 0.30957385820752914, "grad_norm": 1.2109375, "learning_rate": 6.863927722226158e-05, "loss": 1.1077, "step": 2025 }, { "epoch": 0.3097267341868909, "grad_norm": 1.2890625, "learning_rate": 6.863503825833158e-05, "loss": 1.4545, "step": 2026 }, { "epoch": 0.30987961016625265, "grad_norm": 1.4140625, "learning_rate": 6.863079935690917e-05, "loss": 1.4731, "step": 2027 }, { "epoch": 0.31003248614561435, "grad_norm": 1.2421875, "learning_rate": 6.862656051800269e-05, "loss": 1.3126, "step": 2028 }, { "epoch": 0.3101853621249761, "grad_norm": 1.359375, "learning_rate": 6.862232174162069e-05, "loss": 1.328, "step": 2029 }, { "epoch": 0.31033823810433786, "grad_norm": 1.3125, "learning_rate": 6.861808302777153e-05, "loss": 1.4362, "step": 2030 }, { "epoch": 0.3104911140836996, "grad_norm": 1.2421875, "learning_rate": 6.861384437646372e-05, "loss": 1.2239, "step": 2031 }, { "epoch": 0.3106439900630613, "grad_norm": 1.5390625, "learning_rate": 6.86096057877057e-05, "loss": 1.6432, "step": 2032 }, { "epoch": 0.31079686604242307, "grad_norm": 1.2109375, "learning_rate": 6.860536726150586e-05, "loss": 1.383, "step": 2033 }, { "epoch": 0.3109497420217848, "grad_norm": 1.359375, "learning_rate": 6.860112879787272e-05, "loss": 1.4829, "step": 2034 }, { "epoch": 0.3111026180011466, "grad_norm": 1.2109375, "learning_rate": 6.859689039681465e-05, "loss": 1.2699, "step": 2035 }, { "epoch": 0.31125549398050834, "grad_norm": 1.34375, "learning_rate": 6.859265205834018e-05, "loss": 1.4874, "step": 2036 }, { "epoch": 0.31140836995987003, "grad_norm": 1.3125, "learning_rate": 6.858841378245768e-05, "loss": 1.5245, "step": 2037 }, { "epoch": 0.3115612459392318, "grad_norm": 1.203125, "learning_rate": 6.858417556917569e-05, "loss": 1.4077, "step": 2038 }, { "epoch": 0.31171412191859355, "grad_norm": 1.3984375, "learning_rate": 6.857993741850257e-05, "loss": 1.5554, "step": 2039 }, { "epoch": 0.3118669978979553, "grad_norm": 1.34375, "learning_rate": 6.857569933044678e-05, "loss": 1.4161, "step": 2040 }, { "epoch": 0.312019873877317, "grad_norm": 1.3125, "learning_rate": 6.857146130501681e-05, "loss": 1.3885, "step": 2041 }, { "epoch": 0.31217274985667876, "grad_norm": 1.5, "learning_rate": 6.856722334222106e-05, "loss": 1.4891, "step": 2042 }, { "epoch": 0.3123256258360405, "grad_norm": 1.328125, "learning_rate": 6.856298544206803e-05, "loss": 1.5905, "step": 2043 }, { "epoch": 0.31247850181540227, "grad_norm": 1.375, "learning_rate": 6.85587476045661e-05, "loss": 1.415, "step": 2044 }, { "epoch": 0.312631377794764, "grad_norm": 1.46875, "learning_rate": 6.855450982972373e-05, "loss": 1.824, "step": 2045 }, { "epoch": 0.3127842537741257, "grad_norm": 1.4453125, "learning_rate": 6.855027211754941e-05, "loss": 1.6205, "step": 2046 }, { "epoch": 0.3129371297534875, "grad_norm": 1.375, "learning_rate": 6.854603446805152e-05, "loss": 1.2987, "step": 2047 }, { "epoch": 0.31309000573284923, "grad_norm": 1.4296875, "learning_rate": 6.85417968812386e-05, "loss": 1.4222, "step": 2048 }, { "epoch": 0.313242881712211, "grad_norm": 1.421875, "learning_rate": 6.8537559357119e-05, "loss": 1.5549, "step": 2049 }, { "epoch": 0.3133957576915727, "grad_norm": 1.3125, "learning_rate": 6.85333218957012e-05, "loss": 1.4176, "step": 2050 }, { "epoch": 0.31354863367093444, "grad_norm": 1.4765625, "learning_rate": 6.852908449699369e-05, "loss": 1.3508, "step": 2051 }, { "epoch": 0.3137015096502962, "grad_norm": 1.3671875, "learning_rate": 6.85248471610048e-05, "loss": 1.4993, "step": 2052 }, { "epoch": 0.31385438562965795, "grad_norm": 1.515625, "learning_rate": 6.852060988774312e-05, "loss": 1.8012, "step": 2053 }, { "epoch": 0.3140072616090197, "grad_norm": 1.3515625, "learning_rate": 6.851637267721696e-05, "loss": 1.4866, "step": 2054 }, { "epoch": 0.3141601375883814, "grad_norm": 1.3125, "learning_rate": 6.851213552943485e-05, "loss": 1.1384, "step": 2055 }, { "epoch": 0.31431301356774316, "grad_norm": 1.3359375, "learning_rate": 6.850789844440519e-05, "loss": 1.4278, "step": 2056 }, { "epoch": 0.3144658895471049, "grad_norm": 1.4765625, "learning_rate": 6.850366142213648e-05, "loss": 1.4468, "step": 2057 }, { "epoch": 0.31461876552646667, "grad_norm": 1.46875, "learning_rate": 6.849942446263713e-05, "loss": 1.5968, "step": 2058 }, { "epoch": 0.31477164150582837, "grad_norm": 1.2421875, "learning_rate": 6.849518756591556e-05, "loss": 1.3678, "step": 2059 }, { "epoch": 0.3149245174851901, "grad_norm": 1.3515625, "learning_rate": 6.849095073198025e-05, "loss": 1.4597, "step": 2060 }, { "epoch": 0.3150773934645519, "grad_norm": 1.328125, "learning_rate": 6.84867139608396e-05, "loss": 1.3584, "step": 2061 }, { "epoch": 0.31523026944391364, "grad_norm": 1.40625, "learning_rate": 6.848247725250211e-05, "loss": 1.17, "step": 2062 }, { "epoch": 0.3153831454232754, "grad_norm": 1.2890625, "learning_rate": 6.847824060697615e-05, "loss": 1.4174, "step": 2063 }, { "epoch": 0.3155360214026371, "grad_norm": 1.4296875, "learning_rate": 6.847400402427027e-05, "loss": 1.5259, "step": 2064 }, { "epoch": 0.31568889738199885, "grad_norm": 1.3046875, "learning_rate": 6.846976750439283e-05, "loss": 1.1841, "step": 2065 }, { "epoch": 0.3158417733613606, "grad_norm": 1.6484375, "learning_rate": 6.846553104735227e-05, "loss": 1.5355, "step": 2066 }, { "epoch": 0.31599464934072236, "grad_norm": 1.3203125, "learning_rate": 6.846129465315712e-05, "loss": 1.5458, "step": 2067 }, { "epoch": 0.31614752532008406, "grad_norm": 1.2578125, "learning_rate": 6.845705832181568e-05, "loss": 1.48, "step": 2068 }, { "epoch": 0.3163004012994458, "grad_norm": 1.3359375, "learning_rate": 6.845282205333654e-05, "loss": 1.4685, "step": 2069 }, { "epoch": 0.31645327727880757, "grad_norm": 1.3671875, "learning_rate": 6.844858584772804e-05, "loss": 1.2787, "step": 2070 }, { "epoch": 0.3166061532581693, "grad_norm": 1.2734375, "learning_rate": 6.844434970499867e-05, "loss": 1.6489, "step": 2071 }, { "epoch": 0.3167590292375311, "grad_norm": 1.4140625, "learning_rate": 6.844011362515688e-05, "loss": 1.4691, "step": 2072 }, { "epoch": 0.3169119052168928, "grad_norm": 1.3046875, "learning_rate": 6.843587760821107e-05, "loss": 1.2754, "step": 2073 }, { "epoch": 0.31706478119625453, "grad_norm": 1.5078125, "learning_rate": 6.843164165416971e-05, "loss": 1.5446, "step": 2074 }, { "epoch": 0.3172176571756163, "grad_norm": 1.34375, "learning_rate": 6.842740576304121e-05, "loss": 1.2498, "step": 2075 }, { "epoch": 0.31737053315497804, "grad_norm": 1.375, "learning_rate": 6.842316993483406e-05, "loss": 1.4201, "step": 2076 }, { "epoch": 0.31752340913433974, "grad_norm": 1.453125, "learning_rate": 6.841893416955667e-05, "loss": 1.5271, "step": 2077 }, { "epoch": 0.3176762851137015, "grad_norm": 1.2421875, "learning_rate": 6.841469846721753e-05, "loss": 1.2507, "step": 2078 }, { "epoch": 0.31782916109306325, "grad_norm": 1.421875, "learning_rate": 6.841046282782503e-05, "loss": 1.3554, "step": 2079 }, { "epoch": 0.317982037072425, "grad_norm": 1.359375, "learning_rate": 6.840622725138758e-05, "loss": 1.2994, "step": 2080 }, { "epoch": 0.31813491305178676, "grad_norm": 1.421875, "learning_rate": 6.840199173791371e-05, "loss": 1.3838, "step": 2081 }, { "epoch": 0.31828778903114846, "grad_norm": 1.328125, "learning_rate": 6.839775628741178e-05, "loss": 1.2822, "step": 2082 }, { "epoch": 0.3184406650105102, "grad_norm": 1.40625, "learning_rate": 6.839352089989033e-05, "loss": 1.535, "step": 2083 }, { "epoch": 0.31859354098987197, "grad_norm": 1.3359375, "learning_rate": 6.838928557535769e-05, "loss": 1.8463, "step": 2084 }, { "epoch": 0.3187464169692337, "grad_norm": 1.3984375, "learning_rate": 6.838505031382234e-05, "loss": 1.4194, "step": 2085 }, { "epoch": 0.3188992929485954, "grad_norm": 1.2421875, "learning_rate": 6.838081511529275e-05, "loss": 1.5828, "step": 2086 }, { "epoch": 0.3190521689279572, "grad_norm": 1.3515625, "learning_rate": 6.837657997977732e-05, "loss": 1.5125, "step": 2087 }, { "epoch": 0.31920504490731894, "grad_norm": 1.6015625, "learning_rate": 6.837234490728455e-05, "loss": 1.6364, "step": 2088 }, { "epoch": 0.3193579208866807, "grad_norm": 1.3515625, "learning_rate": 6.83681098978228e-05, "loss": 1.3841, "step": 2089 }, { "epoch": 0.31951079686604245, "grad_norm": 1.3515625, "learning_rate": 6.836387495140057e-05, "loss": 1.4712, "step": 2090 }, { "epoch": 0.31966367284540415, "grad_norm": 1.3203125, "learning_rate": 6.835964006802626e-05, "loss": 1.4368, "step": 2091 }, { "epoch": 0.3198165488247659, "grad_norm": 1.4296875, "learning_rate": 6.835540524770833e-05, "loss": 1.4836, "step": 2092 }, { "epoch": 0.31996942480412766, "grad_norm": 1.328125, "learning_rate": 6.835117049045526e-05, "loss": 1.5823, "step": 2093 }, { "epoch": 0.3201223007834894, "grad_norm": 1.375, "learning_rate": 6.83469357962754e-05, "loss": 1.4056, "step": 2094 }, { "epoch": 0.3202751767628511, "grad_norm": 1.2578125, "learning_rate": 6.834270116517727e-05, "loss": 1.4181, "step": 2095 }, { "epoch": 0.32042805274221287, "grad_norm": 1.4921875, "learning_rate": 6.833846659716925e-05, "loss": 1.6415, "step": 2096 }, { "epoch": 0.3205809287215746, "grad_norm": 1.3671875, "learning_rate": 6.833423209225983e-05, "loss": 1.6614, "step": 2097 }, { "epoch": 0.3207338047009364, "grad_norm": 1.203125, "learning_rate": 6.832999765045743e-05, "loss": 1.1763, "step": 2098 }, { "epoch": 0.32088668068029813, "grad_norm": 1.4453125, "learning_rate": 6.832576327177045e-05, "loss": 1.6471, "step": 2099 }, { "epoch": 0.32103955665965983, "grad_norm": 1.390625, "learning_rate": 6.832152895620739e-05, "loss": 1.424, "step": 2100 }, { "epoch": 0.3211924326390216, "grad_norm": 1.4296875, "learning_rate": 6.831729470377663e-05, "loss": 1.6162, "step": 2101 }, { "epoch": 0.32134530861838334, "grad_norm": 1.4375, "learning_rate": 6.83130605144867e-05, "loss": 1.6475, "step": 2102 }, { "epoch": 0.3214981845977451, "grad_norm": 1.3203125, "learning_rate": 6.830882638834591e-05, "loss": 1.641, "step": 2103 }, { "epoch": 0.3216510605771068, "grad_norm": 1.2890625, "learning_rate": 6.830459232536282e-05, "loss": 1.3787, "step": 2104 }, { "epoch": 0.32180393655646855, "grad_norm": 1.421875, "learning_rate": 6.83003583255458e-05, "loss": 1.5292, "step": 2105 }, { "epoch": 0.3219568125358303, "grad_norm": 1.3359375, "learning_rate": 6.829612438890327e-05, "loss": 1.3206, "step": 2106 }, { "epoch": 0.32210968851519206, "grad_norm": 1.390625, "learning_rate": 6.829189051544376e-05, "loss": 1.5402, "step": 2107 }, { "epoch": 0.3222625644945538, "grad_norm": 1.4453125, "learning_rate": 6.828765670517557e-05, "loss": 1.6728, "step": 2108 }, { "epoch": 0.3224154404739155, "grad_norm": 1.3828125, "learning_rate": 6.828342295810728e-05, "loss": 1.3688, "step": 2109 }, { "epoch": 0.32256831645327727, "grad_norm": 1.484375, "learning_rate": 6.827918927424722e-05, "loss": 1.6505, "step": 2110 }, { "epoch": 0.322721192432639, "grad_norm": 1.3203125, "learning_rate": 6.82749556536039e-05, "loss": 1.3954, "step": 2111 }, { "epoch": 0.3228740684120008, "grad_norm": 1.46875, "learning_rate": 6.827072209618575e-05, "loss": 1.4643, "step": 2112 }, { "epoch": 0.3230269443913625, "grad_norm": 1.2421875, "learning_rate": 6.826648860200113e-05, "loss": 1.4011, "step": 2113 }, { "epoch": 0.32317982037072424, "grad_norm": 1.328125, "learning_rate": 6.826225517105856e-05, "loss": 1.7025, "step": 2114 }, { "epoch": 0.323332696350086, "grad_norm": 1.453125, "learning_rate": 6.825802180336643e-05, "loss": 1.2815, "step": 2115 }, { "epoch": 0.32348557232944775, "grad_norm": 1.4296875, "learning_rate": 6.825378849893322e-05, "loss": 1.396, "step": 2116 }, { "epoch": 0.3236384483088095, "grad_norm": 1.296875, "learning_rate": 6.824955525776735e-05, "loss": 1.485, "step": 2117 }, { "epoch": 0.3237913242881712, "grad_norm": 1.234375, "learning_rate": 6.824532207987722e-05, "loss": 1.296, "step": 2118 }, { "epoch": 0.32394420026753296, "grad_norm": 1.1953125, "learning_rate": 6.82410889652713e-05, "loss": 1.2599, "step": 2119 }, { "epoch": 0.3240970762468947, "grad_norm": 1.46875, "learning_rate": 6.823685591395801e-05, "loss": 1.3953, "step": 2120 }, { "epoch": 0.32424995222625647, "grad_norm": 1.3515625, "learning_rate": 6.823262292594582e-05, "loss": 1.265, "step": 2121 }, { "epoch": 0.32440282820561817, "grad_norm": 1.2578125, "learning_rate": 6.822839000124313e-05, "loss": 1.1624, "step": 2122 }, { "epoch": 0.3245557041849799, "grad_norm": 1.328125, "learning_rate": 6.822415713985842e-05, "loss": 1.4511, "step": 2123 }, { "epoch": 0.3247085801643417, "grad_norm": 1.2578125, "learning_rate": 6.821992434180007e-05, "loss": 1.2157, "step": 2124 }, { "epoch": 0.32486145614370343, "grad_norm": 1.5234375, "learning_rate": 6.821569160707652e-05, "loss": 1.7497, "step": 2125 }, { "epoch": 0.3250143321230652, "grad_norm": 1.3984375, "learning_rate": 6.821145893569625e-05, "loss": 1.5052, "step": 2126 }, { "epoch": 0.3251672081024269, "grad_norm": 1.25, "learning_rate": 6.820722632766765e-05, "loss": 1.1572, "step": 2127 }, { "epoch": 0.32532008408178864, "grad_norm": 1.390625, "learning_rate": 6.820299378299922e-05, "loss": 1.53, "step": 2128 }, { "epoch": 0.3254729600611504, "grad_norm": 1.5234375, "learning_rate": 6.81987613016993e-05, "loss": 1.5355, "step": 2129 }, { "epoch": 0.32562583604051215, "grad_norm": 1.25, "learning_rate": 6.819452888377639e-05, "loss": 1.4092, "step": 2130 }, { "epoch": 0.32577871201987385, "grad_norm": 1.40625, "learning_rate": 6.819029652923894e-05, "loss": 1.5013, "step": 2131 }, { "epoch": 0.3259315879992356, "grad_norm": 1.4921875, "learning_rate": 6.818606423809533e-05, "loss": 1.4633, "step": 2132 }, { "epoch": 0.32608446397859736, "grad_norm": 1.3828125, "learning_rate": 6.818183201035404e-05, "loss": 1.4004, "step": 2133 }, { "epoch": 0.3262373399579591, "grad_norm": 1.21875, "learning_rate": 6.817759984602346e-05, "loss": 1.4666, "step": 2134 }, { "epoch": 0.32639021593732087, "grad_norm": 1.3203125, "learning_rate": 6.817336774511206e-05, "loss": 1.5537, "step": 2135 }, { "epoch": 0.32654309191668257, "grad_norm": 1.375, "learning_rate": 6.816913570762824e-05, "loss": 1.5659, "step": 2136 }, { "epoch": 0.3266959678960443, "grad_norm": 1.375, "learning_rate": 6.816490373358049e-05, "loss": 1.3366, "step": 2137 }, { "epoch": 0.3268488438754061, "grad_norm": 1.3828125, "learning_rate": 6.816067182297723e-05, "loss": 1.4717, "step": 2138 }, { "epoch": 0.32700171985476784, "grad_norm": 1.3359375, "learning_rate": 6.815643997582682e-05, "loss": 1.4565, "step": 2139 }, { "epoch": 0.32715459583412954, "grad_norm": 1.328125, "learning_rate": 6.815220819213779e-05, "loss": 1.4057, "step": 2140 }, { "epoch": 0.3273074718134913, "grad_norm": 1.5, "learning_rate": 6.81479764719185e-05, "loss": 1.6826, "step": 2141 }, { "epoch": 0.32746034779285305, "grad_norm": 1.2734375, "learning_rate": 6.814374481517743e-05, "loss": 1.4884, "step": 2142 }, { "epoch": 0.3276132237722148, "grad_norm": 1.3515625, "learning_rate": 6.813951322192299e-05, "loss": 1.5055, "step": 2143 }, { "epoch": 0.32776609975157656, "grad_norm": 1.265625, "learning_rate": 6.813528169216365e-05, "loss": 1.2712, "step": 2144 }, { "epoch": 0.32791897573093826, "grad_norm": 1.4765625, "learning_rate": 6.813105022590781e-05, "loss": 1.362, "step": 2145 }, { "epoch": 0.3280718517103, "grad_norm": 1.3515625, "learning_rate": 6.812681882316389e-05, "loss": 1.3357, "step": 2146 }, { "epoch": 0.32822472768966177, "grad_norm": 1.4453125, "learning_rate": 6.812258748394037e-05, "loss": 1.5553, "step": 2147 }, { "epoch": 0.3283776036690235, "grad_norm": 1.3203125, "learning_rate": 6.81183562082456e-05, "loss": 1.3185, "step": 2148 }, { "epoch": 0.3285304796483852, "grad_norm": 1.4453125, "learning_rate": 6.811412499608811e-05, "loss": 1.3989, "step": 2149 }, { "epoch": 0.328683355627747, "grad_norm": 1.296875, "learning_rate": 6.81098938474763e-05, "loss": 1.3998, "step": 2150 }, { "epoch": 0.32883623160710873, "grad_norm": 1.21875, "learning_rate": 6.810566276241854e-05, "loss": 1.2624, "step": 2151 }, { "epoch": 0.3289891075864705, "grad_norm": 1.375, "learning_rate": 6.810143174092336e-05, "loss": 1.4627, "step": 2152 }, { "epoch": 0.32914198356583224, "grad_norm": 1.6171875, "learning_rate": 6.809720078299911e-05, "loss": 1.537, "step": 2153 }, { "epoch": 0.32929485954519394, "grad_norm": 1.40625, "learning_rate": 6.809296988865428e-05, "loss": 1.655, "step": 2154 }, { "epoch": 0.3294477355245557, "grad_norm": 1.5390625, "learning_rate": 6.808873905789725e-05, "loss": 1.3312, "step": 2155 }, { "epoch": 0.32960061150391745, "grad_norm": 1.671875, "learning_rate": 6.808450829073652e-05, "loss": 1.5596, "step": 2156 }, { "epoch": 0.3297534874832792, "grad_norm": 1.25, "learning_rate": 6.80802775871805e-05, "loss": 1.418, "step": 2157 }, { "epoch": 0.3299063634626409, "grad_norm": 1.46875, "learning_rate": 6.807604694723755e-05, "loss": 1.6755, "step": 2158 }, { "epoch": 0.33005923944200266, "grad_norm": 1.3515625, "learning_rate": 6.807181637091617e-05, "loss": 1.6769, "step": 2159 }, { "epoch": 0.3302121154213644, "grad_norm": 1.3671875, "learning_rate": 6.806758585822476e-05, "loss": 1.5178, "step": 2160 }, { "epoch": 0.33036499140072617, "grad_norm": 1.53125, "learning_rate": 6.80633554091718e-05, "loss": 1.503, "step": 2161 }, { "epoch": 0.3305178673800879, "grad_norm": 1.328125, "learning_rate": 6.805912502376565e-05, "loss": 1.3925, "step": 2162 }, { "epoch": 0.3306707433594496, "grad_norm": 1.4375, "learning_rate": 6.805489470201482e-05, "loss": 1.5148, "step": 2163 }, { "epoch": 0.3308236193388114, "grad_norm": 1.328125, "learning_rate": 6.80506644439277e-05, "loss": 1.2264, "step": 2164 }, { "epoch": 0.33097649531817314, "grad_norm": 1.3046875, "learning_rate": 6.804643424951268e-05, "loss": 1.3199, "step": 2165 }, { "epoch": 0.3311293712975349, "grad_norm": 1.6484375, "learning_rate": 6.804220411877826e-05, "loss": 1.4786, "step": 2166 }, { "epoch": 0.3312822472768966, "grad_norm": 1.2578125, "learning_rate": 6.803797405173281e-05, "loss": 1.3411, "step": 2167 }, { "epoch": 0.33143512325625835, "grad_norm": 1.46875, "learning_rate": 6.803374404838484e-05, "loss": 1.7688, "step": 2168 }, { "epoch": 0.3315879992356201, "grad_norm": 1.53125, "learning_rate": 6.802951410874269e-05, "loss": 1.6794, "step": 2169 }, { "epoch": 0.33174087521498186, "grad_norm": 1.3359375, "learning_rate": 6.802528423281484e-05, "loss": 1.1715, "step": 2170 }, { "epoch": 0.3318937511943436, "grad_norm": 1.4140625, "learning_rate": 6.802105442060973e-05, "loss": 1.3487, "step": 2171 }, { "epoch": 0.3320466271737053, "grad_norm": 1.3515625, "learning_rate": 6.801682467213572e-05, "loss": 1.5299, "step": 2172 }, { "epoch": 0.33219950315306707, "grad_norm": 1.375, "learning_rate": 6.801259498740136e-05, "loss": 1.6416, "step": 2173 }, { "epoch": 0.3323523791324288, "grad_norm": 1.3125, "learning_rate": 6.800836536641496e-05, "loss": 1.428, "step": 2174 }, { "epoch": 0.3325052551117906, "grad_norm": 1.34375, "learning_rate": 6.8004135809185e-05, "loss": 1.483, "step": 2175 }, { "epoch": 0.3326581310911523, "grad_norm": 1.6328125, "learning_rate": 6.799990631571995e-05, "loss": 1.3265, "step": 2176 }, { "epoch": 0.33281100707051403, "grad_norm": 1.4140625, "learning_rate": 6.799567688602812e-05, "loss": 1.6009, "step": 2177 }, { "epoch": 0.3329638830498758, "grad_norm": 1.3359375, "learning_rate": 6.79914475201181e-05, "loss": 1.6384, "step": 2178 }, { "epoch": 0.33311675902923754, "grad_norm": 1.375, "learning_rate": 6.798721821799817e-05, "loss": 1.4673, "step": 2179 }, { "epoch": 0.3332696350085993, "grad_norm": 1.421875, "learning_rate": 6.798298897967685e-05, "loss": 1.4282, "step": 2180 }, { "epoch": 0.333422510987961, "grad_norm": 1.28125, "learning_rate": 6.797875980516252e-05, "loss": 1.6189, "step": 2181 }, { "epoch": 0.33357538696732275, "grad_norm": 1.40625, "learning_rate": 6.797453069446364e-05, "loss": 1.562, "step": 2182 }, { "epoch": 0.3337282629466845, "grad_norm": 1.265625, "learning_rate": 6.797030164758866e-05, "loss": 1.5356, "step": 2183 }, { "epoch": 0.33388113892604626, "grad_norm": 1.453125, "learning_rate": 6.796607266454594e-05, "loss": 1.5552, "step": 2184 }, { "epoch": 0.33403401490540796, "grad_norm": 1.3671875, "learning_rate": 6.796184374534396e-05, "loss": 1.2892, "step": 2185 }, { "epoch": 0.3341868908847697, "grad_norm": 1.40625, "learning_rate": 6.795761488999111e-05, "loss": 1.3089, "step": 2186 }, { "epoch": 0.33433976686413147, "grad_norm": 1.421875, "learning_rate": 6.795338609849586e-05, "loss": 1.5649, "step": 2187 }, { "epoch": 0.3344926428434932, "grad_norm": 1.421875, "learning_rate": 6.794915737086658e-05, "loss": 1.5526, "step": 2188 }, { "epoch": 0.334645518822855, "grad_norm": 1.3046875, "learning_rate": 6.794492870711178e-05, "loss": 1.627, "step": 2189 }, { "epoch": 0.3347983948022167, "grad_norm": 1.4765625, "learning_rate": 6.794070010723983e-05, "loss": 1.6119, "step": 2190 }, { "epoch": 0.33495127078157844, "grad_norm": 1.5078125, "learning_rate": 6.793647157125916e-05, "loss": 1.4695, "step": 2191 }, { "epoch": 0.3351041467609402, "grad_norm": 1.34375, "learning_rate": 6.793224309917822e-05, "loss": 1.4323, "step": 2192 }, { "epoch": 0.33525702274030195, "grad_norm": 1.4140625, "learning_rate": 6.792801469100539e-05, "loss": 1.3191, "step": 2193 }, { "epoch": 0.33540989871966365, "grad_norm": 1.2265625, "learning_rate": 6.792378634674917e-05, "loss": 1.2445, "step": 2194 }, { "epoch": 0.3355627746990254, "grad_norm": 1.3671875, "learning_rate": 6.791955806641792e-05, "loss": 1.2754, "step": 2195 }, { "epoch": 0.33571565067838716, "grad_norm": 1.3984375, "learning_rate": 6.791532985002009e-05, "loss": 1.4422, "step": 2196 }, { "epoch": 0.3358685266577489, "grad_norm": 1.390625, "learning_rate": 6.791110169756416e-05, "loss": 1.5616, "step": 2197 }, { "epoch": 0.33602140263711067, "grad_norm": 1.3046875, "learning_rate": 6.790687360905845e-05, "loss": 1.3844, "step": 2198 }, { "epoch": 0.33617427861647237, "grad_norm": 1.453125, "learning_rate": 6.790264558451146e-05, "loss": 1.4458, "step": 2199 }, { "epoch": 0.3363271545958341, "grad_norm": 1.3125, "learning_rate": 6.789841762393158e-05, "loss": 1.2891, "step": 2200 }, { "epoch": 0.3364800305751959, "grad_norm": 1.3046875, "learning_rate": 6.789418972732727e-05, "loss": 1.2408, "step": 2201 }, { "epoch": 0.33663290655455763, "grad_norm": 2.484375, "learning_rate": 6.788996189470694e-05, "loss": 1.5309, "step": 2202 }, { "epoch": 0.33678578253391933, "grad_norm": 1.328125, "learning_rate": 6.788573412607902e-05, "loss": 1.3302, "step": 2203 }, { "epoch": 0.3369386585132811, "grad_norm": 1.3671875, "learning_rate": 6.788150642145194e-05, "loss": 1.3523, "step": 2204 }, { "epoch": 0.33709153449264284, "grad_norm": 1.3046875, "learning_rate": 6.787727878083408e-05, "loss": 1.4111, "step": 2205 }, { "epoch": 0.3372444104720046, "grad_norm": 1.21875, "learning_rate": 6.787305120423393e-05, "loss": 1.3279, "step": 2206 }, { "epoch": 0.33739728645136635, "grad_norm": 1.4765625, "learning_rate": 6.786882369165986e-05, "loss": 1.2703, "step": 2207 }, { "epoch": 0.33755016243072805, "grad_norm": 1.328125, "learning_rate": 6.786459624312039e-05, "loss": 1.5381, "step": 2208 }, { "epoch": 0.3377030384100898, "grad_norm": 1.296875, "learning_rate": 6.786036885862382e-05, "loss": 1.3331, "step": 2209 }, { "epoch": 0.33785591438945156, "grad_norm": 1.21875, "learning_rate": 6.785614153817863e-05, "loss": 1.5247, "step": 2210 }, { "epoch": 0.3380087903688133, "grad_norm": 1.375, "learning_rate": 6.785191428179326e-05, "loss": 1.4226, "step": 2211 }, { "epoch": 0.338161666348175, "grad_norm": 1.375, "learning_rate": 6.784768708947611e-05, "loss": 1.4887, "step": 2212 }, { "epoch": 0.33831454232753677, "grad_norm": 1.4921875, "learning_rate": 6.784345996123565e-05, "loss": 1.4704, "step": 2213 }, { "epoch": 0.3384674183068985, "grad_norm": 1.1796875, "learning_rate": 6.783923289708023e-05, "loss": 1.1296, "step": 2214 }, { "epoch": 0.3386202942862603, "grad_norm": 1.3671875, "learning_rate": 6.783500589701832e-05, "loss": 1.5973, "step": 2215 }, { "epoch": 0.33877317026562204, "grad_norm": 1.515625, "learning_rate": 6.783077896105836e-05, "loss": 1.4583, "step": 2216 }, { "epoch": 0.33892604624498374, "grad_norm": 1.4609375, "learning_rate": 6.782655208920871e-05, "loss": 1.6255, "step": 2217 }, { "epoch": 0.3390789222243455, "grad_norm": 1.515625, "learning_rate": 6.782232528147789e-05, "loss": 1.5024, "step": 2218 }, { "epoch": 0.33923179820370725, "grad_norm": 1.3515625, "learning_rate": 6.781809853787422e-05, "loss": 1.3287, "step": 2219 }, { "epoch": 0.339384674183069, "grad_norm": 1.3125, "learning_rate": 6.781387185840621e-05, "loss": 1.4041, "step": 2220 }, { "epoch": 0.3395375501624307, "grad_norm": 1.328125, "learning_rate": 6.780964524308222e-05, "loss": 1.4541, "step": 2221 }, { "epoch": 0.33969042614179246, "grad_norm": 1.359375, "learning_rate": 6.780541869191071e-05, "loss": 1.5894, "step": 2222 }, { "epoch": 0.3398433021211542, "grad_norm": 1.4765625, "learning_rate": 6.78011922049001e-05, "loss": 1.7736, "step": 2223 }, { "epoch": 0.33999617810051597, "grad_norm": 1.4296875, "learning_rate": 6.77969657820588e-05, "loss": 1.6378, "step": 2224 }, { "epoch": 0.3401490540798777, "grad_norm": 1.328125, "learning_rate": 6.779273942339524e-05, "loss": 1.6151, "step": 2225 }, { "epoch": 0.3403019300592394, "grad_norm": 1.4375, "learning_rate": 6.778851312891782e-05, "loss": 1.7298, "step": 2226 }, { "epoch": 0.3404548060386012, "grad_norm": 1.1953125, "learning_rate": 6.778428689863503e-05, "loss": 1.1504, "step": 2227 }, { "epoch": 0.34060768201796293, "grad_norm": 1.4296875, "learning_rate": 6.778006073255518e-05, "loss": 1.4965, "step": 2228 }, { "epoch": 0.3407605579973247, "grad_norm": 1.2578125, "learning_rate": 6.777583463068682e-05, "loss": 1.2364, "step": 2229 }, { "epoch": 0.3409134339766864, "grad_norm": 1.375, "learning_rate": 6.77716085930383e-05, "loss": 1.7007, "step": 2230 }, { "epoch": 0.34106630995604814, "grad_norm": 1.328125, "learning_rate": 6.776738261961801e-05, "loss": 1.464, "step": 2231 }, { "epoch": 0.3412191859354099, "grad_norm": 1.28125, "learning_rate": 6.776315671043447e-05, "loss": 1.4125, "step": 2232 }, { "epoch": 0.34137206191477165, "grad_norm": 1.46875, "learning_rate": 6.775893086549598e-05, "loss": 1.5148, "step": 2233 }, { "epoch": 0.3415249378941334, "grad_norm": 1.296875, "learning_rate": 6.77547050848111e-05, "loss": 1.2222, "step": 2234 }, { "epoch": 0.3416778138734951, "grad_norm": 1.3046875, "learning_rate": 6.775047936838814e-05, "loss": 1.3937, "step": 2235 }, { "epoch": 0.34183068985285686, "grad_norm": 1.4921875, "learning_rate": 6.774625371623557e-05, "loss": 1.6332, "step": 2236 }, { "epoch": 0.3419835658322186, "grad_norm": 1.3671875, "learning_rate": 6.774202812836184e-05, "loss": 1.5511, "step": 2237 }, { "epoch": 0.34213644181158037, "grad_norm": 1.3828125, "learning_rate": 6.773780260477526e-05, "loss": 1.5499, "step": 2238 }, { "epoch": 0.34228931779094207, "grad_norm": 1.4609375, "learning_rate": 6.773357714548438e-05, "loss": 1.4583, "step": 2239 }, { "epoch": 0.3424421937703038, "grad_norm": 1.25, "learning_rate": 6.772935175049753e-05, "loss": 1.0776, "step": 2240 }, { "epoch": 0.3425950697496656, "grad_norm": 1.3203125, "learning_rate": 6.77251264198232e-05, "loss": 1.584, "step": 2241 }, { "epoch": 0.34274794572902734, "grad_norm": 1.4296875, "learning_rate": 6.772090115346978e-05, "loss": 1.3774, "step": 2242 }, { "epoch": 0.3429008217083891, "grad_norm": 1.5, "learning_rate": 6.771667595144567e-05, "loss": 1.2885, "step": 2243 }, { "epoch": 0.3430536976877508, "grad_norm": 1.3359375, "learning_rate": 6.77124508137593e-05, "loss": 1.4504, "step": 2244 }, { "epoch": 0.34320657366711255, "grad_norm": 1.328125, "learning_rate": 6.770822574041911e-05, "loss": 1.5836, "step": 2245 }, { "epoch": 0.3433594496464743, "grad_norm": 1.2265625, "learning_rate": 6.770400073143351e-05, "loss": 1.3188, "step": 2246 }, { "epoch": 0.34351232562583606, "grad_norm": 1.3671875, "learning_rate": 6.76997757868109e-05, "loss": 1.2699, "step": 2247 }, { "epoch": 0.34366520160519776, "grad_norm": 1.2578125, "learning_rate": 6.769555090655976e-05, "loss": 1.4932, "step": 2248 }, { "epoch": 0.3438180775845595, "grad_norm": 1.53125, "learning_rate": 6.769132609068844e-05, "loss": 1.519, "step": 2249 }, { "epoch": 0.34397095356392127, "grad_norm": 1.40625, "learning_rate": 6.76871013392054e-05, "loss": 1.4708, "step": 2250 }, { "epoch": 0.344123829543283, "grad_norm": 1.3125, "learning_rate": 6.768287665211903e-05, "loss": 1.3219, "step": 2251 }, { "epoch": 0.3442767055226448, "grad_norm": 1.4921875, "learning_rate": 6.767865202943778e-05, "loss": 1.595, "step": 2252 }, { "epoch": 0.3444295815020065, "grad_norm": 1.390625, "learning_rate": 6.767442747117008e-05, "loss": 1.346, "step": 2253 }, { "epoch": 0.34458245748136823, "grad_norm": 1.4453125, "learning_rate": 6.767020297732429e-05, "loss": 1.6157, "step": 2254 }, { "epoch": 0.34473533346073, "grad_norm": 1.453125, "learning_rate": 6.766597854790888e-05, "loss": 1.6022, "step": 2255 }, { "epoch": 0.34488820944009174, "grad_norm": 1.484375, "learning_rate": 6.766175418293225e-05, "loss": 1.682, "step": 2256 }, { "epoch": 0.34504108541945344, "grad_norm": 1.4296875, "learning_rate": 6.765752988240281e-05, "loss": 1.431, "step": 2257 }, { "epoch": 0.3451939613988152, "grad_norm": 1.28125, "learning_rate": 6.765330564632903e-05, "loss": 1.3268, "step": 2258 }, { "epoch": 0.34534683737817695, "grad_norm": 1.390625, "learning_rate": 6.764908147471927e-05, "loss": 1.3872, "step": 2259 }, { "epoch": 0.3454997133575387, "grad_norm": 1.4296875, "learning_rate": 6.764485736758196e-05, "loss": 1.5473, "step": 2260 }, { "epoch": 0.34565258933690046, "grad_norm": 1.3515625, "learning_rate": 6.764063332492551e-05, "loss": 1.4242, "step": 2261 }, { "epoch": 0.34580546531626216, "grad_norm": 1.328125, "learning_rate": 6.76364093467584e-05, "loss": 1.6554, "step": 2262 }, { "epoch": 0.3459583412956239, "grad_norm": 1.5078125, "learning_rate": 6.7632185433089e-05, "loss": 1.6923, "step": 2263 }, { "epoch": 0.34611121727498567, "grad_norm": 1.234375, "learning_rate": 6.762796158392568e-05, "loss": 1.4341, "step": 2264 }, { "epoch": 0.3462640932543474, "grad_norm": 1.4375, "learning_rate": 6.762373779927695e-05, "loss": 1.702, "step": 2265 }, { "epoch": 0.3464169692337091, "grad_norm": 1.3515625, "learning_rate": 6.761951407915116e-05, "loss": 1.5198, "step": 2266 }, { "epoch": 0.3465698452130709, "grad_norm": 1.328125, "learning_rate": 6.761529042355678e-05, "loss": 1.5057, "step": 2267 }, { "epoch": 0.34672272119243264, "grad_norm": 1.4453125, "learning_rate": 6.76110668325022e-05, "loss": 1.4485, "step": 2268 }, { "epoch": 0.3468755971717944, "grad_norm": 1.4375, "learning_rate": 6.760684330599582e-05, "loss": 1.4925, "step": 2269 }, { "epoch": 0.34702847315115615, "grad_norm": 1.484375, "learning_rate": 6.760261984404609e-05, "loss": 1.5142, "step": 2270 }, { "epoch": 0.34718134913051785, "grad_norm": 1.40625, "learning_rate": 6.75983964466614e-05, "loss": 1.3309, "step": 2271 }, { "epoch": 0.3473342251098796, "grad_norm": 1.375, "learning_rate": 6.759417311385019e-05, "loss": 1.2551, "step": 2272 }, { "epoch": 0.34748710108924136, "grad_norm": 1.234375, "learning_rate": 6.758994984562084e-05, "loss": 1.1034, "step": 2273 }, { "epoch": 0.3476399770686031, "grad_norm": 1.359375, "learning_rate": 6.758572664198183e-05, "loss": 1.4033, "step": 2274 }, { "epoch": 0.3477928530479648, "grad_norm": 1.4140625, "learning_rate": 6.758150350294152e-05, "loss": 1.4976, "step": 2275 }, { "epoch": 0.34794572902732657, "grad_norm": 1.390625, "learning_rate": 6.757728042850833e-05, "loss": 1.4925, "step": 2276 }, { "epoch": 0.3480986050066883, "grad_norm": 1.296875, "learning_rate": 6.757305741869073e-05, "loss": 1.3335, "step": 2277 }, { "epoch": 0.3482514809860501, "grad_norm": 1.2890625, "learning_rate": 6.756883447349704e-05, "loss": 1.5058, "step": 2278 }, { "epoch": 0.34840435696541183, "grad_norm": 1.46875, "learning_rate": 6.75646115929358e-05, "loss": 1.3892, "step": 2279 }, { "epoch": 0.34855723294477353, "grad_norm": 1.5390625, "learning_rate": 6.756038877701531e-05, "loss": 1.6103, "step": 2280 }, { "epoch": 0.3487101089241353, "grad_norm": 1.4375, "learning_rate": 6.755616602574406e-05, "loss": 1.4284, "step": 2281 }, { "epoch": 0.34886298490349704, "grad_norm": 1.3125, "learning_rate": 6.755194333913046e-05, "loss": 1.542, "step": 2282 }, { "epoch": 0.3490158608828588, "grad_norm": 1.4453125, "learning_rate": 6.754772071718286e-05, "loss": 1.5376, "step": 2283 }, { "epoch": 0.3491687368622205, "grad_norm": 1.4765625, "learning_rate": 6.754349815990974e-05, "loss": 1.641, "step": 2284 }, { "epoch": 0.34932161284158225, "grad_norm": 1.2890625, "learning_rate": 6.753927566731948e-05, "loss": 1.516, "step": 2285 }, { "epoch": 0.349474488820944, "grad_norm": 1.359375, "learning_rate": 6.753505323942053e-05, "loss": 1.5247, "step": 2286 }, { "epoch": 0.34962736480030576, "grad_norm": 1.2578125, "learning_rate": 6.753083087622127e-05, "loss": 1.2365, "step": 2287 }, { "epoch": 0.3497802407796675, "grad_norm": 1.28125, "learning_rate": 6.752660857773018e-05, "loss": 1.2149, "step": 2288 }, { "epoch": 0.3499331167590292, "grad_norm": 1.328125, "learning_rate": 6.75223863439556e-05, "loss": 1.4193, "step": 2289 }, { "epoch": 0.35008599273839097, "grad_norm": 1.421875, "learning_rate": 6.751816417490594e-05, "loss": 1.5307, "step": 2290 }, { "epoch": 0.3502388687177527, "grad_norm": 1.484375, "learning_rate": 6.751394207058967e-05, "loss": 1.7026, "step": 2291 }, { "epoch": 0.3503917446971145, "grad_norm": 1.4140625, "learning_rate": 6.750972003101516e-05, "loss": 1.5739, "step": 2292 }, { "epoch": 0.3505446206764762, "grad_norm": 1.3515625, "learning_rate": 6.750549805619089e-05, "loss": 1.4219, "step": 2293 }, { "epoch": 0.35069749665583794, "grad_norm": 1.3203125, "learning_rate": 6.750127614612518e-05, "loss": 1.3023, "step": 2294 }, { "epoch": 0.3508503726351997, "grad_norm": 1.3125, "learning_rate": 6.749705430082651e-05, "loss": 1.4475, "step": 2295 }, { "epoch": 0.35100324861456145, "grad_norm": 1.2421875, "learning_rate": 6.749283252030328e-05, "loss": 1.381, "step": 2296 }, { "epoch": 0.3511561245939232, "grad_norm": 1.40625, "learning_rate": 6.748861080456386e-05, "loss": 1.3873, "step": 2297 }, { "epoch": 0.3513090005732849, "grad_norm": 1.3515625, "learning_rate": 6.748438915361677e-05, "loss": 1.3333, "step": 2298 }, { "epoch": 0.35146187655264666, "grad_norm": 1.3984375, "learning_rate": 6.748016756747029e-05, "loss": 1.4939, "step": 2299 }, { "epoch": 0.3516147525320084, "grad_norm": 1.3359375, "learning_rate": 6.747594604613293e-05, "loss": 1.4456, "step": 2300 }, { "epoch": 0.35176762851137017, "grad_norm": 1.34375, "learning_rate": 6.74717245896131e-05, "loss": 1.2814, "step": 2301 }, { "epoch": 0.35192050449073187, "grad_norm": 1.4140625, "learning_rate": 6.74675031979191e-05, "loss": 1.4692, "step": 2302 }, { "epoch": 0.3520733804700936, "grad_norm": 1.34375, "learning_rate": 6.746328187105952e-05, "loss": 1.507, "step": 2303 }, { "epoch": 0.3522262564494554, "grad_norm": 3.140625, "learning_rate": 6.745906060904261e-05, "loss": 1.3042, "step": 2304 }, { "epoch": 0.35237913242881713, "grad_norm": 1.3984375, "learning_rate": 6.745483941187689e-05, "loss": 1.419, "step": 2305 }, { "epoch": 0.3525320084081789, "grad_norm": 1.28125, "learning_rate": 6.745061827957072e-05, "loss": 1.3049, "step": 2306 }, { "epoch": 0.3526848843875406, "grad_norm": 1.3828125, "learning_rate": 6.744639721213253e-05, "loss": 1.3829, "step": 2307 }, { "epoch": 0.35283776036690234, "grad_norm": 1.3359375, "learning_rate": 6.744217620957076e-05, "loss": 1.4154, "step": 2308 }, { "epoch": 0.3529906363462641, "grad_norm": 1.25, "learning_rate": 6.743795527189376e-05, "loss": 1.6073, "step": 2309 }, { "epoch": 0.35314351232562585, "grad_norm": 1.3515625, "learning_rate": 6.743373439910998e-05, "loss": 1.55, "step": 2310 }, { "epoch": 0.35329638830498755, "grad_norm": 1.515625, "learning_rate": 6.742951359122782e-05, "loss": 1.6221, "step": 2311 }, { "epoch": 0.3534492642843493, "grad_norm": 1.15625, "learning_rate": 6.742529284825572e-05, "loss": 1.2449, "step": 2312 }, { "epoch": 0.35360214026371106, "grad_norm": 1.453125, "learning_rate": 6.742107217020202e-05, "loss": 1.5037, "step": 2313 }, { "epoch": 0.3537550162430728, "grad_norm": 1.4609375, "learning_rate": 6.741685155707524e-05, "loss": 1.4209, "step": 2314 }, { "epoch": 0.3539078922224346, "grad_norm": 1.3359375, "learning_rate": 6.74126310088837e-05, "loss": 1.2981, "step": 2315 }, { "epoch": 0.35406076820179627, "grad_norm": 1.5078125, "learning_rate": 6.740841052563584e-05, "loss": 1.5849, "step": 2316 }, { "epoch": 0.354213644181158, "grad_norm": 1.2734375, "learning_rate": 6.740419010734011e-05, "loss": 1.3735, "step": 2317 }, { "epoch": 0.3543665201605198, "grad_norm": 1.2421875, "learning_rate": 6.739996975400482e-05, "loss": 1.3308, "step": 2318 }, { "epoch": 0.35451939613988154, "grad_norm": 1.2265625, "learning_rate": 6.73957494656385e-05, "loss": 1.2159, "step": 2319 }, { "epoch": 0.35467227211924324, "grad_norm": 1.3046875, "learning_rate": 6.739152924224948e-05, "loss": 1.4021, "step": 2320 }, { "epoch": 0.354825148098605, "grad_norm": 1.40625, "learning_rate": 6.73873090838462e-05, "loss": 1.2898, "step": 2321 }, { "epoch": 0.35497802407796675, "grad_norm": 1.3359375, "learning_rate": 6.73830889904371e-05, "loss": 1.5976, "step": 2322 }, { "epoch": 0.3551309000573285, "grad_norm": 1.3671875, "learning_rate": 6.737886896203052e-05, "loss": 1.2814, "step": 2323 }, { "epoch": 0.35528377603669026, "grad_norm": 1.421875, "learning_rate": 6.737464899863491e-05, "loss": 1.8675, "step": 2324 }, { "epoch": 0.35543665201605196, "grad_norm": 1.2421875, "learning_rate": 6.737042910025868e-05, "loss": 1.3319, "step": 2325 }, { "epoch": 0.3555895279954137, "grad_norm": 1.4375, "learning_rate": 6.736620926691024e-05, "loss": 1.2677, "step": 2326 }, { "epoch": 0.35574240397477547, "grad_norm": 1.4609375, "learning_rate": 6.736198949859798e-05, "loss": 1.542, "step": 2327 }, { "epoch": 0.3558952799541372, "grad_norm": 1.5, "learning_rate": 6.735776979533037e-05, "loss": 1.4135, "step": 2328 }, { "epoch": 0.3560481559334989, "grad_norm": 1.3515625, "learning_rate": 6.735355015711575e-05, "loss": 1.5064, "step": 2329 }, { "epoch": 0.3562010319128607, "grad_norm": 1.4375, "learning_rate": 6.734933058396256e-05, "loss": 1.5168, "step": 2330 }, { "epoch": 0.35635390789222243, "grad_norm": 1.515625, "learning_rate": 6.73451110758792e-05, "loss": 1.8362, "step": 2331 }, { "epoch": 0.3565067838715842, "grad_norm": 1.5, "learning_rate": 6.734089163287408e-05, "loss": 1.5793, "step": 2332 }, { "epoch": 0.35665965985094594, "grad_norm": 1.3125, "learning_rate": 6.733667225495564e-05, "loss": 1.3369, "step": 2333 }, { "epoch": 0.35681253583030764, "grad_norm": 1.3046875, "learning_rate": 6.733245294213224e-05, "loss": 1.2924, "step": 2334 }, { "epoch": 0.3569654118096694, "grad_norm": 1.4296875, "learning_rate": 6.73282336944123e-05, "loss": 1.27, "step": 2335 }, { "epoch": 0.35711828778903115, "grad_norm": 1.34375, "learning_rate": 6.732401451180424e-05, "loss": 1.3782, "step": 2336 }, { "epoch": 0.3572711637683929, "grad_norm": 1.375, "learning_rate": 6.731979539431646e-05, "loss": 1.2897, "step": 2337 }, { "epoch": 0.3574240397477546, "grad_norm": 1.3203125, "learning_rate": 6.731557634195742e-05, "loss": 1.3593, "step": 2338 }, { "epoch": 0.35757691572711636, "grad_norm": 1.3203125, "learning_rate": 6.731135735473544e-05, "loss": 1.3722, "step": 2339 }, { "epoch": 0.3577297917064781, "grad_norm": 1.296875, "learning_rate": 6.7307138432659e-05, "loss": 1.2117, "step": 2340 }, { "epoch": 0.3578826676858399, "grad_norm": 1.2578125, "learning_rate": 6.730291957573647e-05, "loss": 1.2248, "step": 2341 }, { "epoch": 0.3580355436652016, "grad_norm": 1.296875, "learning_rate": 6.729870078397625e-05, "loss": 1.4286, "step": 2342 }, { "epoch": 0.3581884196445633, "grad_norm": 1.515625, "learning_rate": 6.729448205738682e-05, "loss": 1.9536, "step": 2343 }, { "epoch": 0.3583412956239251, "grad_norm": 1.1875, "learning_rate": 6.729026339597647e-05, "loss": 1.3911, "step": 2344 }, { "epoch": 0.35849417160328684, "grad_norm": 1.4375, "learning_rate": 6.72860447997537e-05, "loss": 1.5741, "step": 2345 }, { "epoch": 0.3586470475826486, "grad_norm": 1.328125, "learning_rate": 6.728182626872687e-05, "loss": 1.3496, "step": 2346 }, { "epoch": 0.3587999235620103, "grad_norm": 1.2578125, "learning_rate": 6.727760780290441e-05, "loss": 1.3675, "step": 2347 }, { "epoch": 0.35895279954137205, "grad_norm": 1.296875, "learning_rate": 6.727338940229477e-05, "loss": 1.3972, "step": 2348 }, { "epoch": 0.3591056755207338, "grad_norm": 1.3359375, "learning_rate": 6.726917106690624e-05, "loss": 1.4976, "step": 2349 }, { "epoch": 0.35925855150009556, "grad_norm": 1.3046875, "learning_rate": 6.726495279674734e-05, "loss": 1.2319, "step": 2350 }, { "epoch": 0.3594114274794573, "grad_norm": 1.3671875, "learning_rate": 6.72607345918264e-05, "loss": 1.5026, "step": 2351 }, { "epoch": 0.359564303458819, "grad_norm": 1.296875, "learning_rate": 6.725651645215187e-05, "loss": 1.256, "step": 2352 }, { "epoch": 0.35971717943818077, "grad_norm": 1.3203125, "learning_rate": 6.725229837773215e-05, "loss": 1.2834, "step": 2353 }, { "epoch": 0.3598700554175425, "grad_norm": 1.2890625, "learning_rate": 6.724808036857566e-05, "loss": 1.3722, "step": 2354 }, { "epoch": 0.3600229313969043, "grad_norm": 1.4375, "learning_rate": 6.724386242469078e-05, "loss": 1.3842, "step": 2355 }, { "epoch": 0.360175807376266, "grad_norm": 1.34375, "learning_rate": 6.72396445460859e-05, "loss": 1.6585, "step": 2356 }, { "epoch": 0.36032868335562773, "grad_norm": 1.2890625, "learning_rate": 6.723542673276948e-05, "loss": 1.5334, "step": 2357 }, { "epoch": 0.3604815593349895, "grad_norm": 1.3359375, "learning_rate": 6.723120898474984e-05, "loss": 1.1877, "step": 2358 }, { "epoch": 0.36063443531435124, "grad_norm": 1.46875, "learning_rate": 6.722699130203549e-05, "loss": 1.6153, "step": 2359 }, { "epoch": 0.360787311293713, "grad_norm": 1.171875, "learning_rate": 6.722277368463476e-05, "loss": 1.1649, "step": 2360 }, { "epoch": 0.3609401872730747, "grad_norm": 1.2890625, "learning_rate": 6.72185561325561e-05, "loss": 1.2626, "step": 2361 }, { "epoch": 0.36109306325243645, "grad_norm": 1.4921875, "learning_rate": 6.721433864580791e-05, "loss": 1.5767, "step": 2362 }, { "epoch": 0.3612459392317982, "grad_norm": 1.375, "learning_rate": 6.721012122439856e-05, "loss": 1.4812, "step": 2363 }, { "epoch": 0.36139881521115996, "grad_norm": 1.3671875, "learning_rate": 6.720590386833648e-05, "loss": 1.5168, "step": 2364 }, { "epoch": 0.36155169119052166, "grad_norm": 1.2265625, "learning_rate": 6.720168657763006e-05, "loss": 1.3358, "step": 2365 }, { "epoch": 0.3617045671698834, "grad_norm": 1.265625, "learning_rate": 6.719746935228772e-05, "loss": 1.6028, "step": 2366 }, { "epoch": 0.3618574431492452, "grad_norm": 1.265625, "learning_rate": 6.71932521923179e-05, "loss": 1.3013, "step": 2367 }, { "epoch": 0.3620103191286069, "grad_norm": 1.4375, "learning_rate": 6.718903509772892e-05, "loss": 1.3679, "step": 2368 }, { "epoch": 0.3621631951079687, "grad_norm": 1.5234375, "learning_rate": 6.718481806852923e-05, "loss": 1.6892, "step": 2369 }, { "epoch": 0.3623160710873304, "grad_norm": 1.34375, "learning_rate": 6.718060110472724e-05, "loss": 1.2638, "step": 2370 }, { "epoch": 0.36246894706669214, "grad_norm": 1.328125, "learning_rate": 6.717638420633135e-05, "loss": 1.2683, "step": 2371 }, { "epoch": 0.3626218230460539, "grad_norm": 1.3203125, "learning_rate": 6.717216737334995e-05, "loss": 1.3718, "step": 2372 }, { "epoch": 0.36277469902541565, "grad_norm": 1.3203125, "learning_rate": 6.716795060579149e-05, "loss": 1.4728, "step": 2373 }, { "epoch": 0.36292757500477735, "grad_norm": 1.5078125, "learning_rate": 6.716373390366432e-05, "loss": 1.4919, "step": 2374 }, { "epoch": 0.3630804509841391, "grad_norm": 1.3125, "learning_rate": 6.715951726697685e-05, "loss": 1.5172, "step": 2375 }, { "epoch": 0.36323332696350086, "grad_norm": 1.28125, "learning_rate": 6.71553006957375e-05, "loss": 1.4285, "step": 2376 }, { "epoch": 0.3633862029428626, "grad_norm": 1.546875, "learning_rate": 6.715108418995466e-05, "loss": 1.6219, "step": 2377 }, { "epoch": 0.36353907892222437, "grad_norm": 1.34375, "learning_rate": 6.714686774963678e-05, "loss": 1.4743, "step": 2378 }, { "epoch": 0.36369195490158607, "grad_norm": 1.34375, "learning_rate": 6.714265137479219e-05, "loss": 1.5527, "step": 2379 }, { "epoch": 0.3638448308809478, "grad_norm": 1.171875, "learning_rate": 6.713843506542934e-05, "loss": 1.301, "step": 2380 }, { "epoch": 0.3639977068603096, "grad_norm": 1.2734375, "learning_rate": 6.713421882155662e-05, "loss": 1.5351, "step": 2381 }, { "epoch": 0.36415058283967133, "grad_norm": 1.390625, "learning_rate": 6.713000264318242e-05, "loss": 1.4541, "step": 2382 }, { "epoch": 0.36430345881903303, "grad_norm": 1.3671875, "learning_rate": 6.712578653031518e-05, "loss": 1.2487, "step": 2383 }, { "epoch": 0.3644563347983948, "grad_norm": 1.1953125, "learning_rate": 6.712157048296326e-05, "loss": 1.1941, "step": 2384 }, { "epoch": 0.36460921077775654, "grad_norm": 1.484375, "learning_rate": 6.711735450113509e-05, "loss": 1.7405, "step": 2385 }, { "epoch": 0.3647620867571183, "grad_norm": 1.40625, "learning_rate": 6.711313858483903e-05, "loss": 1.5786, "step": 2386 }, { "epoch": 0.36491496273648005, "grad_norm": 1.4140625, "learning_rate": 6.710892273408355e-05, "loss": 1.3924, "step": 2387 }, { "epoch": 0.36506783871584175, "grad_norm": 1.3359375, "learning_rate": 6.710470694887704e-05, "loss": 1.3518, "step": 2388 }, { "epoch": 0.3652207146952035, "grad_norm": 1.296875, "learning_rate": 6.710049122922782e-05, "loss": 1.2511, "step": 2389 }, { "epoch": 0.36537359067456526, "grad_norm": 1.40625, "learning_rate": 6.709627557514438e-05, "loss": 1.653, "step": 2390 }, { "epoch": 0.365526466653927, "grad_norm": 1.390625, "learning_rate": 6.709205998663509e-05, "loss": 1.5921, "step": 2391 }, { "epoch": 0.3656793426332887, "grad_norm": 1.5078125, "learning_rate": 6.708784446370834e-05, "loss": 1.6291, "step": 2392 }, { "epoch": 0.3658322186126505, "grad_norm": 1.421875, "learning_rate": 6.708362900637258e-05, "loss": 1.4824, "step": 2393 }, { "epoch": 0.36598509459201223, "grad_norm": 1.3125, "learning_rate": 6.707941361463612e-05, "loss": 1.4519, "step": 2394 }, { "epoch": 0.366137970571374, "grad_norm": 1.234375, "learning_rate": 6.707519828850743e-05, "loss": 1.407, "step": 2395 }, { "epoch": 0.36629084655073574, "grad_norm": 1.3671875, "learning_rate": 6.707098302799491e-05, "loss": 1.5182, "step": 2396 }, { "epoch": 0.36644372253009744, "grad_norm": 1.34375, "learning_rate": 6.706676783310696e-05, "loss": 1.3512, "step": 2397 }, { "epoch": 0.3665965985094592, "grad_norm": 1.3671875, "learning_rate": 6.706255270385191e-05, "loss": 1.5243, "step": 2398 }, { "epoch": 0.36674947448882095, "grad_norm": 1.5078125, "learning_rate": 6.705833764023827e-05, "loss": 1.6083, "step": 2399 }, { "epoch": 0.3669023504681827, "grad_norm": 1.3359375, "learning_rate": 6.705412264227439e-05, "loss": 1.5374, "step": 2400 }, { "epoch": 0.3670552264475444, "grad_norm": 1.4765625, "learning_rate": 6.704990770996863e-05, "loss": 1.3506, "step": 2401 }, { "epoch": 0.36720810242690616, "grad_norm": 1.3203125, "learning_rate": 6.704569284332948e-05, "loss": 1.5148, "step": 2402 }, { "epoch": 0.3673609784062679, "grad_norm": 1.34375, "learning_rate": 6.704147804236521e-05, "loss": 1.3194, "step": 2403 }, { "epoch": 0.36751385438562967, "grad_norm": 1.3359375, "learning_rate": 6.703726330708438e-05, "loss": 1.5201, "step": 2404 }, { "epoch": 0.3676667303649914, "grad_norm": 1.4609375, "learning_rate": 6.703304863749524e-05, "loss": 1.4678, "step": 2405 }, { "epoch": 0.3678196063443531, "grad_norm": 1.3671875, "learning_rate": 6.70288340336063e-05, "loss": 1.3661, "step": 2406 }, { "epoch": 0.3679724823237149, "grad_norm": 1.375, "learning_rate": 6.702461949542592e-05, "loss": 1.5033, "step": 2407 }, { "epoch": 0.36812535830307663, "grad_norm": 1.125, "learning_rate": 6.702040502296245e-05, "loss": 1.2275, "step": 2408 }, { "epoch": 0.3682782342824384, "grad_norm": 1.5703125, "learning_rate": 6.701619061622437e-05, "loss": 1.5858, "step": 2409 }, { "epoch": 0.3684311102618001, "grad_norm": 1.6484375, "learning_rate": 6.701197627522002e-05, "loss": 1.6184, "step": 2410 }, { "epoch": 0.36858398624116184, "grad_norm": 1.4140625, "learning_rate": 6.700776199995785e-05, "loss": 1.5201, "step": 2411 }, { "epoch": 0.3687368622205236, "grad_norm": 1.3671875, "learning_rate": 6.700354779044619e-05, "loss": 1.4586, "step": 2412 }, { "epoch": 0.36888973819988535, "grad_norm": 1.3671875, "learning_rate": 6.699933364669353e-05, "loss": 1.4562, "step": 2413 }, { "epoch": 0.3690426141792471, "grad_norm": 1.4921875, "learning_rate": 6.699511956870818e-05, "loss": 1.5226, "step": 2414 }, { "epoch": 0.3691954901586088, "grad_norm": 1.359375, "learning_rate": 6.699090555649857e-05, "loss": 1.411, "step": 2415 }, { "epoch": 0.36934836613797056, "grad_norm": 1.3203125, "learning_rate": 6.698669161007312e-05, "loss": 1.262, "step": 2416 }, { "epoch": 0.3695012421173323, "grad_norm": 1.328125, "learning_rate": 6.69824777294402e-05, "loss": 1.3264, "step": 2417 }, { "epoch": 0.3696541180966941, "grad_norm": 1.3125, "learning_rate": 6.697826391460826e-05, "loss": 1.4219, "step": 2418 }, { "epoch": 0.3698069940760558, "grad_norm": 1.1875, "learning_rate": 6.69740501655856e-05, "loss": 1.1849, "step": 2419 }, { "epoch": 0.36995987005541753, "grad_norm": 1.3671875, "learning_rate": 6.69698364823807e-05, "loss": 1.5873, "step": 2420 }, { "epoch": 0.3701127460347793, "grad_norm": 1.3828125, "learning_rate": 6.696562286500195e-05, "loss": 1.3944, "step": 2421 }, { "epoch": 0.37026562201414104, "grad_norm": 1.2890625, "learning_rate": 6.696140931345769e-05, "loss": 1.2951, "step": 2422 }, { "epoch": 0.3704184979935028, "grad_norm": 1.21875, "learning_rate": 6.695719582775641e-05, "loss": 1.4864, "step": 2423 }, { "epoch": 0.3705713739728645, "grad_norm": 1.390625, "learning_rate": 6.695298240790641e-05, "loss": 1.5142, "step": 2424 }, { "epoch": 0.37072424995222625, "grad_norm": 1.4453125, "learning_rate": 6.694876905391615e-05, "loss": 1.4225, "step": 2425 }, { "epoch": 0.370877125931588, "grad_norm": 1.3515625, "learning_rate": 6.694455576579404e-05, "loss": 1.5648, "step": 2426 }, { "epoch": 0.37103000191094976, "grad_norm": 1.4140625, "learning_rate": 6.694034254354836e-05, "loss": 1.8622, "step": 2427 }, { "epoch": 0.37118287789031146, "grad_norm": 1.2578125, "learning_rate": 6.693612938718766e-05, "loss": 1.4057, "step": 2428 }, { "epoch": 0.3713357538696732, "grad_norm": 1.359375, "learning_rate": 6.693191629672023e-05, "loss": 1.3941, "step": 2429 }, { "epoch": 0.37148862984903497, "grad_norm": 1.3125, "learning_rate": 6.692770327215452e-05, "loss": 1.4376, "step": 2430 }, { "epoch": 0.3716415058283967, "grad_norm": 1.3671875, "learning_rate": 6.692349031349889e-05, "loss": 1.5512, "step": 2431 }, { "epoch": 0.3717943818077585, "grad_norm": 1.3203125, "learning_rate": 6.691927742076178e-05, "loss": 1.415, "step": 2432 }, { "epoch": 0.3719472577871202, "grad_norm": 1.2890625, "learning_rate": 6.69150645939516e-05, "loss": 1.2591, "step": 2433 }, { "epoch": 0.37210013376648193, "grad_norm": 1.359375, "learning_rate": 6.691085183307663e-05, "loss": 1.3841, "step": 2434 }, { "epoch": 0.3722530097458437, "grad_norm": 1.265625, "learning_rate": 6.690663913814539e-05, "loss": 1.3184, "step": 2435 }, { "epoch": 0.37240588572520544, "grad_norm": 1.4765625, "learning_rate": 6.69024265091662e-05, "loss": 1.4955, "step": 2436 }, { "epoch": 0.37255876170456714, "grad_norm": 1.296875, "learning_rate": 6.689821394614752e-05, "loss": 1.3643, "step": 2437 }, { "epoch": 0.3727116376839289, "grad_norm": 1.328125, "learning_rate": 6.689400144909766e-05, "loss": 1.4346, "step": 2438 }, { "epoch": 0.37286451366329065, "grad_norm": 1.4375, "learning_rate": 6.688978901802513e-05, "loss": 1.6561, "step": 2439 }, { "epoch": 0.3730173896426524, "grad_norm": 1.3828125, "learning_rate": 6.688557665293823e-05, "loss": 1.5386, "step": 2440 }, { "epoch": 0.37317026562201416, "grad_norm": 1.2421875, "learning_rate": 6.688136435384535e-05, "loss": 1.3854, "step": 2441 }, { "epoch": 0.37332314160137586, "grad_norm": 1.25, "learning_rate": 6.687715212075499e-05, "loss": 1.3854, "step": 2442 }, { "epoch": 0.3734760175807376, "grad_norm": 1.296875, "learning_rate": 6.687293995367539e-05, "loss": 1.4617, "step": 2443 }, { "epoch": 0.3736288935600994, "grad_norm": 1.390625, "learning_rate": 6.68687278526151e-05, "loss": 1.4967, "step": 2444 }, { "epoch": 0.37378176953946113, "grad_norm": 1.34375, "learning_rate": 6.68645158175824e-05, "loss": 1.3731, "step": 2445 }, { "epoch": 0.37393464551882283, "grad_norm": 1.21875, "learning_rate": 6.686030384858574e-05, "loss": 1.0517, "step": 2446 }, { "epoch": 0.3740875214981846, "grad_norm": 1.265625, "learning_rate": 6.685609194563353e-05, "loss": 1.3133, "step": 2447 }, { "epoch": 0.37424039747754634, "grad_norm": 1.2734375, "learning_rate": 6.685188010873409e-05, "loss": 1.3909, "step": 2448 }, { "epoch": 0.3743932734569081, "grad_norm": 1.4921875, "learning_rate": 6.684766833789588e-05, "loss": 1.4041, "step": 2449 }, { "epoch": 0.37454614943626985, "grad_norm": 1.359375, "learning_rate": 6.684345663312725e-05, "loss": 1.2984, "step": 2450 }, { "epoch": 0.37469902541563155, "grad_norm": 1.3125, "learning_rate": 6.683924499443665e-05, "loss": 1.4966, "step": 2451 }, { "epoch": 0.3748519013949933, "grad_norm": 1.328125, "learning_rate": 6.683503342183239e-05, "loss": 1.4401, "step": 2452 }, { "epoch": 0.37500477737435506, "grad_norm": 1.359375, "learning_rate": 6.683082191532299e-05, "loss": 1.5122, "step": 2453 }, { "epoch": 0.3751576533537168, "grad_norm": 1.359375, "learning_rate": 6.682661047491671e-05, "loss": 1.5003, "step": 2454 }, { "epoch": 0.3753105293330785, "grad_norm": 1.3515625, "learning_rate": 6.682239910062201e-05, "loss": 1.6376, "step": 2455 }, { "epoch": 0.37546340531244027, "grad_norm": 1.359375, "learning_rate": 6.681818779244727e-05, "loss": 1.251, "step": 2456 }, { "epoch": 0.375616281291802, "grad_norm": 1.328125, "learning_rate": 6.681397655040087e-05, "loss": 1.5418, "step": 2457 }, { "epoch": 0.3757691572711638, "grad_norm": 1.3359375, "learning_rate": 6.680976537449126e-05, "loss": 1.7143, "step": 2458 }, { "epoch": 0.37592203325052553, "grad_norm": 1.359375, "learning_rate": 6.680555426472676e-05, "loss": 1.5163, "step": 2459 }, { "epoch": 0.37607490922988723, "grad_norm": 1.4765625, "learning_rate": 6.680134322111578e-05, "loss": 1.6316, "step": 2460 }, { "epoch": 0.376227785209249, "grad_norm": 1.359375, "learning_rate": 6.679713224366675e-05, "loss": 1.388, "step": 2461 }, { "epoch": 0.37638066118861074, "grad_norm": 1.3984375, "learning_rate": 6.679292133238802e-05, "loss": 1.6006, "step": 2462 }, { "epoch": 0.3765335371679725, "grad_norm": 1.3359375, "learning_rate": 6.6788710487288e-05, "loss": 1.3725, "step": 2463 }, { "epoch": 0.3766864131473342, "grad_norm": 1.359375, "learning_rate": 6.678449970837508e-05, "loss": 1.3471, "step": 2464 }, { "epoch": 0.37683928912669595, "grad_norm": 1.2421875, "learning_rate": 6.678028899565767e-05, "loss": 1.3297, "step": 2465 }, { "epoch": 0.3769921651060577, "grad_norm": 1.28125, "learning_rate": 6.677607834914412e-05, "loss": 1.422, "step": 2466 }, { "epoch": 0.37714504108541946, "grad_norm": 1.25, "learning_rate": 6.677186776884284e-05, "loss": 1.3144, "step": 2467 }, { "epoch": 0.3772979170647812, "grad_norm": 1.3515625, "learning_rate": 6.676765725476226e-05, "loss": 1.7186, "step": 2468 }, { "epoch": 0.3774507930441429, "grad_norm": 1.4453125, "learning_rate": 6.676344680691071e-05, "loss": 1.3951, "step": 2469 }, { "epoch": 0.3776036690235047, "grad_norm": 1.5390625, "learning_rate": 6.675923642529662e-05, "loss": 1.5363, "step": 2470 }, { "epoch": 0.37775654500286643, "grad_norm": 1.40625, "learning_rate": 6.675502610992834e-05, "loss": 1.4049, "step": 2471 }, { "epoch": 0.3779094209822282, "grad_norm": 1.453125, "learning_rate": 6.675081586081433e-05, "loss": 1.5189, "step": 2472 }, { "epoch": 0.3780622969615899, "grad_norm": 1.2109375, "learning_rate": 6.674660567796293e-05, "loss": 1.1768, "step": 2473 }, { "epoch": 0.37821517294095164, "grad_norm": 1.484375, "learning_rate": 6.674239556138254e-05, "loss": 1.4859, "step": 2474 }, { "epoch": 0.3783680489203134, "grad_norm": 1.4140625, "learning_rate": 6.673818551108155e-05, "loss": 1.468, "step": 2475 }, { "epoch": 0.37852092489967515, "grad_norm": 1.4765625, "learning_rate": 6.673397552706833e-05, "loss": 1.4183, "step": 2476 }, { "epoch": 0.3786738008790369, "grad_norm": 1.2421875, "learning_rate": 6.672976560935131e-05, "loss": 1.259, "step": 2477 }, { "epoch": 0.3788266768583986, "grad_norm": 1.515625, "learning_rate": 6.672555575793885e-05, "loss": 1.7106, "step": 2478 }, { "epoch": 0.37897955283776036, "grad_norm": 1.4375, "learning_rate": 6.67213459728394e-05, "loss": 1.4, "step": 2479 }, { "epoch": 0.3791324288171221, "grad_norm": 1.265625, "learning_rate": 6.671713625406126e-05, "loss": 1.3027, "step": 2480 }, { "epoch": 0.37928530479648387, "grad_norm": 1.3515625, "learning_rate": 6.671292660161286e-05, "loss": 1.5308, "step": 2481 }, { "epoch": 0.37943818077584557, "grad_norm": 1.40625, "learning_rate": 6.670871701550261e-05, "loss": 1.5171, "step": 2482 }, { "epoch": 0.3795910567552073, "grad_norm": 1.390625, "learning_rate": 6.670450749573884e-05, "loss": 1.5184, "step": 2483 }, { "epoch": 0.3797439327345691, "grad_norm": 1.4140625, "learning_rate": 6.670029804233003e-05, "loss": 1.2497, "step": 2484 }, { "epoch": 0.37989680871393083, "grad_norm": 1.3046875, "learning_rate": 6.669608865528449e-05, "loss": 1.4082, "step": 2485 }, { "epoch": 0.3800496846932926, "grad_norm": 1.40625, "learning_rate": 6.669187933461064e-05, "loss": 1.5547, "step": 2486 }, { "epoch": 0.3802025606726543, "grad_norm": 1.4609375, "learning_rate": 6.668767008031688e-05, "loss": 1.2935, "step": 2487 }, { "epoch": 0.38035543665201604, "grad_norm": 1.5703125, "learning_rate": 6.668346089241155e-05, "loss": 1.3122, "step": 2488 }, { "epoch": 0.3805083126313778, "grad_norm": 1.5546875, "learning_rate": 6.66792517709031e-05, "loss": 1.5845, "step": 2489 }, { "epoch": 0.38066118861073955, "grad_norm": 1.4296875, "learning_rate": 6.667504271579987e-05, "loss": 1.4901, "step": 2490 }, { "epoch": 0.38081406459010125, "grad_norm": 1.484375, "learning_rate": 6.66708337271103e-05, "loss": 1.473, "step": 2491 }, { "epoch": 0.380966940569463, "grad_norm": 1.328125, "learning_rate": 6.666662480484275e-05, "loss": 1.4374, "step": 2492 }, { "epoch": 0.38111981654882476, "grad_norm": 1.3828125, "learning_rate": 6.666241594900558e-05, "loss": 1.7417, "step": 2493 }, { "epoch": 0.3812726925281865, "grad_norm": 1.2734375, "learning_rate": 6.665820715960722e-05, "loss": 1.3665, "step": 2494 }, { "epoch": 0.3814255685075483, "grad_norm": 1.328125, "learning_rate": 6.665399843665601e-05, "loss": 1.2936, "step": 2495 }, { "epoch": 0.38157844448691, "grad_norm": 1.625, "learning_rate": 6.66497897801604e-05, "loss": 1.5983, "step": 2496 }, { "epoch": 0.38173132046627173, "grad_norm": 1.3828125, "learning_rate": 6.664558119012871e-05, "loss": 1.565, "step": 2497 }, { "epoch": 0.3818841964456335, "grad_norm": 1.328125, "learning_rate": 6.664137266656943e-05, "loss": 1.5306, "step": 2498 }, { "epoch": 0.38203707242499524, "grad_norm": 1.3203125, "learning_rate": 6.663716420949085e-05, "loss": 1.5282, "step": 2499 }, { "epoch": 0.38218994840435694, "grad_norm": 1.171875, "learning_rate": 6.663295581890137e-05, "loss": 1.1983, "step": 2500 }, { "epoch": 0.3823428243837187, "grad_norm": 1.6171875, "learning_rate": 6.66287474948094e-05, "loss": 1.2734, "step": 2501 }, { "epoch": 0.38249570036308045, "grad_norm": 1.296875, "learning_rate": 6.662453923722332e-05, "loss": 1.2736, "step": 2502 }, { "epoch": 0.3826485763424422, "grad_norm": 1.4609375, "learning_rate": 6.662033104615155e-05, "loss": 1.3431, "step": 2503 }, { "epoch": 0.38280145232180396, "grad_norm": 1.484375, "learning_rate": 6.661612292160241e-05, "loss": 1.5416, "step": 2504 }, { "epoch": 0.38295432830116566, "grad_norm": 1.5078125, "learning_rate": 6.661191486358433e-05, "loss": 1.5642, "step": 2505 }, { "epoch": 0.3831072042805274, "grad_norm": 1.2578125, "learning_rate": 6.66077068721057e-05, "loss": 1.3036, "step": 2506 }, { "epoch": 0.38326008025988917, "grad_norm": 1.28125, "learning_rate": 6.660349894717488e-05, "loss": 1.55, "step": 2507 }, { "epoch": 0.3834129562392509, "grad_norm": 1.421875, "learning_rate": 6.65992910888003e-05, "loss": 1.3896, "step": 2508 }, { "epoch": 0.3835658322186126, "grad_norm": 1.3203125, "learning_rate": 6.659508329699027e-05, "loss": 1.5123, "step": 2509 }, { "epoch": 0.3837187081979744, "grad_norm": 1.40625, "learning_rate": 6.659087557175325e-05, "loss": 1.5339, "step": 2510 }, { "epoch": 0.38387158417733613, "grad_norm": 1.46875, "learning_rate": 6.658666791309758e-05, "loss": 1.4827, "step": 2511 }, { "epoch": 0.3840244601566979, "grad_norm": 1.3359375, "learning_rate": 6.658246032103167e-05, "loss": 1.4618, "step": 2512 }, { "epoch": 0.38417733613605964, "grad_norm": 1.4609375, "learning_rate": 6.657825279556394e-05, "loss": 1.5952, "step": 2513 }, { "epoch": 0.38433021211542134, "grad_norm": 1.3984375, "learning_rate": 6.657404533670266e-05, "loss": 1.5184, "step": 2514 }, { "epoch": 0.3844830880947831, "grad_norm": 1.3359375, "learning_rate": 6.656983794445633e-05, "loss": 1.349, "step": 2515 }, { "epoch": 0.38463596407414485, "grad_norm": 1.3671875, "learning_rate": 6.656563061883327e-05, "loss": 1.3705, "step": 2516 }, { "epoch": 0.3847888400535066, "grad_norm": 1.4375, "learning_rate": 6.65614233598419e-05, "loss": 1.479, "step": 2517 }, { "epoch": 0.3849417160328683, "grad_norm": 1.5, "learning_rate": 6.655721616749063e-05, "loss": 1.7473, "step": 2518 }, { "epoch": 0.38509459201223006, "grad_norm": 1.4140625, "learning_rate": 6.655300904178776e-05, "loss": 1.665, "step": 2519 }, { "epoch": 0.3852474679915918, "grad_norm": 1.40625, "learning_rate": 6.654880198274173e-05, "loss": 1.3896, "step": 2520 }, { "epoch": 0.3854003439709536, "grad_norm": 1.1953125, "learning_rate": 6.654459499036091e-05, "loss": 1.2395, "step": 2521 }, { "epoch": 0.38555321995031533, "grad_norm": 1.6640625, "learning_rate": 6.654038806465373e-05, "loss": 1.5704, "step": 2522 }, { "epoch": 0.38570609592967703, "grad_norm": 1.234375, "learning_rate": 6.653618120562847e-05, "loss": 1.452, "step": 2523 }, { "epoch": 0.3858589719090388, "grad_norm": 1.3125, "learning_rate": 6.653197441329364e-05, "loss": 1.4756, "step": 2524 }, { "epoch": 0.38601184788840054, "grad_norm": 1.3515625, "learning_rate": 6.652776768765754e-05, "loss": 1.2813, "step": 2525 }, { "epoch": 0.3861647238677623, "grad_norm": 1.3359375, "learning_rate": 6.652356102872856e-05, "loss": 1.2626, "step": 2526 }, { "epoch": 0.386317599847124, "grad_norm": 1.15625, "learning_rate": 6.651935443651512e-05, "loss": 1.0679, "step": 2527 }, { "epoch": 0.38647047582648575, "grad_norm": 1.421875, "learning_rate": 6.651514791102555e-05, "loss": 1.28, "step": 2528 }, { "epoch": 0.3866233518058475, "grad_norm": 1.484375, "learning_rate": 6.65109414522683e-05, "loss": 1.4155, "step": 2529 }, { "epoch": 0.38677622778520926, "grad_norm": 1.2578125, "learning_rate": 6.65067350602517e-05, "loss": 1.2256, "step": 2530 }, { "epoch": 0.386929103764571, "grad_norm": 1.4375, "learning_rate": 6.650252873498416e-05, "loss": 1.3293, "step": 2531 }, { "epoch": 0.3870819797439327, "grad_norm": 1.265625, "learning_rate": 6.649832247647407e-05, "loss": 1.2571, "step": 2532 }, { "epoch": 0.38723485572329447, "grad_norm": 1.296875, "learning_rate": 6.649411628472977e-05, "loss": 1.3759, "step": 2533 }, { "epoch": 0.3873877317026562, "grad_norm": 1.265625, "learning_rate": 6.648991015975969e-05, "loss": 1.3263, "step": 2534 }, { "epoch": 0.387540607682018, "grad_norm": 1.2421875, "learning_rate": 6.648570410157216e-05, "loss": 1.15, "step": 2535 }, { "epoch": 0.3876934836613797, "grad_norm": 1.2578125, "learning_rate": 6.648149811017563e-05, "loss": 1.3411, "step": 2536 }, { "epoch": 0.38784635964074143, "grad_norm": 1.3828125, "learning_rate": 6.64772921855784e-05, "loss": 1.2332, "step": 2537 }, { "epoch": 0.3879992356201032, "grad_norm": 1.2734375, "learning_rate": 6.647308632778896e-05, "loss": 1.3868, "step": 2538 }, { "epoch": 0.38815211159946494, "grad_norm": 1.328125, "learning_rate": 6.646888053681561e-05, "loss": 1.3153, "step": 2539 }, { "epoch": 0.3883049875788267, "grad_norm": 1.0859375, "learning_rate": 6.646467481266673e-05, "loss": 0.9907, "step": 2540 }, { "epoch": 0.3884578635581884, "grad_norm": 1.5, "learning_rate": 6.646046915535074e-05, "loss": 1.334, "step": 2541 }, { "epoch": 0.38861073953755015, "grad_norm": 1.390625, "learning_rate": 6.645626356487598e-05, "loss": 1.5064, "step": 2542 }, { "epoch": 0.3887636155169119, "grad_norm": 1.296875, "learning_rate": 6.64520580412509e-05, "loss": 1.4652, "step": 2543 }, { "epoch": 0.38891649149627366, "grad_norm": 1.296875, "learning_rate": 6.64478525844838e-05, "loss": 1.2713, "step": 2544 }, { "epoch": 0.38906936747563536, "grad_norm": 1.3203125, "learning_rate": 6.644364719458314e-05, "loss": 1.2917, "step": 2545 }, { "epoch": 0.3892222434549971, "grad_norm": 1.234375, "learning_rate": 6.643944187155724e-05, "loss": 1.361, "step": 2546 }, { "epoch": 0.3893751194343589, "grad_norm": 1.3046875, "learning_rate": 6.643523661541447e-05, "loss": 1.4169, "step": 2547 }, { "epoch": 0.38952799541372063, "grad_norm": 1.328125, "learning_rate": 6.64310314261633e-05, "loss": 1.5073, "step": 2548 }, { "epoch": 0.3896808713930824, "grad_norm": 1.453125, "learning_rate": 6.642682630381201e-05, "loss": 1.4812, "step": 2549 }, { "epoch": 0.3898337473724441, "grad_norm": 1.4609375, "learning_rate": 6.642262124836903e-05, "loss": 1.4057, "step": 2550 }, { "epoch": 0.38998662335180584, "grad_norm": 1.453125, "learning_rate": 6.641841625984277e-05, "loss": 1.4331, "step": 2551 }, { "epoch": 0.3901394993311676, "grad_norm": 1.3515625, "learning_rate": 6.64142113382415e-05, "loss": 1.3267, "step": 2552 }, { "epoch": 0.39029237531052935, "grad_norm": 1.3984375, "learning_rate": 6.641000648357374e-05, "loss": 1.3518, "step": 2553 }, { "epoch": 0.39044525128989105, "grad_norm": 1.46875, "learning_rate": 6.640580169584775e-05, "loss": 1.6391, "step": 2554 }, { "epoch": 0.3905981272692528, "grad_norm": 1.296875, "learning_rate": 6.6401596975072e-05, "loss": 1.3096, "step": 2555 }, { "epoch": 0.39075100324861456, "grad_norm": 1.3984375, "learning_rate": 6.639739232125481e-05, "loss": 1.5711, "step": 2556 }, { "epoch": 0.3909038792279763, "grad_norm": 1.390625, "learning_rate": 6.63931877344046e-05, "loss": 1.6962, "step": 2557 }, { "epoch": 0.39105675520733807, "grad_norm": 1.3828125, "learning_rate": 6.638898321452973e-05, "loss": 1.3163, "step": 2558 }, { "epoch": 0.39120963118669977, "grad_norm": 1.4609375, "learning_rate": 6.638477876163856e-05, "loss": 1.651, "step": 2559 }, { "epoch": 0.3913625071660615, "grad_norm": 1.3984375, "learning_rate": 6.638057437573951e-05, "loss": 1.3396, "step": 2560 }, { "epoch": 0.3915153831454233, "grad_norm": 1.3203125, "learning_rate": 6.637637005684091e-05, "loss": 1.3618, "step": 2561 }, { "epoch": 0.39166825912478503, "grad_norm": 1.4375, "learning_rate": 6.637216580495122e-05, "loss": 1.4646, "step": 2562 }, { "epoch": 0.39182113510414673, "grad_norm": 1.2890625, "learning_rate": 6.63679616200787e-05, "loss": 1.2836, "step": 2563 }, { "epoch": 0.3919740110835085, "grad_norm": 1.3515625, "learning_rate": 6.636375750223185e-05, "loss": 1.4206, "step": 2564 }, { "epoch": 0.39212688706287024, "grad_norm": 1.453125, "learning_rate": 6.635955345141898e-05, "loss": 1.6436, "step": 2565 }, { "epoch": 0.392279763042232, "grad_norm": 1.3359375, "learning_rate": 6.635534946764845e-05, "loss": 1.4546, "step": 2566 }, { "epoch": 0.39243263902159375, "grad_norm": 1.3359375, "learning_rate": 6.635114555092873e-05, "loss": 1.41, "step": 2567 }, { "epoch": 0.39258551500095545, "grad_norm": 1.515625, "learning_rate": 6.634694170126806e-05, "loss": 1.4378, "step": 2568 }, { "epoch": 0.3927383909803172, "grad_norm": 1.4296875, "learning_rate": 6.634273791867496e-05, "loss": 1.3193, "step": 2569 }, { "epoch": 0.39289126695967896, "grad_norm": 1.4453125, "learning_rate": 6.63385342031577e-05, "loss": 1.4676, "step": 2570 }, { "epoch": 0.3930441429390407, "grad_norm": 1.546875, "learning_rate": 6.633433055472474e-05, "loss": 1.4883, "step": 2571 }, { "epoch": 0.3931970189184024, "grad_norm": 1.21875, "learning_rate": 6.633012697338441e-05, "loss": 1.3672, "step": 2572 }, { "epoch": 0.3933498948977642, "grad_norm": 1.40625, "learning_rate": 6.632592345914506e-05, "loss": 1.42, "step": 2573 }, { "epoch": 0.39350277087712593, "grad_norm": 1.390625, "learning_rate": 6.632172001201513e-05, "loss": 1.4726, "step": 2574 }, { "epoch": 0.3936556468564877, "grad_norm": 1.375, "learning_rate": 6.631751663200296e-05, "loss": 1.3826, "step": 2575 }, { "epoch": 0.39380852283584944, "grad_norm": 1.28125, "learning_rate": 6.631331331911695e-05, "loss": 1.3742, "step": 2576 }, { "epoch": 0.39396139881521114, "grad_norm": 1.3203125, "learning_rate": 6.630911007336543e-05, "loss": 1.4844, "step": 2577 }, { "epoch": 0.3941142747945729, "grad_norm": 1.1640625, "learning_rate": 6.630490689475686e-05, "loss": 1.2451, "step": 2578 }, { "epoch": 0.39426715077393465, "grad_norm": 1.234375, "learning_rate": 6.630070378329955e-05, "loss": 1.2993, "step": 2579 }, { "epoch": 0.3944200267532964, "grad_norm": 1.3359375, "learning_rate": 6.629650073900188e-05, "loss": 1.2945, "step": 2580 }, { "epoch": 0.3945729027326581, "grad_norm": 1.4296875, "learning_rate": 6.629229776187224e-05, "loss": 1.6979, "step": 2581 }, { "epoch": 0.39472577871201986, "grad_norm": 1.21875, "learning_rate": 6.6288094851919e-05, "loss": 1.4987, "step": 2582 }, { "epoch": 0.3948786546913816, "grad_norm": 1.3125, "learning_rate": 6.628389200915059e-05, "loss": 1.4767, "step": 2583 }, { "epoch": 0.39503153067074337, "grad_norm": 1.4921875, "learning_rate": 6.62796892335753e-05, "loss": 1.4536, "step": 2584 }, { "epoch": 0.3951844066501051, "grad_norm": 1.4453125, "learning_rate": 6.627548652520153e-05, "loss": 1.4213, "step": 2585 }, { "epoch": 0.3953372826294668, "grad_norm": 1.3828125, "learning_rate": 6.62712838840377e-05, "loss": 1.6423, "step": 2586 }, { "epoch": 0.3954901586088286, "grad_norm": 1.5078125, "learning_rate": 6.626708131009212e-05, "loss": 1.4303, "step": 2587 }, { "epoch": 0.39564303458819033, "grad_norm": 1.359375, "learning_rate": 6.626287880337324e-05, "loss": 1.6162, "step": 2588 }, { "epoch": 0.3957959105675521, "grad_norm": 1.28125, "learning_rate": 6.625867636388935e-05, "loss": 1.4618, "step": 2589 }, { "epoch": 0.3959487865469138, "grad_norm": 1.421875, "learning_rate": 6.62544739916489e-05, "loss": 1.6134, "step": 2590 }, { "epoch": 0.39610166252627554, "grad_norm": 1.3125, "learning_rate": 6.625027168666023e-05, "loss": 1.3467, "step": 2591 }, { "epoch": 0.3962545385056373, "grad_norm": 1.40625, "learning_rate": 6.62460694489317e-05, "loss": 1.343, "step": 2592 }, { "epoch": 0.39640741448499905, "grad_norm": 1.3671875, "learning_rate": 6.624186727847174e-05, "loss": 1.463, "step": 2593 }, { "epoch": 0.3965602904643608, "grad_norm": 1.3203125, "learning_rate": 6.623766517528867e-05, "loss": 1.3832, "step": 2594 }, { "epoch": 0.3967131664437225, "grad_norm": 1.421875, "learning_rate": 6.62334631393909e-05, "loss": 1.6537, "step": 2595 }, { "epoch": 0.39686604242308426, "grad_norm": 1.5, "learning_rate": 6.622926117078674e-05, "loss": 1.849, "step": 2596 }, { "epoch": 0.397018918402446, "grad_norm": 1.328125, "learning_rate": 6.622505926948466e-05, "loss": 1.3225, "step": 2597 }, { "epoch": 0.3971717943818078, "grad_norm": 1.375, "learning_rate": 6.6220857435493e-05, "loss": 1.4767, "step": 2598 }, { "epoch": 0.3973246703611695, "grad_norm": 1.25, "learning_rate": 6.621665566882008e-05, "loss": 1.1008, "step": 2599 }, { "epoch": 0.39747754634053123, "grad_norm": 1.34375, "learning_rate": 6.621245396947433e-05, "loss": 1.3228, "step": 2600 }, { "epoch": 0.397630422319893, "grad_norm": 1.390625, "learning_rate": 6.620825233746409e-05, "loss": 1.4419, "step": 2601 }, { "epoch": 0.39778329829925474, "grad_norm": 1.296875, "learning_rate": 6.620405077279776e-05, "loss": 1.3603, "step": 2602 }, { "epoch": 0.3979361742786165, "grad_norm": 1.3828125, "learning_rate": 6.619984927548369e-05, "loss": 1.6713, "step": 2603 }, { "epoch": 0.3980890502579782, "grad_norm": 1.46875, "learning_rate": 6.619564784553031e-05, "loss": 1.8594, "step": 2604 }, { "epoch": 0.39824192623733995, "grad_norm": 1.484375, "learning_rate": 6.619144648294593e-05, "loss": 1.5318, "step": 2605 }, { "epoch": 0.3983948022167017, "grad_norm": 1.453125, "learning_rate": 6.618724518773891e-05, "loss": 1.6837, "step": 2606 }, { "epoch": 0.39854767819606346, "grad_norm": 1.28125, "learning_rate": 6.618304395991772e-05, "loss": 1.386, "step": 2607 }, { "epoch": 0.39870055417542516, "grad_norm": 1.3984375, "learning_rate": 6.617884279949059e-05, "loss": 1.4886, "step": 2608 }, { "epoch": 0.3988534301547869, "grad_norm": 1.5, "learning_rate": 6.617464170646605e-05, "loss": 1.5508, "step": 2609 }, { "epoch": 0.39900630613414867, "grad_norm": 1.265625, "learning_rate": 6.617044068085236e-05, "loss": 1.312, "step": 2610 }, { "epoch": 0.3991591821135104, "grad_norm": 1.375, "learning_rate": 6.616623972265789e-05, "loss": 1.5203, "step": 2611 }, { "epoch": 0.3993120580928722, "grad_norm": 1.625, "learning_rate": 6.616203883189112e-05, "loss": 1.6588, "step": 2612 }, { "epoch": 0.3994649340722339, "grad_norm": 1.375, "learning_rate": 6.61578380085603e-05, "loss": 1.3462, "step": 2613 }, { "epoch": 0.39961781005159563, "grad_norm": 1.328125, "learning_rate": 6.615363725267388e-05, "loss": 1.2965, "step": 2614 }, { "epoch": 0.3997706860309574, "grad_norm": 1.3046875, "learning_rate": 6.614943656424015e-05, "loss": 1.534, "step": 2615 }, { "epoch": 0.39992356201031914, "grad_norm": 1.3671875, "learning_rate": 6.614523594326758e-05, "loss": 1.3846, "step": 2616 }, { "epoch": 0.40007643798968084, "grad_norm": 1.484375, "learning_rate": 6.614103538976451e-05, "loss": 1.7665, "step": 2617 }, { "epoch": 0.4002293139690426, "grad_norm": 1.3515625, "learning_rate": 6.613683490373928e-05, "loss": 1.6331, "step": 2618 }, { "epoch": 0.40038218994840435, "grad_norm": 1.296875, "learning_rate": 6.613263448520027e-05, "loss": 1.2409, "step": 2619 }, { "epoch": 0.4005350659277661, "grad_norm": 1.359375, "learning_rate": 6.612843413415586e-05, "loss": 1.3577, "step": 2620 }, { "epoch": 0.40068794190712786, "grad_norm": 1.28125, "learning_rate": 6.612423385061443e-05, "loss": 1.3395, "step": 2621 }, { "epoch": 0.40084081788648956, "grad_norm": 1.53125, "learning_rate": 6.612003363458432e-05, "loss": 1.6573, "step": 2622 }, { "epoch": 0.4009936938658513, "grad_norm": 1.4140625, "learning_rate": 6.611583348607397e-05, "loss": 1.4436, "step": 2623 }, { "epoch": 0.4011465698452131, "grad_norm": 1.5078125, "learning_rate": 6.611163340509167e-05, "loss": 1.2495, "step": 2624 }, { "epoch": 0.40129944582457483, "grad_norm": 1.40625, "learning_rate": 6.610743339164582e-05, "loss": 1.3666, "step": 2625 }, { "epoch": 0.40145232180393653, "grad_norm": 1.3203125, "learning_rate": 6.61032334457448e-05, "loss": 1.6099, "step": 2626 }, { "epoch": 0.4016051977832983, "grad_norm": 1.4765625, "learning_rate": 6.609903356739697e-05, "loss": 1.6725, "step": 2627 }, { "epoch": 0.40175807376266004, "grad_norm": 1.2734375, "learning_rate": 6.609483375661073e-05, "loss": 1.2779, "step": 2628 }, { "epoch": 0.4019109497420218, "grad_norm": 1.234375, "learning_rate": 6.60906340133944e-05, "loss": 1.255, "step": 2629 }, { "epoch": 0.40206382572138355, "grad_norm": 1.4765625, "learning_rate": 6.608643433775637e-05, "loss": 1.7146, "step": 2630 }, { "epoch": 0.40221670170074525, "grad_norm": 1.3671875, "learning_rate": 6.608223472970503e-05, "loss": 1.4532, "step": 2631 }, { "epoch": 0.402369577680107, "grad_norm": 1.375, "learning_rate": 6.60780351892487e-05, "loss": 1.6233, "step": 2632 }, { "epoch": 0.40252245365946876, "grad_norm": 1.40625, "learning_rate": 6.607383571639583e-05, "loss": 1.5305, "step": 2633 }, { "epoch": 0.4026753296388305, "grad_norm": 1.328125, "learning_rate": 6.606963631115471e-05, "loss": 1.3889, "step": 2634 }, { "epoch": 0.4028282056181922, "grad_norm": 1.3671875, "learning_rate": 6.606543697353374e-05, "loss": 1.3714, "step": 2635 }, { "epoch": 0.40298108159755397, "grad_norm": 1.3984375, "learning_rate": 6.606123770354129e-05, "loss": 1.426, "step": 2636 }, { "epoch": 0.4031339575769157, "grad_norm": 1.4375, "learning_rate": 6.605703850118573e-05, "loss": 1.5239, "step": 2637 }, { "epoch": 0.4032868335562775, "grad_norm": 1.5, "learning_rate": 6.605283936647545e-05, "loss": 1.7352, "step": 2638 }, { "epoch": 0.40343970953563923, "grad_norm": 1.3515625, "learning_rate": 6.604864029941875e-05, "loss": 1.4092, "step": 2639 }, { "epoch": 0.40359258551500093, "grad_norm": 1.390625, "learning_rate": 6.604444130002408e-05, "loss": 1.252, "step": 2640 }, { "epoch": 0.4037454614943627, "grad_norm": 1.3984375, "learning_rate": 6.604024236829974e-05, "loss": 1.2403, "step": 2641 }, { "epoch": 0.40389833747372444, "grad_norm": 1.5, "learning_rate": 6.603604350425416e-05, "loss": 1.6821, "step": 2642 }, { "epoch": 0.4040512134530862, "grad_norm": 1.3515625, "learning_rate": 6.603184470789568e-05, "loss": 1.5033, "step": 2643 }, { "epoch": 0.4042040894324479, "grad_norm": 1.3125, "learning_rate": 6.602764597923264e-05, "loss": 1.4999, "step": 2644 }, { "epoch": 0.40435696541180965, "grad_norm": 1.265625, "learning_rate": 6.602344731827343e-05, "loss": 1.5002, "step": 2645 }, { "epoch": 0.4045098413911714, "grad_norm": 1.4921875, "learning_rate": 6.601924872502643e-05, "loss": 1.7039, "step": 2646 }, { "epoch": 0.40466271737053316, "grad_norm": 1.3671875, "learning_rate": 6.601505019950003e-05, "loss": 1.2404, "step": 2647 }, { "epoch": 0.4048155933498949, "grad_norm": 1.2734375, "learning_rate": 6.601085174170249e-05, "loss": 1.1209, "step": 2648 }, { "epoch": 0.4049684693292566, "grad_norm": 1.390625, "learning_rate": 6.600665335164232e-05, "loss": 1.5118, "step": 2649 }, { "epoch": 0.4051213453086184, "grad_norm": 1.28125, "learning_rate": 6.60024550293278e-05, "loss": 1.5033, "step": 2650 }, { "epoch": 0.40527422128798013, "grad_norm": 1.2734375, "learning_rate": 6.59982567747673e-05, "loss": 1.2457, "step": 2651 }, { "epoch": 0.4054270972673419, "grad_norm": 1.3984375, "learning_rate": 6.599405858796924e-05, "loss": 1.7758, "step": 2652 }, { "epoch": 0.4055799732467036, "grad_norm": 1.3125, "learning_rate": 6.598986046894189e-05, "loss": 1.3628, "step": 2653 }, { "epoch": 0.40573284922606534, "grad_norm": 1.421875, "learning_rate": 6.598566241769374e-05, "loss": 1.3034, "step": 2654 }, { "epoch": 0.4058857252054271, "grad_norm": 1.453125, "learning_rate": 6.598146443423305e-05, "loss": 1.2927, "step": 2655 }, { "epoch": 0.40603860118478885, "grad_norm": 1.3359375, "learning_rate": 6.597726651856824e-05, "loss": 1.5004, "step": 2656 }, { "epoch": 0.4061914771641506, "grad_norm": 1.21875, "learning_rate": 6.59730686707077e-05, "loss": 1.2027, "step": 2657 }, { "epoch": 0.4063443531435123, "grad_norm": 1.4921875, "learning_rate": 6.596887089065971e-05, "loss": 1.4489, "step": 2658 }, { "epoch": 0.40649722912287406, "grad_norm": 1.3359375, "learning_rate": 6.59646731784327e-05, "loss": 1.5122, "step": 2659 }, { "epoch": 0.4066501051022358, "grad_norm": 1.3515625, "learning_rate": 6.596047553403501e-05, "loss": 1.5257, "step": 2660 }, { "epoch": 0.40680298108159757, "grad_norm": 1.484375, "learning_rate": 6.595627795747503e-05, "loss": 1.6857, "step": 2661 }, { "epoch": 0.40695585706095927, "grad_norm": 1.421875, "learning_rate": 6.59520804487611e-05, "loss": 1.5167, "step": 2662 }, { "epoch": 0.407108733040321, "grad_norm": 1.1875, "learning_rate": 6.594788300790164e-05, "loss": 1.2375, "step": 2663 }, { "epoch": 0.4072616090196828, "grad_norm": 1.59375, "learning_rate": 6.594368563490496e-05, "loss": 1.5355, "step": 2664 }, { "epoch": 0.40741448499904453, "grad_norm": 1.3828125, "learning_rate": 6.593948832977941e-05, "loss": 1.5386, "step": 2665 }, { "epoch": 0.4075673609784063, "grad_norm": 1.3046875, "learning_rate": 6.59352910925334e-05, "loss": 1.3897, "step": 2666 }, { "epoch": 0.407720236957768, "grad_norm": 1.3515625, "learning_rate": 6.593109392317527e-05, "loss": 1.4069, "step": 2667 }, { "epoch": 0.40787311293712974, "grad_norm": 1.2578125, "learning_rate": 6.592689682171341e-05, "loss": 1.2475, "step": 2668 }, { "epoch": 0.4080259889164915, "grad_norm": 1.4453125, "learning_rate": 6.592269978815616e-05, "loss": 1.3617, "step": 2669 }, { "epoch": 0.40817886489585326, "grad_norm": 1.296875, "learning_rate": 6.591850282251187e-05, "loss": 1.4667, "step": 2670 }, { "epoch": 0.40833174087521495, "grad_norm": 1.4140625, "learning_rate": 6.591430592478895e-05, "loss": 1.3, "step": 2671 }, { "epoch": 0.4084846168545767, "grad_norm": 1.46875, "learning_rate": 6.591010909499572e-05, "loss": 1.3561, "step": 2672 }, { "epoch": 0.40863749283393846, "grad_norm": 1.3515625, "learning_rate": 6.59059123331406e-05, "loss": 1.2307, "step": 2673 }, { "epoch": 0.4087903688133002, "grad_norm": 1.421875, "learning_rate": 6.590171563923187e-05, "loss": 1.7031, "step": 2674 }, { "epoch": 0.408943244792662, "grad_norm": 1.3984375, "learning_rate": 6.589751901327796e-05, "loss": 1.5473, "step": 2675 }, { "epoch": 0.4090961207720237, "grad_norm": 1.265625, "learning_rate": 6.589332245528726e-05, "loss": 1.2603, "step": 2676 }, { "epoch": 0.40924899675138543, "grad_norm": 1.3046875, "learning_rate": 6.5889125965268e-05, "loss": 1.4121, "step": 2677 }, { "epoch": 0.4094018727307472, "grad_norm": 1.4140625, "learning_rate": 6.588492954322869e-05, "loss": 1.4877, "step": 2678 }, { "epoch": 0.40955474871010894, "grad_norm": 1.328125, "learning_rate": 6.588073318917761e-05, "loss": 1.4872, "step": 2679 }, { "epoch": 0.40970762468947064, "grad_norm": 1.5546875, "learning_rate": 6.587653690312316e-05, "loss": 1.6289, "step": 2680 }, { "epoch": 0.4098605006688324, "grad_norm": 1.453125, "learning_rate": 6.587234068507367e-05, "loss": 1.3966, "step": 2681 }, { "epoch": 0.41001337664819415, "grad_norm": 1.4296875, "learning_rate": 6.586814453503753e-05, "loss": 1.3922, "step": 2682 }, { "epoch": 0.4101662526275559, "grad_norm": 1.2734375, "learning_rate": 6.586394845302314e-05, "loss": 1.3842, "step": 2683 }, { "epoch": 0.41031912860691766, "grad_norm": 1.296875, "learning_rate": 6.585975243903876e-05, "loss": 1.2102, "step": 2684 }, { "epoch": 0.41047200458627936, "grad_norm": 1.3828125, "learning_rate": 6.585555649309283e-05, "loss": 1.4064, "step": 2685 }, { "epoch": 0.4106248805656411, "grad_norm": 1.3515625, "learning_rate": 6.585136061519368e-05, "loss": 1.4728, "step": 2686 }, { "epoch": 0.41077775654500287, "grad_norm": 1.2890625, "learning_rate": 6.584716480534971e-05, "loss": 1.8553, "step": 2687 }, { "epoch": 0.4109306325243646, "grad_norm": 1.3046875, "learning_rate": 6.584296906356921e-05, "loss": 1.1931, "step": 2688 }, { "epoch": 0.4110835085037263, "grad_norm": 1.3828125, "learning_rate": 6.583877338986065e-05, "loss": 1.3831, "step": 2689 }, { "epoch": 0.4112363844830881, "grad_norm": 1.3125, "learning_rate": 6.58345777842323e-05, "loss": 1.3494, "step": 2690 }, { "epoch": 0.41138926046244984, "grad_norm": 1.265625, "learning_rate": 6.583038224669251e-05, "loss": 1.3027, "step": 2691 }, { "epoch": 0.4115421364418116, "grad_norm": 1.3828125, "learning_rate": 6.582618677724976e-05, "loss": 1.2931, "step": 2692 }, { "epoch": 0.41169501242117335, "grad_norm": 1.359375, "learning_rate": 6.582199137591225e-05, "loss": 1.5355, "step": 2693 }, { "epoch": 0.41184788840053504, "grad_norm": 1.53125, "learning_rate": 6.581779604268848e-05, "loss": 1.5785, "step": 2694 }, { "epoch": 0.4120007643798968, "grad_norm": 1.3515625, "learning_rate": 6.581360077758674e-05, "loss": 1.4175, "step": 2695 }, { "epoch": 0.41215364035925856, "grad_norm": 1.421875, "learning_rate": 6.580940558061539e-05, "loss": 1.4856, "step": 2696 }, { "epoch": 0.4123065163386203, "grad_norm": 1.3828125, "learning_rate": 6.580521045178286e-05, "loss": 1.3551, "step": 2697 }, { "epoch": 0.412459392317982, "grad_norm": 1.3671875, "learning_rate": 6.58010153910974e-05, "loss": 1.4296, "step": 2698 }, { "epoch": 0.41261226829734376, "grad_norm": 1.2890625, "learning_rate": 6.579682039856744e-05, "loss": 1.4606, "step": 2699 }, { "epoch": 0.4127651442767055, "grad_norm": 1.2890625, "learning_rate": 6.579262547420132e-05, "loss": 1.6763, "step": 2700 }, { "epoch": 0.4129180202560673, "grad_norm": 1.3984375, "learning_rate": 6.578843061800743e-05, "loss": 1.2646, "step": 2701 }, { "epoch": 0.41307089623542903, "grad_norm": 1.4765625, "learning_rate": 6.578423582999408e-05, "loss": 1.6309, "step": 2702 }, { "epoch": 0.41322377221479073, "grad_norm": 1.515625, "learning_rate": 6.57800411101697e-05, "loss": 1.6614, "step": 2703 }, { "epoch": 0.4133766481941525, "grad_norm": 1.546875, "learning_rate": 6.577584645854258e-05, "loss": 1.375, "step": 2704 }, { "epoch": 0.41352952417351424, "grad_norm": 1.3984375, "learning_rate": 6.57716518751211e-05, "loss": 1.378, "step": 2705 }, { "epoch": 0.413682400152876, "grad_norm": 1.5078125, "learning_rate": 6.576745735991364e-05, "loss": 1.6502, "step": 2706 }, { "epoch": 0.4138352761322377, "grad_norm": 1.3125, "learning_rate": 6.576326291292852e-05, "loss": 1.2862, "step": 2707 }, { "epoch": 0.41398815211159945, "grad_norm": 1.3984375, "learning_rate": 6.575906853417418e-05, "loss": 1.3451, "step": 2708 }, { "epoch": 0.4141410280909612, "grad_norm": 1.3125, "learning_rate": 6.575487422365889e-05, "loss": 1.3081, "step": 2709 }, { "epoch": 0.41429390407032296, "grad_norm": 1.3359375, "learning_rate": 6.575067998139102e-05, "loss": 1.3193, "step": 2710 }, { "epoch": 0.4144467800496847, "grad_norm": 1.34375, "learning_rate": 6.574648580737896e-05, "loss": 1.4521, "step": 2711 }, { "epoch": 0.4145996560290464, "grad_norm": 1.28125, "learning_rate": 6.574229170163107e-05, "loss": 1.6296, "step": 2712 }, { "epoch": 0.41475253200840817, "grad_norm": 1.3828125, "learning_rate": 6.57380976641557e-05, "loss": 1.3734, "step": 2713 }, { "epoch": 0.4149054079877699, "grad_norm": 1.34375, "learning_rate": 6.573390369496119e-05, "loss": 1.3668, "step": 2714 }, { "epoch": 0.4150582839671317, "grad_norm": 1.46875, "learning_rate": 6.572970979405595e-05, "loss": 1.5867, "step": 2715 }, { "epoch": 0.4152111599464934, "grad_norm": 1.3359375, "learning_rate": 6.572551596144827e-05, "loss": 1.6005, "step": 2716 }, { "epoch": 0.41536403592585514, "grad_norm": 1.3671875, "learning_rate": 6.572132219714656e-05, "loss": 1.6608, "step": 2717 }, { "epoch": 0.4155169119052169, "grad_norm": 1.421875, "learning_rate": 6.571712850115917e-05, "loss": 1.4247, "step": 2718 }, { "epoch": 0.41566978788457865, "grad_norm": 1.4609375, "learning_rate": 6.571293487349441e-05, "loss": 1.6799, "step": 2719 }, { "epoch": 0.4158226638639404, "grad_norm": 1.3671875, "learning_rate": 6.57087413141607e-05, "loss": 1.3524, "step": 2720 }, { "epoch": 0.4159755398433021, "grad_norm": 1.3046875, "learning_rate": 6.570454782316634e-05, "loss": 1.4037, "step": 2721 }, { "epoch": 0.41612841582266386, "grad_norm": 1.609375, "learning_rate": 6.570035440051975e-05, "loss": 1.7313, "step": 2722 }, { "epoch": 0.4162812918020256, "grad_norm": 1.109375, "learning_rate": 6.569616104622927e-05, "loss": 1.123, "step": 2723 }, { "epoch": 0.41643416778138737, "grad_norm": 1.5, "learning_rate": 6.56919677603032e-05, "loss": 1.6281, "step": 2724 }, { "epoch": 0.41658704376074907, "grad_norm": 1.3984375, "learning_rate": 6.568777454274997e-05, "loss": 1.5606, "step": 2725 }, { "epoch": 0.4167399197401108, "grad_norm": 1.3203125, "learning_rate": 6.568358139357787e-05, "loss": 1.3058, "step": 2726 }, { "epoch": 0.4168927957194726, "grad_norm": 1.546875, "learning_rate": 6.567938831279532e-05, "loss": 1.725, "step": 2727 }, { "epoch": 0.41704567169883433, "grad_norm": 1.375, "learning_rate": 6.567519530041063e-05, "loss": 1.3984, "step": 2728 }, { "epoch": 0.4171985476781961, "grad_norm": 1.359375, "learning_rate": 6.567100235643222e-05, "loss": 1.4679, "step": 2729 }, { "epoch": 0.4173514236575578, "grad_norm": 1.265625, "learning_rate": 6.566680948086837e-05, "loss": 1.4622, "step": 2730 }, { "epoch": 0.41750429963691954, "grad_norm": 1.4375, "learning_rate": 6.566261667372745e-05, "loss": 1.4722, "step": 2731 }, { "epoch": 0.4176571756162813, "grad_norm": 1.328125, "learning_rate": 6.565842393501788e-05, "loss": 1.2332, "step": 2732 }, { "epoch": 0.41781005159564305, "grad_norm": 1.28125, "learning_rate": 6.56542312647479e-05, "loss": 1.4797, "step": 2733 }, { "epoch": 0.41796292757500475, "grad_norm": 1.3515625, "learning_rate": 6.5650038662926e-05, "loss": 1.4333, "step": 2734 }, { "epoch": 0.4181158035543665, "grad_norm": 1.4453125, "learning_rate": 6.564584612956046e-05, "loss": 1.624, "step": 2735 }, { "epoch": 0.41826867953372826, "grad_norm": 1.3828125, "learning_rate": 6.564165366465962e-05, "loss": 1.258, "step": 2736 }, { "epoch": 0.41842155551309, "grad_norm": 1.28125, "learning_rate": 6.56374612682319e-05, "loss": 1.1758, "step": 2737 }, { "epoch": 0.41857443149245177, "grad_norm": 1.578125, "learning_rate": 6.563326894028554e-05, "loss": 1.8142, "step": 2738 }, { "epoch": 0.41872730747181347, "grad_norm": 1.21875, "learning_rate": 6.562907668082905e-05, "loss": 0.8553, "step": 2739 }, { "epoch": 0.4188801834511752, "grad_norm": 1.359375, "learning_rate": 6.562488448987065e-05, "loss": 1.4334, "step": 2740 }, { "epoch": 0.419033059430537, "grad_norm": 1.265625, "learning_rate": 6.562069236741878e-05, "loss": 1.3887, "step": 2741 }, { "epoch": 0.41918593540989874, "grad_norm": 1.484375, "learning_rate": 6.561650031348178e-05, "loss": 1.5594, "step": 2742 }, { "epoch": 0.41933881138926044, "grad_norm": 1.390625, "learning_rate": 6.561230832806795e-05, "loss": 1.4133, "step": 2743 }, { "epoch": 0.4194916873686222, "grad_norm": 1.3359375, "learning_rate": 6.560811641118572e-05, "loss": 1.3907, "step": 2744 }, { "epoch": 0.41964456334798395, "grad_norm": 1.34375, "learning_rate": 6.560392456284336e-05, "loss": 1.402, "step": 2745 }, { "epoch": 0.4197974393273457, "grad_norm": 1.3359375, "learning_rate": 6.559973278304932e-05, "loss": 1.0544, "step": 2746 }, { "epoch": 0.41995031530670746, "grad_norm": 1.609375, "learning_rate": 6.559554107181186e-05, "loss": 1.5654, "step": 2747 }, { "epoch": 0.42010319128606916, "grad_norm": 1.4609375, "learning_rate": 6.559134942913942e-05, "loss": 1.5136, "step": 2748 }, { "epoch": 0.4202560672654309, "grad_norm": 1.3671875, "learning_rate": 6.55871578550403e-05, "loss": 1.3306, "step": 2749 }, { "epoch": 0.42040894324479267, "grad_norm": 1.28125, "learning_rate": 6.558296634952286e-05, "loss": 1.1601, "step": 2750 }, { "epoch": 0.4205618192241544, "grad_norm": 1.46875, "learning_rate": 6.557877491259545e-05, "loss": 1.6638, "step": 2751 }, { "epoch": 0.4207146952035161, "grad_norm": 1.4453125, "learning_rate": 6.557458354426645e-05, "loss": 1.5166, "step": 2752 }, { "epoch": 0.4208675711828779, "grad_norm": 1.359375, "learning_rate": 6.55703922445442e-05, "loss": 1.4814, "step": 2753 }, { "epoch": 0.42102044716223963, "grad_norm": 1.359375, "learning_rate": 6.556620101343702e-05, "loss": 1.3766, "step": 2754 }, { "epoch": 0.4211733231416014, "grad_norm": 1.34375, "learning_rate": 6.556200985095331e-05, "loss": 1.2485, "step": 2755 }, { "epoch": 0.42132619912096314, "grad_norm": 1.2578125, "learning_rate": 6.55578187571014e-05, "loss": 1.36, "step": 2756 }, { "epoch": 0.42147907510032484, "grad_norm": 1.328125, "learning_rate": 6.555362773188965e-05, "loss": 1.2025, "step": 2757 }, { "epoch": 0.4216319510796866, "grad_norm": 1.4921875, "learning_rate": 6.554943677532643e-05, "loss": 1.3864, "step": 2758 }, { "epoch": 0.42178482705904835, "grad_norm": 1.6015625, "learning_rate": 6.554524588742004e-05, "loss": 1.7266, "step": 2759 }, { "epoch": 0.4219377030384101, "grad_norm": 1.4765625, "learning_rate": 6.554105506817887e-05, "loss": 1.7885, "step": 2760 }, { "epoch": 0.4220905790177718, "grad_norm": 1.4296875, "learning_rate": 6.553686431761123e-05, "loss": 1.2901, "step": 2761 }, { "epoch": 0.42224345499713356, "grad_norm": 1.390625, "learning_rate": 6.553267363572555e-05, "loss": 1.4513, "step": 2762 }, { "epoch": 0.4223963309764953, "grad_norm": 1.5703125, "learning_rate": 6.552848302253016e-05, "loss": 1.7201, "step": 2763 }, { "epoch": 0.42254920695585707, "grad_norm": 1.421875, "learning_rate": 6.552429247803335e-05, "loss": 1.4923, "step": 2764 }, { "epoch": 0.4227020829352188, "grad_norm": 1.2578125, "learning_rate": 6.552010200224352e-05, "loss": 1.5992, "step": 2765 }, { "epoch": 0.4228549589145805, "grad_norm": 1.6171875, "learning_rate": 6.551591159516899e-05, "loss": 1.442, "step": 2766 }, { "epoch": 0.4230078348939423, "grad_norm": 1.5390625, "learning_rate": 6.551172125681818e-05, "loss": 1.3919, "step": 2767 }, { "epoch": 0.42316071087330404, "grad_norm": 1.25, "learning_rate": 6.550753098719937e-05, "loss": 1.4168, "step": 2768 }, { "epoch": 0.4233135868526658, "grad_norm": 1.34375, "learning_rate": 6.550334078632092e-05, "loss": 1.374, "step": 2769 }, { "epoch": 0.4234664628320275, "grad_norm": 1.46875, "learning_rate": 6.549915065419122e-05, "loss": 1.5875, "step": 2770 }, { "epoch": 0.42361933881138925, "grad_norm": 1.484375, "learning_rate": 6.549496059081857e-05, "loss": 1.7285, "step": 2771 }, { "epoch": 0.423772214790751, "grad_norm": 1.421875, "learning_rate": 6.549077059621138e-05, "loss": 1.3452, "step": 2772 }, { "epoch": 0.42392509077011276, "grad_norm": 1.484375, "learning_rate": 6.548658067037791e-05, "loss": 1.4874, "step": 2773 }, { "epoch": 0.4240779667494745, "grad_norm": 1.21875, "learning_rate": 6.548239081332664e-05, "loss": 1.3469, "step": 2774 }, { "epoch": 0.4242308427288362, "grad_norm": 1.53125, "learning_rate": 6.547820102506583e-05, "loss": 1.4795, "step": 2775 }, { "epoch": 0.42438371870819797, "grad_norm": 1.390625, "learning_rate": 6.54740113056038e-05, "loss": 1.4394, "step": 2776 }, { "epoch": 0.4245365946875597, "grad_norm": 1.2734375, "learning_rate": 6.546982165494901e-05, "loss": 1.1537, "step": 2777 }, { "epoch": 0.4246894706669215, "grad_norm": 1.328125, "learning_rate": 6.546563207310967e-05, "loss": 1.4269, "step": 2778 }, { "epoch": 0.4248423466462832, "grad_norm": 1.1953125, "learning_rate": 6.546144256009427e-05, "loss": 1.1527, "step": 2779 }, { "epoch": 0.42499522262564493, "grad_norm": 1.4921875, "learning_rate": 6.545725311591106e-05, "loss": 1.4725, "step": 2780 }, { "epoch": 0.4251480986050067, "grad_norm": 1.3046875, "learning_rate": 6.545306374056846e-05, "loss": 1.4249, "step": 2781 }, { "epoch": 0.42530097458436844, "grad_norm": 1.34375, "learning_rate": 6.544887443407478e-05, "loss": 1.3793, "step": 2782 }, { "epoch": 0.4254538505637302, "grad_norm": 1.2734375, "learning_rate": 6.544468519643834e-05, "loss": 1.3678, "step": 2783 }, { "epoch": 0.4256067265430919, "grad_norm": 1.2421875, "learning_rate": 6.544049602766755e-05, "loss": 1.4741, "step": 2784 }, { "epoch": 0.42575960252245365, "grad_norm": 1.34375, "learning_rate": 6.543630692777069e-05, "loss": 1.3578, "step": 2785 }, { "epoch": 0.4259124785018154, "grad_norm": 1.4765625, "learning_rate": 6.543211789675618e-05, "loss": 1.4421, "step": 2786 }, { "epoch": 0.42606535448117716, "grad_norm": 1.3984375, "learning_rate": 6.542792893463231e-05, "loss": 1.281, "step": 2787 }, { "epoch": 0.42621823046053886, "grad_norm": 1.515625, "learning_rate": 6.54237400414075e-05, "loss": 1.3683, "step": 2788 }, { "epoch": 0.4263711064399006, "grad_norm": 1.390625, "learning_rate": 6.541955121709003e-05, "loss": 1.6134, "step": 2789 }, { "epoch": 0.42652398241926237, "grad_norm": 1.453125, "learning_rate": 6.541536246168825e-05, "loss": 1.6049, "step": 2790 }, { "epoch": 0.4266768583986241, "grad_norm": 1.453125, "learning_rate": 6.541117377521056e-05, "loss": 1.517, "step": 2791 }, { "epoch": 0.4268297343779859, "grad_norm": 1.2265625, "learning_rate": 6.540698515766525e-05, "loss": 1.2023, "step": 2792 }, { "epoch": 0.4269826103573476, "grad_norm": 1.3203125, "learning_rate": 6.540279660906072e-05, "loss": 1.3975, "step": 2793 }, { "epoch": 0.42713548633670934, "grad_norm": 1.3828125, "learning_rate": 6.539860812940527e-05, "loss": 1.405, "step": 2794 }, { "epoch": 0.4272883623160711, "grad_norm": 1.296875, "learning_rate": 6.539441971870728e-05, "loss": 1.3943, "step": 2795 }, { "epoch": 0.42744123829543285, "grad_norm": 1.484375, "learning_rate": 6.539023137697508e-05, "loss": 1.4836, "step": 2796 }, { "epoch": 0.42759411427479455, "grad_norm": 1.4140625, "learning_rate": 6.538604310421701e-05, "loss": 1.6653, "step": 2797 }, { "epoch": 0.4277469902541563, "grad_norm": 1.2421875, "learning_rate": 6.538185490044148e-05, "loss": 1.3017, "step": 2798 }, { "epoch": 0.42789986623351806, "grad_norm": 1.2734375, "learning_rate": 6.537766676565673e-05, "loss": 1.0389, "step": 2799 }, { "epoch": 0.4280527422128798, "grad_norm": 1.2734375, "learning_rate": 6.537347869987119e-05, "loss": 1.2055, "step": 2800 }, { "epoch": 0.42820561819224157, "grad_norm": 1.515625, "learning_rate": 6.53692907030932e-05, "loss": 1.695, "step": 2801 }, { "epoch": 0.42835849417160327, "grad_norm": 1.359375, "learning_rate": 6.536510277533102e-05, "loss": 1.4137, "step": 2802 }, { "epoch": 0.428511370150965, "grad_norm": 1.34375, "learning_rate": 6.536091491659313e-05, "loss": 1.4102, "step": 2803 }, { "epoch": 0.4286642461303268, "grad_norm": 1.2734375, "learning_rate": 6.535672712688776e-05, "loss": 1.4287, "step": 2804 }, { "epoch": 0.42881712210968853, "grad_norm": 1.265625, "learning_rate": 6.535253940622332e-05, "loss": 1.3363, "step": 2805 }, { "epoch": 0.42896999808905023, "grad_norm": 1.2265625, "learning_rate": 6.534835175460813e-05, "loss": 1.2618, "step": 2806 }, { "epoch": 0.429122874068412, "grad_norm": 1.3515625, "learning_rate": 6.534416417205056e-05, "loss": 1.3911, "step": 2807 }, { "epoch": 0.42927575004777374, "grad_norm": 1.375, "learning_rate": 6.533997665855895e-05, "loss": 1.3327, "step": 2808 }, { "epoch": 0.4294286260271355, "grad_norm": 1.3671875, "learning_rate": 6.533578921414161e-05, "loss": 1.343, "step": 2809 }, { "epoch": 0.42958150200649725, "grad_norm": 1.3671875, "learning_rate": 6.533160183880693e-05, "loss": 1.3779, "step": 2810 }, { "epoch": 0.42973437798585895, "grad_norm": 1.4765625, "learning_rate": 6.532741453256322e-05, "loss": 1.6438, "step": 2811 }, { "epoch": 0.4298872539652207, "grad_norm": 1.3828125, "learning_rate": 6.532322729541887e-05, "loss": 1.2882, "step": 2812 }, { "epoch": 0.43004012994458246, "grad_norm": 1.359375, "learning_rate": 6.531904012738213e-05, "loss": 1.4963, "step": 2813 }, { "epoch": 0.4301930059239442, "grad_norm": 1.265625, "learning_rate": 6.531485302846148e-05, "loss": 1.4273, "step": 2814 }, { "epoch": 0.4303458819033059, "grad_norm": 1.3125, "learning_rate": 6.531066599866518e-05, "loss": 1.5224, "step": 2815 }, { "epoch": 0.43049875788266767, "grad_norm": 1.40625, "learning_rate": 6.530647903800156e-05, "loss": 1.3888, "step": 2816 }, { "epoch": 0.4306516338620294, "grad_norm": 1.46875, "learning_rate": 6.530229214647904e-05, "loss": 1.471, "step": 2817 }, { "epoch": 0.4308045098413912, "grad_norm": 1.375, "learning_rate": 6.529810532410585e-05, "loss": 1.3697, "step": 2818 }, { "epoch": 0.43095738582075294, "grad_norm": 1.3828125, "learning_rate": 6.529391857089047e-05, "loss": 1.4679, "step": 2819 }, { "epoch": 0.43111026180011464, "grad_norm": 1.4765625, "learning_rate": 6.528973188684114e-05, "loss": 1.2811, "step": 2820 }, { "epoch": 0.4312631377794764, "grad_norm": 1.4765625, "learning_rate": 6.528554527196625e-05, "loss": 1.4204, "step": 2821 }, { "epoch": 0.43141601375883815, "grad_norm": 1.375, "learning_rate": 6.528135872627415e-05, "loss": 1.4909, "step": 2822 }, { "epoch": 0.4315688897381999, "grad_norm": 1.4921875, "learning_rate": 6.527717224977313e-05, "loss": 1.4818, "step": 2823 }, { "epoch": 0.4317217657175616, "grad_norm": 1.359375, "learning_rate": 6.527298584247158e-05, "loss": 1.4275, "step": 2824 }, { "epoch": 0.43187464169692336, "grad_norm": 1.46875, "learning_rate": 6.526879950437781e-05, "loss": 1.7493, "step": 2825 }, { "epoch": 0.4320275176762851, "grad_norm": 1.3828125, "learning_rate": 6.526461323550021e-05, "loss": 1.5021, "step": 2826 }, { "epoch": 0.43218039365564687, "grad_norm": 1.3359375, "learning_rate": 6.526042703584714e-05, "loss": 1.3147, "step": 2827 }, { "epoch": 0.4323332696350086, "grad_norm": 1.2578125, "learning_rate": 6.525624090542683e-05, "loss": 1.278, "step": 2828 }, { "epoch": 0.4324861456143703, "grad_norm": 1.4296875, "learning_rate": 6.525205484424775e-05, "loss": 1.3347, "step": 2829 }, { "epoch": 0.4326390215937321, "grad_norm": 1.3828125, "learning_rate": 6.524786885231813e-05, "loss": 1.385, "step": 2830 }, { "epoch": 0.43279189757309383, "grad_norm": 1.296875, "learning_rate": 6.52436829296464e-05, "loss": 1.5046, "step": 2831 }, { "epoch": 0.4329447735524556, "grad_norm": 1.2734375, "learning_rate": 6.523949707624086e-05, "loss": 1.3372, "step": 2832 }, { "epoch": 0.4330976495318173, "grad_norm": 1.359375, "learning_rate": 6.52353112921099e-05, "loss": 1.4518, "step": 2833 }, { "epoch": 0.43325052551117904, "grad_norm": 1.2578125, "learning_rate": 6.523112557726179e-05, "loss": 1.4967, "step": 2834 }, { "epoch": 0.4334034014905408, "grad_norm": 1.390625, "learning_rate": 6.52269399317049e-05, "loss": 1.2294, "step": 2835 }, { "epoch": 0.43355627746990255, "grad_norm": 1.453125, "learning_rate": 6.52227543554476e-05, "loss": 1.6726, "step": 2836 }, { "epoch": 0.4337091534492643, "grad_norm": 1.3359375, "learning_rate": 6.521856884849817e-05, "loss": 1.3902, "step": 2837 }, { "epoch": 0.433862029428626, "grad_norm": 1.3046875, "learning_rate": 6.521438341086504e-05, "loss": 1.322, "step": 2838 }, { "epoch": 0.43401490540798776, "grad_norm": 1.3359375, "learning_rate": 6.521019804255647e-05, "loss": 1.5132, "step": 2839 }, { "epoch": 0.4341677813873495, "grad_norm": 1.25, "learning_rate": 6.520601274358085e-05, "loss": 1.1763, "step": 2840 }, { "epoch": 0.43432065736671127, "grad_norm": 1.4765625, "learning_rate": 6.52018275139465e-05, "loss": 1.552, "step": 2841 }, { "epoch": 0.43447353334607297, "grad_norm": 1.359375, "learning_rate": 6.519764235366175e-05, "loss": 1.2858, "step": 2842 }, { "epoch": 0.4346264093254347, "grad_norm": 1.3125, "learning_rate": 6.519345726273498e-05, "loss": 1.5757, "step": 2843 }, { "epoch": 0.4347792853047965, "grad_norm": 1.2890625, "learning_rate": 6.518927224117448e-05, "loss": 1.4836, "step": 2844 }, { "epoch": 0.43493216128415824, "grad_norm": 1.328125, "learning_rate": 6.518508728898863e-05, "loss": 1.4725, "step": 2845 }, { "epoch": 0.43508503726352, "grad_norm": 1.2421875, "learning_rate": 6.518090240618573e-05, "loss": 1.3855, "step": 2846 }, { "epoch": 0.4352379132428817, "grad_norm": 1.59375, "learning_rate": 6.517671759277417e-05, "loss": 1.3414, "step": 2847 }, { "epoch": 0.43539078922224345, "grad_norm": 1.3125, "learning_rate": 6.51725328487623e-05, "loss": 1.2903, "step": 2848 }, { "epoch": 0.4355436652016052, "grad_norm": 1.4296875, "learning_rate": 6.516834817415838e-05, "loss": 1.5791, "step": 2849 }, { "epoch": 0.43569654118096696, "grad_norm": 1.4375, "learning_rate": 6.516416356897082e-05, "loss": 1.635, "step": 2850 }, { "epoch": 0.43584941716032866, "grad_norm": 1.375, "learning_rate": 6.51599790332079e-05, "loss": 1.532, "step": 2851 }, { "epoch": 0.4360022931396904, "grad_norm": 1.4140625, "learning_rate": 6.515579456687801e-05, "loss": 1.6796, "step": 2852 }, { "epoch": 0.43615516911905217, "grad_norm": 1.3046875, "learning_rate": 6.515161016998947e-05, "loss": 1.4308, "step": 2853 }, { "epoch": 0.4363080450984139, "grad_norm": 1.3515625, "learning_rate": 6.514742584255067e-05, "loss": 1.6877, "step": 2854 }, { "epoch": 0.4364609210777757, "grad_norm": 1.375, "learning_rate": 6.514324158456986e-05, "loss": 1.4468, "step": 2855 }, { "epoch": 0.4366137970571374, "grad_norm": 1.5546875, "learning_rate": 6.513905739605541e-05, "loss": 1.3821, "step": 2856 }, { "epoch": 0.43676667303649913, "grad_norm": 1.1796875, "learning_rate": 6.513487327701573e-05, "loss": 1.1666, "step": 2857 }, { "epoch": 0.4369195490158609, "grad_norm": 1.2578125, "learning_rate": 6.513068922745902e-05, "loss": 1.4821, "step": 2858 }, { "epoch": 0.43707242499522264, "grad_norm": 1.4453125, "learning_rate": 6.512650524739374e-05, "loss": 1.2391, "step": 2859 }, { "epoch": 0.43722530097458434, "grad_norm": 1.3203125, "learning_rate": 6.512232133682819e-05, "loss": 1.4878, "step": 2860 }, { "epoch": 0.4373781769539461, "grad_norm": 1.4453125, "learning_rate": 6.511813749577066e-05, "loss": 1.5937, "step": 2861 }, { "epoch": 0.43753105293330785, "grad_norm": 1.15625, "learning_rate": 6.511395372422961e-05, "loss": 1.4241, "step": 2862 }, { "epoch": 0.4376839289126696, "grad_norm": 1.3984375, "learning_rate": 6.510977002221322e-05, "loss": 1.5328, "step": 2863 }, { "epoch": 0.43783680489203136, "grad_norm": 1.3046875, "learning_rate": 6.510558638972994e-05, "loss": 1.4483, "step": 2864 }, { "epoch": 0.43798968087139306, "grad_norm": 1.296875, "learning_rate": 6.510140282678807e-05, "loss": 1.1568, "step": 2865 }, { "epoch": 0.4381425568507548, "grad_norm": 1.171875, "learning_rate": 6.509721933339596e-05, "loss": 1.4551, "step": 2866 }, { "epoch": 0.43829543283011657, "grad_norm": 1.4765625, "learning_rate": 6.509303590956195e-05, "loss": 1.3484, "step": 2867 }, { "epoch": 0.4384483088094783, "grad_norm": 1.4921875, "learning_rate": 6.508885255529433e-05, "loss": 1.6188, "step": 2868 }, { "epoch": 0.43860118478884, "grad_norm": 1.4375, "learning_rate": 6.50846692706015e-05, "loss": 1.4899, "step": 2869 }, { "epoch": 0.4387540607682018, "grad_norm": 1.421875, "learning_rate": 6.508048605549175e-05, "loss": 1.3672, "step": 2870 }, { "epoch": 0.43890693674756354, "grad_norm": 1.3515625, "learning_rate": 6.507630290997346e-05, "loss": 1.3949, "step": 2871 }, { "epoch": 0.4390598127269253, "grad_norm": 1.2890625, "learning_rate": 6.50721198340549e-05, "loss": 1.2836, "step": 2872 }, { "epoch": 0.43921268870628705, "grad_norm": 1.515625, "learning_rate": 6.506793682774452e-05, "loss": 1.8112, "step": 2873 }, { "epoch": 0.43936556468564875, "grad_norm": 1.4140625, "learning_rate": 6.506375389105055e-05, "loss": 1.2802, "step": 2874 }, { "epoch": 0.4395184406650105, "grad_norm": 1.3515625, "learning_rate": 6.505957102398134e-05, "loss": 1.4269, "step": 2875 }, { "epoch": 0.43967131664437226, "grad_norm": 1.46875, "learning_rate": 6.505538822654529e-05, "loss": 1.301, "step": 2876 }, { "epoch": 0.439824192623734, "grad_norm": 1.53125, "learning_rate": 6.505120549875065e-05, "loss": 1.6018, "step": 2877 }, { "epoch": 0.4399770686030957, "grad_norm": 1.359375, "learning_rate": 6.504702284060586e-05, "loss": 1.4828, "step": 2878 }, { "epoch": 0.44012994458245747, "grad_norm": 1.5, "learning_rate": 6.504284025211914e-05, "loss": 1.4507, "step": 2879 }, { "epoch": 0.4402828205618192, "grad_norm": 1.3828125, "learning_rate": 6.503865773329892e-05, "loss": 1.3653, "step": 2880 }, { "epoch": 0.440435696541181, "grad_norm": 1.4609375, "learning_rate": 6.503447528415348e-05, "loss": 1.6511, "step": 2881 }, { "epoch": 0.44058857252054273, "grad_norm": 1.484375, "learning_rate": 6.503029290469116e-05, "loss": 1.3391, "step": 2882 }, { "epoch": 0.44074144849990443, "grad_norm": 1.3828125, "learning_rate": 6.502611059492034e-05, "loss": 1.4432, "step": 2883 }, { "epoch": 0.4408943244792662, "grad_norm": 1.546875, "learning_rate": 6.502192835484929e-05, "loss": 1.7418, "step": 2884 }, { "epoch": 0.44104720045862794, "grad_norm": 1.40625, "learning_rate": 6.50177461844864e-05, "loss": 1.5257, "step": 2885 }, { "epoch": 0.4412000764379897, "grad_norm": 1.4453125, "learning_rate": 6.501356408383995e-05, "loss": 1.592, "step": 2886 }, { "epoch": 0.4413529524173514, "grad_norm": 1.4765625, "learning_rate": 6.500938205291833e-05, "loss": 1.8738, "step": 2887 }, { "epoch": 0.44150582839671315, "grad_norm": 1.53125, "learning_rate": 6.500520009172988e-05, "loss": 1.6329, "step": 2888 }, { "epoch": 0.4416587043760749, "grad_norm": 1.5078125, "learning_rate": 6.500101820028286e-05, "loss": 1.4824, "step": 2889 }, { "epoch": 0.44181158035543666, "grad_norm": 1.2578125, "learning_rate": 6.499683637858566e-05, "loss": 1.411, "step": 2890 }, { "epoch": 0.4419644563347984, "grad_norm": 1.5078125, "learning_rate": 6.499265462664661e-05, "loss": 1.4626, "step": 2891 }, { "epoch": 0.4421173323141601, "grad_norm": 1.328125, "learning_rate": 6.498847294447404e-05, "loss": 1.4048, "step": 2892 }, { "epoch": 0.44227020829352187, "grad_norm": 1.2734375, "learning_rate": 6.49842913320763e-05, "loss": 1.2163, "step": 2893 }, { "epoch": 0.4424230842728836, "grad_norm": 1.328125, "learning_rate": 6.498010978946165e-05, "loss": 1.3134, "step": 2894 }, { "epoch": 0.4425759602522454, "grad_norm": 3.21875, "learning_rate": 6.497592831663852e-05, "loss": 1.7886, "step": 2895 }, { "epoch": 0.4427288362316071, "grad_norm": 1.2265625, "learning_rate": 6.497174691361517e-05, "loss": 1.2764, "step": 2896 }, { "epoch": 0.44288171221096884, "grad_norm": 1.40625, "learning_rate": 6.496756558040001e-05, "loss": 1.2992, "step": 2897 }, { "epoch": 0.4430345881903306, "grad_norm": 1.3125, "learning_rate": 6.496338431700126e-05, "loss": 1.4397, "step": 2898 }, { "epoch": 0.44318746416969235, "grad_norm": 1.3671875, "learning_rate": 6.495920312342739e-05, "loss": 1.6398, "step": 2899 }, { "epoch": 0.4433403401490541, "grad_norm": 1.28125, "learning_rate": 6.495502199968664e-05, "loss": 1.5241, "step": 2900 }, { "epoch": 0.4434932161284158, "grad_norm": 1.1796875, "learning_rate": 6.495084094578735e-05, "loss": 1.3884, "step": 2901 }, { "epoch": 0.44364609210777756, "grad_norm": 1.5546875, "learning_rate": 6.49466599617379e-05, "loss": 1.4743, "step": 2902 }, { "epoch": 0.4437989680871393, "grad_norm": 1.4140625, "learning_rate": 6.494247904754653e-05, "loss": 1.4239, "step": 2903 }, { "epoch": 0.44395184406650107, "grad_norm": 1.3515625, "learning_rate": 6.49382982032217e-05, "loss": 1.1987, "step": 2904 }, { "epoch": 0.44410472004586277, "grad_norm": 1.5234375, "learning_rate": 6.493411742877162e-05, "loss": 1.3526, "step": 2905 }, { "epoch": 0.4442575960252245, "grad_norm": 1.390625, "learning_rate": 6.492993672420471e-05, "loss": 1.5712, "step": 2906 }, { "epoch": 0.4444104720045863, "grad_norm": 1.3671875, "learning_rate": 6.492575608952929e-05, "loss": 1.4329, "step": 2907 }, { "epoch": 0.44456334798394803, "grad_norm": 1.4375, "learning_rate": 6.492157552475362e-05, "loss": 1.7223, "step": 2908 }, { "epoch": 0.4447162239633098, "grad_norm": 1.4140625, "learning_rate": 6.491739502988611e-05, "loss": 1.5487, "step": 2909 }, { "epoch": 0.4448690999426715, "grad_norm": 1.296875, "learning_rate": 6.491321460493504e-05, "loss": 1.2758, "step": 2910 }, { "epoch": 0.44502197592203324, "grad_norm": 1.34375, "learning_rate": 6.490903424990877e-05, "loss": 1.2744, "step": 2911 }, { "epoch": 0.445174851901395, "grad_norm": 1.296875, "learning_rate": 6.490485396481562e-05, "loss": 1.2105, "step": 2912 }, { "epoch": 0.44532772788075675, "grad_norm": 1.359375, "learning_rate": 6.490067374966398e-05, "loss": 1.3754, "step": 2913 }, { "epoch": 0.44548060386011845, "grad_norm": 1.4921875, "learning_rate": 6.489649360446208e-05, "loss": 1.7566, "step": 2914 }, { "epoch": 0.4456334798394802, "grad_norm": 1.3828125, "learning_rate": 6.489231352921828e-05, "loss": 1.5715, "step": 2915 }, { "epoch": 0.44578635581884196, "grad_norm": 1.359375, "learning_rate": 6.488813352394096e-05, "loss": 1.3636, "step": 2916 }, { "epoch": 0.4459392317982037, "grad_norm": 1.296875, "learning_rate": 6.488395358863839e-05, "loss": 1.5113, "step": 2917 }, { "epoch": 0.44609210777756547, "grad_norm": 1.25, "learning_rate": 6.487977372331898e-05, "loss": 1.3538, "step": 2918 }, { "epoch": 0.44624498375692717, "grad_norm": 1.4609375, "learning_rate": 6.487559392799097e-05, "loss": 1.649, "step": 2919 }, { "epoch": 0.4463978597362889, "grad_norm": 1.328125, "learning_rate": 6.487141420266272e-05, "loss": 1.4943, "step": 2920 }, { "epoch": 0.4465507357156507, "grad_norm": 1.3671875, "learning_rate": 6.48672345473426e-05, "loss": 1.4405, "step": 2921 }, { "epoch": 0.44670361169501244, "grad_norm": 1.3125, "learning_rate": 6.486305496203886e-05, "loss": 1.4162, "step": 2922 }, { "epoch": 0.44685648767437414, "grad_norm": 1.359375, "learning_rate": 6.485887544675995e-05, "loss": 1.3798, "step": 2923 }, { "epoch": 0.4470093636537359, "grad_norm": 1.5078125, "learning_rate": 6.485469600151406e-05, "loss": 1.4474, "step": 2924 }, { "epoch": 0.44716223963309765, "grad_norm": 1.5546875, "learning_rate": 6.485051662630961e-05, "loss": 1.5306, "step": 2925 }, { "epoch": 0.4473151156124594, "grad_norm": 1.3046875, "learning_rate": 6.484633732115493e-05, "loss": 1.1457, "step": 2926 }, { "epoch": 0.44746799159182116, "grad_norm": 1.3984375, "learning_rate": 6.484215808605827e-05, "loss": 1.339, "step": 2927 }, { "epoch": 0.44762086757118286, "grad_norm": 1.4609375, "learning_rate": 6.483797892102808e-05, "loss": 1.7007, "step": 2928 }, { "epoch": 0.4477737435505446, "grad_norm": 1.328125, "learning_rate": 6.483379982607255e-05, "loss": 1.3688, "step": 2929 }, { "epoch": 0.44792661952990637, "grad_norm": 1.2890625, "learning_rate": 6.482962080120015e-05, "loss": 1.2815, "step": 2930 }, { "epoch": 0.4480794955092681, "grad_norm": 1.53125, "learning_rate": 6.482544184641908e-05, "loss": 1.503, "step": 2931 }, { "epoch": 0.4482323714886298, "grad_norm": 1.2109375, "learning_rate": 6.482126296173775e-05, "loss": 1.1019, "step": 2932 }, { "epoch": 0.4483852474679916, "grad_norm": 1.4453125, "learning_rate": 6.48170841471645e-05, "loss": 1.4282, "step": 2933 }, { "epoch": 0.44853812344735333, "grad_norm": 1.3671875, "learning_rate": 6.481290540270758e-05, "loss": 1.3257, "step": 2934 }, { "epoch": 0.4486909994267151, "grad_norm": 1.2734375, "learning_rate": 6.480872672837537e-05, "loss": 1.2326, "step": 2935 }, { "epoch": 0.44884387540607684, "grad_norm": 1.40625, "learning_rate": 6.480454812417617e-05, "loss": 1.5359, "step": 2936 }, { "epoch": 0.44899675138543854, "grad_norm": 1.2421875, "learning_rate": 6.480036959011837e-05, "loss": 1.432, "step": 2937 }, { "epoch": 0.4491496273648003, "grad_norm": 1.390625, "learning_rate": 6.47961911262102e-05, "loss": 1.4716, "step": 2938 }, { "epoch": 0.44930250334416205, "grad_norm": 1.3984375, "learning_rate": 6.479201273246009e-05, "loss": 1.6773, "step": 2939 }, { "epoch": 0.4494553793235238, "grad_norm": 1.375, "learning_rate": 6.47878344088763e-05, "loss": 1.4733, "step": 2940 }, { "epoch": 0.4496082553028855, "grad_norm": 1.390625, "learning_rate": 6.478365615546718e-05, "loss": 1.8463, "step": 2941 }, { "epoch": 0.44976113128224726, "grad_norm": 1.3125, "learning_rate": 6.477947797224106e-05, "loss": 1.5031, "step": 2942 }, { "epoch": 0.449914007261609, "grad_norm": 1.40625, "learning_rate": 6.477529985920621e-05, "loss": 1.8074, "step": 2943 }, { "epoch": 0.45006688324097077, "grad_norm": 1.4609375, "learning_rate": 6.477112181637107e-05, "loss": 1.6157, "step": 2944 }, { "epoch": 0.4502197592203325, "grad_norm": 1.4453125, "learning_rate": 6.476694384374387e-05, "loss": 1.6195, "step": 2945 }, { "epoch": 0.4503726351996942, "grad_norm": 1.203125, "learning_rate": 6.476276594133297e-05, "loss": 1.2497, "step": 2946 }, { "epoch": 0.450525511179056, "grad_norm": 1.171875, "learning_rate": 6.475858810914673e-05, "loss": 1.3007, "step": 2947 }, { "epoch": 0.45067838715841774, "grad_norm": 1.390625, "learning_rate": 6.475441034719338e-05, "loss": 1.5152, "step": 2948 }, { "epoch": 0.4508312631377795, "grad_norm": 1.8671875, "learning_rate": 6.475023265548135e-05, "loss": 1.509, "step": 2949 }, { "epoch": 0.4509841391171412, "grad_norm": 1.671875, "learning_rate": 6.474605503401888e-05, "loss": 1.5699, "step": 2950 }, { "epoch": 0.45113701509650295, "grad_norm": 1.21875, "learning_rate": 6.474187748281438e-05, "loss": 1.1455, "step": 2951 }, { "epoch": 0.4512898910758647, "grad_norm": 1.421875, "learning_rate": 6.473770000187614e-05, "loss": 1.2962, "step": 2952 }, { "epoch": 0.45144276705522646, "grad_norm": 1.5078125, "learning_rate": 6.473352259121244e-05, "loss": 1.6781, "step": 2953 }, { "epoch": 0.4515956430345882, "grad_norm": 1.5546875, "learning_rate": 6.472934525083165e-05, "loss": 1.5443, "step": 2954 }, { "epoch": 0.4517485190139499, "grad_norm": 1.5, "learning_rate": 6.472516798074208e-05, "loss": 1.5424, "step": 2955 }, { "epoch": 0.45190139499331167, "grad_norm": 1.4921875, "learning_rate": 6.47209907809521e-05, "loss": 1.3164, "step": 2956 }, { "epoch": 0.4520542709726734, "grad_norm": 1.2734375, "learning_rate": 6.471681365146997e-05, "loss": 1.2551, "step": 2957 }, { "epoch": 0.4522071469520352, "grad_norm": 1.2890625, "learning_rate": 6.471263659230407e-05, "loss": 1.3404, "step": 2958 }, { "epoch": 0.4523600229313969, "grad_norm": 1.46875, "learning_rate": 6.47084596034627e-05, "loss": 1.4288, "step": 2959 }, { "epoch": 0.45251289891075863, "grad_norm": 2.1875, "learning_rate": 6.470428268495415e-05, "loss": 1.3412, "step": 2960 }, { "epoch": 0.4526657748901204, "grad_norm": 1.2734375, "learning_rate": 6.470010583678679e-05, "loss": 1.3277, "step": 2961 }, { "epoch": 0.45281865086948214, "grad_norm": 1.453125, "learning_rate": 6.469592905896891e-05, "loss": 1.8083, "step": 2962 }, { "epoch": 0.4529715268488439, "grad_norm": 1.390625, "learning_rate": 6.46917523515089e-05, "loss": 1.5302, "step": 2963 }, { "epoch": 0.4531244028282056, "grad_norm": 1.5078125, "learning_rate": 6.468757571441501e-05, "loss": 1.7348, "step": 2964 }, { "epoch": 0.45327727880756735, "grad_norm": 1.296875, "learning_rate": 6.468339914769559e-05, "loss": 1.4128, "step": 2965 }, { "epoch": 0.4534301547869291, "grad_norm": 1.2734375, "learning_rate": 6.467922265135897e-05, "loss": 1.2663, "step": 2966 }, { "epoch": 0.45358303076629086, "grad_norm": 1.40625, "learning_rate": 6.467504622541347e-05, "loss": 1.3919, "step": 2967 }, { "epoch": 0.45373590674565256, "grad_norm": 1.5703125, "learning_rate": 6.467086986986744e-05, "loss": 1.6345, "step": 2968 }, { "epoch": 0.4538887827250143, "grad_norm": 1.2734375, "learning_rate": 6.466669358472913e-05, "loss": 1.3786, "step": 2969 }, { "epoch": 0.45404165870437607, "grad_norm": 1.390625, "learning_rate": 6.466251737000693e-05, "loss": 1.5739, "step": 2970 }, { "epoch": 0.4541945346837378, "grad_norm": 1.390625, "learning_rate": 6.465834122570911e-05, "loss": 1.2896, "step": 2971 }, { "epoch": 0.4543474106630996, "grad_norm": 1.421875, "learning_rate": 6.465416515184406e-05, "loss": 1.5271, "step": 2972 }, { "epoch": 0.4545002866424613, "grad_norm": 1.328125, "learning_rate": 6.464998914842008e-05, "loss": 1.3013, "step": 2973 }, { "epoch": 0.45465316262182304, "grad_norm": 1.4375, "learning_rate": 6.464581321544544e-05, "loss": 1.6763, "step": 2974 }, { "epoch": 0.4548060386011848, "grad_norm": 1.5625, "learning_rate": 6.464163735292852e-05, "loss": 1.4899, "step": 2975 }, { "epoch": 0.45495891458054655, "grad_norm": 1.4140625, "learning_rate": 6.46374615608776e-05, "loss": 1.6926, "step": 2976 }, { "epoch": 0.45511179055990825, "grad_norm": 1.390625, "learning_rate": 6.463328583930104e-05, "loss": 1.3577, "step": 2977 }, { "epoch": 0.45526466653927, "grad_norm": 1.4140625, "learning_rate": 6.462911018820714e-05, "loss": 1.4767, "step": 2978 }, { "epoch": 0.45541754251863176, "grad_norm": 1.3984375, "learning_rate": 6.462493460760426e-05, "loss": 1.4584, "step": 2979 }, { "epoch": 0.4555704184979935, "grad_norm": 1.5234375, "learning_rate": 6.462075909750067e-05, "loss": 1.5786, "step": 2980 }, { "epoch": 0.45572329447735527, "grad_norm": 1.3828125, "learning_rate": 6.46165836579047e-05, "loss": 1.3533, "step": 2981 }, { "epoch": 0.45587617045671697, "grad_norm": 1.34375, "learning_rate": 6.461240828882472e-05, "loss": 1.3881, "step": 2982 }, { "epoch": 0.4560290464360787, "grad_norm": 1.3671875, "learning_rate": 6.460823299026895e-05, "loss": 1.5386, "step": 2983 }, { "epoch": 0.4561819224154405, "grad_norm": 1.2890625, "learning_rate": 6.460405776224583e-05, "loss": 1.3188, "step": 2984 }, { "epoch": 0.45633479839480223, "grad_norm": 1.46875, "learning_rate": 6.45998826047636e-05, "loss": 1.6403, "step": 2985 }, { "epoch": 0.45648767437416393, "grad_norm": 1.3359375, "learning_rate": 6.45957075178306e-05, "loss": 1.4836, "step": 2986 }, { "epoch": 0.4566405503535257, "grad_norm": 1.3515625, "learning_rate": 6.45915325014552e-05, "loss": 1.4464, "step": 2987 }, { "epoch": 0.45679342633288744, "grad_norm": 1.3359375, "learning_rate": 6.45873575556456e-05, "loss": 1.3548, "step": 2988 }, { "epoch": 0.4569463023122492, "grad_norm": 1.4921875, "learning_rate": 6.458318268041028e-05, "loss": 1.7938, "step": 2989 }, { "epoch": 0.45709917829161095, "grad_norm": 1.3515625, "learning_rate": 6.457900787575743e-05, "loss": 1.4285, "step": 2990 }, { "epoch": 0.45725205427097265, "grad_norm": 1.46875, "learning_rate": 6.457483314169543e-05, "loss": 1.5934, "step": 2991 }, { "epoch": 0.4574049302503344, "grad_norm": 1.3984375, "learning_rate": 6.45706584782326e-05, "loss": 1.3174, "step": 2992 }, { "epoch": 0.45755780622969616, "grad_norm": 1.4609375, "learning_rate": 6.456648388537723e-05, "loss": 1.4296, "step": 2993 }, { "epoch": 0.4577106822090579, "grad_norm": 1.4140625, "learning_rate": 6.456230936313767e-05, "loss": 1.4084, "step": 2994 }, { "epoch": 0.4578635581884196, "grad_norm": 1.4375, "learning_rate": 6.45581349115222e-05, "loss": 1.5862, "step": 2995 }, { "epoch": 0.45801643416778137, "grad_norm": 1.21875, "learning_rate": 6.455396053053918e-05, "loss": 1.4472, "step": 2996 }, { "epoch": 0.4581693101471431, "grad_norm": 1.375, "learning_rate": 6.454978622019689e-05, "loss": 1.3027, "step": 2997 }, { "epoch": 0.4583221861265049, "grad_norm": 1.2421875, "learning_rate": 6.454561198050373e-05, "loss": 1.3064, "step": 2998 }, { "epoch": 0.45847506210586664, "grad_norm": 1.2578125, "learning_rate": 6.454143781146795e-05, "loss": 1.3611, "step": 2999 }, { "epoch": 0.45862793808522834, "grad_norm": 1.296875, "learning_rate": 6.453726371309785e-05, "loss": 1.3655, "step": 3000 }, { "epoch": 0.45862793808522834, "eval_loss": 1.398690104484558, "eval_model_preparation_time": 0.0034, "eval_runtime": 111.6647, "eval_samples_per_second": 89.554, "eval_steps_per_second": 2.803, "step": 3000 }, { "epoch": 0.4587808140645901, "grad_norm": 1.453125, "learning_rate": 6.453308968540179e-05, "loss": 1.3634, "step": 3001 }, { "epoch": 0.45893369004395185, "grad_norm": 1.421875, "learning_rate": 6.452891572838809e-05, "loss": 1.5571, "step": 3002 }, { "epoch": 0.4590865660233136, "grad_norm": 1.265625, "learning_rate": 6.452474184206507e-05, "loss": 1.4122, "step": 3003 }, { "epoch": 0.4592394420026753, "grad_norm": 1.296875, "learning_rate": 6.4520568026441e-05, "loss": 1.3069, "step": 3004 }, { "epoch": 0.45939231798203706, "grad_norm": 1.5390625, "learning_rate": 6.451639428152425e-05, "loss": 1.4891, "step": 3005 }, { "epoch": 0.4595451939613988, "grad_norm": 1.3203125, "learning_rate": 6.451222060732312e-05, "loss": 1.4331, "step": 3006 }, { "epoch": 0.45969806994076057, "grad_norm": 1.453125, "learning_rate": 6.450804700384591e-05, "loss": 1.3018, "step": 3007 }, { "epoch": 0.4598509459201223, "grad_norm": 1.3984375, "learning_rate": 6.4503873471101e-05, "loss": 1.4591, "step": 3008 }, { "epoch": 0.460003821899484, "grad_norm": 1.2421875, "learning_rate": 6.449970000909663e-05, "loss": 1.2068, "step": 3009 }, { "epoch": 0.4601566978788458, "grad_norm": 1.3671875, "learning_rate": 6.449552661784117e-05, "loss": 1.2666, "step": 3010 }, { "epoch": 0.46030957385820753, "grad_norm": 1.40625, "learning_rate": 6.449135329734288e-05, "loss": 1.5398, "step": 3011 }, { "epoch": 0.4604624498375693, "grad_norm": 1.2734375, "learning_rate": 6.448718004761016e-05, "loss": 1.2407, "step": 3012 }, { "epoch": 0.460615325816931, "grad_norm": 1.3984375, "learning_rate": 6.448300686865129e-05, "loss": 1.4627, "step": 3013 }, { "epoch": 0.46076820179629274, "grad_norm": 1.3359375, "learning_rate": 6.447883376047455e-05, "loss": 1.3817, "step": 3014 }, { "epoch": 0.4609210777756545, "grad_norm": 1.4765625, "learning_rate": 6.447466072308828e-05, "loss": 1.5339, "step": 3015 }, { "epoch": 0.46107395375501625, "grad_norm": 1.3671875, "learning_rate": 6.44704877565008e-05, "loss": 1.219, "step": 3016 }, { "epoch": 0.461226829734378, "grad_norm": 1.3828125, "learning_rate": 6.446631486072045e-05, "loss": 1.5292, "step": 3017 }, { "epoch": 0.4613797057137397, "grad_norm": 1.3359375, "learning_rate": 6.446214203575553e-05, "loss": 1.7307, "step": 3018 }, { "epoch": 0.46153258169310146, "grad_norm": 1.34375, "learning_rate": 6.445796928161435e-05, "loss": 1.2565, "step": 3019 }, { "epoch": 0.4616854576724632, "grad_norm": 1.5, "learning_rate": 6.445379659830521e-05, "loss": 1.4024, "step": 3020 }, { "epoch": 0.461838333651825, "grad_norm": 1.3046875, "learning_rate": 6.444962398583642e-05, "loss": 1.2815, "step": 3021 }, { "epoch": 0.4619912096311867, "grad_norm": 1.453125, "learning_rate": 6.444545144421637e-05, "loss": 1.6087, "step": 3022 }, { "epoch": 0.4621440856105484, "grad_norm": 1.328125, "learning_rate": 6.444127897345327e-05, "loss": 1.4753, "step": 3023 }, { "epoch": 0.4622969615899102, "grad_norm": 1.4296875, "learning_rate": 6.443710657355554e-05, "loss": 1.5892, "step": 3024 }, { "epoch": 0.46244983756927194, "grad_norm": 1.3515625, "learning_rate": 6.443293424453142e-05, "loss": 1.6107, "step": 3025 }, { "epoch": 0.4626027135486337, "grad_norm": 1.1875, "learning_rate": 6.442876198638924e-05, "loss": 1.1626, "step": 3026 }, { "epoch": 0.4627555895279954, "grad_norm": 1.3515625, "learning_rate": 6.442458979913736e-05, "loss": 1.3587, "step": 3027 }, { "epoch": 0.46290846550735715, "grad_norm": 1.3515625, "learning_rate": 6.4420417682784e-05, "loss": 1.0942, "step": 3028 }, { "epoch": 0.4630613414867189, "grad_norm": 1.296875, "learning_rate": 6.441624563733759e-05, "loss": 1.235, "step": 3029 }, { "epoch": 0.46321421746608066, "grad_norm": 1.3203125, "learning_rate": 6.441207366280634e-05, "loss": 1.45, "step": 3030 }, { "epoch": 0.46336709344544236, "grad_norm": 1.3515625, "learning_rate": 6.440790175919867e-05, "loss": 1.5953, "step": 3031 }, { "epoch": 0.4635199694248041, "grad_norm": 1.4296875, "learning_rate": 6.440372992652282e-05, "loss": 1.4598, "step": 3032 }, { "epoch": 0.46367284540416587, "grad_norm": 1.453125, "learning_rate": 6.439955816478709e-05, "loss": 1.445, "step": 3033 }, { "epoch": 0.4638257213835276, "grad_norm": 1.375, "learning_rate": 6.439538647399984e-05, "loss": 1.3882, "step": 3034 }, { "epoch": 0.4639785973628894, "grad_norm": 1.3125, "learning_rate": 6.439121485416936e-05, "loss": 1.3776, "step": 3035 }, { "epoch": 0.4641314733422511, "grad_norm": 1.390625, "learning_rate": 6.438704330530398e-05, "loss": 1.6439, "step": 3036 }, { "epoch": 0.46428434932161283, "grad_norm": 1.4296875, "learning_rate": 6.438287182741199e-05, "loss": 1.3184, "step": 3037 }, { "epoch": 0.4644372253009746, "grad_norm": 1.3046875, "learning_rate": 6.437870042050178e-05, "loss": 1.2821, "step": 3038 }, { "epoch": 0.46459010128033634, "grad_norm": 1.4375, "learning_rate": 6.437452908458154e-05, "loss": 1.5179, "step": 3039 }, { "epoch": 0.46474297725969804, "grad_norm": 1.2265625, "learning_rate": 6.437035781965965e-05, "loss": 1.272, "step": 3040 }, { "epoch": 0.4648958532390598, "grad_norm": 1.5, "learning_rate": 6.436618662574445e-05, "loss": 1.5814, "step": 3041 }, { "epoch": 0.46504872921842155, "grad_norm": 1.296875, "learning_rate": 6.436201550284418e-05, "loss": 1.4749, "step": 3042 }, { "epoch": 0.4652016051977833, "grad_norm": 1.3125, "learning_rate": 6.435784445096725e-05, "loss": 1.6364, "step": 3043 }, { "epoch": 0.46535448117714506, "grad_norm": 1.3125, "learning_rate": 6.435367347012189e-05, "loss": 1.5024, "step": 3044 }, { "epoch": 0.46550735715650676, "grad_norm": 1.4765625, "learning_rate": 6.434950256031642e-05, "loss": 1.4768, "step": 3045 }, { "epoch": 0.4656602331358685, "grad_norm": 1.359375, "learning_rate": 6.434533172155919e-05, "loss": 1.3261, "step": 3046 }, { "epoch": 0.4658131091152303, "grad_norm": 1.21875, "learning_rate": 6.434116095385848e-05, "loss": 1.2312, "step": 3047 }, { "epoch": 0.46596598509459203, "grad_norm": 1.3203125, "learning_rate": 6.433699025722265e-05, "loss": 1.4155, "step": 3048 }, { "epoch": 0.4661188610739537, "grad_norm": 1.2109375, "learning_rate": 6.433281963165993e-05, "loss": 1.4209, "step": 3049 }, { "epoch": 0.4662717370533155, "grad_norm": 1.421875, "learning_rate": 6.432864907717871e-05, "loss": 1.4603, "step": 3050 }, { "epoch": 0.46642461303267724, "grad_norm": 1.328125, "learning_rate": 6.432447859378726e-05, "loss": 1.37, "step": 3051 }, { "epoch": 0.466577489012039, "grad_norm": 1.3671875, "learning_rate": 6.432030818149388e-05, "loss": 1.4004, "step": 3052 }, { "epoch": 0.46673036499140075, "grad_norm": 1.4609375, "learning_rate": 6.431613784030694e-05, "loss": 1.3925, "step": 3053 }, { "epoch": 0.46688324097076245, "grad_norm": 1.328125, "learning_rate": 6.431196757023468e-05, "loss": 1.4258, "step": 3054 }, { "epoch": 0.4670361169501242, "grad_norm": 1.390625, "learning_rate": 6.430779737128547e-05, "loss": 1.3248, "step": 3055 }, { "epoch": 0.46718899292948596, "grad_norm": 1.3828125, "learning_rate": 6.430362724346759e-05, "loss": 1.4437, "step": 3056 }, { "epoch": 0.4673418689088477, "grad_norm": 1.3671875, "learning_rate": 6.429945718678935e-05, "loss": 1.446, "step": 3057 }, { "epoch": 0.4674947448882094, "grad_norm": 1.3125, "learning_rate": 6.42952872012591e-05, "loss": 1.3564, "step": 3058 }, { "epoch": 0.46764762086757117, "grad_norm": 1.34375, "learning_rate": 6.429111728688506e-05, "loss": 1.4828, "step": 3059 }, { "epoch": 0.4678004968469329, "grad_norm": 1.2578125, "learning_rate": 6.428694744367564e-05, "loss": 1.3843, "step": 3060 }, { "epoch": 0.4679533728262947, "grad_norm": 1.4140625, "learning_rate": 6.428277767163909e-05, "loss": 1.681, "step": 3061 }, { "epoch": 0.46810624880565643, "grad_norm": 1.4609375, "learning_rate": 6.427860797078375e-05, "loss": 1.5247, "step": 3062 }, { "epoch": 0.46825912478501813, "grad_norm": 1.4296875, "learning_rate": 6.427443834111788e-05, "loss": 1.5122, "step": 3063 }, { "epoch": 0.4684120007643799, "grad_norm": 1.265625, "learning_rate": 6.427026878264988e-05, "loss": 1.2257, "step": 3064 }, { "epoch": 0.46856487674374164, "grad_norm": 1.421875, "learning_rate": 6.426609929538799e-05, "loss": 1.5139, "step": 3065 }, { "epoch": 0.4687177527231034, "grad_norm": 1.6328125, "learning_rate": 6.426192987934052e-05, "loss": 1.6674, "step": 3066 }, { "epoch": 0.4688706287024651, "grad_norm": 1.4921875, "learning_rate": 6.425776053451582e-05, "loss": 1.4957, "step": 3067 }, { "epoch": 0.46902350468182685, "grad_norm": 1.4609375, "learning_rate": 6.425359126092214e-05, "loss": 1.4851, "step": 3068 }, { "epoch": 0.4691763806611886, "grad_norm": 1.4765625, "learning_rate": 6.424942205856786e-05, "loss": 1.3343, "step": 3069 }, { "epoch": 0.46932925664055036, "grad_norm": 1.359375, "learning_rate": 6.424525292746122e-05, "loss": 1.2895, "step": 3070 }, { "epoch": 0.4694821326199121, "grad_norm": 1.4140625, "learning_rate": 6.424108386761058e-05, "loss": 1.4711, "step": 3071 }, { "epoch": 0.4696350085992738, "grad_norm": 1.3515625, "learning_rate": 6.423691487902426e-05, "loss": 1.3516, "step": 3072 }, { "epoch": 0.4697878845786356, "grad_norm": 1.4765625, "learning_rate": 6.42327459617105e-05, "loss": 1.5334, "step": 3073 }, { "epoch": 0.46994076055799733, "grad_norm": 1.3671875, "learning_rate": 6.422857711567764e-05, "loss": 1.3436, "step": 3074 }, { "epoch": 0.4700936365373591, "grad_norm": 1.3515625, "learning_rate": 6.4224408340934e-05, "loss": 1.4649, "step": 3075 }, { "epoch": 0.4702465125167208, "grad_norm": 1.53125, "learning_rate": 6.422023963748789e-05, "loss": 1.5177, "step": 3076 }, { "epoch": 0.47039938849608254, "grad_norm": 1.25, "learning_rate": 6.421607100534763e-05, "loss": 1.23, "step": 3077 }, { "epoch": 0.4705522644754443, "grad_norm": 1.3671875, "learning_rate": 6.42119024445215e-05, "loss": 1.3324, "step": 3078 }, { "epoch": 0.47070514045480605, "grad_norm": 1.375, "learning_rate": 6.420773395501779e-05, "loss": 1.4215, "step": 3079 }, { "epoch": 0.4708580164341678, "grad_norm": 1.2109375, "learning_rate": 6.420356553684483e-05, "loss": 1.262, "step": 3080 }, { "epoch": 0.4710108924135295, "grad_norm": 1.4375, "learning_rate": 6.419939719001096e-05, "loss": 1.443, "step": 3081 }, { "epoch": 0.47116376839289126, "grad_norm": 1.5859375, "learning_rate": 6.419522891452443e-05, "loss": 1.5567, "step": 3082 }, { "epoch": 0.471316644372253, "grad_norm": 1.4453125, "learning_rate": 6.41910607103936e-05, "loss": 1.4718, "step": 3083 }, { "epoch": 0.47146952035161477, "grad_norm": 1.3828125, "learning_rate": 6.418689257762676e-05, "loss": 1.4175, "step": 3084 }, { "epoch": 0.47162239633097647, "grad_norm": 1.3359375, "learning_rate": 6.418272451623217e-05, "loss": 1.1403, "step": 3085 }, { "epoch": 0.4717752723103382, "grad_norm": 1.34375, "learning_rate": 6.41785565262182e-05, "loss": 1.4105, "step": 3086 }, { "epoch": 0.4719281482897, "grad_norm": 1.34375, "learning_rate": 6.417438860759311e-05, "loss": 1.3738, "step": 3087 }, { "epoch": 0.47208102426906173, "grad_norm": 1.3828125, "learning_rate": 6.417022076036526e-05, "loss": 1.4108, "step": 3088 }, { "epoch": 0.4722339002484235, "grad_norm": 1.484375, "learning_rate": 6.41660529845429e-05, "loss": 1.5121, "step": 3089 }, { "epoch": 0.4723867762277852, "grad_norm": 1.375, "learning_rate": 6.416188528013435e-05, "loss": 1.4985, "step": 3090 }, { "epoch": 0.47253965220714694, "grad_norm": 1.5234375, "learning_rate": 6.415771764714795e-05, "loss": 1.3854, "step": 3091 }, { "epoch": 0.4726925281865087, "grad_norm": 1.453125, "learning_rate": 6.415355008559197e-05, "loss": 1.4933, "step": 3092 }, { "epoch": 0.47284540416587045, "grad_norm": 1.4375, "learning_rate": 6.414938259547475e-05, "loss": 1.7323, "step": 3093 }, { "epoch": 0.47299828014523215, "grad_norm": 1.453125, "learning_rate": 6.414521517680455e-05, "loss": 1.3954, "step": 3094 }, { "epoch": 0.4731511561245939, "grad_norm": 1.28125, "learning_rate": 6.414104782958969e-05, "loss": 1.366, "step": 3095 }, { "epoch": 0.47330403210395566, "grad_norm": 1.5078125, "learning_rate": 6.413688055383846e-05, "loss": 1.5319, "step": 3096 }, { "epoch": 0.4734569080833174, "grad_norm": 1.359375, "learning_rate": 6.413271334955923e-05, "loss": 1.2439, "step": 3097 }, { "epoch": 0.4736097840626792, "grad_norm": 1.28125, "learning_rate": 6.412854621676028e-05, "loss": 1.3318, "step": 3098 }, { "epoch": 0.4737626600420409, "grad_norm": 1.2890625, "learning_rate": 6.412437915544985e-05, "loss": 1.4765, "step": 3099 }, { "epoch": 0.47391553602140263, "grad_norm": 1.3125, "learning_rate": 6.412021216563631e-05, "loss": 1.4934, "step": 3100 }, { "epoch": 0.4740684120007644, "grad_norm": 1.2265625, "learning_rate": 6.411604524732793e-05, "loss": 1.0816, "step": 3101 }, { "epoch": 0.47422128798012614, "grad_norm": 1.515625, "learning_rate": 6.411187840053305e-05, "loss": 1.4129, "step": 3102 }, { "epoch": 0.47437416395948784, "grad_norm": 1.3984375, "learning_rate": 6.410771162525994e-05, "loss": 1.2867, "step": 3103 }, { "epoch": 0.4745270399388496, "grad_norm": 1.390625, "learning_rate": 6.410354492151696e-05, "loss": 1.3801, "step": 3104 }, { "epoch": 0.47467991591821135, "grad_norm": 1.3046875, "learning_rate": 6.409937828931233e-05, "loss": 1.3711, "step": 3105 }, { "epoch": 0.4748327918975731, "grad_norm": 1.4453125, "learning_rate": 6.40952117286544e-05, "loss": 1.5561, "step": 3106 }, { "epoch": 0.47498566787693486, "grad_norm": 1.3515625, "learning_rate": 6.40910452395515e-05, "loss": 1.1312, "step": 3107 }, { "epoch": 0.47513854385629656, "grad_norm": 1.2578125, "learning_rate": 6.408687882201185e-05, "loss": 1.4076, "step": 3108 }, { "epoch": 0.4752914198356583, "grad_norm": 1.3671875, "learning_rate": 6.408271247604388e-05, "loss": 1.4089, "step": 3109 }, { "epoch": 0.47544429581502007, "grad_norm": 1.2890625, "learning_rate": 6.407854620165577e-05, "loss": 1.266, "step": 3110 }, { "epoch": 0.4755971717943818, "grad_norm": 1.328125, "learning_rate": 6.407437999885586e-05, "loss": 1.2982, "step": 3111 }, { "epoch": 0.4757500477737435, "grad_norm": 1.4296875, "learning_rate": 6.407021386765251e-05, "loss": 1.3849, "step": 3112 }, { "epoch": 0.4759029237531053, "grad_norm": 1.3203125, "learning_rate": 6.406604780805392e-05, "loss": 1.4689, "step": 3113 }, { "epoch": 0.47605579973246703, "grad_norm": 1.7265625, "learning_rate": 6.40618818200685e-05, "loss": 1.5327, "step": 3114 }, { "epoch": 0.4762086757118288, "grad_norm": 1.484375, "learning_rate": 6.405771590370448e-05, "loss": 1.3296, "step": 3115 }, { "epoch": 0.47636155169119054, "grad_norm": 1.4375, "learning_rate": 6.40535500589702e-05, "loss": 1.3602, "step": 3116 }, { "epoch": 0.47651442767055224, "grad_norm": 1.2109375, "learning_rate": 6.404938428587397e-05, "loss": 1.3637, "step": 3117 }, { "epoch": 0.476667303649914, "grad_norm": 1.328125, "learning_rate": 6.404521858442401e-05, "loss": 1.3802, "step": 3118 }, { "epoch": 0.47682017962927575, "grad_norm": 1.3046875, "learning_rate": 6.404105295462872e-05, "loss": 1.3516, "step": 3119 }, { "epoch": 0.4769730556086375, "grad_norm": 1.3125, "learning_rate": 6.403688739649634e-05, "loss": 1.1062, "step": 3120 }, { "epoch": 0.4771259315879992, "grad_norm": 1.265625, "learning_rate": 6.403272191003522e-05, "loss": 1.1793, "step": 3121 }, { "epoch": 0.47727880756736096, "grad_norm": 1.296875, "learning_rate": 6.402855649525361e-05, "loss": 1.3928, "step": 3122 }, { "epoch": 0.4774316835467227, "grad_norm": 1.2265625, "learning_rate": 6.402439115215988e-05, "loss": 1.5042, "step": 3123 }, { "epoch": 0.4775845595260845, "grad_norm": 1.2734375, "learning_rate": 6.402022588076227e-05, "loss": 1.2215, "step": 3124 }, { "epoch": 0.47773743550544623, "grad_norm": 1.5, "learning_rate": 6.401606068106906e-05, "loss": 1.5065, "step": 3125 }, { "epoch": 0.47789031148480793, "grad_norm": 1.3515625, "learning_rate": 6.401189555308864e-05, "loss": 1.3088, "step": 3126 }, { "epoch": 0.4780431874641697, "grad_norm": 1.40625, "learning_rate": 6.400773049682922e-05, "loss": 1.3874, "step": 3127 }, { "epoch": 0.47819606344353144, "grad_norm": 1.5625, "learning_rate": 6.40035655122992e-05, "loss": 1.412, "step": 3128 }, { "epoch": 0.4783489394228932, "grad_norm": 1.25, "learning_rate": 6.399940059950678e-05, "loss": 1.4332, "step": 3129 }, { "epoch": 0.4785018154022549, "grad_norm": 1.3046875, "learning_rate": 6.399523575846032e-05, "loss": 1.4761, "step": 3130 }, { "epoch": 0.47865469138161665, "grad_norm": 1.28125, "learning_rate": 6.39910709891681e-05, "loss": 1.2319, "step": 3131 }, { "epoch": 0.4788075673609784, "grad_norm": 1.3359375, "learning_rate": 6.39869062916384e-05, "loss": 1.4192, "step": 3132 }, { "epoch": 0.47896044334034016, "grad_norm": 1.4375, "learning_rate": 6.398274166587957e-05, "loss": 1.2766, "step": 3133 }, { "epoch": 0.4791133193197019, "grad_norm": 1.328125, "learning_rate": 6.397857711189986e-05, "loss": 1.3172, "step": 3134 }, { "epoch": 0.4792661952990636, "grad_norm": 1.6171875, "learning_rate": 6.397441262970762e-05, "loss": 1.4235, "step": 3135 }, { "epoch": 0.47941907127842537, "grad_norm": 1.3203125, "learning_rate": 6.397024821931108e-05, "loss": 1.2635, "step": 3136 }, { "epoch": 0.4795719472577871, "grad_norm": 1.5546875, "learning_rate": 6.39660838807186e-05, "loss": 1.8402, "step": 3137 }, { "epoch": 0.4797248232371489, "grad_norm": 1.3359375, "learning_rate": 6.39619196139385e-05, "loss": 1.3785, "step": 3138 }, { "epoch": 0.4798776992165106, "grad_norm": 1.484375, "learning_rate": 6.3957755418979e-05, "loss": 1.3215, "step": 3139 }, { "epoch": 0.48003057519587233, "grad_norm": 1.4140625, "learning_rate": 6.395359129584844e-05, "loss": 1.4403, "step": 3140 }, { "epoch": 0.4801834511752341, "grad_norm": 1.1796875, "learning_rate": 6.39494272445551e-05, "loss": 1.3631, "step": 3141 }, { "epoch": 0.48033632715459584, "grad_norm": 1.296875, "learning_rate": 6.394526326510733e-05, "loss": 1.2965, "step": 3142 }, { "epoch": 0.4804892031339576, "grad_norm": 1.46875, "learning_rate": 6.39410993575134e-05, "loss": 1.5046, "step": 3143 }, { "epoch": 0.4806420791133193, "grad_norm": 1.4765625, "learning_rate": 6.393693552178158e-05, "loss": 1.4361, "step": 3144 }, { "epoch": 0.48079495509268105, "grad_norm": 1.4296875, "learning_rate": 6.393277175792018e-05, "loss": 1.8033, "step": 3145 }, { "epoch": 0.4809478310720428, "grad_norm": 1.3125, "learning_rate": 6.39286080659375e-05, "loss": 1.4845, "step": 3146 }, { "epoch": 0.48110070705140456, "grad_norm": 1.21875, "learning_rate": 6.392444444584188e-05, "loss": 1.2114, "step": 3147 }, { "epoch": 0.48125358303076626, "grad_norm": 1.5, "learning_rate": 6.392028089764154e-05, "loss": 1.4715, "step": 3148 }, { "epoch": 0.481406459010128, "grad_norm": 1.34375, "learning_rate": 6.391611742134487e-05, "loss": 1.2793, "step": 3149 }, { "epoch": 0.4815593349894898, "grad_norm": 1.3671875, "learning_rate": 6.391195401696011e-05, "loss": 1.5012, "step": 3150 }, { "epoch": 0.48171221096885153, "grad_norm": 1.3984375, "learning_rate": 6.390779068449552e-05, "loss": 1.4756, "step": 3151 }, { "epoch": 0.4818650869482133, "grad_norm": 1.3515625, "learning_rate": 6.390362742395952e-05, "loss": 1.3602, "step": 3152 }, { "epoch": 0.482017962927575, "grad_norm": 1.3203125, "learning_rate": 6.389946423536024e-05, "loss": 1.3818, "step": 3153 }, { "epoch": 0.48217083890693674, "grad_norm": 1.4921875, "learning_rate": 6.389530111870614e-05, "loss": 1.6472, "step": 3154 }, { "epoch": 0.4823237148862985, "grad_norm": 1.3515625, "learning_rate": 6.38911380740054e-05, "loss": 1.1989, "step": 3155 }, { "epoch": 0.48247659086566025, "grad_norm": 1.3671875, "learning_rate": 6.388697510126638e-05, "loss": 1.3909, "step": 3156 }, { "epoch": 0.48262946684502195, "grad_norm": 1.3515625, "learning_rate": 6.388281220049739e-05, "loss": 1.4996, "step": 3157 }, { "epoch": 0.4827823428243837, "grad_norm": 1.671875, "learning_rate": 6.387864937170664e-05, "loss": 1.7734, "step": 3158 }, { "epoch": 0.48293521880374546, "grad_norm": 1.3046875, "learning_rate": 6.38744866149025e-05, "loss": 1.5351, "step": 3159 }, { "epoch": 0.4830880947831072, "grad_norm": 1.578125, "learning_rate": 6.387032393009322e-05, "loss": 1.2859, "step": 3160 }, { "epoch": 0.48324097076246897, "grad_norm": 1.296875, "learning_rate": 6.386616131728714e-05, "loss": 1.2285, "step": 3161 }, { "epoch": 0.48339384674183067, "grad_norm": 1.1953125, "learning_rate": 6.386199877649253e-05, "loss": 1.2438, "step": 3162 }, { "epoch": 0.4835467227211924, "grad_norm": 1.3359375, "learning_rate": 6.385783630771771e-05, "loss": 1.1643, "step": 3163 }, { "epoch": 0.4836995987005542, "grad_norm": 1.453125, "learning_rate": 6.385367391097096e-05, "loss": 1.3287, "step": 3164 }, { "epoch": 0.48385247467991593, "grad_norm": 1.34375, "learning_rate": 6.384951158626054e-05, "loss": 1.2562, "step": 3165 }, { "epoch": 0.48400535065927763, "grad_norm": 1.4140625, "learning_rate": 6.38453493335948e-05, "loss": 1.661, "step": 3166 }, { "epoch": 0.4841582266386394, "grad_norm": 2.15625, "learning_rate": 6.3841187152982e-05, "loss": 1.6356, "step": 3167 }, { "epoch": 0.48431110261800114, "grad_norm": 1.3984375, "learning_rate": 6.383702504443047e-05, "loss": 1.2358, "step": 3168 }, { "epoch": 0.4844639785973629, "grad_norm": 1.390625, "learning_rate": 6.383286300794848e-05, "loss": 1.4858, "step": 3169 }, { "epoch": 0.48461685457672465, "grad_norm": 1.4609375, "learning_rate": 6.38287010435443e-05, "loss": 1.4691, "step": 3170 }, { "epoch": 0.48476973055608635, "grad_norm": 1.2421875, "learning_rate": 6.382453915122629e-05, "loss": 1.2568, "step": 3171 }, { "epoch": 0.4849226065354481, "grad_norm": 1.5078125, "learning_rate": 6.382037733100266e-05, "loss": 1.6624, "step": 3172 }, { "epoch": 0.48507548251480986, "grad_norm": 1.3125, "learning_rate": 6.38162155828818e-05, "loss": 1.3966, "step": 3173 }, { "epoch": 0.4852283584941716, "grad_norm": 1.28125, "learning_rate": 6.381205390687192e-05, "loss": 1.1933, "step": 3174 }, { "epoch": 0.4853812344735333, "grad_norm": 1.28125, "learning_rate": 6.380789230298135e-05, "loss": 1.295, "step": 3175 }, { "epoch": 0.4855341104528951, "grad_norm": 1.296875, "learning_rate": 6.38037307712184e-05, "loss": 1.3016, "step": 3176 }, { "epoch": 0.48568698643225683, "grad_norm": 1.2265625, "learning_rate": 6.379956931159132e-05, "loss": 1.3317, "step": 3177 }, { "epoch": 0.4858398624116186, "grad_norm": 1.2890625, "learning_rate": 6.379540792410847e-05, "loss": 1.3387, "step": 3178 }, { "epoch": 0.48599273839098034, "grad_norm": 1.3984375, "learning_rate": 6.379124660877805e-05, "loss": 1.327, "step": 3179 }, { "epoch": 0.48614561437034204, "grad_norm": 1.203125, "learning_rate": 6.378708536560843e-05, "loss": 1.2048, "step": 3180 }, { "epoch": 0.4862984903497038, "grad_norm": 1.390625, "learning_rate": 6.378292419460787e-05, "loss": 1.3786, "step": 3181 }, { "epoch": 0.48645136632906555, "grad_norm": 1.265625, "learning_rate": 6.377876309578468e-05, "loss": 1.2497, "step": 3182 }, { "epoch": 0.4866042423084273, "grad_norm": 1.3828125, "learning_rate": 6.377460206914716e-05, "loss": 1.445, "step": 3183 }, { "epoch": 0.486757118287789, "grad_norm": 1.2734375, "learning_rate": 6.377044111470354e-05, "loss": 1.4973, "step": 3184 }, { "epoch": 0.48690999426715076, "grad_norm": 1.6171875, "learning_rate": 6.37662802324622e-05, "loss": 1.727, "step": 3185 }, { "epoch": 0.4870628702465125, "grad_norm": 1.34375, "learning_rate": 6.376211942243135e-05, "loss": 1.3917, "step": 3186 }, { "epoch": 0.48721574622587427, "grad_norm": 1.2421875, "learning_rate": 6.375795868461938e-05, "loss": 1.18, "step": 3187 }, { "epoch": 0.487368622205236, "grad_norm": 1.5078125, "learning_rate": 6.375379801903445e-05, "loss": 1.5119, "step": 3188 }, { "epoch": 0.4875214981845977, "grad_norm": 1.3515625, "learning_rate": 6.374963742568499e-05, "loss": 1.1337, "step": 3189 }, { "epoch": 0.4876743741639595, "grad_norm": 1.421875, "learning_rate": 6.374547690457922e-05, "loss": 1.2865, "step": 3190 }, { "epoch": 0.48782725014332123, "grad_norm": 1.484375, "learning_rate": 6.37413164557254e-05, "loss": 1.6237, "step": 3191 }, { "epoch": 0.487980126122683, "grad_norm": 1.5234375, "learning_rate": 6.373715607913191e-05, "loss": 1.5967, "step": 3192 }, { "epoch": 0.4881330021020447, "grad_norm": 1.5078125, "learning_rate": 6.373299577480694e-05, "loss": 1.6447, "step": 3193 }, { "epoch": 0.48828587808140644, "grad_norm": 1.3125, "learning_rate": 6.372883554275888e-05, "loss": 1.4909, "step": 3194 }, { "epoch": 0.4884387540607682, "grad_norm": 1.4140625, "learning_rate": 6.372467538299595e-05, "loss": 1.4582, "step": 3195 }, { "epoch": 0.48859163004012995, "grad_norm": 1.359375, "learning_rate": 6.372051529552647e-05, "loss": 1.3214, "step": 3196 }, { "epoch": 0.4887445060194917, "grad_norm": 1.4765625, "learning_rate": 6.371635528035875e-05, "loss": 1.6415, "step": 3197 }, { "epoch": 0.4888973819988534, "grad_norm": 1.3125, "learning_rate": 6.371219533750102e-05, "loss": 1.4073, "step": 3198 }, { "epoch": 0.48905025797821516, "grad_norm": 1.3359375, "learning_rate": 6.370803546696162e-05, "loss": 1.3547, "step": 3199 }, { "epoch": 0.4892031339575769, "grad_norm": 1.453125, "learning_rate": 6.370387566874883e-05, "loss": 1.574, "step": 3200 }, { "epoch": 0.4893560099369387, "grad_norm": 1.4453125, "learning_rate": 6.369971594287093e-05, "loss": 1.4481, "step": 3201 }, { "epoch": 0.4895088859163004, "grad_norm": 1.2890625, "learning_rate": 6.369555628933626e-05, "loss": 1.2127, "step": 3202 }, { "epoch": 0.48966176189566213, "grad_norm": 1.328125, "learning_rate": 6.3691396708153e-05, "loss": 1.3494, "step": 3203 }, { "epoch": 0.4898146378750239, "grad_norm": 1.265625, "learning_rate": 6.368723719932954e-05, "loss": 1.3339, "step": 3204 }, { "epoch": 0.48996751385438564, "grad_norm": 1.25, "learning_rate": 6.368307776287412e-05, "loss": 1.3327, "step": 3205 }, { "epoch": 0.4901203898337474, "grad_norm": 1.3125, "learning_rate": 6.367891839879507e-05, "loss": 1.2239, "step": 3206 }, { "epoch": 0.4902732658131091, "grad_norm": 1.3828125, "learning_rate": 6.367475910710061e-05, "loss": 1.5712, "step": 3207 }, { "epoch": 0.49042614179247085, "grad_norm": 1.46875, "learning_rate": 6.367059988779915e-05, "loss": 1.7283, "step": 3208 }, { "epoch": 0.4905790177718326, "grad_norm": 1.3125, "learning_rate": 6.366644074089885e-05, "loss": 1.4021, "step": 3209 }, { "epoch": 0.49073189375119436, "grad_norm": 1.4140625, "learning_rate": 6.366228166640804e-05, "loss": 1.4644, "step": 3210 }, { "epoch": 0.49088476973055606, "grad_norm": 1.2109375, "learning_rate": 6.365812266433506e-05, "loss": 1.1786, "step": 3211 }, { "epoch": 0.4910376457099178, "grad_norm": 1.25, "learning_rate": 6.365396373468813e-05, "loss": 1.2407, "step": 3212 }, { "epoch": 0.49119052168927957, "grad_norm": 1.28125, "learning_rate": 6.364980487747558e-05, "loss": 1.4512, "step": 3213 }, { "epoch": 0.4913433976686413, "grad_norm": 1.4296875, "learning_rate": 6.364564609270568e-05, "loss": 1.4455, "step": 3214 }, { "epoch": 0.4914962736480031, "grad_norm": 1.328125, "learning_rate": 6.364148738038674e-05, "loss": 1.4648, "step": 3215 }, { "epoch": 0.4916491496273648, "grad_norm": 1.3984375, "learning_rate": 6.363732874052702e-05, "loss": 1.3712, "step": 3216 }, { "epoch": 0.49180202560672653, "grad_norm": 1.515625, "learning_rate": 6.36331701731348e-05, "loss": 1.477, "step": 3217 }, { "epoch": 0.4919549015860883, "grad_norm": 1.3125, "learning_rate": 6.362901167821842e-05, "loss": 1.276, "step": 3218 }, { "epoch": 0.49210777756545004, "grad_norm": 1.5546875, "learning_rate": 6.36248532557861e-05, "loss": 1.4256, "step": 3219 }, { "epoch": 0.49226065354481174, "grad_norm": 1.265625, "learning_rate": 6.36206949058462e-05, "loss": 1.3905, "step": 3220 }, { "epoch": 0.4924135295241735, "grad_norm": 1.40625, "learning_rate": 6.361653662840692e-05, "loss": 1.6102, "step": 3221 }, { "epoch": 0.49256640550353525, "grad_norm": 1.2421875, "learning_rate": 6.361237842347662e-05, "loss": 1.2836, "step": 3222 }, { "epoch": 0.492719281482897, "grad_norm": 1.296875, "learning_rate": 6.36082202910636e-05, "loss": 1.4676, "step": 3223 }, { "epoch": 0.49287215746225876, "grad_norm": 1.5390625, "learning_rate": 6.360406223117606e-05, "loss": 1.5968, "step": 3224 }, { "epoch": 0.49302503344162046, "grad_norm": 1.4140625, "learning_rate": 6.359990424382236e-05, "loss": 1.5593, "step": 3225 }, { "epoch": 0.4931779094209822, "grad_norm": 1.5390625, "learning_rate": 6.359574632901074e-05, "loss": 1.5033, "step": 3226 }, { "epoch": 0.493330785400344, "grad_norm": 1.46875, "learning_rate": 6.359158848674953e-05, "loss": 1.5944, "step": 3227 }, { "epoch": 0.49348366137970573, "grad_norm": 1.375, "learning_rate": 6.358743071704698e-05, "loss": 1.1887, "step": 3228 }, { "epoch": 0.49363653735906743, "grad_norm": 1.234375, "learning_rate": 6.358327301991142e-05, "loss": 1.1543, "step": 3229 }, { "epoch": 0.4937894133384292, "grad_norm": 1.25, "learning_rate": 6.35791153953511e-05, "loss": 1.1942, "step": 3230 }, { "epoch": 0.49394228931779094, "grad_norm": 1.34375, "learning_rate": 6.357495784337429e-05, "loss": 1.232, "step": 3231 }, { "epoch": 0.4940951652971527, "grad_norm": 1.2734375, "learning_rate": 6.357080036398932e-05, "loss": 1.2952, "step": 3232 }, { "epoch": 0.49424804127651445, "grad_norm": 1.421875, "learning_rate": 6.356664295720441e-05, "loss": 1.4355, "step": 3233 }, { "epoch": 0.49440091725587615, "grad_norm": 1.4609375, "learning_rate": 6.356248562302795e-05, "loss": 1.5518, "step": 3234 }, { "epoch": 0.4945537932352379, "grad_norm": 1.390625, "learning_rate": 6.355832836146816e-05, "loss": 1.2409, "step": 3235 }, { "epoch": 0.49470666921459966, "grad_norm": 1.3671875, "learning_rate": 6.355417117253328e-05, "loss": 1.2844, "step": 3236 }, { "epoch": 0.4948595451939614, "grad_norm": 1.3515625, "learning_rate": 6.355001405623171e-05, "loss": 1.404, "step": 3237 }, { "epoch": 0.4950124211733231, "grad_norm": 1.3515625, "learning_rate": 6.354585701257159e-05, "loss": 1.4943, "step": 3238 }, { "epoch": 0.49516529715268487, "grad_norm": 1.5078125, "learning_rate": 6.354170004156135e-05, "loss": 1.6126, "step": 3239 }, { "epoch": 0.4953181731320466, "grad_norm": 1.390625, "learning_rate": 6.353754314320917e-05, "loss": 1.5147, "step": 3240 }, { "epoch": 0.4954710491114084, "grad_norm": 1.421875, "learning_rate": 6.35333863175234e-05, "loss": 1.3581, "step": 3241 }, { "epoch": 0.49562392509077013, "grad_norm": 1.3125, "learning_rate": 6.35292295645123e-05, "loss": 1.2843, "step": 3242 }, { "epoch": 0.49577680107013183, "grad_norm": 1.3046875, "learning_rate": 6.352507288418413e-05, "loss": 1.3691, "step": 3243 }, { "epoch": 0.4959296770494936, "grad_norm": 1.5546875, "learning_rate": 6.352091627654722e-05, "loss": 1.5913, "step": 3244 }, { "epoch": 0.49608255302885534, "grad_norm": 1.328125, "learning_rate": 6.351675974160979e-05, "loss": 1.464, "step": 3245 }, { "epoch": 0.4962354290082171, "grad_norm": 1.4140625, "learning_rate": 6.351260327938018e-05, "loss": 1.404, "step": 3246 }, { "epoch": 0.4963883049875788, "grad_norm": 1.421875, "learning_rate": 6.350844688986664e-05, "loss": 1.4303, "step": 3247 }, { "epoch": 0.49654118096694055, "grad_norm": 1.34375, "learning_rate": 6.350429057307752e-05, "loss": 1.4371, "step": 3248 }, { "epoch": 0.4966940569463023, "grad_norm": 1.3125, "learning_rate": 6.350013432902102e-05, "loss": 1.3786, "step": 3249 }, { "epoch": 0.49684693292566406, "grad_norm": 1.4765625, "learning_rate": 6.349597815770546e-05, "loss": 1.515, "step": 3250 }, { "epoch": 0.4969998089050258, "grad_norm": 1.453125, "learning_rate": 6.349182205913911e-05, "loss": 1.6952, "step": 3251 }, { "epoch": 0.4971526848843875, "grad_norm": 1.296875, "learning_rate": 6.348766603333025e-05, "loss": 1.4754, "step": 3252 }, { "epoch": 0.4973055608637493, "grad_norm": 1.375, "learning_rate": 6.348351008028721e-05, "loss": 1.7561, "step": 3253 }, { "epoch": 0.49745843684311103, "grad_norm": 1.421875, "learning_rate": 6.34793542000182e-05, "loss": 1.3273, "step": 3254 }, { "epoch": 0.4976113128224728, "grad_norm": 1.28125, "learning_rate": 6.347519839253155e-05, "loss": 1.1578, "step": 3255 }, { "epoch": 0.4977641888018345, "grad_norm": 1.2578125, "learning_rate": 6.347104265783555e-05, "loss": 1.457, "step": 3256 }, { "epoch": 0.49791706478119624, "grad_norm": 1.4609375, "learning_rate": 6.346688699593843e-05, "loss": 1.6979, "step": 3257 }, { "epoch": 0.498069940760558, "grad_norm": 1.4765625, "learning_rate": 6.346273140684854e-05, "loss": 1.6564, "step": 3258 }, { "epoch": 0.49822281673991975, "grad_norm": 1.3125, "learning_rate": 6.34585758905741e-05, "loss": 1.3571, "step": 3259 }, { "epoch": 0.4983756927192815, "grad_norm": 1.421875, "learning_rate": 6.345442044712343e-05, "loss": 1.3408, "step": 3260 }, { "epoch": 0.4985285686986432, "grad_norm": 1.375, "learning_rate": 6.34502650765048e-05, "loss": 1.3996, "step": 3261 }, { "epoch": 0.49868144467800496, "grad_norm": 1.4453125, "learning_rate": 6.344610977872645e-05, "loss": 1.4555, "step": 3262 }, { "epoch": 0.4988343206573667, "grad_norm": 1.2890625, "learning_rate": 6.344195455379676e-05, "loss": 1.3521, "step": 3263 }, { "epoch": 0.49898719663672847, "grad_norm": 1.421875, "learning_rate": 6.343779940172389e-05, "loss": 1.3857, "step": 3264 }, { "epoch": 0.49914007261609017, "grad_norm": 1.59375, "learning_rate": 6.343364432251624e-05, "loss": 1.7979, "step": 3265 }, { "epoch": 0.4992929485954519, "grad_norm": 1.265625, "learning_rate": 6.3429489316182e-05, "loss": 1.0513, "step": 3266 }, { "epoch": 0.4994458245748137, "grad_norm": 1.46875, "learning_rate": 6.34253343827295e-05, "loss": 1.5051, "step": 3267 }, { "epoch": 0.49959870055417543, "grad_norm": 1.421875, "learning_rate": 6.342117952216702e-05, "loss": 1.3854, "step": 3268 }, { "epoch": 0.4997515765335372, "grad_norm": 1.3046875, "learning_rate": 6.34170247345028e-05, "loss": 1.2595, "step": 3269 }, { "epoch": 0.4999044525128989, "grad_norm": 1.4375, "learning_rate": 6.341287001974514e-05, "loss": 1.5776, "step": 3270 }, { "epoch": 0.5000573284922607, "grad_norm": 1.28125, "learning_rate": 6.340871537790233e-05, "loss": 1.2734, "step": 3271 }, { "epoch": 0.5002102044716223, "grad_norm": 1.2890625, "learning_rate": 6.340456080898267e-05, "loss": 1.2429, "step": 3272 }, { "epoch": 0.5003630804509841, "grad_norm": 1.5234375, "learning_rate": 6.340040631299437e-05, "loss": 1.5782, "step": 3273 }, { "epoch": 0.5005159564303459, "grad_norm": 1.3203125, "learning_rate": 6.33962518899458e-05, "loss": 1.3213, "step": 3274 }, { "epoch": 0.5006688324097076, "grad_norm": 1.3046875, "learning_rate": 6.339209753984518e-05, "loss": 1.4798, "step": 3275 }, { "epoch": 0.5008217083890694, "grad_norm": 1.3984375, "learning_rate": 6.338794326270078e-05, "loss": 1.5573, "step": 3276 }, { "epoch": 0.5009745843684311, "grad_norm": 1.390625, "learning_rate": 6.338378905852094e-05, "loss": 1.5049, "step": 3277 }, { "epoch": 0.5011274603477929, "grad_norm": 1.4375, "learning_rate": 6.337963492731384e-05, "loss": 1.5815, "step": 3278 }, { "epoch": 0.5012803363271546, "grad_norm": 1.265625, "learning_rate": 6.337548086908788e-05, "loss": 1.0744, "step": 3279 }, { "epoch": 0.5014332123065164, "grad_norm": 1.375, "learning_rate": 6.337132688385126e-05, "loss": 1.2374, "step": 3280 }, { "epoch": 0.501586088285878, "grad_norm": 1.4765625, "learning_rate": 6.336717297161227e-05, "loss": 1.4955, "step": 3281 }, { "epoch": 0.5017389642652398, "grad_norm": 1.4296875, "learning_rate": 6.336301913237923e-05, "loss": 1.4976, "step": 3282 }, { "epoch": 0.5018918402446015, "grad_norm": 1.4609375, "learning_rate": 6.335886536616033e-05, "loss": 1.6221, "step": 3283 }, { "epoch": 0.5020447162239633, "grad_norm": 1.3359375, "learning_rate": 6.335471167296394e-05, "loss": 1.3765, "step": 3284 }, { "epoch": 0.502197592203325, "grad_norm": 1.375, "learning_rate": 6.335055805279828e-05, "loss": 1.3876, "step": 3285 }, { "epoch": 0.5023504681826868, "grad_norm": 1.53125, "learning_rate": 6.334640450567165e-05, "loss": 1.5397, "step": 3286 }, { "epoch": 0.5025033441620486, "grad_norm": 1.375, "learning_rate": 6.33422510315923e-05, "loss": 1.5121, "step": 3287 }, { "epoch": 0.5026562201414103, "grad_norm": 1.4375, "learning_rate": 6.33380976305686e-05, "loss": 1.597, "step": 3288 }, { "epoch": 0.5028090961207721, "grad_norm": 1.296875, "learning_rate": 6.333394430260872e-05, "loss": 1.3488, "step": 3289 }, { "epoch": 0.5029619721001337, "grad_norm": 1.53125, "learning_rate": 6.332979104772096e-05, "loss": 1.4993, "step": 3290 }, { "epoch": 0.5031148480794955, "grad_norm": 1.4140625, "learning_rate": 6.332563786591363e-05, "loss": 1.3079, "step": 3291 }, { "epoch": 0.5032677240588572, "grad_norm": 1.4609375, "learning_rate": 6.332148475719497e-05, "loss": 1.4318, "step": 3292 }, { "epoch": 0.503420600038219, "grad_norm": 1.4765625, "learning_rate": 6.331733172157332e-05, "loss": 1.4215, "step": 3293 }, { "epoch": 0.5035734760175807, "grad_norm": 1.40625, "learning_rate": 6.331317875905691e-05, "loss": 1.4832, "step": 3294 }, { "epoch": 0.5037263519969425, "grad_norm": 1.4609375, "learning_rate": 6.3309025869654e-05, "loss": 1.5007, "step": 3295 }, { "epoch": 0.5038792279763042, "grad_norm": 1.234375, "learning_rate": 6.330487305337288e-05, "loss": 1.4174, "step": 3296 }, { "epoch": 0.504032103955666, "grad_norm": 1.421875, "learning_rate": 6.330072031022182e-05, "loss": 1.4838, "step": 3297 }, { "epoch": 0.5041849799350278, "grad_norm": 1.34375, "learning_rate": 6.329656764020917e-05, "loss": 1.413, "step": 3298 }, { "epoch": 0.5043378559143894, "grad_norm": 1.4453125, "learning_rate": 6.32924150433431e-05, "loss": 1.723, "step": 3299 }, { "epoch": 0.5044907318937512, "grad_norm": 1.3125, "learning_rate": 6.328826251963194e-05, "loss": 1.1301, "step": 3300 }, { "epoch": 0.5046436078731129, "grad_norm": 1.4453125, "learning_rate": 6.328411006908396e-05, "loss": 1.6607, "step": 3301 }, { "epoch": 0.5047964838524747, "grad_norm": 1.2734375, "learning_rate": 6.32799576917074e-05, "loss": 1.4064, "step": 3302 }, { "epoch": 0.5049493598318364, "grad_norm": 1.2734375, "learning_rate": 6.327580538751064e-05, "loss": 1.3834, "step": 3303 }, { "epoch": 0.5051022358111982, "grad_norm": 1.5078125, "learning_rate": 6.327165315650181e-05, "loss": 1.4715, "step": 3304 }, { "epoch": 0.5052551117905599, "grad_norm": 1.265625, "learning_rate": 6.32675009986893e-05, "loss": 1.3956, "step": 3305 }, { "epoch": 0.5054079877699217, "grad_norm": 1.4296875, "learning_rate": 6.326334891408131e-05, "loss": 1.4625, "step": 3306 }, { "epoch": 0.5055608637492834, "grad_norm": 1.2265625, "learning_rate": 6.325919690268617e-05, "loss": 1.4231, "step": 3307 }, { "epoch": 0.5057137397286451, "grad_norm": 1.234375, "learning_rate": 6.325504496451215e-05, "loss": 1.2225, "step": 3308 }, { "epoch": 0.5058666157080068, "grad_norm": 1.4609375, "learning_rate": 6.325089309956746e-05, "loss": 1.6952, "step": 3309 }, { "epoch": 0.5060194916873686, "grad_norm": 1.6796875, "learning_rate": 6.324674130786046e-05, "loss": 1.5443, "step": 3310 }, { "epoch": 0.5061723676667303, "grad_norm": 1.1796875, "learning_rate": 6.324258958939934e-05, "loss": 1.0412, "step": 3311 }, { "epoch": 0.5063252436460921, "grad_norm": 1.3515625, "learning_rate": 6.323843794419246e-05, "loss": 1.5308, "step": 3312 }, { "epoch": 0.5064781196254539, "grad_norm": 1.3984375, "learning_rate": 6.323428637224803e-05, "loss": 1.5556, "step": 3313 }, { "epoch": 0.5066309956048156, "grad_norm": 1.359375, "learning_rate": 6.323013487357439e-05, "loss": 1.3076, "step": 3314 }, { "epoch": 0.5067838715841774, "grad_norm": 1.25, "learning_rate": 6.322598344817974e-05, "loss": 1.2397, "step": 3315 }, { "epoch": 0.5069367475635391, "grad_norm": 1.2578125, "learning_rate": 6.322183209607235e-05, "loss": 1.1805, "step": 3316 }, { "epoch": 0.5070896235429008, "grad_norm": 1.578125, "learning_rate": 6.321768081726059e-05, "loss": 1.669, "step": 3317 }, { "epoch": 0.5072424995222625, "grad_norm": 1.375, "learning_rate": 6.32135296117526e-05, "loss": 1.4184, "step": 3318 }, { "epoch": 0.5073953755016243, "grad_norm": 1.3125, "learning_rate": 6.320937847955679e-05, "loss": 1.3171, "step": 3319 }, { "epoch": 0.507548251480986, "grad_norm": 1.3203125, "learning_rate": 6.320522742068132e-05, "loss": 1.3125, "step": 3320 }, { "epoch": 0.5077011274603478, "grad_norm": 1.3046875, "learning_rate": 6.320107643513451e-05, "loss": 1.264, "step": 3321 }, { "epoch": 0.5078540034397095, "grad_norm": 1.1953125, "learning_rate": 6.319692552292468e-05, "loss": 1.2561, "step": 3322 }, { "epoch": 0.5080068794190713, "grad_norm": 1.578125, "learning_rate": 6.319277468406e-05, "loss": 1.4226, "step": 3323 }, { "epoch": 0.508159755398433, "grad_norm": 1.3828125, "learning_rate": 6.31886239185488e-05, "loss": 1.4187, "step": 3324 }, { "epoch": 0.5083126313777948, "grad_norm": 1.3984375, "learning_rate": 6.318447322639935e-05, "loss": 1.5031, "step": 3325 }, { "epoch": 0.5084655073571565, "grad_norm": 1.4609375, "learning_rate": 6.318032260761994e-05, "loss": 1.4875, "step": 3326 }, { "epoch": 0.5086183833365182, "grad_norm": 1.421875, "learning_rate": 6.317617206221884e-05, "loss": 1.3238, "step": 3327 }, { "epoch": 0.50877125931588, "grad_norm": 1.375, "learning_rate": 6.317202159020424e-05, "loss": 1.3059, "step": 3328 }, { "epoch": 0.5089241352952417, "grad_norm": 1.3359375, "learning_rate": 6.316787119158452e-05, "loss": 1.2181, "step": 3329 }, { "epoch": 0.5090770112746035, "grad_norm": 1.328125, "learning_rate": 6.316372086636786e-05, "loss": 1.2614, "step": 3330 }, { "epoch": 0.5092298872539652, "grad_norm": 1.5390625, "learning_rate": 6.315957061456262e-05, "loss": 1.3969, "step": 3331 }, { "epoch": 0.509382763233327, "grad_norm": 1.390625, "learning_rate": 6.3155420436177e-05, "loss": 1.3862, "step": 3332 }, { "epoch": 0.5095356392126887, "grad_norm": 1.2421875, "learning_rate": 6.315127033121934e-05, "loss": 1.438, "step": 3333 }, { "epoch": 0.5096885151920505, "grad_norm": 1.4765625, "learning_rate": 6.314712029969785e-05, "loss": 1.6168, "step": 3334 }, { "epoch": 0.5098413911714121, "grad_norm": 1.265625, "learning_rate": 6.31429703416208e-05, "loss": 1.3295, "step": 3335 }, { "epoch": 0.5099942671507739, "grad_norm": 1.21875, "learning_rate": 6.313882045699651e-05, "loss": 1.2274, "step": 3336 }, { "epoch": 0.5101471431301356, "grad_norm": 1.4453125, "learning_rate": 6.313467064583319e-05, "loss": 1.6541, "step": 3337 }, { "epoch": 0.5103000191094974, "grad_norm": 1.3046875, "learning_rate": 6.313052090813917e-05, "loss": 1.3249, "step": 3338 }, { "epoch": 0.5104528950888592, "grad_norm": 1.453125, "learning_rate": 6.312637124392268e-05, "loss": 1.5472, "step": 3339 }, { "epoch": 0.5106057710682209, "grad_norm": 1.34375, "learning_rate": 6.3122221653192e-05, "loss": 1.1861, "step": 3340 }, { "epoch": 0.5107586470475827, "grad_norm": 1.3515625, "learning_rate": 6.311807213595541e-05, "loss": 1.5591, "step": 3341 }, { "epoch": 0.5109115230269444, "grad_norm": 1.421875, "learning_rate": 6.311392269222113e-05, "loss": 1.4506, "step": 3342 }, { "epoch": 0.5110643990063062, "grad_norm": 1.3359375, "learning_rate": 6.310977332199755e-05, "loss": 1.2863, "step": 3343 }, { "epoch": 0.5112172749856678, "grad_norm": 1.453125, "learning_rate": 6.310562402529278e-05, "loss": 1.3742, "step": 3344 }, { "epoch": 0.5113701509650296, "grad_norm": 1.359375, "learning_rate": 6.310147480211522e-05, "loss": 1.5104, "step": 3345 }, { "epoch": 0.5115230269443913, "grad_norm": 1.3515625, "learning_rate": 6.309732565247306e-05, "loss": 1.434, "step": 3346 }, { "epoch": 0.5116759029237531, "grad_norm": 1.7890625, "learning_rate": 6.30931765763746e-05, "loss": 1.4121, "step": 3347 }, { "epoch": 0.5118287789031148, "grad_norm": 1.4765625, "learning_rate": 6.308902757382813e-05, "loss": 1.3573, "step": 3348 }, { "epoch": 0.5119816548824766, "grad_norm": 1.484375, "learning_rate": 6.308487864484187e-05, "loss": 1.4758, "step": 3349 }, { "epoch": 0.5121345308618384, "grad_norm": 1.46875, "learning_rate": 6.308072978942412e-05, "loss": 1.5419, "step": 3350 }, { "epoch": 0.5122874068412001, "grad_norm": 1.484375, "learning_rate": 6.307658100758311e-05, "loss": 1.5421, "step": 3351 }, { "epoch": 0.5124402828205619, "grad_norm": 1.390625, "learning_rate": 6.307243229932718e-05, "loss": 1.5721, "step": 3352 }, { "epoch": 0.5125931587999235, "grad_norm": 1.453125, "learning_rate": 6.306828366466453e-05, "loss": 1.3825, "step": 3353 }, { "epoch": 0.5127460347792853, "grad_norm": 1.3125, "learning_rate": 6.30641351036035e-05, "loss": 1.2834, "step": 3354 }, { "epoch": 0.512898910758647, "grad_norm": 1.4375, "learning_rate": 6.305998661615228e-05, "loss": 1.3483, "step": 3355 }, { "epoch": 0.5130517867380088, "grad_norm": 1.2890625, "learning_rate": 6.305583820231916e-05, "loss": 1.1243, "step": 3356 }, { "epoch": 0.5132046627173705, "grad_norm": 1.4375, "learning_rate": 6.305168986211245e-05, "loss": 1.4246, "step": 3357 }, { "epoch": 0.5133575386967323, "grad_norm": 1.296875, "learning_rate": 6.304754159554031e-05, "loss": 1.1949, "step": 3358 }, { "epoch": 0.513510414676094, "grad_norm": 1.375, "learning_rate": 6.304339340261117e-05, "loss": 1.5407, "step": 3359 }, { "epoch": 0.5136632906554558, "grad_norm": 1.40625, "learning_rate": 6.303924528333319e-05, "loss": 1.3867, "step": 3360 }, { "epoch": 0.5138161666348176, "grad_norm": 1.4453125, "learning_rate": 6.303509723771461e-05, "loss": 1.6385, "step": 3361 }, { "epoch": 0.5139690426141792, "grad_norm": 1.34375, "learning_rate": 6.30309492657638e-05, "loss": 1.4094, "step": 3362 }, { "epoch": 0.514121918593541, "grad_norm": 1.2421875, "learning_rate": 6.302680136748889e-05, "loss": 1.5117, "step": 3363 }, { "epoch": 0.5142747945729027, "grad_norm": 1.28125, "learning_rate": 6.302265354289831e-05, "loss": 1.3482, "step": 3364 }, { "epoch": 0.5144276705522645, "grad_norm": 1.46875, "learning_rate": 6.301850579200019e-05, "loss": 1.5313, "step": 3365 }, { "epoch": 0.5145805465316262, "grad_norm": 1.265625, "learning_rate": 6.301435811480286e-05, "loss": 1.2578, "step": 3366 }, { "epoch": 0.514733422510988, "grad_norm": 1.4453125, "learning_rate": 6.30102105113146e-05, "loss": 1.4506, "step": 3367 }, { "epoch": 0.5148862984903497, "grad_norm": 1.3828125, "learning_rate": 6.300606298154362e-05, "loss": 1.5193, "step": 3368 }, { "epoch": 0.5150391744697115, "grad_norm": 1.3515625, "learning_rate": 6.300191552549823e-05, "loss": 1.4389, "step": 3369 }, { "epoch": 0.5151920504490732, "grad_norm": 1.328125, "learning_rate": 6.299776814318665e-05, "loss": 1.4376, "step": 3370 }, { "epoch": 0.5153449264284349, "grad_norm": 1.25, "learning_rate": 6.29936208346172e-05, "loss": 1.366, "step": 3371 }, { "epoch": 0.5154978024077966, "grad_norm": 1.359375, "learning_rate": 6.29894735997981e-05, "loss": 1.5144, "step": 3372 }, { "epoch": 0.5156506783871584, "grad_norm": 1.359375, "learning_rate": 6.298532643873768e-05, "loss": 1.2796, "step": 3373 }, { "epoch": 0.5158035543665201, "grad_norm": 1.4140625, "learning_rate": 6.298117935144414e-05, "loss": 1.5305, "step": 3374 }, { "epoch": 0.5159564303458819, "grad_norm": 1.2578125, "learning_rate": 6.297703233792573e-05, "loss": 1.3043, "step": 3375 }, { "epoch": 0.5161093063252437, "grad_norm": 1.3984375, "learning_rate": 6.297288539819079e-05, "loss": 1.3891, "step": 3376 }, { "epoch": 0.5162621823046054, "grad_norm": 1.34375, "learning_rate": 6.296873853224752e-05, "loss": 1.1854, "step": 3377 }, { "epoch": 0.5164150582839672, "grad_norm": 1.2890625, "learning_rate": 6.296459174010425e-05, "loss": 1.4238, "step": 3378 }, { "epoch": 0.5165679342633289, "grad_norm": 1.3984375, "learning_rate": 6.296044502176917e-05, "loss": 1.414, "step": 3379 }, { "epoch": 0.5167208102426906, "grad_norm": 1.3125, "learning_rate": 6.295629837725059e-05, "loss": 1.4124, "step": 3380 }, { "epoch": 0.5168736862220523, "grad_norm": 1.2734375, "learning_rate": 6.295215180655676e-05, "loss": 1.3118, "step": 3381 }, { "epoch": 0.5170265622014141, "grad_norm": 1.3671875, "learning_rate": 6.294800530969591e-05, "loss": 1.2499, "step": 3382 }, { "epoch": 0.5171794381807758, "grad_norm": 1.3046875, "learning_rate": 6.29438588866764e-05, "loss": 1.37, "step": 3383 }, { "epoch": 0.5173323141601376, "grad_norm": 1.2109375, "learning_rate": 6.293971253750639e-05, "loss": 1.3007, "step": 3384 }, { "epoch": 0.5174851901394993, "grad_norm": 1.3671875, "learning_rate": 6.293556626219419e-05, "loss": 1.4751, "step": 3385 }, { "epoch": 0.5176380661188611, "grad_norm": 1.4296875, "learning_rate": 6.29314200607481e-05, "loss": 1.5842, "step": 3386 }, { "epoch": 0.5177909420982229, "grad_norm": 1.484375, "learning_rate": 6.292727393317626e-05, "loss": 1.4688, "step": 3387 }, { "epoch": 0.5179438180775846, "grad_norm": 1.265625, "learning_rate": 6.292312787948709e-05, "loss": 1.3565, "step": 3388 }, { "epoch": 0.5180966940569462, "grad_norm": 1.4296875, "learning_rate": 6.291898189968874e-05, "loss": 1.1077, "step": 3389 }, { "epoch": 0.518249570036308, "grad_norm": 1.5234375, "learning_rate": 6.291483599378951e-05, "loss": 1.4296, "step": 3390 }, { "epoch": 0.5184024460156698, "grad_norm": 1.4140625, "learning_rate": 6.291069016179766e-05, "loss": 1.6112, "step": 3391 }, { "epoch": 0.5185553219950315, "grad_norm": 1.3828125, "learning_rate": 6.290654440372147e-05, "loss": 1.3666, "step": 3392 }, { "epoch": 0.5187081979743933, "grad_norm": 1.2578125, "learning_rate": 6.29023987195692e-05, "loss": 1.2712, "step": 3393 }, { "epoch": 0.518861073953755, "grad_norm": 1.3046875, "learning_rate": 6.289825310934907e-05, "loss": 1.4134, "step": 3394 }, { "epoch": 0.5190139499331168, "grad_norm": 1.3046875, "learning_rate": 6.289410757306937e-05, "loss": 1.2742, "step": 3395 }, { "epoch": 0.5191668259124785, "grad_norm": 1.3203125, "learning_rate": 6.288996211073837e-05, "loss": 1.3644, "step": 3396 }, { "epoch": 0.5193197018918403, "grad_norm": 1.4453125, "learning_rate": 6.288581672236433e-05, "loss": 1.5192, "step": 3397 }, { "epoch": 0.5194725778712019, "grad_norm": 1.421875, "learning_rate": 6.288167140795546e-05, "loss": 1.2451, "step": 3398 }, { "epoch": 0.5196254538505637, "grad_norm": 1.3671875, "learning_rate": 6.287752616752015e-05, "loss": 1.3256, "step": 3399 }, { "epoch": 0.5197783298299254, "grad_norm": 1.40625, "learning_rate": 6.287338100106652e-05, "loss": 1.4303, "step": 3400 }, { "epoch": 0.5199312058092872, "grad_norm": 1.4375, "learning_rate": 6.28692359086029e-05, "loss": 1.5603, "step": 3401 }, { "epoch": 0.520084081788649, "grad_norm": 1.3984375, "learning_rate": 6.286509089013754e-05, "loss": 1.3219, "step": 3402 }, { "epoch": 0.5202369577680107, "grad_norm": 1.40625, "learning_rate": 6.286094594567867e-05, "loss": 1.5269, "step": 3403 }, { "epoch": 0.5203898337473725, "grad_norm": 1.3359375, "learning_rate": 6.285680107523464e-05, "loss": 1.4523, "step": 3404 }, { "epoch": 0.5205427097267342, "grad_norm": 1.4765625, "learning_rate": 6.28526562788136e-05, "loss": 1.4593, "step": 3405 }, { "epoch": 0.520695585706096, "grad_norm": 1.5, "learning_rate": 6.284851155642388e-05, "loss": 1.372, "step": 3406 }, { "epoch": 0.5208484616854576, "grad_norm": 1.3125, "learning_rate": 6.284436690807374e-05, "loss": 1.1861, "step": 3407 }, { "epoch": 0.5210013376648194, "grad_norm": 1.25, "learning_rate": 6.284022233377139e-05, "loss": 1.3496, "step": 3408 }, { "epoch": 0.5211542136441811, "grad_norm": 1.2109375, "learning_rate": 6.283607783352514e-05, "loss": 1.4467, "step": 3409 }, { "epoch": 0.5213070896235429, "grad_norm": 1.3125, "learning_rate": 6.28319334073432e-05, "loss": 1.3559, "step": 3410 }, { "epoch": 0.5214599656029046, "grad_norm": 1.28125, "learning_rate": 6.282778905523387e-05, "loss": 1.4993, "step": 3411 }, { "epoch": 0.5216128415822664, "grad_norm": 1.359375, "learning_rate": 6.28236447772054e-05, "loss": 1.4315, "step": 3412 }, { "epoch": 0.5217657175616282, "grad_norm": 1.484375, "learning_rate": 6.281950057326609e-05, "loss": 1.3523, "step": 3413 }, { "epoch": 0.5219185935409899, "grad_norm": 1.4296875, "learning_rate": 6.281535644342412e-05, "loss": 1.4031, "step": 3414 }, { "epoch": 0.5220714695203517, "grad_norm": 1.2890625, "learning_rate": 6.281121238768776e-05, "loss": 1.2951, "step": 3415 }, { "epoch": 0.5222243454997133, "grad_norm": 1.484375, "learning_rate": 6.280706840606534e-05, "loss": 1.3869, "step": 3416 }, { "epoch": 0.5223772214790751, "grad_norm": 1.3046875, "learning_rate": 6.280292449856504e-05, "loss": 1.4625, "step": 3417 }, { "epoch": 0.5225300974584368, "grad_norm": 1.6015625, "learning_rate": 6.27987806651952e-05, "loss": 1.4011, "step": 3418 }, { "epoch": 0.5226829734377986, "grad_norm": 1.3671875, "learning_rate": 6.279463690596399e-05, "loss": 1.4897, "step": 3419 }, { "epoch": 0.5228358494171603, "grad_norm": 1.2109375, "learning_rate": 6.27904932208797e-05, "loss": 1.0513, "step": 3420 }, { "epoch": 0.5229887253965221, "grad_norm": 1.3125, "learning_rate": 6.278634960995063e-05, "loss": 1.2423, "step": 3421 }, { "epoch": 0.5231416013758838, "grad_norm": 1.484375, "learning_rate": 6.278220607318496e-05, "loss": 1.4447, "step": 3422 }, { "epoch": 0.5232944773552456, "grad_norm": 1.40625, "learning_rate": 6.277806261059104e-05, "loss": 1.4464, "step": 3423 }, { "epoch": 0.5234473533346073, "grad_norm": 1.2890625, "learning_rate": 6.277391922217703e-05, "loss": 1.537, "step": 3424 }, { "epoch": 0.523600229313969, "grad_norm": 1.375, "learning_rate": 6.276977590795127e-05, "loss": 1.4914, "step": 3425 }, { "epoch": 0.5237531052933307, "grad_norm": 1.375, "learning_rate": 6.276563266792198e-05, "loss": 1.3997, "step": 3426 }, { "epoch": 0.5239059812726925, "grad_norm": 1.3671875, "learning_rate": 6.276148950209739e-05, "loss": 1.4239, "step": 3427 }, { "epoch": 0.5240588572520543, "grad_norm": 1.1875, "learning_rate": 6.275734641048585e-05, "loss": 1.2331, "step": 3428 }, { "epoch": 0.524211733231416, "grad_norm": 1.328125, "learning_rate": 6.27532033930955e-05, "loss": 1.242, "step": 3429 }, { "epoch": 0.5243646092107778, "grad_norm": 1.3828125, "learning_rate": 6.274906044993467e-05, "loss": 1.453, "step": 3430 }, { "epoch": 0.5245174851901395, "grad_norm": 1.359375, "learning_rate": 6.274491758101159e-05, "loss": 1.3646, "step": 3431 }, { "epoch": 0.5246703611695013, "grad_norm": 1.28125, "learning_rate": 6.274077478633454e-05, "loss": 1.4231, "step": 3432 }, { "epoch": 0.524823237148863, "grad_norm": 1.328125, "learning_rate": 6.273663206591177e-05, "loss": 1.3741, "step": 3433 }, { "epoch": 0.5249761131282247, "grad_norm": 1.1875, "learning_rate": 6.273248941975149e-05, "loss": 1.3058, "step": 3434 }, { "epoch": 0.5251289891075864, "grad_norm": 1.4609375, "learning_rate": 6.272834684786202e-05, "loss": 1.769, "step": 3435 }, { "epoch": 0.5252818650869482, "grad_norm": 1.453125, "learning_rate": 6.272420435025154e-05, "loss": 1.4472, "step": 3436 }, { "epoch": 0.5254347410663099, "grad_norm": 1.578125, "learning_rate": 6.272006192692841e-05, "loss": 1.2814, "step": 3437 }, { "epoch": 0.5255876170456717, "grad_norm": 1.2890625, "learning_rate": 6.271591957790079e-05, "loss": 1.2159, "step": 3438 }, { "epoch": 0.5257404930250335, "grad_norm": 1.578125, "learning_rate": 6.271177730317703e-05, "loss": 1.5192, "step": 3439 }, { "epoch": 0.5258933690043952, "grad_norm": 1.578125, "learning_rate": 6.27076351027653e-05, "loss": 1.5824, "step": 3440 }, { "epoch": 0.526046244983757, "grad_norm": 1.296875, "learning_rate": 6.270349297667385e-05, "loss": 1.3528, "step": 3441 }, { "epoch": 0.5261991209631187, "grad_norm": 1.328125, "learning_rate": 6.269935092491103e-05, "loss": 1.2711, "step": 3442 }, { "epoch": 0.5263519969424804, "grad_norm": 1.3984375, "learning_rate": 6.269520894748496e-05, "loss": 1.2079, "step": 3443 }, { "epoch": 0.5265048729218421, "grad_norm": 1.3203125, "learning_rate": 6.269106704440405e-05, "loss": 1.3747, "step": 3444 }, { "epoch": 0.5266577489012039, "grad_norm": 1.359375, "learning_rate": 6.26869252156764e-05, "loss": 1.3747, "step": 3445 }, { "epoch": 0.5268106248805656, "grad_norm": 1.4453125, "learning_rate": 6.268278346131037e-05, "loss": 1.657, "step": 3446 }, { "epoch": 0.5269635008599274, "grad_norm": 1.265625, "learning_rate": 6.267864178131422e-05, "loss": 1.552, "step": 3447 }, { "epoch": 0.5271163768392891, "grad_norm": 1.53125, "learning_rate": 6.267450017569611e-05, "loss": 1.1998, "step": 3448 }, { "epoch": 0.5272692528186509, "grad_norm": 1.3359375, "learning_rate": 6.267035864446438e-05, "loss": 1.3905, "step": 3449 }, { "epoch": 0.5274221287980126, "grad_norm": 1.4453125, "learning_rate": 6.266621718762722e-05, "loss": 1.4521, "step": 3450 }, { "epoch": 0.5275750047773744, "grad_norm": 1.2421875, "learning_rate": 6.266207580519294e-05, "loss": 1.3176, "step": 3451 }, { "epoch": 0.527727880756736, "grad_norm": 1.3515625, "learning_rate": 6.265793449716981e-05, "loss": 1.565, "step": 3452 }, { "epoch": 0.5278807567360978, "grad_norm": 1.3203125, "learning_rate": 6.265379326356597e-05, "loss": 1.7557, "step": 3453 }, { "epoch": 0.5280336327154596, "grad_norm": 1.3984375, "learning_rate": 6.26496521043898e-05, "loss": 1.4541, "step": 3454 }, { "epoch": 0.5281865086948213, "grad_norm": 1.4296875, "learning_rate": 6.264551101964946e-05, "loss": 1.6054, "step": 3455 }, { "epoch": 0.5283393846741831, "grad_norm": 1.3828125, "learning_rate": 6.264137000935326e-05, "loss": 1.3815, "step": 3456 }, { "epoch": 0.5284922606535448, "grad_norm": 1.359375, "learning_rate": 6.263722907350942e-05, "loss": 1.2693, "step": 3457 }, { "epoch": 0.5286451366329066, "grad_norm": 1.3359375, "learning_rate": 6.263308821212625e-05, "loss": 1.4559, "step": 3458 }, { "epoch": 0.5287980126122683, "grad_norm": 1.4296875, "learning_rate": 6.262894742521194e-05, "loss": 1.316, "step": 3459 }, { "epoch": 0.5289508885916301, "grad_norm": 1.3515625, "learning_rate": 6.262480671277474e-05, "loss": 1.1436, "step": 3460 }, { "epoch": 0.5291037645709917, "grad_norm": 1.203125, "learning_rate": 6.262066607482295e-05, "loss": 1.3413, "step": 3461 }, { "epoch": 0.5292566405503535, "grad_norm": 1.3828125, "learning_rate": 6.261652551136477e-05, "loss": 1.4134, "step": 3462 }, { "epoch": 0.5294095165297152, "grad_norm": 1.3359375, "learning_rate": 6.261238502240852e-05, "loss": 1.356, "step": 3463 }, { "epoch": 0.529562392509077, "grad_norm": 1.328125, "learning_rate": 6.260824460796237e-05, "loss": 1.3873, "step": 3464 }, { "epoch": 0.5297152684884388, "grad_norm": 1.21875, "learning_rate": 6.260410426803462e-05, "loss": 1.1948, "step": 3465 }, { "epoch": 0.5298681444678005, "grad_norm": 1.46875, "learning_rate": 6.259996400263353e-05, "loss": 1.36, "step": 3466 }, { "epoch": 0.5300210204471623, "grad_norm": 1.3671875, "learning_rate": 6.259582381176729e-05, "loss": 1.3959, "step": 3467 }, { "epoch": 0.530173896426524, "grad_norm": 1.4453125, "learning_rate": 6.259168369544426e-05, "loss": 1.3238, "step": 3468 }, { "epoch": 0.5303267724058858, "grad_norm": 1.625, "learning_rate": 6.258754365367257e-05, "loss": 1.4065, "step": 3469 }, { "epoch": 0.5304796483852474, "grad_norm": 1.46875, "learning_rate": 6.258340368646054e-05, "loss": 1.4006, "step": 3470 }, { "epoch": 0.5306325243646092, "grad_norm": 1.359375, "learning_rate": 6.25792637938164e-05, "loss": 1.3173, "step": 3471 }, { "epoch": 0.5307854003439709, "grad_norm": 1.5, "learning_rate": 6.257512397574843e-05, "loss": 1.6135, "step": 3472 }, { "epoch": 0.5309382763233327, "grad_norm": 1.5, "learning_rate": 6.257098423226487e-05, "loss": 1.5578, "step": 3473 }, { "epoch": 0.5310911523026944, "grad_norm": 1.4609375, "learning_rate": 6.256684456337392e-05, "loss": 1.5022, "step": 3474 }, { "epoch": 0.5312440282820562, "grad_norm": 1.328125, "learning_rate": 6.256270496908387e-05, "loss": 1.5726, "step": 3475 }, { "epoch": 0.531396904261418, "grad_norm": 1.3359375, "learning_rate": 6.255856544940298e-05, "loss": 1.3262, "step": 3476 }, { "epoch": 0.5315497802407797, "grad_norm": 1.453125, "learning_rate": 6.255442600433948e-05, "loss": 1.5536, "step": 3477 }, { "epoch": 0.5317026562201415, "grad_norm": 1.4921875, "learning_rate": 6.255028663390162e-05, "loss": 1.4046, "step": 3478 }, { "epoch": 0.5318555321995031, "grad_norm": 1.2734375, "learning_rate": 6.254614733809769e-05, "loss": 1.4335, "step": 3479 }, { "epoch": 0.5320084081788649, "grad_norm": 1.3046875, "learning_rate": 6.254200811693589e-05, "loss": 1.407, "step": 3480 }, { "epoch": 0.5321612841582266, "grad_norm": 1.265625, "learning_rate": 6.253786897042447e-05, "loss": 1.1618, "step": 3481 }, { "epoch": 0.5323141601375884, "grad_norm": 1.4765625, "learning_rate": 6.253372989857171e-05, "loss": 1.3855, "step": 3482 }, { "epoch": 0.5324670361169501, "grad_norm": 1.3515625, "learning_rate": 6.25295909013858e-05, "loss": 1.382, "step": 3483 }, { "epoch": 0.5326199120963119, "grad_norm": 1.3046875, "learning_rate": 6.252545197887509e-05, "loss": 1.4171, "step": 3484 }, { "epoch": 0.5327727880756736, "grad_norm": 1.3671875, "learning_rate": 6.252131313104774e-05, "loss": 1.4159, "step": 3485 }, { "epoch": 0.5329256640550354, "grad_norm": 1.3515625, "learning_rate": 6.251717435791202e-05, "loss": 1.2187, "step": 3486 }, { "epoch": 0.5330785400343971, "grad_norm": 1.390625, "learning_rate": 6.251303565947623e-05, "loss": 1.2442, "step": 3487 }, { "epoch": 0.5332314160137588, "grad_norm": 1.3359375, "learning_rate": 6.250889703574849e-05, "loss": 1.4113, "step": 3488 }, { "epoch": 0.5333842919931205, "grad_norm": 1.4453125, "learning_rate": 6.250475848673721e-05, "loss": 1.3664, "step": 3489 }, { "epoch": 0.5335371679724823, "grad_norm": 1.4296875, "learning_rate": 6.250062001245052e-05, "loss": 1.4987, "step": 3490 }, { "epoch": 0.533690043951844, "grad_norm": 1.3203125, "learning_rate": 6.249648161289672e-05, "loss": 1.2985, "step": 3491 }, { "epoch": 0.5338429199312058, "grad_norm": 1.3515625, "learning_rate": 6.249234328808406e-05, "loss": 1.2102, "step": 3492 }, { "epoch": 0.5339957959105676, "grad_norm": 1.3359375, "learning_rate": 6.248820503802076e-05, "loss": 1.5147, "step": 3493 }, { "epoch": 0.5341486718899293, "grad_norm": 1.3203125, "learning_rate": 6.248406686271506e-05, "loss": 1.2615, "step": 3494 }, { "epoch": 0.5343015478692911, "grad_norm": 1.2734375, "learning_rate": 6.247992876217524e-05, "loss": 1.4199, "step": 3495 }, { "epoch": 0.5344544238486528, "grad_norm": 1.4609375, "learning_rate": 6.247579073640952e-05, "loss": 1.2532, "step": 3496 }, { "epoch": 0.5346072998280145, "grad_norm": 1.390625, "learning_rate": 6.247165278542616e-05, "loss": 1.4151, "step": 3497 }, { "epoch": 0.5347601758073762, "grad_norm": 1.5703125, "learning_rate": 6.246751490923344e-05, "loss": 1.6552, "step": 3498 }, { "epoch": 0.534913051786738, "grad_norm": 1.25, "learning_rate": 6.246337710783956e-05, "loss": 1.2972, "step": 3499 }, { "epoch": 0.5350659277660997, "grad_norm": 1.2109375, "learning_rate": 6.245923938125276e-05, "loss": 1.1608, "step": 3500 }, { "epoch": 0.5352188037454615, "grad_norm": 1.28125, "learning_rate": 6.245510172948131e-05, "loss": 1.5043, "step": 3501 }, { "epoch": 0.5353716797248232, "grad_norm": 1.3671875, "learning_rate": 6.245096415253343e-05, "loss": 1.4038, "step": 3502 }, { "epoch": 0.535524555704185, "grad_norm": 1.3125, "learning_rate": 6.244682665041743e-05, "loss": 1.2754, "step": 3503 }, { "epoch": 0.5356774316835468, "grad_norm": 1.4453125, "learning_rate": 6.244268922314149e-05, "loss": 1.8989, "step": 3504 }, { "epoch": 0.5358303076629085, "grad_norm": 1.2421875, "learning_rate": 6.243855187071388e-05, "loss": 1.4463, "step": 3505 }, { "epoch": 0.5359831836422702, "grad_norm": 1.328125, "learning_rate": 6.243441459314285e-05, "loss": 1.2364, "step": 3506 }, { "epoch": 0.5361360596216319, "grad_norm": 1.4140625, "learning_rate": 6.243027739043661e-05, "loss": 1.3355, "step": 3507 }, { "epoch": 0.5362889356009937, "grad_norm": 1.546875, "learning_rate": 6.24261402626035e-05, "loss": 1.6238, "step": 3508 }, { "epoch": 0.5364418115803554, "grad_norm": 1.390625, "learning_rate": 6.242200320965163e-05, "loss": 1.2143, "step": 3509 }, { "epoch": 0.5365946875597172, "grad_norm": 1.296875, "learning_rate": 6.241786623158934e-05, "loss": 1.4485, "step": 3510 }, { "epoch": 0.5367475635390789, "grad_norm": 1.328125, "learning_rate": 6.241372932842487e-05, "loss": 1.4861, "step": 3511 }, { "epoch": 0.5369004395184407, "grad_norm": 1.359375, "learning_rate": 6.240959250016638e-05, "loss": 1.5101, "step": 3512 }, { "epoch": 0.5370533154978024, "grad_norm": 1.28125, "learning_rate": 6.240545574682224e-05, "loss": 1.3492, "step": 3513 }, { "epoch": 0.5372061914771642, "grad_norm": 1.4140625, "learning_rate": 6.24013190684006e-05, "loss": 1.4892, "step": 3514 }, { "epoch": 0.5373590674565258, "grad_norm": 1.421875, "learning_rate": 6.239718246490974e-05, "loss": 1.5038, "step": 3515 }, { "epoch": 0.5375119434358876, "grad_norm": 1.3046875, "learning_rate": 6.239304593635788e-05, "loss": 1.3589, "step": 3516 }, { "epoch": 0.5376648194152494, "grad_norm": 1.359375, "learning_rate": 6.23889094827533e-05, "loss": 1.325, "step": 3517 }, { "epoch": 0.5378176953946111, "grad_norm": 1.4921875, "learning_rate": 6.238477310410426e-05, "loss": 1.5476, "step": 3518 }, { "epoch": 0.5379705713739729, "grad_norm": 1.4609375, "learning_rate": 6.238063680041891e-05, "loss": 1.5176, "step": 3519 }, { "epoch": 0.5381234473533346, "grad_norm": 1.2578125, "learning_rate": 6.237650057170559e-05, "loss": 1.1254, "step": 3520 }, { "epoch": 0.5382763233326964, "grad_norm": 1.4296875, "learning_rate": 6.237236441797247e-05, "loss": 1.268, "step": 3521 }, { "epoch": 0.5384291993120581, "grad_norm": 1.3671875, "learning_rate": 6.236822833922788e-05, "loss": 1.3927, "step": 3522 }, { "epoch": 0.5385820752914199, "grad_norm": 1.3359375, "learning_rate": 6.236409233547995e-05, "loss": 1.4772, "step": 3523 }, { "epoch": 0.5387349512707815, "grad_norm": 1.390625, "learning_rate": 6.235995640673704e-05, "loss": 1.4401, "step": 3524 }, { "epoch": 0.5388878272501433, "grad_norm": 1.3671875, "learning_rate": 6.235582055300733e-05, "loss": 1.3067, "step": 3525 }, { "epoch": 0.539040703229505, "grad_norm": 1.4140625, "learning_rate": 6.235168477429905e-05, "loss": 1.6439, "step": 3526 }, { "epoch": 0.5391935792088668, "grad_norm": 1.2265625, "learning_rate": 6.234754907062048e-05, "loss": 1.1609, "step": 3527 }, { "epoch": 0.5393464551882285, "grad_norm": 1.4453125, "learning_rate": 6.23434134419798e-05, "loss": 1.5374, "step": 3528 }, { "epoch": 0.5394993311675903, "grad_norm": 1.390625, "learning_rate": 6.233927788838534e-05, "loss": 1.5961, "step": 3529 }, { "epoch": 0.5396522071469521, "grad_norm": 1.4296875, "learning_rate": 6.233514240984529e-05, "loss": 1.1671, "step": 3530 }, { "epoch": 0.5398050831263138, "grad_norm": 1.53125, "learning_rate": 6.23310070063679e-05, "loss": 1.4948, "step": 3531 }, { "epoch": 0.5399579591056756, "grad_norm": 1.375, "learning_rate": 6.23268716779614e-05, "loss": 1.3822, "step": 3532 }, { "epoch": 0.5401108350850372, "grad_norm": 1.2421875, "learning_rate": 6.232273642463405e-05, "loss": 1.6298, "step": 3533 }, { "epoch": 0.540263711064399, "grad_norm": 1.328125, "learning_rate": 6.231860124639408e-05, "loss": 1.2874, "step": 3534 }, { "epoch": 0.5404165870437607, "grad_norm": 1.4609375, "learning_rate": 6.231446614324972e-05, "loss": 1.4074, "step": 3535 }, { "epoch": 0.5405694630231225, "grad_norm": 1.3203125, "learning_rate": 6.231033111520923e-05, "loss": 1.5077, "step": 3536 }, { "epoch": 0.5407223390024842, "grad_norm": 1.5625, "learning_rate": 6.230619616228083e-05, "loss": 1.6318, "step": 3537 }, { "epoch": 0.540875214981846, "grad_norm": 1.1875, "learning_rate": 6.230206128447282e-05, "loss": 1.2141, "step": 3538 }, { "epoch": 0.5410280909612077, "grad_norm": 1.484375, "learning_rate": 6.229792648179339e-05, "loss": 1.5682, "step": 3539 }, { "epoch": 0.5411809669405695, "grad_norm": 1.375, "learning_rate": 6.229379175425077e-05, "loss": 1.6203, "step": 3540 }, { "epoch": 0.5413338429199313, "grad_norm": 1.5234375, "learning_rate": 6.228965710185322e-05, "loss": 1.4591, "step": 3541 }, { "epoch": 0.5414867188992929, "grad_norm": 1.4375, "learning_rate": 6.228552252460895e-05, "loss": 1.2318, "step": 3542 }, { "epoch": 0.5416395948786547, "grad_norm": 1.40625, "learning_rate": 6.228138802252628e-05, "loss": 1.4941, "step": 3543 }, { "epoch": 0.5417924708580164, "grad_norm": 1.1796875, "learning_rate": 6.227725359561337e-05, "loss": 1.2795, "step": 3544 }, { "epoch": 0.5419453468373782, "grad_norm": 1.328125, "learning_rate": 6.227311924387848e-05, "loss": 1.3359, "step": 3545 }, { "epoch": 0.5420982228167399, "grad_norm": 1.28125, "learning_rate": 6.226898496732986e-05, "loss": 1.2941, "step": 3546 }, { "epoch": 0.5422510987961017, "grad_norm": 1.3125, "learning_rate": 6.226485076597572e-05, "loss": 1.3359, "step": 3547 }, { "epoch": 0.5424039747754634, "grad_norm": 1.3671875, "learning_rate": 6.226071663982438e-05, "loss": 1.3704, "step": 3548 }, { "epoch": 0.5425568507548252, "grad_norm": 1.296875, "learning_rate": 6.225658258888398e-05, "loss": 1.2425, "step": 3549 }, { "epoch": 0.5427097267341869, "grad_norm": 1.4296875, "learning_rate": 6.225244861316281e-05, "loss": 1.5194, "step": 3550 }, { "epoch": 0.5428626027135486, "grad_norm": 1.3984375, "learning_rate": 6.224831471266912e-05, "loss": 1.2748, "step": 3551 }, { "epoch": 0.5430154786929103, "grad_norm": 1.375, "learning_rate": 6.22441808874111e-05, "loss": 1.4152, "step": 3552 }, { "epoch": 0.5431683546722721, "grad_norm": 1.3984375, "learning_rate": 6.224004713739705e-05, "loss": 1.4354, "step": 3553 }, { "epoch": 0.5433212306516338, "grad_norm": 1.40625, "learning_rate": 6.223591346263513e-05, "loss": 1.4592, "step": 3554 }, { "epoch": 0.5434741066309956, "grad_norm": 1.4453125, "learning_rate": 6.223177986313366e-05, "loss": 1.4289, "step": 3555 }, { "epoch": 0.5436269826103574, "grad_norm": 1.5, "learning_rate": 6.222764633890082e-05, "loss": 1.3318, "step": 3556 }, { "epoch": 0.5437798585897191, "grad_norm": 1.5546875, "learning_rate": 6.222351288994487e-05, "loss": 1.4656, "step": 3557 }, { "epoch": 0.5439327345690809, "grad_norm": 1.390625, "learning_rate": 6.221937951627409e-05, "loss": 1.1966, "step": 3558 }, { "epoch": 0.5440856105484426, "grad_norm": 1.46875, "learning_rate": 6.221524621789663e-05, "loss": 1.5163, "step": 3559 }, { "epoch": 0.5442384865278043, "grad_norm": 1.234375, "learning_rate": 6.221111299482078e-05, "loss": 1.407, "step": 3560 }, { "epoch": 0.544391362507166, "grad_norm": 1.3203125, "learning_rate": 6.220697984705475e-05, "loss": 1.3463, "step": 3561 }, { "epoch": 0.5445442384865278, "grad_norm": 1.2265625, "learning_rate": 6.220284677460681e-05, "loss": 1.0885, "step": 3562 }, { "epoch": 0.5446971144658895, "grad_norm": 1.5703125, "learning_rate": 6.219871377748517e-05, "loss": 1.6327, "step": 3563 }, { "epoch": 0.5448499904452513, "grad_norm": 2.15625, "learning_rate": 6.219458085569813e-05, "loss": 1.6551, "step": 3564 }, { "epoch": 0.545002866424613, "grad_norm": 1.5078125, "learning_rate": 6.219044800925384e-05, "loss": 1.5481, "step": 3565 }, { "epoch": 0.5451557424039748, "grad_norm": 1.4296875, "learning_rate": 6.218631523816056e-05, "loss": 1.5172, "step": 3566 }, { "epoch": 0.5453086183833366, "grad_norm": 1.328125, "learning_rate": 6.218218254242656e-05, "loss": 1.2119, "step": 3567 }, { "epoch": 0.5454614943626983, "grad_norm": 1.3984375, "learning_rate": 6.217804992206002e-05, "loss": 1.2455, "step": 3568 }, { "epoch": 0.54561437034206, "grad_norm": 1.375, "learning_rate": 6.217391737706926e-05, "loss": 1.406, "step": 3569 }, { "epoch": 0.5457672463214217, "grad_norm": 1.359375, "learning_rate": 6.216978490746242e-05, "loss": 1.5071, "step": 3570 }, { "epoch": 0.5459201223007835, "grad_norm": 1.296875, "learning_rate": 6.21656525132478e-05, "loss": 1.3256, "step": 3571 }, { "epoch": 0.5460729982801452, "grad_norm": 1.2734375, "learning_rate": 6.216152019443365e-05, "loss": 1.2445, "step": 3572 }, { "epoch": 0.546225874259507, "grad_norm": 1.375, "learning_rate": 6.21573879510281e-05, "loss": 1.3479, "step": 3573 }, { "epoch": 0.5463787502388687, "grad_norm": 1.3515625, "learning_rate": 6.215325578303954e-05, "loss": 1.5691, "step": 3574 }, { "epoch": 0.5465316262182305, "grad_norm": 1.359375, "learning_rate": 6.214912369047605e-05, "loss": 1.4201, "step": 3575 }, { "epoch": 0.5466845021975922, "grad_norm": 1.390625, "learning_rate": 6.214499167334597e-05, "loss": 1.473, "step": 3576 }, { "epoch": 0.546837378176954, "grad_norm": 1.375, "learning_rate": 6.214085973165753e-05, "loss": 1.423, "step": 3577 }, { "epoch": 0.5469902541563156, "grad_norm": 1.3984375, "learning_rate": 6.213672786541889e-05, "loss": 1.5177, "step": 3578 }, { "epoch": 0.5471431301356774, "grad_norm": 1.3203125, "learning_rate": 6.213259607463835e-05, "loss": 1.4606, "step": 3579 }, { "epoch": 0.5472960061150391, "grad_norm": 1.4453125, "learning_rate": 6.212846435932412e-05, "loss": 1.3953, "step": 3580 }, { "epoch": 0.5474488820944009, "grad_norm": 1.34375, "learning_rate": 6.212433271948444e-05, "loss": 1.3682, "step": 3581 }, { "epoch": 0.5476017580737627, "grad_norm": 1.5078125, "learning_rate": 6.212020115512753e-05, "loss": 1.2329, "step": 3582 }, { "epoch": 0.5477546340531244, "grad_norm": 1.3828125, "learning_rate": 6.211606966626169e-05, "loss": 1.6128, "step": 3583 }, { "epoch": 0.5479075100324862, "grad_norm": 1.40625, "learning_rate": 6.211193825289506e-05, "loss": 1.4331, "step": 3584 }, { "epoch": 0.5480603860118479, "grad_norm": 1.3515625, "learning_rate": 6.210780691503591e-05, "loss": 1.3975, "step": 3585 }, { "epoch": 0.5482132619912097, "grad_norm": 1.2890625, "learning_rate": 6.210367565269248e-05, "loss": 1.2529, "step": 3586 }, { "epoch": 0.5483661379705713, "grad_norm": 1.375, "learning_rate": 6.209954446587301e-05, "loss": 1.4261, "step": 3587 }, { "epoch": 0.5485190139499331, "grad_norm": 1.34375, "learning_rate": 6.209541335458576e-05, "loss": 1.4312, "step": 3588 }, { "epoch": 0.5486718899292948, "grad_norm": 1.421875, "learning_rate": 6.209128231883887e-05, "loss": 1.438, "step": 3589 }, { "epoch": 0.5488247659086566, "grad_norm": 1.3671875, "learning_rate": 6.208715135864064e-05, "loss": 1.3435, "step": 3590 }, { "epoch": 0.5489776418880183, "grad_norm": 1.40625, "learning_rate": 6.208302047399932e-05, "loss": 1.3513, "step": 3591 }, { "epoch": 0.5491305178673801, "grad_norm": 1.4375, "learning_rate": 6.207888966492308e-05, "loss": 1.2758, "step": 3592 }, { "epoch": 0.5492833938467419, "grad_norm": 1.3046875, "learning_rate": 6.207475893142023e-05, "loss": 1.4331, "step": 3593 }, { "epoch": 0.5494362698261036, "grad_norm": 1.6328125, "learning_rate": 6.207062827349892e-05, "loss": 1.3608, "step": 3594 }, { "epoch": 0.5495891458054654, "grad_norm": 1.3984375, "learning_rate": 6.206649769116744e-05, "loss": 1.3879, "step": 3595 }, { "epoch": 0.549742021784827, "grad_norm": 1.40625, "learning_rate": 6.206236718443398e-05, "loss": 1.4458, "step": 3596 }, { "epoch": 0.5498948977641888, "grad_norm": 1.390625, "learning_rate": 6.205823675330683e-05, "loss": 1.4696, "step": 3597 }, { "epoch": 0.5500477737435505, "grad_norm": 1.28125, "learning_rate": 6.205410639779419e-05, "loss": 1.598, "step": 3598 }, { "epoch": 0.5502006497229123, "grad_norm": 1.3984375, "learning_rate": 6.204997611790427e-05, "loss": 1.3186, "step": 3599 }, { "epoch": 0.550353525702274, "grad_norm": 1.34375, "learning_rate": 6.204584591364532e-05, "loss": 1.5334, "step": 3600 }, { "epoch": 0.5505064016816358, "grad_norm": 1.34375, "learning_rate": 6.204171578502556e-05, "loss": 1.2931, "step": 3601 }, { "epoch": 0.5506592776609975, "grad_norm": 1.2890625, "learning_rate": 6.203758573205325e-05, "loss": 1.4391, "step": 3602 }, { "epoch": 0.5508121536403593, "grad_norm": 1.2265625, "learning_rate": 6.203345575473662e-05, "loss": 1.4108, "step": 3603 }, { "epoch": 0.550965029619721, "grad_norm": 1.3828125, "learning_rate": 6.202932585308384e-05, "loss": 1.2823, "step": 3604 }, { "epoch": 0.5511179055990827, "grad_norm": 1.453125, "learning_rate": 6.202519602710322e-05, "loss": 1.4644, "step": 3605 }, { "epoch": 0.5512707815784444, "grad_norm": 1.328125, "learning_rate": 6.20210662768029e-05, "loss": 1.4013, "step": 3606 }, { "epoch": 0.5514236575578062, "grad_norm": 1.375, "learning_rate": 6.201693660219123e-05, "loss": 1.4437, "step": 3607 }, { "epoch": 0.551576533537168, "grad_norm": 1.5, "learning_rate": 6.201280700327631e-05, "loss": 1.3226, "step": 3608 }, { "epoch": 0.5517294095165297, "grad_norm": 1.2734375, "learning_rate": 6.20086774800665e-05, "loss": 1.3634, "step": 3609 }, { "epoch": 0.5518822854958915, "grad_norm": 1.34375, "learning_rate": 6.200454803256994e-05, "loss": 1.391, "step": 3610 }, { "epoch": 0.5520351614752532, "grad_norm": 1.3359375, "learning_rate": 6.200041866079486e-05, "loss": 1.5648, "step": 3611 }, { "epoch": 0.552188037454615, "grad_norm": 1.3671875, "learning_rate": 6.199628936474956e-05, "loss": 1.6372, "step": 3612 }, { "epoch": 0.5523409134339767, "grad_norm": 1.2890625, "learning_rate": 6.199216014444215e-05, "loss": 1.3558, "step": 3613 }, { "epoch": 0.5524937894133384, "grad_norm": 1.2890625, "learning_rate": 6.198803099988101e-05, "loss": 1.1975, "step": 3614 }, { "epoch": 0.5526466653927001, "grad_norm": 1.4140625, "learning_rate": 6.198390193107424e-05, "loss": 1.4355, "step": 3615 }, { "epoch": 0.5527995413720619, "grad_norm": 1.3515625, "learning_rate": 6.197977293803015e-05, "loss": 1.3426, "step": 3616 }, { "epoch": 0.5529524173514236, "grad_norm": 1.34375, "learning_rate": 6.197564402075693e-05, "loss": 1.2608, "step": 3617 }, { "epoch": 0.5531052933307854, "grad_norm": 1.3359375, "learning_rate": 6.197151517926281e-05, "loss": 1.3069, "step": 3618 }, { "epoch": 0.5532581693101472, "grad_norm": 1.1875, "learning_rate": 6.196738641355602e-05, "loss": 1.2483, "step": 3619 }, { "epoch": 0.5534110452895089, "grad_norm": 1.46875, "learning_rate": 6.196325772364478e-05, "loss": 1.5373, "step": 3620 }, { "epoch": 0.5535639212688707, "grad_norm": 1.2734375, "learning_rate": 6.195912910953736e-05, "loss": 1.1724, "step": 3621 }, { "epoch": 0.5537167972482324, "grad_norm": 1.4765625, "learning_rate": 6.195500057124193e-05, "loss": 1.4447, "step": 3622 }, { "epoch": 0.5538696732275941, "grad_norm": 1.3203125, "learning_rate": 6.19508721087668e-05, "loss": 1.4031, "step": 3623 }, { "epoch": 0.5540225492069558, "grad_norm": 1.4140625, "learning_rate": 6.194674372212011e-05, "loss": 1.3943, "step": 3624 }, { "epoch": 0.5541754251863176, "grad_norm": 1.375, "learning_rate": 6.19426154113101e-05, "loss": 1.3906, "step": 3625 }, { "epoch": 0.5543283011656793, "grad_norm": 1.3828125, "learning_rate": 6.193848717634504e-05, "loss": 1.2899, "step": 3626 }, { "epoch": 0.5544811771450411, "grad_norm": 1.3984375, "learning_rate": 6.193435901723315e-05, "loss": 1.7337, "step": 3627 }, { "epoch": 0.5546340531244028, "grad_norm": 1.3984375, "learning_rate": 6.193023093398264e-05, "loss": 1.5335, "step": 3628 }, { "epoch": 0.5547869291037646, "grad_norm": 1.453125, "learning_rate": 6.192610292660172e-05, "loss": 1.7074, "step": 3629 }, { "epoch": 0.5549398050831263, "grad_norm": 1.3984375, "learning_rate": 6.192197499509866e-05, "loss": 1.4726, "step": 3630 }, { "epoch": 0.5550926810624881, "grad_norm": 1.3046875, "learning_rate": 6.191784713948167e-05, "loss": 1.5296, "step": 3631 }, { "epoch": 0.5552455570418497, "grad_norm": 1.4296875, "learning_rate": 6.191371935975895e-05, "loss": 1.3239, "step": 3632 }, { "epoch": 0.5553984330212115, "grad_norm": 1.4140625, "learning_rate": 6.190959165593877e-05, "loss": 1.4007, "step": 3633 }, { "epoch": 0.5555513090005733, "grad_norm": 1.40625, "learning_rate": 6.19054640280293e-05, "loss": 1.3708, "step": 3634 }, { "epoch": 0.555704184979935, "grad_norm": 1.4765625, "learning_rate": 6.190133647603885e-05, "loss": 1.4162, "step": 3635 }, { "epoch": 0.5558570609592968, "grad_norm": 1.3671875, "learning_rate": 6.189720899997558e-05, "loss": 1.2952, "step": 3636 }, { "epoch": 0.5560099369386585, "grad_norm": 1.3046875, "learning_rate": 6.189308159984767e-05, "loss": 1.2335, "step": 3637 }, { "epoch": 0.5561628129180203, "grad_norm": 1.7265625, "learning_rate": 6.188895427566348e-05, "loss": 1.6796, "step": 3638 }, { "epoch": 0.556315688897382, "grad_norm": 1.4765625, "learning_rate": 6.188482702743111e-05, "loss": 1.4871, "step": 3639 }, { "epoch": 0.5564685648767438, "grad_norm": 1.4140625, "learning_rate": 6.188069985515886e-05, "loss": 1.5977, "step": 3640 }, { "epoch": 0.5566214408561054, "grad_norm": 1.375, "learning_rate": 6.187657275885493e-05, "loss": 1.4075, "step": 3641 }, { "epoch": 0.5567743168354672, "grad_norm": 1.265625, "learning_rate": 6.187244573852754e-05, "loss": 1.2528, "step": 3642 }, { "epoch": 0.5569271928148289, "grad_norm": 1.71875, "learning_rate": 6.186831879418495e-05, "loss": 1.5991, "step": 3643 }, { "epoch": 0.5570800687941907, "grad_norm": 1.5390625, "learning_rate": 6.186419192583532e-05, "loss": 1.47, "step": 3644 }, { "epoch": 0.5572329447735525, "grad_norm": 1.3671875, "learning_rate": 6.186006513348694e-05, "loss": 1.3411, "step": 3645 }, { "epoch": 0.5573858207529142, "grad_norm": 1.3671875, "learning_rate": 6.185593841714797e-05, "loss": 1.3182, "step": 3646 }, { "epoch": 0.557538696732276, "grad_norm": 1.34375, "learning_rate": 6.185181177682671e-05, "loss": 1.4879, "step": 3647 }, { "epoch": 0.5576915727116377, "grad_norm": 1.453125, "learning_rate": 6.184768521253128e-05, "loss": 1.486, "step": 3648 }, { "epoch": 0.5578444486909995, "grad_norm": 1.515625, "learning_rate": 6.184355872427004e-05, "loss": 1.5274, "step": 3649 }, { "epoch": 0.5579973246703611, "grad_norm": 1.4453125, "learning_rate": 6.183943231205111e-05, "loss": 1.2127, "step": 3650 }, { "epoch": 0.5581502006497229, "grad_norm": 1.4921875, "learning_rate": 6.183530597588274e-05, "loss": 1.7405, "step": 3651 }, { "epoch": 0.5583030766290846, "grad_norm": 1.3046875, "learning_rate": 6.183117971577319e-05, "loss": 1.5142, "step": 3652 }, { "epoch": 0.5584559526084464, "grad_norm": 1.4921875, "learning_rate": 6.182705353173058e-05, "loss": 1.4385, "step": 3653 }, { "epoch": 0.5586088285878081, "grad_norm": 1.2890625, "learning_rate": 6.182292742376328e-05, "loss": 1.2433, "step": 3654 }, { "epoch": 0.5587617045671699, "grad_norm": 1.3046875, "learning_rate": 6.18188013918794e-05, "loss": 1.3516, "step": 3655 }, { "epoch": 0.5589145805465316, "grad_norm": 1.390625, "learning_rate": 6.181467543608719e-05, "loss": 1.5351, "step": 3656 }, { "epoch": 0.5590674565258934, "grad_norm": 1.3515625, "learning_rate": 6.181054955639493e-05, "loss": 1.5084, "step": 3657 }, { "epoch": 0.5592203325052552, "grad_norm": 1.296875, "learning_rate": 6.180642375281074e-05, "loss": 1.3172, "step": 3658 }, { "epoch": 0.5593732084846168, "grad_norm": 1.2421875, "learning_rate": 6.180229802534293e-05, "loss": 1.3043, "step": 3659 }, { "epoch": 0.5595260844639786, "grad_norm": 1.4921875, "learning_rate": 6.179817237399966e-05, "loss": 1.4431, "step": 3660 }, { "epoch": 0.5596789604433403, "grad_norm": 1.3125, "learning_rate": 6.179404679878921e-05, "loss": 1.6162, "step": 3661 }, { "epoch": 0.5598318364227021, "grad_norm": 1.390625, "learning_rate": 6.178992129971974e-05, "loss": 1.6012, "step": 3662 }, { "epoch": 0.5599847124020638, "grad_norm": 1.4609375, "learning_rate": 6.178579587679956e-05, "loss": 1.394, "step": 3663 }, { "epoch": 0.5601375883814256, "grad_norm": 1.5078125, "learning_rate": 6.178167053003682e-05, "loss": 1.4787, "step": 3664 }, { "epoch": 0.5602904643607873, "grad_norm": 1.265625, "learning_rate": 6.177754525943973e-05, "loss": 1.4764, "step": 3665 }, { "epoch": 0.5604433403401491, "grad_norm": 1.46875, "learning_rate": 6.177342006501656e-05, "loss": 1.4157, "step": 3666 }, { "epoch": 0.5605962163195108, "grad_norm": 1.5234375, "learning_rate": 6.176929494677548e-05, "loss": 1.6087, "step": 3667 }, { "epoch": 0.5607490922988725, "grad_norm": 1.2265625, "learning_rate": 6.176516990472481e-05, "loss": 1.2584, "step": 3668 }, { "epoch": 0.5609019682782342, "grad_norm": 1.3203125, "learning_rate": 6.176104493887264e-05, "loss": 1.3075, "step": 3669 }, { "epoch": 0.561054844257596, "grad_norm": 1.328125, "learning_rate": 6.175692004922727e-05, "loss": 1.4194, "step": 3670 }, { "epoch": 0.5612077202369578, "grad_norm": 1.265625, "learning_rate": 6.17527952357969e-05, "loss": 1.2374, "step": 3671 }, { "epoch": 0.5613605962163195, "grad_norm": 1.40625, "learning_rate": 6.174867049858976e-05, "loss": 1.3351, "step": 3672 }, { "epoch": 0.5615134721956813, "grad_norm": 1.40625, "learning_rate": 6.174454583761409e-05, "loss": 1.3884, "step": 3673 }, { "epoch": 0.561666348175043, "grad_norm": 1.265625, "learning_rate": 6.174042125287804e-05, "loss": 1.2332, "step": 3674 }, { "epoch": 0.5618192241544048, "grad_norm": 1.34375, "learning_rate": 6.17362967443899e-05, "loss": 1.2567, "step": 3675 }, { "epoch": 0.5619721001337665, "grad_norm": 1.3984375, "learning_rate": 6.173217231215788e-05, "loss": 1.4637, "step": 3676 }, { "epoch": 0.5621249761131282, "grad_norm": 1.4140625, "learning_rate": 6.172804795619014e-05, "loss": 1.5452, "step": 3677 }, { "epoch": 0.5622778520924899, "grad_norm": 1.1484375, "learning_rate": 6.1723923676495e-05, "loss": 1.0906, "step": 3678 }, { "epoch": 0.5624307280718517, "grad_norm": 1.3984375, "learning_rate": 6.171979947308057e-05, "loss": 1.3123, "step": 3679 }, { "epoch": 0.5625836040512134, "grad_norm": 1.3984375, "learning_rate": 6.171567534595514e-05, "loss": 1.5226, "step": 3680 }, { "epoch": 0.5627364800305752, "grad_norm": 1.3984375, "learning_rate": 6.17115512951269e-05, "loss": 1.4121, "step": 3681 }, { "epoch": 0.562889356009937, "grad_norm": 1.6484375, "learning_rate": 6.170742732060411e-05, "loss": 1.3365, "step": 3682 }, { "epoch": 0.5630422319892987, "grad_norm": 1.40625, "learning_rate": 6.170330342239496e-05, "loss": 1.2933, "step": 3683 }, { "epoch": 0.5631951079686605, "grad_norm": 1.3828125, "learning_rate": 6.169917960050762e-05, "loss": 1.3751, "step": 3684 }, { "epoch": 0.5633479839480222, "grad_norm": 1.4453125, "learning_rate": 6.16950558549504e-05, "loss": 1.5272, "step": 3685 }, { "epoch": 0.5635008599273839, "grad_norm": 1.3359375, "learning_rate": 6.169093218573143e-05, "loss": 1.2176, "step": 3686 }, { "epoch": 0.5636537359067456, "grad_norm": 1.4765625, "learning_rate": 6.1686808592859e-05, "loss": 1.5029, "step": 3687 }, { "epoch": 0.5638066118861074, "grad_norm": 1.5625, "learning_rate": 6.16826850763413e-05, "loss": 1.4738, "step": 3688 }, { "epoch": 0.5639594878654691, "grad_norm": 1.3515625, "learning_rate": 6.167856163618656e-05, "loss": 1.2975, "step": 3689 }, { "epoch": 0.5641123638448309, "grad_norm": 1.4921875, "learning_rate": 6.167443827240298e-05, "loss": 1.5392, "step": 3690 }, { "epoch": 0.5642652398241926, "grad_norm": 1.5390625, "learning_rate": 6.167031498499876e-05, "loss": 1.6602, "step": 3691 }, { "epoch": 0.5644181158035544, "grad_norm": 1.4921875, "learning_rate": 6.166619177398217e-05, "loss": 1.4484, "step": 3692 }, { "epoch": 0.5645709917829161, "grad_norm": 1.3515625, "learning_rate": 6.166206863936135e-05, "loss": 1.3161, "step": 3693 }, { "epoch": 0.5647238677622779, "grad_norm": 1.2421875, "learning_rate": 6.165794558114462e-05, "loss": 1.3681, "step": 3694 }, { "epoch": 0.5648767437416395, "grad_norm": 1.3828125, "learning_rate": 6.16538225993401e-05, "loss": 1.5196, "step": 3695 }, { "epoch": 0.5650296197210013, "grad_norm": 1.484375, "learning_rate": 6.164969969395606e-05, "loss": 1.7023, "step": 3696 }, { "epoch": 0.565182495700363, "grad_norm": 1.5078125, "learning_rate": 6.164557686500074e-05, "loss": 1.6211, "step": 3697 }, { "epoch": 0.5653353716797248, "grad_norm": 1.28125, "learning_rate": 6.164145411248224e-05, "loss": 1.2686, "step": 3698 }, { "epoch": 0.5654882476590866, "grad_norm": 1.515625, "learning_rate": 6.163733143640892e-05, "loss": 1.6257, "step": 3699 }, { "epoch": 0.5656411236384483, "grad_norm": 1.4609375, "learning_rate": 6.163320883678891e-05, "loss": 1.2734, "step": 3700 }, { "epoch": 0.5657939996178101, "grad_norm": 1.2578125, "learning_rate": 6.162908631363047e-05, "loss": 1.2801, "step": 3701 }, { "epoch": 0.5659468755971718, "grad_norm": 1.28125, "learning_rate": 6.16249638669418e-05, "loss": 1.0549, "step": 3702 }, { "epoch": 0.5660997515765336, "grad_norm": 1.3203125, "learning_rate": 6.162084149673108e-05, "loss": 1.5023, "step": 3703 }, { "epoch": 0.5662526275558952, "grad_norm": 1.3671875, "learning_rate": 6.161671920300659e-05, "loss": 1.2444, "step": 3704 }, { "epoch": 0.566405503535257, "grad_norm": 1.3125, "learning_rate": 6.161259698577647e-05, "loss": 1.1891, "step": 3705 }, { "epoch": 0.5665583795146187, "grad_norm": 1.296875, "learning_rate": 6.160847484504899e-05, "loss": 1.2575, "step": 3706 }, { "epoch": 0.5667112554939805, "grad_norm": 1.3125, "learning_rate": 6.160435278083234e-05, "loss": 1.5259, "step": 3707 }, { "epoch": 0.5668641314733422, "grad_norm": 1.25, "learning_rate": 6.16002307931348e-05, "loss": 1.2974, "step": 3708 }, { "epoch": 0.567017007452704, "grad_norm": 1.4375, "learning_rate": 6.15961088819645e-05, "loss": 1.7474, "step": 3709 }, { "epoch": 0.5671698834320658, "grad_norm": 1.34375, "learning_rate": 6.159198704732967e-05, "loss": 1.4323, "step": 3710 }, { "epoch": 0.5673227594114275, "grad_norm": 1.359375, "learning_rate": 6.158786528923855e-05, "loss": 1.5843, "step": 3711 }, { "epoch": 0.5674756353907893, "grad_norm": 1.46875, "learning_rate": 6.158374360769934e-05, "loss": 1.26, "step": 3712 }, { "epoch": 0.5676285113701509, "grad_norm": 1.328125, "learning_rate": 6.15796220027203e-05, "loss": 1.4219, "step": 3713 }, { "epoch": 0.5677813873495127, "grad_norm": 1.5, "learning_rate": 6.157550047430954e-05, "loss": 1.5337, "step": 3714 }, { "epoch": 0.5679342633288744, "grad_norm": 1.375, "learning_rate": 6.157137902247539e-05, "loss": 1.3219, "step": 3715 }, { "epoch": 0.5680871393082362, "grad_norm": 1.4609375, "learning_rate": 6.156725764722597e-05, "loss": 1.3059, "step": 3716 }, { "epoch": 0.5682400152875979, "grad_norm": 1.2890625, "learning_rate": 6.156313634856954e-05, "loss": 1.2377, "step": 3717 }, { "epoch": 0.5683928912669597, "grad_norm": 1.453125, "learning_rate": 6.155901512651434e-05, "loss": 1.5604, "step": 3718 }, { "epoch": 0.5685457672463214, "grad_norm": 1.3515625, "learning_rate": 6.155489398106852e-05, "loss": 1.5036, "step": 3719 }, { "epoch": 0.5686986432256832, "grad_norm": 1.390625, "learning_rate": 6.155077291224035e-05, "loss": 1.5106, "step": 3720 }, { "epoch": 0.568851519205045, "grad_norm": 2.1875, "learning_rate": 6.154665192003797e-05, "loss": 1.4803, "step": 3721 }, { "epoch": 0.5690043951844066, "grad_norm": 1.5546875, "learning_rate": 6.154253100446967e-05, "loss": 1.5845, "step": 3722 }, { "epoch": 0.5691572711637684, "grad_norm": 1.2265625, "learning_rate": 6.153841016554365e-05, "loss": 1.2072, "step": 3723 }, { "epoch": 0.5693101471431301, "grad_norm": 1.3671875, "learning_rate": 6.153428940326807e-05, "loss": 1.3036, "step": 3724 }, { "epoch": 0.5694630231224919, "grad_norm": 1.3046875, "learning_rate": 6.153016871765121e-05, "loss": 1.3087, "step": 3725 }, { "epoch": 0.5696158991018536, "grad_norm": 1.4453125, "learning_rate": 6.15260481087012e-05, "loss": 1.4485, "step": 3726 }, { "epoch": 0.5697687750812154, "grad_norm": 1.3828125, "learning_rate": 6.152192757642634e-05, "loss": 1.352, "step": 3727 }, { "epoch": 0.5699216510605771, "grad_norm": 1.1953125, "learning_rate": 6.151780712083481e-05, "loss": 1.2411, "step": 3728 }, { "epoch": 0.5700745270399389, "grad_norm": 1.390625, "learning_rate": 6.151368674193478e-05, "loss": 1.4912, "step": 3729 }, { "epoch": 0.5702274030193006, "grad_norm": 1.3125, "learning_rate": 6.150956643973454e-05, "loss": 1.3073, "step": 3730 }, { "epoch": 0.5703802789986623, "grad_norm": 1.2890625, "learning_rate": 6.15054462142422e-05, "loss": 1.2174, "step": 3731 }, { "epoch": 0.570533154978024, "grad_norm": 1.390625, "learning_rate": 6.150132606546608e-05, "loss": 1.1605, "step": 3732 }, { "epoch": 0.5706860309573858, "grad_norm": 1.328125, "learning_rate": 6.149720599341429e-05, "loss": 1.2748, "step": 3733 }, { "epoch": 0.5708389069367475, "grad_norm": 1.4140625, "learning_rate": 6.149308599809514e-05, "loss": 1.3677, "step": 3734 }, { "epoch": 0.5709917829161093, "grad_norm": 1.40625, "learning_rate": 6.148896607951678e-05, "loss": 1.4114, "step": 3735 }, { "epoch": 0.5711446588954711, "grad_norm": 1.21875, "learning_rate": 6.14848462376874e-05, "loss": 1.0444, "step": 3736 }, { "epoch": 0.5712975348748328, "grad_norm": 1.5234375, "learning_rate": 6.148072647261529e-05, "loss": 1.4399, "step": 3737 }, { "epoch": 0.5714504108541946, "grad_norm": 1.5078125, "learning_rate": 6.147660678430855e-05, "loss": 1.4199, "step": 3738 }, { "epoch": 0.5716032868335563, "grad_norm": 1.609375, "learning_rate": 6.147248717277552e-05, "loss": 1.5557, "step": 3739 }, { "epoch": 0.571756162812918, "grad_norm": 1.390625, "learning_rate": 6.14683676380243e-05, "loss": 1.3057, "step": 3740 }, { "epoch": 0.5719090387922797, "grad_norm": 1.3984375, "learning_rate": 6.146424818006315e-05, "loss": 1.5252, "step": 3741 }, { "epoch": 0.5720619147716415, "grad_norm": 1.2578125, "learning_rate": 6.146012879890031e-05, "loss": 1.4306, "step": 3742 }, { "epoch": 0.5722147907510032, "grad_norm": 1.4296875, "learning_rate": 6.145600949454391e-05, "loss": 1.476, "step": 3743 }, { "epoch": 0.572367666730365, "grad_norm": 1.3046875, "learning_rate": 6.145189026700221e-05, "loss": 1.2522, "step": 3744 }, { "epoch": 0.5725205427097267, "grad_norm": 1.453125, "learning_rate": 6.14477711162834e-05, "loss": 1.6258, "step": 3745 }, { "epoch": 0.5726734186890885, "grad_norm": 1.375, "learning_rate": 6.144365204239572e-05, "loss": 1.3989, "step": 3746 }, { "epoch": 0.5728262946684503, "grad_norm": 1.546875, "learning_rate": 6.143953304534733e-05, "loss": 1.5656, "step": 3747 }, { "epoch": 0.572979170647812, "grad_norm": 1.390625, "learning_rate": 6.143541412514651e-05, "loss": 1.3216, "step": 3748 }, { "epoch": 0.5731320466271737, "grad_norm": 1.3125, "learning_rate": 6.14312952818014e-05, "loss": 1.2142, "step": 3749 }, { "epoch": 0.5732849226065354, "grad_norm": 1.59375, "learning_rate": 6.142717651532024e-05, "loss": 1.835, "step": 3750 }, { "epoch": 0.5734377985858972, "grad_norm": 1.3359375, "learning_rate": 6.142305782571123e-05, "loss": 1.5433, "step": 3751 }, { "epoch": 0.5735906745652589, "grad_norm": 1.546875, "learning_rate": 6.141893921298256e-05, "loss": 1.4415, "step": 3752 }, { "epoch": 0.5737435505446207, "grad_norm": 1.3125, "learning_rate": 6.141482067714251e-05, "loss": 1.2455, "step": 3753 }, { "epoch": 0.5738964265239824, "grad_norm": 1.3203125, "learning_rate": 6.14107022181992e-05, "loss": 1.5949, "step": 3754 }, { "epoch": 0.5740493025033442, "grad_norm": 1.328125, "learning_rate": 6.14065838361609e-05, "loss": 1.2682, "step": 3755 }, { "epoch": 0.5742021784827059, "grad_norm": 1.2578125, "learning_rate": 6.140246553103576e-05, "loss": 1.3026, "step": 3756 }, { "epoch": 0.5743550544620677, "grad_norm": 1.375, "learning_rate": 6.139834730283203e-05, "loss": 1.6445, "step": 3757 }, { "epoch": 0.5745079304414293, "grad_norm": 1.3515625, "learning_rate": 6.139422915155793e-05, "loss": 1.3871, "step": 3758 }, { "epoch": 0.5746608064207911, "grad_norm": 1.4765625, "learning_rate": 6.139011107722162e-05, "loss": 1.4475, "step": 3759 }, { "epoch": 0.5748136824001528, "grad_norm": 1.203125, "learning_rate": 6.138599307983135e-05, "loss": 1.1654, "step": 3760 }, { "epoch": 0.5749665583795146, "grad_norm": 1.3203125, "learning_rate": 6.138187515939531e-05, "loss": 1.2789, "step": 3761 }, { "epoch": 0.5751194343588764, "grad_norm": 1.4921875, "learning_rate": 6.137775731592166e-05, "loss": 1.5137, "step": 3762 }, { "epoch": 0.5752723103382381, "grad_norm": 1.28125, "learning_rate": 6.137363954941871e-05, "loss": 1.3506, "step": 3763 }, { "epoch": 0.5754251863175999, "grad_norm": 1.40625, "learning_rate": 6.136952185989455e-05, "loss": 1.2561, "step": 3764 }, { "epoch": 0.5755780622969616, "grad_norm": 1.390625, "learning_rate": 6.136540424735748e-05, "loss": 1.5401, "step": 3765 }, { "epoch": 0.5757309382763234, "grad_norm": 1.3671875, "learning_rate": 6.136128671181565e-05, "loss": 1.3714, "step": 3766 }, { "epoch": 0.575883814255685, "grad_norm": 1.3359375, "learning_rate": 6.13571692532773e-05, "loss": 1.3513, "step": 3767 }, { "epoch": 0.5760366902350468, "grad_norm": 1.4296875, "learning_rate": 6.135305187175065e-05, "loss": 1.272, "step": 3768 }, { "epoch": 0.5761895662144085, "grad_norm": 1.4921875, "learning_rate": 6.134893456724382e-05, "loss": 1.513, "step": 3769 }, { "epoch": 0.5763424421937703, "grad_norm": 1.375, "learning_rate": 6.13448173397651e-05, "loss": 1.5034, "step": 3770 }, { "epoch": 0.576495318173132, "grad_norm": 1.4140625, "learning_rate": 6.134070018932265e-05, "loss": 1.4073, "step": 3771 }, { "epoch": 0.5766481941524938, "grad_norm": 1.21875, "learning_rate": 6.133658311592474e-05, "loss": 1.3902, "step": 3772 }, { "epoch": 0.5768010701318556, "grad_norm": 1.2421875, "learning_rate": 6.133246611957946e-05, "loss": 1.3172, "step": 3773 }, { "epoch": 0.5769539461112173, "grad_norm": 1.28125, "learning_rate": 6.132834920029514e-05, "loss": 1.4008, "step": 3774 }, { "epoch": 0.5771068220905791, "grad_norm": 1.25, "learning_rate": 6.132423235807989e-05, "loss": 1.3599, "step": 3775 }, { "epoch": 0.5772596980699407, "grad_norm": 1.390625, "learning_rate": 6.132011559294195e-05, "loss": 1.3068, "step": 3776 }, { "epoch": 0.5774125740493025, "grad_norm": 1.34375, "learning_rate": 6.131599890488957e-05, "loss": 1.4551, "step": 3777 }, { "epoch": 0.5775654500286642, "grad_norm": 1.2578125, "learning_rate": 6.131188229393085e-05, "loss": 1.169, "step": 3778 }, { "epoch": 0.577718326008026, "grad_norm": 1.375, "learning_rate": 6.130776576007411e-05, "loss": 1.4521, "step": 3779 }, { "epoch": 0.5778712019873877, "grad_norm": 1.4921875, "learning_rate": 6.130364930332745e-05, "loss": 1.5335, "step": 3780 }, { "epoch": 0.5780240779667495, "grad_norm": 1.3671875, "learning_rate": 6.129953292369915e-05, "loss": 1.6189, "step": 3781 }, { "epoch": 0.5781769539461112, "grad_norm": 1.515625, "learning_rate": 6.12954166211974e-05, "loss": 1.5087, "step": 3782 }, { "epoch": 0.578329829925473, "grad_norm": 1.3359375, "learning_rate": 6.129130039583036e-05, "loss": 1.5037, "step": 3783 }, { "epoch": 0.5784827059048347, "grad_norm": 1.6328125, "learning_rate": 6.128718424760626e-05, "loss": 1.764, "step": 3784 }, { "epoch": 0.5786355818841964, "grad_norm": 1.4609375, "learning_rate": 6.12830681765333e-05, "loss": 1.2899, "step": 3785 }, { "epoch": 0.5787884578635581, "grad_norm": 1.421875, "learning_rate": 6.12789521826197e-05, "loss": 1.4298, "step": 3786 }, { "epoch": 0.5789413338429199, "grad_norm": 1.265625, "learning_rate": 6.127483626587364e-05, "loss": 1.3815, "step": 3787 }, { "epoch": 0.5790942098222817, "grad_norm": 1.3515625, "learning_rate": 6.127072042630337e-05, "loss": 1.5894, "step": 3788 }, { "epoch": 0.5792470858016434, "grad_norm": 1.3359375, "learning_rate": 6.126660466391703e-05, "loss": 1.4247, "step": 3789 }, { "epoch": 0.5793999617810052, "grad_norm": 1.2578125, "learning_rate": 6.126248897872284e-05, "loss": 1.1213, "step": 3790 }, { "epoch": 0.5795528377603669, "grad_norm": 1.421875, "learning_rate": 6.125837337072902e-05, "loss": 1.3318, "step": 3791 }, { "epoch": 0.5797057137397287, "grad_norm": 1.2890625, "learning_rate": 6.125425783994374e-05, "loss": 1.2348, "step": 3792 }, { "epoch": 0.5798585897190904, "grad_norm": 1.375, "learning_rate": 6.125014238637527e-05, "loss": 1.2924, "step": 3793 }, { "epoch": 0.5800114656984521, "grad_norm": 1.421875, "learning_rate": 6.124602701003175e-05, "loss": 1.66, "step": 3794 }, { "epoch": 0.5801643416778138, "grad_norm": 1.3125, "learning_rate": 6.124191171092138e-05, "loss": 1.5564, "step": 3795 }, { "epoch": 0.5803172176571756, "grad_norm": 1.3515625, "learning_rate": 6.123779648905239e-05, "loss": 1.4854, "step": 3796 }, { "epoch": 0.5804700936365373, "grad_norm": 1.1484375, "learning_rate": 6.123368134443294e-05, "loss": 1.0303, "step": 3797 }, { "epoch": 0.5806229696158991, "grad_norm": 1.390625, "learning_rate": 6.122956627707133e-05, "loss": 1.515, "step": 3798 }, { "epoch": 0.5807758455952609, "grad_norm": 1.3671875, "learning_rate": 6.122545128697563e-05, "loss": 1.1031, "step": 3799 }, { "epoch": 0.5809287215746226, "grad_norm": 1.3984375, "learning_rate": 6.122133637415413e-05, "loss": 1.2524, "step": 3800 }, { "epoch": 0.5810815975539844, "grad_norm": 1.4140625, "learning_rate": 6.121722153861502e-05, "loss": 1.3272, "step": 3801 }, { "epoch": 0.5812344735333461, "grad_norm": 1.5234375, "learning_rate": 6.121310678036643e-05, "loss": 1.2534, "step": 3802 }, { "epoch": 0.5813873495127078, "grad_norm": 1.5234375, "learning_rate": 6.120899209941668e-05, "loss": 1.3892, "step": 3803 }, { "epoch": 0.5815402254920695, "grad_norm": 1.4140625, "learning_rate": 6.120487749577386e-05, "loss": 1.5795, "step": 3804 }, { "epoch": 0.5816931014714313, "grad_norm": 1.4609375, "learning_rate": 6.120076296944623e-05, "loss": 1.634, "step": 3805 }, { "epoch": 0.581845977450793, "grad_norm": 1.3359375, "learning_rate": 6.119664852044195e-05, "loss": 1.327, "step": 3806 }, { "epoch": 0.5819988534301548, "grad_norm": 1.3515625, "learning_rate": 6.119253414876928e-05, "loss": 1.1613, "step": 3807 }, { "epoch": 0.5821517294095165, "grad_norm": 1.3203125, "learning_rate": 6.11884198544364e-05, "loss": 1.5229, "step": 3808 }, { "epoch": 0.5823046053888783, "grad_norm": 1.5234375, "learning_rate": 6.118430563745144e-05, "loss": 1.4059, "step": 3809 }, { "epoch": 0.58245748136824, "grad_norm": 1.34375, "learning_rate": 6.118019149782268e-05, "loss": 1.5002, "step": 3810 }, { "epoch": 0.5826103573476018, "grad_norm": 1.25, "learning_rate": 6.117607743555826e-05, "loss": 1.3863, "step": 3811 }, { "epoch": 0.5827632333269634, "grad_norm": 1.4140625, "learning_rate": 6.117196345066644e-05, "loss": 1.2457, "step": 3812 }, { "epoch": 0.5829161093063252, "grad_norm": 1.5625, "learning_rate": 6.116784954315539e-05, "loss": 1.7812, "step": 3813 }, { "epoch": 0.583068985285687, "grad_norm": 1.421875, "learning_rate": 6.116373571303334e-05, "loss": 1.441, "step": 3814 }, { "epoch": 0.5832218612650487, "grad_norm": 1.4375, "learning_rate": 6.115962196030843e-05, "loss": 1.3633, "step": 3815 }, { "epoch": 0.5833747372444105, "grad_norm": 1.390625, "learning_rate": 6.115550828498885e-05, "loss": 1.3061, "step": 3816 }, { "epoch": 0.5835276132237722, "grad_norm": 1.359375, "learning_rate": 6.115139468708289e-05, "loss": 1.4517, "step": 3817 }, { "epoch": 0.583680489203134, "grad_norm": 1.3984375, "learning_rate": 6.114728116659862e-05, "loss": 1.4929, "step": 3818 }, { "epoch": 0.5838333651824957, "grad_norm": 1.5703125, "learning_rate": 6.114316772354438e-05, "loss": 1.5777, "step": 3819 }, { "epoch": 0.5839862411618575, "grad_norm": 1.4921875, "learning_rate": 6.113905435792827e-05, "loss": 1.4476, "step": 3820 }, { "epoch": 0.5841391171412191, "grad_norm": 1.3046875, "learning_rate": 6.11349410697585e-05, "loss": 1.3593, "step": 3821 }, { "epoch": 0.5842919931205809, "grad_norm": 1.2890625, "learning_rate": 6.113082785904331e-05, "loss": 1.2601, "step": 3822 }, { "epoch": 0.5844448690999426, "grad_norm": 1.3046875, "learning_rate": 6.112671472579084e-05, "loss": 1.6503, "step": 3823 }, { "epoch": 0.5845977450793044, "grad_norm": 1.3125, "learning_rate": 6.112260167000934e-05, "loss": 1.2645, "step": 3824 }, { "epoch": 0.5847506210586662, "grad_norm": 1.3046875, "learning_rate": 6.111848869170695e-05, "loss": 1.2592, "step": 3825 }, { "epoch": 0.5849034970380279, "grad_norm": 1.4765625, "learning_rate": 6.111437579089195e-05, "loss": 1.255, "step": 3826 }, { "epoch": 0.5850563730173897, "grad_norm": 1.3515625, "learning_rate": 6.111026296757247e-05, "loss": 1.3387, "step": 3827 }, { "epoch": 0.5852092489967514, "grad_norm": 1.4765625, "learning_rate": 6.11061502217567e-05, "loss": 1.4168, "step": 3828 }, { "epoch": 0.5853621249761132, "grad_norm": 1.6484375, "learning_rate": 6.110203755345287e-05, "loss": 1.4713, "step": 3829 }, { "epoch": 0.5855150009554748, "grad_norm": 1.484375, "learning_rate": 6.109792496266915e-05, "loss": 1.5518, "step": 3830 }, { "epoch": 0.5856678769348366, "grad_norm": 1.4140625, "learning_rate": 6.109381244941378e-05, "loss": 1.5136, "step": 3831 }, { "epoch": 0.5858207529141983, "grad_norm": 1.2109375, "learning_rate": 6.108970001369489e-05, "loss": 1.2242, "step": 3832 }, { "epoch": 0.5859736288935601, "grad_norm": 1.546875, "learning_rate": 6.108558765552076e-05, "loss": 1.4601, "step": 3833 }, { "epoch": 0.5861265048729218, "grad_norm": 1.359375, "learning_rate": 6.108147537489954e-05, "loss": 1.5163, "step": 3834 }, { "epoch": 0.5862793808522836, "grad_norm": 1.4375, "learning_rate": 6.107736317183939e-05, "loss": 1.6528, "step": 3835 }, { "epoch": 0.5864322568316453, "grad_norm": 1.3046875, "learning_rate": 6.107325104634856e-05, "loss": 1.1274, "step": 3836 }, { "epoch": 0.5865851328110071, "grad_norm": 1.2890625, "learning_rate": 6.106913899843518e-05, "loss": 1.2567, "step": 3837 }, { "epoch": 0.5867380087903689, "grad_norm": 1.34375, "learning_rate": 6.106502702810756e-05, "loss": 1.4097, "step": 3838 }, { "epoch": 0.5868908847697305, "grad_norm": 1.3125, "learning_rate": 6.106091513537377e-05, "loss": 1.322, "step": 3839 }, { "epoch": 0.5870437607490923, "grad_norm": 1.34375, "learning_rate": 6.105680332024208e-05, "loss": 1.4807, "step": 3840 }, { "epoch": 0.587196636728454, "grad_norm": 1.2890625, "learning_rate": 6.105269158272067e-05, "loss": 1.3371, "step": 3841 }, { "epoch": 0.5873495127078158, "grad_norm": 1.4140625, "learning_rate": 6.104857992281771e-05, "loss": 1.2451, "step": 3842 }, { "epoch": 0.5875023886871775, "grad_norm": 1.40625, "learning_rate": 6.104446834054145e-05, "loss": 1.2068, "step": 3843 }, { "epoch": 0.5876552646665393, "grad_norm": 1.4375, "learning_rate": 6.104035683589999e-05, "loss": 1.3902, "step": 3844 }, { "epoch": 0.587808140645901, "grad_norm": 1.4375, "learning_rate": 6.10362454089016e-05, "loss": 1.6467, "step": 3845 }, { "epoch": 0.5879610166252628, "grad_norm": 1.2890625, "learning_rate": 6.103213405955446e-05, "loss": 1.2646, "step": 3846 }, { "epoch": 0.5881138926046245, "grad_norm": 1.234375, "learning_rate": 6.102802278786675e-05, "loss": 1.0039, "step": 3847 }, { "epoch": 0.5882667685839862, "grad_norm": 1.4296875, "learning_rate": 6.10239115938467e-05, "loss": 1.6402, "step": 3848 }, { "epoch": 0.5884196445633479, "grad_norm": 1.34375, "learning_rate": 6.1019800477502445e-05, "loss": 1.3497, "step": 3849 }, { "epoch": 0.5885725205427097, "grad_norm": 1.2421875, "learning_rate": 6.1015689438842216e-05, "loss": 1.1922, "step": 3850 }, { "epoch": 0.5887253965220715, "grad_norm": 1.4140625, "learning_rate": 6.101157847787417e-05, "loss": 1.4063, "step": 3851 }, { "epoch": 0.5888782725014332, "grad_norm": 1.4140625, "learning_rate": 6.100746759460656e-05, "loss": 1.4312, "step": 3852 }, { "epoch": 0.589031148480795, "grad_norm": 1.296875, "learning_rate": 6.1003356789047564e-05, "loss": 1.2524, "step": 3853 }, { "epoch": 0.5891840244601567, "grad_norm": 1.4140625, "learning_rate": 6.099924606120531e-05, "loss": 1.4491, "step": 3854 }, { "epoch": 0.5893369004395185, "grad_norm": 1.4765625, "learning_rate": 6.0995135411088044e-05, "loss": 1.689, "step": 3855 }, { "epoch": 0.5894897764188802, "grad_norm": 1.4453125, "learning_rate": 6.0991024838703935e-05, "loss": 1.4213, "step": 3856 }, { "epoch": 0.5896426523982419, "grad_norm": 1.4453125, "learning_rate": 6.098691434406124e-05, "loss": 1.5474, "step": 3857 }, { "epoch": 0.5897955283776036, "grad_norm": 1.5703125, "learning_rate": 6.098280392716804e-05, "loss": 1.5811, "step": 3858 }, { "epoch": 0.5899484043569654, "grad_norm": 1.390625, "learning_rate": 6.097869358803264e-05, "loss": 1.3851, "step": 3859 }, { "epoch": 0.5901012803363271, "grad_norm": 1.2890625, "learning_rate": 6.097458332666317e-05, "loss": 1.169, "step": 3860 }, { "epoch": 0.5902541563156889, "grad_norm": 1.25, "learning_rate": 6.0970473143067796e-05, "loss": 1.1345, "step": 3861 }, { "epoch": 0.5904070322950506, "grad_norm": 1.328125, "learning_rate": 6.096636303725479e-05, "loss": 1.2814, "step": 3862 }, { "epoch": 0.5905599082744124, "grad_norm": 1.328125, "learning_rate": 6.096225300923224e-05, "loss": 1.354, "step": 3863 }, { "epoch": 0.5907127842537742, "grad_norm": 1.453125, "learning_rate": 6.095814305900844e-05, "loss": 1.388, "step": 3864 }, { "epoch": 0.5908656602331359, "grad_norm": 1.2734375, "learning_rate": 6.095403318659152e-05, "loss": 1.2066, "step": 3865 }, { "epoch": 0.5910185362124976, "grad_norm": 1.359375, "learning_rate": 6.0949923391989684e-05, "loss": 1.4419, "step": 3866 }, { "epoch": 0.5911714121918593, "grad_norm": 1.46875, "learning_rate": 6.094581367521115e-05, "loss": 1.4576, "step": 3867 }, { "epoch": 0.5913242881712211, "grad_norm": 1.53125, "learning_rate": 6.094170403626403e-05, "loss": 1.4322, "step": 3868 }, { "epoch": 0.5914771641505828, "grad_norm": 1.453125, "learning_rate": 6.093759447515661e-05, "loss": 1.3894, "step": 3869 }, { "epoch": 0.5916300401299446, "grad_norm": 1.484375, "learning_rate": 6.093348499189699e-05, "loss": 1.6897, "step": 3870 }, { "epoch": 0.5917829161093063, "grad_norm": 1.3984375, "learning_rate": 6.0929375586493444e-05, "loss": 1.3411, "step": 3871 }, { "epoch": 0.5919357920886681, "grad_norm": 1.59375, "learning_rate": 6.09252662589541e-05, "loss": 1.7679, "step": 3872 }, { "epoch": 0.5920886680680298, "grad_norm": 1.328125, "learning_rate": 6.09211570092872e-05, "loss": 1.3181, "step": 3873 }, { "epoch": 0.5922415440473916, "grad_norm": 1.53125, "learning_rate": 6.0917047837500894e-05, "loss": 1.6577, "step": 3874 }, { "epoch": 0.5923944200267532, "grad_norm": 1.3671875, "learning_rate": 6.0912938743603355e-05, "loss": 1.3486, "step": 3875 }, { "epoch": 0.592547296006115, "grad_norm": 1.4453125, "learning_rate": 6.0908829727602834e-05, "loss": 1.2837, "step": 3876 }, { "epoch": 0.5927001719854768, "grad_norm": 1.40625, "learning_rate": 6.0904720789507444e-05, "loss": 1.479, "step": 3877 }, { "epoch": 0.5928530479648385, "grad_norm": 1.3671875, "learning_rate": 6.090061192932546e-05, "loss": 1.6692, "step": 3878 }, { "epoch": 0.5930059239442003, "grad_norm": 1.4921875, "learning_rate": 6.089650314706499e-05, "loss": 1.6868, "step": 3879 }, { "epoch": 0.593158799923562, "grad_norm": 1.4453125, "learning_rate": 6.0892394442734266e-05, "loss": 1.6847, "step": 3880 }, { "epoch": 0.5933116759029238, "grad_norm": 1.328125, "learning_rate": 6.0888285816341484e-05, "loss": 1.1197, "step": 3881 }, { "epoch": 0.5934645518822855, "grad_norm": 1.359375, "learning_rate": 6.0884177267894796e-05, "loss": 1.4556, "step": 3882 }, { "epoch": 0.5936174278616473, "grad_norm": 1.421875, "learning_rate": 6.088006879740242e-05, "loss": 1.6781, "step": 3883 }, { "epoch": 0.5937703038410089, "grad_norm": 1.453125, "learning_rate": 6.087596040487252e-05, "loss": 1.53, "step": 3884 }, { "epoch": 0.5939231798203707, "grad_norm": 1.34375, "learning_rate": 6.087185209031332e-05, "loss": 1.3508, "step": 3885 }, { "epoch": 0.5940760557997324, "grad_norm": 1.21875, "learning_rate": 6.0867743853732975e-05, "loss": 1.3294, "step": 3886 }, { "epoch": 0.5942289317790942, "grad_norm": 1.3203125, "learning_rate": 6.086363569513965e-05, "loss": 1.296, "step": 3887 }, { "epoch": 0.594381807758456, "grad_norm": 1.390625, "learning_rate": 6.085952761454161e-05, "loss": 1.5769, "step": 3888 }, { "epoch": 0.5945346837378177, "grad_norm": 1.375, "learning_rate": 6.085541961194696e-05, "loss": 1.4613, "step": 3889 }, { "epoch": 0.5946875597171795, "grad_norm": 1.3515625, "learning_rate": 6.085131168736395e-05, "loss": 1.4794, "step": 3890 }, { "epoch": 0.5948404356965412, "grad_norm": 1.3125, "learning_rate": 6.084720384080071e-05, "loss": 1.1252, "step": 3891 }, { "epoch": 0.594993311675903, "grad_norm": 1.3203125, "learning_rate": 6.084309607226548e-05, "loss": 1.5591, "step": 3892 }, { "epoch": 0.5951461876552646, "grad_norm": 1.3984375, "learning_rate": 6.083898838176645e-05, "loss": 1.3252, "step": 3893 }, { "epoch": 0.5952990636346264, "grad_norm": 1.4296875, "learning_rate": 6.083488076931173e-05, "loss": 1.6104, "step": 3894 }, { "epoch": 0.5954519396139881, "grad_norm": 1.265625, "learning_rate": 6.083077323490958e-05, "loss": 1.2742, "step": 3895 }, { "epoch": 0.5956048155933499, "grad_norm": 1.5, "learning_rate": 6.0826665778568124e-05, "loss": 1.4879, "step": 3896 }, { "epoch": 0.5957576915727116, "grad_norm": 1.3828125, "learning_rate": 6.082255840029565e-05, "loss": 1.6907, "step": 3897 }, { "epoch": 0.5959105675520734, "grad_norm": 1.4453125, "learning_rate": 6.0818451100100205e-05, "loss": 1.5643, "step": 3898 }, { "epoch": 0.5960634435314351, "grad_norm": 1.3359375, "learning_rate": 6.08143438779901e-05, "loss": 1.3251, "step": 3899 }, { "epoch": 0.5962163195107969, "grad_norm": 1.4453125, "learning_rate": 6.081023673397346e-05, "loss": 1.517, "step": 3900 }, { "epoch": 0.5963691954901587, "grad_norm": 1.3046875, "learning_rate": 6.0806129668058456e-05, "loss": 1.1793, "step": 3901 }, { "epoch": 0.5965220714695203, "grad_norm": 1.515625, "learning_rate": 6.0802022680253324e-05, "loss": 1.7367, "step": 3902 }, { "epoch": 0.596674947448882, "grad_norm": 1.3515625, "learning_rate": 6.079791577056617e-05, "loss": 1.3767, "step": 3903 }, { "epoch": 0.5968278234282438, "grad_norm": 1.2421875, "learning_rate": 6.079380893900529e-05, "loss": 1.0927, "step": 3904 }, { "epoch": 0.5969806994076056, "grad_norm": 1.34375, "learning_rate": 6.078970218557877e-05, "loss": 1.6472, "step": 3905 }, { "epoch": 0.5971335753869673, "grad_norm": 1.40625, "learning_rate": 6.0785595510294846e-05, "loss": 1.3801, "step": 3906 }, { "epoch": 0.5972864513663291, "grad_norm": 1.3203125, "learning_rate": 6.078148891316171e-05, "loss": 1.2947, "step": 3907 }, { "epoch": 0.5974393273456908, "grad_norm": 1.328125, "learning_rate": 6.0777382394187486e-05, "loss": 1.3601, "step": 3908 }, { "epoch": 0.5975922033250526, "grad_norm": 1.34375, "learning_rate": 6.07732759533804e-05, "loss": 1.2041, "step": 3909 }, { "epoch": 0.5977450793044143, "grad_norm": 1.40625, "learning_rate": 6.076916959074863e-05, "loss": 1.5755, "step": 3910 }, { "epoch": 0.597897955283776, "grad_norm": 1.2421875, "learning_rate": 6.0765063306300386e-05, "loss": 1.2618, "step": 3911 }, { "epoch": 0.5980508312631377, "grad_norm": 1.3828125, "learning_rate": 6.076095710004379e-05, "loss": 1.3336, "step": 3912 }, { "epoch": 0.5982037072424995, "grad_norm": 1.3046875, "learning_rate": 6.075685097198711e-05, "loss": 1.2548, "step": 3913 }, { "epoch": 0.5983565832218612, "grad_norm": 1.3359375, "learning_rate": 6.075274492213846e-05, "loss": 1.39, "step": 3914 }, { "epoch": 0.598509459201223, "grad_norm": 1.3828125, "learning_rate": 6.074863895050602e-05, "loss": 1.299, "step": 3915 }, { "epoch": 0.5986623351805848, "grad_norm": 1.359375, "learning_rate": 6.074453305709802e-05, "loss": 1.3948, "step": 3916 }, { "epoch": 0.5988152111599465, "grad_norm": 1.375, "learning_rate": 6.0740427241922604e-05, "loss": 1.2833, "step": 3917 }, { "epoch": 0.5989680871393083, "grad_norm": 1.140625, "learning_rate": 6.0736321504988006e-05, "loss": 1.2298, "step": 3918 }, { "epoch": 0.59912096311867, "grad_norm": 1.3515625, "learning_rate": 6.073221584630235e-05, "loss": 1.3895, "step": 3919 }, { "epoch": 0.5992738390980317, "grad_norm": 1.3984375, "learning_rate": 6.072811026587383e-05, "loss": 1.5881, "step": 3920 }, { "epoch": 0.5994267150773934, "grad_norm": 1.3671875, "learning_rate": 6.072400476371065e-05, "loss": 1.3679, "step": 3921 }, { "epoch": 0.5995795910567552, "grad_norm": 1.328125, "learning_rate": 6.0719899339820964e-05, "loss": 1.3459, "step": 3922 }, { "epoch": 0.5997324670361169, "grad_norm": 1.4296875, "learning_rate": 6.071579399421301e-05, "loss": 1.4759, "step": 3923 }, { "epoch": 0.5998853430154787, "grad_norm": 1.375, "learning_rate": 6.07116887268949e-05, "loss": 1.2903, "step": 3924 }, { "epoch": 0.6000382189948404, "grad_norm": 1.40625, "learning_rate": 6.070758353787485e-05, "loss": 1.3962, "step": 3925 }, { "epoch": 0.6001910949742022, "grad_norm": 1.3984375, "learning_rate": 6.0703478427161045e-05, "loss": 1.3464, "step": 3926 }, { "epoch": 0.600343970953564, "grad_norm": 1.34375, "learning_rate": 6.069937339476164e-05, "loss": 1.1924, "step": 3927 }, { "epoch": 0.6004968469329257, "grad_norm": 1.5703125, "learning_rate": 6.069526844068486e-05, "loss": 1.2883, "step": 3928 }, { "epoch": 0.6006497229122874, "grad_norm": 1.4921875, "learning_rate": 6.069116356493883e-05, "loss": 1.3435, "step": 3929 }, { "epoch": 0.6008025988916491, "grad_norm": 1.3671875, "learning_rate": 6.068705876753178e-05, "loss": 1.4388, "step": 3930 }, { "epoch": 0.6009554748710109, "grad_norm": 1.25, "learning_rate": 6.068295404847185e-05, "loss": 1.306, "step": 3931 }, { "epoch": 0.6011083508503726, "grad_norm": 1.2734375, "learning_rate": 6.067884940776727e-05, "loss": 1.2704, "step": 3932 }, { "epoch": 0.6012612268297344, "grad_norm": 1.34375, "learning_rate": 6.067474484542619e-05, "loss": 1.4091, "step": 3933 }, { "epoch": 0.6014141028090961, "grad_norm": 1.46875, "learning_rate": 6.0670640361456766e-05, "loss": 1.421, "step": 3934 }, { "epoch": 0.6015669787884579, "grad_norm": 1.34375, "learning_rate": 6.066653595586722e-05, "loss": 1.3662, "step": 3935 }, { "epoch": 0.6017198547678196, "grad_norm": 1.40625, "learning_rate": 6.0662431628665694e-05, "loss": 1.2863, "step": 3936 }, { "epoch": 0.6018727307471814, "grad_norm": 1.5625, "learning_rate": 6.065832737986041e-05, "loss": 1.5302, "step": 3937 }, { "epoch": 0.602025606726543, "grad_norm": 1.7890625, "learning_rate": 6.065422320945951e-05, "loss": 1.5489, "step": 3938 }, { "epoch": 0.6021784827059048, "grad_norm": 1.3125, "learning_rate": 6.065011911747123e-05, "loss": 1.2737, "step": 3939 }, { "epoch": 0.6023313586852665, "grad_norm": 1.4140625, "learning_rate": 6.064601510390368e-05, "loss": 1.269, "step": 3940 }, { "epoch": 0.6024842346646283, "grad_norm": 1.4375, "learning_rate": 6.0641911168765057e-05, "loss": 1.2478, "step": 3941 }, { "epoch": 0.6026371106439901, "grad_norm": 1.3359375, "learning_rate": 6.063780731206356e-05, "loss": 1.2318, "step": 3942 }, { "epoch": 0.6027899866233518, "grad_norm": 1.515625, "learning_rate": 6.063370353380733e-05, "loss": 1.6085, "step": 3943 }, { "epoch": 0.6029428626027136, "grad_norm": 1.65625, "learning_rate": 6.062959983400462e-05, "loss": 1.6431, "step": 3944 }, { "epoch": 0.6030957385820753, "grad_norm": 1.453125, "learning_rate": 6.062549621266356e-05, "loss": 1.3575, "step": 3945 }, { "epoch": 0.6032486145614371, "grad_norm": 1.3046875, "learning_rate": 6.062139266979228e-05, "loss": 1.2524, "step": 3946 }, { "epoch": 0.6034014905407987, "grad_norm": 1.4453125, "learning_rate": 6.061728920539906e-05, "loss": 1.4541, "step": 3947 }, { "epoch": 0.6035543665201605, "grad_norm": 1.6875, "learning_rate": 6.061318581949198e-05, "loss": 1.4372, "step": 3948 }, { "epoch": 0.6037072424995222, "grad_norm": 1.2265625, "learning_rate": 6.0609082512079304e-05, "loss": 1.104, "step": 3949 }, { "epoch": 0.603860118478884, "grad_norm": 1.34375, "learning_rate": 6.060497928316913e-05, "loss": 1.3917, "step": 3950 }, { "epoch": 0.6040129944582457, "grad_norm": 1.28125, "learning_rate": 6.06008761327697e-05, "loss": 1.2922, "step": 3951 }, { "epoch": 0.6041658704376075, "grad_norm": 1.2890625, "learning_rate": 6.0596773060889175e-05, "loss": 1.2454, "step": 3952 }, { "epoch": 0.6043187464169693, "grad_norm": 1.4375, "learning_rate": 6.05926700675357e-05, "loss": 1.3372, "step": 3953 }, { "epoch": 0.604471622396331, "grad_norm": 1.4375, "learning_rate": 6.058856715271748e-05, "loss": 1.6397, "step": 3954 }, { "epoch": 0.6046244983756928, "grad_norm": 1.3359375, "learning_rate": 6.058446431644267e-05, "loss": 1.4457, "step": 3955 }, { "epoch": 0.6047773743550544, "grad_norm": 1.421875, "learning_rate": 6.058036155871948e-05, "loss": 1.6901, "step": 3956 }, { "epoch": 0.6049302503344162, "grad_norm": 1.4609375, "learning_rate": 6.057625887955605e-05, "loss": 1.321, "step": 3957 }, { "epoch": 0.6050831263137779, "grad_norm": 1.375, "learning_rate": 6.057215627896061e-05, "loss": 1.2088, "step": 3958 }, { "epoch": 0.6052360022931397, "grad_norm": 1.40625, "learning_rate": 6.056805375694129e-05, "loss": 1.2377, "step": 3959 }, { "epoch": 0.6053888782725014, "grad_norm": 1.421875, "learning_rate": 6.056395131350625e-05, "loss": 1.5524, "step": 3960 }, { "epoch": 0.6055417542518632, "grad_norm": 1.3125, "learning_rate": 6.055984894866371e-05, "loss": 1.2882, "step": 3961 }, { "epoch": 0.6056946302312249, "grad_norm": 1.6328125, "learning_rate": 6.055574666242182e-05, "loss": 1.3471, "step": 3962 }, { "epoch": 0.6058475062105867, "grad_norm": 1.390625, "learning_rate": 6.05516444547888e-05, "loss": 1.377, "step": 3963 }, { "epoch": 0.6060003821899484, "grad_norm": 1.265625, "learning_rate": 6.054754232577275e-05, "loss": 1.1486, "step": 3964 }, { "epoch": 0.6061532581693101, "grad_norm": 1.46875, "learning_rate": 6.05434402753819e-05, "loss": 1.4495, "step": 3965 }, { "epoch": 0.6063061341486718, "grad_norm": 1.3828125, "learning_rate": 6.053933830362442e-05, "loss": 1.6115, "step": 3966 }, { "epoch": 0.6064590101280336, "grad_norm": 1.4140625, "learning_rate": 6.0535236410508434e-05, "loss": 1.4199, "step": 3967 }, { "epoch": 0.6066118861073954, "grad_norm": 1.359375, "learning_rate": 6.0531134596042205e-05, "loss": 1.1534, "step": 3968 }, { "epoch": 0.6067647620867571, "grad_norm": 1.5234375, "learning_rate": 6.0527032860233826e-05, "loss": 1.6333, "step": 3969 }, { "epoch": 0.6069176380661189, "grad_norm": 1.3125, "learning_rate": 6.052293120309151e-05, "loss": 1.542, "step": 3970 }, { "epoch": 0.6070705140454806, "grad_norm": 1.4765625, "learning_rate": 6.051882962462343e-05, "loss": 1.4352, "step": 3971 }, { "epoch": 0.6072233900248424, "grad_norm": 1.3671875, "learning_rate": 6.0514728124837764e-05, "loss": 1.5583, "step": 3972 }, { "epoch": 0.6073762660042041, "grad_norm": 1.3828125, "learning_rate": 6.05106267037427e-05, "loss": 1.3183, "step": 3973 }, { "epoch": 0.6075291419835658, "grad_norm": 1.4921875, "learning_rate": 6.050652536134636e-05, "loss": 1.4365, "step": 3974 }, { "epoch": 0.6076820179629275, "grad_norm": 1.2890625, "learning_rate": 6.050242409765694e-05, "loss": 1.3591, "step": 3975 }, { "epoch": 0.6078348939422893, "grad_norm": 1.34375, "learning_rate": 6.0498322912682625e-05, "loss": 1.3231, "step": 3976 }, { "epoch": 0.607987769921651, "grad_norm": 1.3125, "learning_rate": 6.049422180643159e-05, "loss": 1.2917, "step": 3977 }, { "epoch": 0.6081406459010128, "grad_norm": 1.4765625, "learning_rate": 6.049012077891203e-05, "loss": 1.6013, "step": 3978 }, { "epoch": 0.6082935218803746, "grad_norm": 1.484375, "learning_rate": 6.0486019830132047e-05, "loss": 1.4766, "step": 3979 }, { "epoch": 0.6084463978597363, "grad_norm": 1.3125, "learning_rate": 6.048191896009988e-05, "loss": 1.1355, "step": 3980 }, { "epoch": 0.6085992738390981, "grad_norm": 1.328125, "learning_rate": 6.047781816882365e-05, "loss": 1.201, "step": 3981 }, { "epoch": 0.6087521498184598, "grad_norm": 1.3125, "learning_rate": 6.047371745631161e-05, "loss": 1.2991, "step": 3982 }, { "epoch": 0.6089050257978215, "grad_norm": 1.4296875, "learning_rate": 6.046961682257182e-05, "loss": 1.3564, "step": 3983 }, { "epoch": 0.6090579017771832, "grad_norm": 1.3515625, "learning_rate": 6.046551626761257e-05, "loss": 1.5228, "step": 3984 }, { "epoch": 0.609210777756545, "grad_norm": 1.4453125, "learning_rate": 6.0461415791441956e-05, "loss": 1.5115, "step": 3985 }, { "epoch": 0.6093636537359067, "grad_norm": 1.4375, "learning_rate": 6.045731539406815e-05, "loss": 1.2124, "step": 3986 }, { "epoch": 0.6095165297152685, "grad_norm": 1.3203125, "learning_rate": 6.045321507549937e-05, "loss": 1.3863, "step": 3987 }, { "epoch": 0.6096694056946302, "grad_norm": 1.234375, "learning_rate": 6.0449114835743714e-05, "loss": 1.1321, "step": 3988 }, { "epoch": 0.609822281673992, "grad_norm": 1.4140625, "learning_rate": 6.044501467480945e-05, "loss": 1.3755, "step": 3989 }, { "epoch": 0.6099751576533538, "grad_norm": 1.3828125, "learning_rate": 6.044091459270467e-05, "loss": 1.3203, "step": 3990 }, { "epoch": 0.6101280336327155, "grad_norm": 1.203125, "learning_rate": 6.04368145894376e-05, "loss": 1.1494, "step": 3991 }, { "epoch": 0.6102809096120771, "grad_norm": 1.375, "learning_rate": 6.043271466501639e-05, "loss": 1.5148, "step": 3992 }, { "epoch": 0.6104337855914389, "grad_norm": 1.3203125, "learning_rate": 6.042861481944917e-05, "loss": 1.3696, "step": 3993 }, { "epoch": 0.6105866615708007, "grad_norm": 1.5625, "learning_rate": 6.042451505274416e-05, "loss": 1.4319, "step": 3994 }, { "epoch": 0.6107395375501624, "grad_norm": 1.3125, "learning_rate": 6.04204153649095e-05, "loss": 1.3051, "step": 3995 }, { "epoch": 0.6108924135295242, "grad_norm": 1.2265625, "learning_rate": 6.04163157559534e-05, "loss": 1.4218, "step": 3996 }, { "epoch": 0.6110452895088859, "grad_norm": 1.375, "learning_rate": 6.0412216225883976e-05, "loss": 1.2436, "step": 3997 }, { "epoch": 0.6111981654882477, "grad_norm": 1.296875, "learning_rate": 6.040811677470949e-05, "loss": 1.2354, "step": 3998 }, { "epoch": 0.6113510414676094, "grad_norm": 1.3125, "learning_rate": 6.040401740243802e-05, "loss": 1.5375, "step": 3999 }, { "epoch": 0.6115039174469712, "grad_norm": 1.328125, "learning_rate": 6.0399918109077745e-05, "loss": 1.293, "step": 4000 }, { "epoch": 0.6115039174469712, "eval_loss": 1.3737149238586426, "eval_model_preparation_time": 0.0034, "eval_runtime": 111.8656, "eval_samples_per_second": 89.393, "eval_steps_per_second": 2.798, "step": 4000 }, { "epoch": 0.6116567934263328, "grad_norm": 1.46875, "learning_rate": 6.039581889463688e-05, "loss": 1.4677, "step": 4001 }, { "epoch": 0.6118096694056946, "grad_norm": 1.4296875, "learning_rate": 6.039171975912354e-05, "loss": 1.25, "step": 4002 }, { "epoch": 0.6119625453850563, "grad_norm": 1.3984375, "learning_rate": 6.038762070254598e-05, "loss": 1.4799, "step": 4003 }, { "epoch": 0.6121154213644181, "grad_norm": 1.3125, "learning_rate": 6.038352172491226e-05, "loss": 1.3492, "step": 4004 }, { "epoch": 0.6122682973437799, "grad_norm": 1.53125, "learning_rate": 6.037942282623064e-05, "loss": 1.321, "step": 4005 }, { "epoch": 0.6124211733231416, "grad_norm": 1.2734375, "learning_rate": 6.0375324006509235e-05, "loss": 1.3303, "step": 4006 }, { "epoch": 0.6125740493025034, "grad_norm": 1.359375, "learning_rate": 6.037122526575621e-05, "loss": 1.3482, "step": 4007 }, { "epoch": 0.6127269252818651, "grad_norm": 1.2265625, "learning_rate": 6.036712660397981e-05, "loss": 1.1676, "step": 4008 }, { "epoch": 0.6128798012612269, "grad_norm": 1.359375, "learning_rate": 6.03630280211881e-05, "loss": 1.5108, "step": 4009 }, { "epoch": 0.6130326772405885, "grad_norm": 1.2734375, "learning_rate": 6.035892951738931e-05, "loss": 1.2183, "step": 4010 }, { "epoch": 0.6131855532199503, "grad_norm": 1.28125, "learning_rate": 6.035483109259159e-05, "loss": 1.2034, "step": 4011 }, { "epoch": 0.613338429199312, "grad_norm": 1.2265625, "learning_rate": 6.0350732746803083e-05, "loss": 1.3228, "step": 4012 }, { "epoch": 0.6134913051786738, "grad_norm": 1.40625, "learning_rate": 6.034663448003204e-05, "loss": 1.655, "step": 4013 }, { "epoch": 0.6136441811580355, "grad_norm": 1.4453125, "learning_rate": 6.0342536292286525e-05, "loss": 1.6641, "step": 4014 }, { "epoch": 0.6137970571373973, "grad_norm": 1.453125, "learning_rate": 6.0338438183574776e-05, "loss": 1.3534, "step": 4015 }, { "epoch": 0.613949933116759, "grad_norm": 1.3828125, "learning_rate": 6.033434015390491e-05, "loss": 1.2487, "step": 4016 }, { "epoch": 0.6141028090961208, "grad_norm": 1.5859375, "learning_rate": 6.033024220328516e-05, "loss": 1.6375, "step": 4017 }, { "epoch": 0.6142556850754826, "grad_norm": 1.4609375, "learning_rate": 6.0326144331723654e-05, "loss": 1.47, "step": 4018 }, { "epoch": 0.6144085610548442, "grad_norm": 1.453125, "learning_rate": 6.032204653922853e-05, "loss": 1.4144, "step": 4019 }, { "epoch": 0.614561437034206, "grad_norm": 1.3671875, "learning_rate": 6.031794882580799e-05, "loss": 1.4177, "step": 4020 }, { "epoch": 0.6147143130135677, "grad_norm": 1.421875, "learning_rate": 6.0313851191470196e-05, "loss": 1.3866, "step": 4021 }, { "epoch": 0.6148671889929295, "grad_norm": 1.28125, "learning_rate": 6.0309753636223334e-05, "loss": 1.1008, "step": 4022 }, { "epoch": 0.6150200649722912, "grad_norm": 1.3671875, "learning_rate": 6.0305656160075485e-05, "loss": 1.4731, "step": 4023 }, { "epoch": 0.615172940951653, "grad_norm": 1.3984375, "learning_rate": 6.030155876303495e-05, "loss": 1.4211, "step": 4024 }, { "epoch": 0.6153258169310147, "grad_norm": 1.2890625, "learning_rate": 6.029746144510979e-05, "loss": 1.4508, "step": 4025 }, { "epoch": 0.6154786929103765, "grad_norm": 1.234375, "learning_rate": 6.02933642063082e-05, "loss": 1.2093, "step": 4026 }, { "epoch": 0.6156315688897382, "grad_norm": 1.34375, "learning_rate": 6.0289267046638375e-05, "loss": 1.5126, "step": 4027 }, { "epoch": 0.6157844448690999, "grad_norm": 1.3515625, "learning_rate": 6.0285169966108404e-05, "loss": 1.2902, "step": 4028 }, { "epoch": 0.6159373208484616, "grad_norm": 1.3984375, "learning_rate": 6.028107296472656e-05, "loss": 1.617, "step": 4029 }, { "epoch": 0.6160901968278234, "grad_norm": 1.4375, "learning_rate": 6.02769760425009e-05, "loss": 1.4328, "step": 4030 }, { "epoch": 0.6162430728071852, "grad_norm": 1.328125, "learning_rate": 6.027287919943966e-05, "loss": 1.5554, "step": 4031 }, { "epoch": 0.6163959487865469, "grad_norm": 1.53125, "learning_rate": 6.026878243555101e-05, "loss": 1.371, "step": 4032 }, { "epoch": 0.6165488247659087, "grad_norm": 1.2578125, "learning_rate": 6.0264685750843044e-05, "loss": 1.1734, "step": 4033 }, { "epoch": 0.6167017007452704, "grad_norm": 1.328125, "learning_rate": 6.0260589145323986e-05, "loss": 1.3712, "step": 4034 }, { "epoch": 0.6168545767246322, "grad_norm": 1.546875, "learning_rate": 6.025649261900197e-05, "loss": 1.5426, "step": 4035 }, { "epoch": 0.6170074527039939, "grad_norm": 1.515625, "learning_rate": 6.025239617188519e-05, "loss": 1.6385, "step": 4036 }, { "epoch": 0.6171603286833556, "grad_norm": 1.515625, "learning_rate": 6.024829980398182e-05, "loss": 1.4084, "step": 4037 }, { "epoch": 0.6173132046627173, "grad_norm": 1.3515625, "learning_rate": 6.024420351529996e-05, "loss": 1.4511, "step": 4038 }, { "epoch": 0.6174660806420791, "grad_norm": 1.359375, "learning_rate": 6.0240107305847814e-05, "loss": 1.1239, "step": 4039 }, { "epoch": 0.6176189566214408, "grad_norm": 1.2109375, "learning_rate": 6.023601117563354e-05, "loss": 1.2594, "step": 4040 }, { "epoch": 0.6177718326008026, "grad_norm": 1.5703125, "learning_rate": 6.023191512466533e-05, "loss": 1.4443, "step": 4041 }, { "epoch": 0.6179247085801644, "grad_norm": 1.359375, "learning_rate": 6.022781915295128e-05, "loss": 1.6002, "step": 4042 }, { "epoch": 0.6180775845595261, "grad_norm": 1.2265625, "learning_rate": 6.022372326049966e-05, "loss": 1.3151, "step": 4043 }, { "epoch": 0.6182304605388879, "grad_norm": 1.2890625, "learning_rate": 6.0219627447318515e-05, "loss": 1.3297, "step": 4044 }, { "epoch": 0.6183833365182496, "grad_norm": 1.4296875, "learning_rate": 6.021553171341605e-05, "loss": 1.6577, "step": 4045 }, { "epoch": 0.6185362124976113, "grad_norm": 1.4140625, "learning_rate": 6.021143605880046e-05, "loss": 1.2171, "step": 4046 }, { "epoch": 0.618689088476973, "grad_norm": 1.4453125, "learning_rate": 6.0207340483479865e-05, "loss": 1.5134, "step": 4047 }, { "epoch": 0.6188419644563348, "grad_norm": 1.4765625, "learning_rate": 6.020324498746249e-05, "loss": 1.338, "step": 4048 }, { "epoch": 0.6189948404356965, "grad_norm": 1.375, "learning_rate": 6.0199149570756406e-05, "loss": 1.3365, "step": 4049 }, { "epoch": 0.6191477164150583, "grad_norm": 1.3984375, "learning_rate": 6.019505423336984e-05, "loss": 1.4756, "step": 4050 }, { "epoch": 0.61930059239442, "grad_norm": 1.4296875, "learning_rate": 6.019095897531095e-05, "loss": 1.6265, "step": 4051 }, { "epoch": 0.6194534683737818, "grad_norm": 1.484375, "learning_rate": 6.018686379658784e-05, "loss": 1.5889, "step": 4052 }, { "epoch": 0.6196063443531435, "grad_norm": 1.3984375, "learning_rate": 6.018276869720878e-05, "loss": 1.4411, "step": 4053 }, { "epoch": 0.6197592203325053, "grad_norm": 1.3203125, "learning_rate": 6.017867367718181e-05, "loss": 1.2379, "step": 4054 }, { "epoch": 0.619912096311867, "grad_norm": 1.328125, "learning_rate": 6.017457873651516e-05, "loss": 1.367, "step": 4055 }, { "epoch": 0.6200649722912287, "grad_norm": 1.390625, "learning_rate": 6.0170483875216975e-05, "loss": 1.6009, "step": 4056 }, { "epoch": 0.6202178482705905, "grad_norm": 1.46875, "learning_rate": 6.016638909329542e-05, "loss": 1.7434, "step": 4057 }, { "epoch": 0.6203707242499522, "grad_norm": 1.421875, "learning_rate": 6.016229439075869e-05, "loss": 1.5292, "step": 4058 }, { "epoch": 0.620523600229314, "grad_norm": 1.3046875, "learning_rate": 6.0158199767614854e-05, "loss": 1.2657, "step": 4059 }, { "epoch": 0.6206764762086757, "grad_norm": 1.2734375, "learning_rate": 6.015410522387215e-05, "loss": 1.0519, "step": 4060 }, { "epoch": 0.6208293521880375, "grad_norm": 1.390625, "learning_rate": 6.01500107595387e-05, "loss": 1.4676, "step": 4061 }, { "epoch": 0.6209822281673992, "grad_norm": 1.3828125, "learning_rate": 6.01459163746227e-05, "loss": 1.3077, "step": 4062 }, { "epoch": 0.621135104146761, "grad_norm": 1.3125, "learning_rate": 6.014182206913226e-05, "loss": 1.4161, "step": 4063 }, { "epoch": 0.6212879801261226, "grad_norm": 1.3046875, "learning_rate": 6.013772784307562e-05, "loss": 1.1153, "step": 4064 }, { "epoch": 0.6214408561054844, "grad_norm": 1.328125, "learning_rate": 6.0133633696460854e-05, "loss": 1.1845, "step": 4065 }, { "epoch": 0.6215937320848461, "grad_norm": 1.453125, "learning_rate": 6.012953962929615e-05, "loss": 1.5894, "step": 4066 }, { "epoch": 0.6217466080642079, "grad_norm": 1.3984375, "learning_rate": 6.012544564158971e-05, "loss": 1.3067, "step": 4067 }, { "epoch": 0.6218994840435697, "grad_norm": 1.5, "learning_rate": 6.012135173334958e-05, "loss": 1.7612, "step": 4068 }, { "epoch": 0.6220523600229314, "grad_norm": 1.3359375, "learning_rate": 6.011725790458407e-05, "loss": 1.2374, "step": 4069 }, { "epoch": 0.6222052360022932, "grad_norm": 1.234375, "learning_rate": 6.0113164155301235e-05, "loss": 1.2701, "step": 4070 }, { "epoch": 0.6223581119816549, "grad_norm": 1.2734375, "learning_rate": 6.0109070485509244e-05, "loss": 1.3964, "step": 4071 }, { "epoch": 0.6225109879610167, "grad_norm": 1.390625, "learning_rate": 6.0104976895216304e-05, "loss": 1.2528, "step": 4072 }, { "epoch": 0.6226638639403783, "grad_norm": 1.53125, "learning_rate": 6.01008833844305e-05, "loss": 1.7824, "step": 4073 }, { "epoch": 0.6228167399197401, "grad_norm": 1.296875, "learning_rate": 6.009678995316007e-05, "loss": 1.2479, "step": 4074 }, { "epoch": 0.6229696158991018, "grad_norm": 1.390625, "learning_rate": 6.00926966014131e-05, "loss": 1.4622, "step": 4075 }, { "epoch": 0.6231224918784636, "grad_norm": 1.4765625, "learning_rate": 6.0088603329197804e-05, "loss": 1.4158, "step": 4076 }, { "epoch": 0.6232753678578253, "grad_norm": 1.3046875, "learning_rate": 6.008451013652233e-05, "loss": 1.2136, "step": 4077 }, { "epoch": 0.6234282438371871, "grad_norm": 1.28125, "learning_rate": 6.0080417023394794e-05, "loss": 1.4027, "step": 4078 }, { "epoch": 0.6235811198165488, "grad_norm": 1.2421875, "learning_rate": 6.007632398982339e-05, "loss": 1.2023, "step": 4079 }, { "epoch": 0.6237339957959106, "grad_norm": 1.1953125, "learning_rate": 6.007223103581625e-05, "loss": 1.0747, "step": 4080 }, { "epoch": 0.6238868717752724, "grad_norm": 1.296875, "learning_rate": 6.006813816138157e-05, "loss": 1.2905, "step": 4081 }, { "epoch": 0.624039747754634, "grad_norm": 1.2265625, "learning_rate": 6.0064045366527456e-05, "loss": 1.3955, "step": 4082 }, { "epoch": 0.6241926237339958, "grad_norm": 1.3515625, "learning_rate": 6.005995265126214e-05, "loss": 1.2215, "step": 4083 }, { "epoch": 0.6243454997133575, "grad_norm": 1.2265625, "learning_rate": 6.005586001559371e-05, "loss": 1.5669, "step": 4084 }, { "epoch": 0.6244983756927193, "grad_norm": 1.4140625, "learning_rate": 6.005176745953032e-05, "loss": 1.4027, "step": 4085 }, { "epoch": 0.624651251672081, "grad_norm": 1.4375, "learning_rate": 6.0047674983080174e-05, "loss": 1.2492, "step": 4086 }, { "epoch": 0.6248041276514428, "grad_norm": 1.3203125, "learning_rate": 6.004358258625137e-05, "loss": 1.4168, "step": 4087 }, { "epoch": 0.6249570036308045, "grad_norm": 1.3125, "learning_rate": 6.0039490269052134e-05, "loss": 1.1013, "step": 4088 }, { "epoch": 0.6251098796101663, "grad_norm": 1.421875, "learning_rate": 6.003539803149056e-05, "loss": 1.4106, "step": 4089 }, { "epoch": 0.625262755589528, "grad_norm": 1.46875, "learning_rate": 6.0031305873574836e-05, "loss": 1.6608, "step": 4090 }, { "epoch": 0.6254156315688897, "grad_norm": 1.390625, "learning_rate": 6.002721379531313e-05, "loss": 1.2811, "step": 4091 }, { "epoch": 0.6255685075482514, "grad_norm": 1.421875, "learning_rate": 6.0023121796713534e-05, "loss": 1.5467, "step": 4092 }, { "epoch": 0.6257213835276132, "grad_norm": 1.5078125, "learning_rate": 6.00190298777843e-05, "loss": 1.4478, "step": 4093 }, { "epoch": 0.625874259506975, "grad_norm": 1.375, "learning_rate": 6.001493803853348e-05, "loss": 1.5149, "step": 4094 }, { "epoch": 0.6260271354863367, "grad_norm": 1.2421875, "learning_rate": 6.001084627896929e-05, "loss": 1.3086, "step": 4095 }, { "epoch": 0.6261800114656985, "grad_norm": 2.109375, "learning_rate": 6.0006754599099855e-05, "loss": 1.6858, "step": 4096 }, { "epoch": 0.6263328874450602, "grad_norm": 1.2421875, "learning_rate": 6.000266299893338e-05, "loss": 1.1709, "step": 4097 }, { "epoch": 0.626485763424422, "grad_norm": 1.3359375, "learning_rate": 5.999857147847798e-05, "loss": 1.4151, "step": 4098 }, { "epoch": 0.6266386394037837, "grad_norm": 1.5546875, "learning_rate": 5.99944800377418e-05, "loss": 1.5953, "step": 4099 }, { "epoch": 0.6267915153831454, "grad_norm": 1.453125, "learning_rate": 5.999038867673301e-05, "loss": 1.2321, "step": 4100 }, { "epoch": 0.6269443913625071, "grad_norm": 1.1796875, "learning_rate": 5.9986297395459744e-05, "loss": 1.2383, "step": 4101 }, { "epoch": 0.6270972673418689, "grad_norm": 1.2890625, "learning_rate": 5.99822061939302e-05, "loss": 1.525, "step": 4102 }, { "epoch": 0.6272501433212306, "grad_norm": 1.3984375, "learning_rate": 5.997811507215252e-05, "loss": 1.6652, "step": 4103 }, { "epoch": 0.6274030193005924, "grad_norm": 1.3203125, "learning_rate": 5.99740240301348e-05, "loss": 1.3383, "step": 4104 }, { "epoch": 0.6275558952799541, "grad_norm": 1.2265625, "learning_rate": 5.996993306788525e-05, "loss": 1.2283, "step": 4105 }, { "epoch": 0.6277087712593159, "grad_norm": 1.3984375, "learning_rate": 5.996584218541199e-05, "loss": 1.4793, "step": 4106 }, { "epoch": 0.6278616472386777, "grad_norm": 1.296875, "learning_rate": 5.996175138272324e-05, "loss": 1.2043, "step": 4107 }, { "epoch": 0.6280145232180394, "grad_norm": 1.421875, "learning_rate": 5.995766065982704e-05, "loss": 1.5636, "step": 4108 }, { "epoch": 0.628167399197401, "grad_norm": 1.3671875, "learning_rate": 5.995357001673165e-05, "loss": 1.1648, "step": 4109 }, { "epoch": 0.6283202751767628, "grad_norm": 1.296875, "learning_rate": 5.994947945344518e-05, "loss": 1.3333, "step": 4110 }, { "epoch": 0.6284731511561246, "grad_norm": 1.34375, "learning_rate": 5.9945388969975745e-05, "loss": 1.5915, "step": 4111 }, { "epoch": 0.6286260271354863, "grad_norm": 1.328125, "learning_rate": 5.994129856633157e-05, "loss": 1.3535, "step": 4112 }, { "epoch": 0.6287789031148481, "grad_norm": 1.390625, "learning_rate": 5.993720824252071e-05, "loss": 1.3447, "step": 4113 }, { "epoch": 0.6289317790942098, "grad_norm": 1.40625, "learning_rate": 5.993311799855146e-05, "loss": 1.3858, "step": 4114 }, { "epoch": 0.6290846550735716, "grad_norm": 1.359375, "learning_rate": 5.9929027834431817e-05, "loss": 1.4118, "step": 4115 }, { "epoch": 0.6292375310529333, "grad_norm": 1.328125, "learning_rate": 5.992493775017003e-05, "loss": 1.3358, "step": 4116 }, { "epoch": 0.6293904070322951, "grad_norm": 1.3984375, "learning_rate": 5.9920847745774244e-05, "loss": 1.2204, "step": 4117 }, { "epoch": 0.6295432830116567, "grad_norm": 1.484375, "learning_rate": 5.991675782125254e-05, "loss": 1.2908, "step": 4118 }, { "epoch": 0.6296961589910185, "grad_norm": 1.453125, "learning_rate": 5.991266797661315e-05, "loss": 1.4187, "step": 4119 }, { "epoch": 0.6298490349703803, "grad_norm": 1.328125, "learning_rate": 5.990857821186416e-05, "loss": 1.2775, "step": 4120 }, { "epoch": 0.630001910949742, "grad_norm": 1.375, "learning_rate": 5.990448852701378e-05, "loss": 1.2204, "step": 4121 }, { "epoch": 0.6301547869291038, "grad_norm": 1.3515625, "learning_rate": 5.990039892207011e-05, "loss": 1.4928, "step": 4122 }, { "epoch": 0.6303076629084655, "grad_norm": 1.3828125, "learning_rate": 5.9896309397041364e-05, "loss": 1.3831, "step": 4123 }, { "epoch": 0.6304605388878273, "grad_norm": 1.3984375, "learning_rate": 5.9892219951935634e-05, "loss": 1.3982, "step": 4124 }, { "epoch": 0.630613414867189, "grad_norm": 1.4765625, "learning_rate": 5.9888130586761056e-05, "loss": 1.4663, "step": 4125 }, { "epoch": 0.6307662908465508, "grad_norm": 1.3984375, "learning_rate": 5.9884041301525837e-05, "loss": 1.3951, "step": 4126 }, { "epoch": 0.6309191668259124, "grad_norm": 1.296875, "learning_rate": 5.987995209623809e-05, "loss": 1.4536, "step": 4127 }, { "epoch": 0.6310720428052742, "grad_norm": 1.40625, "learning_rate": 5.9875862970905996e-05, "loss": 1.4809, "step": 4128 }, { "epoch": 0.6312249187846359, "grad_norm": 1.390625, "learning_rate": 5.987177392553765e-05, "loss": 1.3661, "step": 4129 }, { "epoch": 0.6313777947639977, "grad_norm": 1.328125, "learning_rate": 5.9867684960141254e-05, "loss": 1.4626, "step": 4130 }, { "epoch": 0.6315306707433594, "grad_norm": 1.421875, "learning_rate": 5.986359607472494e-05, "loss": 1.4975, "step": 4131 }, { "epoch": 0.6316835467227212, "grad_norm": 1.34375, "learning_rate": 5.985950726929683e-05, "loss": 1.2845, "step": 4132 }, { "epoch": 0.631836422702083, "grad_norm": 1.3203125, "learning_rate": 5.9855418543865136e-05, "loss": 1.3322, "step": 4133 }, { "epoch": 0.6319892986814447, "grad_norm": 1.2890625, "learning_rate": 5.9851329898437934e-05, "loss": 1.1547, "step": 4134 }, { "epoch": 0.6321421746608065, "grad_norm": 1.609375, "learning_rate": 5.984724133302342e-05, "loss": 1.6427, "step": 4135 }, { "epoch": 0.6322950506401681, "grad_norm": 1.484375, "learning_rate": 5.9843152847629725e-05, "loss": 1.5787, "step": 4136 }, { "epoch": 0.6324479266195299, "grad_norm": 1.359375, "learning_rate": 5.983906444226497e-05, "loss": 1.4668, "step": 4137 }, { "epoch": 0.6326008025988916, "grad_norm": 1.28125, "learning_rate": 5.983497611693739e-05, "loss": 1.2627, "step": 4138 }, { "epoch": 0.6327536785782534, "grad_norm": 1.28125, "learning_rate": 5.983088787165503e-05, "loss": 1.3474, "step": 4139 }, { "epoch": 0.6329065545576151, "grad_norm": 1.3671875, "learning_rate": 5.982679970642609e-05, "loss": 1.316, "step": 4140 }, { "epoch": 0.6330594305369769, "grad_norm": 1.296875, "learning_rate": 5.9822711621258696e-05, "loss": 1.2717, "step": 4141 }, { "epoch": 0.6332123065163386, "grad_norm": 1.3984375, "learning_rate": 5.981862361616104e-05, "loss": 1.4686, "step": 4142 }, { "epoch": 0.6333651824957004, "grad_norm": 1.3828125, "learning_rate": 5.981453569114124e-05, "loss": 1.4755, "step": 4143 }, { "epoch": 0.6335180584750622, "grad_norm": 1.359375, "learning_rate": 5.9810447846207416e-05, "loss": 1.3674, "step": 4144 }, { "epoch": 0.6336709344544238, "grad_norm": 1.5, "learning_rate": 5.9806360081367755e-05, "loss": 1.3611, "step": 4145 }, { "epoch": 0.6338238104337856, "grad_norm": 1.359375, "learning_rate": 5.980227239663035e-05, "loss": 1.5732, "step": 4146 }, { "epoch": 0.6339766864131473, "grad_norm": 1.453125, "learning_rate": 5.979818479200343e-05, "loss": 1.7883, "step": 4147 }, { "epoch": 0.6341295623925091, "grad_norm": 1.375, "learning_rate": 5.9794097267495054e-05, "loss": 1.4618, "step": 4148 }, { "epoch": 0.6342824383718708, "grad_norm": 1.421875, "learning_rate": 5.979000982311347e-05, "loss": 1.3509, "step": 4149 }, { "epoch": 0.6344353143512326, "grad_norm": 1.71875, "learning_rate": 5.9785922458866736e-05, "loss": 1.6098, "step": 4150 }, { "epoch": 0.6345881903305943, "grad_norm": 1.2734375, "learning_rate": 5.9781835174763e-05, "loss": 1.3756, "step": 4151 }, { "epoch": 0.6347410663099561, "grad_norm": 1.3046875, "learning_rate": 5.9777747970810486e-05, "loss": 1.4262, "step": 4152 }, { "epoch": 0.6348939422893178, "grad_norm": 1.578125, "learning_rate": 5.977366084701721e-05, "loss": 1.6788, "step": 4153 }, { "epoch": 0.6350468182686795, "grad_norm": 1.2421875, "learning_rate": 5.9769573803391476e-05, "loss": 1.2117, "step": 4154 }, { "epoch": 0.6351996942480412, "grad_norm": 1.3125, "learning_rate": 5.97654868399413e-05, "loss": 1.3441, "step": 4155 }, { "epoch": 0.635352570227403, "grad_norm": 1.375, "learning_rate": 5.976139995667489e-05, "loss": 1.3789, "step": 4156 }, { "epoch": 0.6355054462067647, "grad_norm": 1.265625, "learning_rate": 5.975731315360039e-05, "loss": 1.312, "step": 4157 }, { "epoch": 0.6356583221861265, "grad_norm": 1.4921875, "learning_rate": 5.9753226430725897e-05, "loss": 1.609, "step": 4158 }, { "epoch": 0.6358111981654883, "grad_norm": 1.6640625, "learning_rate": 5.974913978805961e-05, "loss": 1.4467, "step": 4159 }, { "epoch": 0.63596407414485, "grad_norm": 1.5, "learning_rate": 5.9745053225609614e-05, "loss": 1.5437, "step": 4160 }, { "epoch": 0.6361169501242118, "grad_norm": 1.453125, "learning_rate": 5.9740966743384144e-05, "loss": 1.2433, "step": 4161 }, { "epoch": 0.6362698261035735, "grad_norm": 1.3046875, "learning_rate": 5.973688034139129e-05, "loss": 1.327, "step": 4162 }, { "epoch": 0.6364227020829352, "grad_norm": 1.4296875, "learning_rate": 5.973279401963915e-05, "loss": 1.2368, "step": 4163 }, { "epoch": 0.6365755780622969, "grad_norm": 1.5078125, "learning_rate": 5.9728707778135953e-05, "loss": 1.4383, "step": 4164 }, { "epoch": 0.6367284540416587, "grad_norm": 1.5, "learning_rate": 5.9724621616889764e-05, "loss": 1.6541, "step": 4165 }, { "epoch": 0.6368813300210204, "grad_norm": 1.3359375, "learning_rate": 5.9720535535908795e-05, "loss": 1.3641, "step": 4166 }, { "epoch": 0.6370342060003822, "grad_norm": 1.296875, "learning_rate": 5.9716449535201146e-05, "loss": 1.1848, "step": 4167 }, { "epoch": 0.6371870819797439, "grad_norm": 1.390625, "learning_rate": 5.9712363614775e-05, "loss": 1.327, "step": 4168 }, { "epoch": 0.6373399579591057, "grad_norm": 1.453125, "learning_rate": 5.970827777463847e-05, "loss": 1.4513, "step": 4169 }, { "epoch": 0.6374928339384675, "grad_norm": 1.4296875, "learning_rate": 5.9704192014799666e-05, "loss": 1.5024, "step": 4170 }, { "epoch": 0.6376457099178292, "grad_norm": 1.265625, "learning_rate": 5.9700106335266805e-05, "loss": 1.2762, "step": 4171 }, { "epoch": 0.6377985858971909, "grad_norm": 1.328125, "learning_rate": 5.969602073604796e-05, "loss": 1.5364, "step": 4172 }, { "epoch": 0.6379514618765526, "grad_norm": 1.4140625, "learning_rate": 5.969193521715135e-05, "loss": 1.4426, "step": 4173 }, { "epoch": 0.6381043378559144, "grad_norm": 1.328125, "learning_rate": 5.968784977858504e-05, "loss": 1.4436, "step": 4174 }, { "epoch": 0.6382572138352761, "grad_norm": 1.4453125, "learning_rate": 5.968376442035721e-05, "loss": 1.4863, "step": 4175 }, { "epoch": 0.6384100898146379, "grad_norm": 1.4375, "learning_rate": 5.9679679142475997e-05, "loss": 1.5194, "step": 4176 }, { "epoch": 0.6385629657939996, "grad_norm": 1.4453125, "learning_rate": 5.9675593944949525e-05, "loss": 1.2618, "step": 4177 }, { "epoch": 0.6387158417733614, "grad_norm": 1.2734375, "learning_rate": 5.9671508827785995e-05, "loss": 1.3181, "step": 4178 }, { "epoch": 0.6388687177527231, "grad_norm": 1.3828125, "learning_rate": 5.966742379099346e-05, "loss": 1.3703, "step": 4179 }, { "epoch": 0.6390215937320849, "grad_norm": 1.3359375, "learning_rate": 5.966333883458013e-05, "loss": 1.4947, "step": 4180 }, { "epoch": 0.6391744697114465, "grad_norm": 1.40625, "learning_rate": 5.9659253958554096e-05, "loss": 1.4011, "step": 4181 }, { "epoch": 0.6393273456908083, "grad_norm": 1.5078125, "learning_rate": 5.9655169162923555e-05, "loss": 1.3164, "step": 4182 }, { "epoch": 0.63948022167017, "grad_norm": 1.4296875, "learning_rate": 5.965108444769663e-05, "loss": 1.4282, "step": 4183 }, { "epoch": 0.6396330976495318, "grad_norm": 1.3046875, "learning_rate": 5.964699981288141e-05, "loss": 1.3328, "step": 4184 }, { "epoch": 0.6397859736288936, "grad_norm": 1.5, "learning_rate": 5.964291525848609e-05, "loss": 1.3792, "step": 4185 }, { "epoch": 0.6399388496082553, "grad_norm": 1.5, "learning_rate": 5.963883078451878e-05, "loss": 1.3503, "step": 4186 }, { "epoch": 0.6400917255876171, "grad_norm": 1.5078125, "learning_rate": 5.963474639098765e-05, "loss": 1.5502, "step": 4187 }, { "epoch": 0.6402446015669788, "grad_norm": 1.3984375, "learning_rate": 5.963066207790082e-05, "loss": 1.5576, "step": 4188 }, { "epoch": 0.6403974775463406, "grad_norm": 1.5, "learning_rate": 5.962657784526646e-05, "loss": 1.422, "step": 4189 }, { "epoch": 0.6405503535257022, "grad_norm": 1.1875, "learning_rate": 5.9622493693092666e-05, "loss": 1.327, "step": 4190 }, { "epoch": 0.640703229505064, "grad_norm": 1.3203125, "learning_rate": 5.9618409621387574e-05, "loss": 1.3528, "step": 4191 }, { "epoch": 0.6408561054844257, "grad_norm": 1.203125, "learning_rate": 5.961432563015938e-05, "loss": 1.2262, "step": 4192 }, { "epoch": 0.6410089814637875, "grad_norm": 1.3984375, "learning_rate": 5.961024171941614e-05, "loss": 1.4469, "step": 4193 }, { "epoch": 0.6411618574431492, "grad_norm": 1.4765625, "learning_rate": 5.960615788916608e-05, "loss": 1.4705, "step": 4194 }, { "epoch": 0.641314733422511, "grad_norm": 1.34375, "learning_rate": 5.96020741394173e-05, "loss": 1.2829, "step": 4195 }, { "epoch": 0.6414676094018728, "grad_norm": 1.3046875, "learning_rate": 5.95979904701779e-05, "loss": 0.9993, "step": 4196 }, { "epoch": 0.6416204853812345, "grad_norm": 1.359375, "learning_rate": 5.95939068814561e-05, "loss": 1.4291, "step": 4197 }, { "epoch": 0.6417733613605963, "grad_norm": 1.5, "learning_rate": 5.9589823373259935e-05, "loss": 1.694, "step": 4198 }, { "epoch": 0.6419262373399579, "grad_norm": 1.234375, "learning_rate": 5.958573994559766e-05, "loss": 1.329, "step": 4199 }, { "epoch": 0.6420791133193197, "grad_norm": 1.4453125, "learning_rate": 5.958165659847732e-05, "loss": 1.6727, "step": 4200 }, { "epoch": 0.6422319892986814, "grad_norm": 1.34375, "learning_rate": 5.957757333190711e-05, "loss": 1.528, "step": 4201 }, { "epoch": 0.6423848652780432, "grad_norm": 1.265625, "learning_rate": 5.957349014589516e-05, "loss": 1.3244, "step": 4202 }, { "epoch": 0.6425377412574049, "grad_norm": 1.5, "learning_rate": 5.9569407040449555e-05, "loss": 1.4037, "step": 4203 }, { "epoch": 0.6426906172367667, "grad_norm": 1.1796875, "learning_rate": 5.9565324015578484e-05, "loss": 1.0865, "step": 4204 }, { "epoch": 0.6428434932161284, "grad_norm": 1.4296875, "learning_rate": 5.9561241071290054e-05, "loss": 1.4521, "step": 4205 }, { "epoch": 0.6429963691954902, "grad_norm": 1.4140625, "learning_rate": 5.9557158207592445e-05, "loss": 1.18, "step": 4206 }, { "epoch": 0.643149245174852, "grad_norm": 1.34375, "learning_rate": 5.955307542449373e-05, "loss": 1.3011, "step": 4207 }, { "epoch": 0.6433021211542136, "grad_norm": 1.546875, "learning_rate": 5.954899272200213e-05, "loss": 1.484, "step": 4208 }, { "epoch": 0.6434549971335753, "grad_norm": 1.390625, "learning_rate": 5.954491010012572e-05, "loss": 1.4496, "step": 4209 }, { "epoch": 0.6436078731129371, "grad_norm": 1.34375, "learning_rate": 5.954082755887263e-05, "loss": 1.1915, "step": 4210 }, { "epoch": 0.6437607490922989, "grad_norm": 1.4453125, "learning_rate": 5.953674509825102e-05, "loss": 1.713, "step": 4211 }, { "epoch": 0.6439136250716606, "grad_norm": 1.3828125, "learning_rate": 5.953266271826901e-05, "loss": 1.3459, "step": 4212 }, { "epoch": 0.6440665010510224, "grad_norm": 1.3359375, "learning_rate": 5.952858041893479e-05, "loss": 1.3017, "step": 4213 }, { "epoch": 0.6442193770303841, "grad_norm": 1.2421875, "learning_rate": 5.952449820025641e-05, "loss": 1.2664, "step": 4214 }, { "epoch": 0.6443722530097459, "grad_norm": 1.2890625, "learning_rate": 5.952041606224208e-05, "loss": 1.3154, "step": 4215 }, { "epoch": 0.6445251289891076, "grad_norm": 1.5625, "learning_rate": 5.951633400489989e-05, "loss": 1.6162, "step": 4216 }, { "epoch": 0.6446780049684693, "grad_norm": 1.5078125, "learning_rate": 5.951225202823798e-05, "loss": 1.5597, "step": 4217 }, { "epoch": 0.644830880947831, "grad_norm": 1.4609375, "learning_rate": 5.950817013226453e-05, "loss": 1.1965, "step": 4218 }, { "epoch": 0.6449837569271928, "grad_norm": 1.3515625, "learning_rate": 5.950408831698759e-05, "loss": 1.1288, "step": 4219 }, { "epoch": 0.6451366329065545, "grad_norm": 1.4765625, "learning_rate": 5.950000658241538e-05, "loss": 1.4928, "step": 4220 }, { "epoch": 0.6452895088859163, "grad_norm": 1.3984375, "learning_rate": 5.9495924928555965e-05, "loss": 1.4873, "step": 4221 }, { "epoch": 0.645442384865278, "grad_norm": 1.2578125, "learning_rate": 5.9491843355417555e-05, "loss": 1.1042, "step": 4222 }, { "epoch": 0.6455952608446398, "grad_norm": 1.34375, "learning_rate": 5.948776186300824e-05, "loss": 1.2457, "step": 4223 }, { "epoch": 0.6457481368240016, "grad_norm": 1.421875, "learning_rate": 5.948368045133612e-05, "loss": 1.539, "step": 4224 }, { "epoch": 0.6459010128033633, "grad_norm": 1.421875, "learning_rate": 5.947959912040939e-05, "loss": 1.6344, "step": 4225 }, { "epoch": 0.646053888782725, "grad_norm": 1.5625, "learning_rate": 5.947551787023613e-05, "loss": 1.3432, "step": 4226 }, { "epoch": 0.6462067647620867, "grad_norm": 1.4140625, "learning_rate": 5.947143670082454e-05, "loss": 1.4261, "step": 4227 }, { "epoch": 0.6463596407414485, "grad_norm": 1.359375, "learning_rate": 5.946735561218273e-05, "loss": 1.4241, "step": 4228 }, { "epoch": 0.6465125167208102, "grad_norm": 1.2890625, "learning_rate": 5.9463274604318766e-05, "loss": 1.1842, "step": 4229 }, { "epoch": 0.646665392700172, "grad_norm": 1.390625, "learning_rate": 5.9459193677240865e-05, "loss": 1.3574, "step": 4230 }, { "epoch": 0.6468182686795337, "grad_norm": 1.3359375, "learning_rate": 5.945511283095711e-05, "loss": 1.2765, "step": 4231 }, { "epoch": 0.6469711446588955, "grad_norm": 1.5078125, "learning_rate": 5.9451032065475685e-05, "loss": 1.5222, "step": 4232 }, { "epoch": 0.6471240206382572, "grad_norm": 1.3125, "learning_rate": 5.944695138080463e-05, "loss": 1.1934, "step": 4233 }, { "epoch": 0.647276896617619, "grad_norm": 1.6015625, "learning_rate": 5.944287077695221e-05, "loss": 1.5523, "step": 4234 }, { "epoch": 0.6474297725969806, "grad_norm": 1.34375, "learning_rate": 5.943879025392645e-05, "loss": 1.4177, "step": 4235 }, { "epoch": 0.6475826485763424, "grad_norm": 1.4296875, "learning_rate": 5.94347098117355e-05, "loss": 1.608, "step": 4236 }, { "epoch": 0.6477355245557042, "grad_norm": 1.515625, "learning_rate": 5.9430629450387556e-05, "loss": 1.6417, "step": 4237 }, { "epoch": 0.6478884005350659, "grad_norm": 1.421875, "learning_rate": 5.942654916989063e-05, "loss": 1.3606, "step": 4238 }, { "epoch": 0.6480412765144277, "grad_norm": 1.3359375, "learning_rate": 5.942246897025301e-05, "loss": 1.1486, "step": 4239 }, { "epoch": 0.6481941524937894, "grad_norm": 1.484375, "learning_rate": 5.941838885148269e-05, "loss": 1.2818, "step": 4240 }, { "epoch": 0.6483470284731512, "grad_norm": 1.3046875, "learning_rate": 5.941430881358787e-05, "loss": 1.4263, "step": 4241 }, { "epoch": 0.6484999044525129, "grad_norm": 1.4765625, "learning_rate": 5.94102288565767e-05, "loss": 1.4444, "step": 4242 }, { "epoch": 0.6486527804318747, "grad_norm": 1.5, "learning_rate": 5.940614898045722e-05, "loss": 1.2313, "step": 4243 }, { "epoch": 0.6488056564112363, "grad_norm": 1.46875, "learning_rate": 5.940206918523765e-05, "loss": 1.5187, "step": 4244 }, { "epoch": 0.6489585323905981, "grad_norm": 1.328125, "learning_rate": 5.939798947092607e-05, "loss": 1.239, "step": 4245 }, { "epoch": 0.6491114083699598, "grad_norm": 1.2890625, "learning_rate": 5.939390983753064e-05, "loss": 1.151, "step": 4246 }, { "epoch": 0.6492642843493216, "grad_norm": 1.328125, "learning_rate": 5.9389830285059466e-05, "loss": 1.5471, "step": 4247 }, { "epoch": 0.6494171603286834, "grad_norm": 1.40625, "learning_rate": 5.938575081352073e-05, "loss": 1.4026, "step": 4248 }, { "epoch": 0.6495700363080451, "grad_norm": 1.4609375, "learning_rate": 5.9381671422922515e-05, "loss": 1.2434, "step": 4249 }, { "epoch": 0.6497229122874069, "grad_norm": 1.3671875, "learning_rate": 5.937759211327293e-05, "loss": 1.4945, "step": 4250 }, { "epoch": 0.6498757882667686, "grad_norm": 1.3359375, "learning_rate": 5.937351288458015e-05, "loss": 1.33, "step": 4251 }, { "epoch": 0.6500286642461304, "grad_norm": 1.2890625, "learning_rate": 5.9369433736852275e-05, "loss": 1.224, "step": 4252 }, { "epoch": 0.650181540225492, "grad_norm": 1.359375, "learning_rate": 5.9365354670097495e-05, "loss": 1.5206, "step": 4253 }, { "epoch": 0.6503344162048538, "grad_norm": 1.3515625, "learning_rate": 5.9361275684323836e-05, "loss": 1.3282, "step": 4254 }, { "epoch": 0.6504872921842155, "grad_norm": 1.2890625, "learning_rate": 5.935719677953952e-05, "loss": 1.1584, "step": 4255 }, { "epoch": 0.6506401681635773, "grad_norm": 1.40625, "learning_rate": 5.935311795575263e-05, "loss": 1.4152, "step": 4256 }, { "epoch": 0.650793044142939, "grad_norm": 1.2578125, "learning_rate": 5.934903921297128e-05, "loss": 1.3302, "step": 4257 }, { "epoch": 0.6509459201223008, "grad_norm": 1.40625, "learning_rate": 5.9344960551203674e-05, "loss": 1.4158, "step": 4258 }, { "epoch": 0.6510987961016625, "grad_norm": 1.34375, "learning_rate": 5.934088197045785e-05, "loss": 1.3953, "step": 4259 }, { "epoch": 0.6512516720810243, "grad_norm": 1.234375, "learning_rate": 5.9336803470741995e-05, "loss": 1.3405, "step": 4260 }, { "epoch": 0.6514045480603861, "grad_norm": 1.328125, "learning_rate": 5.933272505206421e-05, "loss": 1.607, "step": 4261 }, { "epoch": 0.6515574240397477, "grad_norm": 1.34375, "learning_rate": 5.9328646714432615e-05, "loss": 1.3443, "step": 4262 }, { "epoch": 0.6517103000191095, "grad_norm": 1.3671875, "learning_rate": 5.932456845785539e-05, "loss": 1.5838, "step": 4263 }, { "epoch": 0.6518631759984712, "grad_norm": 1.296875, "learning_rate": 5.932049028234059e-05, "loss": 1.2794, "step": 4264 }, { "epoch": 0.652016051977833, "grad_norm": 1.375, "learning_rate": 5.931641218789638e-05, "loss": 1.164, "step": 4265 }, { "epoch": 0.6521689279571947, "grad_norm": 1.359375, "learning_rate": 5.931233417453089e-05, "loss": 1.5332, "step": 4266 }, { "epoch": 0.6523218039365565, "grad_norm": 1.3671875, "learning_rate": 5.930825624225226e-05, "loss": 1.649, "step": 4267 }, { "epoch": 0.6524746799159182, "grad_norm": 1.3125, "learning_rate": 5.930417839106861e-05, "loss": 1.2181, "step": 4268 }, { "epoch": 0.65262755589528, "grad_norm": 1.4765625, "learning_rate": 5.930010062098802e-05, "loss": 1.5903, "step": 4269 }, { "epoch": 0.6527804318746417, "grad_norm": 1.4375, "learning_rate": 5.929602293201868e-05, "loss": 1.4592, "step": 4270 }, { "epoch": 0.6529333078540034, "grad_norm": 1.296875, "learning_rate": 5.929194532416866e-05, "loss": 1.4243, "step": 4271 }, { "epoch": 0.6530861838333651, "grad_norm": 1.5, "learning_rate": 5.928786779744614e-05, "loss": 1.395, "step": 4272 }, { "epoch": 0.6532390598127269, "grad_norm": 1.375, "learning_rate": 5.92837903518592e-05, "loss": 1.4568, "step": 4273 }, { "epoch": 0.6533919357920887, "grad_norm": 1.28125, "learning_rate": 5.927971298741603e-05, "loss": 1.4682, "step": 4274 }, { "epoch": 0.6535448117714504, "grad_norm": 1.4140625, "learning_rate": 5.927563570412469e-05, "loss": 1.2146, "step": 4275 }, { "epoch": 0.6536976877508122, "grad_norm": 1.34375, "learning_rate": 5.9271558501993306e-05, "loss": 1.3698, "step": 4276 }, { "epoch": 0.6538505637301739, "grad_norm": 1.421875, "learning_rate": 5.9267481381030076e-05, "loss": 1.4603, "step": 4277 }, { "epoch": 0.6540034397095357, "grad_norm": 1.4765625, "learning_rate": 5.926340434124302e-05, "loss": 1.5235, "step": 4278 }, { "epoch": 0.6541563156888974, "grad_norm": 1.359375, "learning_rate": 5.9259327382640375e-05, "loss": 1.3263, "step": 4279 }, { "epoch": 0.6543091916682591, "grad_norm": 1.4140625, "learning_rate": 5.9255250505230154e-05, "loss": 1.4016, "step": 4280 }, { "epoch": 0.6544620676476208, "grad_norm": 1.328125, "learning_rate": 5.925117370902058e-05, "loss": 1.1946, "step": 4281 }, { "epoch": 0.6546149436269826, "grad_norm": 1.34375, "learning_rate": 5.9247096994019747e-05, "loss": 1.2739, "step": 4282 }, { "epoch": 0.6547678196063443, "grad_norm": 1.53125, "learning_rate": 5.924302036023572e-05, "loss": 1.3939, "step": 4283 }, { "epoch": 0.6549206955857061, "grad_norm": 1.6171875, "learning_rate": 5.923894380767672e-05, "loss": 1.6393, "step": 4284 }, { "epoch": 0.6550735715650678, "grad_norm": 1.3125, "learning_rate": 5.9234867336350774e-05, "loss": 1.2694, "step": 4285 }, { "epoch": 0.6552264475444296, "grad_norm": 1.375, "learning_rate": 5.923079094626609e-05, "loss": 1.379, "step": 4286 }, { "epoch": 0.6553793235237914, "grad_norm": 1.3359375, "learning_rate": 5.922671463743077e-05, "loss": 1.5188, "step": 4287 }, { "epoch": 0.6555321995031531, "grad_norm": 1.296875, "learning_rate": 5.922263840985289e-05, "loss": 1.2551, "step": 4288 }, { "epoch": 0.6556850754825148, "grad_norm": 1.296875, "learning_rate": 5.9218562263540626e-05, "loss": 1.5643, "step": 4289 }, { "epoch": 0.6558379514618765, "grad_norm": 1.3125, "learning_rate": 5.9214486198502065e-05, "loss": 1.2876, "step": 4290 }, { "epoch": 0.6559908274412383, "grad_norm": 1.296875, "learning_rate": 5.921041021474536e-05, "loss": 1.2428, "step": 4291 }, { "epoch": 0.6561437034206, "grad_norm": 1.265625, "learning_rate": 5.9206334312278614e-05, "loss": 1.1795, "step": 4292 }, { "epoch": 0.6562965793999618, "grad_norm": 1.5234375, "learning_rate": 5.920225849110999e-05, "loss": 1.6569, "step": 4293 }, { "epoch": 0.6564494553793235, "grad_norm": 1.3671875, "learning_rate": 5.919818275124758e-05, "loss": 1.4506, "step": 4294 }, { "epoch": 0.6566023313586853, "grad_norm": 1.3671875, "learning_rate": 5.919410709269946e-05, "loss": 1.389, "step": 4295 }, { "epoch": 0.656755207338047, "grad_norm": 1.3359375, "learning_rate": 5.919003151547383e-05, "loss": 1.5066, "step": 4296 }, { "epoch": 0.6569080833174088, "grad_norm": 1.359375, "learning_rate": 5.918595601957876e-05, "loss": 1.4289, "step": 4297 }, { "epoch": 0.6570609592967704, "grad_norm": 1.46875, "learning_rate": 5.9181880605022436e-05, "loss": 1.5475, "step": 4298 }, { "epoch": 0.6572138352761322, "grad_norm": 1.3046875, "learning_rate": 5.9177805271812895e-05, "loss": 1.2151, "step": 4299 }, { "epoch": 0.657366711255494, "grad_norm": 1.40625, "learning_rate": 5.917373001995832e-05, "loss": 1.3784, "step": 4300 }, { "epoch": 0.6575195872348557, "grad_norm": 1.53125, "learning_rate": 5.9169654849466816e-05, "loss": 1.4208, "step": 4301 }, { "epoch": 0.6576724632142175, "grad_norm": 1.3203125, "learning_rate": 5.916557976034647e-05, "loss": 1.4554, "step": 4302 }, { "epoch": 0.6578253391935792, "grad_norm": 1.6953125, "learning_rate": 5.916150475260549e-05, "loss": 1.5372, "step": 4303 }, { "epoch": 0.657978215172941, "grad_norm": 1.3125, "learning_rate": 5.9157429826251885e-05, "loss": 1.2948, "step": 4304 }, { "epoch": 0.6581310911523027, "grad_norm": 1.40625, "learning_rate": 5.9153354981293866e-05, "loss": 1.3856, "step": 4305 }, { "epoch": 0.6582839671316645, "grad_norm": 1.328125, "learning_rate": 5.91492802177395e-05, "loss": 1.2492, "step": 4306 }, { "epoch": 0.6584368431110261, "grad_norm": 1.234375, "learning_rate": 5.914520553559695e-05, "loss": 1.1786, "step": 4307 }, { "epoch": 0.6585897190903879, "grad_norm": 1.46875, "learning_rate": 5.914113093487432e-05, "loss": 1.6649, "step": 4308 }, { "epoch": 0.6587425950697496, "grad_norm": 1.390625, "learning_rate": 5.913705641557971e-05, "loss": 1.1951, "step": 4309 }, { "epoch": 0.6588954710491114, "grad_norm": 1.390625, "learning_rate": 5.913298197772126e-05, "loss": 1.2305, "step": 4310 }, { "epoch": 0.6590483470284731, "grad_norm": 1.359375, "learning_rate": 5.912890762130707e-05, "loss": 1.2735, "step": 4311 }, { "epoch": 0.6592012230078349, "grad_norm": 1.46875, "learning_rate": 5.9124833346345284e-05, "loss": 1.2256, "step": 4312 }, { "epoch": 0.6593540989871967, "grad_norm": 1.8203125, "learning_rate": 5.9120759152844005e-05, "loss": 1.4538, "step": 4313 }, { "epoch": 0.6595069749665584, "grad_norm": 1.3046875, "learning_rate": 5.9116685040811404e-05, "loss": 1.3114, "step": 4314 }, { "epoch": 0.6596598509459202, "grad_norm": 1.3125, "learning_rate": 5.911261101025553e-05, "loss": 1.1911, "step": 4315 }, { "epoch": 0.6598127269252818, "grad_norm": 1.5234375, "learning_rate": 5.910853706118451e-05, "loss": 1.4293, "step": 4316 }, { "epoch": 0.6599656029046436, "grad_norm": 1.4765625, "learning_rate": 5.910446319360652e-05, "loss": 1.5836, "step": 4317 }, { "epoch": 0.6601184788840053, "grad_norm": 1.21875, "learning_rate": 5.910038940752958e-05, "loss": 1.2143, "step": 4318 }, { "epoch": 0.6602713548633671, "grad_norm": 1.4375, "learning_rate": 5.9096315702961926e-05, "loss": 1.5225, "step": 4319 }, { "epoch": 0.6604242308427288, "grad_norm": 1.421875, "learning_rate": 5.909224207991162e-05, "loss": 1.3923, "step": 4320 }, { "epoch": 0.6605771068220906, "grad_norm": 1.25, "learning_rate": 5.9088168538386745e-05, "loss": 1.3288, "step": 4321 }, { "epoch": 0.6607299828014523, "grad_norm": 1.5234375, "learning_rate": 5.908409507839549e-05, "loss": 1.3974, "step": 4322 }, { "epoch": 0.6608828587808141, "grad_norm": 1.4375, "learning_rate": 5.908002169994589e-05, "loss": 1.5048, "step": 4323 }, { "epoch": 0.6610357347601759, "grad_norm": 1.3984375, "learning_rate": 5.907594840304615e-05, "loss": 1.3201, "step": 4324 }, { "epoch": 0.6611886107395375, "grad_norm": 1.34375, "learning_rate": 5.9071875187704315e-05, "loss": 1.3743, "step": 4325 }, { "epoch": 0.6613414867188993, "grad_norm": 1.3828125, "learning_rate": 5.9067802053928564e-05, "loss": 1.3613, "step": 4326 }, { "epoch": 0.661494362698261, "grad_norm": 1.3671875, "learning_rate": 5.9063729001727006e-05, "loss": 1.5356, "step": 4327 }, { "epoch": 0.6616472386776228, "grad_norm": 1.390625, "learning_rate": 5.9059656031107704e-05, "loss": 1.4798, "step": 4328 }, { "epoch": 0.6618001146569845, "grad_norm": 1.359375, "learning_rate": 5.905558314207882e-05, "loss": 1.2379, "step": 4329 }, { "epoch": 0.6619529906363463, "grad_norm": 1.453125, "learning_rate": 5.905151033464844e-05, "loss": 1.3942, "step": 4330 }, { "epoch": 0.662105866615708, "grad_norm": 1.40625, "learning_rate": 5.904743760882473e-05, "loss": 1.3715, "step": 4331 }, { "epoch": 0.6622587425950698, "grad_norm": 1.25, "learning_rate": 5.9043364964615756e-05, "loss": 1.2212, "step": 4332 }, { "epoch": 0.6624116185744315, "grad_norm": 1.40625, "learning_rate": 5.903929240202968e-05, "loss": 1.2346, "step": 4333 }, { "epoch": 0.6625644945537932, "grad_norm": 1.296875, "learning_rate": 5.903521992107459e-05, "loss": 1.3116, "step": 4334 }, { "epoch": 0.6627173705331549, "grad_norm": 1.3984375, "learning_rate": 5.903114752175858e-05, "loss": 1.4903, "step": 4335 }, { "epoch": 0.6628702465125167, "grad_norm": 1.234375, "learning_rate": 5.902707520408981e-05, "loss": 1.1693, "step": 4336 }, { "epoch": 0.6630231224918784, "grad_norm": 1.375, "learning_rate": 5.902300296807637e-05, "loss": 1.6775, "step": 4337 }, { "epoch": 0.6631759984712402, "grad_norm": 1.3828125, "learning_rate": 5.901893081372641e-05, "loss": 1.5198, "step": 4338 }, { "epoch": 0.663328874450602, "grad_norm": 1.375, "learning_rate": 5.9014858741048e-05, "loss": 1.4886, "step": 4339 }, { "epoch": 0.6634817504299637, "grad_norm": 1.453125, "learning_rate": 5.901078675004928e-05, "loss": 1.6331, "step": 4340 }, { "epoch": 0.6636346264093255, "grad_norm": 1.265625, "learning_rate": 5.9006714840738344e-05, "loss": 1.167, "step": 4341 }, { "epoch": 0.6637875023886872, "grad_norm": 1.203125, "learning_rate": 5.9002643013123326e-05, "loss": 1.0502, "step": 4342 }, { "epoch": 0.6639403783680489, "grad_norm": 1.359375, "learning_rate": 5.899857126721237e-05, "loss": 1.3426, "step": 4343 }, { "epoch": 0.6640932543474106, "grad_norm": 1.3671875, "learning_rate": 5.899449960301351e-05, "loss": 1.3649, "step": 4344 }, { "epoch": 0.6642461303267724, "grad_norm": 1.359375, "learning_rate": 5.899042802053494e-05, "loss": 1.3749, "step": 4345 }, { "epoch": 0.6643990063061341, "grad_norm": 1.3828125, "learning_rate": 5.8986356519784703e-05, "loss": 1.411, "step": 4346 }, { "epoch": 0.6645518822854959, "grad_norm": 1.34375, "learning_rate": 5.898228510077099e-05, "loss": 1.5965, "step": 4347 }, { "epoch": 0.6647047582648576, "grad_norm": 1.28125, "learning_rate": 5.89782137635019e-05, "loss": 1.3641, "step": 4348 }, { "epoch": 0.6648576342442194, "grad_norm": 1.5078125, "learning_rate": 5.8974142507985476e-05, "loss": 1.4738, "step": 4349 }, { "epoch": 0.6650105102235812, "grad_norm": 1.3828125, "learning_rate": 5.8970071334229904e-05, "loss": 1.2744, "step": 4350 }, { "epoch": 0.6651633862029429, "grad_norm": 1.3046875, "learning_rate": 5.8966000242243255e-05, "loss": 1.5076, "step": 4351 }, { "epoch": 0.6653162621823046, "grad_norm": 1.5, "learning_rate": 5.896192923203367e-05, "loss": 1.3422, "step": 4352 }, { "epoch": 0.6654691381616663, "grad_norm": 1.6953125, "learning_rate": 5.8957858303609296e-05, "loss": 1.4778, "step": 4353 }, { "epoch": 0.6656220141410281, "grad_norm": 1.515625, "learning_rate": 5.895378745697815e-05, "loss": 1.4139, "step": 4354 }, { "epoch": 0.6657748901203898, "grad_norm": 1.2421875, "learning_rate": 5.894971669214842e-05, "loss": 1.2278, "step": 4355 }, { "epoch": 0.6659277660997516, "grad_norm": 1.4140625, "learning_rate": 5.894564600912816e-05, "loss": 1.5055, "step": 4356 }, { "epoch": 0.6660806420791133, "grad_norm": 1.453125, "learning_rate": 5.8941575407925575e-05, "loss": 1.3087, "step": 4357 }, { "epoch": 0.6662335180584751, "grad_norm": 1.40625, "learning_rate": 5.8937504888548655e-05, "loss": 1.4264, "step": 4358 }, { "epoch": 0.6663863940378368, "grad_norm": 1.421875, "learning_rate": 5.893343445100564e-05, "loss": 1.446, "step": 4359 }, { "epoch": 0.6665392700171986, "grad_norm": 1.4296875, "learning_rate": 5.892936409530456e-05, "loss": 1.4228, "step": 4360 }, { "epoch": 0.6666921459965602, "grad_norm": 1.3046875, "learning_rate": 5.892529382145353e-05, "loss": 1.2653, "step": 4361 }, { "epoch": 0.666845021975922, "grad_norm": 1.3671875, "learning_rate": 5.8921223629460734e-05, "loss": 1.3218, "step": 4362 }, { "epoch": 0.6669978979552837, "grad_norm": 1.3046875, "learning_rate": 5.891715351933416e-05, "loss": 1.272, "step": 4363 }, { "epoch": 0.6671507739346455, "grad_norm": 1.4375, "learning_rate": 5.891308349108203e-05, "loss": 1.4046, "step": 4364 }, { "epoch": 0.6673036499140073, "grad_norm": 1.546875, "learning_rate": 5.890901354471239e-05, "loss": 1.6567, "step": 4365 }, { "epoch": 0.667456525893369, "grad_norm": 1.4375, "learning_rate": 5.890494368023338e-05, "loss": 1.2856, "step": 4366 }, { "epoch": 0.6676094018727308, "grad_norm": 1.5859375, "learning_rate": 5.8900873897653144e-05, "loss": 1.357, "step": 4367 }, { "epoch": 0.6677622778520925, "grad_norm": 1.40625, "learning_rate": 5.889680419697969e-05, "loss": 1.5847, "step": 4368 }, { "epoch": 0.6679151538314543, "grad_norm": 1.328125, "learning_rate": 5.889273457822124e-05, "loss": 1.5925, "step": 4369 }, { "epoch": 0.6680680298108159, "grad_norm": 1.421875, "learning_rate": 5.8888665041385815e-05, "loss": 1.5357, "step": 4370 }, { "epoch": 0.6682209057901777, "grad_norm": 1.3828125, "learning_rate": 5.88845955864816e-05, "loss": 1.4821, "step": 4371 }, { "epoch": 0.6683737817695394, "grad_norm": 1.75, "learning_rate": 5.8880526213516654e-05, "loss": 1.5405, "step": 4372 }, { "epoch": 0.6685266577489012, "grad_norm": 1.3828125, "learning_rate": 5.8876456922499135e-05, "loss": 1.2119, "step": 4373 }, { "epoch": 0.6686795337282629, "grad_norm": 1.421875, "learning_rate": 5.88723877134371e-05, "loss": 1.2477, "step": 4374 }, { "epoch": 0.6688324097076247, "grad_norm": 1.3125, "learning_rate": 5.886831858633868e-05, "loss": 1.2085, "step": 4375 }, { "epoch": 0.6689852856869865, "grad_norm": 1.3828125, "learning_rate": 5.886424954121199e-05, "loss": 1.2494, "step": 4376 }, { "epoch": 0.6691381616663482, "grad_norm": 1.296875, "learning_rate": 5.886018057806512e-05, "loss": 1.4036, "step": 4377 }, { "epoch": 0.66929103764571, "grad_norm": 1.296875, "learning_rate": 5.8856111696906234e-05, "loss": 1.2841, "step": 4378 }, { "epoch": 0.6694439136250716, "grad_norm": 1.5078125, "learning_rate": 5.885204289774337e-05, "loss": 1.5532, "step": 4379 }, { "epoch": 0.6695967896044334, "grad_norm": 1.28125, "learning_rate": 5.8847974180584665e-05, "loss": 1.261, "step": 4380 }, { "epoch": 0.6697496655837951, "grad_norm": 1.359375, "learning_rate": 5.884390554543824e-05, "loss": 1.5714, "step": 4381 }, { "epoch": 0.6699025415631569, "grad_norm": 1.34375, "learning_rate": 5.8839836992312166e-05, "loss": 1.7471, "step": 4382 }, { "epoch": 0.6700554175425186, "grad_norm": 1.4453125, "learning_rate": 5.883576852121464e-05, "loss": 1.6226, "step": 4383 }, { "epoch": 0.6702082935218804, "grad_norm": 1.3515625, "learning_rate": 5.8831700132153655e-05, "loss": 1.3666, "step": 4384 }, { "epoch": 0.6703611695012421, "grad_norm": 1.4375, "learning_rate": 5.88276318251374e-05, "loss": 1.6139, "step": 4385 }, { "epoch": 0.6705140454806039, "grad_norm": 1.3515625, "learning_rate": 5.8823563600173945e-05, "loss": 1.2725, "step": 4386 }, { "epoch": 0.6706669214599656, "grad_norm": 1.4140625, "learning_rate": 5.881949545727139e-05, "loss": 1.5465, "step": 4387 }, { "epoch": 0.6708197974393273, "grad_norm": 1.3828125, "learning_rate": 5.881542739643791e-05, "loss": 1.424, "step": 4388 }, { "epoch": 0.670972673418689, "grad_norm": 1.3828125, "learning_rate": 5.881135941768152e-05, "loss": 1.1647, "step": 4389 }, { "epoch": 0.6711255493980508, "grad_norm": 1.3515625, "learning_rate": 5.880729152101039e-05, "loss": 1.4146, "step": 4390 }, { "epoch": 0.6712784253774126, "grad_norm": 1.2890625, "learning_rate": 5.880322370643259e-05, "loss": 1.2296, "step": 4391 }, { "epoch": 0.6714313013567743, "grad_norm": 1.3515625, "learning_rate": 5.8799155973956255e-05, "loss": 1.4004, "step": 4392 }, { "epoch": 0.6715841773361361, "grad_norm": 1.359375, "learning_rate": 5.8795088323589506e-05, "loss": 1.3632, "step": 4393 }, { "epoch": 0.6717370533154978, "grad_norm": 1.328125, "learning_rate": 5.879102075534038e-05, "loss": 1.1526, "step": 4394 }, { "epoch": 0.6718899292948596, "grad_norm": 1.5078125, "learning_rate": 5.878695326921705e-05, "loss": 1.5995, "step": 4395 }, { "epoch": 0.6720428052742213, "grad_norm": 1.3203125, "learning_rate": 5.8782885865227576e-05, "loss": 1.1981, "step": 4396 }, { "epoch": 0.672195681253583, "grad_norm": 1.2890625, "learning_rate": 5.877881854338012e-05, "loss": 1.259, "step": 4397 }, { "epoch": 0.6723485572329447, "grad_norm": 1.4140625, "learning_rate": 5.877475130368273e-05, "loss": 1.4051, "step": 4398 }, { "epoch": 0.6725014332123065, "grad_norm": 1.3671875, "learning_rate": 5.877068414614356e-05, "loss": 1.3773, "step": 4399 }, { "epoch": 0.6726543091916682, "grad_norm": 1.5859375, "learning_rate": 5.876661707077069e-05, "loss": 1.5108, "step": 4400 }, { "epoch": 0.67280718517103, "grad_norm": 1.4921875, "learning_rate": 5.876255007757221e-05, "loss": 1.5146, "step": 4401 }, { "epoch": 0.6729600611503918, "grad_norm": 1.34375, "learning_rate": 5.8758483166556266e-05, "loss": 1.3664, "step": 4402 }, { "epoch": 0.6731129371297535, "grad_norm": 1.4375, "learning_rate": 5.8754416337730886e-05, "loss": 1.4965, "step": 4403 }, { "epoch": 0.6732658131091153, "grad_norm": 1.4375, "learning_rate": 5.87503495911043e-05, "loss": 1.6631, "step": 4404 }, { "epoch": 0.673418689088477, "grad_norm": 1.3515625, "learning_rate": 5.874628292668449e-05, "loss": 1.3743, "step": 4405 }, { "epoch": 0.6735715650678387, "grad_norm": 1.40625, "learning_rate": 5.874221634447963e-05, "loss": 1.3008, "step": 4406 }, { "epoch": 0.6737244410472004, "grad_norm": 1.3984375, "learning_rate": 5.873814984449784e-05, "loss": 1.4063, "step": 4407 }, { "epoch": 0.6738773170265622, "grad_norm": 1.484375, "learning_rate": 5.8734083426747136e-05, "loss": 1.6209, "step": 4408 }, { "epoch": 0.6740301930059239, "grad_norm": 1.390625, "learning_rate": 5.87300170912357e-05, "loss": 1.5183, "step": 4409 }, { "epoch": 0.6741830689852857, "grad_norm": 1.4765625, "learning_rate": 5.872595083797159e-05, "loss": 1.4477, "step": 4410 }, { "epoch": 0.6743359449646474, "grad_norm": 1.390625, "learning_rate": 5.872188466696296e-05, "loss": 1.4948, "step": 4411 }, { "epoch": 0.6744888209440092, "grad_norm": 1.21875, "learning_rate": 5.87178185782179e-05, "loss": 1.2776, "step": 4412 }, { "epoch": 0.674641696923371, "grad_norm": 1.359375, "learning_rate": 5.871375257174445e-05, "loss": 1.6353, "step": 4413 }, { "epoch": 0.6747945729027327, "grad_norm": 1.328125, "learning_rate": 5.8709686647550786e-05, "loss": 1.2685, "step": 4414 }, { "epoch": 0.6749474488820943, "grad_norm": 1.421875, "learning_rate": 5.8705620805644977e-05, "loss": 1.6626, "step": 4415 }, { "epoch": 0.6751003248614561, "grad_norm": 1.2734375, "learning_rate": 5.870155504603515e-05, "loss": 1.3039, "step": 4416 }, { "epoch": 0.6752532008408179, "grad_norm": 1.4296875, "learning_rate": 5.8697489368729375e-05, "loss": 1.4826, "step": 4417 }, { "epoch": 0.6754060768201796, "grad_norm": 1.40625, "learning_rate": 5.869342377373581e-05, "loss": 1.3558, "step": 4418 }, { "epoch": 0.6755589527995414, "grad_norm": 1.3828125, "learning_rate": 5.86893582610625e-05, "loss": 1.3601, "step": 4419 }, { "epoch": 0.6757118287789031, "grad_norm": 1.234375, "learning_rate": 5.868529283071755e-05, "loss": 1.4586, "step": 4420 }, { "epoch": 0.6758647047582649, "grad_norm": 1.453125, "learning_rate": 5.86812274827091e-05, "loss": 1.6763, "step": 4421 }, { "epoch": 0.6760175807376266, "grad_norm": 1.4296875, "learning_rate": 5.8677162217045204e-05, "loss": 1.2338, "step": 4422 }, { "epoch": 0.6761704567169884, "grad_norm": 1.6328125, "learning_rate": 5.867309703373404e-05, "loss": 1.4173, "step": 4423 }, { "epoch": 0.67632333269635, "grad_norm": 1.375, "learning_rate": 5.866903193278362e-05, "loss": 1.5318, "step": 4424 }, { "epoch": 0.6764762086757118, "grad_norm": 1.3515625, "learning_rate": 5.866496691420209e-05, "loss": 1.2032, "step": 4425 }, { "epoch": 0.6766290846550735, "grad_norm": 1.640625, "learning_rate": 5.8660901977997565e-05, "loss": 1.5601, "step": 4426 }, { "epoch": 0.6767819606344353, "grad_norm": 1.4375, "learning_rate": 5.865683712417811e-05, "loss": 1.4931, "step": 4427 }, { "epoch": 0.676934836613797, "grad_norm": 1.3984375, "learning_rate": 5.865277235275187e-05, "loss": 1.4215, "step": 4428 }, { "epoch": 0.6770877125931588, "grad_norm": 1.6171875, "learning_rate": 5.86487076637269e-05, "loss": 1.5043, "step": 4429 }, { "epoch": 0.6772405885725206, "grad_norm": 1.3515625, "learning_rate": 5.864464305711133e-05, "loss": 1.4638, "step": 4430 }, { "epoch": 0.6773934645518823, "grad_norm": 1.4296875, "learning_rate": 5.8640578532913214e-05, "loss": 1.4758, "step": 4431 }, { "epoch": 0.6775463405312441, "grad_norm": 1.375, "learning_rate": 5.863651409114072e-05, "loss": 1.1539, "step": 4432 }, { "epoch": 0.6776992165106057, "grad_norm": 1.2578125, "learning_rate": 5.8632449731801954e-05, "loss": 1.3239, "step": 4433 }, { "epoch": 0.6778520924899675, "grad_norm": 1.4296875, "learning_rate": 5.862838545490492e-05, "loss": 1.5666, "step": 4434 }, { "epoch": 0.6780049684693292, "grad_norm": 1.421875, "learning_rate": 5.8624321260457804e-05, "loss": 1.4565, "step": 4435 }, { "epoch": 0.678157844448691, "grad_norm": 1.3359375, "learning_rate": 5.862025714846865e-05, "loss": 1.3558, "step": 4436 }, { "epoch": 0.6783107204280527, "grad_norm": 1.453125, "learning_rate": 5.8616193118945615e-05, "loss": 1.2791, "step": 4437 }, { "epoch": 0.6784635964074145, "grad_norm": 1.4609375, "learning_rate": 5.8612129171896744e-05, "loss": 1.4055, "step": 4438 }, { "epoch": 0.6786164723867762, "grad_norm": 1.4453125, "learning_rate": 5.860806530733021e-05, "loss": 1.348, "step": 4439 }, { "epoch": 0.678769348366138, "grad_norm": 1.359375, "learning_rate": 5.860400152525403e-05, "loss": 1.1407, "step": 4440 }, { "epoch": 0.6789222243454998, "grad_norm": 1.4296875, "learning_rate": 5.859993782567632e-05, "loss": 1.2793, "step": 4441 }, { "epoch": 0.6790751003248614, "grad_norm": 1.3828125, "learning_rate": 5.8595874208605236e-05, "loss": 1.417, "step": 4442 }, { "epoch": 0.6792279763042232, "grad_norm": 1.5703125, "learning_rate": 5.859181067404877e-05, "loss": 1.4486, "step": 4443 }, { "epoch": 0.6793808522835849, "grad_norm": 1.484375, "learning_rate": 5.858774722201515e-05, "loss": 1.6207, "step": 4444 }, { "epoch": 0.6795337282629467, "grad_norm": 1.3359375, "learning_rate": 5.858368385251238e-05, "loss": 1.4505, "step": 4445 }, { "epoch": 0.6796866042423084, "grad_norm": 1.1875, "learning_rate": 5.857962056554859e-05, "loss": 1.1711, "step": 4446 }, { "epoch": 0.6798394802216702, "grad_norm": 1.2421875, "learning_rate": 5.857555736113189e-05, "loss": 1.2701, "step": 4447 }, { "epoch": 0.6799923562010319, "grad_norm": 1.4921875, "learning_rate": 5.857149423927032e-05, "loss": 1.5546, "step": 4448 }, { "epoch": 0.6801452321803937, "grad_norm": 1.65625, "learning_rate": 5.856743119997208e-05, "loss": 1.4493, "step": 4449 }, { "epoch": 0.6802981081597554, "grad_norm": 1.328125, "learning_rate": 5.856336824324516e-05, "loss": 1.4647, "step": 4450 }, { "epoch": 0.6804509841391171, "grad_norm": 1.484375, "learning_rate": 5.8559305369097704e-05, "loss": 1.361, "step": 4451 }, { "epoch": 0.6806038601184788, "grad_norm": 1.484375, "learning_rate": 5.855524257753786e-05, "loss": 1.6977, "step": 4452 }, { "epoch": 0.6807567360978406, "grad_norm": 1.3359375, "learning_rate": 5.855117986857361e-05, "loss": 1.4587, "step": 4453 }, { "epoch": 0.6809096120772024, "grad_norm": 1.3984375, "learning_rate": 5.854711724221316e-05, "loss": 1.1699, "step": 4454 }, { "epoch": 0.6810624880565641, "grad_norm": 1.3671875, "learning_rate": 5.8543054698464526e-05, "loss": 1.4766, "step": 4455 }, { "epoch": 0.6812153640359259, "grad_norm": 1.5625, "learning_rate": 5.8538992237335855e-05, "loss": 1.5284, "step": 4456 }, { "epoch": 0.6813682400152876, "grad_norm": 1.3515625, "learning_rate": 5.853492985883522e-05, "loss": 1.47, "step": 4457 }, { "epoch": 0.6815211159946494, "grad_norm": 1.375, "learning_rate": 5.853086756297075e-05, "loss": 1.4241, "step": 4458 }, { "epoch": 0.6816739919740111, "grad_norm": 1.3359375, "learning_rate": 5.85268053497505e-05, "loss": 1.1578, "step": 4459 }, { "epoch": 0.6818268679533728, "grad_norm": 1.265625, "learning_rate": 5.852274321918256e-05, "loss": 1.2676, "step": 4460 }, { "epoch": 0.6819797439327345, "grad_norm": 1.3515625, "learning_rate": 5.8518681171275084e-05, "loss": 1.2928, "step": 4461 }, { "epoch": 0.6821326199120963, "grad_norm": 1.4609375, "learning_rate": 5.851461920603609e-05, "loss": 1.5313, "step": 4462 }, { "epoch": 0.682285495891458, "grad_norm": 1.390625, "learning_rate": 5.8510557323473755e-05, "loss": 1.4495, "step": 4463 }, { "epoch": 0.6824383718708198, "grad_norm": 1.375, "learning_rate": 5.850649552359609e-05, "loss": 1.4917, "step": 4464 }, { "epoch": 0.6825912478501815, "grad_norm": 1.3515625, "learning_rate": 5.850243380641126e-05, "loss": 1.5309, "step": 4465 }, { "epoch": 0.6827441238295433, "grad_norm": 1.3359375, "learning_rate": 5.849837217192733e-05, "loss": 1.1793, "step": 4466 }, { "epoch": 0.6828969998089051, "grad_norm": 1.4453125, "learning_rate": 5.849431062015236e-05, "loss": 1.4056, "step": 4467 }, { "epoch": 0.6830498757882668, "grad_norm": 1.2265625, "learning_rate": 5.8490249151094535e-05, "loss": 1.3504, "step": 4468 }, { "epoch": 0.6832027517676285, "grad_norm": 1.3828125, "learning_rate": 5.8486187764761844e-05, "loss": 1.5108, "step": 4469 }, { "epoch": 0.6833556277469902, "grad_norm": 1.2578125, "learning_rate": 5.8482126461162465e-05, "loss": 1.2062, "step": 4470 }, { "epoch": 0.683508503726352, "grad_norm": 1.515625, "learning_rate": 5.8478065240304436e-05, "loss": 1.2276, "step": 4471 }, { "epoch": 0.6836613797057137, "grad_norm": 1.4609375, "learning_rate": 5.847400410219588e-05, "loss": 1.4542, "step": 4472 }, { "epoch": 0.6838142556850755, "grad_norm": 1.4765625, "learning_rate": 5.846994304684491e-05, "loss": 1.4259, "step": 4473 }, { "epoch": 0.6839671316644372, "grad_norm": 1.40625, "learning_rate": 5.846588207425956e-05, "loss": 1.5274, "step": 4474 }, { "epoch": 0.684120007643799, "grad_norm": 1.28125, "learning_rate": 5.8461821184447964e-05, "loss": 1.2986, "step": 4475 }, { "epoch": 0.6842728836231607, "grad_norm": 1.5078125, "learning_rate": 5.845776037741818e-05, "loss": 1.6672, "step": 4476 }, { "epoch": 0.6844257596025225, "grad_norm": 1.2890625, "learning_rate": 5.8453699653178354e-05, "loss": 1.244, "step": 4477 }, { "epoch": 0.6845786355818841, "grad_norm": 1.421875, "learning_rate": 5.844963901173658e-05, "loss": 1.3231, "step": 4478 }, { "epoch": 0.6847315115612459, "grad_norm": 1.2578125, "learning_rate": 5.844557845310088e-05, "loss": 1.3505, "step": 4479 }, { "epoch": 0.6848843875406077, "grad_norm": 1.3046875, "learning_rate": 5.8441517977279394e-05, "loss": 1.2746, "step": 4480 }, { "epoch": 0.6850372635199694, "grad_norm": 1.3359375, "learning_rate": 5.8437457584280196e-05, "loss": 1.4834, "step": 4481 }, { "epoch": 0.6851901394993312, "grad_norm": 1.5, "learning_rate": 5.8433397274111436e-05, "loss": 1.3674, "step": 4482 }, { "epoch": 0.6853430154786929, "grad_norm": 1.53125, "learning_rate": 5.842933704678111e-05, "loss": 1.327, "step": 4483 }, { "epoch": 0.6854958914580547, "grad_norm": 1.3828125, "learning_rate": 5.84252769022974e-05, "loss": 1.3677, "step": 4484 }, { "epoch": 0.6856487674374164, "grad_norm": 1.3359375, "learning_rate": 5.842121684066835e-05, "loss": 1.0566, "step": 4485 }, { "epoch": 0.6858016434167782, "grad_norm": 1.390625, "learning_rate": 5.8417156861902035e-05, "loss": 1.4814, "step": 4486 }, { "epoch": 0.6859545193961398, "grad_norm": 1.484375, "learning_rate": 5.8413096966006606e-05, "loss": 1.5045, "step": 4487 }, { "epoch": 0.6861073953755016, "grad_norm": 1.4765625, "learning_rate": 5.8409037152990065e-05, "loss": 1.4726, "step": 4488 }, { "epoch": 0.6862602713548633, "grad_norm": 1.4453125, "learning_rate": 5.840497742286061e-05, "loss": 1.5544, "step": 4489 }, { "epoch": 0.6864131473342251, "grad_norm": 1.40625, "learning_rate": 5.840091777562624e-05, "loss": 1.5151, "step": 4490 }, { "epoch": 0.6865660233135868, "grad_norm": 1.375, "learning_rate": 5.83968582112951e-05, "loss": 1.4588, "step": 4491 }, { "epoch": 0.6867188992929486, "grad_norm": 1.296875, "learning_rate": 5.839279872987529e-05, "loss": 1.2952, "step": 4492 }, { "epoch": 0.6868717752723104, "grad_norm": 1.3203125, "learning_rate": 5.838873933137483e-05, "loss": 1.5104, "step": 4493 }, { "epoch": 0.6870246512516721, "grad_norm": 1.3359375, "learning_rate": 5.838468001580188e-05, "loss": 1.252, "step": 4494 }, { "epoch": 0.6871775272310339, "grad_norm": 1.453125, "learning_rate": 5.8380620783164464e-05, "loss": 1.1868, "step": 4495 }, { "epoch": 0.6873304032103955, "grad_norm": 1.5859375, "learning_rate": 5.837656163347078e-05, "loss": 1.5477, "step": 4496 }, { "epoch": 0.6874832791897573, "grad_norm": 1.34375, "learning_rate": 5.837250256672877e-05, "loss": 1.0777, "step": 4497 }, { "epoch": 0.687636155169119, "grad_norm": 1.359375, "learning_rate": 5.8368443582946654e-05, "loss": 1.4732, "step": 4498 }, { "epoch": 0.6877890311484808, "grad_norm": 1.5546875, "learning_rate": 5.83643846821325e-05, "loss": 1.6732, "step": 4499 }, { "epoch": 0.6879419071278425, "grad_norm": 1.34375, "learning_rate": 5.83603258642943e-05, "loss": 1.3239, "step": 4500 }, { "epoch": 0.6880947831072043, "grad_norm": 1.3984375, "learning_rate": 5.8356267129440226e-05, "loss": 1.4715, "step": 4501 }, { "epoch": 0.688247659086566, "grad_norm": 1.3828125, "learning_rate": 5.835220847757836e-05, "loss": 1.6425, "step": 4502 }, { "epoch": 0.6884005350659278, "grad_norm": 1.40625, "learning_rate": 5.8348149908716785e-05, "loss": 1.3861, "step": 4503 }, { "epoch": 0.6885534110452896, "grad_norm": 1.1796875, "learning_rate": 5.834409142286359e-05, "loss": 1.0159, "step": 4504 }, { "epoch": 0.6887062870246512, "grad_norm": 1.375, "learning_rate": 5.834003302002681e-05, "loss": 1.3425, "step": 4505 }, { "epoch": 0.688859163004013, "grad_norm": 1.390625, "learning_rate": 5.833597470021466e-05, "loss": 1.4724, "step": 4506 }, { "epoch": 0.6890120389833747, "grad_norm": 1.1796875, "learning_rate": 5.8331916463435077e-05, "loss": 1.1847, "step": 4507 }, { "epoch": 0.6891649149627365, "grad_norm": 1.5390625, "learning_rate": 5.832785830969625e-05, "loss": 1.6194, "step": 4508 }, { "epoch": 0.6893177909420982, "grad_norm": 1.25, "learning_rate": 5.8323800239006235e-05, "loss": 1.5363, "step": 4509 }, { "epoch": 0.68947066692146, "grad_norm": 1.3359375, "learning_rate": 5.831974225137313e-05, "loss": 1.5011, "step": 4510 }, { "epoch": 0.6896235429008217, "grad_norm": 1.21875, "learning_rate": 5.8315684346805014e-05, "loss": 1.1854, "step": 4511 }, { "epoch": 0.6897764188801835, "grad_norm": 1.375, "learning_rate": 5.831162652530997e-05, "loss": 1.2871, "step": 4512 }, { "epoch": 0.6899292948595452, "grad_norm": 1.3671875, "learning_rate": 5.830756878689609e-05, "loss": 1.4585, "step": 4513 }, { "epoch": 0.6900821708389069, "grad_norm": 1.40625, "learning_rate": 5.830351113157142e-05, "loss": 1.2996, "step": 4514 }, { "epoch": 0.6902350468182686, "grad_norm": 1.265625, "learning_rate": 5.829945355934416e-05, "loss": 1.5246, "step": 4515 }, { "epoch": 0.6903879227976304, "grad_norm": 1.3828125, "learning_rate": 5.829539607022225e-05, "loss": 1.7043, "step": 4516 }, { "epoch": 0.6905407987769921, "grad_norm": 1.3046875, "learning_rate": 5.829133866421389e-05, "loss": 1.2747, "step": 4517 }, { "epoch": 0.6906936747563539, "grad_norm": 1.375, "learning_rate": 5.828728134132713e-05, "loss": 1.2586, "step": 4518 }, { "epoch": 0.6908465507357157, "grad_norm": 1.3828125, "learning_rate": 5.8283224101570035e-05, "loss": 1.6475, "step": 4519 }, { "epoch": 0.6909994267150774, "grad_norm": 1.421875, "learning_rate": 5.8279166944950704e-05, "loss": 1.3717, "step": 4520 }, { "epoch": 0.6911523026944392, "grad_norm": 1.5078125, "learning_rate": 5.827510987147724e-05, "loss": 1.5533, "step": 4521 }, { "epoch": 0.6913051786738009, "grad_norm": 1.2890625, "learning_rate": 5.82710528811577e-05, "loss": 1.3316, "step": 4522 }, { "epoch": 0.6914580546531626, "grad_norm": 1.3515625, "learning_rate": 5.8266995974000146e-05, "loss": 1.4523, "step": 4523 }, { "epoch": 0.6916109306325243, "grad_norm": 1.2578125, "learning_rate": 5.8262939150012755e-05, "loss": 1.1943, "step": 4524 }, { "epoch": 0.6917638066118861, "grad_norm": 1.4453125, "learning_rate": 5.825888240920357e-05, "loss": 1.4078, "step": 4525 }, { "epoch": 0.6919166825912478, "grad_norm": 1.40625, "learning_rate": 5.82548257515806e-05, "loss": 1.4633, "step": 4526 }, { "epoch": 0.6920695585706096, "grad_norm": 1.75, "learning_rate": 5.825076917715202e-05, "loss": 1.3651, "step": 4527 }, { "epoch": 0.6922224345499713, "grad_norm": 1.3203125, "learning_rate": 5.82467126859259e-05, "loss": 1.4518, "step": 4528 }, { "epoch": 0.6923753105293331, "grad_norm": 1.4375, "learning_rate": 5.82426562779103e-05, "loss": 1.411, "step": 4529 }, { "epoch": 0.6925281865086949, "grad_norm": 1.2890625, "learning_rate": 5.823859995311327e-05, "loss": 1.2024, "step": 4530 }, { "epoch": 0.6926810624880566, "grad_norm": 1.4453125, "learning_rate": 5.8234543711543025e-05, "loss": 1.4387, "step": 4531 }, { "epoch": 0.6928339384674183, "grad_norm": 1.34375, "learning_rate": 5.8230487553207527e-05, "loss": 1.2042, "step": 4532 }, { "epoch": 0.69298681444678, "grad_norm": 1.578125, "learning_rate": 5.822643147811484e-05, "loss": 1.6756, "step": 4533 }, { "epoch": 0.6931396904261418, "grad_norm": 1.484375, "learning_rate": 5.822237548627316e-05, "loss": 1.5787, "step": 4534 }, { "epoch": 0.6932925664055035, "grad_norm": 1.3671875, "learning_rate": 5.8218319577690493e-05, "loss": 1.4435, "step": 4535 }, { "epoch": 0.6934454423848653, "grad_norm": 1.4375, "learning_rate": 5.821426375237495e-05, "loss": 1.3298, "step": 4536 }, { "epoch": 0.693598318364227, "grad_norm": 1.265625, "learning_rate": 5.821020801033461e-05, "loss": 1.1444, "step": 4537 }, { "epoch": 0.6937511943435888, "grad_norm": 1.515625, "learning_rate": 5.820615235157754e-05, "loss": 1.4941, "step": 4538 }, { "epoch": 0.6939040703229505, "grad_norm": 1.4765625, "learning_rate": 5.820209677611184e-05, "loss": 1.6403, "step": 4539 }, { "epoch": 0.6940569463023123, "grad_norm": 1.4375, "learning_rate": 5.819804128394555e-05, "loss": 1.3714, "step": 4540 }, { "epoch": 0.6942098222816739, "grad_norm": 1.796875, "learning_rate": 5.819398587508685e-05, "loss": 1.0353, "step": 4541 }, { "epoch": 0.6943626982610357, "grad_norm": 1.4296875, "learning_rate": 5.818993054954369e-05, "loss": 1.3947, "step": 4542 }, { "epoch": 0.6945155742403974, "grad_norm": 1.25, "learning_rate": 5.818587530732427e-05, "loss": 1.3046, "step": 4543 }, { "epoch": 0.6946684502197592, "grad_norm": 1.328125, "learning_rate": 5.818182014843665e-05, "loss": 1.3473, "step": 4544 }, { "epoch": 0.694821326199121, "grad_norm": 1.3828125, "learning_rate": 5.817776507288881e-05, "loss": 1.2935, "step": 4545 }, { "epoch": 0.6949742021784827, "grad_norm": 1.4765625, "learning_rate": 5.817371008068895e-05, "loss": 1.2613, "step": 4546 }, { "epoch": 0.6951270781578445, "grad_norm": 1.375, "learning_rate": 5.816965517184512e-05, "loss": 1.4996, "step": 4547 }, { "epoch": 0.6952799541372062, "grad_norm": 1.2265625, "learning_rate": 5.816560034636537e-05, "loss": 1.2761, "step": 4548 }, { "epoch": 0.695432830116568, "grad_norm": 1.53125, "learning_rate": 5.816154560425776e-05, "loss": 1.4888, "step": 4549 }, { "epoch": 0.6955857060959296, "grad_norm": 1.3984375, "learning_rate": 5.815749094553049e-05, "loss": 1.5115, "step": 4550 }, { "epoch": 0.6957385820752914, "grad_norm": 1.359375, "learning_rate": 5.8153436370191524e-05, "loss": 1.1965, "step": 4551 }, { "epoch": 0.6958914580546531, "grad_norm": 1.5234375, "learning_rate": 5.814938187824894e-05, "loss": 1.7104, "step": 4552 }, { "epoch": 0.6960443340340149, "grad_norm": 1.3046875, "learning_rate": 5.814532746971091e-05, "loss": 1.3155, "step": 4553 }, { "epoch": 0.6961972100133766, "grad_norm": 1.4609375, "learning_rate": 5.814127314458544e-05, "loss": 1.4326, "step": 4554 }, { "epoch": 0.6963500859927384, "grad_norm": 1.40625, "learning_rate": 5.813721890288064e-05, "loss": 1.3883, "step": 4555 }, { "epoch": 0.6965029619721002, "grad_norm": 1.34375, "learning_rate": 5.813316474460455e-05, "loss": 1.5165, "step": 4556 }, { "epoch": 0.6966558379514619, "grad_norm": 1.3515625, "learning_rate": 5.812911066976535e-05, "loss": 1.273, "step": 4557 }, { "epoch": 0.6968087139308237, "grad_norm": 1.4140625, "learning_rate": 5.812505667837102e-05, "loss": 1.5101, "step": 4558 }, { "epoch": 0.6969615899101853, "grad_norm": 1.515625, "learning_rate": 5.8121002770429625e-05, "loss": 1.3057, "step": 4559 }, { "epoch": 0.6971144658895471, "grad_norm": 1.390625, "learning_rate": 5.811694894594936e-05, "loss": 1.459, "step": 4560 }, { "epoch": 0.6972673418689088, "grad_norm": 1.34375, "learning_rate": 5.8112895204938155e-05, "loss": 1.3994, "step": 4561 }, { "epoch": 0.6974202178482706, "grad_norm": 1.3515625, "learning_rate": 5.810884154740421e-05, "loss": 1.3394, "step": 4562 }, { "epoch": 0.6975730938276323, "grad_norm": 1.359375, "learning_rate": 5.810478797335557e-05, "loss": 1.3375, "step": 4563 }, { "epoch": 0.6977259698069941, "grad_norm": 1.46875, "learning_rate": 5.8100734482800286e-05, "loss": 1.496, "step": 4564 }, { "epoch": 0.6978788457863558, "grad_norm": 1.390625, "learning_rate": 5.809668107574646e-05, "loss": 1.4744, "step": 4565 }, { "epoch": 0.6980317217657176, "grad_norm": 1.328125, "learning_rate": 5.809262775220217e-05, "loss": 1.3306, "step": 4566 }, { "epoch": 0.6981845977450793, "grad_norm": 1.296875, "learning_rate": 5.8088574512175474e-05, "loss": 1.2428, "step": 4567 }, { "epoch": 0.698337473724441, "grad_norm": 1.5625, "learning_rate": 5.808452135567444e-05, "loss": 1.7445, "step": 4568 }, { "epoch": 0.6984903497038027, "grad_norm": 1.4453125, "learning_rate": 5.808046828270719e-05, "loss": 1.5464, "step": 4569 }, { "epoch": 0.6986432256831645, "grad_norm": 1.3203125, "learning_rate": 5.807641529328183e-05, "loss": 1.2186, "step": 4570 }, { "epoch": 0.6987961016625263, "grad_norm": 1.34375, "learning_rate": 5.8072362387406306e-05, "loss": 1.2226, "step": 4571 }, { "epoch": 0.698948977641888, "grad_norm": 1.40625, "learning_rate": 5.806830956508882e-05, "loss": 1.5267, "step": 4572 }, { "epoch": 0.6991018536212498, "grad_norm": 1.4375, "learning_rate": 5.8064256826337403e-05, "loss": 1.4431, "step": 4573 }, { "epoch": 0.6992547296006115, "grad_norm": 1.359375, "learning_rate": 5.806020417116014e-05, "loss": 1.4757, "step": 4574 }, { "epoch": 0.6994076055799733, "grad_norm": 1.578125, "learning_rate": 5.8056151599565056e-05, "loss": 1.4427, "step": 4575 }, { "epoch": 0.699560481559335, "grad_norm": 1.3125, "learning_rate": 5.8052099111560355e-05, "loss": 1.332, "step": 4576 }, { "epoch": 0.6997133575386967, "grad_norm": 1.4375, "learning_rate": 5.804804670715398e-05, "loss": 1.343, "step": 4577 }, { "epoch": 0.6998662335180584, "grad_norm": 1.5703125, "learning_rate": 5.804399438635403e-05, "loss": 1.3271, "step": 4578 }, { "epoch": 0.7000191094974202, "grad_norm": 1.3515625, "learning_rate": 5.803994214916865e-05, "loss": 1.1603, "step": 4579 }, { "epoch": 0.7001719854767819, "grad_norm": 1.3125, "learning_rate": 5.8035889995605875e-05, "loss": 1.1294, "step": 4580 }, { "epoch": 0.7003248614561437, "grad_norm": 1.3828125, "learning_rate": 5.803183792567377e-05, "loss": 1.152, "step": 4581 }, { "epoch": 0.7004777374355055, "grad_norm": 1.2578125, "learning_rate": 5.802778593938043e-05, "loss": 1.1597, "step": 4582 }, { "epoch": 0.7006306134148672, "grad_norm": 1.578125, "learning_rate": 5.802373403673391e-05, "loss": 1.6406, "step": 4583 }, { "epoch": 0.700783489394229, "grad_norm": 1.4453125, "learning_rate": 5.801968221774232e-05, "loss": 1.5645, "step": 4584 }, { "epoch": 0.7009363653735907, "grad_norm": 1.3984375, "learning_rate": 5.801563048241365e-05, "loss": 1.4783, "step": 4585 }, { "epoch": 0.7010892413529524, "grad_norm": 1.2578125, "learning_rate": 5.801157883075612e-05, "loss": 1.4379, "step": 4586 }, { "epoch": 0.7012421173323141, "grad_norm": 1.25, "learning_rate": 5.800752726277765e-05, "loss": 1.43, "step": 4587 }, { "epoch": 0.7013949933116759, "grad_norm": 1.421875, "learning_rate": 5.800347577848641e-05, "loss": 1.4898, "step": 4588 }, { "epoch": 0.7015478692910376, "grad_norm": 1.375, "learning_rate": 5.7999424377890466e-05, "loss": 1.2944, "step": 4589 }, { "epoch": 0.7017007452703994, "grad_norm": 1.2890625, "learning_rate": 5.799537306099787e-05, "loss": 1.3631, "step": 4590 }, { "epoch": 0.7018536212497611, "grad_norm": 1.2890625, "learning_rate": 5.79913218278167e-05, "loss": 1.222, "step": 4591 }, { "epoch": 0.7020064972291229, "grad_norm": 1.4609375, "learning_rate": 5.7987270678355034e-05, "loss": 1.3723, "step": 4592 }, { "epoch": 0.7021593732084846, "grad_norm": 1.4140625, "learning_rate": 5.798321961262093e-05, "loss": 1.2638, "step": 4593 }, { "epoch": 0.7023122491878464, "grad_norm": 1.4140625, "learning_rate": 5.797916863062245e-05, "loss": 1.3066, "step": 4594 }, { "epoch": 0.702465125167208, "grad_norm": 1.4375, "learning_rate": 5.797511773236776e-05, "loss": 1.5753, "step": 4595 }, { "epoch": 0.7026180011465698, "grad_norm": 1.3359375, "learning_rate": 5.797106691786482e-05, "loss": 1.2807, "step": 4596 }, { "epoch": 0.7027708771259316, "grad_norm": 1.4921875, "learning_rate": 5.796701618712173e-05, "loss": 1.4039, "step": 4597 }, { "epoch": 0.7029237531052933, "grad_norm": 1.1875, "learning_rate": 5.79629655401466e-05, "loss": 1.0871, "step": 4598 }, { "epoch": 0.7030766290846551, "grad_norm": 1.4296875, "learning_rate": 5.7958914976947476e-05, "loss": 1.4269, "step": 4599 }, { "epoch": 0.7032295050640168, "grad_norm": 1.28125, "learning_rate": 5.795486449753246e-05, "loss": 1.2456, "step": 4600 }, { "epoch": 0.7033823810433786, "grad_norm": 1.515625, "learning_rate": 5.795081410190955e-05, "loss": 1.3861, "step": 4601 }, { "epoch": 0.7035352570227403, "grad_norm": 1.4609375, "learning_rate": 5.794676379008695e-05, "loss": 1.5335, "step": 4602 }, { "epoch": 0.7036881330021021, "grad_norm": 1.3984375, "learning_rate": 5.7942713562072604e-05, "loss": 1.852, "step": 4603 }, { "epoch": 0.7038410089814637, "grad_norm": 1.4140625, "learning_rate": 5.79386634178746e-05, "loss": 1.4243, "step": 4604 }, { "epoch": 0.7039938849608255, "grad_norm": 1.3671875, "learning_rate": 5.793461335750112e-05, "loss": 1.5173, "step": 4605 }, { "epoch": 0.7041467609401872, "grad_norm": 1.5234375, "learning_rate": 5.793056338096007e-05, "loss": 1.3044, "step": 4606 }, { "epoch": 0.704299636919549, "grad_norm": 1.453125, "learning_rate": 5.792651348825965e-05, "loss": 1.1198, "step": 4607 }, { "epoch": 0.7044525128989108, "grad_norm": 1.296875, "learning_rate": 5.792246367940788e-05, "loss": 1.3648, "step": 4608 }, { "epoch": 0.7046053888782725, "grad_norm": 1.3984375, "learning_rate": 5.791841395441285e-05, "loss": 1.3483, "step": 4609 }, { "epoch": 0.7047582648576343, "grad_norm": 1.3125, "learning_rate": 5.791436431328262e-05, "loss": 1.4395, "step": 4610 }, { "epoch": 0.704911140836996, "grad_norm": 1.53125, "learning_rate": 5.7910314756025266e-05, "loss": 1.4434, "step": 4611 }, { "epoch": 0.7050640168163578, "grad_norm": 1.5859375, "learning_rate": 5.790626528264884e-05, "loss": 1.6608, "step": 4612 }, { "epoch": 0.7052168927957194, "grad_norm": 1.4296875, "learning_rate": 5.790221589316138e-05, "loss": 1.5194, "step": 4613 }, { "epoch": 0.7053697687750812, "grad_norm": 1.421875, "learning_rate": 5.7898166587571056e-05, "loss": 1.2641, "step": 4614 }, { "epoch": 0.7055226447544429, "grad_norm": 1.3046875, "learning_rate": 5.789411736588588e-05, "loss": 1.5003, "step": 4615 }, { "epoch": 0.7056755207338047, "grad_norm": 1.3984375, "learning_rate": 5.789006822811392e-05, "loss": 1.4584, "step": 4616 }, { "epoch": 0.7058283967131664, "grad_norm": 1.6015625, "learning_rate": 5.788601917426324e-05, "loss": 1.5041, "step": 4617 }, { "epoch": 0.7059812726925282, "grad_norm": 1.21875, "learning_rate": 5.788197020434194e-05, "loss": 1.3065, "step": 4618 }, { "epoch": 0.70613414867189, "grad_norm": 1.4453125, "learning_rate": 5.787792131835805e-05, "loss": 1.2566, "step": 4619 }, { "epoch": 0.7062870246512517, "grad_norm": 1.3046875, "learning_rate": 5.787387251631964e-05, "loss": 1.4564, "step": 4620 }, { "epoch": 0.7064399006306135, "grad_norm": 1.25, "learning_rate": 5.7869823798234867e-05, "loss": 1.3053, "step": 4621 }, { "epoch": 0.7065927766099751, "grad_norm": 1.2421875, "learning_rate": 5.786577516411165e-05, "loss": 1.4146, "step": 4622 }, { "epoch": 0.7067456525893369, "grad_norm": 1.2734375, "learning_rate": 5.7861726613958164e-05, "loss": 1.102, "step": 4623 }, { "epoch": 0.7068985285686986, "grad_norm": 1.3671875, "learning_rate": 5.7857678147782516e-05, "loss": 1.2965, "step": 4624 }, { "epoch": 0.7070514045480604, "grad_norm": 1.296875, "learning_rate": 5.785362976559261e-05, "loss": 1.3816, "step": 4625 }, { "epoch": 0.7072042805274221, "grad_norm": 1.1640625, "learning_rate": 5.7849581467396654e-05, "loss": 0.9458, "step": 4626 }, { "epoch": 0.7073571565067839, "grad_norm": 1.1875, "learning_rate": 5.7845533253202675e-05, "loss": 1.1198, "step": 4627 }, { "epoch": 0.7075100324861456, "grad_norm": 1.515625, "learning_rate": 5.7841485123018746e-05, "loss": 1.6057, "step": 4628 }, { "epoch": 0.7076629084655074, "grad_norm": 1.3515625, "learning_rate": 5.7837437076852916e-05, "loss": 1.3381, "step": 4629 }, { "epoch": 0.7078157844448691, "grad_norm": 1.328125, "learning_rate": 5.783338911471324e-05, "loss": 1.3524, "step": 4630 }, { "epoch": 0.7079686604242308, "grad_norm": 1.6015625, "learning_rate": 5.7829341236607884e-05, "loss": 1.3622, "step": 4631 }, { "epoch": 0.7081215364035925, "grad_norm": 1.3125, "learning_rate": 5.782529344254477e-05, "loss": 1.2582, "step": 4632 }, { "epoch": 0.7082744123829543, "grad_norm": 1.359375, "learning_rate": 5.782124573253207e-05, "loss": 1.6606, "step": 4633 }, { "epoch": 0.708427288362316, "grad_norm": 1.3046875, "learning_rate": 5.781719810657782e-05, "loss": 1.0773, "step": 4634 }, { "epoch": 0.7085801643416778, "grad_norm": 1.4921875, "learning_rate": 5.781315056469008e-05, "loss": 1.4068, "step": 4635 }, { "epoch": 0.7087330403210396, "grad_norm": 1.3359375, "learning_rate": 5.780910310687693e-05, "loss": 1.2531, "step": 4636 }, { "epoch": 0.7088859163004013, "grad_norm": 1.40625, "learning_rate": 5.780505573314641e-05, "loss": 1.4736, "step": 4637 }, { "epoch": 0.7090387922797631, "grad_norm": 1.421875, "learning_rate": 5.7801008443506613e-05, "loss": 1.401, "step": 4638 }, { "epoch": 0.7091916682591248, "grad_norm": 1.40625, "learning_rate": 5.779696123796554e-05, "loss": 1.1598, "step": 4639 }, { "epoch": 0.7093445442384865, "grad_norm": 1.1484375, "learning_rate": 5.77929141165314e-05, "loss": 1.1973, "step": 4640 }, { "epoch": 0.7094974202178482, "grad_norm": 1.390625, "learning_rate": 5.77888670792121e-05, "loss": 1.3577, "step": 4641 }, { "epoch": 0.70965029619721, "grad_norm": 1.5234375, "learning_rate": 5.7784820126015806e-05, "loss": 1.2412, "step": 4642 }, { "epoch": 0.7098031721765717, "grad_norm": 1.40625, "learning_rate": 5.778077325695056e-05, "loss": 1.68, "step": 4643 }, { "epoch": 0.7099560481559335, "grad_norm": 1.328125, "learning_rate": 5.7776726472024414e-05, "loss": 1.2773, "step": 4644 }, { "epoch": 0.7101089241352952, "grad_norm": 1.234375, "learning_rate": 5.777267977124543e-05, "loss": 1.4034, "step": 4645 }, { "epoch": 0.710261800114657, "grad_norm": 1.1953125, "learning_rate": 5.776863315462169e-05, "loss": 1.3506, "step": 4646 }, { "epoch": 0.7104146760940188, "grad_norm": 1.3125, "learning_rate": 5.7764586622161245e-05, "loss": 1.5202, "step": 4647 }, { "epoch": 0.7105675520733805, "grad_norm": 1.25, "learning_rate": 5.776054017387213e-05, "loss": 1.3623, "step": 4648 }, { "epoch": 0.7107204280527422, "grad_norm": 1.5078125, "learning_rate": 5.775649380976248e-05, "loss": 1.2875, "step": 4649 }, { "epoch": 0.7108733040321039, "grad_norm": 1.2890625, "learning_rate": 5.775244752984036e-05, "loss": 1.4064, "step": 4650 }, { "epoch": 0.7110261800114657, "grad_norm": 1.328125, "learning_rate": 5.7748401334113725e-05, "loss": 1.331, "step": 4651 }, { "epoch": 0.7111790559908274, "grad_norm": 1.3359375, "learning_rate": 5.774435522259074e-05, "loss": 1.4032, "step": 4652 }, { "epoch": 0.7113319319701892, "grad_norm": 1.3828125, "learning_rate": 5.774030919527944e-05, "loss": 1.3594, "step": 4653 }, { "epoch": 0.7114848079495509, "grad_norm": 1.34375, "learning_rate": 5.773626325218788e-05, "loss": 1.4105, "step": 4654 }, { "epoch": 0.7116376839289127, "grad_norm": 1.4296875, "learning_rate": 5.7732217393324095e-05, "loss": 1.319, "step": 4655 }, { "epoch": 0.7117905599082744, "grad_norm": 1.328125, "learning_rate": 5.772817161869626e-05, "loss": 1.481, "step": 4656 }, { "epoch": 0.7119434358876362, "grad_norm": 1.4375, "learning_rate": 5.772412592831233e-05, "loss": 1.4589, "step": 4657 }, { "epoch": 0.7120963118669978, "grad_norm": 1.390625, "learning_rate": 5.772008032218035e-05, "loss": 1.1845, "step": 4658 }, { "epoch": 0.7122491878463596, "grad_norm": 1.3984375, "learning_rate": 5.771603480030847e-05, "loss": 1.5487, "step": 4659 }, { "epoch": 0.7124020638257214, "grad_norm": 1.296875, "learning_rate": 5.771198936270472e-05, "loss": 1.348, "step": 4660 }, { "epoch": 0.7125549398050831, "grad_norm": 1.4140625, "learning_rate": 5.770794400937716e-05, "loss": 1.4948, "step": 4661 }, { "epoch": 0.7127078157844449, "grad_norm": 1.390625, "learning_rate": 5.770389874033383e-05, "loss": 1.394, "step": 4662 }, { "epoch": 0.7128606917638066, "grad_norm": 1.3828125, "learning_rate": 5.769985355558282e-05, "loss": 1.3312, "step": 4663 }, { "epoch": 0.7130135677431684, "grad_norm": 1.328125, "learning_rate": 5.769580845513218e-05, "loss": 1.3513, "step": 4664 }, { "epoch": 0.7131664437225301, "grad_norm": 1.484375, "learning_rate": 5.769176343898992e-05, "loss": 1.5922, "step": 4665 }, { "epoch": 0.7133193197018919, "grad_norm": 1.375, "learning_rate": 5.768771850716426e-05, "loss": 1.691, "step": 4666 }, { "epoch": 0.7134721956812535, "grad_norm": 1.3125, "learning_rate": 5.768367365966305e-05, "loss": 1.2408, "step": 4667 }, { "epoch": 0.7136250716606153, "grad_norm": 1.3671875, "learning_rate": 5.767962889649451e-05, "loss": 1.3681, "step": 4668 }, { "epoch": 0.713777947639977, "grad_norm": 1.34375, "learning_rate": 5.767558421766668e-05, "loss": 1.248, "step": 4669 }, { "epoch": 0.7139308236193388, "grad_norm": 1.4140625, "learning_rate": 5.767153962318752e-05, "loss": 1.3287, "step": 4670 }, { "epoch": 0.7140836995987005, "grad_norm": 1.453125, "learning_rate": 5.7667495113065186e-05, "loss": 1.5678, "step": 4671 }, { "epoch": 0.7142365755780623, "grad_norm": 1.3359375, "learning_rate": 5.7663450687307705e-05, "loss": 1.3767, "step": 4672 }, { "epoch": 0.7143894515574241, "grad_norm": 1.3984375, "learning_rate": 5.765940634592315e-05, "loss": 1.2751, "step": 4673 }, { "epoch": 0.7145423275367858, "grad_norm": 1.4375, "learning_rate": 5.765536208891954e-05, "loss": 1.44, "step": 4674 }, { "epoch": 0.7146952035161476, "grad_norm": 1.3046875, "learning_rate": 5.7651317916305044e-05, "loss": 1.3498, "step": 4675 }, { "epoch": 0.7148480794955092, "grad_norm": 1.40625, "learning_rate": 5.76472738280876e-05, "loss": 1.5152, "step": 4676 }, { "epoch": 0.715000955474871, "grad_norm": 1.34375, "learning_rate": 5.764322982427528e-05, "loss": 1.2626, "step": 4677 }, { "epoch": 0.7151538314542327, "grad_norm": 1.4375, "learning_rate": 5.763918590487621e-05, "loss": 1.5635, "step": 4678 }, { "epoch": 0.7153067074335945, "grad_norm": 1.5546875, "learning_rate": 5.763514206989842e-05, "loss": 1.4908, "step": 4679 }, { "epoch": 0.7154595834129562, "grad_norm": 1.484375, "learning_rate": 5.763109831934996e-05, "loss": 1.3674, "step": 4680 }, { "epoch": 0.715612459392318, "grad_norm": 1.3984375, "learning_rate": 5.762705465323888e-05, "loss": 1.2611, "step": 4681 }, { "epoch": 0.7157653353716797, "grad_norm": 1.4765625, "learning_rate": 5.7623011071573305e-05, "loss": 1.4621, "step": 4682 }, { "epoch": 0.7159182113510415, "grad_norm": 1.3046875, "learning_rate": 5.7618967574361205e-05, "loss": 1.2411, "step": 4683 }, { "epoch": 0.7160710873304033, "grad_norm": 1.453125, "learning_rate": 5.761492416161064e-05, "loss": 1.4368, "step": 4684 }, { "epoch": 0.7162239633097649, "grad_norm": 1.453125, "learning_rate": 5.761088083332977e-05, "loss": 1.6246, "step": 4685 }, { "epoch": 0.7163768392891267, "grad_norm": 1.34375, "learning_rate": 5.760683758952652e-05, "loss": 1.1712, "step": 4686 }, { "epoch": 0.7165297152684884, "grad_norm": 1.4921875, "learning_rate": 5.7602794430209064e-05, "loss": 1.5207, "step": 4687 }, { "epoch": 0.7166825912478502, "grad_norm": 1.5546875, "learning_rate": 5.7598751355385386e-05, "loss": 1.4408, "step": 4688 }, { "epoch": 0.7168354672272119, "grad_norm": 1.3515625, "learning_rate": 5.759470836506358e-05, "loss": 1.4168, "step": 4689 }, { "epoch": 0.7169883432065737, "grad_norm": 1.2265625, "learning_rate": 5.759066545925167e-05, "loss": 1.1511, "step": 4690 }, { "epoch": 0.7171412191859354, "grad_norm": 1.4296875, "learning_rate": 5.7586622637957754e-05, "loss": 1.4842, "step": 4691 }, { "epoch": 0.7172940951652972, "grad_norm": 1.375, "learning_rate": 5.758257990118986e-05, "loss": 1.5447, "step": 4692 }, { "epoch": 0.7174469711446589, "grad_norm": 1.359375, "learning_rate": 5.757853724895601e-05, "loss": 1.5903, "step": 4693 }, { "epoch": 0.7175998471240206, "grad_norm": 1.4296875, "learning_rate": 5.757449468126434e-05, "loss": 1.6191, "step": 4694 }, { "epoch": 0.7177527231033823, "grad_norm": 1.453125, "learning_rate": 5.757045219812294e-05, "loss": 1.4493, "step": 4695 }, { "epoch": 0.7179055990827441, "grad_norm": 1.265625, "learning_rate": 5.756640979953969e-05, "loss": 1.377, "step": 4696 }, { "epoch": 0.7180584750621058, "grad_norm": 1.2734375, "learning_rate": 5.7562367485522795e-05, "loss": 1.1674, "step": 4697 }, { "epoch": 0.7182113510414676, "grad_norm": 1.28125, "learning_rate": 5.755832525608027e-05, "loss": 1.2136, "step": 4698 }, { "epoch": 0.7183642270208294, "grad_norm": 1.4296875, "learning_rate": 5.7554283111220155e-05, "loss": 1.4192, "step": 4699 }, { "epoch": 0.7185171030001911, "grad_norm": 1.375, "learning_rate": 5.7550241050950504e-05, "loss": 1.507, "step": 4700 }, { "epoch": 0.7186699789795529, "grad_norm": 1.28125, "learning_rate": 5.754619907527945e-05, "loss": 1.3787, "step": 4701 }, { "epoch": 0.7188228549589146, "grad_norm": 1.2578125, "learning_rate": 5.754215718421495e-05, "loss": 1.5397, "step": 4702 }, { "epoch": 0.7189757309382763, "grad_norm": 1.3125, "learning_rate": 5.753811537776506e-05, "loss": 1.2987, "step": 4703 }, { "epoch": 0.719128606917638, "grad_norm": 1.2421875, "learning_rate": 5.7534073655937904e-05, "loss": 1.2281, "step": 4704 }, { "epoch": 0.7192814828969998, "grad_norm": 1.25, "learning_rate": 5.753003201874152e-05, "loss": 1.2086, "step": 4705 }, { "epoch": 0.7194343588763615, "grad_norm": 1.34375, "learning_rate": 5.752599046618392e-05, "loss": 1.533, "step": 4706 }, { "epoch": 0.7195872348557233, "grad_norm": 1.359375, "learning_rate": 5.7521948998273215e-05, "loss": 1.3958, "step": 4707 }, { "epoch": 0.719740110835085, "grad_norm": 1.5234375, "learning_rate": 5.751790761501741e-05, "loss": 1.5091, "step": 4708 }, { "epoch": 0.7198929868144468, "grad_norm": 1.28125, "learning_rate": 5.751386631642457e-05, "loss": 1.2298, "step": 4709 }, { "epoch": 0.7200458627938086, "grad_norm": 1.3671875, "learning_rate": 5.7509825102502735e-05, "loss": 1.3405, "step": 4710 }, { "epoch": 0.7201987387731703, "grad_norm": 1.375, "learning_rate": 5.7505783973260055e-05, "loss": 1.539, "step": 4711 }, { "epoch": 0.720351614752532, "grad_norm": 1.359375, "learning_rate": 5.750174292870441e-05, "loss": 1.4314, "step": 4712 }, { "epoch": 0.7205044907318937, "grad_norm": 1.34375, "learning_rate": 5.749770196884401e-05, "loss": 1.2633, "step": 4713 }, { "epoch": 0.7206573667112555, "grad_norm": 1.3046875, "learning_rate": 5.749366109368686e-05, "loss": 1.2359, "step": 4714 }, { "epoch": 0.7208102426906172, "grad_norm": 1.359375, "learning_rate": 5.748962030324099e-05, "loss": 1.5504, "step": 4715 }, { "epoch": 0.720963118669979, "grad_norm": 1.46875, "learning_rate": 5.748557959751446e-05, "loss": 1.6193, "step": 4716 }, { "epoch": 0.7211159946493407, "grad_norm": 1.28125, "learning_rate": 5.748153897651535e-05, "loss": 1.2571, "step": 4717 }, { "epoch": 0.7212688706287025, "grad_norm": 1.3828125, "learning_rate": 5.747749844025168e-05, "loss": 1.553, "step": 4718 }, { "epoch": 0.7214217466080642, "grad_norm": 1.203125, "learning_rate": 5.747345798873147e-05, "loss": 1.1506, "step": 4719 }, { "epoch": 0.721574622587426, "grad_norm": 1.2734375, "learning_rate": 5.7469417621962906e-05, "loss": 1.162, "step": 4720 }, { "epoch": 0.7217274985667876, "grad_norm": 1.515625, "learning_rate": 5.7465377339953906e-05, "loss": 1.6444, "step": 4721 }, { "epoch": 0.7218803745461494, "grad_norm": 1.34375, "learning_rate": 5.7461337142712535e-05, "loss": 1.4009, "step": 4722 }, { "epoch": 0.7220332505255111, "grad_norm": 1.6953125, "learning_rate": 5.745729703024692e-05, "loss": 1.6948, "step": 4723 }, { "epoch": 0.7221861265048729, "grad_norm": 1.34375, "learning_rate": 5.7453257002565056e-05, "loss": 1.3945, "step": 4724 }, { "epoch": 0.7223390024842347, "grad_norm": 1.4609375, "learning_rate": 5.744921705967501e-05, "loss": 1.2893, "step": 4725 }, { "epoch": 0.7224918784635964, "grad_norm": 1.3359375, "learning_rate": 5.7445177201584775e-05, "loss": 1.2583, "step": 4726 }, { "epoch": 0.7226447544429582, "grad_norm": 1.7109375, "learning_rate": 5.744113742830256e-05, "loss": 1.5504, "step": 4727 }, { "epoch": 0.7227976304223199, "grad_norm": 1.4296875, "learning_rate": 5.743709773983628e-05, "loss": 1.28, "step": 4728 }, { "epoch": 0.7229505064016817, "grad_norm": 1.453125, "learning_rate": 5.743305813619395e-05, "loss": 1.8399, "step": 4729 }, { "epoch": 0.7231033823810433, "grad_norm": 1.421875, "learning_rate": 5.742901861738379e-05, "loss": 1.24, "step": 4730 }, { "epoch": 0.7232562583604051, "grad_norm": 1.3984375, "learning_rate": 5.742497918341367e-05, "loss": 1.4163, "step": 4731 }, { "epoch": 0.7234091343397668, "grad_norm": 1.5234375, "learning_rate": 5.742093983429177e-05, "loss": 1.7055, "step": 4732 }, { "epoch": 0.7235620103191286, "grad_norm": 1.328125, "learning_rate": 5.7416900570026064e-05, "loss": 1.3602, "step": 4733 }, { "epoch": 0.7237148862984903, "grad_norm": 1.3203125, "learning_rate": 5.741286139062465e-05, "loss": 1.3362, "step": 4734 }, { "epoch": 0.7238677622778521, "grad_norm": 1.4921875, "learning_rate": 5.740882229609556e-05, "loss": 1.4263, "step": 4735 }, { "epoch": 0.7240206382572139, "grad_norm": 1.3359375, "learning_rate": 5.740478328644683e-05, "loss": 1.289, "step": 4736 }, { "epoch": 0.7241735142365756, "grad_norm": 1.4765625, "learning_rate": 5.740074436168652e-05, "loss": 1.5852, "step": 4737 }, { "epoch": 0.7243263902159374, "grad_norm": 1.3203125, "learning_rate": 5.7396705521822636e-05, "loss": 1.226, "step": 4738 }, { "epoch": 0.724479266195299, "grad_norm": 1.390625, "learning_rate": 5.739266676686331e-05, "loss": 1.3146, "step": 4739 }, { "epoch": 0.7246321421746608, "grad_norm": 1.265625, "learning_rate": 5.7388628096816555e-05, "loss": 1.0871, "step": 4740 }, { "epoch": 0.7247850181540225, "grad_norm": 1.296875, "learning_rate": 5.738458951169041e-05, "loss": 1.388, "step": 4741 }, { "epoch": 0.7249378941333843, "grad_norm": 1.3828125, "learning_rate": 5.738055101149293e-05, "loss": 1.6257, "step": 4742 }, { "epoch": 0.725090770112746, "grad_norm": 1.359375, "learning_rate": 5.7376512596232165e-05, "loss": 1.1675, "step": 4743 }, { "epoch": 0.7252436460921078, "grad_norm": 1.4765625, "learning_rate": 5.7372474265916164e-05, "loss": 1.4943, "step": 4744 }, { "epoch": 0.7253965220714695, "grad_norm": 1.3984375, "learning_rate": 5.736843602055293e-05, "loss": 1.5227, "step": 4745 }, { "epoch": 0.7255493980508313, "grad_norm": 1.359375, "learning_rate": 5.736439786015062e-05, "loss": 1.4374, "step": 4746 }, { "epoch": 0.725702274030193, "grad_norm": 1.3828125, "learning_rate": 5.736035978471714e-05, "loss": 1.3096, "step": 4747 }, { "epoch": 0.7258551500095547, "grad_norm": 1.2109375, "learning_rate": 5.735632179426065e-05, "loss": 1.1258, "step": 4748 }, { "epoch": 0.7260080259889165, "grad_norm": 1.375, "learning_rate": 5.7352283888789215e-05, "loss": 1.5578, "step": 4749 }, { "epoch": 0.7261609019682782, "grad_norm": 1.4765625, "learning_rate": 5.734824606831073e-05, "loss": 1.4584, "step": 4750 }, { "epoch": 0.72631377794764, "grad_norm": 1.3515625, "learning_rate": 5.7344208332833385e-05, "loss": 1.3381, "step": 4751 }, { "epoch": 0.7264666539270017, "grad_norm": 1.2734375, "learning_rate": 5.734017068236517e-05, "loss": 1.3332, "step": 4752 }, { "epoch": 0.7266195299063635, "grad_norm": 1.40625, "learning_rate": 5.733613311691415e-05, "loss": 1.4832, "step": 4753 }, { "epoch": 0.7267724058857252, "grad_norm": 1.359375, "learning_rate": 5.733209563648836e-05, "loss": 1.3524, "step": 4754 }, { "epoch": 0.726925281865087, "grad_norm": 1.3046875, "learning_rate": 5.7328058241095795e-05, "loss": 1.2737, "step": 4755 }, { "epoch": 0.7270781578444487, "grad_norm": 1.4921875, "learning_rate": 5.732402093074465e-05, "loss": 1.504, "step": 4756 }, { "epoch": 0.7272310338238104, "grad_norm": 1.3359375, "learning_rate": 5.7319983705442805e-05, "loss": 1.2163, "step": 4757 }, { "epoch": 0.7273839098031721, "grad_norm": 1.3828125, "learning_rate": 5.731594656519841e-05, "loss": 1.3826, "step": 4758 }, { "epoch": 0.7275367857825339, "grad_norm": 1.421875, "learning_rate": 5.731190951001947e-05, "loss": 1.289, "step": 4759 }, { "epoch": 0.7276896617618956, "grad_norm": 1.3125, "learning_rate": 5.7307872539914045e-05, "loss": 1.1747, "step": 4760 }, { "epoch": 0.7278425377412574, "grad_norm": 1.265625, "learning_rate": 5.730383565489017e-05, "loss": 1.3727, "step": 4761 }, { "epoch": 0.7279954137206192, "grad_norm": 1.4609375, "learning_rate": 5.72997988549559e-05, "loss": 1.3527, "step": 4762 }, { "epoch": 0.7281482896999809, "grad_norm": 1.3125, "learning_rate": 5.729576214011927e-05, "loss": 1.3929, "step": 4763 }, { "epoch": 0.7283011656793427, "grad_norm": 1.625, "learning_rate": 5.729172551038828e-05, "loss": 1.3793, "step": 4764 }, { "epoch": 0.7284540416587044, "grad_norm": 1.4609375, "learning_rate": 5.728768896577113e-05, "loss": 1.61, "step": 4765 }, { "epoch": 0.7286069176380661, "grad_norm": 1.4921875, "learning_rate": 5.7283652506275655e-05, "loss": 1.5657, "step": 4766 }, { "epoch": 0.7287597936174278, "grad_norm": 1.4375, "learning_rate": 5.7279616131910043e-05, "loss": 1.6503, "step": 4767 }, { "epoch": 0.7289126695967896, "grad_norm": 1.4609375, "learning_rate": 5.72755798426823e-05, "loss": 1.3904, "step": 4768 }, { "epoch": 0.7290655455761513, "grad_norm": 1.4765625, "learning_rate": 5.7271543638600466e-05, "loss": 1.4932, "step": 4769 }, { "epoch": 0.7292184215555131, "grad_norm": 1.4375, "learning_rate": 5.7267507519672595e-05, "loss": 1.5738, "step": 4770 }, { "epoch": 0.7293712975348748, "grad_norm": 1.34375, "learning_rate": 5.726347148590672e-05, "loss": 1.5524, "step": 4771 }, { "epoch": 0.7295241735142366, "grad_norm": 1.3984375, "learning_rate": 5.725943553731087e-05, "loss": 1.4177, "step": 4772 }, { "epoch": 0.7296770494935984, "grad_norm": 1.3046875, "learning_rate": 5.7255399673893075e-05, "loss": 1.2746, "step": 4773 }, { "epoch": 0.7298299254729601, "grad_norm": 1.3515625, "learning_rate": 5.7251363895661445e-05, "loss": 1.3214, "step": 4774 }, { "epoch": 0.7299828014523218, "grad_norm": 1.4140625, "learning_rate": 5.724732820262403e-05, "loss": 1.4681, "step": 4775 }, { "epoch": 0.7301356774316835, "grad_norm": 1.3828125, "learning_rate": 5.7243292594788744e-05, "loss": 1.5153, "step": 4776 }, { "epoch": 0.7302885534110453, "grad_norm": 1.4375, "learning_rate": 5.723925707216375e-05, "loss": 1.5061, "step": 4777 }, { "epoch": 0.730441429390407, "grad_norm": 1.4921875, "learning_rate": 5.7235221634757066e-05, "loss": 1.3673, "step": 4778 }, { "epoch": 0.7305943053697688, "grad_norm": 1.390625, "learning_rate": 5.723118628257671e-05, "loss": 1.356, "step": 4779 }, { "epoch": 0.7307471813491305, "grad_norm": 1.234375, "learning_rate": 5.72271510156307e-05, "loss": 1.2935, "step": 4780 }, { "epoch": 0.7309000573284923, "grad_norm": 1.28125, "learning_rate": 5.722311583392721e-05, "loss": 1.2171, "step": 4781 }, { "epoch": 0.731052933307854, "grad_norm": 1.21875, "learning_rate": 5.721908073747413e-05, "loss": 1.3249, "step": 4782 }, { "epoch": 0.7312058092872158, "grad_norm": 1.4921875, "learning_rate": 5.7215045726279514e-05, "loss": 1.3167, "step": 4783 }, { "epoch": 0.7313586852665774, "grad_norm": 1.4765625, "learning_rate": 5.72110108003515e-05, "loss": 1.3817, "step": 4784 }, { "epoch": 0.7315115612459392, "grad_norm": 1.4765625, "learning_rate": 5.720697595969806e-05, "loss": 1.4948, "step": 4785 }, { "epoch": 0.731664437225301, "grad_norm": 1.359375, "learning_rate": 5.7202941204327264e-05, "loss": 1.5534, "step": 4786 }, { "epoch": 0.7318173132046627, "grad_norm": 1.453125, "learning_rate": 5.7198906534247134e-05, "loss": 1.2115, "step": 4787 }, { "epoch": 0.7319701891840245, "grad_norm": 1.4140625, "learning_rate": 5.719487194946572e-05, "loss": 1.1989, "step": 4788 }, { "epoch": 0.7321230651633862, "grad_norm": 1.21875, "learning_rate": 5.719083744999107e-05, "loss": 1.3356, "step": 4789 }, { "epoch": 0.732275941142748, "grad_norm": 1.4140625, "learning_rate": 5.718680303583116e-05, "loss": 1.3835, "step": 4790 }, { "epoch": 0.7324288171221097, "grad_norm": 1.28125, "learning_rate": 5.718276870699417e-05, "loss": 1.0485, "step": 4791 }, { "epoch": 0.7325816931014715, "grad_norm": 1.4765625, "learning_rate": 5.7178734463487974e-05, "loss": 1.4271, "step": 4792 }, { "epoch": 0.7327345690808331, "grad_norm": 1.421875, "learning_rate": 5.717470030532073e-05, "loss": 1.3754, "step": 4793 }, { "epoch": 0.7328874450601949, "grad_norm": 1.515625, "learning_rate": 5.7170666232500466e-05, "loss": 1.5685, "step": 4794 }, { "epoch": 0.7330403210395566, "grad_norm": 1.4921875, "learning_rate": 5.7166632245035136e-05, "loss": 1.5382, "step": 4795 }, { "epoch": 0.7331931970189184, "grad_norm": 1.4453125, "learning_rate": 5.716259834293287e-05, "loss": 1.2366, "step": 4796 }, { "epoch": 0.7333460729982801, "grad_norm": 1.2265625, "learning_rate": 5.715856452620169e-05, "loss": 1.3078, "step": 4797 }, { "epoch": 0.7334989489776419, "grad_norm": 1.3359375, "learning_rate": 5.7154530794849604e-05, "loss": 1.3343, "step": 4798 }, { "epoch": 0.7336518249570037, "grad_norm": 1.21875, "learning_rate": 5.715049714888463e-05, "loss": 1.1696, "step": 4799 }, { "epoch": 0.7338047009363654, "grad_norm": 1.4375, "learning_rate": 5.714646358831494e-05, "loss": 1.7519, "step": 4800 }, { "epoch": 0.7339575769157272, "grad_norm": 1.3984375, "learning_rate": 5.7142430113148415e-05, "loss": 1.0776, "step": 4801 }, { "epoch": 0.7341104528950888, "grad_norm": 1.375, "learning_rate": 5.713839672339314e-05, "loss": 1.331, "step": 4802 }, { "epoch": 0.7342633288744506, "grad_norm": 1.5, "learning_rate": 5.713436341905718e-05, "loss": 1.5576, "step": 4803 }, { "epoch": 0.7344162048538123, "grad_norm": 1.5, "learning_rate": 5.7130330200148586e-05, "loss": 1.3988, "step": 4804 }, { "epoch": 0.7345690808331741, "grad_norm": 1.390625, "learning_rate": 5.712629706667536e-05, "loss": 1.1628, "step": 4805 }, { "epoch": 0.7347219568125358, "grad_norm": 1.46875, "learning_rate": 5.712226401864551e-05, "loss": 1.2593, "step": 4806 }, { "epoch": 0.7348748327918976, "grad_norm": 1.3515625, "learning_rate": 5.7118231056067194e-05, "loss": 1.112, "step": 4807 }, { "epoch": 0.7350277087712593, "grad_norm": 1.421875, "learning_rate": 5.711419817894834e-05, "loss": 1.5581, "step": 4808 }, { "epoch": 0.7351805847506211, "grad_norm": 1.5625, "learning_rate": 5.7110165387296964e-05, "loss": 1.4776, "step": 4809 }, { "epoch": 0.7353334607299828, "grad_norm": 1.4296875, "learning_rate": 5.7106132681121236e-05, "loss": 1.4007, "step": 4810 }, { "epoch": 0.7354863367093445, "grad_norm": 1.4453125, "learning_rate": 5.710210006042904e-05, "loss": 1.3388, "step": 4811 }, { "epoch": 0.7356392126887062, "grad_norm": 1.21875, "learning_rate": 5.7098067525228526e-05, "loss": 1.0894, "step": 4812 }, { "epoch": 0.735792088668068, "grad_norm": 1.21875, "learning_rate": 5.709403507552767e-05, "loss": 1.2644, "step": 4813 }, { "epoch": 0.7359449646474298, "grad_norm": 1.265625, "learning_rate": 5.709000271133453e-05, "loss": 1.1465, "step": 4814 }, { "epoch": 0.7360978406267915, "grad_norm": 1.3671875, "learning_rate": 5.708597043265715e-05, "loss": 1.3556, "step": 4815 }, { "epoch": 0.7362507166061533, "grad_norm": 1.4453125, "learning_rate": 5.7081938239503564e-05, "loss": 1.3744, "step": 4816 }, { "epoch": 0.736403592585515, "grad_norm": 1.390625, "learning_rate": 5.707790613188178e-05, "loss": 1.2327, "step": 4817 }, { "epoch": 0.7365564685648768, "grad_norm": 1.4609375, "learning_rate": 5.707387410979981e-05, "loss": 1.4156, "step": 4818 }, { "epoch": 0.7367093445442385, "grad_norm": 1.453125, "learning_rate": 5.706984217326579e-05, "loss": 1.4572, "step": 4819 }, { "epoch": 0.7368622205236002, "grad_norm": 1.53125, "learning_rate": 5.706581032228771e-05, "loss": 1.2782, "step": 4820 }, { "epoch": 0.7370150965029619, "grad_norm": 1.328125, "learning_rate": 5.7061778556873534e-05, "loss": 1.2709, "step": 4821 }, { "epoch": 0.7371679724823237, "grad_norm": 1.375, "learning_rate": 5.7057746877031385e-05, "loss": 1.4768, "step": 4822 }, { "epoch": 0.7373208484616854, "grad_norm": 1.3515625, "learning_rate": 5.705371528276926e-05, "loss": 1.2118, "step": 4823 }, { "epoch": 0.7374737244410472, "grad_norm": 1.4296875, "learning_rate": 5.704968377409521e-05, "loss": 1.2959, "step": 4824 }, { "epoch": 0.737626600420409, "grad_norm": 1.265625, "learning_rate": 5.704565235101722e-05, "loss": 1.3708, "step": 4825 }, { "epoch": 0.7377794763997707, "grad_norm": 1.421875, "learning_rate": 5.7041621013543444e-05, "loss": 1.4624, "step": 4826 }, { "epoch": 0.7379323523791325, "grad_norm": 1.3828125, "learning_rate": 5.703758976168181e-05, "loss": 1.3565, "step": 4827 }, { "epoch": 0.7380852283584942, "grad_norm": 1.296875, "learning_rate": 5.703355859544033e-05, "loss": 1.4914, "step": 4828 }, { "epoch": 0.7382381043378559, "grad_norm": 1.3515625, "learning_rate": 5.702952751482711e-05, "loss": 1.4853, "step": 4829 }, { "epoch": 0.7383909803172176, "grad_norm": 1.15625, "learning_rate": 5.702549651985019e-05, "loss": 0.9188, "step": 4830 }, { "epoch": 0.7385438562965794, "grad_norm": 1.3984375, "learning_rate": 5.7021465610517544e-05, "loss": 1.4928, "step": 4831 }, { "epoch": 0.7386967322759411, "grad_norm": 1.390625, "learning_rate": 5.701743478683726e-05, "loss": 1.5516, "step": 4832 }, { "epoch": 0.7388496082553029, "grad_norm": 1.21875, "learning_rate": 5.701340404881732e-05, "loss": 1.1345, "step": 4833 }, { "epoch": 0.7390024842346646, "grad_norm": 1.4296875, "learning_rate": 5.70093733964658e-05, "loss": 1.3773, "step": 4834 }, { "epoch": 0.7391553602140264, "grad_norm": 1.2578125, "learning_rate": 5.700534282979067e-05, "loss": 1.4047, "step": 4835 }, { "epoch": 0.7393082361933881, "grad_norm": 1.1640625, "learning_rate": 5.7001312348800096e-05, "loss": 1.0897, "step": 4836 }, { "epoch": 0.7394611121727499, "grad_norm": 1.28125, "learning_rate": 5.699728195350194e-05, "loss": 1.1422, "step": 4837 }, { "epoch": 0.7396139881521115, "grad_norm": 1.3203125, "learning_rate": 5.6993251643904344e-05, "loss": 1.2144, "step": 4838 }, { "epoch": 0.7397668641314733, "grad_norm": 1.484375, "learning_rate": 5.6989221420015325e-05, "loss": 1.3331, "step": 4839 }, { "epoch": 0.7399197401108351, "grad_norm": 1.40625, "learning_rate": 5.6985191281842895e-05, "loss": 1.3919, "step": 4840 }, { "epoch": 0.7400726160901968, "grad_norm": 1.4375, "learning_rate": 5.69811612293951e-05, "loss": 1.351, "step": 4841 }, { "epoch": 0.7402254920695586, "grad_norm": 1.3671875, "learning_rate": 5.697713126267996e-05, "loss": 1.4577, "step": 4842 }, { "epoch": 0.7403783680489203, "grad_norm": 1.375, "learning_rate": 5.697310138170552e-05, "loss": 1.2823, "step": 4843 }, { "epoch": 0.7405312440282821, "grad_norm": 1.375, "learning_rate": 5.696907158647975e-05, "loss": 1.4335, "step": 4844 }, { "epoch": 0.7406841200076438, "grad_norm": 1.3515625, "learning_rate": 5.6965041877010825e-05, "loss": 1.47, "step": 4845 }, { "epoch": 0.7408369959870056, "grad_norm": 1.75, "learning_rate": 5.696101225330663e-05, "loss": 1.4211, "step": 4846 }, { "epoch": 0.7409898719663672, "grad_norm": 1.3984375, "learning_rate": 5.695698271537523e-05, "loss": 1.1838, "step": 4847 }, { "epoch": 0.741142747945729, "grad_norm": 1.3203125, "learning_rate": 5.69529532632247e-05, "loss": 1.4282, "step": 4848 }, { "epoch": 0.7412956239250907, "grad_norm": 1.4296875, "learning_rate": 5.694892389686306e-05, "loss": 1.4078, "step": 4849 }, { "epoch": 0.7414484999044525, "grad_norm": 1.453125, "learning_rate": 5.694489461629831e-05, "loss": 1.3898, "step": 4850 }, { "epoch": 0.7416013758838143, "grad_norm": 1.296875, "learning_rate": 5.694086542153846e-05, "loss": 1.3427, "step": 4851 }, { "epoch": 0.741754251863176, "grad_norm": 1.1875, "learning_rate": 5.693683631259166e-05, "loss": 1.1382, "step": 4852 }, { "epoch": 0.7419071278425378, "grad_norm": 1.5078125, "learning_rate": 5.6932807289465816e-05, "loss": 1.6109, "step": 4853 }, { "epoch": 0.7420600038218995, "grad_norm": 1.34375, "learning_rate": 5.6928778352168946e-05, "loss": 1.3482, "step": 4854 }, { "epoch": 0.7422128798012613, "grad_norm": 1.3671875, "learning_rate": 5.692474950070922e-05, "loss": 1.3474, "step": 4855 }, { "epoch": 0.7423657557806229, "grad_norm": 1.5, "learning_rate": 5.692072073509449e-05, "loss": 1.3447, "step": 4856 }, { "epoch": 0.7425186317599847, "grad_norm": 1.5859375, "learning_rate": 5.6916692055332923e-05, "loss": 1.47, "step": 4857 }, { "epoch": 0.7426715077393464, "grad_norm": 1.28125, "learning_rate": 5.6912663461432494e-05, "loss": 1.1868, "step": 4858 }, { "epoch": 0.7428243837187082, "grad_norm": 1.609375, "learning_rate": 5.690863495340123e-05, "loss": 1.3705, "step": 4859 }, { "epoch": 0.7429772596980699, "grad_norm": 1.5703125, "learning_rate": 5.690460653124716e-05, "loss": 1.4529, "step": 4860 }, { "epoch": 0.7431301356774317, "grad_norm": 1.53125, "learning_rate": 5.6900578194978326e-05, "loss": 1.2794, "step": 4861 }, { "epoch": 0.7432830116567934, "grad_norm": 1.34375, "learning_rate": 5.689654994460274e-05, "loss": 1.3304, "step": 4862 }, { "epoch": 0.7434358876361552, "grad_norm": 1.2734375, "learning_rate": 5.6892521780128405e-05, "loss": 1.4197, "step": 4863 }, { "epoch": 0.743588763615517, "grad_norm": 1.53125, "learning_rate": 5.688849370156342e-05, "loss": 1.3962, "step": 4864 }, { "epoch": 0.7437416395948786, "grad_norm": 1.4375, "learning_rate": 5.688446570891576e-05, "loss": 1.4326, "step": 4865 }, { "epoch": 0.7438945155742404, "grad_norm": 1.46875, "learning_rate": 5.6880437802193476e-05, "loss": 1.4643, "step": 4866 }, { "epoch": 0.7440473915536021, "grad_norm": 1.578125, "learning_rate": 5.6876409981404575e-05, "loss": 1.4621, "step": 4867 }, { "epoch": 0.7442002675329639, "grad_norm": 1.328125, "learning_rate": 5.68723822465571e-05, "loss": 1.2955, "step": 4868 }, { "epoch": 0.7443531435123256, "grad_norm": 1.1640625, "learning_rate": 5.686835459765907e-05, "loss": 1.1275, "step": 4869 }, { "epoch": 0.7445060194916874, "grad_norm": 1.4296875, "learning_rate": 5.686432703471847e-05, "loss": 1.3038, "step": 4870 }, { "epoch": 0.7446588954710491, "grad_norm": 1.40625, "learning_rate": 5.686029955774346e-05, "loss": 1.4885, "step": 4871 }, { "epoch": 0.7448117714504109, "grad_norm": 1.4765625, "learning_rate": 5.6856272166741895e-05, "loss": 1.681, "step": 4872 }, { "epoch": 0.7449646474297726, "grad_norm": 1.4140625, "learning_rate": 5.685224486172191e-05, "loss": 1.294, "step": 4873 }, { "epoch": 0.7451175234091343, "grad_norm": 1.328125, "learning_rate": 5.684821764269155e-05, "loss": 1.2509, "step": 4874 }, { "epoch": 0.745270399388496, "grad_norm": 1.3515625, "learning_rate": 5.684419050965872e-05, "loss": 1.3386, "step": 4875 }, { "epoch": 0.7454232753678578, "grad_norm": 1.296875, "learning_rate": 5.684016346263156e-05, "loss": 1.3727, "step": 4876 }, { "epoch": 0.7455761513472196, "grad_norm": 1.453125, "learning_rate": 5.6836136501618056e-05, "loss": 1.6231, "step": 4877 }, { "epoch": 0.7457290273265813, "grad_norm": 1.3125, "learning_rate": 5.683210962662622e-05, "loss": 1.4139, "step": 4878 }, { "epoch": 0.7458819033059431, "grad_norm": 1.453125, "learning_rate": 5.6828082837664095e-05, "loss": 1.6113, "step": 4879 }, { "epoch": 0.7460347792853048, "grad_norm": 1.390625, "learning_rate": 5.682405613473967e-05, "loss": 1.4058, "step": 4880 }, { "epoch": 0.7461876552646666, "grad_norm": 1.484375, "learning_rate": 5.682002951786107e-05, "loss": 1.4555, "step": 4881 }, { "epoch": 0.7463405312440283, "grad_norm": 1.234375, "learning_rate": 5.6816002987036175e-05, "loss": 1.2503, "step": 4882 }, { "epoch": 0.74649340722339, "grad_norm": 1.265625, "learning_rate": 5.681197654227313e-05, "loss": 1.1073, "step": 4883 }, { "epoch": 0.7466462832027517, "grad_norm": 1.421875, "learning_rate": 5.6807950183579916e-05, "loss": 1.4935, "step": 4884 }, { "epoch": 0.7467991591821135, "grad_norm": 1.484375, "learning_rate": 5.680392391096454e-05, "loss": 1.6847, "step": 4885 }, { "epoch": 0.7469520351614752, "grad_norm": 1.3515625, "learning_rate": 5.6799897724435055e-05, "loss": 1.3593, "step": 4886 }, { "epoch": 0.747104911140837, "grad_norm": 1.28125, "learning_rate": 5.679587162399947e-05, "loss": 1.3532, "step": 4887 }, { "epoch": 0.7472577871201987, "grad_norm": 1.3671875, "learning_rate": 5.679184560966582e-05, "loss": 1.4669, "step": 4888 }, { "epoch": 0.7474106630995605, "grad_norm": 1.375, "learning_rate": 5.678781968144207e-05, "loss": 1.3026, "step": 4889 }, { "epoch": 0.7475635390789223, "grad_norm": 1.3515625, "learning_rate": 5.678379383933636e-05, "loss": 1.2697, "step": 4890 }, { "epoch": 0.747716415058284, "grad_norm": 1.375, "learning_rate": 5.6779768083356574e-05, "loss": 1.5828, "step": 4891 }, { "epoch": 0.7478692910376457, "grad_norm": 1.359375, "learning_rate": 5.677574241351085e-05, "loss": 1.5121, "step": 4892 }, { "epoch": 0.7480221670170074, "grad_norm": 1.46875, "learning_rate": 5.6771716829807165e-05, "loss": 1.4702, "step": 4893 }, { "epoch": 0.7481750429963692, "grad_norm": 1.3359375, "learning_rate": 5.676769133225355e-05, "loss": 1.3624, "step": 4894 }, { "epoch": 0.7483279189757309, "grad_norm": 1.375, "learning_rate": 5.6763665920858e-05, "loss": 1.3132, "step": 4895 }, { "epoch": 0.7484807949550927, "grad_norm": 1.2265625, "learning_rate": 5.675964059562857e-05, "loss": 1.3221, "step": 4896 }, { "epoch": 0.7486336709344544, "grad_norm": 1.3671875, "learning_rate": 5.6755615356573275e-05, "loss": 1.5445, "step": 4897 }, { "epoch": 0.7487865469138162, "grad_norm": 1.4765625, "learning_rate": 5.675159020370009e-05, "loss": 1.574, "step": 4898 }, { "epoch": 0.7489394228931779, "grad_norm": 1.5703125, "learning_rate": 5.674756513701714e-05, "loss": 1.3425, "step": 4899 }, { "epoch": 0.7490922988725397, "grad_norm": 1.34375, "learning_rate": 5.674354015653239e-05, "loss": 1.1699, "step": 4900 }, { "epoch": 0.7492451748519013, "grad_norm": 1.5546875, "learning_rate": 5.6739515262253796e-05, "loss": 1.3032, "step": 4901 }, { "epoch": 0.7493980508312631, "grad_norm": 1.296875, "learning_rate": 5.6735490454189466e-05, "loss": 1.2471, "step": 4902 }, { "epoch": 0.7495509268106249, "grad_norm": 1.4296875, "learning_rate": 5.6731465732347425e-05, "loss": 1.4975, "step": 4903 }, { "epoch": 0.7497038027899866, "grad_norm": 1.9921875, "learning_rate": 5.6727441096735635e-05, "loss": 1.444, "step": 4904 }, { "epoch": 0.7498566787693484, "grad_norm": 1.3671875, "learning_rate": 5.672341654736212e-05, "loss": 1.1263, "step": 4905 }, { "epoch": 0.7500095547487101, "grad_norm": 1.3984375, "learning_rate": 5.6719392084235e-05, "loss": 1.3982, "step": 4906 }, { "epoch": 0.7501624307280719, "grad_norm": 1.375, "learning_rate": 5.671536770736219e-05, "loss": 1.2054, "step": 4907 }, { "epoch": 0.7503153067074336, "grad_norm": 1.421875, "learning_rate": 5.6711343416751694e-05, "loss": 1.3455, "step": 4908 }, { "epoch": 0.7504681826867954, "grad_norm": 1.2578125, "learning_rate": 5.6707319212411615e-05, "loss": 1.383, "step": 4909 }, { "epoch": 0.750621058666157, "grad_norm": 1.2578125, "learning_rate": 5.670329509434995e-05, "loss": 1.3592, "step": 4910 }, { "epoch": 0.7507739346455188, "grad_norm": 1.453125, "learning_rate": 5.669927106257471e-05, "loss": 1.6832, "step": 4911 }, { "epoch": 0.7509268106248805, "grad_norm": 1.421875, "learning_rate": 5.669524711709391e-05, "loss": 1.3622, "step": 4912 }, { "epoch": 0.7510796866042423, "grad_norm": 1.3828125, "learning_rate": 5.669122325791556e-05, "loss": 1.5121, "step": 4913 }, { "epoch": 0.751232562583604, "grad_norm": 1.3359375, "learning_rate": 5.668719948504769e-05, "loss": 1.2989, "step": 4914 }, { "epoch": 0.7513854385629658, "grad_norm": 1.3359375, "learning_rate": 5.668317579849829e-05, "loss": 1.3086, "step": 4915 }, { "epoch": 0.7515383145423276, "grad_norm": 1.4765625, "learning_rate": 5.6679152198275485e-05, "loss": 1.4386, "step": 4916 }, { "epoch": 0.7516911905216893, "grad_norm": 1.4453125, "learning_rate": 5.6675128684387136e-05, "loss": 1.5687, "step": 4917 }, { "epoch": 0.7518440665010511, "grad_norm": 1.2265625, "learning_rate": 5.667110525684138e-05, "loss": 1.4212, "step": 4918 }, { "epoch": 0.7519969424804127, "grad_norm": 1.1875, "learning_rate": 5.6667081915646234e-05, "loss": 1.1881, "step": 4919 }, { "epoch": 0.7521498184597745, "grad_norm": 1.2265625, "learning_rate": 5.666305866080961e-05, "loss": 1.1616, "step": 4920 }, { "epoch": 0.7523026944391362, "grad_norm": 1.4296875, "learning_rate": 5.665903549233962e-05, "loss": 1.4536, "step": 4921 }, { "epoch": 0.752455570418498, "grad_norm": 1.2890625, "learning_rate": 5.6655012410244265e-05, "loss": 1.1983, "step": 4922 }, { "epoch": 0.7526084463978597, "grad_norm": 1.3828125, "learning_rate": 5.6650989414531555e-05, "loss": 1.266, "step": 4923 }, { "epoch": 0.7527613223772215, "grad_norm": 1.359375, "learning_rate": 5.664696650520946e-05, "loss": 1.3466, "step": 4924 }, { "epoch": 0.7529141983565832, "grad_norm": 1.3984375, "learning_rate": 5.6642943682286154e-05, "loss": 1.5906, "step": 4925 }, { "epoch": 0.753067074335945, "grad_norm": 1.40625, "learning_rate": 5.663892094576948e-05, "loss": 1.3605, "step": 4926 }, { "epoch": 0.7532199503153068, "grad_norm": 1.40625, "learning_rate": 5.6634898295667486e-05, "loss": 1.3464, "step": 4927 }, { "epoch": 0.7533728262946684, "grad_norm": 1.296875, "learning_rate": 5.663087573198825e-05, "loss": 1.3295, "step": 4928 }, { "epoch": 0.7535257022740302, "grad_norm": 1.4921875, "learning_rate": 5.6626853254739775e-05, "loss": 1.4456, "step": 4929 }, { "epoch": 0.7536785782533919, "grad_norm": 1.609375, "learning_rate": 5.662283086393005e-05, "loss": 1.5813, "step": 4930 }, { "epoch": 0.7538314542327537, "grad_norm": 1.4453125, "learning_rate": 5.6618808559567084e-05, "loss": 1.4544, "step": 4931 }, { "epoch": 0.7539843302121154, "grad_norm": 1.34375, "learning_rate": 5.6614786341658986e-05, "loss": 1.2736, "step": 4932 }, { "epoch": 0.7541372061914772, "grad_norm": 1.34375, "learning_rate": 5.6610764210213654e-05, "loss": 1.2514, "step": 4933 }, { "epoch": 0.7542900821708389, "grad_norm": 1.515625, "learning_rate": 5.6606742165239116e-05, "loss": 1.3478, "step": 4934 }, { "epoch": 0.7544429581502007, "grad_norm": 1.4609375, "learning_rate": 5.660272020674351e-05, "loss": 1.4702, "step": 4935 }, { "epoch": 0.7545958341295624, "grad_norm": 1.390625, "learning_rate": 5.659869833473466e-05, "loss": 1.5334, "step": 4936 }, { "epoch": 0.7547487101089241, "grad_norm": 1.25, "learning_rate": 5.659467654922074e-05, "loss": 1.1606, "step": 4937 }, { "epoch": 0.7549015860882858, "grad_norm": 1.3828125, "learning_rate": 5.65906548502097e-05, "loss": 1.4673, "step": 4938 }, { "epoch": 0.7550544620676476, "grad_norm": 1.3046875, "learning_rate": 5.658663323770957e-05, "loss": 1.325, "step": 4939 }, { "epoch": 0.7552073380470093, "grad_norm": 1.28125, "learning_rate": 5.6582611711728364e-05, "loss": 1.3405, "step": 4940 }, { "epoch": 0.7553602140263711, "grad_norm": 1.3984375, "learning_rate": 5.657859027227408e-05, "loss": 1.3336, "step": 4941 }, { "epoch": 0.7555130900057329, "grad_norm": 1.828125, "learning_rate": 5.657456891935475e-05, "loss": 1.4573, "step": 4942 }, { "epoch": 0.7556659659850946, "grad_norm": 1.328125, "learning_rate": 5.6570547652978334e-05, "loss": 1.2901, "step": 4943 }, { "epoch": 0.7558188419644564, "grad_norm": 1.34375, "learning_rate": 5.6566526473152935e-05, "loss": 1.3118, "step": 4944 }, { "epoch": 0.7559717179438181, "grad_norm": 1.6171875, "learning_rate": 5.6562505379886565e-05, "loss": 1.4036, "step": 4945 }, { "epoch": 0.7561245939231798, "grad_norm": 1.2890625, "learning_rate": 5.6558484373187125e-05, "loss": 1.3475, "step": 4946 }, { "epoch": 0.7562774699025415, "grad_norm": 1.3203125, "learning_rate": 5.6554463453062745e-05, "loss": 1.1345, "step": 4947 }, { "epoch": 0.7564303458819033, "grad_norm": 1.3515625, "learning_rate": 5.655044261952138e-05, "loss": 1.2635, "step": 4948 }, { "epoch": 0.756583221861265, "grad_norm": 1.4609375, "learning_rate": 5.654642187257107e-05, "loss": 1.8115, "step": 4949 }, { "epoch": 0.7567360978406268, "grad_norm": 1.2734375, "learning_rate": 5.6542401212219774e-05, "loss": 1.4515, "step": 4950 }, { "epoch": 0.7568889738199885, "grad_norm": 2.1875, "learning_rate": 5.653838063847563e-05, "loss": 1.5088, "step": 4951 }, { "epoch": 0.7570418497993503, "grad_norm": 1.3046875, "learning_rate": 5.653436015134652e-05, "loss": 1.3203, "step": 4952 }, { "epoch": 0.757194725778712, "grad_norm": 1.328125, "learning_rate": 5.653033975084047e-05, "loss": 1.2493, "step": 4953 }, { "epoch": 0.7573476017580738, "grad_norm": 1.578125, "learning_rate": 5.652631943696556e-05, "loss": 1.5092, "step": 4954 }, { "epoch": 0.7575004777374355, "grad_norm": 1.484375, "learning_rate": 5.6522299209729766e-05, "loss": 1.4298, "step": 4955 }, { "epoch": 0.7576533537167972, "grad_norm": 1.40625, "learning_rate": 5.651827906914111e-05, "loss": 1.2659, "step": 4956 }, { "epoch": 0.757806229696159, "grad_norm": 1.2890625, "learning_rate": 5.651425901520759e-05, "loss": 1.22, "step": 4957 }, { "epoch": 0.7579591056755207, "grad_norm": 1.4765625, "learning_rate": 5.651023904793723e-05, "loss": 1.5434, "step": 4958 }, { "epoch": 0.7581119816548825, "grad_norm": 1.25, "learning_rate": 5.650621916733802e-05, "loss": 1.1819, "step": 4959 }, { "epoch": 0.7582648576342442, "grad_norm": 1.515625, "learning_rate": 5.650219937341797e-05, "loss": 1.2793, "step": 4960 }, { "epoch": 0.758417733613606, "grad_norm": 1.3671875, "learning_rate": 5.649817966618518e-05, "loss": 1.6229, "step": 4961 }, { "epoch": 0.7585706095929677, "grad_norm": 1.3828125, "learning_rate": 5.64941600456475e-05, "loss": 1.428, "step": 4962 }, { "epoch": 0.7587234855723295, "grad_norm": 1.40625, "learning_rate": 5.649014051181307e-05, "loss": 1.4536, "step": 4963 }, { "epoch": 0.7588763615516911, "grad_norm": 1.390625, "learning_rate": 5.648612106468987e-05, "loss": 1.2944, "step": 4964 }, { "epoch": 0.7590292375310529, "grad_norm": 1.46875, "learning_rate": 5.6482101704285895e-05, "loss": 1.3848, "step": 4965 }, { "epoch": 0.7591821135104146, "grad_norm": 1.3359375, "learning_rate": 5.647808243060917e-05, "loss": 1.4193, "step": 4966 }, { "epoch": 0.7593349894897764, "grad_norm": 1.2890625, "learning_rate": 5.6474063243667676e-05, "loss": 1.3026, "step": 4967 }, { "epoch": 0.7594878654691382, "grad_norm": 1.3984375, "learning_rate": 5.647004414346946e-05, "loss": 1.2835, "step": 4968 }, { "epoch": 0.7596407414484999, "grad_norm": 1.2890625, "learning_rate": 5.646602513002246e-05, "loss": 1.5402, "step": 4969 }, { "epoch": 0.7597936174278617, "grad_norm": 1.59375, "learning_rate": 5.646200620333482e-05, "loss": 1.5071, "step": 4970 }, { "epoch": 0.7599464934072234, "grad_norm": 1.3046875, "learning_rate": 5.6457987363414436e-05, "loss": 1.228, "step": 4971 }, { "epoch": 0.7600993693865852, "grad_norm": 1.7734375, "learning_rate": 5.6453968610269306e-05, "loss": 1.6229, "step": 4972 }, { "epoch": 0.7602522453659468, "grad_norm": 1.4453125, "learning_rate": 5.644994994390753e-05, "loss": 1.7444, "step": 4973 }, { "epoch": 0.7604051213453086, "grad_norm": 1.5390625, "learning_rate": 5.644593136433706e-05, "loss": 1.4491, "step": 4974 }, { "epoch": 0.7605579973246703, "grad_norm": 1.4140625, "learning_rate": 5.6441912871565913e-05, "loss": 1.4384, "step": 4975 }, { "epoch": 0.7607108733040321, "grad_norm": 1.3515625, "learning_rate": 5.643789446560207e-05, "loss": 1.3746, "step": 4976 }, { "epoch": 0.7608637492833938, "grad_norm": 1.328125, "learning_rate": 5.6433876146453646e-05, "loss": 1.3039, "step": 4977 }, { "epoch": 0.7610166252627556, "grad_norm": 1.296875, "learning_rate": 5.642985791412852e-05, "loss": 1.2817, "step": 4978 }, { "epoch": 0.7611695012421174, "grad_norm": 1.328125, "learning_rate": 5.6425839768634726e-05, "loss": 1.3083, "step": 4979 }, { "epoch": 0.7613223772214791, "grad_norm": 1.484375, "learning_rate": 5.6421821709980374e-05, "loss": 1.5497, "step": 4980 }, { "epoch": 0.7614752532008409, "grad_norm": 1.3984375, "learning_rate": 5.641780373817331e-05, "loss": 1.1817, "step": 4981 }, { "epoch": 0.7616281291802025, "grad_norm": 1.5234375, "learning_rate": 5.641378585322168e-05, "loss": 1.4099, "step": 4982 }, { "epoch": 0.7617810051595643, "grad_norm": 1.296875, "learning_rate": 5.640976805513342e-05, "loss": 1.2089, "step": 4983 }, { "epoch": 0.761933881138926, "grad_norm": 1.375, "learning_rate": 5.640575034391655e-05, "loss": 1.4304, "step": 4984 }, { "epoch": 0.7620867571182878, "grad_norm": 1.484375, "learning_rate": 5.640173271957911e-05, "loss": 1.396, "step": 4985 }, { "epoch": 0.7622396330976495, "grad_norm": 1.4296875, "learning_rate": 5.639771518212906e-05, "loss": 1.5075, "step": 4986 }, { "epoch": 0.7623925090770113, "grad_norm": 1.390625, "learning_rate": 5.639369773157442e-05, "loss": 1.3274, "step": 4987 }, { "epoch": 0.762545385056373, "grad_norm": 1.3671875, "learning_rate": 5.638968036792317e-05, "loss": 1.239, "step": 4988 }, { "epoch": 0.7626982610357348, "grad_norm": 1.578125, "learning_rate": 5.6385663091183385e-05, "loss": 1.5581, "step": 4989 }, { "epoch": 0.7628511370150965, "grad_norm": 1.296875, "learning_rate": 5.6381645901363035e-05, "loss": 1.25, "step": 4990 }, { "epoch": 0.7630040129944582, "grad_norm": 1.3671875, "learning_rate": 5.6377628798470126e-05, "loss": 1.4192, "step": 4991 }, { "epoch": 0.76315688897382, "grad_norm": 1.3828125, "learning_rate": 5.637361178251266e-05, "loss": 1.4323, "step": 4992 }, { "epoch": 0.7633097649531817, "grad_norm": 1.546875, "learning_rate": 5.636959485349864e-05, "loss": 1.3619, "step": 4993 }, { "epoch": 0.7634626409325435, "grad_norm": 1.3828125, "learning_rate": 5.636557801143609e-05, "loss": 1.3963, "step": 4994 }, { "epoch": 0.7636155169119052, "grad_norm": 1.3515625, "learning_rate": 5.636156125633295e-05, "loss": 1.2759, "step": 4995 }, { "epoch": 0.763768392891267, "grad_norm": 1.359375, "learning_rate": 5.635754458819736e-05, "loss": 1.2592, "step": 4996 }, { "epoch": 0.7639212688706287, "grad_norm": 1.40625, "learning_rate": 5.635352800703717e-05, "loss": 1.4096, "step": 4997 }, { "epoch": 0.7640741448499905, "grad_norm": 1.3125, "learning_rate": 5.6349511512860475e-05, "loss": 1.2286, "step": 4998 }, { "epoch": 0.7642270208293522, "grad_norm": 1.296875, "learning_rate": 5.6345495105675327e-05, "loss": 1.3834, "step": 4999 }, { "epoch": 0.7643798968087139, "grad_norm": 1.53125, "learning_rate": 5.634147878548957e-05, "loss": 1.3593, "step": 5000 }, { "epoch": 0.7643798968087139, "eval_loss": 1.3599892854690552, "eval_model_preparation_time": 0.0034, "eval_runtime": 111.703, "eval_samples_per_second": 89.523, "eval_steps_per_second": 2.802, "step": 5000 }, { "epoch": 0.7645327727880756, "grad_norm": 1.375, "learning_rate": 5.633746255231134e-05, "loss": 1.192, "step": 5001 }, { "epoch": 0.7646856487674374, "grad_norm": 1.421875, "learning_rate": 5.63334464061486e-05, "loss": 1.3749, "step": 5002 }, { "epoch": 0.7648385247467991, "grad_norm": 1.6640625, "learning_rate": 5.632943034700937e-05, "loss": 1.6412, "step": 5003 }, { "epoch": 0.7649914007261609, "grad_norm": 1.3203125, "learning_rate": 5.6325414374901634e-05, "loss": 1.2578, "step": 5004 }, { "epoch": 0.7651442767055227, "grad_norm": 1.3125, "learning_rate": 5.632139848983335e-05, "loss": 1.4424, "step": 5005 }, { "epoch": 0.7652971526848844, "grad_norm": 1.34375, "learning_rate": 5.631738269181267e-05, "loss": 1.5274, "step": 5006 }, { "epoch": 0.7654500286642462, "grad_norm": 1.1484375, "learning_rate": 5.631336698084742e-05, "loss": 1.0652, "step": 5007 }, { "epoch": 0.7656029046436079, "grad_norm": 1.328125, "learning_rate": 5.6309351356945714e-05, "loss": 1.2083, "step": 5008 }, { "epoch": 0.7657557806229696, "grad_norm": 1.3203125, "learning_rate": 5.630533582011552e-05, "loss": 1.2492, "step": 5009 }, { "epoch": 0.7659086566023313, "grad_norm": 1.484375, "learning_rate": 5.630132037036486e-05, "loss": 1.3849, "step": 5010 }, { "epoch": 0.7660615325816931, "grad_norm": 1.390625, "learning_rate": 5.629730500770172e-05, "loss": 1.4198, "step": 5011 }, { "epoch": 0.7662144085610548, "grad_norm": 1.515625, "learning_rate": 5.62932897321341e-05, "loss": 1.4612, "step": 5012 }, { "epoch": 0.7663672845404166, "grad_norm": 1.484375, "learning_rate": 5.628927454367e-05, "loss": 1.4274, "step": 5013 }, { "epoch": 0.7665201605197783, "grad_norm": 1.4375, "learning_rate": 5.6285259442317396e-05, "loss": 1.3163, "step": 5014 }, { "epoch": 0.7666730364991401, "grad_norm": 1.4296875, "learning_rate": 5.628124442808439e-05, "loss": 1.1083, "step": 5015 }, { "epoch": 0.7668259124785018, "grad_norm": 1.4765625, "learning_rate": 5.6277229500978846e-05, "loss": 1.2104, "step": 5016 }, { "epoch": 0.7669787884578636, "grad_norm": 1.4453125, "learning_rate": 5.6273214661008866e-05, "loss": 1.1781, "step": 5017 }, { "epoch": 0.7671316644372252, "grad_norm": 1.4375, "learning_rate": 5.626919990818242e-05, "loss": 1.3006, "step": 5018 }, { "epoch": 0.767284540416587, "grad_norm": 1.390625, "learning_rate": 5.6265185242507515e-05, "loss": 1.3255, "step": 5019 }, { "epoch": 0.7674374163959488, "grad_norm": 1.3984375, "learning_rate": 5.626117066399214e-05, "loss": 1.4594, "step": 5020 }, { "epoch": 0.7675902923753105, "grad_norm": 1.4453125, "learning_rate": 5.625715617264431e-05, "loss": 1.4859, "step": 5021 }, { "epoch": 0.7677431683546723, "grad_norm": 1.2890625, "learning_rate": 5.625314176847201e-05, "loss": 1.2889, "step": 5022 }, { "epoch": 0.767896044334034, "grad_norm": 1.4765625, "learning_rate": 5.6249127451483195e-05, "loss": 1.4851, "step": 5023 }, { "epoch": 0.7680489203133958, "grad_norm": 1.390625, "learning_rate": 5.6245113221685975e-05, "loss": 1.3723, "step": 5024 }, { "epoch": 0.7682017962927575, "grad_norm": 1.3515625, "learning_rate": 5.624109907908832e-05, "loss": 1.415, "step": 5025 }, { "epoch": 0.7683546722721193, "grad_norm": 1.40625, "learning_rate": 5.6237085023698135e-05, "loss": 1.4167, "step": 5026 }, { "epoch": 0.7685075482514809, "grad_norm": 1.359375, "learning_rate": 5.6233071055523514e-05, "loss": 1.576, "step": 5027 }, { "epoch": 0.7686604242308427, "grad_norm": 1.359375, "learning_rate": 5.622905717457243e-05, "loss": 1.5775, "step": 5028 }, { "epoch": 0.7688133002102044, "grad_norm": 1.296875, "learning_rate": 5.622504338085288e-05, "loss": 1.3672, "step": 5029 }, { "epoch": 0.7689661761895662, "grad_norm": 1.1796875, "learning_rate": 5.6221029674372825e-05, "loss": 1.1404, "step": 5030 }, { "epoch": 0.769119052168928, "grad_norm": 1.453125, "learning_rate": 5.621701605514039e-05, "loss": 1.3033, "step": 5031 }, { "epoch": 0.7692719281482897, "grad_norm": 1.4140625, "learning_rate": 5.621300252316344e-05, "loss": 1.4552, "step": 5032 }, { "epoch": 0.7694248041276515, "grad_norm": 1.4140625, "learning_rate": 5.620898907844999e-05, "loss": 1.4407, "step": 5033 }, { "epoch": 0.7695776801070132, "grad_norm": 1.3515625, "learning_rate": 5.62049757210081e-05, "loss": 1.316, "step": 5034 }, { "epoch": 0.769730556086375, "grad_norm": 1.359375, "learning_rate": 5.620096245084574e-05, "loss": 1.3617, "step": 5035 }, { "epoch": 0.7698834320657366, "grad_norm": 1.3359375, "learning_rate": 5.619694926797091e-05, "loss": 1.4781, "step": 5036 }, { "epoch": 0.7700363080450984, "grad_norm": 1.46875, "learning_rate": 5.619293617239161e-05, "loss": 1.5749, "step": 5037 }, { "epoch": 0.7701891840244601, "grad_norm": 1.296875, "learning_rate": 5.6188923164115815e-05, "loss": 1.3148, "step": 5038 }, { "epoch": 0.7703420600038219, "grad_norm": 1.5078125, "learning_rate": 5.618491024315153e-05, "loss": 1.6704, "step": 5039 }, { "epoch": 0.7704949359831836, "grad_norm": 1.5, "learning_rate": 5.6180897409506746e-05, "loss": 1.3446, "step": 5040 }, { "epoch": 0.7706478119625454, "grad_norm": 1.375, "learning_rate": 5.6176884663189546e-05, "loss": 1.2107, "step": 5041 }, { "epoch": 0.7708006879419071, "grad_norm": 1.5, "learning_rate": 5.617287200420778e-05, "loss": 1.5994, "step": 5042 }, { "epoch": 0.7709535639212689, "grad_norm": 1.6796875, "learning_rate": 5.616885943256955e-05, "loss": 1.4865, "step": 5043 }, { "epoch": 0.7711064399006307, "grad_norm": 1.5625, "learning_rate": 5.616484694828288e-05, "loss": 1.5604, "step": 5044 }, { "epoch": 0.7712593158799923, "grad_norm": 1.3359375, "learning_rate": 5.616083455135561e-05, "loss": 1.2347, "step": 5045 }, { "epoch": 0.7714121918593541, "grad_norm": 1.4609375, "learning_rate": 5.615682224179589e-05, "loss": 1.6121, "step": 5046 }, { "epoch": 0.7715650678387158, "grad_norm": 1.484375, "learning_rate": 5.6152810019611655e-05, "loss": 1.6524, "step": 5047 }, { "epoch": 0.7717179438180776, "grad_norm": 1.265625, "learning_rate": 5.614879788481092e-05, "loss": 1.1569, "step": 5048 }, { "epoch": 0.7718708197974393, "grad_norm": 1.3203125, "learning_rate": 5.6144785837401624e-05, "loss": 1.2716, "step": 5049 }, { "epoch": 0.7720236957768011, "grad_norm": 1.4140625, "learning_rate": 5.614077387739189e-05, "loss": 1.3625, "step": 5050 }, { "epoch": 0.7721765717561628, "grad_norm": 1.484375, "learning_rate": 5.613676200478959e-05, "loss": 1.4648, "step": 5051 }, { "epoch": 0.7723294477355246, "grad_norm": 1.4296875, "learning_rate": 5.6132750219602726e-05, "loss": 1.3926, "step": 5052 }, { "epoch": 0.7724823237148863, "grad_norm": 1.4921875, "learning_rate": 5.612873852183936e-05, "loss": 1.5436, "step": 5053 }, { "epoch": 0.772635199694248, "grad_norm": 1.375, "learning_rate": 5.612472691150746e-05, "loss": 1.4091, "step": 5054 }, { "epoch": 0.7727880756736097, "grad_norm": 1.3046875, "learning_rate": 5.612071538861501e-05, "loss": 1.1438, "step": 5055 }, { "epoch": 0.7729409516529715, "grad_norm": 1.203125, "learning_rate": 5.611670395316998e-05, "loss": 1.0597, "step": 5056 }, { "epoch": 0.7730938276323333, "grad_norm": 1.34375, "learning_rate": 5.611269260518047e-05, "loss": 1.3906, "step": 5057 }, { "epoch": 0.773246703611695, "grad_norm": 1.4765625, "learning_rate": 5.610868134465437e-05, "loss": 1.2608, "step": 5058 }, { "epoch": 0.7733995795910568, "grad_norm": 1.5234375, "learning_rate": 5.610467017159965e-05, "loss": 1.545, "step": 5059 }, { "epoch": 0.7735524555704185, "grad_norm": 1.4453125, "learning_rate": 5.610065908602444e-05, "loss": 1.39, "step": 5060 }, { "epoch": 0.7737053315497803, "grad_norm": 1.359375, "learning_rate": 5.6096648087936574e-05, "loss": 1.2449, "step": 5061 }, { "epoch": 0.773858207529142, "grad_norm": 1.34375, "learning_rate": 5.609263717734416e-05, "loss": 1.4972, "step": 5062 }, { "epoch": 0.7740110835085037, "grad_norm": 1.421875, "learning_rate": 5.608862635425516e-05, "loss": 1.6753, "step": 5063 }, { "epoch": 0.7741639594878654, "grad_norm": 1.1953125, "learning_rate": 5.608461561867755e-05, "loss": 1.4205, "step": 5064 }, { "epoch": 0.7743168354672272, "grad_norm": 1.375, "learning_rate": 5.6080604970619334e-05, "loss": 1.4429, "step": 5065 }, { "epoch": 0.7744697114465889, "grad_norm": 1.3984375, "learning_rate": 5.607659441008852e-05, "loss": 1.3932, "step": 5066 }, { "epoch": 0.7746225874259507, "grad_norm": 1.3125, "learning_rate": 5.607258393709307e-05, "loss": 1.3966, "step": 5067 }, { "epoch": 0.7747754634053124, "grad_norm": 1.3359375, "learning_rate": 5.606857355164097e-05, "loss": 1.3831, "step": 5068 }, { "epoch": 0.7749283393846742, "grad_norm": 1.4296875, "learning_rate": 5.6064563253740257e-05, "loss": 1.2054, "step": 5069 }, { "epoch": 0.775081215364036, "grad_norm": 1.2890625, "learning_rate": 5.606055304339895e-05, "loss": 1.3059, "step": 5070 }, { "epoch": 0.7752340913433977, "grad_norm": 1.484375, "learning_rate": 5.60565429206249e-05, "loss": 1.3592, "step": 5071 }, { "epoch": 0.7753869673227594, "grad_norm": 1.3984375, "learning_rate": 5.605253288542625e-05, "loss": 1.3041, "step": 5072 }, { "epoch": 0.7755398433021211, "grad_norm": 1.1796875, "learning_rate": 5.604852293781091e-05, "loss": 1.0717, "step": 5073 }, { "epoch": 0.7756927192814829, "grad_norm": 1.359375, "learning_rate": 5.604451307778691e-05, "loss": 1.3243, "step": 5074 }, { "epoch": 0.7758455952608446, "grad_norm": 1.359375, "learning_rate": 5.604050330536219e-05, "loss": 1.3748, "step": 5075 }, { "epoch": 0.7759984712402064, "grad_norm": 1.4453125, "learning_rate": 5.603649362054484e-05, "loss": 1.5158, "step": 5076 }, { "epoch": 0.7761513472195681, "grad_norm": 1.359375, "learning_rate": 5.6032484023342756e-05, "loss": 1.2392, "step": 5077 }, { "epoch": 0.7763042231989299, "grad_norm": 1.3515625, "learning_rate": 5.602847451376392e-05, "loss": 1.6141, "step": 5078 }, { "epoch": 0.7764570991782916, "grad_norm": 1.5078125, "learning_rate": 5.60244650918164e-05, "loss": 1.5171, "step": 5079 }, { "epoch": 0.7766099751576534, "grad_norm": 1.375, "learning_rate": 5.602045575750814e-05, "loss": 1.2839, "step": 5080 }, { "epoch": 0.776762851137015, "grad_norm": 1.2734375, "learning_rate": 5.6016446510847157e-05, "loss": 1.217, "step": 5081 }, { "epoch": 0.7769157271163768, "grad_norm": 1.5546875, "learning_rate": 5.601243735184141e-05, "loss": 1.7301, "step": 5082 }, { "epoch": 0.7770686030957386, "grad_norm": 1.3203125, "learning_rate": 5.6008428280498905e-05, "loss": 1.3752, "step": 5083 }, { "epoch": 0.7772214790751003, "grad_norm": 1.421875, "learning_rate": 5.6004419296827627e-05, "loss": 1.5679, "step": 5084 }, { "epoch": 0.7773743550544621, "grad_norm": 1.4296875, "learning_rate": 5.6000410400835534e-05, "loss": 1.458, "step": 5085 }, { "epoch": 0.7775272310338238, "grad_norm": 1.25, "learning_rate": 5.5996401592530725e-05, "loss": 1.4501, "step": 5086 }, { "epoch": 0.7776801070131856, "grad_norm": 1.375, "learning_rate": 5.5992392871921026e-05, "loss": 1.4259, "step": 5087 }, { "epoch": 0.7778329829925473, "grad_norm": 1.3515625, "learning_rate": 5.5988384239014555e-05, "loss": 1.3822, "step": 5088 }, { "epoch": 0.7779858589719091, "grad_norm": 1.2265625, "learning_rate": 5.598437569381927e-05, "loss": 1.1376, "step": 5089 }, { "epoch": 0.7781387349512707, "grad_norm": 1.4375, "learning_rate": 5.5980367236343144e-05, "loss": 1.4493, "step": 5090 }, { "epoch": 0.7782916109306325, "grad_norm": 1.3984375, "learning_rate": 5.597635886659416e-05, "loss": 1.3607, "step": 5091 }, { "epoch": 0.7784444869099942, "grad_norm": 1.3828125, "learning_rate": 5.597235058458033e-05, "loss": 1.4362, "step": 5092 }, { "epoch": 0.778597362889356, "grad_norm": 1.3203125, "learning_rate": 5.5968342390309617e-05, "loss": 1.2822, "step": 5093 }, { "epoch": 0.7787502388687177, "grad_norm": 1.46875, "learning_rate": 5.596433428378999e-05, "loss": 1.483, "step": 5094 }, { "epoch": 0.7789031148480795, "grad_norm": 1.2734375, "learning_rate": 5.596032626502954e-05, "loss": 1.4142, "step": 5095 }, { "epoch": 0.7790559908274413, "grad_norm": 1.1796875, "learning_rate": 5.595631833403614e-05, "loss": 0.9709, "step": 5096 }, { "epoch": 0.779208866806803, "grad_norm": 1.5, "learning_rate": 5.5952310490817785e-05, "loss": 1.4786, "step": 5097 }, { "epoch": 0.7793617427861648, "grad_norm": 1.421875, "learning_rate": 5.594830273538253e-05, "loss": 1.2051, "step": 5098 }, { "epoch": 0.7795146187655264, "grad_norm": 1.53125, "learning_rate": 5.594429506773833e-05, "loss": 1.5098, "step": 5099 }, { "epoch": 0.7796674947448882, "grad_norm": 1.4140625, "learning_rate": 5.5940287487893175e-05, "loss": 1.5416, "step": 5100 }, { "epoch": 0.7798203707242499, "grad_norm": 1.3515625, "learning_rate": 5.5936279995855e-05, "loss": 1.2931, "step": 5101 }, { "epoch": 0.7799732467036117, "grad_norm": 1.328125, "learning_rate": 5.593227259163193e-05, "loss": 1.3606, "step": 5102 }, { "epoch": 0.7801261226829734, "grad_norm": 1.328125, "learning_rate": 5.5928265275231805e-05, "loss": 1.518, "step": 5103 }, { "epoch": 0.7802789986623352, "grad_norm": 1.296875, "learning_rate": 5.592425804666264e-05, "loss": 1.1346, "step": 5104 }, { "epoch": 0.7804318746416969, "grad_norm": 1.171875, "learning_rate": 5.5920250905932526e-05, "loss": 0.9857, "step": 5105 }, { "epoch": 0.7805847506210587, "grad_norm": 1.3671875, "learning_rate": 5.5916243853049276e-05, "loss": 1.7036, "step": 5106 }, { "epoch": 0.7807376266004205, "grad_norm": 1.3203125, "learning_rate": 5.591223688802103e-05, "loss": 1.4266, "step": 5107 }, { "epoch": 0.7808905025797821, "grad_norm": 1.4296875, "learning_rate": 5.59082300108557e-05, "loss": 1.2811, "step": 5108 }, { "epoch": 0.7810433785591439, "grad_norm": 1.1875, "learning_rate": 5.590422322156129e-05, "loss": 1.3192, "step": 5109 }, { "epoch": 0.7811962545385056, "grad_norm": 1.5703125, "learning_rate": 5.590021652014579e-05, "loss": 1.2432, "step": 5110 }, { "epoch": 0.7813491305178674, "grad_norm": 1.40625, "learning_rate": 5.589620990661715e-05, "loss": 1.2213, "step": 5111 }, { "epoch": 0.7815020064972291, "grad_norm": 1.1953125, "learning_rate": 5.58922033809834e-05, "loss": 1.1361, "step": 5112 }, { "epoch": 0.7816548824765909, "grad_norm": 1.34375, "learning_rate": 5.588819694325245e-05, "loss": 1.2646, "step": 5113 }, { "epoch": 0.7818077584559526, "grad_norm": 1.4609375, "learning_rate": 5.5884190593432396e-05, "loss": 1.6588, "step": 5114 }, { "epoch": 0.7819606344353144, "grad_norm": 1.3515625, "learning_rate": 5.588018433153115e-05, "loss": 1.2418, "step": 5115 }, { "epoch": 0.7821135104146761, "grad_norm": 1.5859375, "learning_rate": 5.5876178157556726e-05, "loss": 1.7649, "step": 5116 }, { "epoch": 0.7822663863940378, "grad_norm": 1.375, "learning_rate": 5.587217207151707e-05, "loss": 1.4049, "step": 5117 }, { "epoch": 0.7824192623733995, "grad_norm": 1.390625, "learning_rate": 5.58681660734202e-05, "loss": 1.324, "step": 5118 }, { "epoch": 0.7825721383527613, "grad_norm": 1.40625, "learning_rate": 5.58641601632741e-05, "loss": 1.5287, "step": 5119 }, { "epoch": 0.782725014332123, "grad_norm": 1.359375, "learning_rate": 5.586015434108669e-05, "loss": 1.4171, "step": 5120 }, { "epoch": 0.7828778903114848, "grad_norm": 1.40625, "learning_rate": 5.58561486068661e-05, "loss": 1.377, "step": 5121 }, { "epoch": 0.7830307662908466, "grad_norm": 1.40625, "learning_rate": 5.585214296062012e-05, "loss": 1.3474, "step": 5122 }, { "epoch": 0.7831836422702083, "grad_norm": 1.4765625, "learning_rate": 5.584813740235688e-05, "loss": 1.4299, "step": 5123 }, { "epoch": 0.7833365182495701, "grad_norm": 1.3984375, "learning_rate": 5.584413193208435e-05, "loss": 1.4339, "step": 5124 }, { "epoch": 0.7834893942289318, "grad_norm": 1.328125, "learning_rate": 5.58401265498104e-05, "loss": 1.3797, "step": 5125 }, { "epoch": 0.7836422702082935, "grad_norm": 1.3359375, "learning_rate": 5.583612125554314e-05, "loss": 1.2254, "step": 5126 }, { "epoch": 0.7837951461876552, "grad_norm": 1.2890625, "learning_rate": 5.583211604929048e-05, "loss": 1.2366, "step": 5127 }, { "epoch": 0.783948022167017, "grad_norm": 1.34375, "learning_rate": 5.582811093106044e-05, "loss": 1.4998, "step": 5128 }, { "epoch": 0.7841008981463787, "grad_norm": 1.375, "learning_rate": 5.582410590086097e-05, "loss": 1.3392, "step": 5129 }, { "epoch": 0.7842537741257405, "grad_norm": 1.234375, "learning_rate": 5.582010095870003e-05, "loss": 1.2168, "step": 5130 }, { "epoch": 0.7844066501051022, "grad_norm": 1.328125, "learning_rate": 5.5816096104585724e-05, "loss": 1.4219, "step": 5131 }, { "epoch": 0.784559526084464, "grad_norm": 1.3203125, "learning_rate": 5.581209133852586e-05, "loss": 1.151, "step": 5132 }, { "epoch": 0.7847124020638258, "grad_norm": 1.1796875, "learning_rate": 5.580808666052856e-05, "loss": 1.1125, "step": 5133 }, { "epoch": 0.7848652780431875, "grad_norm": 1.3515625, "learning_rate": 5.5804082070601735e-05, "loss": 1.2383, "step": 5134 }, { "epoch": 0.7850181540225492, "grad_norm": 1.28125, "learning_rate": 5.5800077568753396e-05, "loss": 1.3666, "step": 5135 }, { "epoch": 0.7851710300019109, "grad_norm": 1.4140625, "learning_rate": 5.5796073154991504e-05, "loss": 1.4266, "step": 5136 }, { "epoch": 0.7853239059812727, "grad_norm": 1.3359375, "learning_rate": 5.579206882932404e-05, "loss": 1.3759, "step": 5137 }, { "epoch": 0.7854767819606344, "grad_norm": 1.4375, "learning_rate": 5.5788064591759006e-05, "loss": 1.3759, "step": 5138 }, { "epoch": 0.7856296579399962, "grad_norm": 1.359375, "learning_rate": 5.578406044230431e-05, "loss": 1.4316, "step": 5139 }, { "epoch": 0.7857825339193579, "grad_norm": 1.3671875, "learning_rate": 5.578005638096807e-05, "loss": 1.3607, "step": 5140 }, { "epoch": 0.7859354098987197, "grad_norm": 1.3671875, "learning_rate": 5.577605240775811e-05, "loss": 1.3669, "step": 5141 }, { "epoch": 0.7860882858780814, "grad_norm": 1.1796875, "learning_rate": 5.5772048522682516e-05, "loss": 1.1953, "step": 5142 }, { "epoch": 0.7862411618574432, "grad_norm": 1.4453125, "learning_rate": 5.576804472574923e-05, "loss": 1.4696, "step": 5143 }, { "epoch": 0.7863940378368048, "grad_norm": 1.5078125, "learning_rate": 5.576404101696624e-05, "loss": 1.6794, "step": 5144 }, { "epoch": 0.7865469138161666, "grad_norm": 1.4375, "learning_rate": 5.5760037396341525e-05, "loss": 1.5909, "step": 5145 }, { "epoch": 0.7866997897955283, "grad_norm": 1.453125, "learning_rate": 5.575603386388306e-05, "loss": 1.4159, "step": 5146 }, { "epoch": 0.7868526657748901, "grad_norm": 1.28125, "learning_rate": 5.5752030419598824e-05, "loss": 1.2404, "step": 5147 }, { "epoch": 0.7870055417542519, "grad_norm": 1.34375, "learning_rate": 5.5748027063496743e-05, "loss": 1.3195, "step": 5148 }, { "epoch": 0.7871584177336136, "grad_norm": 1.3828125, "learning_rate": 5.57440237955849e-05, "loss": 1.2493, "step": 5149 }, { "epoch": 0.7873112937129754, "grad_norm": 1.3515625, "learning_rate": 5.5740020615871245e-05, "loss": 1.3046, "step": 5150 }, { "epoch": 0.7874641696923371, "grad_norm": 1.359375, "learning_rate": 5.573601752436366e-05, "loss": 1.3736, "step": 5151 }, { "epoch": 0.7876170456716989, "grad_norm": 1.328125, "learning_rate": 5.573201452107024e-05, "loss": 1.2826, "step": 5152 }, { "epoch": 0.7877699216510605, "grad_norm": 1.46875, "learning_rate": 5.5728011605998896e-05, "loss": 1.4567, "step": 5153 }, { "epoch": 0.7879227976304223, "grad_norm": 1.4453125, "learning_rate": 5.572400877915764e-05, "loss": 1.2157, "step": 5154 }, { "epoch": 0.788075673609784, "grad_norm": 1.2734375, "learning_rate": 5.572000604055444e-05, "loss": 1.3627, "step": 5155 }, { "epoch": 0.7882285495891458, "grad_norm": 1.5, "learning_rate": 5.571600339019727e-05, "loss": 1.4344, "step": 5156 }, { "epoch": 0.7883814255685075, "grad_norm": 1.59375, "learning_rate": 5.571200082809409e-05, "loss": 1.634, "step": 5157 }, { "epoch": 0.7885343015478693, "grad_norm": 1.5078125, "learning_rate": 5.5707998354252865e-05, "loss": 1.7588, "step": 5158 }, { "epoch": 0.788687177527231, "grad_norm": 1.34375, "learning_rate": 5.570399596868163e-05, "loss": 1.3601, "step": 5159 }, { "epoch": 0.7888400535065928, "grad_norm": 1.4609375, "learning_rate": 5.5699993671388326e-05, "loss": 1.3865, "step": 5160 }, { "epoch": 0.7889929294859546, "grad_norm": 1.453125, "learning_rate": 5.5695991462380946e-05, "loss": 1.6154, "step": 5161 }, { "epoch": 0.7891458054653162, "grad_norm": 1.2734375, "learning_rate": 5.569198934166744e-05, "loss": 1.4412, "step": 5162 }, { "epoch": 0.789298681444678, "grad_norm": 1.6015625, "learning_rate": 5.5687987309255806e-05, "loss": 1.5025, "step": 5163 }, { "epoch": 0.7894515574240397, "grad_norm": 1.4140625, "learning_rate": 5.568398536515401e-05, "loss": 1.6512, "step": 5164 }, { "epoch": 0.7896044334034015, "grad_norm": 1.4453125, "learning_rate": 5.567998350936998e-05, "loss": 1.4187, "step": 5165 }, { "epoch": 0.7897573093827632, "grad_norm": 1.5234375, "learning_rate": 5.567598174191183e-05, "loss": 1.6605, "step": 5166 }, { "epoch": 0.789910185362125, "grad_norm": 1.3359375, "learning_rate": 5.5671980062787364e-05, "loss": 1.2119, "step": 5167 }, { "epoch": 0.7900630613414867, "grad_norm": 1.3359375, "learning_rate": 5.566797847200467e-05, "loss": 1.3137, "step": 5168 }, { "epoch": 0.7902159373208485, "grad_norm": 1.375, "learning_rate": 5.566397696957174e-05, "loss": 1.2973, "step": 5169 }, { "epoch": 0.7903688133002102, "grad_norm": 1.3671875, "learning_rate": 5.5659975555496415e-05, "loss": 1.308, "step": 5170 }, { "epoch": 0.7905216892795719, "grad_norm": 1.453125, "learning_rate": 5.565597422978679e-05, "loss": 1.4006, "step": 5171 }, { "epoch": 0.7906745652589336, "grad_norm": 1.390625, "learning_rate": 5.5651972992450796e-05, "loss": 1.3768, "step": 5172 }, { "epoch": 0.7908274412382954, "grad_norm": 1.5625, "learning_rate": 5.5647971843496436e-05, "loss": 1.4689, "step": 5173 }, { "epoch": 0.7909803172176572, "grad_norm": 1.5, "learning_rate": 5.56439707829316e-05, "loss": 1.3103, "step": 5174 }, { "epoch": 0.7911331931970189, "grad_norm": 1.234375, "learning_rate": 5.563996981076438e-05, "loss": 1.3807, "step": 5175 }, { "epoch": 0.7912860691763807, "grad_norm": 1.453125, "learning_rate": 5.563596892700271e-05, "loss": 1.2953, "step": 5176 }, { "epoch": 0.7914389451557424, "grad_norm": 1.46875, "learning_rate": 5.56319681316545e-05, "loss": 1.6034, "step": 5177 }, { "epoch": 0.7915918211351042, "grad_norm": 1.40625, "learning_rate": 5.562796742472778e-05, "loss": 1.3489, "step": 5178 }, { "epoch": 0.7917446971144659, "grad_norm": 1.5, "learning_rate": 5.562396680623052e-05, "loss": 1.4918, "step": 5179 }, { "epoch": 0.7918975730938276, "grad_norm": 1.453125, "learning_rate": 5.56199662761707e-05, "loss": 1.4364, "step": 5180 }, { "epoch": 0.7920504490731893, "grad_norm": 1.609375, "learning_rate": 5.561596583455623e-05, "loss": 1.5604, "step": 5181 }, { "epoch": 0.7922033250525511, "grad_norm": 1.2890625, "learning_rate": 5.5611965481395204e-05, "loss": 1.2936, "step": 5182 }, { "epoch": 0.7923562010319128, "grad_norm": 1.4140625, "learning_rate": 5.560796521669549e-05, "loss": 1.3243, "step": 5183 }, { "epoch": 0.7925090770112746, "grad_norm": 1.2109375, "learning_rate": 5.560396504046504e-05, "loss": 1.1102, "step": 5184 }, { "epoch": 0.7926619529906364, "grad_norm": 1.375, "learning_rate": 5.559996495271197e-05, "loss": 1.3165, "step": 5185 }, { "epoch": 0.7928148289699981, "grad_norm": 1.453125, "learning_rate": 5.559596495344408e-05, "loss": 1.5969, "step": 5186 }, { "epoch": 0.7929677049493599, "grad_norm": 1.75, "learning_rate": 5.559196504266946e-05, "loss": 1.3473, "step": 5187 }, { "epoch": 0.7931205809287216, "grad_norm": 1.328125, "learning_rate": 5.558796522039602e-05, "loss": 1.2289, "step": 5188 }, { "epoch": 0.7932734569080833, "grad_norm": 1.4375, "learning_rate": 5.5583965486631785e-05, "loss": 1.5209, "step": 5189 }, { "epoch": 0.793426332887445, "grad_norm": 1.40625, "learning_rate": 5.557996584138467e-05, "loss": 1.5205, "step": 5190 }, { "epoch": 0.7935792088668068, "grad_norm": 1.5703125, "learning_rate": 5.5575966284662686e-05, "loss": 1.5346, "step": 5191 }, { "epoch": 0.7937320848461685, "grad_norm": 1.328125, "learning_rate": 5.5571966816473784e-05, "loss": 1.4007, "step": 5192 }, { "epoch": 0.7938849608255303, "grad_norm": 1.4375, "learning_rate": 5.5567967436825906e-05, "loss": 1.4657, "step": 5193 }, { "epoch": 0.794037836804892, "grad_norm": 1.40625, "learning_rate": 5.556396814572708e-05, "loss": 1.1915, "step": 5194 }, { "epoch": 0.7941907127842538, "grad_norm": 1.265625, "learning_rate": 5.5559968943185304e-05, "loss": 1.2657, "step": 5195 }, { "epoch": 0.7943435887636155, "grad_norm": 1.3125, "learning_rate": 5.555596982920841e-05, "loss": 1.2274, "step": 5196 }, { "epoch": 0.7944964647429773, "grad_norm": 1.40625, "learning_rate": 5.5551970803804496e-05, "loss": 1.3091, "step": 5197 }, { "epoch": 0.794649340722339, "grad_norm": 1.375, "learning_rate": 5.5547971866981486e-05, "loss": 1.4847, "step": 5198 }, { "epoch": 0.7948022167017007, "grad_norm": 1.2734375, "learning_rate": 5.554397301874736e-05, "loss": 1.008, "step": 5199 }, { "epoch": 0.7949550926810625, "grad_norm": 1.3828125, "learning_rate": 5.553997425911004e-05, "loss": 1.3701, "step": 5200 }, { "epoch": 0.7951079686604242, "grad_norm": 1.4609375, "learning_rate": 5.5535975588077604e-05, "loss": 1.3947, "step": 5201 }, { "epoch": 0.795260844639786, "grad_norm": 1.4609375, "learning_rate": 5.553197700565792e-05, "loss": 1.2634, "step": 5202 }, { "epoch": 0.7954137206191477, "grad_norm": 1.3671875, "learning_rate": 5.552797851185895e-05, "loss": 1.251, "step": 5203 }, { "epoch": 0.7955665965985095, "grad_norm": 1.6328125, "learning_rate": 5.552398010668873e-05, "loss": 1.5068, "step": 5204 }, { "epoch": 0.7957194725778712, "grad_norm": 1.3203125, "learning_rate": 5.5519981790155205e-05, "loss": 1.1224, "step": 5205 }, { "epoch": 0.795872348557233, "grad_norm": 1.453125, "learning_rate": 5.551598356226635e-05, "loss": 1.3593, "step": 5206 }, { "epoch": 0.7960252245365946, "grad_norm": 1.4453125, "learning_rate": 5.551198542303011e-05, "loss": 1.3026, "step": 5207 }, { "epoch": 0.7961781005159564, "grad_norm": 1.5234375, "learning_rate": 5.550798737245447e-05, "loss": 1.5126, "step": 5208 }, { "epoch": 0.7963309764953181, "grad_norm": 1.3125, "learning_rate": 5.5503989410547395e-05, "loss": 1.3608, "step": 5209 }, { "epoch": 0.7964838524746799, "grad_norm": 1.40625, "learning_rate": 5.549999153731681e-05, "loss": 1.1641, "step": 5210 }, { "epoch": 0.7966367284540417, "grad_norm": 1.3125, "learning_rate": 5.549599375277079e-05, "loss": 1.4588, "step": 5211 }, { "epoch": 0.7967896044334034, "grad_norm": 1.390625, "learning_rate": 5.549199605691717e-05, "loss": 1.448, "step": 5212 }, { "epoch": 0.7969424804127652, "grad_norm": 1.328125, "learning_rate": 5.5487998449764015e-05, "loss": 1.4048, "step": 5213 }, { "epoch": 0.7970953563921269, "grad_norm": 1.421875, "learning_rate": 5.548400093131926e-05, "loss": 1.5405, "step": 5214 }, { "epoch": 0.7972482323714887, "grad_norm": 1.4609375, "learning_rate": 5.548000350159086e-05, "loss": 1.5818, "step": 5215 }, { "epoch": 0.7974011083508503, "grad_norm": 1.5078125, "learning_rate": 5.54760061605868e-05, "loss": 1.548, "step": 5216 }, { "epoch": 0.7975539843302121, "grad_norm": 1.2265625, "learning_rate": 5.547200890831503e-05, "loss": 1.2453, "step": 5217 }, { "epoch": 0.7977068603095738, "grad_norm": 1.3125, "learning_rate": 5.546801174478353e-05, "loss": 1.2826, "step": 5218 }, { "epoch": 0.7978597362889356, "grad_norm": 1.53125, "learning_rate": 5.546401467000023e-05, "loss": 1.588, "step": 5219 }, { "epoch": 0.7980126122682973, "grad_norm": 1.3125, "learning_rate": 5.54600176839732e-05, "loss": 1.4564, "step": 5220 }, { "epoch": 0.7981654882476591, "grad_norm": 1.234375, "learning_rate": 5.545602078671028e-05, "loss": 1.5325, "step": 5221 }, { "epoch": 0.7983183642270208, "grad_norm": 1.4375, "learning_rate": 5.545202397821946e-05, "loss": 1.478, "step": 5222 }, { "epoch": 0.7984712402063826, "grad_norm": 1.328125, "learning_rate": 5.544802725850877e-05, "loss": 1.2582, "step": 5223 }, { "epoch": 0.7986241161857444, "grad_norm": 1.328125, "learning_rate": 5.544403062758613e-05, "loss": 1.4165, "step": 5224 }, { "epoch": 0.798776992165106, "grad_norm": 1.3203125, "learning_rate": 5.544003408545952e-05, "loss": 1.2014, "step": 5225 }, { "epoch": 0.7989298681444678, "grad_norm": 1.9453125, "learning_rate": 5.543603763213685e-05, "loss": 1.4387, "step": 5226 }, { "epoch": 0.7990827441238295, "grad_norm": 1.484375, "learning_rate": 5.5432041267626225e-05, "loss": 1.4343, "step": 5227 }, { "epoch": 0.7992356201031913, "grad_norm": 1.265625, "learning_rate": 5.5428044991935455e-05, "loss": 1.4485, "step": 5228 }, { "epoch": 0.799388496082553, "grad_norm": 1.65625, "learning_rate": 5.542404880507255e-05, "loss": 1.4845, "step": 5229 }, { "epoch": 0.7995413720619148, "grad_norm": 1.328125, "learning_rate": 5.542005270704556e-05, "loss": 1.2077, "step": 5230 }, { "epoch": 0.7996942480412765, "grad_norm": 1.4375, "learning_rate": 5.5416056697862286e-05, "loss": 1.3148, "step": 5231 }, { "epoch": 0.7998471240206383, "grad_norm": 1.46875, "learning_rate": 5.5412060777530826e-05, "loss": 1.3634, "step": 5232 }, { "epoch": 0.8, "grad_norm": 1.328125, "learning_rate": 5.54080649460591e-05, "loss": 1.271, "step": 5233 }, { "epoch": 0.8001528759793617, "grad_norm": 1.390625, "learning_rate": 5.540406920345509e-05, "loss": 1.3022, "step": 5234 }, { "epoch": 0.8003057519587234, "grad_norm": 1.4921875, "learning_rate": 5.5400073549726725e-05, "loss": 1.5025, "step": 5235 }, { "epoch": 0.8004586279380852, "grad_norm": 1.2421875, "learning_rate": 5.5396077984882e-05, "loss": 1.1487, "step": 5236 }, { "epoch": 0.800611503917447, "grad_norm": 1.4140625, "learning_rate": 5.539208250892884e-05, "loss": 1.4974, "step": 5237 }, { "epoch": 0.8007643798968087, "grad_norm": 1.328125, "learning_rate": 5.538808712187521e-05, "loss": 1.4062, "step": 5238 }, { "epoch": 0.8009172558761705, "grad_norm": 1.4140625, "learning_rate": 5.5384091823729125e-05, "loss": 1.4324, "step": 5239 }, { "epoch": 0.8010701318555322, "grad_norm": 1.3203125, "learning_rate": 5.538009661449852e-05, "loss": 1.2947, "step": 5240 }, { "epoch": 0.801223007834894, "grad_norm": 1.34375, "learning_rate": 5.537610149419135e-05, "loss": 1.0478, "step": 5241 }, { "epoch": 0.8013758838142557, "grad_norm": 1.53125, "learning_rate": 5.537210646281558e-05, "loss": 1.4941, "step": 5242 }, { "epoch": 0.8015287597936174, "grad_norm": 1.4296875, "learning_rate": 5.536811152037916e-05, "loss": 1.5221, "step": 5243 }, { "epoch": 0.8016816357729791, "grad_norm": 1.375, "learning_rate": 5.536411666689008e-05, "loss": 1.3707, "step": 5244 }, { "epoch": 0.8018345117523409, "grad_norm": 1.2890625, "learning_rate": 5.536012190235622e-05, "loss": 1.4272, "step": 5245 }, { "epoch": 0.8019873877317026, "grad_norm": 1.421875, "learning_rate": 5.53561272267857e-05, "loss": 1.5088, "step": 5246 }, { "epoch": 0.8021402637110644, "grad_norm": 1.25, "learning_rate": 5.5352132640186305e-05, "loss": 1.301, "step": 5247 }, { "epoch": 0.8022931396904261, "grad_norm": 1.5234375, "learning_rate": 5.534813814256612e-05, "loss": 1.3307, "step": 5248 }, { "epoch": 0.8024460156697879, "grad_norm": 1.359375, "learning_rate": 5.53441437339331e-05, "loss": 1.2833, "step": 5249 }, { "epoch": 0.8025988916491497, "grad_norm": 1.40625, "learning_rate": 5.5340149414295086e-05, "loss": 1.4293, "step": 5250 }, { "epoch": 0.8027517676285114, "grad_norm": 1.5078125, "learning_rate": 5.533615518366015e-05, "loss": 1.5748, "step": 5251 }, { "epoch": 0.8029046436078731, "grad_norm": 1.375, "learning_rate": 5.533216104203624e-05, "loss": 1.2884, "step": 5252 }, { "epoch": 0.8030575195872348, "grad_norm": 1.3984375, "learning_rate": 5.5328166989431285e-05, "loss": 1.3916, "step": 5253 }, { "epoch": 0.8032103955665966, "grad_norm": 1.390625, "learning_rate": 5.532417302585326e-05, "loss": 1.3299, "step": 5254 }, { "epoch": 0.8033632715459583, "grad_norm": 1.4609375, "learning_rate": 5.532017915131009e-05, "loss": 1.4193, "step": 5255 }, { "epoch": 0.8035161475253201, "grad_norm": 1.3515625, "learning_rate": 5.531618536580985e-05, "loss": 1.1225, "step": 5256 }, { "epoch": 0.8036690235046818, "grad_norm": 1.46875, "learning_rate": 5.531219166936032e-05, "loss": 1.476, "step": 5257 }, { "epoch": 0.8038218994840436, "grad_norm": 1.34375, "learning_rate": 5.530819806196961e-05, "loss": 1.3405, "step": 5258 }, { "epoch": 0.8039747754634053, "grad_norm": 1.4140625, "learning_rate": 5.530420454364561e-05, "loss": 1.3021, "step": 5259 }, { "epoch": 0.8041276514427671, "grad_norm": 1.40625, "learning_rate": 5.5300211114396305e-05, "loss": 1.4118, "step": 5260 }, { "epoch": 0.8042805274221287, "grad_norm": 1.375, "learning_rate": 5.529621777422964e-05, "loss": 1.3496, "step": 5261 }, { "epoch": 0.8044334034014905, "grad_norm": 1.4375, "learning_rate": 5.529222452315357e-05, "loss": 1.5238, "step": 5262 }, { "epoch": 0.8045862793808523, "grad_norm": 1.4609375, "learning_rate": 5.528823136117607e-05, "loss": 1.3074, "step": 5263 }, { "epoch": 0.804739155360214, "grad_norm": 1.5234375, "learning_rate": 5.528423828830502e-05, "loss": 1.4482, "step": 5264 }, { "epoch": 0.8048920313395758, "grad_norm": 1.359375, "learning_rate": 5.528024530454855e-05, "loss": 1.5467, "step": 5265 }, { "epoch": 0.8050449073189375, "grad_norm": 1.359375, "learning_rate": 5.527625240991442e-05, "loss": 1.5744, "step": 5266 }, { "epoch": 0.8051977832982993, "grad_norm": 1.4453125, "learning_rate": 5.5272259604410724e-05, "loss": 1.5317, "step": 5267 }, { "epoch": 0.805350659277661, "grad_norm": 1.4921875, "learning_rate": 5.5268266888045376e-05, "loss": 1.4091, "step": 5268 }, { "epoch": 0.8055035352570228, "grad_norm": 1.25, "learning_rate": 5.526427426082632e-05, "loss": 1.2172, "step": 5269 }, { "epoch": 0.8056564112363844, "grad_norm": 1.375, "learning_rate": 5.526028172276154e-05, "loss": 1.2605, "step": 5270 }, { "epoch": 0.8058092872157462, "grad_norm": 1.421875, "learning_rate": 5.5256289273858964e-05, "loss": 1.5544, "step": 5271 }, { "epoch": 0.8059621631951079, "grad_norm": 1.375, "learning_rate": 5.5252296914126564e-05, "loss": 1.4087, "step": 5272 }, { "epoch": 0.8061150391744697, "grad_norm": 1.3359375, "learning_rate": 5.524830464357227e-05, "loss": 1.4963, "step": 5273 }, { "epoch": 0.8062679151538314, "grad_norm": 1.4375, "learning_rate": 5.5244312462204075e-05, "loss": 1.5327, "step": 5274 }, { "epoch": 0.8064207911331932, "grad_norm": 1.2890625, "learning_rate": 5.5240320370029984e-05, "loss": 1.4249, "step": 5275 }, { "epoch": 0.806573667112555, "grad_norm": 1.4453125, "learning_rate": 5.523632836705781e-05, "loss": 1.475, "step": 5276 }, { "epoch": 0.8067265430919167, "grad_norm": 1.421875, "learning_rate": 5.523233645329562e-05, "loss": 1.6891, "step": 5277 }, { "epoch": 0.8068794190712785, "grad_norm": 1.3828125, "learning_rate": 5.522834462875135e-05, "loss": 1.3626, "step": 5278 }, { "epoch": 0.8070322950506401, "grad_norm": 1.46875, "learning_rate": 5.522435289343293e-05, "loss": 1.1445, "step": 5279 }, { "epoch": 0.8071851710300019, "grad_norm": 1.3671875, "learning_rate": 5.522036124734834e-05, "loss": 1.4931, "step": 5280 }, { "epoch": 0.8073380470093636, "grad_norm": 1.359375, "learning_rate": 5.521636969050552e-05, "loss": 1.3184, "step": 5281 }, { "epoch": 0.8074909229887254, "grad_norm": 1.4453125, "learning_rate": 5.521237822291244e-05, "loss": 1.3553, "step": 5282 }, { "epoch": 0.8076437989680871, "grad_norm": 1.3125, "learning_rate": 5.5208386844577e-05, "loss": 1.3006, "step": 5283 }, { "epoch": 0.8077966749474489, "grad_norm": 1.421875, "learning_rate": 5.5204395555507235e-05, "loss": 1.5393, "step": 5284 }, { "epoch": 0.8079495509268106, "grad_norm": 1.296875, "learning_rate": 5.5200404355711065e-05, "loss": 1.3406, "step": 5285 }, { "epoch": 0.8081024269061724, "grad_norm": 1.359375, "learning_rate": 5.519641324519644e-05, "loss": 1.3186, "step": 5286 }, { "epoch": 0.8082553028855342, "grad_norm": 1.40625, "learning_rate": 5.519242222397132e-05, "loss": 1.2045, "step": 5287 }, { "epoch": 0.8084081788648958, "grad_norm": 1.3046875, "learning_rate": 5.5188431292043665e-05, "loss": 1.1193, "step": 5288 }, { "epoch": 0.8085610548442576, "grad_norm": 1.4140625, "learning_rate": 5.518444044942141e-05, "loss": 1.3038, "step": 5289 }, { "epoch": 0.8087139308236193, "grad_norm": 1.21875, "learning_rate": 5.518044969611247e-05, "loss": 1.0983, "step": 5290 }, { "epoch": 0.8088668068029811, "grad_norm": 1.5, "learning_rate": 5.517645903212494e-05, "loss": 1.5494, "step": 5291 }, { "epoch": 0.8090196827823428, "grad_norm": 1.4296875, "learning_rate": 5.517246845746658e-05, "loss": 1.4566, "step": 5292 }, { "epoch": 0.8091725587617046, "grad_norm": 1.4140625, "learning_rate": 5.5168477972145496e-05, "loss": 1.41, "step": 5293 }, { "epoch": 0.8093254347410663, "grad_norm": 1.28125, "learning_rate": 5.516448757616961e-05, "loss": 1.2297, "step": 5294 }, { "epoch": 0.8094783107204281, "grad_norm": 1.3984375, "learning_rate": 5.516049726954678e-05, "loss": 1.279, "step": 5295 }, { "epoch": 0.8096311866997898, "grad_norm": 1.421875, "learning_rate": 5.515650705228508e-05, "loss": 1.4994, "step": 5296 }, { "epoch": 0.8097840626791515, "grad_norm": 1.296875, "learning_rate": 5.515251692439239e-05, "loss": 1.2591, "step": 5297 }, { "epoch": 0.8099369386585132, "grad_norm": 1.2890625, "learning_rate": 5.514852688587669e-05, "loss": 1.1784, "step": 5298 }, { "epoch": 0.810089814637875, "grad_norm": 1.4296875, "learning_rate": 5.514453693674589e-05, "loss": 1.2927, "step": 5299 }, { "epoch": 0.8102426906172367, "grad_norm": 1.4140625, "learning_rate": 5.514054707700802e-05, "loss": 1.3864, "step": 5300 }, { "epoch": 0.8103955665965985, "grad_norm": 1.359375, "learning_rate": 5.513655730667102e-05, "loss": 1.1688, "step": 5301 }, { "epoch": 0.8105484425759603, "grad_norm": 1.2734375, "learning_rate": 5.513256762574273e-05, "loss": 1.0698, "step": 5302 }, { "epoch": 0.810701318555322, "grad_norm": 1.375, "learning_rate": 5.5128578034231216e-05, "loss": 1.4238, "step": 5303 }, { "epoch": 0.8108541945346838, "grad_norm": 1.28125, "learning_rate": 5.5124588532144395e-05, "loss": 1.1435, "step": 5304 }, { "epoch": 0.8110070705140455, "grad_norm": 1.5, "learning_rate": 5.512059911949021e-05, "loss": 1.5835, "step": 5305 }, { "epoch": 0.8111599464934072, "grad_norm": 1.3671875, "learning_rate": 5.5116609796276597e-05, "loss": 1.5507, "step": 5306 }, { "epoch": 0.8113128224727689, "grad_norm": 1.484375, "learning_rate": 5.5112620562511585e-05, "loss": 1.3952, "step": 5307 }, { "epoch": 0.8114656984521307, "grad_norm": 1.53125, "learning_rate": 5.510863141820304e-05, "loss": 1.3058, "step": 5308 }, { "epoch": 0.8116185744314924, "grad_norm": 1.203125, "learning_rate": 5.510464236335888e-05, "loss": 1.1105, "step": 5309 }, { "epoch": 0.8117714504108542, "grad_norm": 1.34375, "learning_rate": 5.51006533979872e-05, "loss": 1.3245, "step": 5310 }, { "epoch": 0.811924326390216, "grad_norm": 1.5, "learning_rate": 5.5096664522095786e-05, "loss": 1.6335, "step": 5311 }, { "epoch": 0.8120772023695777, "grad_norm": 1.4375, "learning_rate": 5.509267573569271e-05, "loss": 1.4848, "step": 5312 }, { "epoch": 0.8122300783489395, "grad_norm": 1.4375, "learning_rate": 5.508868703878587e-05, "loss": 1.2741, "step": 5313 }, { "epoch": 0.8123829543283012, "grad_norm": 1.4296875, "learning_rate": 5.508469843138322e-05, "loss": 1.328, "step": 5314 }, { "epoch": 0.8125358303076629, "grad_norm": 1.4921875, "learning_rate": 5.50807099134927e-05, "loss": 1.7127, "step": 5315 }, { "epoch": 0.8126887062870246, "grad_norm": 1.3671875, "learning_rate": 5.5076721485122276e-05, "loss": 1.1842, "step": 5316 }, { "epoch": 0.8128415822663864, "grad_norm": 1.484375, "learning_rate": 5.507273314627989e-05, "loss": 1.3836, "step": 5317 }, { "epoch": 0.8129944582457481, "grad_norm": 1.3515625, "learning_rate": 5.5068744896973445e-05, "loss": 1.2932, "step": 5318 }, { "epoch": 0.8131473342251099, "grad_norm": 1.6328125, "learning_rate": 5.5064756737210966e-05, "loss": 1.5519, "step": 5319 }, { "epoch": 0.8133002102044716, "grad_norm": 1.359375, "learning_rate": 5.5060768667000416e-05, "loss": 1.2767, "step": 5320 }, { "epoch": 0.8134530861838334, "grad_norm": 1.4296875, "learning_rate": 5.505678068634961e-05, "loss": 1.3578, "step": 5321 }, { "epoch": 0.8136059621631951, "grad_norm": 1.421875, "learning_rate": 5.505279279526663e-05, "loss": 1.1611, "step": 5322 }, { "epoch": 0.8137588381425569, "grad_norm": 1.7265625, "learning_rate": 5.5048804993759374e-05, "loss": 1.4123, "step": 5323 }, { "epoch": 0.8139117141219185, "grad_norm": 1.6171875, "learning_rate": 5.504481728183577e-05, "loss": 1.6632, "step": 5324 }, { "epoch": 0.8140645901012803, "grad_norm": 1.40625, "learning_rate": 5.5040829659503765e-05, "loss": 1.4455, "step": 5325 }, { "epoch": 0.814217466080642, "grad_norm": 1.328125, "learning_rate": 5.503684212677139e-05, "loss": 1.205, "step": 5326 }, { "epoch": 0.8143703420600038, "grad_norm": 1.1953125, "learning_rate": 5.503285468364649e-05, "loss": 1.2209, "step": 5327 }, { "epoch": 0.8145232180393656, "grad_norm": 1.359375, "learning_rate": 5.5028867330137014e-05, "loss": 1.3797, "step": 5328 }, { "epoch": 0.8146760940187273, "grad_norm": 1.34375, "learning_rate": 5.5024880066250995e-05, "loss": 1.0822, "step": 5329 }, { "epoch": 0.8148289699980891, "grad_norm": 1.3984375, "learning_rate": 5.50208928919963e-05, "loss": 1.291, "step": 5330 }, { "epoch": 0.8149818459774508, "grad_norm": 1.4453125, "learning_rate": 5.501690580738093e-05, "loss": 1.2383, "step": 5331 }, { "epoch": 0.8151347219568126, "grad_norm": 1.2265625, "learning_rate": 5.501291881241279e-05, "loss": 1.3036, "step": 5332 }, { "epoch": 0.8152875979361742, "grad_norm": 1.515625, "learning_rate": 5.5008931907099834e-05, "loss": 1.3159, "step": 5333 }, { "epoch": 0.815440473915536, "grad_norm": 1.3671875, "learning_rate": 5.500494509145002e-05, "loss": 1.3701, "step": 5334 }, { "epoch": 0.8155933498948977, "grad_norm": 1.34375, "learning_rate": 5.5000958365471246e-05, "loss": 1.3747, "step": 5335 }, { "epoch": 0.8157462258742595, "grad_norm": 1.265625, "learning_rate": 5.4996971729171575e-05, "loss": 1.303, "step": 5336 }, { "epoch": 0.8158991018536212, "grad_norm": 1.25, "learning_rate": 5.49929851825588e-05, "loss": 1.2858, "step": 5337 }, { "epoch": 0.816051977832983, "grad_norm": 1.2421875, "learning_rate": 5.4988998725640975e-05, "loss": 1.3415, "step": 5338 }, { "epoch": 0.8162048538123448, "grad_norm": 1.3984375, "learning_rate": 5.498501235842601e-05, "loss": 1.423, "step": 5339 }, { "epoch": 0.8163577297917065, "grad_norm": 1.3828125, "learning_rate": 5.498102608092184e-05, "loss": 1.4449, "step": 5340 }, { "epoch": 0.8165106057710683, "grad_norm": 1.3125, "learning_rate": 5.497703989313643e-05, "loss": 1.4097, "step": 5341 }, { "epoch": 0.8166634817504299, "grad_norm": 1.5078125, "learning_rate": 5.497305379507771e-05, "loss": 1.3574, "step": 5342 }, { "epoch": 0.8168163577297917, "grad_norm": 1.3515625, "learning_rate": 5.4969067786753636e-05, "loss": 1.452, "step": 5343 }, { "epoch": 0.8169692337091534, "grad_norm": 1.375, "learning_rate": 5.4965081868172086e-05, "loss": 1.4243, "step": 5344 }, { "epoch": 0.8171221096885152, "grad_norm": 1.3203125, "learning_rate": 5.4961096039341144e-05, "loss": 1.3569, "step": 5345 }, { "epoch": 0.8172749856678769, "grad_norm": 1.46875, "learning_rate": 5.495711030026863e-05, "loss": 1.4613, "step": 5346 }, { "epoch": 0.8174278616472387, "grad_norm": 1.3828125, "learning_rate": 5.495312465096249e-05, "loss": 1.5819, "step": 5347 }, { "epoch": 0.8175807376266004, "grad_norm": 1.4453125, "learning_rate": 5.494913909143075e-05, "loss": 1.3898, "step": 5348 }, { "epoch": 0.8177336136059622, "grad_norm": 1.5078125, "learning_rate": 5.494515362168129e-05, "loss": 1.2983, "step": 5349 }, { "epoch": 0.817886489585324, "grad_norm": 1.4375, "learning_rate": 5.494116824172208e-05, "loss": 1.4789, "step": 5350 }, { "epoch": 0.8180393655646856, "grad_norm": 1.4140625, "learning_rate": 5.4937182951561005e-05, "loss": 1.3316, "step": 5351 }, { "epoch": 0.8181922415440473, "grad_norm": 1.5, "learning_rate": 5.493319775120615e-05, "loss": 1.4394, "step": 5352 }, { "epoch": 0.8183451175234091, "grad_norm": 1.3515625, "learning_rate": 5.4929212640665305e-05, "loss": 1.3882, "step": 5353 }, { "epoch": 0.8184979935027709, "grad_norm": 1.40625, "learning_rate": 5.492522761994643e-05, "loss": 1.3106, "step": 5354 }, { "epoch": 0.8186508694821326, "grad_norm": 1.4609375, "learning_rate": 5.492124268905758e-05, "loss": 1.6238, "step": 5355 }, { "epoch": 0.8188037454614944, "grad_norm": 1.484375, "learning_rate": 5.491725784800655e-05, "loss": 1.4946, "step": 5356 }, { "epoch": 0.8189566214408561, "grad_norm": 1.2890625, "learning_rate": 5.4913273096801386e-05, "loss": 1.2007, "step": 5357 }, { "epoch": 0.8191094974202179, "grad_norm": 1.3828125, "learning_rate": 5.490928843545e-05, "loss": 1.3738, "step": 5358 }, { "epoch": 0.8192623733995796, "grad_norm": 1.375, "learning_rate": 5.490530386396033e-05, "loss": 1.3614, "step": 5359 }, { "epoch": 0.8194152493789413, "grad_norm": 1.4453125, "learning_rate": 5.490131938234033e-05, "loss": 1.1481, "step": 5360 }, { "epoch": 0.819568125358303, "grad_norm": 1.2890625, "learning_rate": 5.48973349905979e-05, "loss": 1.2342, "step": 5361 }, { "epoch": 0.8197210013376648, "grad_norm": 1.3515625, "learning_rate": 5.489335068874103e-05, "loss": 1.4391, "step": 5362 }, { "epoch": 0.8198738773170265, "grad_norm": 1.359375, "learning_rate": 5.4889366476777584e-05, "loss": 1.3077, "step": 5363 }, { "epoch": 0.8200267532963883, "grad_norm": 1.3125, "learning_rate": 5.488538235471561e-05, "loss": 1.3896, "step": 5364 }, { "epoch": 0.82017962927575, "grad_norm": 1.546875, "learning_rate": 5.4881398322562974e-05, "loss": 1.3128, "step": 5365 }, { "epoch": 0.8203325052551118, "grad_norm": 1.3515625, "learning_rate": 5.4877414380327655e-05, "loss": 1.2097, "step": 5366 }, { "epoch": 0.8204853812344736, "grad_norm": 1.28125, "learning_rate": 5.4873430528017555e-05, "loss": 1.2288, "step": 5367 }, { "epoch": 0.8206382572138353, "grad_norm": 1.2578125, "learning_rate": 5.486944676564063e-05, "loss": 1.125, "step": 5368 }, { "epoch": 0.820791133193197, "grad_norm": 1.390625, "learning_rate": 5.486546309320484e-05, "loss": 1.2725, "step": 5369 }, { "epoch": 0.8209440091725587, "grad_norm": 1.4375, "learning_rate": 5.486147951071805e-05, "loss": 1.4246, "step": 5370 }, { "epoch": 0.8210968851519205, "grad_norm": 1.3515625, "learning_rate": 5.485749601818835e-05, "loss": 1.3188, "step": 5371 }, { "epoch": 0.8212497611312822, "grad_norm": 1.3984375, "learning_rate": 5.4853512615623524e-05, "loss": 1.2775, "step": 5372 }, { "epoch": 0.821402637110644, "grad_norm": 1.4140625, "learning_rate": 5.4849529303031536e-05, "loss": 1.4658, "step": 5373 }, { "epoch": 0.8215555130900057, "grad_norm": 1.4375, "learning_rate": 5.484554608042044e-05, "loss": 1.3967, "step": 5374 }, { "epoch": 0.8217083890693675, "grad_norm": 1.46875, "learning_rate": 5.484156294779801e-05, "loss": 1.3191, "step": 5375 }, { "epoch": 0.8218612650487293, "grad_norm": 1.328125, "learning_rate": 5.483757990517231e-05, "loss": 1.2363, "step": 5376 }, { "epoch": 0.822014141028091, "grad_norm": 1.5, "learning_rate": 5.4833596952551235e-05, "loss": 1.3478, "step": 5377 }, { "epoch": 0.8221670170074526, "grad_norm": 1.5, "learning_rate": 5.4829614089942715e-05, "loss": 1.4172, "step": 5378 }, { "epoch": 0.8223198929868144, "grad_norm": 1.5, "learning_rate": 5.48256313173547e-05, "loss": 1.7433, "step": 5379 }, { "epoch": 0.8224727689661762, "grad_norm": 1.4453125, "learning_rate": 5.4821648634795075e-05, "loss": 1.2564, "step": 5380 }, { "epoch": 0.8226256449455379, "grad_norm": 1.34375, "learning_rate": 5.48176660422719e-05, "loss": 1.2564, "step": 5381 }, { "epoch": 0.8227785209248997, "grad_norm": 1.3359375, "learning_rate": 5.481368353979297e-05, "loss": 1.1843, "step": 5382 }, { "epoch": 0.8229313969042614, "grad_norm": 1.3515625, "learning_rate": 5.480970112736632e-05, "loss": 1.2131, "step": 5383 }, { "epoch": 0.8230842728836232, "grad_norm": 1.421875, "learning_rate": 5.480571880499985e-05, "loss": 1.41, "step": 5384 }, { "epoch": 0.8232371488629849, "grad_norm": 1.5, "learning_rate": 5.480173657270149e-05, "loss": 1.2948, "step": 5385 }, { "epoch": 0.8233900248423467, "grad_norm": 1.4453125, "learning_rate": 5.4797754430479206e-05, "loss": 1.5464, "step": 5386 }, { "epoch": 0.8235429008217083, "grad_norm": 1.4296875, "learning_rate": 5.47937723783409e-05, "loss": 1.5885, "step": 5387 }, { "epoch": 0.8236957768010701, "grad_norm": 1.46875, "learning_rate": 5.478979041629453e-05, "loss": 1.2967, "step": 5388 }, { "epoch": 0.8238486527804318, "grad_norm": 1.4296875, "learning_rate": 5.478580854434798e-05, "loss": 1.315, "step": 5389 }, { "epoch": 0.8240015287597936, "grad_norm": 1.3671875, "learning_rate": 5.4781826762509314e-05, "loss": 1.2584, "step": 5390 }, { "epoch": 0.8241544047391554, "grad_norm": 1.34375, "learning_rate": 5.4777845070786305e-05, "loss": 1.4716, "step": 5391 }, { "epoch": 0.8243072807185171, "grad_norm": 1.390625, "learning_rate": 5.4773863469187006e-05, "loss": 1.5087, "step": 5392 }, { "epoch": 0.8244601566978789, "grad_norm": 1.4140625, "learning_rate": 5.47698819577193e-05, "loss": 1.3181, "step": 5393 }, { "epoch": 0.8246130326772406, "grad_norm": 1.296875, "learning_rate": 5.4765900536391146e-05, "loss": 1.2642, "step": 5394 }, { "epoch": 0.8247659086566024, "grad_norm": 1.5078125, "learning_rate": 5.4761919205210456e-05, "loss": 1.5883, "step": 5395 }, { "epoch": 0.824918784635964, "grad_norm": 1.265625, "learning_rate": 5.475793796418518e-05, "loss": 1.1751, "step": 5396 }, { "epoch": 0.8250716606153258, "grad_norm": 1.40625, "learning_rate": 5.475395681332325e-05, "loss": 1.3994, "step": 5397 }, { "epoch": 0.8252245365946875, "grad_norm": 1.3125, "learning_rate": 5.4749975752632556e-05, "loss": 1.3692, "step": 5398 }, { "epoch": 0.8253774125740493, "grad_norm": 1.9375, "learning_rate": 5.4745994782121103e-05, "loss": 1.5882, "step": 5399 }, { "epoch": 0.825530288553411, "grad_norm": 1.3046875, "learning_rate": 5.4742013901796854e-05, "loss": 1.0717, "step": 5400 }, { "epoch": 0.8256831645327728, "grad_norm": 1.3828125, "learning_rate": 5.4738033111667586e-05, "loss": 1.411, "step": 5401 }, { "epoch": 0.8258360405121346, "grad_norm": 1.2890625, "learning_rate": 5.4734052411741364e-05, "loss": 1.4999, "step": 5402 }, { "epoch": 0.8259889164914963, "grad_norm": 1.421875, "learning_rate": 5.47300718020261e-05, "loss": 1.4254, "step": 5403 }, { "epoch": 0.8261417924708581, "grad_norm": 1.46875, "learning_rate": 5.472609128252972e-05, "loss": 1.4883, "step": 5404 }, { "epoch": 0.8262946684502197, "grad_norm": 1.390625, "learning_rate": 5.472211085326013e-05, "loss": 1.292, "step": 5405 }, { "epoch": 0.8264475444295815, "grad_norm": 1.453125, "learning_rate": 5.4718130514225296e-05, "loss": 1.5292, "step": 5406 }, { "epoch": 0.8266004204089432, "grad_norm": 1.390625, "learning_rate": 5.471415026543314e-05, "loss": 1.4809, "step": 5407 }, { "epoch": 0.826753296388305, "grad_norm": 1.3125, "learning_rate": 5.471017010689155e-05, "loss": 1.152, "step": 5408 }, { "epoch": 0.8269061723676667, "grad_norm": 1.5234375, "learning_rate": 5.470619003860854e-05, "loss": 1.4822, "step": 5409 }, { "epoch": 0.8270590483470285, "grad_norm": 1.6796875, "learning_rate": 5.4702210060591994e-05, "loss": 1.4697, "step": 5410 }, { "epoch": 0.8272119243263902, "grad_norm": 1.3515625, "learning_rate": 5.469823017284987e-05, "loss": 1.3194, "step": 5411 }, { "epoch": 0.827364800305752, "grad_norm": 1.3203125, "learning_rate": 5.4694250375390066e-05, "loss": 1.1537, "step": 5412 }, { "epoch": 0.8275176762851137, "grad_norm": 1.3046875, "learning_rate": 5.469027066822052e-05, "loss": 1.2339, "step": 5413 }, { "epoch": 0.8276705522644754, "grad_norm": 1.3046875, "learning_rate": 5.468629105134919e-05, "loss": 1.2939, "step": 5414 }, { "epoch": 0.8278234282438371, "grad_norm": 1.3046875, "learning_rate": 5.4682311524783945e-05, "loss": 1.2042, "step": 5415 }, { "epoch": 0.8279763042231989, "grad_norm": 1.4765625, "learning_rate": 5.4678332088532835e-05, "loss": 1.5869, "step": 5416 }, { "epoch": 0.8281291802025607, "grad_norm": 1.28125, "learning_rate": 5.467435274260364e-05, "loss": 1.1123, "step": 5417 }, { "epoch": 0.8282820561819224, "grad_norm": 1.4296875, "learning_rate": 5.46703734870044e-05, "loss": 1.3503, "step": 5418 }, { "epoch": 0.8284349321612842, "grad_norm": 1.578125, "learning_rate": 5.466639432174305e-05, "loss": 1.3572, "step": 5419 }, { "epoch": 0.8285878081406459, "grad_norm": 1.484375, "learning_rate": 5.46624152468274e-05, "loss": 1.2339, "step": 5420 }, { "epoch": 0.8287406841200077, "grad_norm": 1.3671875, "learning_rate": 5.465843626226552e-05, "loss": 1.4517, "step": 5421 }, { "epoch": 0.8288935600993694, "grad_norm": 1.3515625, "learning_rate": 5.465445736806527e-05, "loss": 1.4409, "step": 5422 }, { "epoch": 0.8290464360787311, "grad_norm": 1.4375, "learning_rate": 5.465047856423457e-05, "loss": 1.551, "step": 5423 }, { "epoch": 0.8291993120580928, "grad_norm": 1.546875, "learning_rate": 5.464649985078136e-05, "loss": 1.4679, "step": 5424 }, { "epoch": 0.8293521880374546, "grad_norm": 1.3046875, "learning_rate": 5.464252122771361e-05, "loss": 1.2902, "step": 5425 }, { "epoch": 0.8295050640168163, "grad_norm": 1.4140625, "learning_rate": 5.463854269503925e-05, "loss": 1.6872, "step": 5426 }, { "epoch": 0.8296579399961781, "grad_norm": 1.34375, "learning_rate": 5.46345642527661e-05, "loss": 1.114, "step": 5427 }, { "epoch": 0.8298108159755399, "grad_norm": 1.4765625, "learning_rate": 5.46305859009022e-05, "loss": 1.1564, "step": 5428 }, { "epoch": 0.8299636919549016, "grad_norm": 1.484375, "learning_rate": 5.4626607639455464e-05, "loss": 1.5716, "step": 5429 }, { "epoch": 0.8301165679342634, "grad_norm": 1.375, "learning_rate": 5.462262946843377e-05, "loss": 1.5243, "step": 5430 }, { "epoch": 0.8302694439136251, "grad_norm": 1.1875, "learning_rate": 5.461865138784507e-05, "loss": 1.0811, "step": 5431 }, { "epoch": 0.8304223198929868, "grad_norm": 1.3984375, "learning_rate": 5.461467339769736e-05, "loss": 1.3645, "step": 5432 }, { "epoch": 0.8305751958723485, "grad_norm": 1.2734375, "learning_rate": 5.461069549799847e-05, "loss": 1.1889, "step": 5433 }, { "epoch": 0.8307280718517103, "grad_norm": 1.328125, "learning_rate": 5.4606717688756317e-05, "loss": 1.5259, "step": 5434 }, { "epoch": 0.830880947831072, "grad_norm": 1.515625, "learning_rate": 5.460273996997896e-05, "loss": 1.4357, "step": 5435 }, { "epoch": 0.8310338238104338, "grad_norm": 1.1640625, "learning_rate": 5.459876234167416e-05, "loss": 1.2301, "step": 5436 }, { "epoch": 0.8311866997897955, "grad_norm": 1.546875, "learning_rate": 5.4594784803849965e-05, "loss": 1.6865, "step": 5437 }, { "epoch": 0.8313395757691573, "grad_norm": 1.296875, "learning_rate": 5.4590807356514275e-05, "loss": 1.2088, "step": 5438 }, { "epoch": 0.831492451748519, "grad_norm": 1.28125, "learning_rate": 5.4586829999674996e-05, "loss": 1.2042, "step": 5439 }, { "epoch": 0.8316453277278808, "grad_norm": 1.3515625, "learning_rate": 5.458285273334005e-05, "loss": 1.2233, "step": 5440 }, { "epoch": 0.8317982037072424, "grad_norm": 1.40625, "learning_rate": 5.4578875557517396e-05, "loss": 1.5102, "step": 5441 }, { "epoch": 0.8319510796866042, "grad_norm": 1.2734375, "learning_rate": 5.457489847221493e-05, "loss": 1.3135, "step": 5442 }, { "epoch": 0.832103955665966, "grad_norm": 1.296875, "learning_rate": 5.457092147744054e-05, "loss": 1.4765, "step": 5443 }, { "epoch": 0.8322568316453277, "grad_norm": 1.4765625, "learning_rate": 5.456694457320226e-05, "loss": 1.3074, "step": 5444 }, { "epoch": 0.8324097076246895, "grad_norm": 1.4609375, "learning_rate": 5.456296775950798e-05, "loss": 1.48, "step": 5445 }, { "epoch": 0.8325625836040512, "grad_norm": 1.3984375, "learning_rate": 5.455899103636552e-05, "loss": 1.514, "step": 5446 }, { "epoch": 0.832715459583413, "grad_norm": 1.234375, "learning_rate": 5.4555014403782944e-05, "loss": 1.1606, "step": 5447 }, { "epoch": 0.8328683355627747, "grad_norm": 1.4375, "learning_rate": 5.4551037861768096e-05, "loss": 1.5344, "step": 5448 }, { "epoch": 0.8330212115421365, "grad_norm": 1.2578125, "learning_rate": 5.454706141032895e-05, "loss": 1.3692, "step": 5449 }, { "epoch": 0.8331740875214981, "grad_norm": 1.4296875, "learning_rate": 5.454308504947334e-05, "loss": 1.2958, "step": 5450 }, { "epoch": 0.8333269635008599, "grad_norm": 1.4375, "learning_rate": 5.453910877920935e-05, "loss": 1.4606, "step": 5451 }, { "epoch": 0.8334798394802216, "grad_norm": 1.5390625, "learning_rate": 5.453513259954477e-05, "loss": 1.4015, "step": 5452 }, { "epoch": 0.8336327154595834, "grad_norm": 4.125, "learning_rate": 5.4531156510487516e-05, "loss": 1.2463, "step": 5453 }, { "epoch": 0.8337855914389452, "grad_norm": 1.40625, "learning_rate": 5.452718051204559e-05, "loss": 1.1402, "step": 5454 }, { "epoch": 0.8339384674183069, "grad_norm": 1.25, "learning_rate": 5.4523204604226905e-05, "loss": 1.1732, "step": 5455 }, { "epoch": 0.8340913433976687, "grad_norm": 1.2890625, "learning_rate": 5.451922878703937e-05, "loss": 1.105, "step": 5456 }, { "epoch": 0.8342442193770304, "grad_norm": 1.296875, "learning_rate": 5.451525306049088e-05, "loss": 1.2629, "step": 5457 }, { "epoch": 0.8343970953563922, "grad_norm": 1.2890625, "learning_rate": 5.451127742458939e-05, "loss": 1.3022, "step": 5458 }, { "epoch": 0.8345499713357538, "grad_norm": 1.4765625, "learning_rate": 5.450730187934283e-05, "loss": 1.3871, "step": 5459 }, { "epoch": 0.8347028473151156, "grad_norm": 1.5, "learning_rate": 5.450332642475906e-05, "loss": 1.4746, "step": 5460 }, { "epoch": 0.8348557232944773, "grad_norm": 1.4921875, "learning_rate": 5.4499351060846125e-05, "loss": 1.4413, "step": 5461 }, { "epoch": 0.8350085992738391, "grad_norm": 1.4375, "learning_rate": 5.449537578761179e-05, "loss": 1.4993, "step": 5462 }, { "epoch": 0.8351614752532008, "grad_norm": 1.4375, "learning_rate": 5.449140060506411e-05, "loss": 1.3434, "step": 5463 }, { "epoch": 0.8353143512325626, "grad_norm": 1.4296875, "learning_rate": 5.448742551321095e-05, "loss": 1.4624, "step": 5464 }, { "epoch": 0.8354672272119243, "grad_norm": 1.296875, "learning_rate": 5.4483450512060254e-05, "loss": 1.1029, "step": 5465 }, { "epoch": 0.8356201031912861, "grad_norm": 1.3125, "learning_rate": 5.447947560161992e-05, "loss": 1.2668, "step": 5466 }, { "epoch": 0.8357729791706479, "grad_norm": 1.3984375, "learning_rate": 5.447550078189788e-05, "loss": 1.5204, "step": 5467 }, { "epoch": 0.8359258551500095, "grad_norm": 1.4765625, "learning_rate": 5.4471526052902066e-05, "loss": 1.3151, "step": 5468 }, { "epoch": 0.8360787311293713, "grad_norm": 1.34375, "learning_rate": 5.446755141464033e-05, "loss": 1.3078, "step": 5469 }, { "epoch": 0.836231607108733, "grad_norm": 1.953125, "learning_rate": 5.446357686712073e-05, "loss": 1.5075, "step": 5470 }, { "epoch": 0.8363844830880948, "grad_norm": 1.3359375, "learning_rate": 5.445960241035108e-05, "loss": 1.4423, "step": 5471 }, { "epoch": 0.8365373590674565, "grad_norm": 1.390625, "learning_rate": 5.445562804433929e-05, "loss": 1.4646, "step": 5472 }, { "epoch": 0.8366902350468183, "grad_norm": 1.3671875, "learning_rate": 5.445165376909336e-05, "loss": 1.2689, "step": 5473 }, { "epoch": 0.83684311102618, "grad_norm": 1.3984375, "learning_rate": 5.444767958462116e-05, "loss": 1.337, "step": 5474 }, { "epoch": 0.8369959870055418, "grad_norm": 1.2890625, "learning_rate": 5.444370549093062e-05, "loss": 1.3505, "step": 5475 }, { "epoch": 0.8371488629849035, "grad_norm": 1.2734375, "learning_rate": 5.4439731488029625e-05, "loss": 1.2553, "step": 5476 }, { "epoch": 0.8373017389642652, "grad_norm": 1.4453125, "learning_rate": 5.44357575759262e-05, "loss": 1.4479, "step": 5477 }, { "epoch": 0.8374546149436269, "grad_norm": 1.3828125, "learning_rate": 5.443178375462816e-05, "loss": 1.6982, "step": 5478 }, { "epoch": 0.8376074909229887, "grad_norm": 1.3671875, "learning_rate": 5.4427810024143436e-05, "loss": 1.2886, "step": 5479 }, { "epoch": 0.8377603669023505, "grad_norm": 1.2734375, "learning_rate": 5.442383638448003e-05, "loss": 1.1922, "step": 5480 }, { "epoch": 0.8379132428817122, "grad_norm": 1.34375, "learning_rate": 5.441986283564573e-05, "loss": 1.406, "step": 5481 }, { "epoch": 0.838066118861074, "grad_norm": 1.5, "learning_rate": 5.441588937764857e-05, "loss": 1.6629, "step": 5482 }, { "epoch": 0.8382189948404357, "grad_norm": 1.6328125, "learning_rate": 5.441191601049641e-05, "loss": 1.5504, "step": 5483 }, { "epoch": 0.8383718708197975, "grad_norm": 1.40625, "learning_rate": 5.440794273419718e-05, "loss": 1.3391, "step": 5484 }, { "epoch": 0.8385247467991592, "grad_norm": 1.3984375, "learning_rate": 5.440396954875882e-05, "loss": 1.5389, "step": 5485 }, { "epoch": 0.8386776227785209, "grad_norm": 1.421875, "learning_rate": 5.439999645418923e-05, "loss": 1.2876, "step": 5486 }, { "epoch": 0.8388304987578826, "grad_norm": 1.421875, "learning_rate": 5.439602345049631e-05, "loss": 1.3984, "step": 5487 }, { "epoch": 0.8389833747372444, "grad_norm": 1.4453125, "learning_rate": 5.4392050537687964e-05, "loss": 1.6283, "step": 5488 }, { "epoch": 0.8391362507166061, "grad_norm": 1.3203125, "learning_rate": 5.438807771577219e-05, "loss": 1.5093, "step": 5489 }, { "epoch": 0.8392891266959679, "grad_norm": 1.296875, "learning_rate": 5.4384104984756856e-05, "loss": 1.3057, "step": 5490 }, { "epoch": 0.8394420026753296, "grad_norm": 1.359375, "learning_rate": 5.438013234464987e-05, "loss": 1.2737, "step": 5491 }, { "epoch": 0.8395948786546914, "grad_norm": 1.4296875, "learning_rate": 5.437615979545917e-05, "loss": 1.5559, "step": 5492 }, { "epoch": 0.8397477546340532, "grad_norm": 1.3828125, "learning_rate": 5.437218733719266e-05, "loss": 1.4501, "step": 5493 }, { "epoch": 0.8399006306134149, "grad_norm": 1.4296875, "learning_rate": 5.436821496985826e-05, "loss": 1.5158, "step": 5494 }, { "epoch": 0.8400535065927766, "grad_norm": 1.4375, "learning_rate": 5.4364242693463826e-05, "loss": 1.3913, "step": 5495 }, { "epoch": 0.8402063825721383, "grad_norm": 1.21875, "learning_rate": 5.436027050801744e-05, "loss": 1.1635, "step": 5496 }, { "epoch": 0.8403592585515001, "grad_norm": 1.2734375, "learning_rate": 5.435629841352686e-05, "loss": 1.448, "step": 5497 }, { "epoch": 0.8405121345308618, "grad_norm": 1.6953125, "learning_rate": 5.435232641000002e-05, "loss": 1.3649, "step": 5498 }, { "epoch": 0.8406650105102236, "grad_norm": 1.375, "learning_rate": 5.434835449744495e-05, "loss": 1.5588, "step": 5499 }, { "epoch": 0.8408178864895853, "grad_norm": 1.4921875, "learning_rate": 5.43443826758694e-05, "loss": 1.6102, "step": 5500 }, { "epoch": 0.8409707624689471, "grad_norm": 1.5859375, "learning_rate": 5.4340410945281417e-05, "loss": 1.5334, "step": 5501 }, { "epoch": 0.8411236384483088, "grad_norm": 1.2734375, "learning_rate": 5.4336439305688855e-05, "loss": 1.131, "step": 5502 }, { "epoch": 0.8412765144276706, "grad_norm": 1.3828125, "learning_rate": 5.433246775709967e-05, "loss": 1.2269, "step": 5503 }, { "epoch": 0.8414293904070322, "grad_norm": 1.3671875, "learning_rate": 5.4328496299521735e-05, "loss": 1.195, "step": 5504 }, { "epoch": 0.841582266386394, "grad_norm": 1.53125, "learning_rate": 5.432452493296295e-05, "loss": 1.6362, "step": 5505 }, { "epoch": 0.8417351423657558, "grad_norm": 1.3203125, "learning_rate": 5.432055365743134e-05, "loss": 1.2612, "step": 5506 }, { "epoch": 0.8418880183451175, "grad_norm": 1.2265625, "learning_rate": 5.431658247293465e-05, "loss": 1.3531, "step": 5507 }, { "epoch": 0.8420408943244793, "grad_norm": 1.4140625, "learning_rate": 5.431261137948094e-05, "loss": 1.169, "step": 5508 }, { "epoch": 0.842193770303841, "grad_norm": 1.265625, "learning_rate": 5.4308640377078055e-05, "loss": 1.1746, "step": 5509 }, { "epoch": 0.8423466462832028, "grad_norm": 1.3515625, "learning_rate": 5.430466946573393e-05, "loss": 1.3723, "step": 5510 }, { "epoch": 0.8424995222625645, "grad_norm": 1.421875, "learning_rate": 5.430069864545646e-05, "loss": 1.3095, "step": 5511 }, { "epoch": 0.8426523982419263, "grad_norm": 1.4453125, "learning_rate": 5.4296727916253576e-05, "loss": 1.3913, "step": 5512 }, { "epoch": 0.8428052742212879, "grad_norm": 1.4921875, "learning_rate": 5.429275727813319e-05, "loss": 1.6205, "step": 5513 }, { "epoch": 0.8429581502006497, "grad_norm": 1.3359375, "learning_rate": 5.428878673110317e-05, "loss": 1.472, "step": 5514 }, { "epoch": 0.8431110261800114, "grad_norm": 1.3359375, "learning_rate": 5.4284816275171544e-05, "loss": 1.1942, "step": 5515 }, { "epoch": 0.8432639021593732, "grad_norm": 1.46875, "learning_rate": 5.428084591034608e-05, "loss": 1.3382, "step": 5516 }, { "epoch": 0.843416778138735, "grad_norm": 1.3671875, "learning_rate": 5.427687563663478e-05, "loss": 1.3013, "step": 5517 }, { "epoch": 0.8435696541180967, "grad_norm": 1.5, "learning_rate": 5.4272905454045554e-05, "loss": 1.2745, "step": 5518 }, { "epoch": 0.8437225300974585, "grad_norm": 1.5, "learning_rate": 5.4268935362586284e-05, "loss": 1.4435, "step": 5519 }, { "epoch": 0.8438754060768202, "grad_norm": 1.359375, "learning_rate": 5.4264965362264906e-05, "loss": 1.2132, "step": 5520 }, { "epoch": 0.844028282056182, "grad_norm": 1.3828125, "learning_rate": 5.4260995453089315e-05, "loss": 1.2455, "step": 5521 }, { "epoch": 0.8441811580355436, "grad_norm": 1.4453125, "learning_rate": 5.425702563506744e-05, "loss": 1.0922, "step": 5522 }, { "epoch": 0.8443340340149054, "grad_norm": 1.453125, "learning_rate": 5.425305590820713e-05, "loss": 1.2318, "step": 5523 }, { "epoch": 0.8444869099942671, "grad_norm": 1.3828125, "learning_rate": 5.4249086272516395e-05, "loss": 1.4047, "step": 5524 }, { "epoch": 0.8446397859736289, "grad_norm": 1.25, "learning_rate": 5.424511672800312e-05, "loss": 1.4575, "step": 5525 }, { "epoch": 0.8447926619529906, "grad_norm": 1.234375, "learning_rate": 5.424114727467513e-05, "loss": 1.2364, "step": 5526 }, { "epoch": 0.8449455379323524, "grad_norm": 1.3984375, "learning_rate": 5.4237177912540436e-05, "loss": 1.429, "step": 5527 }, { "epoch": 0.8450984139117141, "grad_norm": 1.3984375, "learning_rate": 5.4233208641606915e-05, "loss": 1.3417, "step": 5528 }, { "epoch": 0.8452512898910759, "grad_norm": 1.3125, "learning_rate": 5.422923946188248e-05, "loss": 1.3364, "step": 5529 }, { "epoch": 0.8454041658704377, "grad_norm": 1.5078125, "learning_rate": 5.4225270373375025e-05, "loss": 1.3847, "step": 5530 }, { "epoch": 0.8455570418497993, "grad_norm": 1.3828125, "learning_rate": 5.422130137609247e-05, "loss": 1.3502, "step": 5531 }, { "epoch": 0.845709917829161, "grad_norm": 1.375, "learning_rate": 5.4217332470042734e-05, "loss": 1.2773, "step": 5532 }, { "epoch": 0.8458627938085228, "grad_norm": 1.375, "learning_rate": 5.4213363655233684e-05, "loss": 1.3218, "step": 5533 }, { "epoch": 0.8460156697878846, "grad_norm": 1.390625, "learning_rate": 5.42093949316733e-05, "loss": 1.5972, "step": 5534 }, { "epoch": 0.8461685457672463, "grad_norm": 1.25, "learning_rate": 5.420542629936944e-05, "loss": 1.1426, "step": 5535 }, { "epoch": 0.8463214217466081, "grad_norm": 1.359375, "learning_rate": 5.420145775833005e-05, "loss": 1.2254, "step": 5536 }, { "epoch": 0.8464742977259698, "grad_norm": 1.390625, "learning_rate": 5.419748930856301e-05, "loss": 1.5061, "step": 5537 }, { "epoch": 0.8466271737053316, "grad_norm": 1.171875, "learning_rate": 5.419352095007624e-05, "loss": 1.0444, "step": 5538 }, { "epoch": 0.8467800496846933, "grad_norm": 1.296875, "learning_rate": 5.418955268287764e-05, "loss": 1.4085, "step": 5539 }, { "epoch": 0.846932925664055, "grad_norm": 1.203125, "learning_rate": 5.4185584506975085e-05, "loss": 1.1872, "step": 5540 }, { "epoch": 0.8470858016434167, "grad_norm": 1.40625, "learning_rate": 5.4181616422376605e-05, "loss": 1.28, "step": 5541 }, { "epoch": 0.8472386776227785, "grad_norm": 1.5, "learning_rate": 5.4177648429089936e-05, "loss": 1.4109, "step": 5542 }, { "epoch": 0.8473915536021402, "grad_norm": 1.453125, "learning_rate": 5.4173680527123116e-05, "loss": 1.3509, "step": 5543 }, { "epoch": 0.847544429581502, "grad_norm": 1.4453125, "learning_rate": 5.416971271648406e-05, "loss": 1.4598, "step": 5544 }, { "epoch": 0.8476973055608638, "grad_norm": 1.53125, "learning_rate": 5.4165744997180544e-05, "loss": 1.498, "step": 5545 }, { "epoch": 0.8478501815402255, "grad_norm": 1.5703125, "learning_rate": 5.416177736922059e-05, "loss": 1.4979, "step": 5546 }, { "epoch": 0.8480030575195873, "grad_norm": 1.515625, "learning_rate": 5.415780983261207e-05, "loss": 1.3818, "step": 5547 }, { "epoch": 0.848155933498949, "grad_norm": 1.4609375, "learning_rate": 5.415384238736292e-05, "loss": 1.3473, "step": 5548 }, { "epoch": 0.8483088094783107, "grad_norm": 1.1953125, "learning_rate": 5.4149875033480956e-05, "loss": 1.192, "step": 5549 }, { "epoch": 0.8484616854576724, "grad_norm": 1.4453125, "learning_rate": 5.414590777097419e-05, "loss": 1.3053, "step": 5550 }, { "epoch": 0.8486145614370342, "grad_norm": 1.484375, "learning_rate": 5.414194059985054e-05, "loss": 1.5258, "step": 5551 }, { "epoch": 0.8487674374163959, "grad_norm": 1.40625, "learning_rate": 5.413797352011778e-05, "loss": 1.3833, "step": 5552 }, { "epoch": 0.8489203133957577, "grad_norm": 1.390625, "learning_rate": 5.413400653178393e-05, "loss": 1.3528, "step": 5553 }, { "epoch": 0.8490731893751194, "grad_norm": 1.3828125, "learning_rate": 5.413003963485686e-05, "loss": 1.4084, "step": 5554 }, { "epoch": 0.8492260653544812, "grad_norm": 1.3125, "learning_rate": 5.4126072829344474e-05, "loss": 1.4882, "step": 5555 }, { "epoch": 0.849378941333843, "grad_norm": 1.1328125, "learning_rate": 5.412210611525465e-05, "loss": 1.2265, "step": 5556 }, { "epoch": 0.8495318173132047, "grad_norm": 1.4375, "learning_rate": 5.411813949259541e-05, "loss": 1.7057, "step": 5557 }, { "epoch": 0.8496846932925664, "grad_norm": 1.5078125, "learning_rate": 5.411417296137453e-05, "loss": 1.3076, "step": 5558 }, { "epoch": 0.8498375692719281, "grad_norm": 1.3046875, "learning_rate": 5.4110206521599906e-05, "loss": 1.4448, "step": 5559 }, { "epoch": 0.8499904452512899, "grad_norm": 1.4296875, "learning_rate": 5.4106240173279586e-05, "loss": 1.3162, "step": 5560 }, { "epoch": 0.8501433212306516, "grad_norm": 1.3515625, "learning_rate": 5.410227391642131e-05, "loss": 1.4082, "step": 5561 }, { "epoch": 0.8502961972100134, "grad_norm": 1.4765625, "learning_rate": 5.409830775103308e-05, "loss": 1.8492, "step": 5562 }, { "epoch": 0.8504490731893751, "grad_norm": 1.53125, "learning_rate": 5.409434167712279e-05, "loss": 1.3977, "step": 5563 }, { "epoch": 0.8506019491687369, "grad_norm": 1.40625, "learning_rate": 5.4090375694698325e-05, "loss": 1.2526, "step": 5564 }, { "epoch": 0.8507548251480986, "grad_norm": 1.3984375, "learning_rate": 5.408640980376758e-05, "loss": 1.3298, "step": 5565 }, { "epoch": 0.8509077011274604, "grad_norm": 1.3046875, "learning_rate": 5.4082444004338485e-05, "loss": 1.3185, "step": 5566 }, { "epoch": 0.851060577106822, "grad_norm": 1.46875, "learning_rate": 5.407847829641894e-05, "loss": 1.2478, "step": 5567 }, { "epoch": 0.8512134530861838, "grad_norm": 1.8828125, "learning_rate": 5.40745126800168e-05, "loss": 1.3232, "step": 5568 }, { "epoch": 0.8513663290655455, "grad_norm": 1.3203125, "learning_rate": 5.407054715514004e-05, "loss": 1.2898, "step": 5569 }, { "epoch": 0.8515192050449073, "grad_norm": 1.2109375, "learning_rate": 5.406658172179656e-05, "loss": 1.1891, "step": 5570 }, { "epoch": 0.8516720810242691, "grad_norm": 1.3671875, "learning_rate": 5.406261637999417e-05, "loss": 1.283, "step": 5571 }, { "epoch": 0.8518249570036308, "grad_norm": 1.2578125, "learning_rate": 5.405865112974087e-05, "loss": 0.936, "step": 5572 }, { "epoch": 0.8519778329829926, "grad_norm": 1.328125, "learning_rate": 5.405468597104453e-05, "loss": 1.5606, "step": 5573 }, { "epoch": 0.8521307089623543, "grad_norm": 1.3125, "learning_rate": 5.405072090391304e-05, "loss": 1.2092, "step": 5574 }, { "epoch": 0.8522835849417161, "grad_norm": 1.4765625, "learning_rate": 5.4046755928354286e-05, "loss": 1.6449, "step": 5575 }, { "epoch": 0.8524364609210777, "grad_norm": 1.4765625, "learning_rate": 5.404279104437627e-05, "loss": 1.4714, "step": 5576 }, { "epoch": 0.8525893369004395, "grad_norm": 1.2890625, "learning_rate": 5.4038826251986785e-05, "loss": 1.4438, "step": 5577 }, { "epoch": 0.8527422128798012, "grad_norm": 1.390625, "learning_rate": 5.403486155119373e-05, "loss": 1.2964, "step": 5578 }, { "epoch": 0.852895088859163, "grad_norm": 1.40625, "learning_rate": 5.4030896942005074e-05, "loss": 1.3138, "step": 5579 }, { "epoch": 0.8530479648385247, "grad_norm": 1.3359375, "learning_rate": 5.402693242442869e-05, "loss": 1.3259, "step": 5580 }, { "epoch": 0.8532008408178865, "grad_norm": 1.578125, "learning_rate": 5.402296799847248e-05, "loss": 1.5604, "step": 5581 }, { "epoch": 0.8533537167972483, "grad_norm": 1.3046875, "learning_rate": 5.401900366414434e-05, "loss": 1.3195, "step": 5582 }, { "epoch": 0.85350659277661, "grad_norm": 1.328125, "learning_rate": 5.4015039421452184e-05, "loss": 1.4376, "step": 5583 }, { "epoch": 0.8536594687559718, "grad_norm": 1.421875, "learning_rate": 5.401107527040389e-05, "loss": 1.1616, "step": 5584 }, { "epoch": 0.8538123447353334, "grad_norm": 1.5546875, "learning_rate": 5.400711121100732e-05, "loss": 1.6093, "step": 5585 }, { "epoch": 0.8539652207146952, "grad_norm": 1.3671875, "learning_rate": 5.400314724327051e-05, "loss": 1.3716, "step": 5586 }, { "epoch": 0.8541180966940569, "grad_norm": 1.5078125, "learning_rate": 5.39991833672012e-05, "loss": 1.2417, "step": 5587 }, { "epoch": 0.8542709726734187, "grad_norm": 1.328125, "learning_rate": 5.39952195828074e-05, "loss": 1.4011, "step": 5588 }, { "epoch": 0.8544238486527804, "grad_norm": 1.390625, "learning_rate": 5.399125589009697e-05, "loss": 1.2349, "step": 5589 }, { "epoch": 0.8545767246321422, "grad_norm": 1.171875, "learning_rate": 5.398729228907782e-05, "loss": 1.3072, "step": 5590 }, { "epoch": 0.8547296006115039, "grad_norm": 1.375, "learning_rate": 5.398332877975782e-05, "loss": 1.3497, "step": 5591 }, { "epoch": 0.8548824765908657, "grad_norm": 1.4140625, "learning_rate": 5.3979365362144896e-05, "loss": 1.2623, "step": 5592 }, { "epoch": 0.8550353525702274, "grad_norm": 1.2890625, "learning_rate": 5.3975402036246956e-05, "loss": 1.4035, "step": 5593 }, { "epoch": 0.8551882285495891, "grad_norm": 1.3515625, "learning_rate": 5.397143880207183e-05, "loss": 1.1979, "step": 5594 }, { "epoch": 0.8553411045289508, "grad_norm": 1.53125, "learning_rate": 5.3967475659627544e-05, "loss": 1.3369, "step": 5595 }, { "epoch": 0.8554939805083126, "grad_norm": 1.34375, "learning_rate": 5.396351260892189e-05, "loss": 1.3181, "step": 5596 }, { "epoch": 0.8556468564876744, "grad_norm": 1.390625, "learning_rate": 5.395954964996275e-05, "loss": 1.1663, "step": 5597 }, { "epoch": 0.8557997324670361, "grad_norm": 1.53125, "learning_rate": 5.3955586782758106e-05, "loss": 1.54, "step": 5598 }, { "epoch": 0.8559526084463979, "grad_norm": 1.3515625, "learning_rate": 5.395162400731583e-05, "loss": 1.4478, "step": 5599 }, { "epoch": 0.8561054844257596, "grad_norm": 1.3828125, "learning_rate": 5.394766132364379e-05, "loss": 1.2222, "step": 5600 }, { "epoch": 0.8562583604051214, "grad_norm": 1.4453125, "learning_rate": 5.3943698731749884e-05, "loss": 1.3633, "step": 5601 }, { "epoch": 0.8564112363844831, "grad_norm": 1.625, "learning_rate": 5.393973623164209e-05, "loss": 1.5866, "step": 5602 }, { "epoch": 0.8565641123638448, "grad_norm": 1.3515625, "learning_rate": 5.393577382332821e-05, "loss": 1.3707, "step": 5603 }, { "epoch": 0.8567169883432065, "grad_norm": 1.5, "learning_rate": 5.393181150681612e-05, "loss": 1.4442, "step": 5604 }, { "epoch": 0.8568698643225683, "grad_norm": 1.4375, "learning_rate": 5.3927849282113854e-05, "loss": 1.2812, "step": 5605 }, { "epoch": 0.85702274030193, "grad_norm": 1.296875, "learning_rate": 5.3923887149229145e-05, "loss": 1.3816, "step": 5606 }, { "epoch": 0.8571756162812918, "grad_norm": 1.34375, "learning_rate": 5.391992510817001e-05, "loss": 1.4074, "step": 5607 }, { "epoch": 0.8573284922606536, "grad_norm": 1.4453125, "learning_rate": 5.391596315894429e-05, "loss": 1.2892, "step": 5608 }, { "epoch": 0.8574813682400153, "grad_norm": 1.2890625, "learning_rate": 5.391200130155991e-05, "loss": 1.1634, "step": 5609 }, { "epoch": 0.8576342442193771, "grad_norm": 1.3515625, "learning_rate": 5.3908039536024736e-05, "loss": 1.2945, "step": 5610 }, { "epoch": 0.8577871201987388, "grad_norm": 1.6953125, "learning_rate": 5.390407786234668e-05, "loss": 1.2891, "step": 5611 }, { "epoch": 0.8579399961781005, "grad_norm": 1.4609375, "learning_rate": 5.390011628053363e-05, "loss": 1.3634, "step": 5612 }, { "epoch": 0.8580928721574622, "grad_norm": 1.484375, "learning_rate": 5.389615479059344e-05, "loss": 1.5871, "step": 5613 }, { "epoch": 0.858245748136824, "grad_norm": 1.3046875, "learning_rate": 5.389219339253411e-05, "loss": 1.1444, "step": 5614 }, { "epoch": 0.8583986241161857, "grad_norm": 1.4140625, "learning_rate": 5.388823208636344e-05, "loss": 1.2461, "step": 5615 }, { "epoch": 0.8585515000955475, "grad_norm": 1.359375, "learning_rate": 5.388427087208938e-05, "loss": 1.2202, "step": 5616 }, { "epoch": 0.8587043760749092, "grad_norm": 1.5234375, "learning_rate": 5.38803097497198e-05, "loss": 1.4979, "step": 5617 }, { "epoch": 0.858857252054271, "grad_norm": 1.484375, "learning_rate": 5.3876348719262604e-05, "loss": 1.4566, "step": 5618 }, { "epoch": 0.8590101280336327, "grad_norm": 1.328125, "learning_rate": 5.387238778072568e-05, "loss": 1.2404, "step": 5619 }, { "epoch": 0.8591630040129945, "grad_norm": 1.2109375, "learning_rate": 5.3868426934116864e-05, "loss": 1.2167, "step": 5620 }, { "epoch": 0.8593158799923561, "grad_norm": 1.328125, "learning_rate": 5.386446617944419e-05, "loss": 1.2354, "step": 5621 }, { "epoch": 0.8594687559717179, "grad_norm": 1.1953125, "learning_rate": 5.386050551671543e-05, "loss": 1.127, "step": 5622 }, { "epoch": 0.8596216319510797, "grad_norm": 1.3515625, "learning_rate": 5.385654494593848e-05, "loss": 1.3259, "step": 5623 }, { "epoch": 0.8597745079304414, "grad_norm": 1.4296875, "learning_rate": 5.385258446712134e-05, "loss": 1.3305, "step": 5624 }, { "epoch": 0.8599273839098032, "grad_norm": 1.3828125, "learning_rate": 5.384862408027176e-05, "loss": 1.3716, "step": 5625 }, { "epoch": 0.8600802598891649, "grad_norm": 1.421875, "learning_rate": 5.384466378539773e-05, "loss": 1.2184, "step": 5626 }, { "epoch": 0.8602331358685267, "grad_norm": 1.5078125, "learning_rate": 5.384070358250712e-05, "loss": 1.4422, "step": 5627 }, { "epoch": 0.8603860118478884, "grad_norm": 1.390625, "learning_rate": 5.383674347160783e-05, "loss": 1.4982, "step": 5628 }, { "epoch": 0.8605388878272502, "grad_norm": 1.421875, "learning_rate": 5.383278345270773e-05, "loss": 1.4382, "step": 5629 }, { "epoch": 0.8606917638066118, "grad_norm": 1.4296875, "learning_rate": 5.382882352581469e-05, "loss": 1.4715, "step": 5630 }, { "epoch": 0.8608446397859736, "grad_norm": 1.40625, "learning_rate": 5.38248636909367e-05, "loss": 1.213, "step": 5631 }, { "epoch": 0.8609975157653353, "grad_norm": 1.4921875, "learning_rate": 5.382090394808151e-05, "loss": 1.3959, "step": 5632 }, { "epoch": 0.8611503917446971, "grad_norm": 1.4140625, "learning_rate": 5.381694429725713e-05, "loss": 1.4602, "step": 5633 }, { "epoch": 0.8613032677240589, "grad_norm": 1.5703125, "learning_rate": 5.381298473847141e-05, "loss": 1.389, "step": 5634 }, { "epoch": 0.8614561437034206, "grad_norm": 1.34375, "learning_rate": 5.3809025271732236e-05, "loss": 1.5447, "step": 5635 }, { "epoch": 0.8616090196827824, "grad_norm": 1.6015625, "learning_rate": 5.38050658970475e-05, "loss": 1.5154, "step": 5636 }, { "epoch": 0.8617618956621441, "grad_norm": 1.359375, "learning_rate": 5.3801106614425115e-05, "loss": 1.2676, "step": 5637 }, { "epoch": 0.8619147716415059, "grad_norm": 1.359375, "learning_rate": 5.379714742387293e-05, "loss": 1.4374, "step": 5638 }, { "epoch": 0.8620676476208675, "grad_norm": 1.2578125, "learning_rate": 5.3793188325398826e-05, "loss": 1.2106, "step": 5639 }, { "epoch": 0.8622205236002293, "grad_norm": 1.5390625, "learning_rate": 5.3789229319010805e-05, "loss": 1.4942, "step": 5640 }, { "epoch": 0.862373399579591, "grad_norm": 1.3984375, "learning_rate": 5.378527040471659e-05, "loss": 1.3677, "step": 5641 }, { "epoch": 0.8625262755589528, "grad_norm": 1.3984375, "learning_rate": 5.37813115825242e-05, "loss": 1.5065, "step": 5642 }, { "epoch": 0.8626791515383145, "grad_norm": 1.3515625, "learning_rate": 5.3777352852441476e-05, "loss": 1.3098, "step": 5643 }, { "epoch": 0.8628320275176763, "grad_norm": 1.453125, "learning_rate": 5.377339421447631e-05, "loss": 1.2855, "step": 5644 }, { "epoch": 0.862984903497038, "grad_norm": 1.4453125, "learning_rate": 5.376943566863659e-05, "loss": 1.2246, "step": 5645 }, { "epoch": 0.8631377794763998, "grad_norm": 1.4375, "learning_rate": 5.376547721493023e-05, "loss": 1.6014, "step": 5646 }, { "epoch": 0.8632906554557616, "grad_norm": 1.3828125, "learning_rate": 5.376151885336509e-05, "loss": 1.1961, "step": 5647 }, { "epoch": 0.8634435314351232, "grad_norm": 1.4296875, "learning_rate": 5.3757560583949005e-05, "loss": 1.3843, "step": 5648 }, { "epoch": 0.863596407414485, "grad_norm": 1.53125, "learning_rate": 5.3753602406689987e-05, "loss": 1.4512, "step": 5649 }, { "epoch": 0.8637492833938467, "grad_norm": 1.3203125, "learning_rate": 5.3749644321595904e-05, "loss": 1.4369, "step": 5650 }, { "epoch": 0.8639021593732085, "grad_norm": 1.515625, "learning_rate": 5.374568632867452e-05, "loss": 1.3179, "step": 5651 }, { "epoch": 0.8640550353525702, "grad_norm": 1.3671875, "learning_rate": 5.3741728427933836e-05, "loss": 1.1894, "step": 5652 }, { "epoch": 0.864207911331932, "grad_norm": 1.3828125, "learning_rate": 5.37377706193817e-05, "loss": 1.3432, "step": 5653 }, { "epoch": 0.8643607873112937, "grad_norm": 1.46875, "learning_rate": 5.373381290302602e-05, "loss": 1.38, "step": 5654 }, { "epoch": 0.8645136632906555, "grad_norm": 1.234375, "learning_rate": 5.372985527887469e-05, "loss": 1.1071, "step": 5655 }, { "epoch": 0.8646665392700172, "grad_norm": 1.34375, "learning_rate": 5.372589774693555e-05, "loss": 1.2374, "step": 5656 }, { "epoch": 0.8648194152493789, "grad_norm": 1.4609375, "learning_rate": 5.372194030721653e-05, "loss": 1.4658, "step": 5657 }, { "epoch": 0.8649722912287406, "grad_norm": 1.453125, "learning_rate": 5.371798295972546e-05, "loss": 1.2632, "step": 5658 }, { "epoch": 0.8651251672081024, "grad_norm": 1.3671875, "learning_rate": 5.3714025704470305e-05, "loss": 1.3116, "step": 5659 }, { "epoch": 0.8652780431874642, "grad_norm": 1.328125, "learning_rate": 5.3710068541458925e-05, "loss": 1.2663, "step": 5660 }, { "epoch": 0.8654309191668259, "grad_norm": 1.421875, "learning_rate": 5.370611147069919e-05, "loss": 1.4937, "step": 5661 }, { "epoch": 0.8655837951461877, "grad_norm": 1.3125, "learning_rate": 5.3702154492199e-05, "loss": 1.1978, "step": 5662 }, { "epoch": 0.8657366711255494, "grad_norm": 1.3125, "learning_rate": 5.369819760596622e-05, "loss": 1.4049, "step": 5663 }, { "epoch": 0.8658895471049112, "grad_norm": 1.3515625, "learning_rate": 5.369424081200875e-05, "loss": 1.3545, "step": 5664 }, { "epoch": 0.8660424230842729, "grad_norm": 1.796875, "learning_rate": 5.369028411033446e-05, "loss": 1.4478, "step": 5665 }, { "epoch": 0.8661952990636346, "grad_norm": 1.375, "learning_rate": 5.3686327500951306e-05, "loss": 1.5537, "step": 5666 }, { "epoch": 0.8663481750429963, "grad_norm": 1.21875, "learning_rate": 5.3682370983867045e-05, "loss": 1.4033, "step": 5667 }, { "epoch": 0.8665010510223581, "grad_norm": 1.359375, "learning_rate": 5.367841455908968e-05, "loss": 1.3293, "step": 5668 }, { "epoch": 0.8666539270017198, "grad_norm": 1.3203125, "learning_rate": 5.367445822662709e-05, "loss": 1.4136, "step": 5669 }, { "epoch": 0.8668068029810816, "grad_norm": 1.3125, "learning_rate": 5.367050198648703e-05, "loss": 1.3252, "step": 5670 }, { "epoch": 0.8669596789604433, "grad_norm": 1.53125, "learning_rate": 5.366654583867752e-05, "loss": 1.5525, "step": 5671 }, { "epoch": 0.8671125549398051, "grad_norm": 1.34375, "learning_rate": 5.3662589783206406e-05, "loss": 1.348, "step": 5672 }, { "epoch": 0.8672654309191669, "grad_norm": 1.3828125, "learning_rate": 5.3658633820081563e-05, "loss": 1.4744, "step": 5673 }, { "epoch": 0.8674183068985286, "grad_norm": 1.3359375, "learning_rate": 5.365467794931083e-05, "loss": 1.382, "step": 5674 }, { "epoch": 0.8675711828778903, "grad_norm": 1.4453125, "learning_rate": 5.3650722170902177e-05, "loss": 1.3794, "step": 5675 }, { "epoch": 0.867724058857252, "grad_norm": 1.625, "learning_rate": 5.364676648486349e-05, "loss": 1.5884, "step": 5676 }, { "epoch": 0.8678769348366138, "grad_norm": 1.375, "learning_rate": 5.364281089120253e-05, "loss": 1.4169, "step": 5677 }, { "epoch": 0.8680298108159755, "grad_norm": 1.5625, "learning_rate": 5.3638855389927303e-05, "loss": 1.3099, "step": 5678 }, { "epoch": 0.8681826867953373, "grad_norm": 1.5078125, "learning_rate": 5.363489998104565e-05, "loss": 1.6062, "step": 5679 }, { "epoch": 0.868335562774699, "grad_norm": 1.34375, "learning_rate": 5.3630944664565464e-05, "loss": 1.3626, "step": 5680 }, { "epoch": 0.8684884387540608, "grad_norm": 1.3671875, "learning_rate": 5.3626989440494554e-05, "loss": 1.2843, "step": 5681 }, { "epoch": 0.8686413147334225, "grad_norm": 1.2265625, "learning_rate": 5.3623034308840956e-05, "loss": 1.1358, "step": 5682 }, { "epoch": 0.8687941907127843, "grad_norm": 1.3671875, "learning_rate": 5.361907926961243e-05, "loss": 1.4175, "step": 5683 }, { "epoch": 0.8689470666921459, "grad_norm": 1.484375, "learning_rate": 5.361512432281683e-05, "loss": 1.3026, "step": 5684 }, { "epoch": 0.8690999426715077, "grad_norm": 1.40625, "learning_rate": 5.3611169468462184e-05, "loss": 1.4533, "step": 5685 }, { "epoch": 0.8692528186508695, "grad_norm": 1.5703125, "learning_rate": 5.360721470655622e-05, "loss": 1.5925, "step": 5686 }, { "epoch": 0.8694056946302312, "grad_norm": 1.8515625, "learning_rate": 5.360326003710692e-05, "loss": 1.3953, "step": 5687 }, { "epoch": 0.869558570609593, "grad_norm": 1.3671875, "learning_rate": 5.359930546012213e-05, "loss": 1.5874, "step": 5688 }, { "epoch": 0.8697114465889547, "grad_norm": 1.5, "learning_rate": 5.3595350975609725e-05, "loss": 1.5938, "step": 5689 }, { "epoch": 0.8698643225683165, "grad_norm": 1.3671875, "learning_rate": 5.359139658357761e-05, "loss": 1.136, "step": 5690 }, { "epoch": 0.8700171985476782, "grad_norm": 1.4453125, "learning_rate": 5.358744228403363e-05, "loss": 1.2646, "step": 5691 }, { "epoch": 0.87017007452704, "grad_norm": 1.53125, "learning_rate": 5.35834880769857e-05, "loss": 1.7207, "step": 5692 }, { "epoch": 0.8703229505064016, "grad_norm": 1.6015625, "learning_rate": 5.357953396244164e-05, "loss": 1.5418, "step": 5693 }, { "epoch": 0.8704758264857634, "grad_norm": 1.46875, "learning_rate": 5.3575579940409424e-05, "loss": 1.7032, "step": 5694 }, { "epoch": 0.8706287024651251, "grad_norm": 1.4140625, "learning_rate": 5.3571626010896915e-05, "loss": 1.2607, "step": 5695 }, { "epoch": 0.8707815784444869, "grad_norm": 1.3046875, "learning_rate": 5.356767217391188e-05, "loss": 1.2444, "step": 5696 }, { "epoch": 0.8709344544238486, "grad_norm": 1.40625, "learning_rate": 5.3563718429462326e-05, "loss": 1.3198, "step": 5697 }, { "epoch": 0.8710873304032104, "grad_norm": 1.328125, "learning_rate": 5.355976477755609e-05, "loss": 1.4352, "step": 5698 }, { "epoch": 0.8712402063825722, "grad_norm": 1.3671875, "learning_rate": 5.355581121820104e-05, "loss": 1.5058, "step": 5699 }, { "epoch": 0.8713930823619339, "grad_norm": 1.453125, "learning_rate": 5.355185775140501e-05, "loss": 1.4326, "step": 5700 }, { "epoch": 0.8715459583412957, "grad_norm": 1.40625, "learning_rate": 5.3547904377176026e-05, "loss": 1.4133, "step": 5701 }, { "epoch": 0.8716988343206573, "grad_norm": 1.3046875, "learning_rate": 5.354395109552184e-05, "loss": 1.0711, "step": 5702 }, { "epoch": 0.8718517103000191, "grad_norm": 1.40625, "learning_rate": 5.353999790645032e-05, "loss": 1.1727, "step": 5703 }, { "epoch": 0.8720045862793808, "grad_norm": 1.4609375, "learning_rate": 5.353604480996942e-05, "loss": 1.6173, "step": 5704 }, { "epoch": 0.8721574622587426, "grad_norm": 1.2265625, "learning_rate": 5.353209180608698e-05, "loss": 1.1094, "step": 5705 }, { "epoch": 0.8723103382381043, "grad_norm": 1.4453125, "learning_rate": 5.3528138894810895e-05, "loss": 1.2413, "step": 5706 }, { "epoch": 0.8724632142174661, "grad_norm": 1.46875, "learning_rate": 5.352418607614904e-05, "loss": 1.3142, "step": 5707 }, { "epoch": 0.8726160901968278, "grad_norm": 1.3671875, "learning_rate": 5.352023335010927e-05, "loss": 1.2129, "step": 5708 }, { "epoch": 0.8727689661761896, "grad_norm": 1.515625, "learning_rate": 5.351628071669947e-05, "loss": 1.431, "step": 5709 }, { "epoch": 0.8729218421555514, "grad_norm": 1.6328125, "learning_rate": 5.3512328175927506e-05, "loss": 1.495, "step": 5710 }, { "epoch": 0.873074718134913, "grad_norm": 1.359375, "learning_rate": 5.350837572780134e-05, "loss": 1.052, "step": 5711 }, { "epoch": 0.8732275941142748, "grad_norm": 1.4140625, "learning_rate": 5.3504423372328706e-05, "loss": 1.4811, "step": 5712 }, { "epoch": 0.8733804700936365, "grad_norm": 1.4296875, "learning_rate": 5.350047110951758e-05, "loss": 1.3666, "step": 5713 }, { "epoch": 0.8735333460729983, "grad_norm": 1.3203125, "learning_rate": 5.3496518939375864e-05, "loss": 1.2008, "step": 5714 }, { "epoch": 0.87368622205236, "grad_norm": 1.46875, "learning_rate": 5.349256686191131e-05, "loss": 1.4775, "step": 5715 }, { "epoch": 0.8738390980317218, "grad_norm": 1.4140625, "learning_rate": 5.3488614877131904e-05, "loss": 1.3946, "step": 5716 }, { "epoch": 0.8739919740110835, "grad_norm": 1.390625, "learning_rate": 5.3484662985045496e-05, "loss": 1.5541, "step": 5717 }, { "epoch": 0.8741448499904453, "grad_norm": 1.28125, "learning_rate": 5.348071118565995e-05, "loss": 1.3245, "step": 5718 }, { "epoch": 0.874297725969807, "grad_norm": 1.3125, "learning_rate": 5.3476759478983095e-05, "loss": 1.3607, "step": 5719 }, { "epoch": 0.8744506019491687, "grad_norm": 1.5546875, "learning_rate": 5.3472807865022944e-05, "loss": 1.6887, "step": 5720 }, { "epoch": 0.8746034779285304, "grad_norm": 1.2578125, "learning_rate": 5.3468856343787246e-05, "loss": 1.3299, "step": 5721 }, { "epoch": 0.8747563539078922, "grad_norm": 1.453125, "learning_rate": 5.346490491528386e-05, "loss": 1.4598, "step": 5722 }, { "epoch": 0.874909229887254, "grad_norm": 1.328125, "learning_rate": 5.346095357952078e-05, "loss": 1.6525, "step": 5723 }, { "epoch": 0.8750621058666157, "grad_norm": 1.2578125, "learning_rate": 5.345700233650579e-05, "loss": 0.937, "step": 5724 }, { "epoch": 0.8752149818459775, "grad_norm": 1.4765625, "learning_rate": 5.3453051186246815e-05, "loss": 1.3847, "step": 5725 }, { "epoch": 0.8753678578253392, "grad_norm": 1.09375, "learning_rate": 5.344910012875164e-05, "loss": 0.8889, "step": 5726 }, { "epoch": 0.875520733804701, "grad_norm": 1.3984375, "learning_rate": 5.34451491640283e-05, "loss": 1.7221, "step": 5727 }, { "epoch": 0.8756736097840627, "grad_norm": 1.34375, "learning_rate": 5.3441198292084526e-05, "loss": 1.2707, "step": 5728 }, { "epoch": 0.8758264857634244, "grad_norm": 1.5390625, "learning_rate": 5.34372475129282e-05, "loss": 1.2797, "step": 5729 }, { "epoch": 0.8759793617427861, "grad_norm": 1.3203125, "learning_rate": 5.3433296826567315e-05, "loss": 1.2493, "step": 5730 }, { "epoch": 0.8761322377221479, "grad_norm": 1.2265625, "learning_rate": 5.342934623300958e-05, "loss": 1.4611, "step": 5731 }, { "epoch": 0.8762851137015096, "grad_norm": 1.3515625, "learning_rate": 5.3425395732262996e-05, "loss": 1.2674, "step": 5732 }, { "epoch": 0.8764379896808714, "grad_norm": 1.4140625, "learning_rate": 5.3421445324335386e-05, "loss": 1.4453, "step": 5733 }, { "epoch": 0.8765908656602331, "grad_norm": 1.4140625, "learning_rate": 5.341749500923463e-05, "loss": 1.2518, "step": 5734 }, { "epoch": 0.8767437416395949, "grad_norm": 1.4140625, "learning_rate": 5.3413544786968594e-05, "loss": 1.5093, "step": 5735 }, { "epoch": 0.8768966176189567, "grad_norm": 1.2890625, "learning_rate": 5.340959465754516e-05, "loss": 1.3643, "step": 5736 }, { "epoch": 0.8770494935983184, "grad_norm": 1.4609375, "learning_rate": 5.3405644620972196e-05, "loss": 1.4349, "step": 5737 }, { "epoch": 0.87720236957768, "grad_norm": 1.3828125, "learning_rate": 5.340169467725753e-05, "loss": 1.4203, "step": 5738 }, { "epoch": 0.8773552455570418, "grad_norm": 1.3359375, "learning_rate": 5.3397744826409124e-05, "loss": 1.5686, "step": 5739 }, { "epoch": 0.8775081215364036, "grad_norm": 1.2734375, "learning_rate": 5.3393795068434804e-05, "loss": 1.2685, "step": 5740 }, { "epoch": 0.8776609975157653, "grad_norm": 1.5390625, "learning_rate": 5.338984540334242e-05, "loss": 1.5792, "step": 5741 }, { "epoch": 0.8778138734951271, "grad_norm": 1.4296875, "learning_rate": 5.3385895831139885e-05, "loss": 1.2644, "step": 5742 }, { "epoch": 0.8779667494744888, "grad_norm": 1.390625, "learning_rate": 5.338194635183505e-05, "loss": 1.3047, "step": 5743 }, { "epoch": 0.8781196254538506, "grad_norm": 1.296875, "learning_rate": 5.337799696543577e-05, "loss": 1.111, "step": 5744 }, { "epoch": 0.8782725014332123, "grad_norm": 1.4140625, "learning_rate": 5.33740476719499e-05, "loss": 1.229, "step": 5745 }, { "epoch": 0.8784253774125741, "grad_norm": 1.3046875, "learning_rate": 5.337009847138542e-05, "loss": 1.3399, "step": 5746 }, { "epoch": 0.8785782533919357, "grad_norm": 1.2734375, "learning_rate": 5.3366149363750084e-05, "loss": 1.2132, "step": 5747 }, { "epoch": 0.8787311293712975, "grad_norm": 1.4609375, "learning_rate": 5.336220034905175e-05, "loss": 1.2879, "step": 5748 }, { "epoch": 0.8788840053506592, "grad_norm": 1.4140625, "learning_rate": 5.335825142729839e-05, "loss": 1.4244, "step": 5749 }, { "epoch": 0.879036881330021, "grad_norm": 1.3671875, "learning_rate": 5.335430259849782e-05, "loss": 1.3451, "step": 5750 }, { "epoch": 0.8791897573093828, "grad_norm": 1.5, "learning_rate": 5.33503538626579e-05, "loss": 1.4595, "step": 5751 }, { "epoch": 0.8793426332887445, "grad_norm": 1.4140625, "learning_rate": 5.334640521978652e-05, "loss": 1.439, "step": 5752 }, { "epoch": 0.8794955092681063, "grad_norm": 1.7734375, "learning_rate": 5.334245666989155e-05, "loss": 1.4592, "step": 5753 }, { "epoch": 0.879648385247468, "grad_norm": 1.3359375, "learning_rate": 5.333850821298083e-05, "loss": 1.2011, "step": 5754 }, { "epoch": 0.8798012612268298, "grad_norm": 1.46875, "learning_rate": 5.333455984906222e-05, "loss": 1.6089, "step": 5755 }, { "epoch": 0.8799541372061914, "grad_norm": 1.3359375, "learning_rate": 5.33306115781437e-05, "loss": 1.2352, "step": 5756 }, { "epoch": 0.8801070131855532, "grad_norm": 1.1796875, "learning_rate": 5.332666340023296e-05, "loss": 0.9823, "step": 5757 }, { "epoch": 0.8802598891649149, "grad_norm": 1.4609375, "learning_rate": 5.332271531533801e-05, "loss": 1.4927, "step": 5758 }, { "epoch": 0.8804127651442767, "grad_norm": 1.375, "learning_rate": 5.331876732346668e-05, "loss": 1.2725, "step": 5759 }, { "epoch": 0.8805656411236384, "grad_norm": 1.234375, "learning_rate": 5.331481942462683e-05, "loss": 1.1174, "step": 5760 }, { "epoch": 0.8807185171030002, "grad_norm": 1.5703125, "learning_rate": 5.331087161882632e-05, "loss": 1.4927, "step": 5761 }, { "epoch": 0.880871393082362, "grad_norm": 1.5234375, "learning_rate": 5.330692390607303e-05, "loss": 1.6673, "step": 5762 }, { "epoch": 0.8810242690617237, "grad_norm": 1.3671875, "learning_rate": 5.3302976286374815e-05, "loss": 1.3479, "step": 5763 }, { "epoch": 0.8811771450410855, "grad_norm": 1.3671875, "learning_rate": 5.329902875973952e-05, "loss": 1.4057, "step": 5764 }, { "epoch": 0.8813300210204471, "grad_norm": 1.4140625, "learning_rate": 5.329508132617512e-05, "loss": 1.2572, "step": 5765 }, { "epoch": 0.8814828969998089, "grad_norm": 1.34375, "learning_rate": 5.3291133985689326e-05, "loss": 1.444, "step": 5766 }, { "epoch": 0.8816357729791706, "grad_norm": 1.3359375, "learning_rate": 5.328718673829012e-05, "loss": 1.1152, "step": 5767 }, { "epoch": 0.8817886489585324, "grad_norm": 1.34375, "learning_rate": 5.328323958398533e-05, "loss": 1.3961, "step": 5768 }, { "epoch": 0.8819415249378941, "grad_norm": 1.453125, "learning_rate": 5.3279292522782834e-05, "loss": 1.3132, "step": 5769 }, { "epoch": 0.8820944009172559, "grad_norm": 1.5234375, "learning_rate": 5.327534555469047e-05, "loss": 1.4127, "step": 5770 }, { "epoch": 0.8822472768966176, "grad_norm": 1.3828125, "learning_rate": 5.327139867971613e-05, "loss": 1.504, "step": 5771 }, { "epoch": 0.8824001528759794, "grad_norm": 1.265625, "learning_rate": 5.326745189786767e-05, "loss": 1.187, "step": 5772 }, { "epoch": 0.8825530288553411, "grad_norm": 1.6328125, "learning_rate": 5.326350520915292e-05, "loss": 1.6818, "step": 5773 }, { "epoch": 0.8827059048347028, "grad_norm": 1.4296875, "learning_rate": 5.325955861357982e-05, "loss": 1.2674, "step": 5774 }, { "epoch": 0.8828587808140645, "grad_norm": 1.484375, "learning_rate": 5.325561211115624e-05, "loss": 1.4176, "step": 5775 }, { "epoch": 0.8830116567934263, "grad_norm": 1.3125, "learning_rate": 5.3251665701889916e-05, "loss": 1.2194, "step": 5776 }, { "epoch": 0.8831645327727881, "grad_norm": 1.609375, "learning_rate": 5.3247719385788854e-05, "loss": 1.3348, "step": 5777 }, { "epoch": 0.8833174087521498, "grad_norm": 1.5078125, "learning_rate": 5.324377316286085e-05, "loss": 1.4695, "step": 5778 }, { "epoch": 0.8834702847315116, "grad_norm": 1.5, "learning_rate": 5.323982703311379e-05, "loss": 1.6887, "step": 5779 }, { "epoch": 0.8836231607108733, "grad_norm": 1.328125, "learning_rate": 5.323588099655552e-05, "loss": 1.2601, "step": 5780 }, { "epoch": 0.8837760366902351, "grad_norm": 1.421875, "learning_rate": 5.323193505319392e-05, "loss": 1.3545, "step": 5781 }, { "epoch": 0.8839289126695968, "grad_norm": 1.4375, "learning_rate": 5.322798920303686e-05, "loss": 1.4842, "step": 5782 }, { "epoch": 0.8840817886489585, "grad_norm": 1.5546875, "learning_rate": 5.322404344609215e-05, "loss": 1.459, "step": 5783 }, { "epoch": 0.8842346646283202, "grad_norm": 1.34375, "learning_rate": 5.322009778236773e-05, "loss": 1.2504, "step": 5784 }, { "epoch": 0.884387540607682, "grad_norm": 1.4609375, "learning_rate": 5.321615221187143e-05, "loss": 1.4465, "step": 5785 }, { "epoch": 0.8845404165870437, "grad_norm": 1.375, "learning_rate": 5.321220673461111e-05, "loss": 1.1688, "step": 5786 }, { "epoch": 0.8846932925664055, "grad_norm": 1.3046875, "learning_rate": 5.3208261350594645e-05, "loss": 1.5185, "step": 5787 }, { "epoch": 0.8848461685457673, "grad_norm": 1.34375, "learning_rate": 5.320431605982987e-05, "loss": 1.2014, "step": 5788 }, { "epoch": 0.884999044525129, "grad_norm": 1.5234375, "learning_rate": 5.3200370862324675e-05, "loss": 1.4919, "step": 5789 }, { "epoch": 0.8851519205044908, "grad_norm": 1.4453125, "learning_rate": 5.3196425758086884e-05, "loss": 1.3258, "step": 5790 }, { "epoch": 0.8853047964838525, "grad_norm": 1.515625, "learning_rate": 5.3192480747124464e-05, "loss": 1.442, "step": 5791 }, { "epoch": 0.8854576724632142, "grad_norm": 1.453125, "learning_rate": 5.318853582944512e-05, "loss": 1.4602, "step": 5792 }, { "epoch": 0.8856105484425759, "grad_norm": 1.359375, "learning_rate": 5.3184591005056835e-05, "loss": 1.1093, "step": 5793 }, { "epoch": 0.8857634244219377, "grad_norm": 1.5, "learning_rate": 5.318064627396747e-05, "loss": 1.3204, "step": 5794 }, { "epoch": 0.8859163004012994, "grad_norm": 1.3359375, "learning_rate": 5.317670163618478e-05, "loss": 1.1374, "step": 5795 }, { "epoch": 0.8860691763806612, "grad_norm": 1.46875, "learning_rate": 5.3172757091716717e-05, "loss": 1.5602, "step": 5796 }, { "epoch": 0.8862220523600229, "grad_norm": 1.46875, "learning_rate": 5.316881264057112e-05, "loss": 1.5666, "step": 5797 }, { "epoch": 0.8863749283393847, "grad_norm": 1.421875, "learning_rate": 5.316486828275586e-05, "loss": 1.2926, "step": 5798 }, { "epoch": 0.8865278043187464, "grad_norm": 1.3359375, "learning_rate": 5.316092401827875e-05, "loss": 1.3309, "step": 5799 }, { "epoch": 0.8866806802981082, "grad_norm": 1.4921875, "learning_rate": 5.315697984714771e-05, "loss": 1.5139, "step": 5800 }, { "epoch": 0.8868335562774698, "grad_norm": 1.5625, "learning_rate": 5.315303576937063e-05, "loss": 1.3927, "step": 5801 }, { "epoch": 0.8869864322568316, "grad_norm": 1.515625, "learning_rate": 5.314909178495523e-05, "loss": 1.2831, "step": 5802 }, { "epoch": 0.8871393082361934, "grad_norm": 1.40625, "learning_rate": 5.3145147893909494e-05, "loss": 1.1127, "step": 5803 }, { "epoch": 0.8872921842155551, "grad_norm": 1.390625, "learning_rate": 5.314120409624126e-05, "loss": 1.3876, "step": 5804 }, { "epoch": 0.8874450601949169, "grad_norm": 1.328125, "learning_rate": 5.3137260391958365e-05, "loss": 1.4, "step": 5805 }, { "epoch": 0.8875979361742786, "grad_norm": 1.375, "learning_rate": 5.3133316781068656e-05, "loss": 1.2008, "step": 5806 }, { "epoch": 0.8877508121536404, "grad_norm": 1.3671875, "learning_rate": 5.312937326358007e-05, "loss": 1.303, "step": 5807 }, { "epoch": 0.8879036881330021, "grad_norm": 1.4921875, "learning_rate": 5.3125429839500375e-05, "loss": 1.3414, "step": 5808 }, { "epoch": 0.8880565641123639, "grad_norm": 1.234375, "learning_rate": 5.312148650883742e-05, "loss": 1.1967, "step": 5809 }, { "epoch": 0.8882094400917255, "grad_norm": 1.421875, "learning_rate": 5.31175432715992e-05, "loss": 1.3372, "step": 5810 }, { "epoch": 0.8883623160710873, "grad_norm": 1.5859375, "learning_rate": 5.31136001277934e-05, "loss": 1.44, "step": 5811 }, { "epoch": 0.888515192050449, "grad_norm": 1.3984375, "learning_rate": 5.3109657077427986e-05, "loss": 1.4486, "step": 5812 }, { "epoch": 0.8886680680298108, "grad_norm": 1.4375, "learning_rate": 5.31057141205108e-05, "loss": 1.3701, "step": 5813 }, { "epoch": 0.8888209440091726, "grad_norm": 1.296875, "learning_rate": 5.310177125704969e-05, "loss": 1.1425, "step": 5814 }, { "epoch": 0.8889738199885343, "grad_norm": 1.375, "learning_rate": 5.309782848705251e-05, "loss": 1.1709, "step": 5815 }, { "epoch": 0.8891266959678961, "grad_norm": 1.515625, "learning_rate": 5.3093885810527124e-05, "loss": 1.4392, "step": 5816 }, { "epoch": 0.8892795719472578, "grad_norm": 1.3515625, "learning_rate": 5.30899432274814e-05, "loss": 1.3584, "step": 5817 }, { "epoch": 0.8894324479266196, "grad_norm": 1.3671875, "learning_rate": 5.3086000737923116e-05, "loss": 1.3623, "step": 5818 }, { "epoch": 0.8895853239059812, "grad_norm": 1.515625, "learning_rate": 5.3082058341860244e-05, "loss": 1.3191, "step": 5819 }, { "epoch": 0.889738199885343, "grad_norm": 1.3828125, "learning_rate": 5.307811603930064e-05, "loss": 1.5436, "step": 5820 }, { "epoch": 0.8898910758647047, "grad_norm": 1.421875, "learning_rate": 5.3074173830252025e-05, "loss": 1.3447, "step": 5821 }, { "epoch": 0.8900439518440665, "grad_norm": 1.265625, "learning_rate": 5.3070231714722386e-05, "loss": 1.4596, "step": 5822 }, { "epoch": 0.8901968278234282, "grad_norm": 1.484375, "learning_rate": 5.306628969271954e-05, "loss": 1.5726, "step": 5823 }, { "epoch": 0.89034970380279, "grad_norm": 1.3359375, "learning_rate": 5.3062347764251317e-05, "loss": 1.4277, "step": 5824 }, { "epoch": 0.8905025797821517, "grad_norm": 1.53125, "learning_rate": 5.305840592932558e-05, "loss": 1.6431, "step": 5825 }, { "epoch": 0.8906554557615135, "grad_norm": 1.4765625, "learning_rate": 5.305446418795027e-05, "loss": 1.6539, "step": 5826 }, { "epoch": 0.8908083317408753, "grad_norm": 1.46875, "learning_rate": 5.305052254013314e-05, "loss": 1.114, "step": 5827 }, { "epoch": 0.8909612077202369, "grad_norm": 1.296875, "learning_rate": 5.3046580985882023e-05, "loss": 1.1235, "step": 5828 }, { "epoch": 0.8911140836995987, "grad_norm": 1.4140625, "learning_rate": 5.304263952520487e-05, "loss": 1.4475, "step": 5829 }, { "epoch": 0.8912669596789604, "grad_norm": 1.359375, "learning_rate": 5.303869815810951e-05, "loss": 1.4541, "step": 5830 }, { "epoch": 0.8914198356583222, "grad_norm": 1.4453125, "learning_rate": 5.3034756884603773e-05, "loss": 1.4427, "step": 5831 }, { "epoch": 0.8915727116376839, "grad_norm": 1.40625, "learning_rate": 5.303081570469553e-05, "loss": 1.3006, "step": 5832 }, { "epoch": 0.8917255876170457, "grad_norm": 1.5078125, "learning_rate": 5.302687461839262e-05, "loss": 1.3421, "step": 5833 }, { "epoch": 0.8918784635964074, "grad_norm": 1.359375, "learning_rate": 5.3022933625702907e-05, "loss": 1.2333, "step": 5834 }, { "epoch": 0.8920313395757692, "grad_norm": 1.3828125, "learning_rate": 5.301899272663421e-05, "loss": 1.4375, "step": 5835 }, { "epoch": 0.8921842155551309, "grad_norm": 1.40625, "learning_rate": 5.301505192119449e-05, "loss": 1.3207, "step": 5836 }, { "epoch": 0.8923370915344926, "grad_norm": 1.421875, "learning_rate": 5.301111120939145e-05, "loss": 1.3157, "step": 5837 }, { "epoch": 0.8924899675138543, "grad_norm": 1.3828125, "learning_rate": 5.300717059123306e-05, "loss": 1.3917, "step": 5838 }, { "epoch": 0.8926428434932161, "grad_norm": 1.2890625, "learning_rate": 5.3003230066727185e-05, "loss": 1.2039, "step": 5839 }, { "epoch": 0.8927957194725779, "grad_norm": 1.453125, "learning_rate": 5.2999289635881534e-05, "loss": 1.4899, "step": 5840 }, { "epoch": 0.8929485954519396, "grad_norm": 1.5390625, "learning_rate": 5.2995349298704087e-05, "loss": 1.4869, "step": 5841 }, { "epoch": 0.8931014714313014, "grad_norm": 1.4765625, "learning_rate": 5.2991409055202676e-05, "loss": 1.3248, "step": 5842 }, { "epoch": 0.8932543474106631, "grad_norm": 1.5546875, "learning_rate": 5.298746890538514e-05, "loss": 1.3918, "step": 5843 }, { "epoch": 0.8934072233900249, "grad_norm": 1.625, "learning_rate": 5.298352884925929e-05, "loss": 1.4217, "step": 5844 }, { "epoch": 0.8935600993693866, "grad_norm": 1.3671875, "learning_rate": 5.29795888868331e-05, "loss": 1.1668, "step": 5845 }, { "epoch": 0.8937129753487483, "grad_norm": 1.28125, "learning_rate": 5.297564901811429e-05, "loss": 1.3481, "step": 5846 }, { "epoch": 0.89386585132811, "grad_norm": 1.4140625, "learning_rate": 5.2971709243110746e-05, "loss": 1.4069, "step": 5847 }, { "epoch": 0.8940187273074718, "grad_norm": 1.3828125, "learning_rate": 5.296776956183036e-05, "loss": 1.3684, "step": 5848 }, { "epoch": 0.8941716032868335, "grad_norm": 1.3828125, "learning_rate": 5.2963829974280974e-05, "loss": 1.4554, "step": 5849 }, { "epoch": 0.8943244792661953, "grad_norm": 1.28125, "learning_rate": 5.295989048047041e-05, "loss": 1.5195, "step": 5850 }, { "epoch": 0.894477355245557, "grad_norm": 1.40625, "learning_rate": 5.2955951080406505e-05, "loss": 1.2526, "step": 5851 }, { "epoch": 0.8946302312249188, "grad_norm": 1.453125, "learning_rate": 5.295201177409722e-05, "loss": 1.2645, "step": 5852 }, { "epoch": 0.8947831072042806, "grad_norm": 1.359375, "learning_rate": 5.294807256155029e-05, "loss": 1.5231, "step": 5853 }, { "epoch": 0.8949359831836423, "grad_norm": 1.2890625, "learning_rate": 5.2944133442773555e-05, "loss": 1.2736, "step": 5854 }, { "epoch": 0.895088859163004, "grad_norm": 1.328125, "learning_rate": 5.294019441777499e-05, "loss": 1.3222, "step": 5855 }, { "epoch": 0.8952417351423657, "grad_norm": 1.2578125, "learning_rate": 5.2936255486562295e-05, "loss": 1.0807, "step": 5856 }, { "epoch": 0.8953946111217275, "grad_norm": 1.3828125, "learning_rate": 5.293231664914342e-05, "loss": 1.4684, "step": 5857 }, { "epoch": 0.8955474871010892, "grad_norm": 1.15625, "learning_rate": 5.292837790552619e-05, "loss": 1.0372, "step": 5858 }, { "epoch": 0.895700363080451, "grad_norm": 1.3203125, "learning_rate": 5.292443925571846e-05, "loss": 1.2851, "step": 5859 }, { "epoch": 0.8958532390598127, "grad_norm": 1.40625, "learning_rate": 5.292050069972806e-05, "loss": 1.4027, "step": 5860 }, { "epoch": 0.8960061150391745, "grad_norm": 1.421875, "learning_rate": 5.291656223756285e-05, "loss": 1.3705, "step": 5861 }, { "epoch": 0.8961589910185362, "grad_norm": 1.421875, "learning_rate": 5.291262386923068e-05, "loss": 1.595, "step": 5862 }, { "epoch": 0.896311866997898, "grad_norm": 1.4609375, "learning_rate": 5.290868559473936e-05, "loss": 1.3282, "step": 5863 }, { "epoch": 0.8964647429772596, "grad_norm": 1.296875, "learning_rate": 5.2904747414096814e-05, "loss": 1.2421, "step": 5864 }, { "epoch": 0.8966176189566214, "grad_norm": 1.3671875, "learning_rate": 5.290080932731085e-05, "loss": 1.3038, "step": 5865 }, { "epoch": 0.8967704949359832, "grad_norm": 1.375, "learning_rate": 5.289687133438932e-05, "loss": 1.1904, "step": 5866 }, { "epoch": 0.8969233709153449, "grad_norm": 1.3984375, "learning_rate": 5.289293343534008e-05, "loss": 1.1156, "step": 5867 }, { "epoch": 0.8970762468947067, "grad_norm": 1.4609375, "learning_rate": 5.288899563017096e-05, "loss": 1.3539, "step": 5868 }, { "epoch": 0.8972291228740684, "grad_norm": 1.4453125, "learning_rate": 5.288505791888981e-05, "loss": 1.3782, "step": 5869 }, { "epoch": 0.8973819988534302, "grad_norm": 1.3203125, "learning_rate": 5.288112030150445e-05, "loss": 1.2479, "step": 5870 }, { "epoch": 0.8975348748327919, "grad_norm": 1.5390625, "learning_rate": 5.287718277802285e-05, "loss": 1.4841, "step": 5871 }, { "epoch": 0.8976877508121537, "grad_norm": 1.5703125, "learning_rate": 5.28732453484527e-05, "loss": 1.4522, "step": 5872 }, { "epoch": 0.8978406267915153, "grad_norm": 1.4765625, "learning_rate": 5.28693080128019e-05, "loss": 1.4351, "step": 5873 }, { "epoch": 0.8979935027708771, "grad_norm": 1.3828125, "learning_rate": 5.286537077107836e-05, "loss": 1.5017, "step": 5874 }, { "epoch": 0.8981463787502388, "grad_norm": 1.34375, "learning_rate": 5.286143362328986e-05, "loss": 1.6213, "step": 5875 }, { "epoch": 0.8982992547296006, "grad_norm": 1.6328125, "learning_rate": 5.285749656944427e-05, "loss": 1.6078, "step": 5876 }, { "epoch": 0.8984521307089623, "grad_norm": 1.515625, "learning_rate": 5.285355960954943e-05, "loss": 1.5437, "step": 5877 }, { "epoch": 0.8986050066883241, "grad_norm": 1.265625, "learning_rate": 5.284962274361318e-05, "loss": 1.2844, "step": 5878 }, { "epoch": 0.8987578826676859, "grad_norm": 1.46875, "learning_rate": 5.2845685971643386e-05, "loss": 1.2855, "step": 5879 }, { "epoch": 0.8989107586470476, "grad_norm": 1.4765625, "learning_rate": 5.284174929364784e-05, "loss": 1.5739, "step": 5880 }, { "epoch": 0.8990636346264094, "grad_norm": 1.4609375, "learning_rate": 5.2837812709634497e-05, "loss": 1.5415, "step": 5881 }, { "epoch": 0.899216510605771, "grad_norm": 1.3046875, "learning_rate": 5.2833876219611056e-05, "loss": 1.3133, "step": 5882 }, { "epoch": 0.8993693865851328, "grad_norm": 1.40625, "learning_rate": 5.282993982358548e-05, "loss": 1.3621, "step": 5883 }, { "epoch": 0.8995222625644945, "grad_norm": 1.3359375, "learning_rate": 5.282600352156557e-05, "loss": 1.2159, "step": 5884 }, { "epoch": 0.8996751385438563, "grad_norm": 1.3125, "learning_rate": 5.282206731355919e-05, "loss": 1.3041, "step": 5885 }, { "epoch": 0.899828014523218, "grad_norm": 1.4296875, "learning_rate": 5.281813119957415e-05, "loss": 1.4405, "step": 5886 }, { "epoch": 0.8999808905025798, "grad_norm": 1.3984375, "learning_rate": 5.281419517961832e-05, "loss": 1.4256, "step": 5887 }, { "epoch": 0.9001337664819415, "grad_norm": 1.390625, "learning_rate": 5.281025925369953e-05, "loss": 1.3431, "step": 5888 }, { "epoch": 0.9002866424613033, "grad_norm": 1.4609375, "learning_rate": 5.280632342182561e-05, "loss": 1.4677, "step": 5889 }, { "epoch": 0.900439518440665, "grad_norm": 1.421875, "learning_rate": 5.280238768400448e-05, "loss": 1.5109, "step": 5890 }, { "epoch": 0.9005923944200267, "grad_norm": 1.3515625, "learning_rate": 5.279845204024387e-05, "loss": 1.2289, "step": 5891 }, { "epoch": 0.9007452703993885, "grad_norm": 1.4140625, "learning_rate": 5.2794516490551706e-05, "loss": 1.4977, "step": 5892 }, { "epoch": 0.9008981463787502, "grad_norm": 1.28125, "learning_rate": 5.279058103493582e-05, "loss": 1.1, "step": 5893 }, { "epoch": 0.901051022358112, "grad_norm": 1.765625, "learning_rate": 5.2786645673404024e-05, "loss": 1.6646, "step": 5894 }, { "epoch": 0.9012038983374737, "grad_norm": 1.4453125, "learning_rate": 5.278271040596419e-05, "loss": 1.446, "step": 5895 }, { "epoch": 0.9013567743168355, "grad_norm": 1.3203125, "learning_rate": 5.277877523262414e-05, "loss": 1.3953, "step": 5896 }, { "epoch": 0.9015096502961972, "grad_norm": 1.4296875, "learning_rate": 5.2774840153391735e-05, "loss": 1.5291, "step": 5897 }, { "epoch": 0.901662526275559, "grad_norm": 1.265625, "learning_rate": 5.2770905168274764e-05, "loss": 1.1948, "step": 5898 }, { "epoch": 0.9018154022549207, "grad_norm": 1.390625, "learning_rate": 5.2766970277281144e-05, "loss": 1.2426, "step": 5899 }, { "epoch": 0.9019682782342824, "grad_norm": 1.4453125, "learning_rate": 5.276303548041874e-05, "loss": 1.3607, "step": 5900 }, { "epoch": 0.9021211542136441, "grad_norm": 1.2890625, "learning_rate": 5.275910077769526e-05, "loss": 1.1616, "step": 5901 }, { "epoch": 0.9022740301930059, "grad_norm": 1.5078125, "learning_rate": 5.2755166169118666e-05, "loss": 1.6224, "step": 5902 }, { "epoch": 0.9024269061723676, "grad_norm": 1.421875, "learning_rate": 5.2751231654696744e-05, "loss": 1.4648, "step": 5903 }, { "epoch": 0.9025797821517294, "grad_norm": 1.3203125, "learning_rate": 5.274729723443736e-05, "loss": 1.2025, "step": 5904 }, { "epoch": 0.9027326581310912, "grad_norm": 1.484375, "learning_rate": 5.274336290834834e-05, "loss": 1.4909, "step": 5905 }, { "epoch": 0.9028855341104529, "grad_norm": 1.3515625, "learning_rate": 5.273942867643753e-05, "loss": 1.2733, "step": 5906 }, { "epoch": 0.9030384100898147, "grad_norm": 1.328125, "learning_rate": 5.273549453871277e-05, "loss": 1.2798, "step": 5907 }, { "epoch": 0.9031912860691764, "grad_norm": 1.40625, "learning_rate": 5.2731560495181864e-05, "loss": 1.4631, "step": 5908 }, { "epoch": 0.9033441620485381, "grad_norm": 1.3125, "learning_rate": 5.272762654585273e-05, "loss": 1.303, "step": 5909 }, { "epoch": 0.9034970380278998, "grad_norm": 1.4296875, "learning_rate": 5.272369269073315e-05, "loss": 1.4112, "step": 5910 }, { "epoch": 0.9036499140072616, "grad_norm": 1.390625, "learning_rate": 5.271975892983099e-05, "loss": 1.2046, "step": 5911 }, { "epoch": 0.9038027899866233, "grad_norm": 1.390625, "learning_rate": 5.2715825263154085e-05, "loss": 1.2414, "step": 5912 }, { "epoch": 0.9039556659659851, "grad_norm": 1.3125, "learning_rate": 5.271189169071026e-05, "loss": 1.2844, "step": 5913 }, { "epoch": 0.9041085419453468, "grad_norm": 1.390625, "learning_rate": 5.2707958212507355e-05, "loss": 1.4881, "step": 5914 }, { "epoch": 0.9042614179247086, "grad_norm": 1.28125, "learning_rate": 5.27040248285532e-05, "loss": 1.1917, "step": 5915 }, { "epoch": 0.9044142939040704, "grad_norm": 1.5078125, "learning_rate": 5.270009153885572e-05, "loss": 1.5073, "step": 5916 }, { "epoch": 0.9045671698834321, "grad_norm": 1.4453125, "learning_rate": 5.269615834342261e-05, "loss": 1.3832, "step": 5917 }, { "epoch": 0.9047200458627938, "grad_norm": 1.21875, "learning_rate": 5.2692225242261816e-05, "loss": 1.4401, "step": 5918 }, { "epoch": 0.9048729218421555, "grad_norm": 1.5859375, "learning_rate": 5.2688292235381186e-05, "loss": 1.7443, "step": 5919 }, { "epoch": 0.9050257978215173, "grad_norm": 1.40625, "learning_rate": 5.2684359322788435e-05, "loss": 1.5044, "step": 5920 }, { "epoch": 0.905178673800879, "grad_norm": 1.3125, "learning_rate": 5.268042650449152e-05, "loss": 1.2599, "step": 5921 }, { "epoch": 0.9053315497802408, "grad_norm": 1.5234375, "learning_rate": 5.267649378049826e-05, "loss": 1.1614, "step": 5922 }, { "epoch": 0.9054844257596025, "grad_norm": 1.3671875, "learning_rate": 5.267256115081645e-05, "loss": 1.4256, "step": 5923 }, { "epoch": 0.9056373017389643, "grad_norm": 1.4609375, "learning_rate": 5.266862861545392e-05, "loss": 1.2427, "step": 5924 }, { "epoch": 0.905790177718326, "grad_norm": 1.5390625, "learning_rate": 5.2664696174418585e-05, "loss": 1.4771, "step": 5925 }, { "epoch": 0.9059430536976878, "grad_norm": 1.390625, "learning_rate": 5.266076382771825e-05, "loss": 1.2196, "step": 5926 }, { "epoch": 0.9060959296770494, "grad_norm": 1.625, "learning_rate": 5.2656831575360674e-05, "loss": 1.519, "step": 5927 }, { "epoch": 0.9062488056564112, "grad_norm": 1.3984375, "learning_rate": 5.265289941735381e-05, "loss": 1.256, "step": 5928 }, { "epoch": 0.906401681635773, "grad_norm": 1.515625, "learning_rate": 5.2648967353705414e-05, "loss": 1.4355, "step": 5929 }, { "epoch": 0.9065545576151347, "grad_norm": 1.4765625, "learning_rate": 5.264503538442336e-05, "loss": 1.3031, "step": 5930 }, { "epoch": 0.9067074335944965, "grad_norm": 1.4765625, "learning_rate": 5.264110350951548e-05, "loss": 1.469, "step": 5931 }, { "epoch": 0.9068603095738582, "grad_norm": 1.40625, "learning_rate": 5.263717172898959e-05, "loss": 1.2623, "step": 5932 }, { "epoch": 0.90701318555322, "grad_norm": 1.4765625, "learning_rate": 5.263324004285355e-05, "loss": 1.0628, "step": 5933 }, { "epoch": 0.9071660615325817, "grad_norm": 1.328125, "learning_rate": 5.2629308451115134e-05, "loss": 1.0708, "step": 5934 }, { "epoch": 0.9073189375119435, "grad_norm": 1.171875, "learning_rate": 5.2625376953782314e-05, "loss": 1.2185, "step": 5935 }, { "epoch": 0.9074718134913051, "grad_norm": 1.484375, "learning_rate": 5.262144555086275e-05, "loss": 1.5593, "step": 5936 }, { "epoch": 0.9076246894706669, "grad_norm": 1.46875, "learning_rate": 5.2617514242364404e-05, "loss": 1.4531, "step": 5937 }, { "epoch": 0.9077775654500286, "grad_norm": 1.484375, "learning_rate": 5.261358302829509e-05, "loss": 1.4453, "step": 5938 }, { "epoch": 0.9079304414293904, "grad_norm": 1.3359375, "learning_rate": 5.26096519086626e-05, "loss": 1.5425, "step": 5939 }, { "epoch": 0.9080833174087521, "grad_norm": 1.3671875, "learning_rate": 5.260572088347482e-05, "loss": 1.4143, "step": 5940 }, { "epoch": 0.9082361933881139, "grad_norm": 1.40625, "learning_rate": 5.260178995273955e-05, "loss": 1.2519, "step": 5941 }, { "epoch": 0.9083890693674757, "grad_norm": 1.375, "learning_rate": 5.259785911646462e-05, "loss": 1.3118, "step": 5942 }, { "epoch": 0.9085419453468374, "grad_norm": 1.3203125, "learning_rate": 5.259392837465784e-05, "loss": 1.3188, "step": 5943 }, { "epoch": 0.9086948213261992, "grad_norm": 1.3828125, "learning_rate": 5.258999772732711e-05, "loss": 1.3602, "step": 5944 }, { "epoch": 0.9088476973055608, "grad_norm": 1.6484375, "learning_rate": 5.2586067174480294e-05, "loss": 1.7851, "step": 5945 }, { "epoch": 0.9090005732849226, "grad_norm": 1.2265625, "learning_rate": 5.258213671612506e-05, "loss": 1.1848, "step": 5946 }, { "epoch": 0.9091534492642843, "grad_norm": 1.421875, "learning_rate": 5.25782063522694e-05, "loss": 1.1039, "step": 5947 }, { "epoch": 0.9093063252436461, "grad_norm": 1.390625, "learning_rate": 5.2574276082921093e-05, "loss": 1.284, "step": 5948 }, { "epoch": 0.9094592012230078, "grad_norm": 1.515625, "learning_rate": 5.257034590808796e-05, "loss": 1.3456, "step": 5949 }, { "epoch": 0.9096120772023696, "grad_norm": 1.453125, "learning_rate": 5.2566415827777794e-05, "loss": 1.373, "step": 5950 }, { "epoch": 0.9097649531817313, "grad_norm": 1.4453125, "learning_rate": 5.256248584199858e-05, "loss": 1.3362, "step": 5951 }, { "epoch": 0.9099178291610931, "grad_norm": 1.4921875, "learning_rate": 5.255855595075798e-05, "loss": 1.5846, "step": 5952 }, { "epoch": 0.9100707051404548, "grad_norm": 1.40625, "learning_rate": 5.255462615406388e-05, "loss": 1.4089, "step": 5953 }, { "epoch": 0.9102235811198165, "grad_norm": 1.328125, "learning_rate": 5.255069645192414e-05, "loss": 1.331, "step": 5954 }, { "epoch": 0.9103764570991782, "grad_norm": 1.640625, "learning_rate": 5.2546766844346596e-05, "loss": 1.6413, "step": 5955 }, { "epoch": 0.91052933307854, "grad_norm": 1.4453125, "learning_rate": 5.2542837331339044e-05, "loss": 1.4481, "step": 5956 }, { "epoch": 0.9106822090579018, "grad_norm": 1.3359375, "learning_rate": 5.253890791290934e-05, "loss": 1.2507, "step": 5957 }, { "epoch": 0.9108350850372635, "grad_norm": 1.40625, "learning_rate": 5.2534978589065307e-05, "loss": 1.3612, "step": 5958 }, { "epoch": 0.9109879610166253, "grad_norm": 1.484375, "learning_rate": 5.253104935981477e-05, "loss": 1.4714, "step": 5959 }, { "epoch": 0.911140836995987, "grad_norm": 1.3046875, "learning_rate": 5.252712022516553e-05, "loss": 1.3421, "step": 5960 }, { "epoch": 0.9112937129753488, "grad_norm": 1.3671875, "learning_rate": 5.252319118512552e-05, "loss": 1.4019, "step": 5961 }, { "epoch": 0.9114465889547105, "grad_norm": 1.234375, "learning_rate": 5.251926223970243e-05, "loss": 1.4965, "step": 5962 }, { "epoch": 0.9115994649340722, "grad_norm": 1.4609375, "learning_rate": 5.25153333889042e-05, "loss": 1.4577, "step": 5963 }, { "epoch": 0.9117523409134339, "grad_norm": 1.3671875, "learning_rate": 5.251140463273865e-05, "loss": 1.2848, "step": 5964 }, { "epoch": 0.9119052168927957, "grad_norm": 1.546875, "learning_rate": 5.250747597121352e-05, "loss": 1.8081, "step": 5965 }, { "epoch": 0.9120580928721574, "grad_norm": 1.515625, "learning_rate": 5.250354740433672e-05, "loss": 1.3536, "step": 5966 }, { "epoch": 0.9122109688515192, "grad_norm": 1.2421875, "learning_rate": 5.2499618932116076e-05, "loss": 1.1518, "step": 5967 }, { "epoch": 0.912363844830881, "grad_norm": 1.4140625, "learning_rate": 5.2495690554559395e-05, "loss": 1.4147, "step": 5968 }, { "epoch": 0.9125167208102427, "grad_norm": 1.359375, "learning_rate": 5.2491762271674475e-05, "loss": 1.2631, "step": 5969 }, { "epoch": 0.9126695967896045, "grad_norm": 1.3984375, "learning_rate": 5.248783408346926e-05, "loss": 1.7004, "step": 5970 }, { "epoch": 0.9128224727689662, "grad_norm": 1.328125, "learning_rate": 5.2483905989951456e-05, "loss": 1.2394, "step": 5971 }, { "epoch": 0.9129753487483279, "grad_norm": 1.3671875, "learning_rate": 5.2479977991128894e-05, "loss": 1.408, "step": 5972 }, { "epoch": 0.9131282247276896, "grad_norm": 1.3828125, "learning_rate": 5.247605008700949e-05, "loss": 1.1757, "step": 5973 }, { "epoch": 0.9132811007070514, "grad_norm": 1.3671875, "learning_rate": 5.247212227760101e-05, "loss": 1.4805, "step": 5974 }, { "epoch": 0.9134339766864131, "grad_norm": 1.5625, "learning_rate": 5.246819456291132e-05, "loss": 1.5312, "step": 5975 }, { "epoch": 0.9135868526657749, "grad_norm": 1.3828125, "learning_rate": 5.246426694294817e-05, "loss": 1.4006, "step": 5976 }, { "epoch": 0.9137397286451366, "grad_norm": 1.359375, "learning_rate": 5.2460339417719526e-05, "loss": 1.3154, "step": 5977 }, { "epoch": 0.9138926046244984, "grad_norm": 1.484375, "learning_rate": 5.245641198723309e-05, "loss": 1.381, "step": 5978 }, { "epoch": 0.9140454806038601, "grad_norm": 1.4609375, "learning_rate": 5.2452484651496695e-05, "loss": 1.4426, "step": 5979 }, { "epoch": 0.9141983565832219, "grad_norm": 1.4609375, "learning_rate": 5.2448557410518286e-05, "loss": 1.3322, "step": 5980 }, { "epoch": 0.9143512325625835, "grad_norm": 1.328125, "learning_rate": 5.24446302643055e-05, "loss": 1.2332, "step": 5981 }, { "epoch": 0.9145041085419453, "grad_norm": 1.3828125, "learning_rate": 5.2440703212866335e-05, "loss": 1.2365, "step": 5982 }, { "epoch": 0.9146569845213071, "grad_norm": 1.3515625, "learning_rate": 5.2436776256208554e-05, "loss": 1.2902, "step": 5983 }, { "epoch": 0.9148098605006688, "grad_norm": 1.2578125, "learning_rate": 5.243284939433998e-05, "loss": 1.0659, "step": 5984 }, { "epoch": 0.9149627364800306, "grad_norm": 1.4375, "learning_rate": 5.242892262726844e-05, "loss": 1.4436, "step": 5985 }, { "epoch": 0.9151156124593923, "grad_norm": 1.515625, "learning_rate": 5.242499595500174e-05, "loss": 1.6256, "step": 5986 }, { "epoch": 0.9152684884387541, "grad_norm": 1.484375, "learning_rate": 5.242106937754776e-05, "loss": 1.5621, "step": 5987 }, { "epoch": 0.9154213644181158, "grad_norm": 1.453125, "learning_rate": 5.241714289491423e-05, "loss": 1.5292, "step": 5988 }, { "epoch": 0.9155742403974776, "grad_norm": 1.609375, "learning_rate": 5.2413216507109076e-05, "loss": 1.4305, "step": 5989 }, { "epoch": 0.9157271163768392, "grad_norm": 1.5625, "learning_rate": 5.2409290214140086e-05, "loss": 1.466, "step": 5990 }, { "epoch": 0.915879992356201, "grad_norm": 1.25, "learning_rate": 5.240536401601508e-05, "loss": 1.3349, "step": 5991 }, { "epoch": 0.9160328683355627, "grad_norm": 1.4453125, "learning_rate": 5.240143791274188e-05, "loss": 1.7158, "step": 5992 }, { "epoch": 0.9161857443149245, "grad_norm": 1.3828125, "learning_rate": 5.2397511904328334e-05, "loss": 1.4866, "step": 5993 }, { "epoch": 0.9163386202942863, "grad_norm": 1.2109375, "learning_rate": 5.239358599078222e-05, "loss": 1.1508, "step": 5994 }, { "epoch": 0.916491496273648, "grad_norm": 1.3125, "learning_rate": 5.238966017211138e-05, "loss": 1.4148, "step": 5995 }, { "epoch": 0.9166443722530098, "grad_norm": 1.3984375, "learning_rate": 5.23857344483237e-05, "loss": 1.2844, "step": 5996 }, { "epoch": 0.9167972482323715, "grad_norm": 1.5, "learning_rate": 5.238180881942691e-05, "loss": 1.4252, "step": 5997 }, { "epoch": 0.9169501242117333, "grad_norm": 1.4296875, "learning_rate": 5.237788328542885e-05, "loss": 1.603, "step": 5998 }, { "epoch": 0.9171030001910949, "grad_norm": 1.3125, "learning_rate": 5.2373957846337404e-05, "loss": 1.3771, "step": 5999 }, { "epoch": 0.9172558761704567, "grad_norm": 1.2734375, "learning_rate": 5.237003250216035e-05, "loss": 1.2875, "step": 6000 }, { "epoch": 0.9172558761704567, "eval_loss": 1.3436775207519531, "eval_model_preparation_time": 0.0034, "eval_runtime": 111.8037, "eval_samples_per_second": 89.442, "eval_steps_per_second": 2.8, "step": 6000 }, { "epoch": 0.9174087521498184, "grad_norm": 1.46875, "learning_rate": 5.236610725290552e-05, "loss": 1.2726, "step": 6001 }, { "epoch": 0.9175616281291802, "grad_norm": 1.4765625, "learning_rate": 5.2362182098580746e-05, "loss": 1.4415, "step": 6002 }, { "epoch": 0.9177145041085419, "grad_norm": 1.3203125, "learning_rate": 5.2358257039193826e-05, "loss": 1.1986, "step": 6003 }, { "epoch": 0.9178673800879037, "grad_norm": 1.5625, "learning_rate": 5.23543320747526e-05, "loss": 1.5844, "step": 6004 }, { "epoch": 0.9180202560672654, "grad_norm": 1.421875, "learning_rate": 5.2350407205264854e-05, "loss": 1.5415, "step": 6005 }, { "epoch": 0.9181731320466272, "grad_norm": 1.359375, "learning_rate": 5.234648243073852e-05, "loss": 1.1441, "step": 6006 }, { "epoch": 0.918326008025989, "grad_norm": 1.5546875, "learning_rate": 5.2342557751181254e-05, "loss": 1.564, "step": 6007 }, { "epoch": 0.9184788840053506, "grad_norm": 1.125, "learning_rate": 5.233863316660102e-05, "loss": 1.007, "step": 6008 }, { "epoch": 0.9186317599847124, "grad_norm": 1.3984375, "learning_rate": 5.2334708677005565e-05, "loss": 1.3784, "step": 6009 }, { "epoch": 0.9187846359640741, "grad_norm": 1.421875, "learning_rate": 5.233078428240274e-05, "loss": 1.2412, "step": 6010 }, { "epoch": 0.9189375119434359, "grad_norm": 1.40625, "learning_rate": 5.232685998280036e-05, "loss": 1.4618, "step": 6011 }, { "epoch": 0.9190903879227976, "grad_norm": 1.3046875, "learning_rate": 5.232293577820624e-05, "loss": 1.3181, "step": 6012 }, { "epoch": 0.9192432639021594, "grad_norm": 1.484375, "learning_rate": 5.231901166862821e-05, "loss": 1.3717, "step": 6013 }, { "epoch": 0.9193961398815211, "grad_norm": 1.515625, "learning_rate": 5.2315087654074025e-05, "loss": 1.4426, "step": 6014 }, { "epoch": 0.9195490158608829, "grad_norm": 1.3359375, "learning_rate": 5.231116373455165e-05, "loss": 1.3812, "step": 6015 }, { "epoch": 0.9197018918402446, "grad_norm": 1.890625, "learning_rate": 5.230723991006874e-05, "loss": 1.6049, "step": 6016 }, { "epoch": 0.9198547678196063, "grad_norm": 1.203125, "learning_rate": 5.230331618063322e-05, "loss": 1.2389, "step": 6017 }, { "epoch": 0.920007643798968, "grad_norm": 1.375, "learning_rate": 5.2299392546252904e-05, "loss": 1.4019, "step": 6018 }, { "epoch": 0.9201605197783298, "grad_norm": 1.4453125, "learning_rate": 5.229546900693558e-05, "loss": 1.4892, "step": 6019 }, { "epoch": 0.9203133957576916, "grad_norm": 1.421875, "learning_rate": 5.229154556268907e-05, "loss": 1.3872, "step": 6020 }, { "epoch": 0.9204662717370533, "grad_norm": 1.3203125, "learning_rate": 5.2287622213521215e-05, "loss": 1.5511, "step": 6021 }, { "epoch": 0.9206191477164151, "grad_norm": 1.28125, "learning_rate": 5.2283698959439806e-05, "loss": 1.1678, "step": 6022 }, { "epoch": 0.9207720236957768, "grad_norm": 1.3984375, "learning_rate": 5.2279775800452646e-05, "loss": 1.5005, "step": 6023 }, { "epoch": 0.9209248996751386, "grad_norm": 1.2578125, "learning_rate": 5.227585273656761e-05, "loss": 1.1291, "step": 6024 }, { "epoch": 0.9210777756545003, "grad_norm": 1.3515625, "learning_rate": 5.227192976779254e-05, "loss": 1.3688, "step": 6025 }, { "epoch": 0.921230651633862, "grad_norm": 1.6484375, "learning_rate": 5.226800689413511e-05, "loss": 1.4022, "step": 6026 }, { "epoch": 0.9213835276132237, "grad_norm": 1.484375, "learning_rate": 5.226408411560328e-05, "loss": 1.5998, "step": 6027 }, { "epoch": 0.9215364035925855, "grad_norm": 1.3515625, "learning_rate": 5.226016143220482e-05, "loss": 1.4181, "step": 6028 }, { "epoch": 0.9216892795719472, "grad_norm": 1.3671875, "learning_rate": 5.2256238843947545e-05, "loss": 1.1977, "step": 6029 }, { "epoch": 0.921842155551309, "grad_norm": 1.390625, "learning_rate": 5.225231635083927e-05, "loss": 1.2964, "step": 6030 }, { "epoch": 0.9219950315306708, "grad_norm": 1.5, "learning_rate": 5.224839395288782e-05, "loss": 1.4553, "step": 6031 }, { "epoch": 0.9221479075100325, "grad_norm": 1.4765625, "learning_rate": 5.2244471650101004e-05, "loss": 1.434, "step": 6032 }, { "epoch": 0.9223007834893943, "grad_norm": 1.3203125, "learning_rate": 5.2240549442486606e-05, "loss": 1.2541, "step": 6033 }, { "epoch": 0.922453659468756, "grad_norm": 1.3515625, "learning_rate": 5.223662733005252e-05, "loss": 1.3919, "step": 6034 }, { "epoch": 0.9226065354481177, "grad_norm": 1.421875, "learning_rate": 5.223270531280651e-05, "loss": 1.6899, "step": 6035 }, { "epoch": 0.9227594114274794, "grad_norm": 1.453125, "learning_rate": 5.222878339075641e-05, "loss": 1.3897, "step": 6036 }, { "epoch": 0.9229122874068412, "grad_norm": 1.4296875, "learning_rate": 5.222486156391003e-05, "loss": 1.2175, "step": 6037 }, { "epoch": 0.9230651633862029, "grad_norm": 1.3359375, "learning_rate": 5.222093983227519e-05, "loss": 1.0694, "step": 6038 }, { "epoch": 0.9232180393655647, "grad_norm": 1.34375, "learning_rate": 5.2217018195859694e-05, "loss": 1.1447, "step": 6039 }, { "epoch": 0.9233709153449264, "grad_norm": 1.2578125, "learning_rate": 5.221309665467134e-05, "loss": 1.3827, "step": 6040 }, { "epoch": 0.9235237913242882, "grad_norm": 1.4921875, "learning_rate": 5.220917520871802e-05, "loss": 1.4998, "step": 6041 }, { "epoch": 0.92367666730365, "grad_norm": 1.5234375, "learning_rate": 5.220525385800743e-05, "loss": 1.4828, "step": 6042 }, { "epoch": 0.9238295432830117, "grad_norm": 1.421875, "learning_rate": 5.220133260254749e-05, "loss": 1.2281, "step": 6043 }, { "epoch": 0.9239824192623733, "grad_norm": 1.5859375, "learning_rate": 5.219741144234602e-05, "loss": 1.4771, "step": 6044 }, { "epoch": 0.9241352952417351, "grad_norm": 1.359375, "learning_rate": 5.2193490377410716e-05, "loss": 1.4793, "step": 6045 }, { "epoch": 0.9242881712210969, "grad_norm": 1.2890625, "learning_rate": 5.2189569407749505e-05, "loss": 1.2893, "step": 6046 }, { "epoch": 0.9244410472004586, "grad_norm": 1.5, "learning_rate": 5.218564853337016e-05, "loss": 1.4404, "step": 6047 }, { "epoch": 0.9245939231798204, "grad_norm": 1.2890625, "learning_rate": 5.21817277542805e-05, "loss": 1.2233, "step": 6048 }, { "epoch": 0.9247467991591821, "grad_norm": 1.3984375, "learning_rate": 5.217780707048829e-05, "loss": 1.4486, "step": 6049 }, { "epoch": 0.9248996751385439, "grad_norm": 1.3203125, "learning_rate": 5.2173886482001436e-05, "loss": 1.3327, "step": 6050 }, { "epoch": 0.9250525511179056, "grad_norm": 1.21875, "learning_rate": 5.2169965988827755e-05, "loss": 1.2239, "step": 6051 }, { "epoch": 0.9252054270972674, "grad_norm": 1.3671875, "learning_rate": 5.216604559097492e-05, "loss": 1.2321, "step": 6052 }, { "epoch": 0.925358303076629, "grad_norm": 1.46875, "learning_rate": 5.216212528845088e-05, "loss": 1.288, "step": 6053 }, { "epoch": 0.9255111790559908, "grad_norm": 1.5078125, "learning_rate": 5.21582050812634e-05, "loss": 1.3433, "step": 6054 }, { "epoch": 0.9256640550353525, "grad_norm": 1.2734375, "learning_rate": 5.21542849694203e-05, "loss": 1.2266, "step": 6055 }, { "epoch": 0.9258169310147143, "grad_norm": 1.4140625, "learning_rate": 5.2150364952929375e-05, "loss": 1.3191, "step": 6056 }, { "epoch": 0.925969806994076, "grad_norm": 1.46875, "learning_rate": 5.2146445031798466e-05, "loss": 1.3614, "step": 6057 }, { "epoch": 0.9261226829734378, "grad_norm": 1.46875, "learning_rate": 5.2142525206035377e-05, "loss": 1.4223, "step": 6058 }, { "epoch": 0.9262755589527996, "grad_norm": 1.46875, "learning_rate": 5.213860547564785e-05, "loss": 1.6801, "step": 6059 }, { "epoch": 0.9264284349321613, "grad_norm": 1.453125, "learning_rate": 5.213468584064385e-05, "loss": 1.3085, "step": 6060 }, { "epoch": 0.9265813109115231, "grad_norm": 1.4375, "learning_rate": 5.213076630103102e-05, "loss": 1.331, "step": 6061 }, { "epoch": 0.9267341868908847, "grad_norm": 1.453125, "learning_rate": 5.212684685681728e-05, "loss": 1.2383, "step": 6062 }, { "epoch": 0.9268870628702465, "grad_norm": 1.2890625, "learning_rate": 5.2122927508010425e-05, "loss": 1.2998, "step": 6063 }, { "epoch": 0.9270399388496082, "grad_norm": 1.4375, "learning_rate": 5.211900825461823e-05, "loss": 1.4022, "step": 6064 }, { "epoch": 0.92719281482897, "grad_norm": 1.234375, "learning_rate": 5.211508909664854e-05, "loss": 1.1263, "step": 6065 }, { "epoch": 0.9273456908083317, "grad_norm": 1.4375, "learning_rate": 5.2111170034109144e-05, "loss": 1.4747, "step": 6066 }, { "epoch": 0.9274985667876935, "grad_norm": 1.3359375, "learning_rate": 5.210725106700787e-05, "loss": 1.356, "step": 6067 }, { "epoch": 0.9276514427670552, "grad_norm": 1.4296875, "learning_rate": 5.210333219535247e-05, "loss": 1.5636, "step": 6068 }, { "epoch": 0.927804318746417, "grad_norm": 1.3203125, "learning_rate": 5.209941341915085e-05, "loss": 1.4547, "step": 6069 }, { "epoch": 0.9279571947257788, "grad_norm": 1.6484375, "learning_rate": 5.209549473841081e-05, "loss": 1.3893, "step": 6070 }, { "epoch": 0.9281100707051404, "grad_norm": 1.453125, "learning_rate": 5.209157615314004e-05, "loss": 1.5882, "step": 6071 }, { "epoch": 0.9282629466845022, "grad_norm": 1.3828125, "learning_rate": 5.208765766334647e-05, "loss": 1.2248, "step": 6072 }, { "epoch": 0.9284158226638639, "grad_norm": 1.2890625, "learning_rate": 5.208373926903787e-05, "loss": 1.3922, "step": 6073 }, { "epoch": 0.9285686986432257, "grad_norm": 1.53125, "learning_rate": 5.207982097022206e-05, "loss": 1.5446, "step": 6074 }, { "epoch": 0.9287215746225874, "grad_norm": 1.5078125, "learning_rate": 5.2075902766906794e-05, "loss": 1.5785, "step": 6075 }, { "epoch": 0.9288744506019492, "grad_norm": 1.3984375, "learning_rate": 5.2071984659099994e-05, "loss": 1.367, "step": 6076 }, { "epoch": 0.9290273265813109, "grad_norm": 1.3359375, "learning_rate": 5.2068066646809365e-05, "loss": 1.3917, "step": 6077 }, { "epoch": 0.9291802025606727, "grad_norm": 1.2421875, "learning_rate": 5.206414873004271e-05, "loss": 1.2424, "step": 6078 }, { "epoch": 0.9293330785400344, "grad_norm": 1.359375, "learning_rate": 5.2060230908807916e-05, "loss": 1.376, "step": 6079 }, { "epoch": 0.9294859545193961, "grad_norm": 1.4765625, "learning_rate": 5.205631318311275e-05, "loss": 1.3529, "step": 6080 }, { "epoch": 0.9296388304987578, "grad_norm": 1.390625, "learning_rate": 5.205239555296502e-05, "loss": 1.2837, "step": 6081 }, { "epoch": 0.9297917064781196, "grad_norm": 1.4140625, "learning_rate": 5.204847801837254e-05, "loss": 1.5518, "step": 6082 }, { "epoch": 0.9299445824574814, "grad_norm": 1.4296875, "learning_rate": 5.20445605793431e-05, "loss": 1.1852, "step": 6083 }, { "epoch": 0.9300974584368431, "grad_norm": 1.3125, "learning_rate": 5.2040643235884536e-05, "loss": 1.2157, "step": 6084 }, { "epoch": 0.9302503344162049, "grad_norm": 1.3828125, "learning_rate": 5.20367259880046e-05, "loss": 1.5511, "step": 6085 }, { "epoch": 0.9304032103955666, "grad_norm": 1.34375, "learning_rate": 5.20328088357112e-05, "loss": 1.2504, "step": 6086 }, { "epoch": 0.9305560863749284, "grad_norm": 1.3515625, "learning_rate": 5.2028891779012e-05, "loss": 1.2517, "step": 6087 }, { "epoch": 0.9307089623542901, "grad_norm": 1.359375, "learning_rate": 5.202497481791494e-05, "loss": 1.2256, "step": 6088 }, { "epoch": 0.9308618383336518, "grad_norm": 1.25, "learning_rate": 5.20210579524278e-05, "loss": 1.1196, "step": 6089 }, { "epoch": 0.9310147143130135, "grad_norm": 1.2421875, "learning_rate": 5.201714118255828e-05, "loss": 1.3338, "step": 6090 }, { "epoch": 0.9311675902923753, "grad_norm": 1.5703125, "learning_rate": 5.201322450831431e-05, "loss": 1.4151, "step": 6091 }, { "epoch": 0.931320466271737, "grad_norm": 1.2421875, "learning_rate": 5.2009307929703644e-05, "loss": 1.1145, "step": 6092 }, { "epoch": 0.9314733422510988, "grad_norm": 1.3984375, "learning_rate": 5.2005391446734084e-05, "loss": 1.2835, "step": 6093 }, { "epoch": 0.9316262182304605, "grad_norm": 1.4453125, "learning_rate": 5.200147505941342e-05, "loss": 1.357, "step": 6094 }, { "epoch": 0.9317790942098223, "grad_norm": 1.265625, "learning_rate": 5.199755876774954e-05, "loss": 1.0428, "step": 6095 }, { "epoch": 0.9319319701891841, "grad_norm": 1.4921875, "learning_rate": 5.1993642571750165e-05, "loss": 1.2856, "step": 6096 }, { "epoch": 0.9320848461685458, "grad_norm": 1.4921875, "learning_rate": 5.1989726471423084e-05, "loss": 1.4716, "step": 6097 }, { "epoch": 0.9322377221479075, "grad_norm": 1.3984375, "learning_rate": 5.1985810466776174e-05, "loss": 1.6556, "step": 6098 }, { "epoch": 0.9323905981272692, "grad_norm": 1.3046875, "learning_rate": 5.1981894557817215e-05, "loss": 1.2472, "step": 6099 }, { "epoch": 0.932543474106631, "grad_norm": 1.2265625, "learning_rate": 5.197797874455399e-05, "loss": 1.2206, "step": 6100 }, { "epoch": 0.9326963500859927, "grad_norm": 1.484375, "learning_rate": 5.197406302699429e-05, "loss": 1.3577, "step": 6101 }, { "epoch": 0.9328492260653545, "grad_norm": 1.359375, "learning_rate": 5.197014740514601e-05, "loss": 1.409, "step": 6102 }, { "epoch": 0.9330021020447162, "grad_norm": 1.359375, "learning_rate": 5.196623187901686e-05, "loss": 1.4512, "step": 6103 }, { "epoch": 0.933154978024078, "grad_norm": 1.5703125, "learning_rate": 5.196231644861462e-05, "loss": 1.5337, "step": 6104 }, { "epoch": 0.9333078540034397, "grad_norm": 1.3046875, "learning_rate": 5.195840111394723e-05, "loss": 1.2753, "step": 6105 }, { "epoch": 0.9334607299828015, "grad_norm": 1.4375, "learning_rate": 5.195448587502234e-05, "loss": 1.2399, "step": 6106 }, { "epoch": 0.9336136059621631, "grad_norm": 1.3671875, "learning_rate": 5.195057073184784e-05, "loss": 1.5329, "step": 6107 }, { "epoch": 0.9337664819415249, "grad_norm": 1.5, "learning_rate": 5.194665568443152e-05, "loss": 1.2971, "step": 6108 }, { "epoch": 0.9339193579208867, "grad_norm": 1.3125, "learning_rate": 5.194274073278118e-05, "loss": 1.3591, "step": 6109 }, { "epoch": 0.9340722339002484, "grad_norm": 1.34375, "learning_rate": 5.193882587690461e-05, "loss": 1.1782, "step": 6110 }, { "epoch": 0.9342251098796102, "grad_norm": 1.546875, "learning_rate": 5.193491111680962e-05, "loss": 1.5761, "step": 6111 }, { "epoch": 0.9343779858589719, "grad_norm": 1.3828125, "learning_rate": 5.193099645250401e-05, "loss": 1.4881, "step": 6112 }, { "epoch": 0.9345308618383337, "grad_norm": 1.3828125, "learning_rate": 5.1927081883995535e-05, "loss": 1.25, "step": 6113 }, { "epoch": 0.9346837378176954, "grad_norm": 1.5390625, "learning_rate": 5.192316741129208e-05, "loss": 1.402, "step": 6114 }, { "epoch": 0.9348366137970572, "grad_norm": 1.5546875, "learning_rate": 5.1919253034401424e-05, "loss": 1.5277, "step": 6115 }, { "epoch": 0.9349894897764188, "grad_norm": 1.234375, "learning_rate": 5.1915338753331355e-05, "loss": 1.179, "step": 6116 }, { "epoch": 0.9351423657557806, "grad_norm": 1.296875, "learning_rate": 5.191142456808966e-05, "loss": 1.0242, "step": 6117 }, { "epoch": 0.9352952417351423, "grad_norm": 1.4296875, "learning_rate": 5.190751047868416e-05, "loss": 1.5179, "step": 6118 }, { "epoch": 0.9354481177145041, "grad_norm": 1.4140625, "learning_rate": 5.1903596485122655e-05, "loss": 1.3527, "step": 6119 }, { "epoch": 0.9356009936938658, "grad_norm": 1.40625, "learning_rate": 5.189968258741289e-05, "loss": 1.3457, "step": 6120 }, { "epoch": 0.9357538696732276, "grad_norm": 1.40625, "learning_rate": 5.1895768785562795e-05, "loss": 1.3269, "step": 6121 }, { "epoch": 0.9359067456525894, "grad_norm": 1.4765625, "learning_rate": 5.1891855079580033e-05, "loss": 1.5682, "step": 6122 }, { "epoch": 0.9360596216319511, "grad_norm": 1.359375, "learning_rate": 5.188794146947242e-05, "loss": 1.3798, "step": 6123 }, { "epoch": 0.9362124976113129, "grad_norm": 1.328125, "learning_rate": 5.188402795524785e-05, "loss": 1.2412, "step": 6124 }, { "epoch": 0.9363653735906745, "grad_norm": 1.4609375, "learning_rate": 5.1880114536914056e-05, "loss": 1.4844, "step": 6125 }, { "epoch": 0.9365182495700363, "grad_norm": 1.1796875, "learning_rate": 5.187620121447886e-05, "loss": 1.2635, "step": 6126 }, { "epoch": 0.936671125549398, "grad_norm": 1.5390625, "learning_rate": 5.187228798795003e-05, "loss": 1.7629, "step": 6127 }, { "epoch": 0.9368240015287598, "grad_norm": 1.4140625, "learning_rate": 5.18683748573354e-05, "loss": 1.3018, "step": 6128 }, { "epoch": 0.9369768775081215, "grad_norm": 1.421875, "learning_rate": 5.1864461822642726e-05, "loss": 1.2769, "step": 6129 }, { "epoch": 0.9371297534874833, "grad_norm": 1.46875, "learning_rate": 5.186054888387981e-05, "loss": 1.3956, "step": 6130 }, { "epoch": 0.937282629466845, "grad_norm": 1.3046875, "learning_rate": 5.1856636041054555e-05, "loss": 1.5482, "step": 6131 }, { "epoch": 0.9374355054462068, "grad_norm": 1.375, "learning_rate": 5.1852723294174585e-05, "loss": 1.204, "step": 6132 }, { "epoch": 0.9375883814255686, "grad_norm": 1.5390625, "learning_rate": 5.184881064324785e-05, "loss": 1.521, "step": 6133 }, { "epoch": 0.9377412574049302, "grad_norm": 1.2265625, "learning_rate": 5.184489808828206e-05, "loss": 1.317, "step": 6134 }, { "epoch": 0.937894133384292, "grad_norm": 1.5, "learning_rate": 5.184098562928504e-05, "loss": 1.4571, "step": 6135 }, { "epoch": 0.9380470093636537, "grad_norm": 1.546875, "learning_rate": 5.183707326626459e-05, "loss": 1.3511, "step": 6136 }, { "epoch": 0.9381998853430155, "grad_norm": 1.5703125, "learning_rate": 5.183316099922851e-05, "loss": 1.3854, "step": 6137 }, { "epoch": 0.9383527613223772, "grad_norm": 1.3671875, "learning_rate": 5.182924882818459e-05, "loss": 1.3934, "step": 6138 }, { "epoch": 0.938505637301739, "grad_norm": 1.3515625, "learning_rate": 5.1825336753140584e-05, "loss": 1.1935, "step": 6139 }, { "epoch": 0.9386585132811007, "grad_norm": 1.4921875, "learning_rate": 5.18214247741044e-05, "loss": 1.3538, "step": 6140 }, { "epoch": 0.9388113892604625, "grad_norm": 1.3671875, "learning_rate": 5.18175128910837e-05, "loss": 1.3216, "step": 6141 }, { "epoch": 0.9389642652398242, "grad_norm": 1.3984375, "learning_rate": 5.1813601104086374e-05, "loss": 1.5544, "step": 6142 }, { "epoch": 0.9391171412191859, "grad_norm": 1.34375, "learning_rate": 5.180968941312019e-05, "loss": 1.329, "step": 6143 }, { "epoch": 0.9392700171985476, "grad_norm": 1.453125, "learning_rate": 5.1805777818192934e-05, "loss": 1.3501, "step": 6144 }, { "epoch": 0.9394228931779094, "grad_norm": 1.3203125, "learning_rate": 5.180186631931242e-05, "loss": 1.3921, "step": 6145 }, { "epoch": 0.9395757691572711, "grad_norm": 1.390625, "learning_rate": 5.1797954916486425e-05, "loss": 1.3737, "step": 6146 }, { "epoch": 0.9397286451366329, "grad_norm": 1.2734375, "learning_rate": 5.179404360972275e-05, "loss": 1.1725, "step": 6147 }, { "epoch": 0.9398815211159947, "grad_norm": 1.296875, "learning_rate": 5.179013239902921e-05, "loss": 1.3292, "step": 6148 }, { "epoch": 0.9400343970953564, "grad_norm": 1.296875, "learning_rate": 5.1786221284413524e-05, "loss": 1.2795, "step": 6149 }, { "epoch": 0.9401872730747182, "grad_norm": 1.390625, "learning_rate": 5.178231026588363e-05, "loss": 1.4901, "step": 6150 }, { "epoch": 0.9403401490540799, "grad_norm": 1.28125, "learning_rate": 5.177839934344716e-05, "loss": 1.2611, "step": 6151 }, { "epoch": 0.9404930250334416, "grad_norm": 1.4453125, "learning_rate": 5.177448851711202e-05, "loss": 1.3701, "step": 6152 }, { "epoch": 0.9406459010128033, "grad_norm": 1.421875, "learning_rate": 5.1770577786885963e-05, "loss": 1.2337, "step": 6153 }, { "epoch": 0.9407987769921651, "grad_norm": 1.375, "learning_rate": 5.1766667152776795e-05, "loss": 1.4293, "step": 6154 }, { "epoch": 0.9409516529715268, "grad_norm": 1.828125, "learning_rate": 5.176275661479231e-05, "loss": 1.6572, "step": 6155 }, { "epoch": 0.9411045289508886, "grad_norm": 1.3671875, "learning_rate": 5.1758846172940264e-05, "loss": 1.4173, "step": 6156 }, { "epoch": 0.9412574049302503, "grad_norm": 1.3125, "learning_rate": 5.1754935827228514e-05, "loss": 1.0372, "step": 6157 }, { "epoch": 0.9414102809096121, "grad_norm": 1.4140625, "learning_rate": 5.1751025577664755e-05, "loss": 1.3582, "step": 6158 }, { "epoch": 0.9415631568889739, "grad_norm": 1.5390625, "learning_rate": 5.1747115424256895e-05, "loss": 1.4306, "step": 6159 }, { "epoch": 0.9417160328683356, "grad_norm": 1.3515625, "learning_rate": 5.1743205367012684e-05, "loss": 1.3719, "step": 6160 }, { "epoch": 0.9418689088476973, "grad_norm": 1.421875, "learning_rate": 5.173929540593989e-05, "loss": 1.4448, "step": 6161 }, { "epoch": 0.942021784827059, "grad_norm": 1.4453125, "learning_rate": 5.173538554104633e-05, "loss": 1.3521, "step": 6162 }, { "epoch": 0.9421746608064208, "grad_norm": 1.546875, "learning_rate": 5.173147577233979e-05, "loss": 1.5421, "step": 6163 }, { "epoch": 0.9423275367857825, "grad_norm": 1.34375, "learning_rate": 5.1727566099828053e-05, "loss": 1.392, "step": 6164 }, { "epoch": 0.9424804127651443, "grad_norm": 1.5, "learning_rate": 5.172365652351887e-05, "loss": 1.2368, "step": 6165 }, { "epoch": 0.942633288744506, "grad_norm": 1.5390625, "learning_rate": 5.171974704342016e-05, "loss": 1.5878, "step": 6166 }, { "epoch": 0.9427861647238678, "grad_norm": 1.4375, "learning_rate": 5.171583765953956e-05, "loss": 1.2879, "step": 6167 }, { "epoch": 0.9429390407032295, "grad_norm": 1.4765625, "learning_rate": 5.171192837188496e-05, "loss": 1.281, "step": 6168 }, { "epoch": 0.9430919166825913, "grad_norm": 1.515625, "learning_rate": 5.17080191804642e-05, "loss": 1.2833, "step": 6169 }, { "epoch": 0.9432447926619529, "grad_norm": 1.203125, "learning_rate": 5.170411008528489e-05, "loss": 1.0453, "step": 6170 }, { "epoch": 0.9433976686413147, "grad_norm": 1.3671875, "learning_rate": 5.170020108635496e-05, "loss": 1.3112, "step": 6171 }, { "epoch": 0.9435505446206764, "grad_norm": 1.4140625, "learning_rate": 5.169629218368217e-05, "loss": 1.4807, "step": 6172 }, { "epoch": 0.9437034206000382, "grad_norm": 1.3359375, "learning_rate": 5.169238337727431e-05, "loss": 1.2162, "step": 6173 }, { "epoch": 0.9438562965794, "grad_norm": 1.421875, "learning_rate": 5.168847466713913e-05, "loss": 1.4119, "step": 6174 }, { "epoch": 0.9440091725587617, "grad_norm": 1.609375, "learning_rate": 5.168456605328449e-05, "loss": 1.4111, "step": 6175 }, { "epoch": 0.9441620485381235, "grad_norm": 1.4609375, "learning_rate": 5.168065753571819e-05, "loss": 1.249, "step": 6176 }, { "epoch": 0.9443149245174852, "grad_norm": 1.4453125, "learning_rate": 5.16767491144479e-05, "loss": 1.2818, "step": 6177 }, { "epoch": 0.944467800496847, "grad_norm": 1.421875, "learning_rate": 5.167284078948151e-05, "loss": 1.541, "step": 6178 }, { "epoch": 0.9446206764762086, "grad_norm": 1.4609375, "learning_rate": 5.1668932560826786e-05, "loss": 1.2639, "step": 6179 }, { "epoch": 0.9447735524555704, "grad_norm": 1.3984375, "learning_rate": 5.1665024428491525e-05, "loss": 1.4152, "step": 6180 }, { "epoch": 0.9449264284349321, "grad_norm": 1.40625, "learning_rate": 5.16611163924835e-05, "loss": 1.3006, "step": 6181 }, { "epoch": 0.9450793044142939, "grad_norm": 1.3359375, "learning_rate": 5.1657208452810504e-05, "loss": 1.3026, "step": 6182 }, { "epoch": 0.9452321803936556, "grad_norm": 1.2421875, "learning_rate": 5.165330060948032e-05, "loss": 1.2549, "step": 6183 }, { "epoch": 0.9453850563730174, "grad_norm": 1.2421875, "learning_rate": 5.16493928625007e-05, "loss": 1.0106, "step": 6184 }, { "epoch": 0.9455379323523792, "grad_norm": 1.328125, "learning_rate": 5.164548521187955e-05, "loss": 1.2279, "step": 6185 }, { "epoch": 0.9456908083317409, "grad_norm": 1.484375, "learning_rate": 5.1641577657624516e-05, "loss": 1.3699, "step": 6186 }, { "epoch": 0.9458436843111027, "grad_norm": 1.359375, "learning_rate": 5.163767019974348e-05, "loss": 1.166, "step": 6187 }, { "epoch": 0.9459965602904643, "grad_norm": 1.484375, "learning_rate": 5.163376283824419e-05, "loss": 1.5206, "step": 6188 }, { "epoch": 0.9461494362698261, "grad_norm": 1.21875, "learning_rate": 5.1629855573134444e-05, "loss": 1.1983, "step": 6189 }, { "epoch": 0.9463023122491878, "grad_norm": 1.46875, "learning_rate": 5.162594840442203e-05, "loss": 1.3849, "step": 6190 }, { "epoch": 0.9464551882285496, "grad_norm": 1.4765625, "learning_rate": 5.162204133211475e-05, "loss": 1.4738, "step": 6191 }, { "epoch": 0.9466080642079113, "grad_norm": 1.296875, "learning_rate": 5.161813435622035e-05, "loss": 1.2071, "step": 6192 }, { "epoch": 0.9467609401872731, "grad_norm": 1.34375, "learning_rate": 5.1614227476746604e-05, "loss": 1.4683, "step": 6193 }, { "epoch": 0.9469138161666348, "grad_norm": 1.453125, "learning_rate": 5.1610320693701374e-05, "loss": 1.3542, "step": 6194 }, { "epoch": 0.9470666921459966, "grad_norm": 1.40625, "learning_rate": 5.160641400709243e-05, "loss": 1.44, "step": 6195 }, { "epoch": 0.9472195681253583, "grad_norm": 1.3125, "learning_rate": 5.160250741692747e-05, "loss": 1.2341, "step": 6196 }, { "epoch": 0.94737244410472, "grad_norm": 1.34375, "learning_rate": 5.159860092321437e-05, "loss": 1.2028, "step": 6197 }, { "epoch": 0.9475253200840817, "grad_norm": 1.5078125, "learning_rate": 5.1594694525960886e-05, "loss": 1.3817, "step": 6198 }, { "epoch": 0.9476781960634435, "grad_norm": 1.3046875, "learning_rate": 5.15907882251748e-05, "loss": 1.3659, "step": 6199 }, { "epoch": 0.9478310720428053, "grad_norm": 1.78125, "learning_rate": 5.158688202086388e-05, "loss": 1.739, "step": 6200 }, { "epoch": 0.947983948022167, "grad_norm": 1.2109375, "learning_rate": 5.158297591303599e-05, "loss": 1.2483, "step": 6201 }, { "epoch": 0.9481368240015288, "grad_norm": 1.3984375, "learning_rate": 5.157906990169883e-05, "loss": 1.3439, "step": 6202 }, { "epoch": 0.9482896999808905, "grad_norm": 1.3828125, "learning_rate": 5.1575163986860156e-05, "loss": 1.4692, "step": 6203 }, { "epoch": 0.9484425759602523, "grad_norm": 1.4375, "learning_rate": 5.157125816852786e-05, "loss": 1.5029, "step": 6204 }, { "epoch": 0.948595451939614, "grad_norm": 1.3515625, "learning_rate": 5.156735244670966e-05, "loss": 1.3185, "step": 6205 }, { "epoch": 0.9487483279189757, "grad_norm": 1.53125, "learning_rate": 5.1563446821413366e-05, "loss": 1.7591, "step": 6206 }, { "epoch": 0.9489012038983374, "grad_norm": 1.484375, "learning_rate": 5.155954129264673e-05, "loss": 1.4367, "step": 6207 }, { "epoch": 0.9490540798776992, "grad_norm": 1.40625, "learning_rate": 5.155563586041756e-05, "loss": 1.3316, "step": 6208 }, { "epoch": 0.9492069558570609, "grad_norm": 1.4765625, "learning_rate": 5.155173052473363e-05, "loss": 1.5344, "step": 6209 }, { "epoch": 0.9493598318364227, "grad_norm": 1.40625, "learning_rate": 5.1547825285602694e-05, "loss": 1.413, "step": 6210 }, { "epoch": 0.9495127078157845, "grad_norm": 1.46875, "learning_rate": 5.154392014303264e-05, "loss": 1.524, "step": 6211 }, { "epoch": 0.9496655837951462, "grad_norm": 1.3359375, "learning_rate": 5.154001509703109e-05, "loss": 1.1962, "step": 6212 }, { "epoch": 0.949818459774508, "grad_norm": 1.296875, "learning_rate": 5.153611014760596e-05, "loss": 1.3389, "step": 6213 }, { "epoch": 0.9499713357538697, "grad_norm": 1.53125, "learning_rate": 5.1532205294765014e-05, "loss": 1.282, "step": 6214 }, { "epoch": 0.9501242117332314, "grad_norm": 1.2421875, "learning_rate": 5.152830053851594e-05, "loss": 1.3641, "step": 6215 }, { "epoch": 0.9502770877125931, "grad_norm": 1.2421875, "learning_rate": 5.152439587886662e-05, "loss": 1.1637, "step": 6216 }, { "epoch": 0.9504299636919549, "grad_norm": 1.3359375, "learning_rate": 5.152049131582479e-05, "loss": 1.3407, "step": 6217 }, { "epoch": 0.9505828396713166, "grad_norm": 1.21875, "learning_rate": 5.151658684939825e-05, "loss": 1.0811, "step": 6218 }, { "epoch": 0.9507357156506784, "grad_norm": 1.28125, "learning_rate": 5.1512682479594733e-05, "loss": 1.4061, "step": 6219 }, { "epoch": 0.9508885916300401, "grad_norm": 1.4140625, "learning_rate": 5.150877820642215e-05, "loss": 1.3665, "step": 6220 }, { "epoch": 0.9510414676094019, "grad_norm": 1.328125, "learning_rate": 5.150487402988813e-05, "loss": 1.217, "step": 6221 }, { "epoch": 0.9511943435887636, "grad_norm": 1.5546875, "learning_rate": 5.150096995000049e-05, "loss": 1.4882, "step": 6222 }, { "epoch": 0.9513472195681254, "grad_norm": 1.3671875, "learning_rate": 5.149706596676707e-05, "loss": 1.2556, "step": 6223 }, { "epoch": 0.951500095547487, "grad_norm": 1.2890625, "learning_rate": 5.149316208019562e-05, "loss": 1.2837, "step": 6224 }, { "epoch": 0.9516529715268488, "grad_norm": 1.453125, "learning_rate": 5.148925829029392e-05, "loss": 1.4322, "step": 6225 }, { "epoch": 0.9518058475062106, "grad_norm": 1.4140625, "learning_rate": 5.1485354597069704e-05, "loss": 1.4694, "step": 6226 }, { "epoch": 0.9519587234855723, "grad_norm": 1.3046875, "learning_rate": 5.148145100053087e-05, "loss": 1.269, "step": 6227 }, { "epoch": 0.9521115994649341, "grad_norm": 1.546875, "learning_rate": 5.1477547500685076e-05, "loss": 1.1796, "step": 6228 }, { "epoch": 0.9522644754442958, "grad_norm": 1.5546875, "learning_rate": 5.147364409754012e-05, "loss": 1.6119, "step": 6229 }, { "epoch": 0.9524173514236576, "grad_norm": 1.5390625, "learning_rate": 5.146974079110387e-05, "loss": 1.3677, "step": 6230 }, { "epoch": 0.9525702274030193, "grad_norm": 1.328125, "learning_rate": 5.146583758138399e-05, "loss": 1.2546, "step": 6231 }, { "epoch": 0.9527231033823811, "grad_norm": 1.515625, "learning_rate": 5.146193446838833e-05, "loss": 1.4405, "step": 6232 }, { "epoch": 0.9528759793617427, "grad_norm": 1.296875, "learning_rate": 5.145803145212466e-05, "loss": 1.2981, "step": 6233 }, { "epoch": 0.9530288553411045, "grad_norm": 1.390625, "learning_rate": 5.145412853260074e-05, "loss": 1.5142, "step": 6234 }, { "epoch": 0.9531817313204662, "grad_norm": 1.453125, "learning_rate": 5.145022570982436e-05, "loss": 1.5948, "step": 6235 }, { "epoch": 0.953334607299828, "grad_norm": 1.3671875, "learning_rate": 5.14463229838033e-05, "loss": 1.3824, "step": 6236 }, { "epoch": 0.9534874832791898, "grad_norm": 1.375, "learning_rate": 5.144242035454533e-05, "loss": 1.3837, "step": 6237 }, { "epoch": 0.9536403592585515, "grad_norm": 1.25, "learning_rate": 5.1438517822058186e-05, "loss": 1.1993, "step": 6238 }, { "epoch": 0.9537932352379133, "grad_norm": 1.515625, "learning_rate": 5.143461538634973e-05, "loss": 1.4973, "step": 6239 }, { "epoch": 0.953946111217275, "grad_norm": 1.375, "learning_rate": 5.14307130474277e-05, "loss": 1.2242, "step": 6240 }, { "epoch": 0.9540989871966368, "grad_norm": 1.2265625, "learning_rate": 5.142681080529987e-05, "loss": 1.1596, "step": 6241 }, { "epoch": 0.9542518631759984, "grad_norm": 1.2734375, "learning_rate": 5.1422908659974036e-05, "loss": 1.2895, "step": 6242 }, { "epoch": 0.9544047391553602, "grad_norm": 1.3359375, "learning_rate": 5.141900661145793e-05, "loss": 1.2437, "step": 6243 }, { "epoch": 0.9545576151347219, "grad_norm": 1.3203125, "learning_rate": 5.141510465975937e-05, "loss": 1.3035, "step": 6244 }, { "epoch": 0.9547104911140837, "grad_norm": 1.4453125, "learning_rate": 5.141120280488607e-05, "loss": 1.3363, "step": 6245 }, { "epoch": 0.9548633670934454, "grad_norm": 1.453125, "learning_rate": 5.1407301046845944e-05, "loss": 1.2908, "step": 6246 }, { "epoch": 0.9550162430728072, "grad_norm": 1.4609375, "learning_rate": 5.1403399385646634e-05, "loss": 1.5016, "step": 6247 }, { "epoch": 0.955169119052169, "grad_norm": 1.3046875, "learning_rate": 5.139949782129592e-05, "loss": 1.4885, "step": 6248 }, { "epoch": 0.9553219950315307, "grad_norm": 1.34375, "learning_rate": 5.139559635380166e-05, "loss": 1.229, "step": 6249 }, { "epoch": 0.9554748710108925, "grad_norm": 1.34375, "learning_rate": 5.139169498317158e-05, "loss": 1.1359, "step": 6250 }, { "epoch": 0.9556277469902541, "grad_norm": 1.3515625, "learning_rate": 5.1387793709413455e-05, "loss": 1.3551, "step": 6251 }, { "epoch": 0.9557806229696159, "grad_norm": 1.4296875, "learning_rate": 5.138389253253508e-05, "loss": 1.3338, "step": 6252 }, { "epoch": 0.9559334989489776, "grad_norm": 1.34375, "learning_rate": 5.1379991452544206e-05, "loss": 1.2219, "step": 6253 }, { "epoch": 0.9560863749283394, "grad_norm": 1.5703125, "learning_rate": 5.137609046944862e-05, "loss": 1.3697, "step": 6254 }, { "epoch": 0.9562392509077011, "grad_norm": 1.4296875, "learning_rate": 5.1372189583256045e-05, "loss": 1.4949, "step": 6255 }, { "epoch": 0.9563921268870629, "grad_norm": 1.34375, "learning_rate": 5.13682887939744e-05, "loss": 1.3816, "step": 6256 }, { "epoch": 0.9565450028664246, "grad_norm": 1.203125, "learning_rate": 5.1364388101611285e-05, "loss": 1.125, "step": 6257 }, { "epoch": 0.9566978788457864, "grad_norm": 1.484375, "learning_rate": 5.136048750617457e-05, "loss": 1.6958, "step": 6258 }, { "epoch": 0.9568507548251481, "grad_norm": 1.4296875, "learning_rate": 5.135658700767203e-05, "loss": 1.5637, "step": 6259 }, { "epoch": 0.9570036308045098, "grad_norm": 1.265625, "learning_rate": 5.13526866061114e-05, "loss": 1.2536, "step": 6260 }, { "epoch": 0.9571565067838715, "grad_norm": 1.359375, "learning_rate": 5.1348786301500484e-05, "loss": 1.3791, "step": 6261 }, { "epoch": 0.9573093827632333, "grad_norm": 1.3984375, "learning_rate": 5.134488609384704e-05, "loss": 1.438, "step": 6262 }, { "epoch": 0.957462258742595, "grad_norm": 1.421875, "learning_rate": 5.134098598315884e-05, "loss": 1.6544, "step": 6263 }, { "epoch": 0.9576151347219568, "grad_norm": 1.328125, "learning_rate": 5.133708596944362e-05, "loss": 1.2789, "step": 6264 }, { "epoch": 0.9577680107013186, "grad_norm": 1.3203125, "learning_rate": 5.133318605270927e-05, "loss": 1.2757, "step": 6265 }, { "epoch": 0.9579208866806803, "grad_norm": 1.4921875, "learning_rate": 5.1329286232963414e-05, "loss": 1.2444, "step": 6266 }, { "epoch": 0.9580737626600421, "grad_norm": 1.34375, "learning_rate": 5.1325386510213924e-05, "loss": 1.345, "step": 6267 }, { "epoch": 0.9582266386394038, "grad_norm": 1.234375, "learning_rate": 5.132148688446854e-05, "loss": 1.3323, "step": 6268 }, { "epoch": 0.9583795146187655, "grad_norm": 1.46875, "learning_rate": 5.131758735573504e-05, "loss": 1.3432, "step": 6269 }, { "epoch": 0.9585323905981272, "grad_norm": 1.453125, "learning_rate": 5.13136879240212e-05, "loss": 1.4143, "step": 6270 }, { "epoch": 0.958685266577489, "grad_norm": 1.359375, "learning_rate": 5.1309788589334776e-05, "loss": 1.1938, "step": 6271 }, { "epoch": 0.9588381425568507, "grad_norm": 1.328125, "learning_rate": 5.130588935168354e-05, "loss": 1.298, "step": 6272 }, { "epoch": 0.9589910185362125, "grad_norm": 1.5234375, "learning_rate": 5.130199021107527e-05, "loss": 1.7263, "step": 6273 }, { "epoch": 0.9591438945155742, "grad_norm": 1.2421875, "learning_rate": 5.129809116751769e-05, "loss": 1.5082, "step": 6274 }, { "epoch": 0.959296770494936, "grad_norm": 1.34375, "learning_rate": 5.12941922210187e-05, "loss": 1.2027, "step": 6275 }, { "epoch": 0.9594496464742978, "grad_norm": 1.3125, "learning_rate": 5.129029337158591e-05, "loss": 1.2123, "step": 6276 }, { "epoch": 0.9596025224536595, "grad_norm": 1.5390625, "learning_rate": 5.1286394619227205e-05, "loss": 1.3732, "step": 6277 }, { "epoch": 0.9597553984330212, "grad_norm": 1.3515625, "learning_rate": 5.1282495963950316e-05, "loss": 1.3509, "step": 6278 }, { "epoch": 0.9599082744123829, "grad_norm": 1.234375, "learning_rate": 5.127859740576301e-05, "loss": 1.2059, "step": 6279 }, { "epoch": 0.9600611503917447, "grad_norm": 1.4296875, "learning_rate": 5.127469894467305e-05, "loss": 1.2525, "step": 6280 }, { "epoch": 0.9602140263711064, "grad_norm": 1.4375, "learning_rate": 5.1270800580688225e-05, "loss": 1.4293, "step": 6281 }, { "epoch": 0.9603669023504682, "grad_norm": 1.3984375, "learning_rate": 5.1266902313816275e-05, "loss": 1.2678, "step": 6282 }, { "epoch": 0.9605197783298299, "grad_norm": 1.421875, "learning_rate": 5.1263004144064974e-05, "loss": 1.2302, "step": 6283 }, { "epoch": 0.9606726543091917, "grad_norm": 1.203125, "learning_rate": 5.125910607144212e-05, "loss": 1.1, "step": 6284 }, { "epoch": 0.9608255302885534, "grad_norm": 1.5, "learning_rate": 5.1255208095955476e-05, "loss": 1.662, "step": 6285 }, { "epoch": 0.9609784062679152, "grad_norm": 1.3359375, "learning_rate": 5.12513102176128e-05, "loss": 1.0372, "step": 6286 }, { "epoch": 0.9611312822472768, "grad_norm": 1.5078125, "learning_rate": 5.124741243642186e-05, "loss": 1.3703, "step": 6287 }, { "epoch": 0.9612841582266386, "grad_norm": 1.4140625, "learning_rate": 5.124351475239041e-05, "loss": 1.467, "step": 6288 }, { "epoch": 0.9614370342060004, "grad_norm": 1.421875, "learning_rate": 5.1239617165526246e-05, "loss": 1.4378, "step": 6289 }, { "epoch": 0.9615899101853621, "grad_norm": 1.3984375, "learning_rate": 5.1235719675837065e-05, "loss": 1.5397, "step": 6290 }, { "epoch": 0.9617427861647239, "grad_norm": 1.546875, "learning_rate": 5.1231822283330786e-05, "loss": 1.5215, "step": 6291 }, { "epoch": 0.9618956621440856, "grad_norm": 1.4140625, "learning_rate": 5.1227924988014985e-05, "loss": 1.287, "step": 6292 }, { "epoch": 0.9620485381234474, "grad_norm": 1.421875, "learning_rate": 5.1224027789897576e-05, "loss": 1.5363, "step": 6293 }, { "epoch": 0.9622014141028091, "grad_norm": 1.390625, "learning_rate": 5.122013068898631e-05, "loss": 1.1654, "step": 6294 }, { "epoch": 0.9623542900821709, "grad_norm": 1.5234375, "learning_rate": 5.121623368528884e-05, "loss": 1.3217, "step": 6295 }, { "epoch": 0.9625071660615325, "grad_norm": 1.2890625, "learning_rate": 5.121233677881305e-05, "loss": 1.1487, "step": 6296 }, { "epoch": 0.9626600420408943, "grad_norm": 1.46875, "learning_rate": 5.120843996956666e-05, "loss": 1.336, "step": 6297 }, { "epoch": 0.962812918020256, "grad_norm": 1.3671875, "learning_rate": 5.120454325755745e-05, "loss": 1.2613, "step": 6298 }, { "epoch": 0.9629657939996178, "grad_norm": 1.4296875, "learning_rate": 5.120064664279313e-05, "loss": 1.5915, "step": 6299 }, { "epoch": 0.9631186699789795, "grad_norm": 1.40625, "learning_rate": 5.1196750125281554e-05, "loss": 1.6184, "step": 6300 }, { "epoch": 0.9632715459583413, "grad_norm": 1.34375, "learning_rate": 5.119285370503048e-05, "loss": 1.2249, "step": 6301 }, { "epoch": 0.9634244219377031, "grad_norm": 1.2734375, "learning_rate": 5.118895738204757e-05, "loss": 1.2612, "step": 6302 }, { "epoch": 0.9635772979170648, "grad_norm": 1.4453125, "learning_rate": 5.11850611563407e-05, "loss": 1.7076, "step": 6303 }, { "epoch": 0.9637301738964266, "grad_norm": 1.4921875, "learning_rate": 5.118116502791759e-05, "loss": 1.5116, "step": 6304 }, { "epoch": 0.9638830498757882, "grad_norm": 1.328125, "learning_rate": 5.117726899678601e-05, "loss": 1.433, "step": 6305 }, { "epoch": 0.96403592585515, "grad_norm": 1.484375, "learning_rate": 5.1173373062953725e-05, "loss": 1.2325, "step": 6306 }, { "epoch": 0.9641888018345117, "grad_norm": 1.4296875, "learning_rate": 5.1169477226428484e-05, "loss": 1.5153, "step": 6307 }, { "epoch": 0.9643416778138735, "grad_norm": 1.34375, "learning_rate": 5.1165581487218086e-05, "loss": 1.3647, "step": 6308 }, { "epoch": 0.9644945537932352, "grad_norm": 1.4921875, "learning_rate": 5.116168584533021e-05, "loss": 1.5584, "step": 6309 }, { "epoch": 0.964647429772597, "grad_norm": 1.2890625, "learning_rate": 5.1157790300772766e-05, "loss": 1.1796, "step": 6310 }, { "epoch": 0.9648003057519587, "grad_norm": 1.4921875, "learning_rate": 5.1153894853553375e-05, "loss": 1.4832, "step": 6311 }, { "epoch": 0.9649531817313205, "grad_norm": 1.4296875, "learning_rate": 5.114999950367988e-05, "loss": 1.427, "step": 6312 }, { "epoch": 0.9651060577106823, "grad_norm": 1.4296875, "learning_rate": 5.114610425116003e-05, "loss": 1.1666, "step": 6313 }, { "epoch": 0.9652589336900439, "grad_norm": 1.4375, "learning_rate": 5.114220909600158e-05, "loss": 1.3111, "step": 6314 }, { "epoch": 0.9654118096694057, "grad_norm": 1.234375, "learning_rate": 5.1138314038212295e-05, "loss": 1.1211, "step": 6315 }, { "epoch": 0.9655646856487674, "grad_norm": 1.3515625, "learning_rate": 5.113441907779993e-05, "loss": 1.2488, "step": 6316 }, { "epoch": 0.9657175616281292, "grad_norm": 1.34375, "learning_rate": 5.113052421477227e-05, "loss": 1.4479, "step": 6317 }, { "epoch": 0.9658704376074909, "grad_norm": 1.421875, "learning_rate": 5.1126629449136996e-05, "loss": 1.4151, "step": 6318 }, { "epoch": 0.9660233135868527, "grad_norm": 1.2890625, "learning_rate": 5.112273478090199e-05, "loss": 1.214, "step": 6319 }, { "epoch": 0.9661761895662144, "grad_norm": 1.453125, "learning_rate": 5.1118840210075005e-05, "loss": 1.4269, "step": 6320 }, { "epoch": 0.9663290655455762, "grad_norm": 1.46875, "learning_rate": 5.1114945736663664e-05, "loss": 1.4991, "step": 6321 }, { "epoch": 0.9664819415249379, "grad_norm": 1.5703125, "learning_rate": 5.1111051360675864e-05, "loss": 1.4994, "step": 6322 }, { "epoch": 0.9666348175042996, "grad_norm": 1.265625, "learning_rate": 5.110715708211933e-05, "loss": 1.2699, "step": 6323 }, { "epoch": 0.9667876934836613, "grad_norm": 1.421875, "learning_rate": 5.1103262901001806e-05, "loss": 1.2916, "step": 6324 }, { "epoch": 0.9669405694630231, "grad_norm": 1.421875, "learning_rate": 5.1099368817331025e-05, "loss": 1.4242, "step": 6325 }, { "epoch": 0.9670934454423848, "grad_norm": 1.453125, "learning_rate": 5.109547483111486e-05, "loss": 1.4255, "step": 6326 }, { "epoch": 0.9672463214217466, "grad_norm": 1.3125, "learning_rate": 5.109158094236097e-05, "loss": 1.2892, "step": 6327 }, { "epoch": 0.9673991974011084, "grad_norm": 1.21875, "learning_rate": 5.108768715107709e-05, "loss": 1.3687, "step": 6328 }, { "epoch": 0.9675520733804701, "grad_norm": 1.421875, "learning_rate": 5.108379345727108e-05, "loss": 1.7486, "step": 6329 }, { "epoch": 0.9677049493598319, "grad_norm": 1.4140625, "learning_rate": 5.107989986095063e-05, "loss": 1.4809, "step": 6330 }, { "epoch": 0.9678578253391936, "grad_norm": 1.375, "learning_rate": 5.107600636212354e-05, "loss": 1.3638, "step": 6331 }, { "epoch": 0.9680107013185553, "grad_norm": 1.4609375, "learning_rate": 5.1072112960797544e-05, "loss": 1.4339, "step": 6332 }, { "epoch": 0.968163577297917, "grad_norm": 1.4375, "learning_rate": 5.106821965698041e-05, "loss": 1.2351, "step": 6333 }, { "epoch": 0.9683164532772788, "grad_norm": 1.328125, "learning_rate": 5.106432645067989e-05, "loss": 1.156, "step": 6334 }, { "epoch": 0.9684693292566405, "grad_norm": 1.53125, "learning_rate": 5.106043334190371e-05, "loss": 1.5288, "step": 6335 }, { "epoch": 0.9686222052360023, "grad_norm": 1.546875, "learning_rate": 5.105654033065974e-05, "loss": 1.4461, "step": 6336 }, { "epoch": 0.968775081215364, "grad_norm": 1.4296875, "learning_rate": 5.10526474169556e-05, "loss": 1.5529, "step": 6337 }, { "epoch": 0.9689279571947258, "grad_norm": 1.5859375, "learning_rate": 5.104875460079914e-05, "loss": 1.6393, "step": 6338 }, { "epoch": 0.9690808331740876, "grad_norm": 1.4140625, "learning_rate": 5.1044861882198126e-05, "loss": 1.5579, "step": 6339 }, { "epoch": 0.9692337091534493, "grad_norm": 1.296875, "learning_rate": 5.104096926116021e-05, "loss": 1.2513, "step": 6340 }, { "epoch": 0.969386585132811, "grad_norm": 1.390625, "learning_rate": 5.103707673769326e-05, "loss": 1.5095, "step": 6341 }, { "epoch": 0.9695394611121727, "grad_norm": 1.3515625, "learning_rate": 5.103318431180499e-05, "loss": 1.3675, "step": 6342 }, { "epoch": 0.9696923370915345, "grad_norm": 1.34375, "learning_rate": 5.102929198350316e-05, "loss": 1.3154, "step": 6343 }, { "epoch": 0.9698452130708962, "grad_norm": 1.578125, "learning_rate": 5.1025399752795475e-05, "loss": 1.3437, "step": 6344 }, { "epoch": 0.969998089050258, "grad_norm": 1.390625, "learning_rate": 5.1021507619689845e-05, "loss": 1.389, "step": 6345 }, { "epoch": 0.9701509650296197, "grad_norm": 1.21875, "learning_rate": 5.101761558419388e-05, "loss": 1.2821, "step": 6346 }, { "epoch": 0.9703038410089815, "grad_norm": 1.375, "learning_rate": 5.1013723646315336e-05, "loss": 1.3575, "step": 6347 }, { "epoch": 0.9704567169883432, "grad_norm": 1.3046875, "learning_rate": 5.100983180606207e-05, "loss": 1.2054, "step": 6348 }, { "epoch": 0.970609592967705, "grad_norm": 1.5625, "learning_rate": 5.1005940063441773e-05, "loss": 1.5366, "step": 6349 }, { "epoch": 0.9707624689470666, "grad_norm": 1.390625, "learning_rate": 5.100204841846221e-05, "loss": 1.3678, "step": 6350 }, { "epoch": 0.9709153449264284, "grad_norm": 1.515625, "learning_rate": 5.09981568711311e-05, "loss": 1.4852, "step": 6351 }, { "epoch": 0.9710682209057901, "grad_norm": 1.3125, "learning_rate": 5.099426542145631e-05, "loss": 1.3013, "step": 6352 }, { "epoch": 0.9712210968851519, "grad_norm": 1.4765625, "learning_rate": 5.09903740694455e-05, "loss": 1.3515, "step": 6353 }, { "epoch": 0.9713739728645137, "grad_norm": 1.59375, "learning_rate": 5.0986482815106406e-05, "loss": 1.3881, "step": 6354 }, { "epoch": 0.9715268488438754, "grad_norm": 1.2578125, "learning_rate": 5.0982591658446887e-05, "loss": 1.2265, "step": 6355 }, { "epoch": 0.9716797248232372, "grad_norm": 1.4140625, "learning_rate": 5.097870059947456e-05, "loss": 1.3994, "step": 6356 }, { "epoch": 0.9718326008025989, "grad_norm": 1.3203125, "learning_rate": 5.097480963819731e-05, "loss": 1.3461, "step": 6357 }, { "epoch": 0.9719854767819607, "grad_norm": 1.296875, "learning_rate": 5.0970918774622824e-05, "loss": 1.277, "step": 6358 }, { "epoch": 0.9721383527613223, "grad_norm": 1.34375, "learning_rate": 5.0967028008758875e-05, "loss": 1.3542, "step": 6359 }, { "epoch": 0.9722912287406841, "grad_norm": 1.515625, "learning_rate": 5.09631373406132e-05, "loss": 1.4903, "step": 6360 }, { "epoch": 0.9724441047200458, "grad_norm": 1.4140625, "learning_rate": 5.095924677019357e-05, "loss": 1.4144, "step": 6361 }, { "epoch": 0.9725969806994076, "grad_norm": 1.375, "learning_rate": 5.0955356297507715e-05, "loss": 1.2181, "step": 6362 }, { "epoch": 0.9727498566787693, "grad_norm": 1.25, "learning_rate": 5.095146592256339e-05, "loss": 1.2182, "step": 6363 }, { "epoch": 0.9729027326581311, "grad_norm": 1.328125, "learning_rate": 5.094757564536839e-05, "loss": 1.4045, "step": 6364 }, { "epoch": 0.9730556086374929, "grad_norm": 1.28125, "learning_rate": 5.0943685465930446e-05, "loss": 1.3198, "step": 6365 }, { "epoch": 0.9732084846168546, "grad_norm": 1.171875, "learning_rate": 5.0939795384257306e-05, "loss": 1.1305, "step": 6366 }, { "epoch": 0.9733613605962164, "grad_norm": 1.296875, "learning_rate": 5.093590540035671e-05, "loss": 1.4081, "step": 6367 }, { "epoch": 0.973514236575578, "grad_norm": 1.328125, "learning_rate": 5.0932015514236434e-05, "loss": 1.2058, "step": 6368 }, { "epoch": 0.9736671125549398, "grad_norm": 1.515625, "learning_rate": 5.092812572590422e-05, "loss": 1.3337, "step": 6369 }, { "epoch": 0.9738199885343015, "grad_norm": 1.5546875, "learning_rate": 5.092423603536777e-05, "loss": 1.3524, "step": 6370 }, { "epoch": 0.9739728645136633, "grad_norm": 1.328125, "learning_rate": 5.092034644263497e-05, "loss": 1.3444, "step": 6371 }, { "epoch": 0.974125740493025, "grad_norm": 1.3046875, "learning_rate": 5.0916456947713444e-05, "loss": 1.1489, "step": 6372 }, { "epoch": 0.9742786164723868, "grad_norm": 1.1484375, "learning_rate": 5.091256755061094e-05, "loss": 1.0212, "step": 6373 }, { "epoch": 0.9744314924517485, "grad_norm": 1.359375, "learning_rate": 5.0908678251335305e-05, "loss": 1.3392, "step": 6374 }, { "epoch": 0.9745843684311103, "grad_norm": 1.484375, "learning_rate": 5.090478904989424e-05, "loss": 1.3515, "step": 6375 }, { "epoch": 0.974737244410472, "grad_norm": 1.59375, "learning_rate": 5.090089994629549e-05, "loss": 1.3858, "step": 6376 }, { "epoch": 0.9748901203898337, "grad_norm": 1.3671875, "learning_rate": 5.089701094054682e-05, "loss": 1.2272, "step": 6377 }, { "epoch": 0.9750429963691954, "grad_norm": 1.1484375, "learning_rate": 5.089312203265596e-05, "loss": 0.9935, "step": 6378 }, { "epoch": 0.9751958723485572, "grad_norm": 1.5625, "learning_rate": 5.088923322263068e-05, "loss": 1.5207, "step": 6379 }, { "epoch": 0.975348748327919, "grad_norm": 1.453125, "learning_rate": 5.088534451047868e-05, "loss": 1.5308, "step": 6380 }, { "epoch": 0.9755016243072807, "grad_norm": 1.375, "learning_rate": 5.088145589620783e-05, "loss": 1.2844, "step": 6381 }, { "epoch": 0.9756545002866425, "grad_norm": 1.2734375, "learning_rate": 5.087756737982572e-05, "loss": 1.1193, "step": 6382 }, { "epoch": 0.9758073762660042, "grad_norm": 1.234375, "learning_rate": 5.087367896134022e-05, "loss": 1.1821, "step": 6383 }, { "epoch": 0.975960252245366, "grad_norm": 1.3515625, "learning_rate": 5.086979064075905e-05, "loss": 1.4632, "step": 6384 }, { "epoch": 0.9761131282247277, "grad_norm": 1.28125, "learning_rate": 5.0865902418089936e-05, "loss": 1.299, "step": 6385 }, { "epoch": 0.9762660042040894, "grad_norm": 1.3359375, "learning_rate": 5.0862014293340656e-05, "loss": 1.2383, "step": 6386 }, { "epoch": 0.9764188801834511, "grad_norm": 1.328125, "learning_rate": 5.0858126266518935e-05, "loss": 1.2911, "step": 6387 }, { "epoch": 0.9765717561628129, "grad_norm": 1.328125, "learning_rate": 5.085423833763252e-05, "loss": 1.3629, "step": 6388 }, { "epoch": 0.9767246321421746, "grad_norm": 1.5234375, "learning_rate": 5.085035050668914e-05, "loss": 1.4085, "step": 6389 }, { "epoch": 0.9768775081215364, "grad_norm": 1.640625, "learning_rate": 5.084646277369664e-05, "loss": 1.3494, "step": 6390 }, { "epoch": 0.9770303841008982, "grad_norm": 1.3984375, "learning_rate": 5.084257513866264e-05, "loss": 1.2935, "step": 6391 }, { "epoch": 0.9771832600802599, "grad_norm": 1.4140625, "learning_rate": 5.083868760159498e-05, "loss": 1.2826, "step": 6392 }, { "epoch": 0.9773361360596217, "grad_norm": 1.328125, "learning_rate": 5.083480016250137e-05, "loss": 1.5331, "step": 6393 }, { "epoch": 0.9774890120389834, "grad_norm": 1.265625, "learning_rate": 5.083091282138957e-05, "loss": 1.1842, "step": 6394 }, { "epoch": 0.9776418880183451, "grad_norm": 1.421875, "learning_rate": 5.082702557826731e-05, "loss": 1.4085, "step": 6395 }, { "epoch": 0.9777947639977068, "grad_norm": 1.328125, "learning_rate": 5.08231384331423e-05, "loss": 1.0543, "step": 6396 }, { "epoch": 0.9779476399770686, "grad_norm": 1.3515625, "learning_rate": 5.0819251386022414e-05, "loss": 1.5086, "step": 6397 }, { "epoch": 0.9781005159564303, "grad_norm": 1.25, "learning_rate": 5.081536443691529e-05, "loss": 1.0632, "step": 6398 }, { "epoch": 0.9782533919357921, "grad_norm": 1.3515625, "learning_rate": 5.081147758582867e-05, "loss": 1.332, "step": 6399 }, { "epoch": 0.9784062679151538, "grad_norm": 1.3671875, "learning_rate": 5.0807590832770394e-05, "loss": 1.3259, "step": 6400 }, { "epoch": 0.9785591438945156, "grad_norm": 1.53125, "learning_rate": 5.080370417774807e-05, "loss": 1.4984, "step": 6401 }, { "epoch": 0.9787120198738773, "grad_norm": 1.4375, "learning_rate": 5.079981762076955e-05, "loss": 1.267, "step": 6402 }, { "epoch": 0.9788648958532391, "grad_norm": 1.453125, "learning_rate": 5.079593116184257e-05, "loss": 1.3364, "step": 6403 }, { "epoch": 0.9790177718326007, "grad_norm": 1.359375, "learning_rate": 5.079204480097484e-05, "loss": 1.324, "step": 6404 }, { "epoch": 0.9791706478119625, "grad_norm": 1.453125, "learning_rate": 5.0788158538174134e-05, "loss": 1.3603, "step": 6405 }, { "epoch": 0.9793235237913243, "grad_norm": 1.3671875, "learning_rate": 5.0784272373448164e-05, "loss": 1.165, "step": 6406 }, { "epoch": 0.979476399770686, "grad_norm": 1.296875, "learning_rate": 5.0780386306804706e-05, "loss": 1.3495, "step": 6407 }, { "epoch": 0.9796292757500478, "grad_norm": 1.421875, "learning_rate": 5.077650033825145e-05, "loss": 1.3934, "step": 6408 }, { "epoch": 0.9797821517294095, "grad_norm": 1.4375, "learning_rate": 5.077261446779622e-05, "loss": 1.2235, "step": 6409 }, { "epoch": 0.9799350277087713, "grad_norm": 1.3671875, "learning_rate": 5.0768728695446736e-05, "loss": 1.4372, "step": 6410 }, { "epoch": 0.980087903688133, "grad_norm": 1.3046875, "learning_rate": 5.0764843021210704e-05, "loss": 1.2636, "step": 6411 }, { "epoch": 0.9802407796674948, "grad_norm": 1.3203125, "learning_rate": 5.0760957445095905e-05, "loss": 1.2976, "step": 6412 }, { "epoch": 0.9803936556468564, "grad_norm": 1.4296875, "learning_rate": 5.075707196711007e-05, "loss": 1.5122, "step": 6413 }, { "epoch": 0.9805465316262182, "grad_norm": 1.4296875, "learning_rate": 5.0753186587260935e-05, "loss": 1.3856, "step": 6414 }, { "epoch": 0.9806994076055799, "grad_norm": 1.359375, "learning_rate": 5.074930130555623e-05, "loss": 1.5033, "step": 6415 }, { "epoch": 0.9808522835849417, "grad_norm": 1.359375, "learning_rate": 5.074541612200379e-05, "loss": 1.3663, "step": 6416 }, { "epoch": 0.9810051595643035, "grad_norm": 1.484375, "learning_rate": 5.0741531036611204e-05, "loss": 1.4934, "step": 6417 }, { "epoch": 0.9811580355436652, "grad_norm": 1.4375, "learning_rate": 5.073764604938634e-05, "loss": 1.517, "step": 6418 }, { "epoch": 0.981310911523027, "grad_norm": 1.3828125, "learning_rate": 5.0733761160336925e-05, "loss": 1.3203, "step": 6419 }, { "epoch": 0.9814637875023887, "grad_norm": 1.3125, "learning_rate": 5.07298763694706e-05, "loss": 1.2891, "step": 6420 }, { "epoch": 0.9816166634817505, "grad_norm": 1.4765625, "learning_rate": 5.0725991676795226e-05, "loss": 1.5747, "step": 6421 }, { "epoch": 0.9817695394611121, "grad_norm": 1.3359375, "learning_rate": 5.07221070823185e-05, "loss": 1.3719, "step": 6422 }, { "epoch": 0.9819224154404739, "grad_norm": 1.3984375, "learning_rate": 5.0718222586048156e-05, "loss": 1.2333, "step": 6423 }, { "epoch": 0.9820752914198356, "grad_norm": 1.21875, "learning_rate": 5.0714338187991916e-05, "loss": 1.2336, "step": 6424 }, { "epoch": 0.9822281673991974, "grad_norm": 1.375, "learning_rate": 5.071045388815758e-05, "loss": 1.3745, "step": 6425 }, { "epoch": 0.9823810433785591, "grad_norm": 1.46875, "learning_rate": 5.0706569686552894e-05, "loss": 1.3251, "step": 6426 }, { "epoch": 0.9825339193579209, "grad_norm": 1.5, "learning_rate": 5.070268558318548e-05, "loss": 1.353, "step": 6427 }, { "epoch": 0.9826867953372826, "grad_norm": 1.484375, "learning_rate": 5.069880157806322e-05, "loss": 1.3206, "step": 6428 }, { "epoch": 0.9828396713166444, "grad_norm": 1.46875, "learning_rate": 5.069491767119377e-05, "loss": 1.4034, "step": 6429 }, { "epoch": 0.9829925472960062, "grad_norm": 1.328125, "learning_rate": 5.0691033862584916e-05, "loss": 1.2371, "step": 6430 }, { "epoch": 0.9831454232753678, "grad_norm": 1.34375, "learning_rate": 5.0687150152244355e-05, "loss": 1.4643, "step": 6431 }, { "epoch": 0.9832982992547296, "grad_norm": 1.4375, "learning_rate": 5.068326654017986e-05, "loss": 1.2214, "step": 6432 }, { "epoch": 0.9834511752340913, "grad_norm": 1.453125, "learning_rate": 5.0679383026399166e-05, "loss": 1.3144, "step": 6433 }, { "epoch": 0.9836040512134531, "grad_norm": 1.2421875, "learning_rate": 5.067549961090996e-05, "loss": 1.2154, "step": 6434 }, { "epoch": 0.9837569271928148, "grad_norm": 1.328125, "learning_rate": 5.06716162937201e-05, "loss": 1.3952, "step": 6435 }, { "epoch": 0.9839098031721766, "grad_norm": 1.3515625, "learning_rate": 5.06677330748372e-05, "loss": 1.4078, "step": 6436 }, { "epoch": 0.9840626791515383, "grad_norm": 1.265625, "learning_rate": 5.066384995426907e-05, "loss": 1.0731, "step": 6437 }, { "epoch": 0.9842155551309001, "grad_norm": 1.421875, "learning_rate": 5.065996693202344e-05, "loss": 1.4097, "step": 6438 }, { "epoch": 0.9843684311102618, "grad_norm": 1.4453125, "learning_rate": 5.065608400810803e-05, "loss": 1.5756, "step": 6439 }, { "epoch": 0.9845213070896235, "grad_norm": 1.265625, "learning_rate": 5.065220118253059e-05, "loss": 1.1896, "step": 6440 }, { "epoch": 0.9846741830689852, "grad_norm": 1.390625, "learning_rate": 5.064831845529886e-05, "loss": 1.4325, "step": 6441 }, { "epoch": 0.984827059048347, "grad_norm": 1.3203125, "learning_rate": 5.0644435826420565e-05, "loss": 1.3268, "step": 6442 }, { "epoch": 0.9849799350277088, "grad_norm": 1.4375, "learning_rate": 5.064055329590343e-05, "loss": 1.3831, "step": 6443 }, { "epoch": 0.9851328110070705, "grad_norm": 1.5, "learning_rate": 5.063667086375523e-05, "loss": 1.4816, "step": 6444 }, { "epoch": 0.9852856869864323, "grad_norm": 1.34375, "learning_rate": 5.0632788529983746e-05, "loss": 1.345, "step": 6445 }, { "epoch": 0.985438562965794, "grad_norm": 1.5234375, "learning_rate": 5.062890629459656e-05, "loss": 1.5703, "step": 6446 }, { "epoch": 0.9855914389451558, "grad_norm": 1.703125, "learning_rate": 5.0625024157601555e-05, "loss": 1.3844, "step": 6447 }, { "epoch": 0.9857443149245175, "grad_norm": 1.5546875, "learning_rate": 5.0621142119006405e-05, "loss": 1.4449, "step": 6448 }, { "epoch": 0.9858971909038792, "grad_norm": 1.40625, "learning_rate": 5.061726017881887e-05, "loss": 1.5147, "step": 6449 }, { "epoch": 0.9860500668832409, "grad_norm": 1.4453125, "learning_rate": 5.0613378337046626e-05, "loss": 1.4078, "step": 6450 }, { "epoch": 0.9862029428626027, "grad_norm": 1.5234375, "learning_rate": 5.0609496593697535e-05, "loss": 1.4451, "step": 6451 }, { "epoch": 0.9863558188419644, "grad_norm": 1.390625, "learning_rate": 5.060561494877922e-05, "loss": 1.2691, "step": 6452 }, { "epoch": 0.9865086948213262, "grad_norm": 1.3203125, "learning_rate": 5.060173340229942e-05, "loss": 1.464, "step": 6453 }, { "epoch": 0.986661570800688, "grad_norm": 1.3046875, "learning_rate": 5.059785195426594e-05, "loss": 1.2162, "step": 6454 }, { "epoch": 0.9868144467800497, "grad_norm": 1.4921875, "learning_rate": 5.059397060468647e-05, "loss": 1.5262, "step": 6455 }, { "epoch": 0.9869673227594115, "grad_norm": 1.5390625, "learning_rate": 5.059008935356877e-05, "loss": 1.4873, "step": 6456 }, { "epoch": 0.9871201987387732, "grad_norm": 1.5390625, "learning_rate": 5.058620820092055e-05, "loss": 1.6823, "step": 6457 }, { "epoch": 0.9872730747181349, "grad_norm": 1.375, "learning_rate": 5.0582327146749555e-05, "loss": 1.167, "step": 6458 }, { "epoch": 0.9874259506974966, "grad_norm": 1.4296875, "learning_rate": 5.057844619106351e-05, "loss": 1.2736, "step": 6459 }, { "epoch": 0.9875788266768584, "grad_norm": 1.359375, "learning_rate": 5.0574565333870125e-05, "loss": 1.1676, "step": 6460 }, { "epoch": 0.9877317026562201, "grad_norm": 1.375, "learning_rate": 5.057068457517724e-05, "loss": 1.2352, "step": 6461 }, { "epoch": 0.9878845786355819, "grad_norm": 1.453125, "learning_rate": 5.056680391499245e-05, "loss": 1.5344, "step": 6462 }, { "epoch": 0.9880374546149436, "grad_norm": 1.421875, "learning_rate": 5.056292335332358e-05, "loss": 1.4133, "step": 6463 }, { "epoch": 0.9881903305943054, "grad_norm": 1.328125, "learning_rate": 5.055904289017839e-05, "loss": 1.423, "step": 6464 }, { "epoch": 0.9883432065736671, "grad_norm": 1.4765625, "learning_rate": 5.055516252556447e-05, "loss": 1.3049, "step": 6465 }, { "epoch": 0.9884960825530289, "grad_norm": 1.375, "learning_rate": 5.0551282259489684e-05, "loss": 1.3993, "step": 6466 }, { "epoch": 0.9886489585323905, "grad_norm": 1.2734375, "learning_rate": 5.054740209196174e-05, "loss": 1.3601, "step": 6467 }, { "epoch": 0.9888018345117523, "grad_norm": 1.3203125, "learning_rate": 5.054352202298834e-05, "loss": 1.2663, "step": 6468 }, { "epoch": 0.988954710491114, "grad_norm": 1.3828125, "learning_rate": 5.0539642052577207e-05, "loss": 1.319, "step": 6469 }, { "epoch": 0.9891075864704758, "grad_norm": 1.4609375, "learning_rate": 5.053576218073617e-05, "loss": 1.4211, "step": 6470 }, { "epoch": 0.9892604624498376, "grad_norm": 1.2890625, "learning_rate": 5.0531882407472864e-05, "loss": 1.2376, "step": 6471 }, { "epoch": 0.9894133384291993, "grad_norm": 1.5390625, "learning_rate": 5.052800273279499e-05, "loss": 1.2799, "step": 6472 }, { "epoch": 0.9895662144085611, "grad_norm": 1.3671875, "learning_rate": 5.052412315671039e-05, "loss": 1.4072, "step": 6473 }, { "epoch": 0.9897190903879228, "grad_norm": 1.5078125, "learning_rate": 5.0520243679226744e-05, "loss": 1.369, "step": 6474 }, { "epoch": 0.9898719663672846, "grad_norm": 1.171875, "learning_rate": 5.051636430035178e-05, "loss": 0.9952, "step": 6475 }, { "epoch": 0.9900248423466462, "grad_norm": 1.34375, "learning_rate": 5.051248502009318e-05, "loss": 1.1776, "step": 6476 }, { "epoch": 0.990177718326008, "grad_norm": 1.3828125, "learning_rate": 5.0508605838458826e-05, "loss": 1.4821, "step": 6477 }, { "epoch": 0.9903305943053697, "grad_norm": 1.359375, "learning_rate": 5.05047267554563e-05, "loss": 1.4404, "step": 6478 }, { "epoch": 0.9904834702847315, "grad_norm": 1.3125, "learning_rate": 5.050084777109334e-05, "loss": 1.2026, "step": 6479 }, { "epoch": 0.9906363462640932, "grad_norm": 1.4296875, "learning_rate": 5.0496968885377804e-05, "loss": 1.5526, "step": 6480 }, { "epoch": 0.990789222243455, "grad_norm": 1.390625, "learning_rate": 5.049309009831726e-05, "loss": 1.5439, "step": 6481 }, { "epoch": 0.9909420982228168, "grad_norm": 1.5, "learning_rate": 5.0489211409919556e-05, "loss": 1.1912, "step": 6482 }, { "epoch": 0.9910949742021785, "grad_norm": 1.484375, "learning_rate": 5.0485332820192374e-05, "loss": 1.5776, "step": 6483 }, { "epoch": 0.9912478501815403, "grad_norm": 1.421875, "learning_rate": 5.048145432914346e-05, "loss": 1.3518, "step": 6484 }, { "epoch": 0.9914007261609019, "grad_norm": 1.3984375, "learning_rate": 5.047757593678053e-05, "loss": 1.2634, "step": 6485 }, { "epoch": 0.9915536021402637, "grad_norm": 1.3984375, "learning_rate": 5.047369764311132e-05, "loss": 1.5143, "step": 6486 }, { "epoch": 0.9917064781196254, "grad_norm": 1.375, "learning_rate": 5.046981944814355e-05, "loss": 1.368, "step": 6487 }, { "epoch": 0.9918593540989872, "grad_norm": 1.421875, "learning_rate": 5.0465941351884916e-05, "loss": 1.3845, "step": 6488 }, { "epoch": 0.9920122300783489, "grad_norm": 1.296875, "learning_rate": 5.0462063354343235e-05, "loss": 1.2195, "step": 6489 }, { "epoch": 0.9921651060577107, "grad_norm": 1.375, "learning_rate": 5.045818545552623e-05, "loss": 1.2265, "step": 6490 }, { "epoch": 0.9923179820370724, "grad_norm": 1.7734375, "learning_rate": 5.045430765544149e-05, "loss": 1.3878, "step": 6491 }, { "epoch": 0.9924708580164342, "grad_norm": 1.2890625, "learning_rate": 5.045042995409689e-05, "loss": 1.1182, "step": 6492 }, { "epoch": 0.992623733995796, "grad_norm": 1.3984375, "learning_rate": 5.044655235150011e-05, "loss": 1.477, "step": 6493 }, { "epoch": 0.9927766099751576, "grad_norm": 1.328125, "learning_rate": 5.0442674847658864e-05, "loss": 1.3227, "step": 6494 }, { "epoch": 0.9929294859545194, "grad_norm": 1.453125, "learning_rate": 5.043879744258086e-05, "loss": 1.5056, "step": 6495 }, { "epoch": 0.9930823619338811, "grad_norm": 1.2265625, "learning_rate": 5.0434920136273935e-05, "loss": 1.3637, "step": 6496 }, { "epoch": 0.9932352379132429, "grad_norm": 1.46875, "learning_rate": 5.0431042928745694e-05, "loss": 1.2674, "step": 6497 }, { "epoch": 0.9933881138926046, "grad_norm": 1.2578125, "learning_rate": 5.042716582000386e-05, "loss": 1.3201, "step": 6498 }, { "epoch": 0.9935409898719664, "grad_norm": 1.6875, "learning_rate": 5.042328881005627e-05, "loss": 1.3589, "step": 6499 }, { "epoch": 0.9936938658513281, "grad_norm": 1.359375, "learning_rate": 5.0419411898910575e-05, "loss": 1.2996, "step": 6500 } ], "logging_steps": 1, "max_steps": 45487, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.995982495783059e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }