{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998837479655894, "eval_steps": 500, "global_step": 2148, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013950244129272262, "grad_norm": 11.379429868139185, "learning_rate": 0.0, "loss": 1.3476, "step": 1 }, { "epoch": 0.0027900488258544524, "grad_norm": 10.786492716591312, "learning_rate": 4.6511627906976744e-08, "loss": 1.2479, "step": 2 }, { "epoch": 0.0041850732387816785, "grad_norm": 8.372280937783305, "learning_rate": 9.302325581395349e-08, "loss": 1.0961, "step": 3 }, { "epoch": 0.005580097651708905, "grad_norm": 10.736495380428916, "learning_rate": 1.3953488372093024e-07, "loss": 1.2604, "step": 4 }, { "epoch": 0.006975122064636131, "grad_norm": 10.116640860081516, "learning_rate": 1.8604651162790698e-07, "loss": 1.2106, "step": 5 }, { "epoch": 0.008370146477563357, "grad_norm": 10.766695575514, "learning_rate": 2.3255813953488374e-07, "loss": 1.2525, "step": 6 }, { "epoch": 0.009765170890490584, "grad_norm": 10.99454482427735, "learning_rate": 2.790697674418605e-07, "loss": 1.3476, "step": 7 }, { "epoch": 0.01116019530341781, "grad_norm": 11.28698129205508, "learning_rate": 3.2558139534883724e-07, "loss": 1.3435, "step": 8 }, { "epoch": 0.012555219716345037, "grad_norm": 8.962057899504524, "learning_rate": 3.7209302325581396e-07, "loss": 1.1189, "step": 9 }, { "epoch": 0.013950244129272262, "grad_norm": 9.664237520710328, "learning_rate": 4.186046511627907e-07, "loss": 1.1746, "step": 10 }, { "epoch": 0.015345268542199489, "grad_norm": 10.510339642939577, "learning_rate": 4.651162790697675e-07, "loss": 1.1856, "step": 11 }, { "epoch": 0.016740292955126714, "grad_norm": 8.450418704286758, "learning_rate": 5.116279069767442e-07, "loss": 1.1146, "step": 12 }, { "epoch": 0.01813531736805394, "grad_norm": 9.997481930337566, "learning_rate": 5.58139534883721e-07, "loss": 1.2582, "step": 13 }, { "epoch": 0.01953034178098117, "grad_norm": 11.164304506605909, "learning_rate": 6.046511627906977e-07, "loss": 1.3128, "step": 14 }, { "epoch": 0.020925366193908394, "grad_norm": 7.289362941364803, "learning_rate": 6.511627906976745e-07, "loss": 1.0723, "step": 15 }, { "epoch": 0.02232039060683562, "grad_norm": 9.18540276694778, "learning_rate": 6.976744186046513e-07, "loss": 1.1867, "step": 16 }, { "epoch": 0.023715415019762844, "grad_norm": 6.865370374666227, "learning_rate": 7.441860465116279e-07, "loss": 0.9764, "step": 17 }, { "epoch": 0.025110439432690073, "grad_norm": 9.96819423233472, "learning_rate": 7.906976744186047e-07, "loss": 1.3074, "step": 18 }, { "epoch": 0.0265054638456173, "grad_norm": 6.204207152619008, "learning_rate": 8.372093023255814e-07, "loss": 1.0375, "step": 19 }, { "epoch": 0.027900488258544524, "grad_norm": 8.657363364036945, "learning_rate": 8.837209302325582e-07, "loss": 1.2089, "step": 20 }, { "epoch": 0.029295512671471752, "grad_norm": 6.904545977440263, "learning_rate": 9.30232558139535e-07, "loss": 1.1135, "step": 21 }, { "epoch": 0.030690537084398978, "grad_norm": 6.340758365488793, "learning_rate": 9.767441860465117e-07, "loss": 1.1376, "step": 22 }, { "epoch": 0.03208556149732621, "grad_norm": 7.048377377112544, "learning_rate": 1.0232558139534884e-06, "loss": 1.1534, "step": 23 }, { "epoch": 0.03348058591025343, "grad_norm": 4.1272109948386015, "learning_rate": 1.0697674418604653e-06, "loss": 0.9849, "step": 24 }, { "epoch": 0.03487561032318066, "grad_norm": 6.172771604760807, "learning_rate": 1.116279069767442e-06, "loss": 1.1143, "step": 25 }, { "epoch": 0.03627063473610788, "grad_norm": 4.732758407469545, "learning_rate": 1.1627906976744188e-06, "loss": 1.0879, "step": 26 }, { "epoch": 0.03766565914903511, "grad_norm": 6.223350501520425, "learning_rate": 1.2093023255813954e-06, "loss": 1.1569, "step": 27 }, { "epoch": 0.03906068356196234, "grad_norm": 4.551580411005667, "learning_rate": 1.2558139534883723e-06, "loss": 1.0589, "step": 28 }, { "epoch": 0.04045570797488956, "grad_norm": 4.88083111097044, "learning_rate": 1.302325581395349e-06, "loss": 1.0142, "step": 29 }, { "epoch": 0.04185073238781679, "grad_norm": 5.514709686739808, "learning_rate": 1.3488372093023258e-06, "loss": 1.0306, "step": 30 }, { "epoch": 0.043245756800744016, "grad_norm": 6.464253894289652, "learning_rate": 1.3953488372093025e-06, "loss": 0.9414, "step": 31 }, { "epoch": 0.04464078121367124, "grad_norm": 5.969591244940147, "learning_rate": 1.4418604651162794e-06, "loss": 1.051, "step": 32 }, { "epoch": 0.04603580562659847, "grad_norm": 5.055405020242308, "learning_rate": 1.4883720930232558e-06, "loss": 1.0999, "step": 33 }, { "epoch": 0.04743083003952569, "grad_norm": 4.263516966840642, "learning_rate": 1.534883720930233e-06, "loss": 1.0341, "step": 34 }, { "epoch": 0.04882585445245292, "grad_norm": 4.076774893179348, "learning_rate": 1.5813953488372093e-06, "loss": 1.0142, "step": 35 }, { "epoch": 0.050220878865380146, "grad_norm": 4.011205347799821, "learning_rate": 1.6279069767441862e-06, "loss": 1.0805, "step": 36 }, { "epoch": 0.05161590327830737, "grad_norm": 3.566820403854327, "learning_rate": 1.6744186046511629e-06, "loss": 0.9663, "step": 37 }, { "epoch": 0.0530109276912346, "grad_norm": 3.220677288681038, "learning_rate": 1.7209302325581397e-06, "loss": 0.9265, "step": 38 }, { "epoch": 0.054405952104161825, "grad_norm": 3.223199084348664, "learning_rate": 1.7674418604651164e-06, "loss": 1.0135, "step": 39 }, { "epoch": 0.05580097651708905, "grad_norm": 3.3678339121689262, "learning_rate": 1.8139534883720933e-06, "loss": 0.9648, "step": 40 }, { "epoch": 0.057196000930016276, "grad_norm": 3.2913032573688112, "learning_rate": 1.86046511627907e-06, "loss": 0.9187, "step": 41 }, { "epoch": 0.058591025342943505, "grad_norm": 3.299053941968672, "learning_rate": 1.9069767441860468e-06, "loss": 0.915, "step": 42 }, { "epoch": 0.05998604975587073, "grad_norm": 3.1479203143233074, "learning_rate": 1.9534883720930235e-06, "loss": 0.9103, "step": 43 }, { "epoch": 0.061381074168797956, "grad_norm": 3.1962271038243246, "learning_rate": 2.0000000000000003e-06, "loss": 0.9446, "step": 44 }, { "epoch": 0.06277609858172518, "grad_norm": 3.3696107942557862, "learning_rate": 2.0465116279069768e-06, "loss": 1.0068, "step": 45 }, { "epoch": 0.06417112299465241, "grad_norm": 3.255282468949123, "learning_rate": 2.0930232558139536e-06, "loss": 0.8964, "step": 46 }, { "epoch": 0.06556614740757963, "grad_norm": 3.2533830705153144, "learning_rate": 2.1395348837209305e-06, "loss": 0.9856, "step": 47 }, { "epoch": 0.06696117182050686, "grad_norm": 2.8479055126994, "learning_rate": 2.1860465116279074e-06, "loss": 0.9416, "step": 48 }, { "epoch": 0.06835619623343409, "grad_norm": 2.952565799210018, "learning_rate": 2.232558139534884e-06, "loss": 0.9769, "step": 49 }, { "epoch": 0.06975122064636131, "grad_norm": 2.9876273326185707, "learning_rate": 2.2790697674418607e-06, "loss": 0.9474, "step": 50 }, { "epoch": 0.07114624505928854, "grad_norm": 2.769112422790038, "learning_rate": 2.3255813953488376e-06, "loss": 1.0002, "step": 51 }, { "epoch": 0.07254126947221576, "grad_norm": 2.7143094302429818, "learning_rate": 2.3720930232558144e-06, "loss": 0.9408, "step": 52 }, { "epoch": 0.07393629388514299, "grad_norm": 2.88367851575833, "learning_rate": 2.418604651162791e-06, "loss": 0.9457, "step": 53 }, { "epoch": 0.07533131829807022, "grad_norm": 2.9546459851743827, "learning_rate": 2.4651162790697678e-06, "loss": 0.9692, "step": 54 }, { "epoch": 0.07672634271099744, "grad_norm": 2.7510099063140445, "learning_rate": 2.5116279069767446e-06, "loss": 0.9352, "step": 55 }, { "epoch": 0.07812136712392467, "grad_norm": 2.6438847327166446, "learning_rate": 2.558139534883721e-06, "loss": 0.9551, "step": 56 }, { "epoch": 0.07951639153685189, "grad_norm": 2.6961012862270786, "learning_rate": 2.604651162790698e-06, "loss": 0.9291, "step": 57 }, { "epoch": 0.08091141594977912, "grad_norm": 2.7010244460234776, "learning_rate": 2.6511627906976744e-06, "loss": 0.919, "step": 58 }, { "epoch": 0.08230644036270635, "grad_norm": 2.7677317860126296, "learning_rate": 2.6976744186046517e-06, "loss": 1.0134, "step": 59 }, { "epoch": 0.08370146477563357, "grad_norm": 2.653458107883697, "learning_rate": 2.744186046511628e-06, "loss": 0.8584, "step": 60 }, { "epoch": 0.0850964891885608, "grad_norm": 2.5663696516620793, "learning_rate": 2.790697674418605e-06, "loss": 0.8595, "step": 61 }, { "epoch": 0.08649151360148803, "grad_norm": 2.652957590809662, "learning_rate": 2.8372093023255815e-06, "loss": 0.9573, "step": 62 }, { "epoch": 0.08788653801441525, "grad_norm": 2.479119998384544, "learning_rate": 2.8837209302325587e-06, "loss": 0.8479, "step": 63 }, { "epoch": 0.08928156242734248, "grad_norm": 2.7428906702008535, "learning_rate": 2.930232558139535e-06, "loss": 0.9596, "step": 64 }, { "epoch": 0.0906765868402697, "grad_norm": 2.6090941088401065, "learning_rate": 2.9767441860465116e-06, "loss": 0.8955, "step": 65 }, { "epoch": 0.09207161125319693, "grad_norm": 2.8774600614922816, "learning_rate": 3.0232558139534885e-06, "loss": 0.9253, "step": 66 }, { "epoch": 0.09346663566612416, "grad_norm": 2.5188045820933307, "learning_rate": 3.069767441860466e-06, "loss": 0.9836, "step": 67 }, { "epoch": 0.09486166007905138, "grad_norm": 2.621248713048024, "learning_rate": 3.1162790697674423e-06, "loss": 0.95, "step": 68 }, { "epoch": 0.0962566844919786, "grad_norm": 2.6556850886558485, "learning_rate": 3.1627906976744187e-06, "loss": 0.8778, "step": 69 }, { "epoch": 0.09765170890490583, "grad_norm": 2.6314137408062885, "learning_rate": 3.2093023255813956e-06, "loss": 0.835, "step": 70 }, { "epoch": 0.09904673331783306, "grad_norm": 2.698267862575301, "learning_rate": 3.2558139534883724e-06, "loss": 0.8684, "step": 71 }, { "epoch": 0.10044175773076029, "grad_norm": 2.617795512315845, "learning_rate": 3.3023255813953493e-06, "loss": 0.891, "step": 72 }, { "epoch": 0.10183678214368752, "grad_norm": 2.683367047578061, "learning_rate": 3.3488372093023258e-06, "loss": 0.8752, "step": 73 }, { "epoch": 0.10323180655661474, "grad_norm": 2.6571672444579586, "learning_rate": 3.3953488372093026e-06, "loss": 0.8913, "step": 74 }, { "epoch": 0.10462683096954196, "grad_norm": 2.6230124222656377, "learning_rate": 3.4418604651162795e-06, "loss": 0.8715, "step": 75 }, { "epoch": 0.1060218553824692, "grad_norm": 2.7284400098091712, "learning_rate": 3.4883720930232564e-06, "loss": 0.7679, "step": 76 }, { "epoch": 0.10741687979539642, "grad_norm": 2.310749956470483, "learning_rate": 3.534883720930233e-06, "loss": 0.7616, "step": 77 }, { "epoch": 0.10881190420832365, "grad_norm": 2.636425611386178, "learning_rate": 3.5813953488372093e-06, "loss": 0.917, "step": 78 }, { "epoch": 0.11020692862125087, "grad_norm": 2.4640065771763404, "learning_rate": 3.6279069767441866e-06, "loss": 0.8998, "step": 79 }, { "epoch": 0.1116019530341781, "grad_norm": 2.675569525544172, "learning_rate": 3.674418604651163e-06, "loss": 0.9195, "step": 80 }, { "epoch": 0.11299697744710532, "grad_norm": 2.719789067867562, "learning_rate": 3.72093023255814e-06, "loss": 0.8839, "step": 81 }, { "epoch": 0.11439200186003255, "grad_norm": 2.8965834985964016, "learning_rate": 3.7674418604651163e-06, "loss": 0.9911, "step": 82 }, { "epoch": 0.11578702627295978, "grad_norm": 2.6459997865591083, "learning_rate": 3.8139534883720936e-06, "loss": 0.8737, "step": 83 }, { "epoch": 0.11718205068588701, "grad_norm": 2.5811078506381246, "learning_rate": 3.86046511627907e-06, "loss": 0.88, "step": 84 }, { "epoch": 0.11857707509881422, "grad_norm": 2.3705691118568657, "learning_rate": 3.906976744186047e-06, "loss": 0.8546, "step": 85 }, { "epoch": 0.11997209951174145, "grad_norm": 2.4807971770859645, "learning_rate": 3.953488372093024e-06, "loss": 0.7756, "step": 86 }, { "epoch": 0.12136712392466868, "grad_norm": 2.7849607872761832, "learning_rate": 4.000000000000001e-06, "loss": 0.9276, "step": 87 }, { "epoch": 0.12276214833759591, "grad_norm": 2.6532978828687415, "learning_rate": 4.0465116279069775e-06, "loss": 0.9252, "step": 88 }, { "epoch": 0.12415717275052314, "grad_norm": 2.6745199366941628, "learning_rate": 4.0930232558139536e-06, "loss": 0.977, "step": 89 }, { "epoch": 0.12555219716345037, "grad_norm": 2.58811853301491, "learning_rate": 4.1395348837209304e-06, "loss": 0.8462, "step": 90 }, { "epoch": 0.12694722157637758, "grad_norm": 2.527892862884847, "learning_rate": 4.186046511627907e-06, "loss": 0.8642, "step": 91 }, { "epoch": 0.12834224598930483, "grad_norm": 2.6462488204829153, "learning_rate": 4.232558139534884e-06, "loss": 0.9091, "step": 92 }, { "epoch": 0.12973727040223204, "grad_norm": 2.7173396797677563, "learning_rate": 4.279069767441861e-06, "loss": 0.8592, "step": 93 }, { "epoch": 0.13113229481515926, "grad_norm": 2.629838578169337, "learning_rate": 4.325581395348837e-06, "loss": 0.8112, "step": 94 }, { "epoch": 0.1325273192280865, "grad_norm": 2.1938663201105597, "learning_rate": 4.372093023255815e-06, "loss": 0.7786, "step": 95 }, { "epoch": 0.1339223436410137, "grad_norm": 2.4589524966830414, "learning_rate": 4.418604651162791e-06, "loss": 0.8424, "step": 96 }, { "epoch": 0.13531736805394096, "grad_norm": 2.6942559494127996, "learning_rate": 4.465116279069768e-06, "loss": 0.8359, "step": 97 }, { "epoch": 0.13671239246686817, "grad_norm": 2.5897936189656434, "learning_rate": 4.5116279069767445e-06, "loss": 0.783, "step": 98 }, { "epoch": 0.13810741687979539, "grad_norm": 2.5525670294619776, "learning_rate": 4.558139534883721e-06, "loss": 0.8959, "step": 99 }, { "epoch": 0.13950244129272263, "grad_norm": 2.4769973037530426, "learning_rate": 4.604651162790698e-06, "loss": 0.831, "step": 100 }, { "epoch": 0.14089746570564984, "grad_norm": 2.654290195849283, "learning_rate": 4.651162790697675e-06, "loss": 0.8989, "step": 101 }, { "epoch": 0.1422924901185771, "grad_norm": 2.5503859026189124, "learning_rate": 4.697674418604651e-06, "loss": 0.8735, "step": 102 }, { "epoch": 0.1436875145315043, "grad_norm": 2.5221248593642143, "learning_rate": 4.744186046511629e-06, "loss": 0.8294, "step": 103 }, { "epoch": 0.14508253894443152, "grad_norm": 2.7318331713327937, "learning_rate": 4.790697674418605e-06, "loss": 0.8626, "step": 104 }, { "epoch": 0.14647756335735876, "grad_norm": 2.5254527669229634, "learning_rate": 4.837209302325582e-06, "loss": 0.8223, "step": 105 }, { "epoch": 0.14787258777028597, "grad_norm": 2.518313495366996, "learning_rate": 4.883720930232559e-06, "loss": 0.8125, "step": 106 }, { "epoch": 0.14926761218321322, "grad_norm": 2.632563815689639, "learning_rate": 4.9302325581395355e-06, "loss": 0.8962, "step": 107 }, { "epoch": 0.15066263659614043, "grad_norm": 2.484754209356544, "learning_rate": 4.976744186046512e-06, "loss": 0.8412, "step": 108 }, { "epoch": 0.15205766100906765, "grad_norm": 2.9499298530816027, "learning_rate": 5.023255813953489e-06, "loss": 1.0579, "step": 109 }, { "epoch": 0.1534526854219949, "grad_norm": 2.379414400893228, "learning_rate": 5.069767441860466e-06, "loss": 0.7961, "step": 110 }, { "epoch": 0.1548477098349221, "grad_norm": 2.4732868635767637, "learning_rate": 5.116279069767442e-06, "loss": 0.821, "step": 111 }, { "epoch": 0.15624273424784935, "grad_norm": 2.469356852853981, "learning_rate": 5.162790697674419e-06, "loss": 0.8826, "step": 112 }, { "epoch": 0.15763775866077656, "grad_norm": 2.624112360308422, "learning_rate": 5.209302325581396e-06, "loss": 0.8553, "step": 113 }, { "epoch": 0.15903278307370378, "grad_norm": 2.6295855603722584, "learning_rate": 5.255813953488372e-06, "loss": 0.871, "step": 114 }, { "epoch": 0.16042780748663102, "grad_norm": 2.684784880567183, "learning_rate": 5.302325581395349e-06, "loss": 0.881, "step": 115 }, { "epoch": 0.16182283189955823, "grad_norm": 2.5873345437710653, "learning_rate": 5.348837209302326e-06, "loss": 0.8537, "step": 116 }, { "epoch": 0.16321785631248548, "grad_norm": 2.69536707928066, "learning_rate": 5.395348837209303e-06, "loss": 0.9017, "step": 117 }, { "epoch": 0.1646128807254127, "grad_norm": 2.6881129852116827, "learning_rate": 5.44186046511628e-06, "loss": 0.8486, "step": 118 }, { "epoch": 0.16600790513833993, "grad_norm": 2.635300798477986, "learning_rate": 5.488372093023256e-06, "loss": 0.8154, "step": 119 }, { "epoch": 0.16740292955126715, "grad_norm": 2.6624058009242937, "learning_rate": 5.534883720930233e-06, "loss": 0.8408, "step": 120 }, { "epoch": 0.16879795396419436, "grad_norm": 2.685420218898209, "learning_rate": 5.58139534883721e-06, "loss": 0.8572, "step": 121 }, { "epoch": 0.1701929783771216, "grad_norm": 2.7618371194860716, "learning_rate": 5.627906976744186e-06, "loss": 0.9026, "step": 122 }, { "epoch": 0.17158800279004882, "grad_norm": 2.905023639498727, "learning_rate": 5.674418604651163e-06, "loss": 0.9025, "step": 123 }, { "epoch": 0.17298302720297606, "grad_norm": 2.6557494213408526, "learning_rate": 5.72093023255814e-06, "loss": 0.862, "step": 124 }, { "epoch": 0.17437805161590328, "grad_norm": 2.369404613266765, "learning_rate": 5.7674418604651175e-06, "loss": 0.8888, "step": 125 }, { "epoch": 0.1757730760288305, "grad_norm": 2.56909676029086, "learning_rate": 5.8139534883720935e-06, "loss": 0.8739, "step": 126 }, { "epoch": 0.17716810044175774, "grad_norm": 2.6463803892354885, "learning_rate": 5.86046511627907e-06, "loss": 0.881, "step": 127 }, { "epoch": 0.17856312485468495, "grad_norm": 2.472359482023516, "learning_rate": 5.906976744186047e-06, "loss": 0.8566, "step": 128 }, { "epoch": 0.1799581492676122, "grad_norm": 2.478254873430084, "learning_rate": 5.953488372093023e-06, "loss": 0.9464, "step": 129 }, { "epoch": 0.1813531736805394, "grad_norm": 2.6608861654591762, "learning_rate": 6e-06, "loss": 0.8724, "step": 130 }, { "epoch": 0.18274819809346662, "grad_norm": 2.59088730259204, "learning_rate": 6.046511627906977e-06, "loss": 0.7852, "step": 131 }, { "epoch": 0.18414322250639387, "grad_norm": 2.46645648566402, "learning_rate": 6.093023255813954e-06, "loss": 0.8958, "step": 132 }, { "epoch": 0.18553824691932108, "grad_norm": 2.6405825589960723, "learning_rate": 6.139534883720932e-06, "loss": 0.8526, "step": 133 }, { "epoch": 0.18693327133224832, "grad_norm": 2.4016905424386974, "learning_rate": 6.186046511627908e-06, "loss": 0.8398, "step": 134 }, { "epoch": 0.18832829574517554, "grad_norm": 2.7111687006610894, "learning_rate": 6.2325581395348845e-06, "loss": 0.906, "step": 135 }, { "epoch": 0.18972332015810275, "grad_norm": 2.597724264012575, "learning_rate": 6.279069767441861e-06, "loss": 0.8813, "step": 136 }, { "epoch": 0.19111834457103, "grad_norm": 2.63171357440872, "learning_rate": 6.325581395348837e-06, "loss": 0.8534, "step": 137 }, { "epoch": 0.1925133689839572, "grad_norm": 2.643583484726907, "learning_rate": 6.372093023255814e-06, "loss": 0.8662, "step": 138 }, { "epoch": 0.19390839339688445, "grad_norm": 2.8758855127243326, "learning_rate": 6.418604651162791e-06, "loss": 0.8406, "step": 139 }, { "epoch": 0.19530341780981167, "grad_norm": 2.6656389646587475, "learning_rate": 6.465116279069767e-06, "loss": 0.8899, "step": 140 }, { "epoch": 0.1966984422227389, "grad_norm": 2.689464231405438, "learning_rate": 6.511627906976745e-06, "loss": 0.925, "step": 141 }, { "epoch": 0.19809346663566613, "grad_norm": 2.681578885009761, "learning_rate": 6.558139534883722e-06, "loss": 0.8562, "step": 142 }, { "epoch": 0.19948849104859334, "grad_norm": 2.478161301027331, "learning_rate": 6.604651162790699e-06, "loss": 0.8401, "step": 143 }, { "epoch": 0.20088351546152058, "grad_norm": 2.527248076453986, "learning_rate": 6.651162790697675e-06, "loss": 0.8166, "step": 144 }, { "epoch": 0.2022785398744478, "grad_norm": 2.5182193607516554, "learning_rate": 6.6976744186046515e-06, "loss": 0.9298, "step": 145 }, { "epoch": 0.20367356428737504, "grad_norm": 2.5267286173323376, "learning_rate": 6.744186046511628e-06, "loss": 0.8569, "step": 146 }, { "epoch": 0.20506858870030226, "grad_norm": 2.787541389302817, "learning_rate": 6.790697674418605e-06, "loss": 0.9599, "step": 147 }, { "epoch": 0.20646361311322947, "grad_norm": 2.516750288989788, "learning_rate": 6.837209302325581e-06, "loss": 0.8445, "step": 148 }, { "epoch": 0.20785863752615671, "grad_norm": 2.676370734047779, "learning_rate": 6.883720930232559e-06, "loss": 0.9353, "step": 149 }, { "epoch": 0.20925366193908393, "grad_norm": 2.632997866339043, "learning_rate": 6.930232558139536e-06, "loss": 0.8689, "step": 150 }, { "epoch": 0.21064868635201117, "grad_norm": 2.6659186815099503, "learning_rate": 6.976744186046513e-06, "loss": 0.8653, "step": 151 }, { "epoch": 0.2120437107649384, "grad_norm": 2.4695213472136603, "learning_rate": 7.023255813953489e-06, "loss": 0.8339, "step": 152 }, { "epoch": 0.2134387351778656, "grad_norm": 2.614530131373863, "learning_rate": 7.069767441860466e-06, "loss": 0.883, "step": 153 }, { "epoch": 0.21483375959079284, "grad_norm": 2.525730595783954, "learning_rate": 7.1162790697674425e-06, "loss": 0.8466, "step": 154 }, { "epoch": 0.21622878400372006, "grad_norm": 2.479680632287938, "learning_rate": 7.1627906976744185e-06, "loss": 0.8247, "step": 155 }, { "epoch": 0.2176238084166473, "grad_norm": 2.5343231097405097, "learning_rate": 7.209302325581395e-06, "loss": 0.8706, "step": 156 }, { "epoch": 0.21901883282957452, "grad_norm": 2.32813051232102, "learning_rate": 7.255813953488373e-06, "loss": 0.7901, "step": 157 }, { "epoch": 0.22041385724250173, "grad_norm": 2.642515912261227, "learning_rate": 7.30232558139535e-06, "loss": 0.868, "step": 158 }, { "epoch": 0.22180888165542897, "grad_norm": 2.4885609129417294, "learning_rate": 7.348837209302326e-06, "loss": 0.8485, "step": 159 }, { "epoch": 0.2232039060683562, "grad_norm": 2.5570893478155545, "learning_rate": 7.395348837209303e-06, "loss": 0.8608, "step": 160 }, { "epoch": 0.22459893048128343, "grad_norm": 2.6687548081457475, "learning_rate": 7.44186046511628e-06, "loss": 0.8557, "step": 161 }, { "epoch": 0.22599395489421065, "grad_norm": 2.5029993306700558, "learning_rate": 7.488372093023256e-06, "loss": 0.8494, "step": 162 }, { "epoch": 0.22738897930713786, "grad_norm": 2.539979640357519, "learning_rate": 7.534883720930233e-06, "loss": 0.8984, "step": 163 }, { "epoch": 0.2287840037200651, "grad_norm": 2.323736952152181, "learning_rate": 7.5813953488372095e-06, "loss": 0.7936, "step": 164 }, { "epoch": 0.23017902813299232, "grad_norm": 2.799512827536288, "learning_rate": 7.627906976744187e-06, "loss": 0.8662, "step": 165 }, { "epoch": 0.23157405254591956, "grad_norm": 2.1760379427980334, "learning_rate": 7.674418604651164e-06, "loss": 0.7623, "step": 166 }, { "epoch": 0.23296907695884678, "grad_norm": 2.7185189156028238, "learning_rate": 7.72093023255814e-06, "loss": 0.8114, "step": 167 }, { "epoch": 0.23436410137177402, "grad_norm": 2.525252118751366, "learning_rate": 7.767441860465116e-06, "loss": 0.8417, "step": 168 }, { "epoch": 0.23575912578470123, "grad_norm": 2.542521161862427, "learning_rate": 7.813953488372094e-06, "loss": 0.8447, "step": 169 }, { "epoch": 0.23715415019762845, "grad_norm": 2.4184657922567796, "learning_rate": 7.86046511627907e-06, "loss": 0.7546, "step": 170 }, { "epoch": 0.2385491746105557, "grad_norm": 2.346603792187153, "learning_rate": 7.906976744186048e-06, "loss": 0.8373, "step": 171 }, { "epoch": 0.2399441990234829, "grad_norm": 2.7766124072308154, "learning_rate": 7.953488372093024e-06, "loss": 0.8358, "step": 172 }, { "epoch": 0.24133922343641015, "grad_norm": 2.630395885799787, "learning_rate": 8.000000000000001e-06, "loss": 0.8681, "step": 173 }, { "epoch": 0.24273424784933736, "grad_norm": 2.4503940498857455, "learning_rate": 8.046511627906977e-06, "loss": 0.8339, "step": 174 }, { "epoch": 0.24412927226226458, "grad_norm": 2.5293611626016173, "learning_rate": 8.093023255813955e-06, "loss": 0.8705, "step": 175 }, { "epoch": 0.24552429667519182, "grad_norm": 2.6812009953625386, "learning_rate": 8.139534883720931e-06, "loss": 0.8973, "step": 176 }, { "epoch": 0.24691932108811904, "grad_norm": 2.7143808569295533, "learning_rate": 8.186046511627907e-06, "loss": 0.8843, "step": 177 }, { "epoch": 0.24831434550104628, "grad_norm": 2.660439424385717, "learning_rate": 8.232558139534885e-06, "loss": 0.8831, "step": 178 }, { "epoch": 0.2497093699139735, "grad_norm": 2.5002399785608214, "learning_rate": 8.279069767441861e-06, "loss": 0.8146, "step": 179 }, { "epoch": 0.25110439432690074, "grad_norm": 2.6483152873790115, "learning_rate": 8.325581395348837e-06, "loss": 0.8778, "step": 180 }, { "epoch": 0.25249941873982795, "grad_norm": 2.6098964212793563, "learning_rate": 8.372093023255815e-06, "loss": 0.8577, "step": 181 }, { "epoch": 0.25389444315275517, "grad_norm": 2.4059846813405805, "learning_rate": 8.418604651162792e-06, "loss": 0.8061, "step": 182 }, { "epoch": 0.2552894675656824, "grad_norm": 2.71168774559371, "learning_rate": 8.465116279069768e-06, "loss": 0.8527, "step": 183 }, { "epoch": 0.25668449197860965, "grad_norm": 2.5104153727738496, "learning_rate": 8.511627906976744e-06, "loss": 0.8221, "step": 184 }, { "epoch": 0.25807951639153687, "grad_norm": 2.5067714026197603, "learning_rate": 8.558139534883722e-06, "loss": 0.8114, "step": 185 }, { "epoch": 0.2594745408044641, "grad_norm": 2.2838420237786985, "learning_rate": 8.604651162790698e-06, "loss": 0.7257, "step": 186 }, { "epoch": 0.2608695652173913, "grad_norm": 2.4945082107690326, "learning_rate": 8.651162790697674e-06, "loss": 0.9024, "step": 187 }, { "epoch": 0.2622645896303185, "grad_norm": 2.4859596111427753, "learning_rate": 8.697674418604652e-06, "loss": 0.8328, "step": 188 }, { "epoch": 0.2636596140432458, "grad_norm": 2.3938969837953046, "learning_rate": 8.74418604651163e-06, "loss": 0.8088, "step": 189 }, { "epoch": 0.265054638456173, "grad_norm": 2.3831847845091993, "learning_rate": 8.790697674418606e-06, "loss": 0.8195, "step": 190 }, { "epoch": 0.2664496628691002, "grad_norm": 2.667853010076038, "learning_rate": 8.837209302325582e-06, "loss": 0.8719, "step": 191 }, { "epoch": 0.2678446872820274, "grad_norm": 2.5194996837712496, "learning_rate": 8.88372093023256e-06, "loss": 0.8094, "step": 192 }, { "epoch": 0.26923971169495464, "grad_norm": 2.3692982431415937, "learning_rate": 8.930232558139535e-06, "loss": 0.8583, "step": 193 }, { "epoch": 0.2706347361078819, "grad_norm": 2.565427330704496, "learning_rate": 8.976744186046511e-06, "loss": 0.8072, "step": 194 }, { "epoch": 0.2720297605208091, "grad_norm": 2.497731647810981, "learning_rate": 9.023255813953489e-06, "loss": 0.8137, "step": 195 }, { "epoch": 0.27342478493373634, "grad_norm": 2.3717771912185763, "learning_rate": 9.069767441860465e-06, "loss": 0.9131, "step": 196 }, { "epoch": 0.27481980934666356, "grad_norm": 2.3181968094964187, "learning_rate": 9.116279069767443e-06, "loss": 0.8156, "step": 197 }, { "epoch": 0.27621483375959077, "grad_norm": 2.4236323760541914, "learning_rate": 9.162790697674419e-06, "loss": 0.8238, "step": 198 }, { "epoch": 0.27760985817251804, "grad_norm": 2.5217357862754057, "learning_rate": 9.209302325581397e-06, "loss": 0.8152, "step": 199 }, { "epoch": 0.27900488258544526, "grad_norm": 2.5198954322151157, "learning_rate": 9.255813953488373e-06, "loss": 0.8109, "step": 200 }, { "epoch": 0.2803999069983725, "grad_norm": 2.406130115183606, "learning_rate": 9.30232558139535e-06, "loss": 0.8244, "step": 201 }, { "epoch": 0.2817949314112997, "grad_norm": 2.5093788027653066, "learning_rate": 9.348837209302326e-06, "loss": 0.8398, "step": 202 }, { "epoch": 0.2831899558242269, "grad_norm": 2.5298276780747995, "learning_rate": 9.395348837209302e-06, "loss": 0.8877, "step": 203 }, { "epoch": 0.2845849802371542, "grad_norm": 2.570254180617528, "learning_rate": 9.44186046511628e-06, "loss": 0.836, "step": 204 }, { "epoch": 0.2859800046500814, "grad_norm": 2.6368089222022992, "learning_rate": 9.488372093023258e-06, "loss": 0.8894, "step": 205 }, { "epoch": 0.2873750290630086, "grad_norm": 2.639546985201896, "learning_rate": 9.534883720930234e-06, "loss": 0.8465, "step": 206 }, { "epoch": 0.2887700534759358, "grad_norm": 2.2835023080497105, "learning_rate": 9.58139534883721e-06, "loss": 0.8026, "step": 207 }, { "epoch": 0.29016507788886303, "grad_norm": 2.491551445189629, "learning_rate": 9.627906976744188e-06, "loss": 0.8713, "step": 208 }, { "epoch": 0.2915601023017903, "grad_norm": 2.529700087483362, "learning_rate": 9.674418604651164e-06, "loss": 0.8823, "step": 209 }, { "epoch": 0.2929551267147175, "grad_norm": 2.5430291093480393, "learning_rate": 9.72093023255814e-06, "loss": 0.8826, "step": 210 }, { "epoch": 0.29435015112764473, "grad_norm": 2.4210878322804943, "learning_rate": 9.767441860465117e-06, "loss": 0.8383, "step": 211 }, { "epoch": 0.29574517554057195, "grad_norm": 2.6555243262969963, "learning_rate": 9.813953488372093e-06, "loss": 0.8782, "step": 212 }, { "epoch": 0.29714019995349916, "grad_norm": 2.54621068607973, "learning_rate": 9.860465116279071e-06, "loss": 0.8542, "step": 213 }, { "epoch": 0.29853522436642643, "grad_norm": 2.435433967576266, "learning_rate": 9.906976744186047e-06, "loss": 0.8147, "step": 214 }, { "epoch": 0.29993024877935365, "grad_norm": 2.4421008440447687, "learning_rate": 9.953488372093025e-06, "loss": 0.8857, "step": 215 }, { "epoch": 0.30132527319228086, "grad_norm": 2.452695066399621, "learning_rate": 1e-05, "loss": 0.8653, "step": 216 }, { "epoch": 0.3027202976052081, "grad_norm": 2.5878240699983435, "learning_rate": 9.999993396473114e-06, "loss": 0.9005, "step": 217 }, { "epoch": 0.3041153220181353, "grad_norm": 2.230413884583294, "learning_rate": 9.999973585909898e-06, "loss": 0.7461, "step": 218 }, { "epoch": 0.30551034643106256, "grad_norm": 2.4929998512817755, "learning_rate": 9.99994056836268e-06, "loss": 0.804, "step": 219 }, { "epoch": 0.3069053708439898, "grad_norm": 2.36729320195286, "learning_rate": 9.999894343918674e-06, "loss": 0.8355, "step": 220 }, { "epoch": 0.308300395256917, "grad_norm": 2.3078192310706664, "learning_rate": 9.999834912699974e-06, "loss": 0.861, "step": 221 }, { "epoch": 0.3096954196698442, "grad_norm": 2.365438939226392, "learning_rate": 9.999762274863567e-06, "loss": 0.8452, "step": 222 }, { "epoch": 0.3110904440827714, "grad_norm": 2.6299048529527056, "learning_rate": 9.999676430601318e-06, "loss": 0.9125, "step": 223 }, { "epoch": 0.3124854684956987, "grad_norm": 2.2705893407719944, "learning_rate": 9.999577380139976e-06, "loss": 0.8077, "step": 224 }, { "epoch": 0.3138804929086259, "grad_norm": 2.396780743389595, "learning_rate": 9.999465123741172e-06, "loss": 0.8073, "step": 225 }, { "epoch": 0.3152755173215531, "grad_norm": 2.4521922945041568, "learning_rate": 9.999339661701424e-06, "loss": 0.823, "step": 226 }, { "epoch": 0.31667054173448034, "grad_norm": 2.46652310666913, "learning_rate": 9.99920099435213e-06, "loss": 0.8637, "step": 227 }, { "epoch": 0.31806556614740755, "grad_norm": 2.5152504775724505, "learning_rate": 9.999049122059565e-06, "loss": 0.892, "step": 228 }, { "epoch": 0.3194605905603348, "grad_norm": 2.4086907511664992, "learning_rate": 9.998884045224886e-06, "loss": 0.8286, "step": 229 }, { "epoch": 0.32085561497326204, "grad_norm": 2.4077278793396837, "learning_rate": 9.998705764284132e-06, "loss": 0.8062, "step": 230 }, { "epoch": 0.32225063938618925, "grad_norm": 2.5600792649715456, "learning_rate": 9.998514279708212e-06, "loss": 0.9026, "step": 231 }, { "epoch": 0.32364566379911647, "grad_norm": 2.4418898784845546, "learning_rate": 9.998309592002914e-06, "loss": 0.8873, "step": 232 }, { "epoch": 0.32504068821204374, "grad_norm": 2.3831999756848674, "learning_rate": 9.99809170170891e-06, "loss": 0.788, "step": 233 }, { "epoch": 0.32643571262497095, "grad_norm": 2.468251361585286, "learning_rate": 9.997860609401732e-06, "loss": 0.8986, "step": 234 }, { "epoch": 0.32783073703789817, "grad_norm": 2.3361574389540327, "learning_rate": 9.99761631569179e-06, "loss": 0.8939, "step": 235 }, { "epoch": 0.3292257614508254, "grad_norm": 2.5382682739241376, "learning_rate": 9.997358821224365e-06, "loss": 0.9016, "step": 236 }, { "epoch": 0.3306207858637526, "grad_norm": 2.674504172069268, "learning_rate": 9.997088126679607e-06, "loss": 0.8439, "step": 237 }, { "epoch": 0.33201581027667987, "grad_norm": 2.419127211606796, "learning_rate": 9.996804232772528e-06, "loss": 0.827, "step": 238 }, { "epoch": 0.3334108346896071, "grad_norm": 2.3153473423269717, "learning_rate": 9.996507140253012e-06, "loss": 0.8695, "step": 239 }, { "epoch": 0.3348058591025343, "grad_norm": 2.3459142136786113, "learning_rate": 9.9961968499058e-06, "loss": 0.8493, "step": 240 }, { "epoch": 0.3362008835154615, "grad_norm": 2.4621876263369393, "learning_rate": 9.9958733625505e-06, "loss": 0.8597, "step": 241 }, { "epoch": 0.3375959079283887, "grad_norm": 2.302585809337345, "learning_rate": 9.995536679041568e-06, "loss": 0.8197, "step": 242 }, { "epoch": 0.338990932341316, "grad_norm": 2.409240679949958, "learning_rate": 9.99518680026833e-06, "loss": 0.8742, "step": 243 }, { "epoch": 0.3403859567542432, "grad_norm": 2.4065740050690048, "learning_rate": 9.994823727154957e-06, "loss": 0.8713, "step": 244 }, { "epoch": 0.34178098116717043, "grad_norm": 2.617892582271531, "learning_rate": 9.994447460660473e-06, "loss": 0.8752, "step": 245 }, { "epoch": 0.34317600558009764, "grad_norm": 2.382232746812907, "learning_rate": 9.994058001778754e-06, "loss": 0.8597, "step": 246 }, { "epoch": 0.34457102999302486, "grad_norm": 2.2738606415367255, "learning_rate": 9.99365535153852e-06, "loss": 0.8433, "step": 247 }, { "epoch": 0.34596605440595213, "grad_norm": 2.3540814447812752, "learning_rate": 9.993239511003338e-06, "loss": 0.8388, "step": 248 }, { "epoch": 0.34736107881887934, "grad_norm": 2.528752295199993, "learning_rate": 9.992810481271611e-06, "loss": 0.8307, "step": 249 }, { "epoch": 0.34875610323180656, "grad_norm": 2.621386384497238, "learning_rate": 9.992368263476585e-06, "loss": 0.8629, "step": 250 }, { "epoch": 0.3501511276447338, "grad_norm": 2.545073679267954, "learning_rate": 9.991912858786335e-06, "loss": 0.875, "step": 251 }, { "epoch": 0.351546152057661, "grad_norm": 2.460969776254627, "learning_rate": 9.991444268403776e-06, "loss": 0.8972, "step": 252 }, { "epoch": 0.35294117647058826, "grad_norm": 2.4567535372729257, "learning_rate": 9.990962493566645e-06, "loss": 0.9225, "step": 253 }, { "epoch": 0.3543362008835155, "grad_norm": 2.532024289457298, "learning_rate": 9.99046753554751e-06, "loss": 0.8168, "step": 254 }, { "epoch": 0.3557312252964427, "grad_norm": 2.581977530730931, "learning_rate": 9.989959395653756e-06, "loss": 0.8378, "step": 255 }, { "epoch": 0.3571262497093699, "grad_norm": 2.255669888736637, "learning_rate": 9.989438075227588e-06, "loss": 0.7349, "step": 256 }, { "epoch": 0.3585212741222971, "grad_norm": 2.359115264264968, "learning_rate": 9.988903575646032e-06, "loss": 0.7745, "step": 257 }, { "epoch": 0.3599162985352244, "grad_norm": 2.3037312556925964, "learning_rate": 9.988355898320917e-06, "loss": 0.8567, "step": 258 }, { "epoch": 0.3613113229481516, "grad_norm": 2.434172530280232, "learning_rate": 9.987795044698885e-06, "loss": 0.8713, "step": 259 }, { "epoch": 0.3627063473610788, "grad_norm": 2.453926336248676, "learning_rate": 9.98722101626138e-06, "loss": 0.8435, "step": 260 }, { "epoch": 0.36410137177400603, "grad_norm": 2.439473444782122, "learning_rate": 9.986633814524648e-06, "loss": 0.8054, "step": 261 }, { "epoch": 0.36549639618693325, "grad_norm": 2.266649898523453, "learning_rate": 9.986033441039731e-06, "loss": 0.8226, "step": 262 }, { "epoch": 0.3668914205998605, "grad_norm": 2.693755550930556, "learning_rate": 9.985419897392459e-06, "loss": 0.9119, "step": 263 }, { "epoch": 0.36828644501278773, "grad_norm": 2.140665282156282, "learning_rate": 9.984793185203456e-06, "loss": 0.7929, "step": 264 }, { "epoch": 0.36968146942571495, "grad_norm": 2.4529426566632138, "learning_rate": 9.984153306128124e-06, "loss": 0.8796, "step": 265 }, { "epoch": 0.37107649383864216, "grad_norm": 2.4849290596270275, "learning_rate": 9.983500261856646e-06, "loss": 0.8676, "step": 266 }, { "epoch": 0.3724715182515694, "grad_norm": 2.246171593649269, "learning_rate": 9.982834054113982e-06, "loss": 0.7925, "step": 267 }, { "epoch": 0.37386654266449665, "grad_norm": 2.3984781097724253, "learning_rate": 9.98215468465986e-06, "loss": 0.8271, "step": 268 }, { "epoch": 0.37526156707742386, "grad_norm": 2.4937376226580223, "learning_rate": 9.981462155288773e-06, "loss": 0.8403, "step": 269 }, { "epoch": 0.3766565914903511, "grad_norm": 2.3749646784336287, "learning_rate": 9.980756467829977e-06, "loss": 0.7676, "step": 270 }, { "epoch": 0.3780516159032783, "grad_norm": 2.4139603371785605, "learning_rate": 9.98003762414748e-06, "loss": 0.8063, "step": 271 }, { "epoch": 0.3794466403162055, "grad_norm": 2.3782960478596418, "learning_rate": 9.979305626140046e-06, "loss": 0.7864, "step": 272 }, { "epoch": 0.3808416647291328, "grad_norm": 2.2635918115448406, "learning_rate": 9.978560475741181e-06, "loss": 0.8022, "step": 273 }, { "epoch": 0.38223668914206, "grad_norm": 2.4264395618909664, "learning_rate": 9.977802174919134e-06, "loss": 0.897, "step": 274 }, { "epoch": 0.3836317135549872, "grad_norm": 2.1976111228486173, "learning_rate": 9.977030725676887e-06, "loss": 0.8057, "step": 275 }, { "epoch": 0.3850267379679144, "grad_norm": 2.3947903434668922, "learning_rate": 9.976246130052157e-06, "loss": 0.8648, "step": 276 }, { "epoch": 0.38642176238084164, "grad_norm": 2.340490521005791, "learning_rate": 9.97544839011738e-06, "loss": 0.915, "step": 277 }, { "epoch": 0.3878167867937689, "grad_norm": 2.232889640737689, "learning_rate": 9.974637507979721e-06, "loss": 0.7917, "step": 278 }, { "epoch": 0.3892118112066961, "grad_norm": 2.4137413801366225, "learning_rate": 9.973813485781045e-06, "loss": 0.8328, "step": 279 }, { "epoch": 0.39060683561962334, "grad_norm": 2.1936027119127446, "learning_rate": 9.972976325697938e-06, "loss": 0.7979, "step": 280 }, { "epoch": 0.39200186003255055, "grad_norm": 2.1596779348429758, "learning_rate": 9.972126029941685e-06, "loss": 0.8252, "step": 281 }, { "epoch": 0.3933968844454778, "grad_norm": 2.3091455117209523, "learning_rate": 9.97126260075826e-06, "loss": 0.7924, "step": 282 }, { "epoch": 0.39479190885840504, "grad_norm": 2.412245613654812, "learning_rate": 9.97038604042834e-06, "loss": 0.8989, "step": 283 }, { "epoch": 0.39618693327133225, "grad_norm": 2.3418234808057976, "learning_rate": 9.969496351267278e-06, "loss": 0.8564, "step": 284 }, { "epoch": 0.39758195768425947, "grad_norm": 2.3346586850814037, "learning_rate": 9.96859353562511e-06, "loss": 0.8505, "step": 285 }, { "epoch": 0.3989769820971867, "grad_norm": 2.4043445190457993, "learning_rate": 9.967677595886542e-06, "loss": 0.8187, "step": 286 }, { "epoch": 0.40037200651011395, "grad_norm": 2.346496835909273, "learning_rate": 9.96674853447095e-06, "loss": 0.8595, "step": 287 }, { "epoch": 0.40176703092304117, "grad_norm": 2.185129462724462, "learning_rate": 9.96580635383236e-06, "loss": 0.7555, "step": 288 }, { "epoch": 0.4031620553359684, "grad_norm": 2.164942932898928, "learning_rate": 9.964851056459465e-06, "loss": 0.7291, "step": 289 }, { "epoch": 0.4045570797488956, "grad_norm": 2.3010669630601117, "learning_rate": 9.963882644875594e-06, "loss": 0.8161, "step": 290 }, { "epoch": 0.4059521041618228, "grad_norm": 2.4417739663886966, "learning_rate": 9.96290112163872e-06, "loss": 0.8173, "step": 291 }, { "epoch": 0.4073471285747501, "grad_norm": 2.3566144190862364, "learning_rate": 9.961906489341452e-06, "loss": 0.829, "step": 292 }, { "epoch": 0.4087421529876773, "grad_norm": 2.49754698612358, "learning_rate": 9.960898750611019e-06, "loss": 0.8803, "step": 293 }, { "epoch": 0.4101371774006045, "grad_norm": 2.3789219853473957, "learning_rate": 9.959877908109274e-06, "loss": 0.9305, "step": 294 }, { "epoch": 0.41153220181353173, "grad_norm": 2.4300365222504507, "learning_rate": 9.958843964532683e-06, "loss": 0.7843, "step": 295 }, { "epoch": 0.41292722622645894, "grad_norm": 2.3813885893994087, "learning_rate": 9.957796922612314e-06, "loss": 0.8118, "step": 296 }, { "epoch": 0.4143222506393862, "grad_norm": 2.5937704894729547, "learning_rate": 9.956736785113833e-06, "loss": 0.8718, "step": 297 }, { "epoch": 0.41571727505231343, "grad_norm": 2.180228830270413, "learning_rate": 9.955663554837503e-06, "loss": 0.7416, "step": 298 }, { "epoch": 0.41711229946524064, "grad_norm": 2.371308172793777, "learning_rate": 9.954577234618162e-06, "loss": 0.8335, "step": 299 }, { "epoch": 0.41850732387816786, "grad_norm": 2.2231881758497996, "learning_rate": 9.953477827325229e-06, "loss": 0.8336, "step": 300 }, { "epoch": 0.4199023482910951, "grad_norm": 2.445233303193449, "learning_rate": 9.952365335862693e-06, "loss": 0.8787, "step": 301 }, { "epoch": 0.42129737270402234, "grad_norm": 2.491009370026882, "learning_rate": 9.951239763169097e-06, "loss": 0.9098, "step": 302 }, { "epoch": 0.42269239711694956, "grad_norm": 2.327717446397162, "learning_rate": 9.950101112217543e-06, "loss": 0.8163, "step": 303 }, { "epoch": 0.4240874215298768, "grad_norm": 2.355123331040892, "learning_rate": 9.948949386015677e-06, "loss": 0.8009, "step": 304 }, { "epoch": 0.425482445942804, "grad_norm": 2.3988314771027714, "learning_rate": 9.947784587605678e-06, "loss": 0.8063, "step": 305 }, { "epoch": 0.4268774703557312, "grad_norm": 2.382293201804921, "learning_rate": 9.946606720064257e-06, "loss": 0.867, "step": 306 }, { "epoch": 0.4282724947686585, "grad_norm": 2.459817217467806, "learning_rate": 9.945415786502649e-06, "loss": 0.8424, "step": 307 }, { "epoch": 0.4296675191815857, "grad_norm": 2.3282386188112536, "learning_rate": 9.944211790066597e-06, "loss": 0.818, "step": 308 }, { "epoch": 0.4310625435945129, "grad_norm": 2.5666484910873697, "learning_rate": 9.94299473393635e-06, "loss": 0.8959, "step": 309 }, { "epoch": 0.4324575680074401, "grad_norm": 2.4273913472386175, "learning_rate": 9.941764621326655e-06, "loss": 0.8336, "step": 310 }, { "epoch": 0.43385259242036733, "grad_norm": 2.2711958845863087, "learning_rate": 9.94052145548674e-06, "loss": 0.8621, "step": 311 }, { "epoch": 0.4352476168332946, "grad_norm": 2.469404768269599, "learning_rate": 9.939265239700321e-06, "loss": 0.898, "step": 312 }, { "epoch": 0.4366426412462218, "grad_norm": 2.4902422039149137, "learning_rate": 9.93799597728558e-06, "loss": 0.8482, "step": 313 }, { "epoch": 0.43803766565914903, "grad_norm": 2.4229631048240377, "learning_rate": 9.936713671595158e-06, "loss": 0.8898, "step": 314 }, { "epoch": 0.43943269007207625, "grad_norm": 2.3910779731845326, "learning_rate": 9.935418326016153e-06, "loss": 0.8138, "step": 315 }, { "epoch": 0.44082771448500346, "grad_norm": 2.4552386090998275, "learning_rate": 9.934109943970103e-06, "loss": 0.9128, "step": 316 }, { "epoch": 0.44222273889793073, "grad_norm": 2.442527577960544, "learning_rate": 9.932788528912983e-06, "loss": 0.8672, "step": 317 }, { "epoch": 0.44361776331085795, "grad_norm": 2.344795751321148, "learning_rate": 9.931454084335192e-06, "loss": 0.8515, "step": 318 }, { "epoch": 0.44501278772378516, "grad_norm": 2.364416414555265, "learning_rate": 9.930106613761549e-06, "loss": 0.7947, "step": 319 }, { "epoch": 0.4464078121367124, "grad_norm": 2.256323705180913, "learning_rate": 9.928746120751275e-06, "loss": 0.8422, "step": 320 }, { "epoch": 0.4478028365496396, "grad_norm": 2.4203247455360217, "learning_rate": 9.927372608897992e-06, "loss": 0.8824, "step": 321 }, { "epoch": 0.44919786096256686, "grad_norm": 2.281454824638481, "learning_rate": 9.925986081829708e-06, "loss": 0.7903, "step": 322 }, { "epoch": 0.4505928853754941, "grad_norm": 2.373367397384208, "learning_rate": 9.924586543208812e-06, "loss": 0.8968, "step": 323 }, { "epoch": 0.4519879097884213, "grad_norm": 2.367295510695166, "learning_rate": 9.923173996732058e-06, "loss": 0.8127, "step": 324 }, { "epoch": 0.4533829342013485, "grad_norm": 2.3635544144384073, "learning_rate": 9.921748446130564e-06, "loss": 0.8632, "step": 325 }, { "epoch": 0.4547779586142757, "grad_norm": 2.119743635617115, "learning_rate": 9.920309895169793e-06, "loss": 0.8213, "step": 326 }, { "epoch": 0.456172983027203, "grad_norm": 2.4102045383117017, "learning_rate": 9.91885834764955e-06, "loss": 0.8128, "step": 327 }, { "epoch": 0.4575680074401302, "grad_norm": 2.308663008061708, "learning_rate": 9.917393807403965e-06, "loss": 0.878, "step": 328 }, { "epoch": 0.4589630318530574, "grad_norm": 2.2371915356686585, "learning_rate": 9.915916278301496e-06, "loss": 0.7975, "step": 329 }, { "epoch": 0.46035805626598464, "grad_norm": 2.2623728839393946, "learning_rate": 9.9144257642449e-06, "loss": 0.7888, "step": 330 }, { "epoch": 0.46175308067891185, "grad_norm": 2.1746453104415036, "learning_rate": 9.91292226917124e-06, "loss": 0.7562, "step": 331 }, { "epoch": 0.4631481050918391, "grad_norm": 2.327380391486263, "learning_rate": 9.91140579705186e-06, "loss": 0.8443, "step": 332 }, { "epoch": 0.46454312950476634, "grad_norm": 2.302250367501135, "learning_rate": 9.909876351892388e-06, "loss": 0.8753, "step": 333 }, { "epoch": 0.46593815391769355, "grad_norm": 2.1938519895752338, "learning_rate": 9.908333937732718e-06, "loss": 0.8179, "step": 334 }, { "epoch": 0.46733317833062077, "grad_norm": 2.2576976444664005, "learning_rate": 9.906778558647e-06, "loss": 0.8132, "step": 335 }, { "epoch": 0.46872820274354804, "grad_norm": 2.290846225349808, "learning_rate": 9.905210218743626e-06, "loss": 0.8323, "step": 336 }, { "epoch": 0.47012322715647525, "grad_norm": 2.327996605732, "learning_rate": 9.903628922165227e-06, "loss": 0.8116, "step": 337 }, { "epoch": 0.47151825156940247, "grad_norm": 2.524908945812549, "learning_rate": 9.902034673088656e-06, "loss": 0.8522, "step": 338 }, { "epoch": 0.4729132759823297, "grad_norm": 2.4399184280331823, "learning_rate": 9.90042747572498e-06, "loss": 0.7838, "step": 339 }, { "epoch": 0.4743083003952569, "grad_norm": 2.4034270262763537, "learning_rate": 9.898807334319471e-06, "loss": 0.8684, "step": 340 }, { "epoch": 0.47570332480818417, "grad_norm": 2.333648384087205, "learning_rate": 9.897174253151583e-06, "loss": 0.821, "step": 341 }, { "epoch": 0.4770983492211114, "grad_norm": 2.4581315986892194, "learning_rate": 9.895528236534957e-06, "loss": 0.8386, "step": 342 }, { "epoch": 0.4784933736340386, "grad_norm": 2.419787539111564, "learning_rate": 9.893869288817397e-06, "loss": 0.9022, "step": 343 }, { "epoch": 0.4798883980469658, "grad_norm": 2.1677116089219766, "learning_rate": 9.89219741438087e-06, "loss": 0.8122, "step": 344 }, { "epoch": 0.48128342245989303, "grad_norm": 2.375316977935794, "learning_rate": 9.890512617641474e-06, "loss": 0.8221, "step": 345 }, { "epoch": 0.4826784468728203, "grad_norm": 2.298504421310575, "learning_rate": 9.888814903049458e-06, "loss": 0.8552, "step": 346 }, { "epoch": 0.4840734712857475, "grad_norm": 2.340097450328989, "learning_rate": 9.88710427508918e-06, "loss": 0.8911, "step": 347 }, { "epoch": 0.48546849569867473, "grad_norm": 2.2883044488148467, "learning_rate": 9.885380738279111e-06, "loss": 0.7827, "step": 348 }, { "epoch": 0.48686352011160194, "grad_norm": 2.1585007303557724, "learning_rate": 9.883644297171821e-06, "loss": 0.8289, "step": 349 }, { "epoch": 0.48825854452452916, "grad_norm": 2.3150421842499336, "learning_rate": 9.881894956353963e-06, "loss": 0.8274, "step": 350 }, { "epoch": 0.48965356893745643, "grad_norm": 2.4260154482680516, "learning_rate": 9.880132720446265e-06, "loss": 0.8343, "step": 351 }, { "epoch": 0.49104859335038364, "grad_norm": 2.488096437013694, "learning_rate": 9.878357594103516e-06, "loss": 0.8788, "step": 352 }, { "epoch": 0.49244361776331086, "grad_norm": 2.265089333586129, "learning_rate": 9.876569582014554e-06, "loss": 0.8271, "step": 353 }, { "epoch": 0.4938386421762381, "grad_norm": 2.345345809912894, "learning_rate": 9.874768688902252e-06, "loss": 0.8533, "step": 354 }, { "epoch": 0.4952336665891653, "grad_norm": 1.9490060596274792, "learning_rate": 9.87295491952351e-06, "loss": 0.8594, "step": 355 }, { "epoch": 0.49662869100209256, "grad_norm": 2.2409855993315837, "learning_rate": 9.871128278669238e-06, "loss": 0.8273, "step": 356 }, { "epoch": 0.4980237154150198, "grad_norm": 2.267345461904061, "learning_rate": 9.869288771164344e-06, "loss": 0.8063, "step": 357 }, { "epoch": 0.499418739827947, "grad_norm": 2.2205116139516687, "learning_rate": 9.867436401867723e-06, "loss": 0.818, "step": 358 }, { "epoch": 0.5008137642408742, "grad_norm": 2.5030221770424266, "learning_rate": 9.865571175672245e-06, "loss": 0.8652, "step": 359 }, { "epoch": 0.5022087886538015, "grad_norm": 2.289617025599888, "learning_rate": 9.863693097504733e-06, "loss": 0.8105, "step": 360 }, { "epoch": 0.5036038130667286, "grad_norm": 2.3320118335025115, "learning_rate": 9.86180217232597e-06, "loss": 0.8885, "step": 361 }, { "epoch": 0.5049988374796559, "grad_norm": 2.444810576110176, "learning_rate": 9.859898405130661e-06, "loss": 0.8768, "step": 362 }, { "epoch": 0.5063938618925832, "grad_norm": 2.501148552838878, "learning_rate": 9.85798180094744e-06, "loss": 0.8267, "step": 363 }, { "epoch": 0.5077888863055103, "grad_norm": 2.4338221955408237, "learning_rate": 9.856052364838846e-06, "loss": 0.9283, "step": 364 }, { "epoch": 0.5091839107184376, "grad_norm": 2.1067698521138603, "learning_rate": 9.854110101901308e-06, "loss": 0.7598, "step": 365 }, { "epoch": 0.5105789351313648, "grad_norm": 2.132316478250159, "learning_rate": 9.852155017265146e-06, "loss": 0.7123, "step": 366 }, { "epoch": 0.511973959544292, "grad_norm": 2.4768500386344505, "learning_rate": 9.850187116094538e-06, "loss": 0.8386, "step": 367 }, { "epoch": 0.5133689839572193, "grad_norm": 2.215461965220729, "learning_rate": 9.848206403587521e-06, "loss": 0.8753, "step": 368 }, { "epoch": 0.5147640083701465, "grad_norm": 2.4235502880016413, "learning_rate": 9.84621288497597e-06, "loss": 0.8686, "step": 369 }, { "epoch": 0.5161590327830737, "grad_norm": 2.2337681276510444, "learning_rate": 9.844206565525585e-06, "loss": 0.847, "step": 370 }, { "epoch": 0.5175540571960009, "grad_norm": 2.3027789909106233, "learning_rate": 9.842187450535881e-06, "loss": 0.8437, "step": 371 }, { "epoch": 0.5189490816089282, "grad_norm": 2.5046861491929384, "learning_rate": 9.840155545340169e-06, "loss": 0.8602, "step": 372 }, { "epoch": 0.5203441060218554, "grad_norm": 2.2585753683186325, "learning_rate": 9.838110855305548e-06, "loss": 0.8245, "step": 373 }, { "epoch": 0.5217391304347826, "grad_norm": 2.1334774355774058, "learning_rate": 9.836053385832881e-06, "loss": 0.7502, "step": 374 }, { "epoch": 0.5231341548477099, "grad_norm": 2.2659216497115713, "learning_rate": 9.833983142356792e-06, "loss": 0.8257, "step": 375 }, { "epoch": 0.524529179260637, "grad_norm": 2.4304587346103217, "learning_rate": 9.831900130345645e-06, "loss": 0.8029, "step": 376 }, { "epoch": 0.5259242036735643, "grad_norm": 2.2041213476102914, "learning_rate": 9.829804355301527e-06, "loss": 0.7652, "step": 377 }, { "epoch": 0.5273192280864916, "grad_norm": 2.282662536221542, "learning_rate": 9.827695822760245e-06, "loss": 0.7858, "step": 378 }, { "epoch": 0.5287142524994187, "grad_norm": 2.226650168143614, "learning_rate": 9.825574538291293e-06, "loss": 0.8386, "step": 379 }, { "epoch": 0.530109276912346, "grad_norm": 2.107004540652317, "learning_rate": 9.823440507497863e-06, "loss": 0.7838, "step": 380 }, { "epoch": 0.5315043013252732, "grad_norm": 2.2509842063123493, "learning_rate": 9.821293736016802e-06, "loss": 0.8056, "step": 381 }, { "epoch": 0.5328993257382004, "grad_norm": 2.4022357458518484, "learning_rate": 9.819134229518617e-06, "loss": 0.8534, "step": 382 }, { "epoch": 0.5342943501511277, "grad_norm": 2.2746545692172297, "learning_rate": 9.81696199370745e-06, "loss": 0.8199, "step": 383 }, { "epoch": 0.5356893745640549, "grad_norm": 2.3555905221816573, "learning_rate": 9.814777034321069e-06, "loss": 0.8497, "step": 384 }, { "epoch": 0.5370843989769821, "grad_norm": 2.3503199320005317, "learning_rate": 9.812579357130848e-06, "loss": 0.8633, "step": 385 }, { "epoch": 0.5384794233899093, "grad_norm": 2.15387241899644, "learning_rate": 9.810368967941757e-06, "loss": 0.814, "step": 386 }, { "epoch": 0.5398744478028366, "grad_norm": 2.324498412192235, "learning_rate": 9.808145872592341e-06, "loss": 0.8162, "step": 387 }, { "epoch": 0.5412694722157638, "grad_norm": 2.3794310030781594, "learning_rate": 9.80591007695471e-06, "loss": 0.7916, "step": 388 }, { "epoch": 0.542664496628691, "grad_norm": 2.2813721353974814, "learning_rate": 9.803661586934514e-06, "loss": 0.8147, "step": 389 }, { "epoch": 0.5440595210416183, "grad_norm": 2.382905091000469, "learning_rate": 9.801400408470943e-06, "loss": 0.8573, "step": 390 }, { "epoch": 0.5454545454545454, "grad_norm": 2.36573095648591, "learning_rate": 9.799126547536695e-06, "loss": 0.8528, "step": 391 }, { "epoch": 0.5468495698674727, "grad_norm": 2.3170269826595145, "learning_rate": 9.796840010137972e-06, "loss": 0.8267, "step": 392 }, { "epoch": 0.5482445942804, "grad_norm": 2.257208144984935, "learning_rate": 9.79454080231446e-06, "loss": 0.9302, "step": 393 }, { "epoch": 0.5496396186933271, "grad_norm": 2.2152079813089016, "learning_rate": 9.79222893013931e-06, "loss": 0.7236, "step": 394 }, { "epoch": 0.5510346431062544, "grad_norm": 2.3668068440170074, "learning_rate": 9.789904399719124e-06, "loss": 0.8817, "step": 395 }, { "epoch": 0.5524296675191815, "grad_norm": 2.267969900752159, "learning_rate": 9.787567217193944e-06, "loss": 0.7608, "step": 396 }, { "epoch": 0.5538246919321088, "grad_norm": 2.4047203321440307, "learning_rate": 9.785217388737232e-06, "loss": 0.8608, "step": 397 }, { "epoch": 0.5552197163450361, "grad_norm": 2.2761610687662084, "learning_rate": 9.782854920555844e-06, "loss": 0.7997, "step": 398 }, { "epoch": 0.5566147407579632, "grad_norm": 2.198552549307009, "learning_rate": 9.780479818890032e-06, "loss": 0.8804, "step": 399 }, { "epoch": 0.5580097651708905, "grad_norm": 2.367066469808117, "learning_rate": 9.778092090013416e-06, "loss": 0.7939, "step": 400 }, { "epoch": 0.5594047895838177, "grad_norm": 2.0055326234004256, "learning_rate": 9.775691740232966e-06, "loss": 0.7534, "step": 401 }, { "epoch": 0.560799813996745, "grad_norm": 2.1890326804743228, "learning_rate": 9.773278775888995e-06, "loss": 0.8646, "step": 402 }, { "epoch": 0.5621948384096722, "grad_norm": 2.3418477658622807, "learning_rate": 9.77085320335513e-06, "loss": 0.7858, "step": 403 }, { "epoch": 0.5635898628225994, "grad_norm": 2.5063332319003986, "learning_rate": 9.768415029038304e-06, "loss": 0.8181, "step": 404 }, { "epoch": 0.5649848872355266, "grad_norm": 2.172825286783105, "learning_rate": 9.76596425937874e-06, "loss": 0.8031, "step": 405 }, { "epoch": 0.5663799116484538, "grad_norm": 2.3073728439899237, "learning_rate": 9.763500900849926e-06, "loss": 0.782, "step": 406 }, { "epoch": 0.5677749360613811, "grad_norm": 2.5459879839298654, "learning_rate": 9.761024959958605e-06, "loss": 0.8471, "step": 407 }, { "epoch": 0.5691699604743083, "grad_norm": 2.3768918310577676, "learning_rate": 9.75853644324475e-06, "loss": 0.8593, "step": 408 }, { "epoch": 0.5705649848872355, "grad_norm": 2.396344483737172, "learning_rate": 9.756035357281559e-06, "loss": 0.8348, "step": 409 }, { "epoch": 0.5719600093001628, "grad_norm": 2.2452406794760456, "learning_rate": 9.753521708675426e-06, "loss": 0.8026, "step": 410 }, { "epoch": 0.5733550337130899, "grad_norm": 2.389373465019408, "learning_rate": 9.75099550406593e-06, "loss": 0.8166, "step": 411 }, { "epoch": 0.5747500581260172, "grad_norm": 2.206381763441724, "learning_rate": 9.748456750125817e-06, "loss": 0.8472, "step": 412 }, { "epoch": 0.5761450825389445, "grad_norm": 2.3113579235556774, "learning_rate": 9.745905453560976e-06, "loss": 0.7931, "step": 413 }, { "epoch": 0.5775401069518716, "grad_norm": 2.107533685544665, "learning_rate": 9.74334162111043e-06, "loss": 0.8714, "step": 414 }, { "epoch": 0.5789351313647989, "grad_norm": 2.199866448579767, "learning_rate": 9.740765259546312e-06, "loss": 0.8222, "step": 415 }, { "epoch": 0.5803301557777261, "grad_norm": 2.267995859298616, "learning_rate": 9.738176375673856e-06, "loss": 0.8293, "step": 416 }, { "epoch": 0.5817251801906533, "grad_norm": 2.2579754561980914, "learning_rate": 9.735574976331362e-06, "loss": 0.7453, "step": 417 }, { "epoch": 0.5831202046035806, "grad_norm": 2.2254562014393247, "learning_rate": 9.732961068390199e-06, "loss": 0.8498, "step": 418 }, { "epoch": 0.5845152290165078, "grad_norm": 2.470323394273476, "learning_rate": 9.730334658754767e-06, "loss": 0.9387, "step": 419 }, { "epoch": 0.585910253429435, "grad_norm": 2.2846250877655665, "learning_rate": 9.727695754362498e-06, "loss": 0.86, "step": 420 }, { "epoch": 0.5873052778423622, "grad_norm": 2.2272292760109633, "learning_rate": 9.725044362183817e-06, "loss": 0.8108, "step": 421 }, { "epoch": 0.5887003022552895, "grad_norm": 2.0447514244982066, "learning_rate": 9.722380489222145e-06, "loss": 0.7704, "step": 422 }, { "epoch": 0.5900953266682167, "grad_norm": 2.1053388307108905, "learning_rate": 9.71970414251386e-06, "loss": 0.7527, "step": 423 }, { "epoch": 0.5914903510811439, "grad_norm": 2.2899420610340124, "learning_rate": 9.717015329128294e-06, "loss": 0.8584, "step": 424 }, { "epoch": 0.5928853754940712, "grad_norm": 2.304060071404452, "learning_rate": 9.714314056167711e-06, "loss": 0.8133, "step": 425 }, { "epoch": 0.5942803999069983, "grad_norm": 1.922690772501525, "learning_rate": 9.711600330767278e-06, "loss": 0.7393, "step": 426 }, { "epoch": 0.5956754243199256, "grad_norm": 2.3346883179500093, "learning_rate": 9.708874160095061e-06, "loss": 0.8185, "step": 427 }, { "epoch": 0.5970704487328529, "grad_norm": 2.2313171623584056, "learning_rate": 9.706135551351996e-06, "loss": 0.8333, "step": 428 }, { "epoch": 0.59846547314578, "grad_norm": 2.233185183876659, "learning_rate": 9.703384511771874e-06, "loss": 0.7598, "step": 429 }, { "epoch": 0.5998604975587073, "grad_norm": 2.1479998116354584, "learning_rate": 9.700621048621322e-06, "loss": 0.763, "step": 430 }, { "epoch": 0.6012555219716345, "grad_norm": 2.1089747709879703, "learning_rate": 9.697845169199775e-06, "loss": 0.7647, "step": 431 }, { "epoch": 0.6026505463845617, "grad_norm": 2.4350730978634347, "learning_rate": 9.69505688083948e-06, "loss": 0.8737, "step": 432 }, { "epoch": 0.604045570797489, "grad_norm": 2.0397925636132817, "learning_rate": 9.692256190905444e-06, "loss": 0.7736, "step": 433 }, { "epoch": 0.6054405952104162, "grad_norm": 2.2527228058323057, "learning_rate": 9.689443106795442e-06, "loss": 0.8212, "step": 434 }, { "epoch": 0.6068356196233434, "grad_norm": 2.317450367988872, "learning_rate": 9.686617635939988e-06, "loss": 0.7718, "step": 435 }, { "epoch": 0.6082306440362706, "grad_norm": 2.368353149525255, "learning_rate": 9.683779785802306e-06, "loss": 0.8641, "step": 436 }, { "epoch": 0.6096256684491979, "grad_norm": 2.387460398737295, "learning_rate": 9.680929563878327e-06, "loss": 0.8025, "step": 437 }, { "epoch": 0.6110206928621251, "grad_norm": 2.3621137391292435, "learning_rate": 9.678066977696656e-06, "loss": 0.852, "step": 438 }, { "epoch": 0.6124157172750523, "grad_norm": 2.201375841846717, "learning_rate": 9.675192034818561e-06, "loss": 0.8185, "step": 439 }, { "epoch": 0.6138107416879796, "grad_norm": 2.458536455126998, "learning_rate": 9.672304742837945e-06, "loss": 0.9198, "step": 440 }, { "epoch": 0.6152057661009067, "grad_norm": 2.2699151892872558, "learning_rate": 9.669405109381335e-06, "loss": 0.8127, "step": 441 }, { "epoch": 0.616600790513834, "grad_norm": 2.263489117442132, "learning_rate": 9.66649314210785e-06, "loss": 0.8241, "step": 442 }, { "epoch": 0.6179958149267613, "grad_norm": 2.0953094920397, "learning_rate": 9.663568848709194e-06, "loss": 0.8314, "step": 443 }, { "epoch": 0.6193908393396884, "grad_norm": 2.3091687564506764, "learning_rate": 9.660632236909628e-06, "loss": 0.8319, "step": 444 }, { "epoch": 0.6207858637526157, "grad_norm": 2.1200150861464357, "learning_rate": 9.657683314465948e-06, "loss": 0.7791, "step": 445 }, { "epoch": 0.6221808881655428, "grad_norm": 2.2192687258564803, "learning_rate": 9.65472208916747e-06, "loss": 0.8752, "step": 446 }, { "epoch": 0.6235759125784701, "grad_norm": 2.259805763798995, "learning_rate": 9.651748568836007e-06, "loss": 0.88, "step": 447 }, { "epoch": 0.6249709369913974, "grad_norm": 2.275122877615254, "learning_rate": 9.648762761325847e-06, "loss": 0.7829, "step": 448 }, { "epoch": 0.6263659614043245, "grad_norm": 2.229960138708985, "learning_rate": 9.645764674523732e-06, "loss": 0.8848, "step": 449 }, { "epoch": 0.6277609858172518, "grad_norm": 2.2502532634333896, "learning_rate": 9.642754316348846e-06, "loss": 0.8128, "step": 450 }, { "epoch": 0.629156010230179, "grad_norm": 2.216119970276079, "learning_rate": 9.639731694752776e-06, "loss": 0.8658, "step": 451 }, { "epoch": 0.6305510346431062, "grad_norm": 2.3276266229892237, "learning_rate": 9.636696817719511e-06, "loss": 0.861, "step": 452 }, { "epoch": 0.6319460590560335, "grad_norm": 2.361087414888192, "learning_rate": 9.633649693265406e-06, "loss": 0.8556, "step": 453 }, { "epoch": 0.6333410834689607, "grad_norm": 2.351561108240417, "learning_rate": 9.630590329439169e-06, "loss": 0.8786, "step": 454 }, { "epoch": 0.634736107881888, "grad_norm": 2.3416366446054484, "learning_rate": 9.627518734321837e-06, "loss": 0.866, "step": 455 }, { "epoch": 0.6361311322948151, "grad_norm": 2.403263648061331, "learning_rate": 9.624434916026752e-06, "loss": 0.864, "step": 456 }, { "epoch": 0.6375261567077424, "grad_norm": 2.3802769787196607, "learning_rate": 9.621338882699547e-06, "loss": 0.8491, "step": 457 }, { "epoch": 0.6389211811206696, "grad_norm": 2.2599860848885855, "learning_rate": 9.618230642518117e-06, "loss": 0.7888, "step": 458 }, { "epoch": 0.6403162055335968, "grad_norm": 2.266172697496585, "learning_rate": 9.615110203692602e-06, "loss": 0.8727, "step": 459 }, { "epoch": 0.6417112299465241, "grad_norm": 2.3253080498789873, "learning_rate": 9.61197757446536e-06, "loss": 0.8237, "step": 460 }, { "epoch": 0.6431062543594513, "grad_norm": 2.1804712573158045, "learning_rate": 9.608832763110955e-06, "loss": 0.7794, "step": 461 }, { "epoch": 0.6445012787723785, "grad_norm": 2.334526513741285, "learning_rate": 9.605675777936123e-06, "loss": 0.8184, "step": 462 }, { "epoch": 0.6458963031853058, "grad_norm": 2.193613481975262, "learning_rate": 9.60250662727976e-06, "loss": 0.8436, "step": 463 }, { "epoch": 0.6472913275982329, "grad_norm": 2.265808658780435, "learning_rate": 9.599325319512893e-06, "loss": 0.8332, "step": 464 }, { "epoch": 0.6486863520111602, "grad_norm": 2.1888907496537526, "learning_rate": 9.596131863038664e-06, "loss": 0.7998, "step": 465 }, { "epoch": 0.6500813764240875, "grad_norm": 2.1270401394734795, "learning_rate": 9.592926266292305e-06, "loss": 0.8173, "step": 466 }, { "epoch": 0.6514764008370146, "grad_norm": 2.350763578553108, "learning_rate": 9.589708537741109e-06, "loss": 0.8275, "step": 467 }, { "epoch": 0.6528714252499419, "grad_norm": 2.195004096421456, "learning_rate": 9.586478685884424e-06, "loss": 0.776, "step": 468 }, { "epoch": 0.6542664496628691, "grad_norm": 2.336786358798208, "learning_rate": 9.583236719253611e-06, "loss": 0.8128, "step": 469 }, { "epoch": 0.6556614740757963, "grad_norm": 2.3712982708091723, "learning_rate": 9.579982646412039e-06, "loss": 0.8173, "step": 470 }, { "epoch": 0.6570564984887236, "grad_norm": 2.1027405982381167, "learning_rate": 9.576716475955048e-06, "loss": 0.7769, "step": 471 }, { "epoch": 0.6584515229016508, "grad_norm": 2.2961212429457363, "learning_rate": 9.573438216509937e-06, "loss": 0.811, "step": 472 }, { "epoch": 0.659846547314578, "grad_norm": 2.171407997472196, "learning_rate": 9.570147876735937e-06, "loss": 0.83, "step": 473 }, { "epoch": 0.6612415717275052, "grad_norm": 2.0991266774037447, "learning_rate": 9.566845465324185e-06, "loss": 0.7772, "step": 474 }, { "epoch": 0.6626365961404325, "grad_norm": 2.1933452488357212, "learning_rate": 9.563530990997707e-06, "loss": 0.8649, "step": 475 }, { "epoch": 0.6640316205533597, "grad_norm": 2.165649284860308, "learning_rate": 9.560204462511392e-06, "loss": 0.8278, "step": 476 }, { "epoch": 0.6654266449662869, "grad_norm": 2.263527369320285, "learning_rate": 9.556865888651965e-06, "loss": 0.8703, "step": 477 }, { "epoch": 0.6668216693792142, "grad_norm": 2.1198035264638975, "learning_rate": 9.553515278237975e-06, "loss": 0.8058, "step": 478 }, { "epoch": 0.6682166937921413, "grad_norm": 2.3241378748496713, "learning_rate": 9.550152640119757e-06, "loss": 0.8333, "step": 479 }, { "epoch": 0.6696117182050686, "grad_norm": 2.2654003985826097, "learning_rate": 9.546777983179421e-06, "loss": 0.8557, "step": 480 }, { "epoch": 0.6710067426179959, "grad_norm": 2.036009219774052, "learning_rate": 9.543391316330822e-06, "loss": 0.7241, "step": 481 }, { "epoch": 0.672401767030923, "grad_norm": 2.2147363656128247, "learning_rate": 9.539992648519538e-06, "loss": 0.8536, "step": 482 }, { "epoch": 0.6737967914438503, "grad_norm": 2.3840805150143387, "learning_rate": 9.536581988722848e-06, "loss": 0.9432, "step": 483 }, { "epoch": 0.6751918158567775, "grad_norm": 2.319245797189447, "learning_rate": 9.533159345949704e-06, "loss": 0.8767, "step": 484 }, { "epoch": 0.6765868402697047, "grad_norm": 2.1576276796446665, "learning_rate": 9.529724729240712e-06, "loss": 0.7898, "step": 485 }, { "epoch": 0.677981864682632, "grad_norm": 2.2797498653244928, "learning_rate": 9.526278147668104e-06, "loss": 0.7759, "step": 486 }, { "epoch": 0.6793768890955592, "grad_norm": 2.4421042650111366, "learning_rate": 9.522819610335721e-06, "loss": 0.8093, "step": 487 }, { "epoch": 0.6807719135084864, "grad_norm": 2.0880499931957113, "learning_rate": 9.519349126378975e-06, "loss": 0.7914, "step": 488 }, { "epoch": 0.6821669379214136, "grad_norm": 2.4602935705769418, "learning_rate": 9.515866704964846e-06, "loss": 0.8543, "step": 489 }, { "epoch": 0.6835619623343409, "grad_norm": 2.247986905180214, "learning_rate": 9.512372355291838e-06, "loss": 0.8071, "step": 490 }, { "epoch": 0.6849569867472681, "grad_norm": 2.1045884857863975, "learning_rate": 9.50886608658996e-06, "loss": 0.7482, "step": 491 }, { "epoch": 0.6863520111601953, "grad_norm": 2.274881968805188, "learning_rate": 9.505347908120712e-06, "loss": 0.8398, "step": 492 }, { "epoch": 0.6877470355731226, "grad_norm": 2.302458995266219, "learning_rate": 9.501817829177046e-06, "loss": 0.8213, "step": 493 }, { "epoch": 0.6891420599860497, "grad_norm": 2.305167017793429, "learning_rate": 9.498275859083353e-06, "loss": 0.8259, "step": 494 }, { "epoch": 0.690537084398977, "grad_norm": 2.161649043172176, "learning_rate": 9.494722007195427e-06, "loss": 0.8019, "step": 495 }, { "epoch": 0.6919321088119043, "grad_norm": 2.233432312998205, "learning_rate": 9.491156282900454e-06, "loss": 0.8167, "step": 496 }, { "epoch": 0.6933271332248314, "grad_norm": 2.1601417212055827, "learning_rate": 9.487578695616974e-06, "loss": 0.9232, "step": 497 }, { "epoch": 0.6947221576377587, "grad_norm": 2.2285469876761734, "learning_rate": 9.483989254794865e-06, "loss": 0.8368, "step": 498 }, { "epoch": 0.6961171820506858, "grad_norm": 2.051054320555537, "learning_rate": 9.480387969915318e-06, "loss": 0.721, "step": 499 }, { "epoch": 0.6975122064636131, "grad_norm": 2.2064280469470114, "learning_rate": 9.476774850490803e-06, "loss": 0.8401, "step": 500 }, { "epoch": 0.6989072308765404, "grad_norm": 2.1474575452298876, "learning_rate": 9.47314990606505e-06, "loss": 0.8201, "step": 501 }, { "epoch": 0.7003022552894675, "grad_norm": 2.2186551401693846, "learning_rate": 9.46951314621303e-06, "loss": 0.7948, "step": 502 }, { "epoch": 0.7016972797023948, "grad_norm": 2.4150941665153, "learning_rate": 9.465864580540917e-06, "loss": 0.9115, "step": 503 }, { "epoch": 0.703092304115322, "grad_norm": 2.1746602063808345, "learning_rate": 9.462204218686075e-06, "loss": 0.7979, "step": 504 }, { "epoch": 0.7044873285282492, "grad_norm": 2.224821042793652, "learning_rate": 9.458532070317021e-06, "loss": 0.8215, "step": 505 }, { "epoch": 0.7058823529411765, "grad_norm": 2.039965290237771, "learning_rate": 9.454848145133406e-06, "loss": 0.8253, "step": 506 }, { "epoch": 0.7072773773541037, "grad_norm": 2.354790044909396, "learning_rate": 9.451152452865991e-06, "loss": 0.8017, "step": 507 }, { "epoch": 0.708672401767031, "grad_norm": 2.1350613101570186, "learning_rate": 9.447445003276618e-06, "loss": 0.7478, "step": 508 }, { "epoch": 0.7100674261799581, "grad_norm": 2.3273865714152397, "learning_rate": 9.443725806158182e-06, "loss": 0.8478, "step": 509 }, { "epoch": 0.7114624505928854, "grad_norm": 2.2295753480960623, "learning_rate": 9.439994871334614e-06, "loss": 0.8073, "step": 510 }, { "epoch": 0.7128574750058126, "grad_norm": 2.369639409423001, "learning_rate": 9.43625220866084e-06, "loss": 0.802, "step": 511 }, { "epoch": 0.7142524994187398, "grad_norm": 2.296779066323846, "learning_rate": 9.432497828022775e-06, "loss": 0.7908, "step": 512 }, { "epoch": 0.7156475238316671, "grad_norm": 2.306805383382658, "learning_rate": 9.428731739337277e-06, "loss": 0.9246, "step": 513 }, { "epoch": 0.7170425482445942, "grad_norm": 2.349944015426871, "learning_rate": 9.424953952552134e-06, "loss": 0.8527, "step": 514 }, { "epoch": 0.7184375726575215, "grad_norm": 2.1991822134592103, "learning_rate": 9.421164477646031e-06, "loss": 0.7972, "step": 515 }, { "epoch": 0.7198325970704488, "grad_norm": 2.5461019064994828, "learning_rate": 9.41736332462853e-06, "loss": 0.8648, "step": 516 }, { "epoch": 0.7212276214833759, "grad_norm": 2.3625066881621866, "learning_rate": 9.413550503540039e-06, "loss": 0.8339, "step": 517 }, { "epoch": 0.7226226458963032, "grad_norm": 2.1967417878447812, "learning_rate": 9.409726024451781e-06, "loss": 0.8252, "step": 518 }, { "epoch": 0.7240176703092304, "grad_norm": 2.121757089114745, "learning_rate": 9.40588989746578e-06, "loss": 0.775, "step": 519 }, { "epoch": 0.7254126947221576, "grad_norm": 2.1774124828253596, "learning_rate": 9.402042132714817e-06, "loss": 0.7909, "step": 520 }, { "epoch": 0.7268077191350849, "grad_norm": 2.284913602340862, "learning_rate": 9.398182740362424e-06, "loss": 0.833, "step": 521 }, { "epoch": 0.7282027435480121, "grad_norm": 2.2419194899796406, "learning_rate": 9.39431173060284e-06, "loss": 0.8295, "step": 522 }, { "epoch": 0.7295977679609393, "grad_norm": 2.16186818015941, "learning_rate": 9.390429113660993e-06, "loss": 0.7905, "step": 523 }, { "epoch": 0.7309927923738665, "grad_norm": 2.399199846340772, "learning_rate": 9.38653489979247e-06, "loss": 0.8646, "step": 524 }, { "epoch": 0.7323878167867938, "grad_norm": 2.368646891632743, "learning_rate": 9.382629099283486e-06, "loss": 0.8048, "step": 525 }, { "epoch": 0.733782841199721, "grad_norm": 2.157479631529037, "learning_rate": 9.378711722450866e-06, "loss": 0.8215, "step": 526 }, { "epoch": 0.7351778656126482, "grad_norm": 2.1792217689573095, "learning_rate": 9.374782779642013e-06, "loss": 0.7836, "step": 527 }, { "epoch": 0.7365728900255755, "grad_norm": 2.196632942682262, "learning_rate": 9.370842281234876e-06, "loss": 0.8332, "step": 528 }, { "epoch": 0.7379679144385026, "grad_norm": 2.101557798203941, "learning_rate": 9.366890237637932e-06, "loss": 0.796, "step": 529 }, { "epoch": 0.7393629388514299, "grad_norm": 2.140753587077071, "learning_rate": 9.362926659290149e-06, "loss": 0.7453, "step": 530 }, { "epoch": 0.7407579632643572, "grad_norm": 2.1660720656754893, "learning_rate": 9.358951556660968e-06, "loss": 0.8028, "step": 531 }, { "epoch": 0.7421529876772843, "grad_norm": 2.2326794559322525, "learning_rate": 9.354964940250269e-06, "loss": 0.7763, "step": 532 }, { "epoch": 0.7435480120902116, "grad_norm": 2.1577597500739936, "learning_rate": 9.35096682058834e-06, "loss": 0.7912, "step": 533 }, { "epoch": 0.7449430365031388, "grad_norm": 2.188406421531907, "learning_rate": 9.346957208235857e-06, "loss": 0.8417, "step": 534 }, { "epoch": 0.746338060916066, "grad_norm": 2.1109463368547754, "learning_rate": 9.342936113783855e-06, "loss": 0.7992, "step": 535 }, { "epoch": 0.7477330853289933, "grad_norm": 2.347691994611441, "learning_rate": 9.338903547853698e-06, "loss": 0.8537, "step": 536 }, { "epoch": 0.7491281097419205, "grad_norm": 2.3545676460932494, "learning_rate": 9.334859521097046e-06, "loss": 0.8066, "step": 537 }, { "epoch": 0.7505231341548477, "grad_norm": 2.0066507797248874, "learning_rate": 9.330804044195836e-06, "loss": 0.788, "step": 538 }, { "epoch": 0.7519181585677749, "grad_norm": 2.206811829304449, "learning_rate": 9.326737127862249e-06, "loss": 0.798, "step": 539 }, { "epoch": 0.7533131829807022, "grad_norm": 2.364712378887019, "learning_rate": 9.32265878283868e-06, "loss": 0.7977, "step": 540 }, { "epoch": 0.7547082073936294, "grad_norm": 2.275553955446557, "learning_rate": 9.318569019897713e-06, "loss": 0.8479, "step": 541 }, { "epoch": 0.7561032318065566, "grad_norm": 2.2450177314420516, "learning_rate": 9.314467849842093e-06, "loss": 0.802, "step": 542 }, { "epoch": 0.7574982562194839, "grad_norm": 2.368057321045666, "learning_rate": 9.310355283504696e-06, "loss": 0.8539, "step": 543 }, { "epoch": 0.758893280632411, "grad_norm": 2.142881035592353, "learning_rate": 9.306231331748496e-06, "loss": 0.8093, "step": 544 }, { "epoch": 0.7602883050453383, "grad_norm": 2.1712844758320866, "learning_rate": 9.302096005466547e-06, "loss": 0.7281, "step": 545 }, { "epoch": 0.7616833294582656, "grad_norm": 2.4164064830437595, "learning_rate": 9.29794931558194e-06, "loss": 0.8282, "step": 546 }, { "epoch": 0.7630783538711927, "grad_norm": 2.3538818205404004, "learning_rate": 9.29379127304779e-06, "loss": 0.8432, "step": 547 }, { "epoch": 0.76447337828412, "grad_norm": 2.214871715641266, "learning_rate": 9.289621888847194e-06, "loss": 0.8642, "step": 548 }, { "epoch": 0.7658684026970471, "grad_norm": 2.0727194415210017, "learning_rate": 9.285441173993207e-06, "loss": 0.719, "step": 549 }, { "epoch": 0.7672634271099744, "grad_norm": 2.3950797215809354, "learning_rate": 9.281249139528816e-06, "loss": 0.8693, "step": 550 }, { "epoch": 0.7686584515229017, "grad_norm": 2.234753257386714, "learning_rate": 9.277045796526904e-06, "loss": 0.8236, "step": 551 }, { "epoch": 0.7700534759358288, "grad_norm": 2.1978947273278284, "learning_rate": 9.272831156090229e-06, "loss": 0.8311, "step": 552 }, { "epoch": 0.7714485003487561, "grad_norm": 2.1748260828072254, "learning_rate": 9.268605229351387e-06, "loss": 0.8083, "step": 553 }, { "epoch": 0.7728435247616833, "grad_norm": 2.273649960995849, "learning_rate": 9.264368027472785e-06, "loss": 0.8218, "step": 554 }, { "epoch": 0.7742385491746105, "grad_norm": 2.1608644896350553, "learning_rate": 9.260119561646614e-06, "loss": 0.7582, "step": 555 }, { "epoch": 0.7756335735875378, "grad_norm": 2.160288750353843, "learning_rate": 9.255859843094817e-06, "loss": 0.8247, "step": 556 }, { "epoch": 0.777028598000465, "grad_norm": 2.1816042019779798, "learning_rate": 9.25158888306906e-06, "loss": 0.7732, "step": 557 }, { "epoch": 0.7784236224133922, "grad_norm": 2.2767243104461703, "learning_rate": 9.247306692850705e-06, "loss": 0.814, "step": 558 }, { "epoch": 0.7798186468263194, "grad_norm": 2.295857518976726, "learning_rate": 9.243013283750774e-06, "loss": 0.8582, "step": 559 }, { "epoch": 0.7812136712392467, "grad_norm": 2.227650216873973, "learning_rate": 9.238708667109924e-06, "loss": 0.815, "step": 560 }, { "epoch": 0.782608695652174, "grad_norm": 2.232468495079149, "learning_rate": 9.234392854298414e-06, "loss": 0.846, "step": 561 }, { "epoch": 0.7840037200651011, "grad_norm": 2.078287087145143, "learning_rate": 9.230065856716081e-06, "loss": 0.7768, "step": 562 }, { "epoch": 0.7853987444780284, "grad_norm": 2.1194103350023923, "learning_rate": 9.225727685792302e-06, "loss": 0.8093, "step": 563 }, { "epoch": 0.7867937688909556, "grad_norm": 2.3150542675754866, "learning_rate": 9.221378352985967e-06, "loss": 0.8736, "step": 564 }, { "epoch": 0.7881887933038828, "grad_norm": 2.2972975434124097, "learning_rate": 9.217017869785453e-06, "loss": 0.7981, "step": 565 }, { "epoch": 0.7895838177168101, "grad_norm": 2.2485473560920974, "learning_rate": 9.212646247708585e-06, "loss": 0.8308, "step": 566 }, { "epoch": 0.7909788421297372, "grad_norm": 2.299695444357409, "learning_rate": 9.208263498302613e-06, "loss": 0.8582, "step": 567 }, { "epoch": 0.7923738665426645, "grad_norm": 2.207139879402085, "learning_rate": 9.203869633144182e-06, "loss": 0.8227, "step": 568 }, { "epoch": 0.7937688909555918, "grad_norm": 2.185143379644172, "learning_rate": 9.19946466383929e-06, "loss": 0.7987, "step": 569 }, { "epoch": 0.7951639153685189, "grad_norm": 2.0867990066231705, "learning_rate": 9.19504860202327e-06, "loss": 0.7518, "step": 570 }, { "epoch": 0.7965589397814462, "grad_norm": 2.179194149821272, "learning_rate": 9.19062145936076e-06, "loss": 0.7301, "step": 571 }, { "epoch": 0.7979539641943734, "grad_norm": 2.3607340125565104, "learning_rate": 9.186183247545657e-06, "loss": 0.8974, "step": 572 }, { "epoch": 0.7993489886073006, "grad_norm": 2.1649867875828948, "learning_rate": 9.181733978301103e-06, "loss": 0.7966, "step": 573 }, { "epoch": 0.8007440130202279, "grad_norm": 2.246906322617855, "learning_rate": 9.177273663379449e-06, "loss": 0.762, "step": 574 }, { "epoch": 0.8021390374331551, "grad_norm": 2.3549495148480015, "learning_rate": 9.172802314562214e-06, "loss": 0.8477, "step": 575 }, { "epoch": 0.8035340618460823, "grad_norm": 2.118707797809669, "learning_rate": 9.16831994366007e-06, "loss": 0.7396, "step": 576 }, { "epoch": 0.8049290862590095, "grad_norm": 2.214004107188421, "learning_rate": 9.1638265625128e-06, "loss": 0.7578, "step": 577 }, { "epoch": 0.8063241106719368, "grad_norm": 2.3466224304780203, "learning_rate": 9.159322182989265e-06, "loss": 0.7956, "step": 578 }, { "epoch": 0.807719135084864, "grad_norm": 2.237364591530864, "learning_rate": 9.154806816987386e-06, "loss": 0.7407, "step": 579 }, { "epoch": 0.8091141594977912, "grad_norm": 2.159116646919434, "learning_rate": 9.150280476434098e-06, "loss": 0.7715, "step": 580 }, { "epoch": 0.8105091839107185, "grad_norm": 2.1952490317516014, "learning_rate": 9.145743173285325e-06, "loss": 0.7449, "step": 581 }, { "epoch": 0.8119042083236456, "grad_norm": 2.0798580524661694, "learning_rate": 9.141194919525949e-06, "loss": 0.8077, "step": 582 }, { "epoch": 0.8132992327365729, "grad_norm": 2.307763203994994, "learning_rate": 9.136635727169776e-06, "loss": 0.8187, "step": 583 }, { "epoch": 0.8146942571495002, "grad_norm": 2.2299563737624144, "learning_rate": 9.132065608259505e-06, "loss": 0.8549, "step": 584 }, { "epoch": 0.8160892815624273, "grad_norm": 2.194241554157894, "learning_rate": 9.127484574866699e-06, "loss": 0.8155, "step": 585 }, { "epoch": 0.8174843059753546, "grad_norm": 2.1577782867023565, "learning_rate": 9.122892639091748e-06, "loss": 0.8332, "step": 586 }, { "epoch": 0.8188793303882818, "grad_norm": 2.0686875530439477, "learning_rate": 9.118289813063842e-06, "loss": 0.7642, "step": 587 }, { "epoch": 0.820274354801209, "grad_norm": 2.150110810215111, "learning_rate": 9.11367610894093e-06, "loss": 0.8038, "step": 588 }, { "epoch": 0.8216693792141363, "grad_norm": 2.147340098567537, "learning_rate": 9.109051538909707e-06, "loss": 0.7447, "step": 589 }, { "epoch": 0.8230644036270635, "grad_norm": 2.250460029110901, "learning_rate": 9.104416115185557e-06, "loss": 0.8511, "step": 590 }, { "epoch": 0.8244594280399907, "grad_norm": 2.2951707558575913, "learning_rate": 9.099769850012539e-06, "loss": 0.8622, "step": 591 }, { "epoch": 0.8258544524529179, "grad_norm": 2.1688485016498062, "learning_rate": 9.095112755663349e-06, "loss": 0.7793, "step": 592 }, { "epoch": 0.8272494768658452, "grad_norm": 2.232478837452937, "learning_rate": 9.090444844439284e-06, "loss": 0.7877, "step": 593 }, { "epoch": 0.8286445012787724, "grad_norm": 2.188895683351632, "learning_rate": 9.085766128670218e-06, "loss": 0.8474, "step": 594 }, { "epoch": 0.8300395256916996, "grad_norm": 2.3037777929433845, "learning_rate": 9.08107662071456e-06, "loss": 0.8095, "step": 595 }, { "epoch": 0.8314345501046269, "grad_norm": 2.263787870846122, "learning_rate": 9.076376332959222e-06, "loss": 0.8419, "step": 596 }, { "epoch": 0.832829574517554, "grad_norm": 2.2629592541944965, "learning_rate": 9.071665277819603e-06, "loss": 0.7635, "step": 597 }, { "epoch": 0.8342245989304813, "grad_norm": 2.177401060744137, "learning_rate": 9.066943467739529e-06, "loss": 0.8035, "step": 598 }, { "epoch": 0.8356196233434086, "grad_norm": 2.412836543422082, "learning_rate": 9.06221091519124e-06, "loss": 0.8687, "step": 599 }, { "epoch": 0.8370146477563357, "grad_norm": 2.3460319061805106, "learning_rate": 9.057467632675357e-06, "loss": 0.881, "step": 600 }, { "epoch": 0.838409672169263, "grad_norm": 2.117132327581538, "learning_rate": 9.05271363272083e-06, "loss": 0.8043, "step": 601 }, { "epoch": 0.8398046965821901, "grad_norm": 2.24068972399544, "learning_rate": 9.047948927884927e-06, "loss": 0.7976, "step": 602 }, { "epoch": 0.8411997209951174, "grad_norm": 2.230559960025214, "learning_rate": 9.043173530753196e-06, "loss": 0.7925, "step": 603 }, { "epoch": 0.8425947454080447, "grad_norm": 2.3516196160265546, "learning_rate": 9.038387453939416e-06, "loss": 0.8043, "step": 604 }, { "epoch": 0.8439897698209718, "grad_norm": 2.2237291381402557, "learning_rate": 9.033590710085584e-06, "loss": 0.7683, "step": 605 }, { "epoch": 0.8453847942338991, "grad_norm": 2.2235956917186703, "learning_rate": 9.028783311861874e-06, "loss": 0.7579, "step": 606 }, { "epoch": 0.8467798186468263, "grad_norm": 2.0935326246322865, "learning_rate": 9.023965271966595e-06, "loss": 0.7542, "step": 607 }, { "epoch": 0.8481748430597535, "grad_norm": 2.192159824092134, "learning_rate": 9.019136603126171e-06, "loss": 0.8114, "step": 608 }, { "epoch": 0.8495698674726808, "grad_norm": 2.149661416187894, "learning_rate": 9.0142973180951e-06, "loss": 0.8014, "step": 609 }, { "epoch": 0.850964891885608, "grad_norm": 2.462653571927151, "learning_rate": 9.00944742965592e-06, "loss": 0.8659, "step": 610 }, { "epoch": 0.8523599162985352, "grad_norm": 2.0384955850280253, "learning_rate": 9.004586950619182e-06, "loss": 0.7714, "step": 611 }, { "epoch": 0.8537549407114624, "grad_norm": 2.321657690945843, "learning_rate": 8.999715893823404e-06, "loss": 0.8435, "step": 612 }, { "epoch": 0.8551499651243897, "grad_norm": 2.1591163634210937, "learning_rate": 8.994834272135049e-06, "loss": 0.7982, "step": 613 }, { "epoch": 0.856544989537317, "grad_norm": 2.2230369574469333, "learning_rate": 8.989942098448485e-06, "loss": 0.7865, "step": 614 }, { "epoch": 0.8579400139502441, "grad_norm": 2.208095935822042, "learning_rate": 8.985039385685952e-06, "loss": 0.8035, "step": 615 }, { "epoch": 0.8593350383631714, "grad_norm": 2.0985976111187816, "learning_rate": 8.98012614679753e-06, "loss": 0.7269, "step": 616 }, { "epoch": 0.8607300627760985, "grad_norm": 2.35121436714255, "learning_rate": 8.975202394761098e-06, "loss": 0.7976, "step": 617 }, { "epoch": 0.8621250871890258, "grad_norm": 2.3082541381386825, "learning_rate": 8.970268142582312e-06, "loss": 0.8035, "step": 618 }, { "epoch": 0.8635201116019531, "grad_norm": 2.2333124229220656, "learning_rate": 8.965323403294553e-06, "loss": 0.7694, "step": 619 }, { "epoch": 0.8649151360148802, "grad_norm": 2.2316755443884273, "learning_rate": 8.960368189958913e-06, "loss": 0.8237, "step": 620 }, { "epoch": 0.8663101604278075, "grad_norm": 2.311959848396848, "learning_rate": 8.955402515664144e-06, "loss": 0.8223, "step": 621 }, { "epoch": 0.8677051848407347, "grad_norm": 2.2670000561230124, "learning_rate": 8.950426393526633e-06, "loss": 0.7984, "step": 622 }, { "epoch": 0.8691002092536619, "grad_norm": 2.0863045982906185, "learning_rate": 8.945439836690359e-06, "loss": 0.7796, "step": 623 }, { "epoch": 0.8704952336665892, "grad_norm": 2.1228088383873893, "learning_rate": 8.940442858326871e-06, "loss": 0.7472, "step": 624 }, { "epoch": 0.8718902580795164, "grad_norm": 2.136053741383317, "learning_rate": 8.935435471635238e-06, "loss": 0.7986, "step": 625 }, { "epoch": 0.8732852824924436, "grad_norm": 2.126129290014962, "learning_rate": 8.93041768984203e-06, "loss": 0.7513, "step": 626 }, { "epoch": 0.8746803069053708, "grad_norm": 2.259497835695178, "learning_rate": 8.925389526201264e-06, "loss": 0.8019, "step": 627 }, { "epoch": 0.8760753313182981, "grad_norm": 2.066975723218542, "learning_rate": 8.920350993994387e-06, "loss": 0.7263, "step": 628 }, { "epoch": 0.8774703557312253, "grad_norm": 2.271122807776859, "learning_rate": 8.915302106530234e-06, "loss": 0.7809, "step": 629 }, { "epoch": 0.8788653801441525, "grad_norm": 2.1538099601290344, "learning_rate": 8.91024287714499e-06, "loss": 0.7693, "step": 630 }, { "epoch": 0.8802604045570798, "grad_norm": 2.1527263725970873, "learning_rate": 8.905173319202159e-06, "loss": 0.8112, "step": 631 }, { "epoch": 0.8816554289700069, "grad_norm": 2.123231944845647, "learning_rate": 8.900093446092523e-06, "loss": 0.7846, "step": 632 }, { "epoch": 0.8830504533829342, "grad_norm": 2.0863522805853076, "learning_rate": 8.895003271234116e-06, "loss": 0.9058, "step": 633 }, { "epoch": 0.8844454777958615, "grad_norm": 2.174647085232233, "learning_rate": 8.889902808072178e-06, "loss": 0.7502, "step": 634 }, { "epoch": 0.8858405022087886, "grad_norm": 2.1179203273198395, "learning_rate": 8.884792070079128e-06, "loss": 0.7909, "step": 635 }, { "epoch": 0.8872355266217159, "grad_norm": 2.090308987529134, "learning_rate": 8.879671070754527e-06, "loss": 0.807, "step": 636 }, { "epoch": 0.8886305510346431, "grad_norm": 2.250271839513656, "learning_rate": 8.874539823625037e-06, "loss": 0.8272, "step": 637 }, { "epoch": 0.8900255754475703, "grad_norm": 2.0943896904848454, "learning_rate": 8.869398342244387e-06, "loss": 0.7752, "step": 638 }, { "epoch": 0.8914205998604976, "grad_norm": 2.4422949920700003, "learning_rate": 8.86424664019334e-06, "loss": 0.8656, "step": 639 }, { "epoch": 0.8928156242734248, "grad_norm": 2.0853910470273576, "learning_rate": 8.859084731079664e-06, "loss": 0.7947, "step": 640 }, { "epoch": 0.894210648686352, "grad_norm": 2.174716825900607, "learning_rate": 8.853912628538072e-06, "loss": 0.7896, "step": 641 }, { "epoch": 0.8956056730992792, "grad_norm": 2.206326528316615, "learning_rate": 8.84873034623022e-06, "loss": 0.7354, "step": 642 }, { "epoch": 0.8970006975122065, "grad_norm": 2.143967980297265, "learning_rate": 8.84353789784464e-06, "loss": 0.8175, "step": 643 }, { "epoch": 0.8983957219251337, "grad_norm": 2.1331840075207413, "learning_rate": 8.83833529709672e-06, "loss": 0.8116, "step": 644 }, { "epoch": 0.8997907463380609, "grad_norm": 2.090702923582713, "learning_rate": 8.833122557728667e-06, "loss": 0.779, "step": 645 }, { "epoch": 0.9011857707509882, "grad_norm": 2.2899595983793346, "learning_rate": 8.827899693509467e-06, "loss": 0.8485, "step": 646 }, { "epoch": 0.9025807951639153, "grad_norm": 2.1294473966978424, "learning_rate": 8.82266671823485e-06, "loss": 0.8277, "step": 647 }, { "epoch": 0.9039758195768426, "grad_norm": 2.0927698013606464, "learning_rate": 8.817423645727252e-06, "loss": 0.7755, "step": 648 }, { "epoch": 0.9053708439897699, "grad_norm": 2.6509393173597835, "learning_rate": 8.812170489835784e-06, "loss": 0.8882, "step": 649 }, { "epoch": 0.906765868402697, "grad_norm": 2.257769377390973, "learning_rate": 8.806907264436183e-06, "loss": 0.7663, "step": 650 }, { "epoch": 0.9081608928156243, "grad_norm": 2.0686768112539218, "learning_rate": 8.801633983430794e-06, "loss": 0.7401, "step": 651 }, { "epoch": 0.9095559172285514, "grad_norm": 1.9527827751633693, "learning_rate": 8.796350660748516e-06, "loss": 0.7491, "step": 652 }, { "epoch": 0.9109509416414787, "grad_norm": 1.9782348386809452, "learning_rate": 8.791057310344775e-06, "loss": 0.7965, "step": 653 }, { "epoch": 0.912345966054406, "grad_norm": 2.2409303687940967, "learning_rate": 8.785753946201484e-06, "loss": 0.8988, "step": 654 }, { "epoch": 0.9137409904673331, "grad_norm": 2.1366605619518864, "learning_rate": 8.780440582327005e-06, "loss": 0.7759, "step": 655 }, { "epoch": 0.9151360148802604, "grad_norm": 2.1054648306830535, "learning_rate": 8.775117232756116e-06, "loss": 0.7425, "step": 656 }, { "epoch": 0.9165310392931876, "grad_norm": 2.2642902684639745, "learning_rate": 8.769783911549968e-06, "loss": 0.833, "step": 657 }, { "epoch": 0.9179260637061148, "grad_norm": 2.120609297273577, "learning_rate": 8.764440632796055e-06, "loss": 0.7979, "step": 658 }, { "epoch": 0.9193210881190421, "grad_norm": 2.0959633211183637, "learning_rate": 8.75908741060817e-06, "loss": 0.8156, "step": 659 }, { "epoch": 0.9207161125319693, "grad_norm": 2.1202763700612928, "learning_rate": 8.75372425912637e-06, "loss": 0.7873, "step": 660 }, { "epoch": 0.9221111369448965, "grad_norm": 2.411872744178197, "learning_rate": 8.748351192516943e-06, "loss": 0.8793, "step": 661 }, { "epoch": 0.9235061613578237, "grad_norm": 2.342679726506329, "learning_rate": 8.742968224972366e-06, "loss": 0.7882, "step": 662 }, { "epoch": 0.924901185770751, "grad_norm": 2.0784385315490588, "learning_rate": 8.737575370711265e-06, "loss": 0.7883, "step": 663 }, { "epoch": 0.9262962101836782, "grad_norm": 2.1204799912916803, "learning_rate": 8.732172643978383e-06, "loss": 0.8181, "step": 664 }, { "epoch": 0.9276912345966054, "grad_norm": 2.2952701504383968, "learning_rate": 8.726760059044542e-06, "loss": 0.801, "step": 665 }, { "epoch": 0.9290862590095327, "grad_norm": 2.2341352318249292, "learning_rate": 8.721337630206603e-06, "loss": 0.8726, "step": 666 }, { "epoch": 0.93048128342246, "grad_norm": 2.176335486406989, "learning_rate": 8.715905371787426e-06, "loss": 0.837, "step": 667 }, { "epoch": 0.9318763078353871, "grad_norm": 2.1115795089554434, "learning_rate": 8.710463298135836e-06, "loss": 0.7353, "step": 668 }, { "epoch": 0.9332713322483144, "grad_norm": 2.020997296314927, "learning_rate": 8.705011423626589e-06, "loss": 0.8335, "step": 669 }, { "epoch": 0.9346663566612415, "grad_norm": 2.2936272822069577, "learning_rate": 8.699549762660318e-06, "loss": 0.7897, "step": 670 }, { "epoch": 0.9360613810741688, "grad_norm": 2.150345016304781, "learning_rate": 8.69407832966352e-06, "loss": 0.8069, "step": 671 }, { "epoch": 0.9374564054870961, "grad_norm": 2.2433294524911056, "learning_rate": 8.688597139088494e-06, "loss": 0.809, "step": 672 }, { "epoch": 0.9388514299000232, "grad_norm": 1.9358633429206555, "learning_rate": 8.683106205413316e-06, "loss": 0.7729, "step": 673 }, { "epoch": 0.9402464543129505, "grad_norm": 1.9235354526724444, "learning_rate": 8.677605543141797e-06, "loss": 0.7401, "step": 674 }, { "epoch": 0.9416414787258777, "grad_norm": 2.3758881329662156, "learning_rate": 8.672095166803445e-06, "loss": 0.8262, "step": 675 }, { "epoch": 0.9430365031388049, "grad_norm": 2.055597722835468, "learning_rate": 8.666575090953426e-06, "loss": 0.7997, "step": 676 }, { "epoch": 0.9444315275517322, "grad_norm": 2.254232354640682, "learning_rate": 8.661045330172533e-06, "loss": 0.872, "step": 677 }, { "epoch": 0.9458265519646594, "grad_norm": 2.1285947234425544, "learning_rate": 8.65550589906713e-06, "loss": 0.7675, "step": 678 }, { "epoch": 0.9472215763775866, "grad_norm": 2.042807876152001, "learning_rate": 8.649956812269134e-06, "loss": 0.7574, "step": 679 }, { "epoch": 0.9486166007905138, "grad_norm": 2.0161979673445063, "learning_rate": 8.644398084435959e-06, "loss": 0.7472, "step": 680 }, { "epoch": 0.9500116252034411, "grad_norm": 2.1460083809250268, "learning_rate": 8.63882973025049e-06, "loss": 0.7915, "step": 681 }, { "epoch": 0.9514066496163683, "grad_norm": 2.0938906686754355, "learning_rate": 8.63325176442104e-06, "loss": 0.8237, "step": 682 }, { "epoch": 0.9528016740292955, "grad_norm": 2.1331759743749283, "learning_rate": 8.627664201681305e-06, "loss": 0.7822, "step": 683 }, { "epoch": 0.9541966984422228, "grad_norm": 2.1874575912264604, "learning_rate": 8.622067056790333e-06, "loss": 0.8402, "step": 684 }, { "epoch": 0.9555917228551499, "grad_norm": 2.041452366515073, "learning_rate": 8.616460344532483e-06, "loss": 0.7999, "step": 685 }, { "epoch": 0.9569867472680772, "grad_norm": 2.0516361438430373, "learning_rate": 8.610844079717387e-06, "loss": 0.7378, "step": 686 }, { "epoch": 0.9583817716810045, "grad_norm": 2.1589162691581967, "learning_rate": 8.605218277179907e-06, "loss": 0.8367, "step": 687 }, { "epoch": 0.9597767960939316, "grad_norm": 1.846207631710272, "learning_rate": 8.599582951780095e-06, "loss": 0.7084, "step": 688 }, { "epoch": 0.9611718205068589, "grad_norm": 2.111618160699312, "learning_rate": 8.593938118403164e-06, "loss": 0.7717, "step": 689 }, { "epoch": 0.9625668449197861, "grad_norm": 2.2184821954545466, "learning_rate": 8.588283791959437e-06, "loss": 0.7956, "step": 690 }, { "epoch": 0.9639618693327133, "grad_norm": 2.1860286842566157, "learning_rate": 8.582619987384311e-06, "loss": 0.7781, "step": 691 }, { "epoch": 0.9653568937456406, "grad_norm": 1.9796500924091212, "learning_rate": 8.57694671963822e-06, "loss": 0.7474, "step": 692 }, { "epoch": 0.9667519181585678, "grad_norm": 2.228372418106956, "learning_rate": 8.571264003706596e-06, "loss": 0.8164, "step": 693 }, { "epoch": 0.968146942571495, "grad_norm": 2.012263610261973, "learning_rate": 8.565571854599825e-06, "loss": 0.7635, "step": 694 }, { "epoch": 0.9695419669844222, "grad_norm": 2.2182554295673853, "learning_rate": 8.559870287353214e-06, "loss": 0.837, "step": 695 }, { "epoch": 0.9709369913973495, "grad_norm": 2.100638717714963, "learning_rate": 8.554159317026939e-06, "loss": 0.7615, "step": 696 }, { "epoch": 0.9723320158102767, "grad_norm": 2.1135451200711612, "learning_rate": 8.548438958706022e-06, "loss": 0.8345, "step": 697 }, { "epoch": 0.9737270402232039, "grad_norm": 2.2580934549512817, "learning_rate": 8.542709227500276e-06, "loss": 0.8589, "step": 698 }, { "epoch": 0.9751220646361312, "grad_norm": 2.100187153454962, "learning_rate": 8.536970138544278e-06, "loss": 0.7767, "step": 699 }, { "epoch": 0.9765170890490583, "grad_norm": 2.0908497405841358, "learning_rate": 8.531221706997316e-06, "loss": 0.7522, "step": 700 }, { "epoch": 0.9779121134619856, "grad_norm": 2.0910790644848, "learning_rate": 8.525463948043365e-06, "loss": 0.7858, "step": 701 }, { "epoch": 0.9793071378749129, "grad_norm": 2.183136575898104, "learning_rate": 8.519696876891024e-06, "loss": 0.7376, "step": 702 }, { "epoch": 0.98070216228784, "grad_norm": 2.12495711611419, "learning_rate": 8.513920508773499e-06, "loss": 0.7928, "step": 703 }, { "epoch": 0.9820971867007673, "grad_norm": 2.1198128853357465, "learning_rate": 8.508134858948553e-06, "loss": 0.785, "step": 704 }, { "epoch": 0.9834922111136944, "grad_norm": 2.1175204727975037, "learning_rate": 8.502339942698463e-06, "loss": 0.7525, "step": 705 }, { "epoch": 0.9848872355266217, "grad_norm": 2.171988690488729, "learning_rate": 8.496535775329982e-06, "loss": 0.8338, "step": 706 }, { "epoch": 0.986282259939549, "grad_norm": 2.224602203576195, "learning_rate": 8.4907223721743e-06, "loss": 0.7751, "step": 707 }, { "epoch": 0.9876772843524761, "grad_norm": 2.0374734001125225, "learning_rate": 8.484899748587003e-06, "loss": 0.7594, "step": 708 }, { "epoch": 0.9890723087654034, "grad_norm": 2.2279026304895795, "learning_rate": 8.479067919948032e-06, "loss": 0.7706, "step": 709 }, { "epoch": 0.9904673331783306, "grad_norm": 2.2876202675935144, "learning_rate": 8.473226901661643e-06, "loss": 0.88, "step": 710 }, { "epoch": 0.9918623575912578, "grad_norm": 2.1809551862224317, "learning_rate": 8.46737670915636e-06, "loss": 0.7882, "step": 711 }, { "epoch": 0.9932573820041851, "grad_norm": 2.2296359504909544, "learning_rate": 8.46151735788495e-06, "loss": 0.7949, "step": 712 }, { "epoch": 0.9946524064171123, "grad_norm": 1.9530205160014311, "learning_rate": 8.455648863324364e-06, "loss": 0.7235, "step": 713 }, { "epoch": 0.9960474308300395, "grad_norm": 2.0305347472535207, "learning_rate": 8.449771240975707e-06, "loss": 0.772, "step": 714 }, { "epoch": 0.9974424552429667, "grad_norm": 2.089889653040237, "learning_rate": 8.443884506364192e-06, "loss": 0.7653, "step": 715 }, { "epoch": 0.998837479655894, "grad_norm": 2.2213187246939623, "learning_rate": 8.437988675039108e-06, "loss": 0.7672, "step": 716 }, { "epoch": 1.0013950244129273, "grad_norm": 2.9778651831376517, "learning_rate": 8.432083762573761e-06, "loss": 1.3734, "step": 717 }, { "epoch": 1.0027900488258545, "grad_norm": 2.1357737436527846, "learning_rate": 8.426169784565452e-06, "loss": 0.5951, "step": 718 }, { "epoch": 1.0041850732387816, "grad_norm": 2.162812219843759, "learning_rate": 8.420246756635431e-06, "loss": 0.5452, "step": 719 }, { "epoch": 1.0055800976517089, "grad_norm": 2.1717230606279765, "learning_rate": 8.414314694428842e-06, "loss": 0.6032, "step": 720 }, { "epoch": 1.0069751220646361, "grad_norm": 1.7931462562353695, "learning_rate": 8.408373613614699e-06, "loss": 0.616, "step": 721 }, { "epoch": 1.0083701464775634, "grad_norm": 2.211689871556629, "learning_rate": 8.40242352988584e-06, "loss": 0.5557, "step": 722 }, { "epoch": 1.0097651708904907, "grad_norm": 2.013782568605161, "learning_rate": 8.396464458958876e-06, "loss": 0.6373, "step": 723 }, { "epoch": 1.0111601953034177, "grad_norm": 2.312124281074968, "learning_rate": 8.390496416574166e-06, "loss": 0.5939, "step": 724 }, { "epoch": 1.012555219716345, "grad_norm": 2.336088278685003, "learning_rate": 8.384519418495755e-06, "loss": 0.6368, "step": 725 }, { "epoch": 1.0139502441292723, "grad_norm": 2.3958681560355752, "learning_rate": 8.378533480511355e-06, "loss": 0.59, "step": 726 }, { "epoch": 1.0153452685421995, "grad_norm": 2.7371332905115318, "learning_rate": 8.372538618432282e-06, "loss": 0.6265, "step": 727 }, { "epoch": 1.0167402929551268, "grad_norm": 2.514899501179025, "learning_rate": 8.366534848093434e-06, "loss": 0.6563, "step": 728 }, { "epoch": 1.0181353173680538, "grad_norm": 2.7113362767806093, "learning_rate": 8.360522185353234e-06, "loss": 0.5913, "step": 729 }, { "epoch": 1.0195303417809811, "grad_norm": 2.3119553563905995, "learning_rate": 8.354500646093592e-06, "loss": 0.5672, "step": 730 }, { "epoch": 1.0209253661939084, "grad_norm": 2.5617036280302883, "learning_rate": 8.348470246219872e-06, "loss": 0.6477, "step": 731 }, { "epoch": 1.0223203906068357, "grad_norm": 2.3922384614084815, "learning_rate": 8.342431001660826e-06, "loss": 0.5537, "step": 732 }, { "epoch": 1.023715415019763, "grad_norm": 2.1051180791848387, "learning_rate": 8.33638292836859e-06, "loss": 0.5924, "step": 733 }, { "epoch": 1.02511043943269, "grad_norm": 2.3459101129966182, "learning_rate": 8.330326042318605e-06, "loss": 0.6392, "step": 734 }, { "epoch": 1.0265054638456172, "grad_norm": 2.1898954940637556, "learning_rate": 8.324260359509594e-06, "loss": 0.6036, "step": 735 }, { "epoch": 1.0279004882585445, "grad_norm": 2.262863875727622, "learning_rate": 8.31818589596352e-06, "loss": 0.5686, "step": 736 }, { "epoch": 1.0292955126714718, "grad_norm": 2.444813348712447, "learning_rate": 8.312102667725534e-06, "loss": 0.6333, "step": 737 }, { "epoch": 1.030690537084399, "grad_norm": 2.1165074348442943, "learning_rate": 8.306010690863943e-06, "loss": 0.582, "step": 738 }, { "epoch": 1.032085561497326, "grad_norm": 2.32188248909724, "learning_rate": 8.299909981470159e-06, "loss": 0.6144, "step": 739 }, { "epoch": 1.0334805859102534, "grad_norm": 2.263877168109314, "learning_rate": 8.29380055565866e-06, "loss": 0.609, "step": 740 }, { "epoch": 1.0348756103231807, "grad_norm": 2.261974140779004, "learning_rate": 8.28768242956695e-06, "loss": 0.5674, "step": 741 }, { "epoch": 1.036270634736108, "grad_norm": 2.1267204938897017, "learning_rate": 8.281555619355515e-06, "loss": 0.6781, "step": 742 }, { "epoch": 1.0376656591490352, "grad_norm": 2.3421023645134342, "learning_rate": 8.275420141207775e-06, "loss": 0.6151, "step": 743 }, { "epoch": 1.0390606835619622, "grad_norm": 2.2365660564257106, "learning_rate": 8.269276011330048e-06, "loss": 0.5706, "step": 744 }, { "epoch": 1.0404557079748895, "grad_norm": 2.171412051842706, "learning_rate": 8.263123245951504e-06, "loss": 0.654, "step": 745 }, { "epoch": 1.0418507323878168, "grad_norm": 2.3304136182471025, "learning_rate": 8.256961861324127e-06, "loss": 0.5947, "step": 746 }, { "epoch": 1.043245756800744, "grad_norm": 2.459536182556965, "learning_rate": 8.250791873722662e-06, "loss": 0.5795, "step": 747 }, { "epoch": 1.0446407812136713, "grad_norm": 2.385814169613257, "learning_rate": 8.244613299444581e-06, "loss": 0.6587, "step": 748 }, { "epoch": 1.0460358056265984, "grad_norm": 2.336696870546526, "learning_rate": 8.238426154810035e-06, "loss": 0.5799, "step": 749 }, { "epoch": 1.0474308300395256, "grad_norm": 2.2858020943613098, "learning_rate": 8.232230456161819e-06, "loss": 0.5772, "step": 750 }, { "epoch": 1.048825854452453, "grad_norm": 2.4359529141310765, "learning_rate": 8.226026219865313e-06, "loss": 0.6181, "step": 751 }, { "epoch": 1.0502208788653802, "grad_norm": 2.363347854451932, "learning_rate": 8.219813462308458e-06, "loss": 0.5874, "step": 752 }, { "epoch": 1.0516159032783075, "grad_norm": 2.341235771781904, "learning_rate": 8.213592199901692e-06, "loss": 0.5443, "step": 753 }, { "epoch": 1.0530109276912345, "grad_norm": 2.453634908739413, "learning_rate": 8.207362449077932e-06, "loss": 0.5728, "step": 754 }, { "epoch": 1.0544059521041618, "grad_norm": 2.3088723279066, "learning_rate": 8.201124226292505e-06, "loss": 0.5903, "step": 755 }, { "epoch": 1.055800976517089, "grad_norm": 2.242800400926247, "learning_rate": 8.19487754802312e-06, "loss": 0.5556, "step": 756 }, { "epoch": 1.0571960009300163, "grad_norm": 2.438157342272562, "learning_rate": 8.18862243076982e-06, "loss": 0.5679, "step": 757 }, { "epoch": 1.0585910253429436, "grad_norm": 2.302675384750607, "learning_rate": 8.18235889105494e-06, "loss": 0.5315, "step": 758 }, { "epoch": 1.0599860497558706, "grad_norm": 2.3803035677407802, "learning_rate": 8.17608694542306e-06, "loss": 0.5905, "step": 759 }, { "epoch": 1.061381074168798, "grad_norm": 2.2196578592419947, "learning_rate": 8.169806610440966e-06, "loss": 0.6342, "step": 760 }, { "epoch": 1.0627760985817252, "grad_norm": 2.4198993631070524, "learning_rate": 8.163517902697602e-06, "loss": 0.6314, "step": 761 }, { "epoch": 1.0641711229946524, "grad_norm": 2.441474742083292, "learning_rate": 8.157220838804026e-06, "loss": 0.6321, "step": 762 }, { "epoch": 1.0655661474075797, "grad_norm": 2.3970450415481612, "learning_rate": 8.150915435393371e-06, "loss": 0.5733, "step": 763 }, { "epoch": 1.0669611718205068, "grad_norm": 2.3095554371089015, "learning_rate": 8.1446017091208e-06, "loss": 0.5289, "step": 764 }, { "epoch": 1.068356196233434, "grad_norm": 2.3081816952365597, "learning_rate": 8.138279676663458e-06, "loss": 0.6242, "step": 765 }, { "epoch": 1.0697512206463613, "grad_norm": 2.5435581479624334, "learning_rate": 8.131949354720425e-06, "loss": 0.5323, "step": 766 }, { "epoch": 1.0711462450592886, "grad_norm": 2.278991003362597, "learning_rate": 8.125610760012685e-06, "loss": 0.5802, "step": 767 }, { "epoch": 1.0725412694722158, "grad_norm": 2.389881660704999, "learning_rate": 8.11926390928307e-06, "loss": 0.6285, "step": 768 }, { "epoch": 1.073936293885143, "grad_norm": 2.3381393974719376, "learning_rate": 8.112908819296217e-06, "loss": 0.5893, "step": 769 }, { "epoch": 1.0753313182980702, "grad_norm": 2.3161211102040555, "learning_rate": 8.106545506838533e-06, "loss": 0.5949, "step": 770 }, { "epoch": 1.0767263427109974, "grad_norm": 2.1327167383141163, "learning_rate": 8.100173988718136e-06, "loss": 0.5666, "step": 771 }, { "epoch": 1.0781213671239247, "grad_norm": 2.4533028222610005, "learning_rate": 8.093794281764824e-06, "loss": 0.5978, "step": 772 }, { "epoch": 1.079516391536852, "grad_norm": 2.382375993058778, "learning_rate": 8.087406402830026e-06, "loss": 0.5838, "step": 773 }, { "epoch": 1.080911415949779, "grad_norm": 2.276201121862134, "learning_rate": 8.081010368786751e-06, "loss": 0.5989, "step": 774 }, { "epoch": 1.0823064403627063, "grad_norm": 2.16767449592397, "learning_rate": 8.074606196529554e-06, "loss": 0.5659, "step": 775 }, { "epoch": 1.0837014647756336, "grad_norm": 2.312827707104984, "learning_rate": 8.068193902974482e-06, "loss": 0.5925, "step": 776 }, { "epoch": 1.0850964891885608, "grad_norm": 2.3139980447658473, "learning_rate": 8.06177350505904e-06, "loss": 0.561, "step": 777 }, { "epoch": 1.086491513601488, "grad_norm": 2.5462517649172964, "learning_rate": 8.055345019742133e-06, "loss": 0.5671, "step": 778 }, { "epoch": 1.0878865380144151, "grad_norm": 2.4347895609453714, "learning_rate": 8.048908464004032e-06, "loss": 0.6424, "step": 779 }, { "epoch": 1.0892815624273424, "grad_norm": 2.494171910614377, "learning_rate": 8.042463854846325e-06, "loss": 0.5612, "step": 780 }, { "epoch": 1.0906765868402697, "grad_norm": 2.484431057316156, "learning_rate": 8.036011209291872e-06, "loss": 0.5845, "step": 781 }, { "epoch": 1.092071611253197, "grad_norm": 2.450853423199779, "learning_rate": 8.029550544384758e-06, "loss": 0.5755, "step": 782 }, { "epoch": 1.0934666356661242, "grad_norm": 2.298785989607675, "learning_rate": 8.023081877190257e-06, "loss": 0.6469, "step": 783 }, { "epoch": 1.0948616600790513, "grad_norm": 2.4389824741964703, "learning_rate": 8.016605224794773e-06, "loss": 0.6032, "step": 784 }, { "epoch": 1.0962566844919786, "grad_norm": 2.4246327623742765, "learning_rate": 8.010120604305806e-06, "loss": 0.5922, "step": 785 }, { "epoch": 1.0976517089049058, "grad_norm": 2.3669105554620518, "learning_rate": 8.003628032851904e-06, "loss": 0.5844, "step": 786 }, { "epoch": 1.099046733317833, "grad_norm": 2.3745263582573863, "learning_rate": 7.997127527582613e-06, "loss": 0.5814, "step": 787 }, { "epoch": 1.1004417577307604, "grad_norm": 2.7380879386078134, "learning_rate": 7.990619105668437e-06, "loss": 0.5672, "step": 788 }, { "epoch": 1.1018367821436876, "grad_norm": 1.9905194720973909, "learning_rate": 7.984102784300794e-06, "loss": 0.5831, "step": 789 }, { "epoch": 1.1032318065566147, "grad_norm": 2.2820497460113827, "learning_rate": 7.977578580691963e-06, "loss": 0.6078, "step": 790 }, { "epoch": 1.104626830969542, "grad_norm": 2.2777357839770738, "learning_rate": 7.971046512075047e-06, "loss": 0.5657, "step": 791 }, { "epoch": 1.1060218553824692, "grad_norm": 2.4271136766014143, "learning_rate": 7.964506595703923e-06, "loss": 0.5497, "step": 792 }, { "epoch": 1.1074168797953965, "grad_norm": 2.143088781873585, "learning_rate": 7.957958848853193e-06, "loss": 0.6181, "step": 793 }, { "epoch": 1.1088119042083235, "grad_norm": 2.589329538034477, "learning_rate": 7.95140328881815e-06, "loss": 0.6041, "step": 794 }, { "epoch": 1.1102069286212508, "grad_norm": 2.362486795533797, "learning_rate": 7.944839932914718e-06, "loss": 0.5838, "step": 795 }, { "epoch": 1.111601953034178, "grad_norm": 2.5359444748618816, "learning_rate": 7.938268798479419e-06, "loss": 0.5783, "step": 796 }, { "epoch": 1.1129969774471054, "grad_norm": 2.192022113020744, "learning_rate": 7.931689902869314e-06, "loss": 0.6232, "step": 797 }, { "epoch": 1.1143920018600326, "grad_norm": 2.5622539565569777, "learning_rate": 7.925103263461971e-06, "loss": 0.6113, "step": 798 }, { "epoch": 1.11578702627296, "grad_norm": 2.1784999516213537, "learning_rate": 7.91850889765541e-06, "loss": 0.5754, "step": 799 }, { "epoch": 1.117182050685887, "grad_norm": 2.339608672399465, "learning_rate": 7.91190682286806e-06, "loss": 0.5288, "step": 800 }, { "epoch": 1.1185770750988142, "grad_norm": 2.340766183951467, "learning_rate": 7.905297056538713e-06, "loss": 0.5728, "step": 801 }, { "epoch": 1.1199720995117415, "grad_norm": 2.2909344135416823, "learning_rate": 7.898679616126474e-06, "loss": 0.5784, "step": 802 }, { "epoch": 1.1213671239246688, "grad_norm": 2.2069436007741943, "learning_rate": 7.892054519110726e-06, "loss": 0.597, "step": 803 }, { "epoch": 1.1227621483375958, "grad_norm": 2.3970714680305, "learning_rate": 7.885421782991064e-06, "loss": 0.6355, "step": 804 }, { "epoch": 1.124157172750523, "grad_norm": 2.3576340001664375, "learning_rate": 7.878781425287277e-06, "loss": 0.5481, "step": 805 }, { "epoch": 1.1255521971634503, "grad_norm": 2.366649272037593, "learning_rate": 7.872133463539274e-06, "loss": 0.5327, "step": 806 }, { "epoch": 1.1269472215763776, "grad_norm": 2.2901860584406206, "learning_rate": 7.86547791530705e-06, "loss": 0.6529, "step": 807 }, { "epoch": 1.1283422459893049, "grad_norm": 2.3092783455779453, "learning_rate": 7.858814798170644e-06, "loss": 0.6074, "step": 808 }, { "epoch": 1.1297372704022322, "grad_norm": 2.2631778849119004, "learning_rate": 7.852144129730087e-06, "loss": 0.5716, "step": 809 }, { "epoch": 1.1311322948151592, "grad_norm": 2.496762689724097, "learning_rate": 7.84546592760535e-06, "loss": 0.5899, "step": 810 }, { "epoch": 1.1325273192280865, "grad_norm": 2.1809466172112737, "learning_rate": 7.83878020943631e-06, "loss": 0.6312, "step": 811 }, { "epoch": 1.1339223436410137, "grad_norm": 2.577926229644585, "learning_rate": 7.832086992882697e-06, "loss": 0.5966, "step": 812 }, { "epoch": 1.135317368053941, "grad_norm": 2.1517652626645147, "learning_rate": 7.825386295624043e-06, "loss": 0.5766, "step": 813 }, { "epoch": 1.136712392466868, "grad_norm": 2.3237241447034434, "learning_rate": 7.818678135359641e-06, "loss": 0.6027, "step": 814 }, { "epoch": 1.1381074168797953, "grad_norm": 2.355150473445898, "learning_rate": 7.811962529808499e-06, "loss": 0.5822, "step": 815 }, { "epoch": 1.1395024412927226, "grad_norm": 2.3982926796508264, "learning_rate": 7.805239496709291e-06, "loss": 0.5488, "step": 816 }, { "epoch": 1.1408974657056499, "grad_norm": 2.2748174483034123, "learning_rate": 7.798509053820305e-06, "loss": 0.5593, "step": 817 }, { "epoch": 1.1422924901185771, "grad_norm": 2.303403868177857, "learning_rate": 7.79177121891941e-06, "loss": 0.5771, "step": 818 }, { "epoch": 1.1436875145315044, "grad_norm": 2.3806834964019647, "learning_rate": 7.785026009803993e-06, "loss": 0.6316, "step": 819 }, { "epoch": 1.1450825389444315, "grad_norm": 2.5362905850163306, "learning_rate": 7.778273444290921e-06, "loss": 0.6154, "step": 820 }, { "epoch": 1.1464775633573587, "grad_norm": 2.5173483872868814, "learning_rate": 7.771513540216496e-06, "loss": 0.5989, "step": 821 }, { "epoch": 1.147872587770286, "grad_norm": 2.355231441970645, "learning_rate": 7.764746315436399e-06, "loss": 0.5599, "step": 822 }, { "epoch": 1.1492676121832133, "grad_norm": 2.40368970561629, "learning_rate": 7.75797178782565e-06, "loss": 0.5725, "step": 823 }, { "epoch": 1.1506626365961403, "grad_norm": 2.0555825465033535, "learning_rate": 7.751189975278561e-06, "loss": 0.5284, "step": 824 }, { "epoch": 1.1520576610090676, "grad_norm": 2.3093980423779947, "learning_rate": 7.744400895708683e-06, "loss": 0.5868, "step": 825 }, { "epoch": 1.1534526854219949, "grad_norm": 2.388794599447151, "learning_rate": 7.737604567048766e-06, "loss": 0.6705, "step": 826 }, { "epoch": 1.1548477098349221, "grad_norm": 2.547423436538337, "learning_rate": 7.730801007250704e-06, "loss": 0.5653, "step": 827 }, { "epoch": 1.1562427342478494, "grad_norm": 2.2576583038939853, "learning_rate": 7.72399023428549e-06, "loss": 0.5665, "step": 828 }, { "epoch": 1.1576377586607767, "grad_norm": 2.1951247088619454, "learning_rate": 7.717172266143178e-06, "loss": 0.6708, "step": 829 }, { "epoch": 1.1590327830737037, "grad_norm": 2.523896688602727, "learning_rate": 7.710347120832821e-06, "loss": 0.5828, "step": 830 }, { "epoch": 1.160427807486631, "grad_norm": 2.407006758199056, "learning_rate": 7.703514816382432e-06, "loss": 0.5533, "step": 831 }, { "epoch": 1.1618228318995583, "grad_norm": 2.3087350336193406, "learning_rate": 7.696675370838929e-06, "loss": 0.5704, "step": 832 }, { "epoch": 1.1632178563124855, "grad_norm": 2.4331727016432296, "learning_rate": 7.689828802268102e-06, "loss": 0.5596, "step": 833 }, { "epoch": 1.1646128807254126, "grad_norm": 2.0100770850605696, "learning_rate": 7.682975128754548e-06, "loss": 0.5686, "step": 834 }, { "epoch": 1.1660079051383399, "grad_norm": 2.3494180690076214, "learning_rate": 7.676114368401635e-06, "loss": 0.6264, "step": 835 }, { "epoch": 1.1674029295512671, "grad_norm": 2.4611109219395955, "learning_rate": 7.66924653933145e-06, "loss": 0.6, "step": 836 }, { "epoch": 1.1687979539641944, "grad_norm": 2.341538519485921, "learning_rate": 7.662371659684749e-06, "loss": 0.6161, "step": 837 }, { "epoch": 1.1701929783771217, "grad_norm": 2.369721296084761, "learning_rate": 7.655489747620913e-06, "loss": 0.5792, "step": 838 }, { "epoch": 1.171588002790049, "grad_norm": 2.33807245031926, "learning_rate": 7.648600821317901e-06, "loss": 0.6223, "step": 839 }, { "epoch": 1.172983027202976, "grad_norm": 2.4316996570117575, "learning_rate": 7.641704898972194e-06, "loss": 0.5908, "step": 840 }, { "epoch": 1.1743780516159033, "grad_norm": 2.3415494794619267, "learning_rate": 7.634801998798755e-06, "loss": 0.6335, "step": 841 }, { "epoch": 1.1757730760288305, "grad_norm": 2.377777899516606, "learning_rate": 7.6278921390309834e-06, "loss": 0.526, "step": 842 }, { "epoch": 1.1771681004417578, "grad_norm": 2.12820061508409, "learning_rate": 7.620975337920653e-06, "loss": 0.5694, "step": 843 }, { "epoch": 1.1785631248546848, "grad_norm": 2.3129725251092417, "learning_rate": 7.6140516137378786e-06, "loss": 0.527, "step": 844 }, { "epoch": 1.179958149267612, "grad_norm": 2.244766559136893, "learning_rate": 7.607120984771058e-06, "loss": 0.5802, "step": 845 }, { "epoch": 1.1813531736805394, "grad_norm": 2.222074366301974, "learning_rate": 7.600183469326829e-06, "loss": 0.5711, "step": 846 }, { "epoch": 1.1827481980934667, "grad_norm": 2.3583494597434527, "learning_rate": 7.593239085730022e-06, "loss": 0.6363, "step": 847 }, { "epoch": 1.184143222506394, "grad_norm": 2.575014019174973, "learning_rate": 7.586287852323605e-06, "loss": 0.6161, "step": 848 }, { "epoch": 1.1855382469193212, "grad_norm": 2.4615824854539143, "learning_rate": 7.579329787468639e-06, "loss": 0.5525, "step": 849 }, { "epoch": 1.1869332713322482, "grad_norm": 2.3887289430353764, "learning_rate": 7.572364909544235e-06, "loss": 0.6083, "step": 850 }, { "epoch": 1.1883282957451755, "grad_norm": 2.280000581099081, "learning_rate": 7.565393236947494e-06, "loss": 0.5307, "step": 851 }, { "epoch": 1.1897233201581028, "grad_norm": 2.471973055640591, "learning_rate": 7.558414788093467e-06, "loss": 0.5922, "step": 852 }, { "epoch": 1.19111834457103, "grad_norm": 2.4337757817603882, "learning_rate": 7.551429581415104e-06, "loss": 0.6183, "step": 853 }, { "epoch": 1.192513368983957, "grad_norm": 2.4677312263728153, "learning_rate": 7.5444376353632064e-06, "loss": 0.5812, "step": 854 }, { "epoch": 1.1939083933968844, "grad_norm": 2.2146392699550765, "learning_rate": 7.537438968406372e-06, "loss": 0.6109, "step": 855 }, { "epoch": 1.1953034178098116, "grad_norm": 2.4024634281882387, "learning_rate": 7.530433599030962e-06, "loss": 0.5658, "step": 856 }, { "epoch": 1.196698442222739, "grad_norm": 2.350815587633893, "learning_rate": 7.5234215457410255e-06, "loss": 0.6112, "step": 857 }, { "epoch": 1.1980934666356662, "grad_norm": 2.421684381729925, "learning_rate": 7.516402827058283e-06, "loss": 0.6358, "step": 858 }, { "epoch": 1.1994884910485935, "grad_norm": 2.235374514768847, "learning_rate": 7.509377461522049e-06, "loss": 0.5984, "step": 859 }, { "epoch": 1.2008835154615205, "grad_norm": 2.675219392413384, "learning_rate": 7.502345467689202e-06, "loss": 0.6164, "step": 860 }, { "epoch": 1.2022785398744478, "grad_norm": 2.5343435550279554, "learning_rate": 7.4953068641341255e-06, "loss": 0.5694, "step": 861 }, { "epoch": 1.203673564287375, "grad_norm": 2.5386775212799075, "learning_rate": 7.488261669448662e-06, "loss": 0.6475, "step": 862 }, { "epoch": 1.2050685887003023, "grad_norm": 2.324159299219534, "learning_rate": 7.4812099022420636e-06, "loss": 0.5668, "step": 863 }, { "epoch": 1.2064636131132294, "grad_norm": 2.4176186705304574, "learning_rate": 7.474151581140947e-06, "loss": 0.56, "step": 864 }, { "epoch": 1.2078586375261566, "grad_norm": 2.46824418640957, "learning_rate": 7.4670867247892346e-06, "loss": 0.5734, "step": 865 }, { "epoch": 1.209253661939084, "grad_norm": 2.1865799047253933, "learning_rate": 7.460015351848115e-06, "loss": 0.5973, "step": 866 }, { "epoch": 1.2106486863520112, "grad_norm": 2.445682197703567, "learning_rate": 7.4529374809959895e-06, "loss": 0.5184, "step": 867 }, { "epoch": 1.2120437107649384, "grad_norm": 2.3138527495899917, "learning_rate": 7.445853130928422e-06, "loss": 0.6235, "step": 868 }, { "epoch": 1.2134387351778657, "grad_norm": 2.4820579663006153, "learning_rate": 7.438762320358089e-06, "loss": 0.6317, "step": 869 }, { "epoch": 1.2148337595907928, "grad_norm": 2.3628054612464036, "learning_rate": 7.431665068014737e-06, "loss": 0.5721, "step": 870 }, { "epoch": 1.21622878400372, "grad_norm": 2.263272014252997, "learning_rate": 7.424561392645122e-06, "loss": 0.5722, "step": 871 }, { "epoch": 1.2176238084166473, "grad_norm": 2.3766419536769026, "learning_rate": 7.417451313012971e-06, "loss": 0.611, "step": 872 }, { "epoch": 1.2190188328295746, "grad_norm": 2.588525586548804, "learning_rate": 7.410334847898921e-06, "loss": 0.6226, "step": 873 }, { "epoch": 1.2204138572425016, "grad_norm": 2.3950092278600525, "learning_rate": 7.403212016100484e-06, "loss": 0.5598, "step": 874 }, { "epoch": 1.221808881655429, "grad_norm": 2.381836331617083, "learning_rate": 7.396082836431981e-06, "loss": 0.5613, "step": 875 }, { "epoch": 1.2232039060683562, "grad_norm": 2.2740315820853527, "learning_rate": 7.388947327724506e-06, "loss": 0.5639, "step": 876 }, { "epoch": 1.2245989304812834, "grad_norm": 2.33002375304098, "learning_rate": 7.3818055088258676e-06, "loss": 0.5738, "step": 877 }, { "epoch": 1.2259939548942107, "grad_norm": 2.320747200335368, "learning_rate": 7.374657398600542e-06, "loss": 0.5524, "step": 878 }, { "epoch": 1.227388979307138, "grad_norm": 2.260470393935348, "learning_rate": 7.367503015929627e-06, "loss": 0.5768, "step": 879 }, { "epoch": 1.228784003720065, "grad_norm": 2.2939298491200337, "learning_rate": 7.3603423797107845e-06, "loss": 0.5551, "step": 880 }, { "epoch": 1.2301790281329923, "grad_norm": 2.433065183526248, "learning_rate": 7.353175508858195e-06, "loss": 0.6063, "step": 881 }, { "epoch": 1.2315740525459196, "grad_norm": 2.4740794969722755, "learning_rate": 7.3460024223025095e-06, "loss": 0.6281, "step": 882 }, { "epoch": 1.2329690769588468, "grad_norm": 1.9521752910755077, "learning_rate": 7.338823138990796e-06, "loss": 0.5452, "step": 883 }, { "epoch": 1.234364101371774, "grad_norm": 2.2672128366642776, "learning_rate": 7.33163767788649e-06, "loss": 0.5491, "step": 884 }, { "epoch": 1.2357591257847012, "grad_norm": 2.338187756813062, "learning_rate": 7.324446057969346e-06, "loss": 0.6297, "step": 885 }, { "epoch": 1.2371541501976284, "grad_norm": 2.5130433929079565, "learning_rate": 7.317248298235387e-06, "loss": 0.5918, "step": 886 }, { "epoch": 1.2385491746105557, "grad_norm": 2.231440601118177, "learning_rate": 7.3100444176968514e-06, "loss": 0.5516, "step": 887 }, { "epoch": 1.239944199023483, "grad_norm": 2.4992951488134025, "learning_rate": 7.302834435382147e-06, "loss": 0.6418, "step": 888 }, { "epoch": 1.2413392234364102, "grad_norm": 2.328228085313369, "learning_rate": 7.2956183703358e-06, "loss": 0.5199, "step": 889 }, { "epoch": 1.2427342478493373, "grad_norm": 2.239885086826493, "learning_rate": 7.288396241618401e-06, "loss": 0.5757, "step": 890 }, { "epoch": 1.2441292722622646, "grad_norm": 2.5409620463512463, "learning_rate": 7.281168068306559e-06, "loss": 0.6205, "step": 891 }, { "epoch": 1.2455242966751918, "grad_norm": 2.335358380869977, "learning_rate": 7.2739338694928485e-06, "loss": 0.5434, "step": 892 }, { "epoch": 1.246919321088119, "grad_norm": 2.3665742827513685, "learning_rate": 7.266693664285761e-06, "loss": 0.5878, "step": 893 }, { "epoch": 1.2483143455010464, "grad_norm": 2.3454306571434844, "learning_rate": 7.259447471809651e-06, "loss": 0.5732, "step": 894 }, { "epoch": 1.2497093699139734, "grad_norm": 2.253081502723134, "learning_rate": 7.252195311204689e-06, "loss": 0.6178, "step": 895 }, { "epoch": 1.2511043943269007, "grad_norm": 2.6185151941690794, "learning_rate": 7.244937201626812e-06, "loss": 0.6234, "step": 896 }, { "epoch": 1.252499418739828, "grad_norm": 2.3215615729542796, "learning_rate": 7.237673162247667e-06, "loss": 0.6249, "step": 897 }, { "epoch": 1.2538944431527552, "grad_norm": 2.4402645469604693, "learning_rate": 7.230403212254566e-06, "loss": 0.5538, "step": 898 }, { "epoch": 1.2552894675656825, "grad_norm": 2.2911975465733248, "learning_rate": 7.223127370850433e-06, "loss": 0.6162, "step": 899 }, { "epoch": 1.2566844919786098, "grad_norm": 2.404840951932685, "learning_rate": 7.215845657253755e-06, "loss": 0.6143, "step": 900 }, { "epoch": 1.2580795163915368, "grad_norm": 2.37583062964499, "learning_rate": 7.208558090698528e-06, "loss": 0.5934, "step": 901 }, { "epoch": 1.259474540804464, "grad_norm": 2.3139642621949883, "learning_rate": 7.2012646904342065e-06, "loss": 0.6116, "step": 902 }, { "epoch": 1.2608695652173914, "grad_norm": 2.3353480713811834, "learning_rate": 7.193965475725659e-06, "loss": 0.5515, "step": 903 }, { "epoch": 1.2622645896303184, "grad_norm": 2.222843520124317, "learning_rate": 7.186660465853111e-06, "loss": 0.5919, "step": 904 }, { "epoch": 1.2636596140432457, "grad_norm": 2.3102819505863823, "learning_rate": 7.1793496801120885e-06, "loss": 0.5641, "step": 905 }, { "epoch": 1.265054638456173, "grad_norm": 2.4634650571884342, "learning_rate": 7.172033137813387e-06, "loss": 0.6013, "step": 906 }, { "epoch": 1.2664496628691002, "grad_norm": 2.622108518489068, "learning_rate": 7.1647108582829924e-06, "loss": 0.6254, "step": 907 }, { "epoch": 1.2678446872820275, "grad_norm": 2.351963296439086, "learning_rate": 7.157382860862059e-06, "loss": 0.61, "step": 908 }, { "epoch": 1.2692397116949548, "grad_norm": 2.422474081079625, "learning_rate": 7.1500491649068345e-06, "loss": 0.5868, "step": 909 }, { "epoch": 1.270634736107882, "grad_norm": 2.491586982168358, "learning_rate": 7.1427097897886225e-06, "loss": 0.5849, "step": 910 }, { "epoch": 1.272029760520809, "grad_norm": 2.179812013406335, "learning_rate": 7.135364754893729e-06, "loss": 0.5857, "step": 911 }, { "epoch": 1.2734247849337363, "grad_norm": 2.346209035468536, "learning_rate": 7.128014079623408e-06, "loss": 0.5872, "step": 912 }, { "epoch": 1.2748198093466636, "grad_norm": 2.5345882601293632, "learning_rate": 7.120657783393809e-06, "loss": 0.5621, "step": 913 }, { "epoch": 1.2762148337595907, "grad_norm": 2.256524834892305, "learning_rate": 7.113295885635936e-06, "loss": 0.5636, "step": 914 }, { "epoch": 1.277609858172518, "grad_norm": 2.3452662297514877, "learning_rate": 7.105928405795584e-06, "loss": 0.6401, "step": 915 }, { "epoch": 1.2790048825854452, "grad_norm": 2.4055060460826767, "learning_rate": 7.098555363333289e-06, "loss": 0.5733, "step": 916 }, { "epoch": 1.2803999069983725, "grad_norm": 2.2779695838338627, "learning_rate": 7.091176777724291e-06, "loss": 0.5978, "step": 917 }, { "epoch": 1.2817949314112997, "grad_norm": 2.336689140121667, "learning_rate": 7.083792668458463e-06, "loss": 0.5798, "step": 918 }, { "epoch": 1.283189955824227, "grad_norm": 2.284027223834799, "learning_rate": 7.076403055040271e-06, "loss": 0.5666, "step": 919 }, { "epoch": 1.2845849802371543, "grad_norm": 2.298134400526252, "learning_rate": 7.069007956988718e-06, "loss": 0.5945, "step": 920 }, { "epoch": 1.2859800046500813, "grad_norm": 2.372669802919968, "learning_rate": 7.061607393837295e-06, "loss": 0.6112, "step": 921 }, { "epoch": 1.2873750290630086, "grad_norm": 2.465151416090266, "learning_rate": 7.0542013851339316e-06, "loss": 0.5193, "step": 922 }, { "epoch": 1.2887700534759359, "grad_norm": 2.258606688850305, "learning_rate": 7.04678995044094e-06, "loss": 0.543, "step": 923 }, { "epoch": 1.290165077888863, "grad_norm": 2.3135429126929217, "learning_rate": 7.039373109334957e-06, "loss": 0.588, "step": 924 }, { "epoch": 1.2915601023017902, "grad_norm": 2.328341067868312, "learning_rate": 7.031950881406913e-06, "loss": 0.5868, "step": 925 }, { "epoch": 1.2929551267147175, "grad_norm": 2.4231366762938062, "learning_rate": 7.024523286261959e-06, "loss": 0.5627, "step": 926 }, { "epoch": 1.2943501511276447, "grad_norm": 2.387028660157589, "learning_rate": 7.017090343519421e-06, "loss": 0.5951, "step": 927 }, { "epoch": 1.295745175540572, "grad_norm": 2.5975491799373827, "learning_rate": 7.009652072812758e-06, "loss": 0.5647, "step": 928 }, { "epoch": 1.2971401999534993, "grad_norm": 2.253225942456297, "learning_rate": 7.0022084937895e-06, "loss": 0.5981, "step": 929 }, { "epoch": 1.2985352243664265, "grad_norm": 2.569094240919765, "learning_rate": 6.994759626111189e-06, "loss": 0.6103, "step": 930 }, { "epoch": 1.2999302487793536, "grad_norm": 2.464842980140734, "learning_rate": 6.987305489453352e-06, "loss": 0.6566, "step": 931 }, { "epoch": 1.3013252731922809, "grad_norm": 2.5659983008507723, "learning_rate": 6.979846103505423e-06, "loss": 0.6084, "step": 932 }, { "epoch": 1.3027202976052081, "grad_norm": 2.2349870187733836, "learning_rate": 6.972381487970702e-06, "loss": 0.5592, "step": 933 }, { "epoch": 1.3041153220181352, "grad_norm": 2.4079519545453976, "learning_rate": 6.964911662566309e-06, "loss": 0.6355, "step": 934 }, { "epoch": 1.3055103464310625, "grad_norm": 2.5556981332669535, "learning_rate": 6.957436647023117e-06, "loss": 0.6081, "step": 935 }, { "epoch": 1.3069053708439897, "grad_norm": 2.2675340098098524, "learning_rate": 6.949956461085714e-06, "loss": 0.6026, "step": 936 }, { "epoch": 1.308300395256917, "grad_norm": 2.4183880454234132, "learning_rate": 6.942471124512346e-06, "loss": 0.5707, "step": 937 }, { "epoch": 1.3096954196698443, "grad_norm": 2.248283406540946, "learning_rate": 6.934980657074859e-06, "loss": 0.5171, "step": 938 }, { "epoch": 1.3110904440827715, "grad_norm": 2.24109968090452, "learning_rate": 6.9274850785586526e-06, "loss": 0.5988, "step": 939 }, { "epoch": 1.3124854684956988, "grad_norm": 2.3317817102662852, "learning_rate": 6.919984408762632e-06, "loss": 0.5678, "step": 940 }, { "epoch": 1.3138804929086259, "grad_norm": 2.295272201346642, "learning_rate": 6.9124786674991465e-06, "loss": 0.5815, "step": 941 }, { "epoch": 1.3152755173215531, "grad_norm": 2.2352759053204547, "learning_rate": 6.90496787459394e-06, "loss": 0.5768, "step": 942 }, { "epoch": 1.3166705417344804, "grad_norm": 2.4385069108997772, "learning_rate": 6.897452049886103e-06, "loss": 0.5635, "step": 943 }, { "epoch": 1.3180655661474074, "grad_norm": 2.0462483788173023, "learning_rate": 6.889931213228015e-06, "loss": 0.6569, "step": 944 }, { "epoch": 1.3194605905603347, "grad_norm": 2.607311071377726, "learning_rate": 6.882405384485294e-06, "loss": 0.6227, "step": 945 }, { "epoch": 1.320855614973262, "grad_norm": 2.3069543450378776, "learning_rate": 6.874874583536748e-06, "loss": 0.6048, "step": 946 }, { "epoch": 1.3222506393861893, "grad_norm": 2.4202317388198, "learning_rate": 6.867338830274312e-06, "loss": 0.6398, "step": 947 }, { "epoch": 1.3236456637991165, "grad_norm": 2.701796963095549, "learning_rate": 6.8597981446030095e-06, "loss": 0.634, "step": 948 }, { "epoch": 1.3250406882120438, "grad_norm": 2.3710411394708273, "learning_rate": 6.852252546440885e-06, "loss": 0.6108, "step": 949 }, { "epoch": 1.326435712624971, "grad_norm": 2.4591543126641757, "learning_rate": 6.844702055718964e-06, "loss": 0.5708, "step": 950 }, { "epoch": 1.3278307370378981, "grad_norm": 2.2384618557245073, "learning_rate": 6.837146692381197e-06, "loss": 0.5957, "step": 951 }, { "epoch": 1.3292257614508254, "grad_norm": 2.272577218828042, "learning_rate": 6.8295864763843965e-06, "loss": 0.6006, "step": 952 }, { "epoch": 1.3306207858637527, "grad_norm": 2.241443755720545, "learning_rate": 6.822021427698201e-06, "loss": 0.5779, "step": 953 }, { "epoch": 1.33201581027668, "grad_norm": 2.403816267479699, "learning_rate": 6.814451566305014e-06, "loss": 0.6281, "step": 954 }, { "epoch": 1.333410834689607, "grad_norm": 2.5272853261950665, "learning_rate": 6.806876912199945e-06, "loss": 0.61, "step": 955 }, { "epoch": 1.3348058591025342, "grad_norm": 2.446190039132259, "learning_rate": 6.7992974853907655e-06, "loss": 0.593, "step": 956 }, { "epoch": 1.3362008835154615, "grad_norm": 2.3113608408413935, "learning_rate": 6.791713305897861e-06, "loss": 0.598, "step": 957 }, { "epoch": 1.3375959079283888, "grad_norm": 2.194947708036908, "learning_rate": 6.78412439375416e-06, "loss": 0.6002, "step": 958 }, { "epoch": 1.338990932341316, "grad_norm": 2.407883829062523, "learning_rate": 6.776530769005099e-06, "loss": 0.5509, "step": 959 }, { "epoch": 1.3403859567542433, "grad_norm": 2.4060961947196455, "learning_rate": 6.768932451708557e-06, "loss": 0.5712, "step": 960 }, { "epoch": 1.3417809811671704, "grad_norm": 2.5027090479995207, "learning_rate": 6.761329461934814e-06, "loss": 0.5819, "step": 961 }, { "epoch": 1.3431760055800976, "grad_norm": 2.421116861843316, "learning_rate": 6.753721819766489e-06, "loss": 0.5962, "step": 962 }, { "epoch": 1.344571029993025, "grad_norm": 2.6339904648203025, "learning_rate": 6.746109545298488e-06, "loss": 0.6129, "step": 963 }, { "epoch": 1.3459660544059522, "grad_norm": 2.45844738464618, "learning_rate": 6.738492658637957e-06, "loss": 0.5629, "step": 964 }, { "epoch": 1.3473610788188792, "grad_norm": 2.076246802976755, "learning_rate": 6.730871179904218e-06, "loss": 0.653, "step": 965 }, { "epoch": 1.3487561032318065, "grad_norm": 2.4572190054921217, "learning_rate": 6.723245129228732e-06, "loss": 0.5394, "step": 966 }, { "epoch": 1.3501511276447338, "grad_norm": 2.4250919473341104, "learning_rate": 6.7156145267550275e-06, "loss": 0.5923, "step": 967 }, { "epoch": 1.351546152057661, "grad_norm": 2.4929030091496536, "learning_rate": 6.707979392638663e-06, "loss": 0.5847, "step": 968 }, { "epoch": 1.3529411764705883, "grad_norm": 2.3981186181779552, "learning_rate": 6.700339747047162e-06, "loss": 0.6013, "step": 969 }, { "epoch": 1.3543362008835156, "grad_norm": 2.2707353243708357, "learning_rate": 6.692695610159966e-06, "loss": 0.5848, "step": 970 }, { "epoch": 1.3557312252964426, "grad_norm": 2.1905715988849783, "learning_rate": 6.685047002168382e-06, "loss": 0.566, "step": 971 }, { "epoch": 1.35712624970937, "grad_norm": 2.3605362194781554, "learning_rate": 6.677393943275525e-06, "loss": 0.5789, "step": 972 }, { "epoch": 1.3585212741222972, "grad_norm": 2.37720244561515, "learning_rate": 6.669736453696266e-06, "loss": 0.5741, "step": 973 }, { "epoch": 1.3599162985352244, "grad_norm": 2.3172332279978365, "learning_rate": 6.66207455365718e-06, "loss": 0.5445, "step": 974 }, { "epoch": 1.3613113229481515, "grad_norm": 2.2467889307353874, "learning_rate": 6.6544082633964955e-06, "loss": 0.548, "step": 975 }, { "epoch": 1.3627063473610788, "grad_norm": 2.470183225613905, "learning_rate": 6.646737603164031e-06, "loss": 0.6273, "step": 976 }, { "epoch": 1.364101371774006, "grad_norm": 2.38027627995412, "learning_rate": 6.639062593221152e-06, "loss": 0.6452, "step": 977 }, { "epoch": 1.3654963961869333, "grad_norm": 2.6786050455856163, "learning_rate": 6.6313832538407106e-06, "loss": 0.5866, "step": 978 }, { "epoch": 1.3668914205998606, "grad_norm": 2.2270155489086574, "learning_rate": 6.623699605306999e-06, "loss": 0.6201, "step": 979 }, { "epoch": 1.3682864450127878, "grad_norm": 2.604368552304326, "learning_rate": 6.6160116679156874e-06, "loss": 0.5684, "step": 980 }, { "epoch": 1.369681469425715, "grad_norm": 2.553437258158537, "learning_rate": 6.608319461973778e-06, "loss": 0.5852, "step": 981 }, { "epoch": 1.3710764938386422, "grad_norm": 2.368614835108414, "learning_rate": 6.6006230077995424e-06, "loss": 0.5853, "step": 982 }, { "epoch": 1.3724715182515694, "grad_norm": 2.0833659659055037, "learning_rate": 6.592922325722483e-06, "loss": 0.5845, "step": 983 }, { "epoch": 1.3738665426644967, "grad_norm": 2.4893819932850993, "learning_rate": 6.58521743608326e-06, "loss": 0.6164, "step": 984 }, { "epoch": 1.3752615670774238, "grad_norm": 2.478330447977467, "learning_rate": 6.577508359233653e-06, "loss": 0.5917, "step": 985 }, { "epoch": 1.376656591490351, "grad_norm": 2.480170361497358, "learning_rate": 6.569795115536502e-06, "loss": 0.5892, "step": 986 }, { "epoch": 1.3780516159032783, "grad_norm": 2.450695254602758, "learning_rate": 6.562077725365648e-06, "loss": 0.6127, "step": 987 }, { "epoch": 1.3794466403162056, "grad_norm": 2.197461183086679, "learning_rate": 6.554356209105892e-06, "loss": 0.537, "step": 988 }, { "epoch": 1.3808416647291328, "grad_norm": 2.3762992909207026, "learning_rate": 6.54663058715293e-06, "loss": 0.6259, "step": 989 }, { "epoch": 1.38223668914206, "grad_norm": 2.337587634187039, "learning_rate": 6.538900879913301e-06, "loss": 0.599, "step": 990 }, { "epoch": 1.3836317135549872, "grad_norm": 2.5996256447921926, "learning_rate": 6.531167107804337e-06, "loss": 0.6072, "step": 991 }, { "epoch": 1.3850267379679144, "grad_norm": 2.4757474094894234, "learning_rate": 6.523429291254109e-06, "loss": 0.6248, "step": 992 }, { "epoch": 1.3864217623808417, "grad_norm": 2.4406756543368715, "learning_rate": 6.515687450701367e-06, "loss": 0.5887, "step": 993 }, { "epoch": 1.387816786793769, "grad_norm": 2.336323175100738, "learning_rate": 6.507941606595492e-06, "loss": 0.5489, "step": 994 }, { "epoch": 1.389211811206696, "grad_norm": 2.2461305128511144, "learning_rate": 6.500191779396439e-06, "loss": 0.6115, "step": 995 }, { "epoch": 1.3906068356196233, "grad_norm": 2.4919156817684027, "learning_rate": 6.492437989574689e-06, "loss": 0.5813, "step": 996 }, { "epoch": 1.3920018600325506, "grad_norm": 2.4826123837927736, "learning_rate": 6.48468025761118e-06, "loss": 0.5973, "step": 997 }, { "epoch": 1.3933968844454778, "grad_norm": 2.3907350520654207, "learning_rate": 6.476918603997273e-06, "loss": 0.5604, "step": 998 }, { "epoch": 1.394791908858405, "grad_norm": 2.460991825180263, "learning_rate": 6.469153049234683e-06, "loss": 0.6362, "step": 999 }, { "epoch": 1.3961869332713324, "grad_norm": 2.4394700065922996, "learning_rate": 6.461383613835427e-06, "loss": 0.5906, "step": 1000 }, { "epoch": 1.3975819576842594, "grad_norm": 2.186942968727129, "learning_rate": 6.453610318321777e-06, "loss": 0.5471, "step": 1001 }, { "epoch": 1.3989769820971867, "grad_norm": 2.3204128681362044, "learning_rate": 6.445833183226201e-06, "loss": 0.6252, "step": 1002 }, { "epoch": 1.400372006510114, "grad_norm": 2.6287078237725194, "learning_rate": 6.438052229091303e-06, "loss": 0.5978, "step": 1003 }, { "epoch": 1.4017670309230412, "grad_norm": 2.3623621252849842, "learning_rate": 6.430267476469783e-06, "loss": 0.5981, "step": 1004 }, { "epoch": 1.4031620553359683, "grad_norm": 2.4978392961251012, "learning_rate": 6.4224789459243705e-06, "loss": 0.5944, "step": 1005 }, { "epoch": 1.4045570797488955, "grad_norm": 2.578532588534208, "learning_rate": 6.4146866580277686e-06, "loss": 0.5483, "step": 1006 }, { "epoch": 1.4059521041618228, "grad_norm": 2.279179363437555, "learning_rate": 6.406890633362618e-06, "loss": 0.5513, "step": 1007 }, { "epoch": 1.40734712857475, "grad_norm": 2.2085590097099193, "learning_rate": 6.3990908925214155e-06, "loss": 0.548, "step": 1008 }, { "epoch": 1.4087421529876774, "grad_norm": 2.151428402798325, "learning_rate": 6.391287456106483e-06, "loss": 0.5691, "step": 1009 }, { "epoch": 1.4101371774006046, "grad_norm": 2.478945531262059, "learning_rate": 6.383480344729903e-06, "loss": 0.6294, "step": 1010 }, { "epoch": 1.4115322018135317, "grad_norm": 2.464101419771862, "learning_rate": 6.375669579013461e-06, "loss": 0.6396, "step": 1011 }, { "epoch": 1.412927226226459, "grad_norm": 2.455667450200804, "learning_rate": 6.367855179588597e-06, "loss": 0.6306, "step": 1012 }, { "epoch": 1.4143222506393862, "grad_norm": 2.395199812943258, "learning_rate": 6.3600371670963525e-06, "loss": 0.6072, "step": 1013 }, { "epoch": 1.4157172750523135, "grad_norm": 2.3637370958929598, "learning_rate": 6.352215562187307e-06, "loss": 0.5931, "step": 1014 }, { "epoch": 1.4171122994652405, "grad_norm": 2.3540234893105656, "learning_rate": 6.344390385521534e-06, "loss": 0.5887, "step": 1015 }, { "epoch": 1.4185073238781678, "grad_norm": 2.5878035051200095, "learning_rate": 6.33656165776854e-06, "loss": 0.594, "step": 1016 }, { "epoch": 1.419902348291095, "grad_norm": 2.6167978017200726, "learning_rate": 6.328729399607206e-06, "loss": 0.597, "step": 1017 }, { "epoch": 1.4212973727040223, "grad_norm": 2.4950132931583004, "learning_rate": 6.320893631725748e-06, "loss": 0.5707, "step": 1018 }, { "epoch": 1.4226923971169496, "grad_norm": 2.318351326831636, "learning_rate": 6.313054374821647e-06, "loss": 0.5904, "step": 1019 }, { "epoch": 1.4240874215298769, "grad_norm": 2.541284264944052, "learning_rate": 6.305211649601595e-06, "loss": 0.5376, "step": 1020 }, { "epoch": 1.425482445942804, "grad_norm": 2.3162211879862586, "learning_rate": 6.29736547678146e-06, "loss": 0.556, "step": 1021 }, { "epoch": 1.4268774703557312, "grad_norm": 2.4632541916252606, "learning_rate": 6.289515877086199e-06, "loss": 0.6604, "step": 1022 }, { "epoch": 1.4282724947686585, "grad_norm": 2.4343958773971135, "learning_rate": 6.2816628712498315e-06, "loss": 0.5994, "step": 1023 }, { "epoch": 1.4296675191815857, "grad_norm": 2.465368752712613, "learning_rate": 6.273806480015374e-06, "loss": 0.5776, "step": 1024 }, { "epoch": 1.4310625435945128, "grad_norm": 2.427253855707745, "learning_rate": 6.265946724134782e-06, "loss": 0.5065, "step": 1025 }, { "epoch": 1.43245756800744, "grad_norm": 2.301509825707397, "learning_rate": 6.258083624368895e-06, "loss": 0.5824, "step": 1026 }, { "epoch": 1.4338525924203673, "grad_norm": 2.2794207109120497, "learning_rate": 6.250217201487395e-06, "loss": 0.601, "step": 1027 }, { "epoch": 1.4352476168332946, "grad_norm": 2.39395194341529, "learning_rate": 6.242347476268733e-06, "loss": 0.6031, "step": 1028 }, { "epoch": 1.4366426412462219, "grad_norm": 2.403810116744624, "learning_rate": 6.2344744695000855e-06, "loss": 0.5922, "step": 1029 }, { "epoch": 1.4380376656591491, "grad_norm": 2.5507495872168318, "learning_rate": 6.226598201977299e-06, "loss": 0.6015, "step": 1030 }, { "epoch": 1.4394326900720762, "grad_norm": 2.4723264866135812, "learning_rate": 6.218718694504831e-06, "loss": 0.6114, "step": 1031 }, { "epoch": 1.4408277144850035, "grad_norm": 2.4418615769089214, "learning_rate": 6.2108359678956954e-06, "loss": 0.5468, "step": 1032 }, { "epoch": 1.4422227388979307, "grad_norm": 2.6243087081900534, "learning_rate": 6.202950042971414e-06, "loss": 0.626, "step": 1033 }, { "epoch": 1.443617763310858, "grad_norm": 2.419593234856155, "learning_rate": 6.19506094056195e-06, "loss": 0.6255, "step": 1034 }, { "epoch": 1.445012787723785, "grad_norm": 2.7226725135313243, "learning_rate": 6.187168681505666e-06, "loss": 0.6007, "step": 1035 }, { "epoch": 1.4464078121367123, "grad_norm": 2.4927079239567127, "learning_rate": 6.17927328664926e-06, "loss": 0.5672, "step": 1036 }, { "epoch": 1.4478028365496396, "grad_norm": 2.200793929764152, "learning_rate": 6.171374776847711e-06, "loss": 0.6231, "step": 1037 }, { "epoch": 1.4491978609625669, "grad_norm": 2.6270117223523135, "learning_rate": 6.163473172964229e-06, "loss": 0.5591, "step": 1038 }, { "epoch": 1.4505928853754941, "grad_norm": 2.4875634434280545, "learning_rate": 6.1555684958701965e-06, "loss": 0.6149, "step": 1039 }, { "epoch": 1.4519879097884214, "grad_norm": 2.4883391333673592, "learning_rate": 6.1476607664451105e-06, "loss": 0.6091, "step": 1040 }, { "epoch": 1.4533829342013485, "grad_norm": 2.4813670701264887, "learning_rate": 6.1397500055765345e-06, "loss": 0.6133, "step": 1041 }, { "epoch": 1.4547779586142757, "grad_norm": 2.3974358592939797, "learning_rate": 6.131836234160036e-06, "loss": 0.5835, "step": 1042 }, { "epoch": 1.456172983027203, "grad_norm": 2.2946151151292953, "learning_rate": 6.123919473099134e-06, "loss": 0.5921, "step": 1043 }, { "epoch": 1.4575680074401303, "grad_norm": 2.4533570945731533, "learning_rate": 6.115999743305252e-06, "loss": 0.5592, "step": 1044 }, { "epoch": 1.4589630318530573, "grad_norm": 2.4365852934069863, "learning_rate": 6.1080770656976444e-06, "loss": 0.5582, "step": 1045 }, { "epoch": 1.4603580562659846, "grad_norm": 2.32646080211653, "learning_rate": 6.100151461203359e-06, "loss": 0.5786, "step": 1046 }, { "epoch": 1.4617530806789119, "grad_norm": 2.268353838509341, "learning_rate": 6.0922229507571716e-06, "loss": 0.6079, "step": 1047 }, { "epoch": 1.4631481050918391, "grad_norm": 2.34635897093951, "learning_rate": 6.084291555301537e-06, "loss": 0.6362, "step": 1048 }, { "epoch": 1.4645431295047664, "grad_norm": 2.331286888571621, "learning_rate": 6.076357295786526e-06, "loss": 0.5682, "step": 1049 }, { "epoch": 1.4659381539176937, "grad_norm": 2.281817819451882, "learning_rate": 6.068420193169779e-06, "loss": 0.636, "step": 1050 }, { "epoch": 1.4673331783306207, "grad_norm": 2.3668368219510403, "learning_rate": 6.0604802684164436e-06, "loss": 0.614, "step": 1051 }, { "epoch": 1.468728202743548, "grad_norm": 2.36872021516229, "learning_rate": 6.052537542499122e-06, "loss": 0.5646, "step": 1052 }, { "epoch": 1.4701232271564753, "grad_norm": 2.2447909261724965, "learning_rate": 6.044592036397816e-06, "loss": 0.5684, "step": 1053 }, { "epoch": 1.4715182515694025, "grad_norm": 2.2929866827647474, "learning_rate": 6.0366437710998715e-06, "loss": 0.5847, "step": 1054 }, { "epoch": 1.4729132759823296, "grad_norm": 2.5483409579101735, "learning_rate": 6.0286927675999205e-06, "loss": 0.5917, "step": 1055 }, { "epoch": 1.4743083003952568, "grad_norm": 2.12517833408288, "learning_rate": 6.02073904689983e-06, "loss": 0.6262, "step": 1056 }, { "epoch": 1.4757033248081841, "grad_norm": 2.557049627093316, "learning_rate": 6.012782630008646e-06, "loss": 0.6149, "step": 1057 }, { "epoch": 1.4770983492211114, "grad_norm": 2.5997658063381515, "learning_rate": 6.004823537942528e-06, "loss": 0.5819, "step": 1058 }, { "epoch": 1.4784933736340387, "grad_norm": 2.34422786368084, "learning_rate": 5.996861791724713e-06, "loss": 0.6, "step": 1059 }, { "epoch": 1.479888398046966, "grad_norm": 2.413197800054857, "learning_rate": 5.98889741238544e-06, "loss": 0.5805, "step": 1060 }, { "epoch": 1.481283422459893, "grad_norm": 2.566292103437407, "learning_rate": 5.9809304209619054e-06, "loss": 0.588, "step": 1061 }, { "epoch": 1.4826784468728202, "grad_norm": 2.182383525883271, "learning_rate": 5.9729608384982085e-06, "loss": 0.5404, "step": 1062 }, { "epoch": 1.4840734712857475, "grad_norm": 2.4121820221647097, "learning_rate": 5.964988686045289e-06, "loss": 0.6052, "step": 1063 }, { "epoch": 1.4854684956986748, "grad_norm": 2.44487051844055, "learning_rate": 5.957013984660875e-06, "loss": 0.5845, "step": 1064 }, { "epoch": 1.4868635201116018, "grad_norm": 2.561107204003759, "learning_rate": 5.949036755409432e-06, "loss": 0.558, "step": 1065 }, { "epoch": 1.488258544524529, "grad_norm": 2.467996889822914, "learning_rate": 5.941057019362095e-06, "loss": 0.5573, "step": 1066 }, { "epoch": 1.4896535689374564, "grad_norm": 2.313537885068377, "learning_rate": 5.933074797596627e-06, "loss": 0.5604, "step": 1067 }, { "epoch": 1.4910485933503836, "grad_norm": 2.3807564862631816, "learning_rate": 5.925090111197355e-06, "loss": 0.5658, "step": 1068 }, { "epoch": 1.492443617763311, "grad_norm": 2.427208456552453, "learning_rate": 5.917102981255114e-06, "loss": 0.5943, "step": 1069 }, { "epoch": 1.4938386421762382, "grad_norm": 2.4536139868102294, "learning_rate": 5.909113428867195e-06, "loss": 0.6637, "step": 1070 }, { "epoch": 1.4952336665891652, "grad_norm": 2.5391427275120693, "learning_rate": 5.901121475137287e-06, "loss": 0.5988, "step": 1071 }, { "epoch": 1.4966286910020925, "grad_norm": 2.4696716861753805, "learning_rate": 5.893127141175425e-06, "loss": 0.6058, "step": 1072 }, { "epoch": 1.4980237154150198, "grad_norm": 2.481468163498509, "learning_rate": 5.885130448097926e-06, "loss": 0.6107, "step": 1073 }, { "epoch": 1.499418739827947, "grad_norm": 2.5690366801557083, "learning_rate": 5.877131417027343e-06, "loss": 0.6392, "step": 1074 }, { "epoch": 1.500813764240874, "grad_norm": 2.476027754827251, "learning_rate": 5.869130069092401e-06, "loss": 0.6191, "step": 1075 }, { "epoch": 1.5022087886538014, "grad_norm": 2.4034572635933147, "learning_rate": 5.861126425427949e-06, "loss": 0.5805, "step": 1076 }, { "epoch": 1.5036038130667286, "grad_norm": 2.4953898021302408, "learning_rate": 5.853120507174894e-06, "loss": 0.6188, "step": 1077 }, { "epoch": 1.504998837479656, "grad_norm": 2.4087345943143315, "learning_rate": 5.845112335480159e-06, "loss": 0.5931, "step": 1078 }, { "epoch": 1.5063938618925832, "grad_norm": 2.3741164224688966, "learning_rate": 5.83710193149661e-06, "loss": 0.5582, "step": 1079 }, { "epoch": 1.5077888863055104, "grad_norm": 2.286733487948878, "learning_rate": 5.829089316383018e-06, "loss": 0.6084, "step": 1080 }, { "epoch": 1.5091839107184377, "grad_norm": 2.454621223681621, "learning_rate": 5.821074511303988e-06, "loss": 0.5736, "step": 1081 }, { "epoch": 1.5105789351313648, "grad_norm": 2.258765603440206, "learning_rate": 5.813057537429915e-06, "loss": 0.5686, "step": 1082 }, { "epoch": 1.511973959544292, "grad_norm": 2.3678836314623357, "learning_rate": 5.805038415936919e-06, "loss": 0.5324, "step": 1083 }, { "epoch": 1.5133689839572193, "grad_norm": 2.1801692512861037, "learning_rate": 5.797017168006791e-06, "loss": 0.578, "step": 1084 }, { "epoch": 1.5147640083701464, "grad_norm": 2.5300889735953116, "learning_rate": 5.7889938148269445e-06, "loss": 0.6251, "step": 1085 }, { "epoch": 1.5161590327830736, "grad_norm": 2.3161360637554838, "learning_rate": 5.7809683775903525e-06, "loss": 0.5919, "step": 1086 }, { "epoch": 1.517554057196001, "grad_norm": 2.6352583861334957, "learning_rate": 5.7729408774954865e-06, "loss": 0.5878, "step": 1087 }, { "epoch": 1.5189490816089282, "grad_norm": 2.60418520333982, "learning_rate": 5.764911335746275e-06, "loss": 0.5796, "step": 1088 }, { "epoch": 1.5203441060218554, "grad_norm": 2.410738808943646, "learning_rate": 5.756879773552037e-06, "loss": 0.5776, "step": 1089 }, { "epoch": 1.5217391304347827, "grad_norm": 2.4561362854381614, "learning_rate": 5.748846212127421e-06, "loss": 0.6497, "step": 1090 }, { "epoch": 1.52313415484771, "grad_norm": 2.605269970711531, "learning_rate": 5.74081067269237e-06, "loss": 0.5467, "step": 1091 }, { "epoch": 1.524529179260637, "grad_norm": 2.277613326591145, "learning_rate": 5.732773176472042e-06, "loss": 0.5392, "step": 1092 }, { "epoch": 1.5259242036735643, "grad_norm": 2.2007229970664635, "learning_rate": 5.7247337446967625e-06, "loss": 0.5856, "step": 1093 }, { "epoch": 1.5273192280864916, "grad_norm": 2.387668496618452, "learning_rate": 5.716692398601975e-06, "loss": 0.5623, "step": 1094 }, { "epoch": 1.5287142524994186, "grad_norm": 2.3334855555379073, "learning_rate": 5.708649159428181e-06, "loss": 0.6257, "step": 1095 }, { "epoch": 1.5301092769123459, "grad_norm": 2.730940932707885, "learning_rate": 5.700604048420875e-06, "loss": 0.6129, "step": 1096 }, { "epoch": 1.5315043013252732, "grad_norm": 2.167080549765569, "learning_rate": 5.692557086830501e-06, "loss": 0.6044, "step": 1097 }, { "epoch": 1.5328993257382004, "grad_norm": 2.5764293595142926, "learning_rate": 5.68450829591239e-06, "loss": 0.5796, "step": 1098 }, { "epoch": 1.5342943501511277, "grad_norm": 2.3607481687682967, "learning_rate": 5.676457696926703e-06, "loss": 0.6142, "step": 1099 }, { "epoch": 1.535689374564055, "grad_norm": 2.519824608002849, "learning_rate": 5.668405311138382e-06, "loss": 0.5824, "step": 1100 }, { "epoch": 1.5370843989769822, "grad_norm": 2.3187571913453087, "learning_rate": 5.660351159817083e-06, "loss": 0.5619, "step": 1101 }, { "epoch": 1.5384794233899093, "grad_norm": 2.4508517441799063, "learning_rate": 5.652295264237128e-06, "loss": 0.601, "step": 1102 }, { "epoch": 1.5398744478028366, "grad_norm": 2.364883471098818, "learning_rate": 5.6442376456774495e-06, "loss": 0.5435, "step": 1103 }, { "epoch": 1.5412694722157638, "grad_norm": 2.2871731502769865, "learning_rate": 5.636178325421524e-06, "loss": 0.5778, "step": 1104 }, { "epoch": 1.5426644966286909, "grad_norm": 2.423950124689571, "learning_rate": 5.628117324757326e-06, "loss": 0.6099, "step": 1105 }, { "epoch": 1.5440595210416181, "grad_norm": 2.4547233624007956, "learning_rate": 5.620054664977275e-06, "loss": 0.5089, "step": 1106 }, { "epoch": 1.5454545454545454, "grad_norm": 2.206365931653314, "learning_rate": 5.61199036737816e-06, "loss": 0.5566, "step": 1107 }, { "epoch": 1.5468495698674727, "grad_norm": 2.1496339467764534, "learning_rate": 5.603924453261109e-06, "loss": 0.5395, "step": 1108 }, { "epoch": 1.5482445942804, "grad_norm": 2.497071667327221, "learning_rate": 5.595856943931512e-06, "loss": 0.609, "step": 1109 }, { "epoch": 1.5496396186933272, "grad_norm": 2.527071118334564, "learning_rate": 5.587787860698975e-06, "loss": 0.5951, "step": 1110 }, { "epoch": 1.5510346431062545, "grad_norm": 2.3274550319778586, "learning_rate": 5.579717224877261e-06, "loss": 0.6198, "step": 1111 }, { "epoch": 1.5524296675191815, "grad_norm": 2.563834145233702, "learning_rate": 5.571645057784236e-06, "loss": 0.5943, "step": 1112 }, { "epoch": 1.5538246919321088, "grad_norm": 2.514181958389433, "learning_rate": 5.5635713807418055e-06, "loss": 0.5876, "step": 1113 }, { "epoch": 1.555219716345036, "grad_norm": 2.361316565546348, "learning_rate": 5.55549621507587e-06, "loss": 0.5919, "step": 1114 }, { "epoch": 1.5566147407579631, "grad_norm": 2.3646753900139847, "learning_rate": 5.547419582116259e-06, "loss": 0.6392, "step": 1115 }, { "epoch": 1.5580097651708904, "grad_norm": 2.620604934175976, "learning_rate": 5.539341503196674e-06, "loss": 0.5571, "step": 1116 }, { "epoch": 1.5594047895838177, "grad_norm": 2.5045540492703307, "learning_rate": 5.531261999654646e-06, "loss": 0.538, "step": 1117 }, { "epoch": 1.560799813996745, "grad_norm": 2.3734255184878528, "learning_rate": 5.5231810928314555e-06, "loss": 0.564, "step": 1118 }, { "epoch": 1.5621948384096722, "grad_norm": 2.3186224558665756, "learning_rate": 5.5150988040721e-06, "loss": 0.5577, "step": 1119 }, { "epoch": 1.5635898628225995, "grad_norm": 2.530649350947161, "learning_rate": 5.507015154725226e-06, "loss": 0.5684, "step": 1120 }, { "epoch": 1.5649848872355268, "grad_norm": 2.192402286853964, "learning_rate": 5.4989301661430685e-06, "loss": 0.565, "step": 1121 }, { "epoch": 1.5663799116484538, "grad_norm": 2.460050837036643, "learning_rate": 5.490843859681404e-06, "loss": 0.5647, "step": 1122 }, { "epoch": 1.567774936061381, "grad_norm": 2.358261901759684, "learning_rate": 5.48275625669949e-06, "loss": 0.5435, "step": 1123 }, { "epoch": 1.5691699604743083, "grad_norm": 2.3175461263804453, "learning_rate": 5.474667378560007e-06, "loss": 0.5765, "step": 1124 }, { "epoch": 1.5705649848872354, "grad_norm": 2.2691420108429776, "learning_rate": 5.466577246629006e-06, "loss": 0.5829, "step": 1125 }, { "epoch": 1.5719600093001627, "grad_norm": 2.2397856688437576, "learning_rate": 5.458485882275848e-06, "loss": 0.5775, "step": 1126 }, { "epoch": 1.57335503371309, "grad_norm": 2.1873365349217275, "learning_rate": 5.45039330687315e-06, "loss": 0.5891, "step": 1127 }, { "epoch": 1.5747500581260172, "grad_norm": 2.317697617973825, "learning_rate": 5.442299541796727e-06, "loss": 0.5502, "step": 1128 }, { "epoch": 1.5761450825389445, "grad_norm": 2.174714032986347, "learning_rate": 5.4342046084255385e-06, "loss": 0.5337, "step": 1129 }, { "epoch": 1.5775401069518717, "grad_norm": 2.2668897352778195, "learning_rate": 5.426108528141627e-06, "loss": 0.5681, "step": 1130 }, { "epoch": 1.578935131364799, "grad_norm": 2.3300462528505745, "learning_rate": 5.4180113223300665e-06, "loss": 0.5438, "step": 1131 }, { "epoch": 1.580330155777726, "grad_norm": 2.3633604619755095, "learning_rate": 5.409913012378903e-06, "loss": 0.5293, "step": 1132 }, { "epoch": 1.5817251801906533, "grad_norm": 2.2965153519858643, "learning_rate": 5.401813619679102e-06, "loss": 0.5939, "step": 1133 }, { "epoch": 1.5831202046035806, "grad_norm": 2.4528822346970296, "learning_rate": 5.3937131656244834e-06, "loss": 0.5466, "step": 1134 }, { "epoch": 1.5845152290165077, "grad_norm": 2.6016954989708383, "learning_rate": 5.385611671611676e-06, "loss": 0.5904, "step": 1135 }, { "epoch": 1.585910253429435, "grad_norm": 2.4361132374081853, "learning_rate": 5.377509159040051e-06, "loss": 0.5944, "step": 1136 }, { "epoch": 1.5873052778423622, "grad_norm": 2.50171265747617, "learning_rate": 5.3694056493116745e-06, "loss": 0.548, "step": 1137 }, { "epoch": 1.5887003022552895, "grad_norm": 2.4380563616968676, "learning_rate": 5.361301163831242e-06, "loss": 0.5991, "step": 1138 }, { "epoch": 1.5900953266682167, "grad_norm": 2.4270323239659435, "learning_rate": 5.353195724006031e-06, "loss": 0.5316, "step": 1139 }, { "epoch": 1.591490351081144, "grad_norm": 2.3320301749423864, "learning_rate": 5.345089351245834e-06, "loss": 0.591, "step": 1140 }, { "epoch": 1.5928853754940713, "grad_norm": 2.4193364063324467, "learning_rate": 5.336982066962915e-06, "loss": 0.5905, "step": 1141 }, { "epoch": 1.5942803999069983, "grad_norm": 2.3572899814017556, "learning_rate": 5.328873892571941e-06, "loss": 0.5434, "step": 1142 }, { "epoch": 1.5956754243199256, "grad_norm": 2.5864680886228495, "learning_rate": 5.320764849489929e-06, "loss": 0.6094, "step": 1143 }, { "epoch": 1.5970704487328529, "grad_norm": 2.4477023668891773, "learning_rate": 5.312654959136194e-06, "loss": 0.4861, "step": 1144 }, { "epoch": 1.59846547314578, "grad_norm": 2.2448478988697533, "learning_rate": 5.304544242932288e-06, "loss": 0.6117, "step": 1145 }, { "epoch": 1.5998604975587072, "grad_norm": 2.4551415649752575, "learning_rate": 5.296432722301944e-06, "loss": 0.6285, "step": 1146 }, { "epoch": 1.6012555219716345, "grad_norm": 2.408093490768518, "learning_rate": 5.288320418671018e-06, "loss": 0.5705, "step": 1147 }, { "epoch": 1.6026505463845617, "grad_norm": 2.63575995294589, "learning_rate": 5.280207353467438e-06, "loss": 0.6164, "step": 1148 }, { "epoch": 1.604045570797489, "grad_norm": 2.3608165680094384, "learning_rate": 5.272093548121141e-06, "loss": 0.6108, "step": 1149 }, { "epoch": 1.6054405952104163, "grad_norm": 2.4172587769291485, "learning_rate": 5.26397902406402e-06, "loss": 0.6002, "step": 1150 }, { "epoch": 1.6068356196233435, "grad_norm": 2.508451681924291, "learning_rate": 5.255863802729866e-06, "loss": 0.6148, "step": 1151 }, { "epoch": 1.6082306440362706, "grad_norm": 2.517534897652446, "learning_rate": 5.247747905554311e-06, "loss": 0.5363, "step": 1152 }, { "epoch": 1.6096256684491979, "grad_norm": 2.2165434485529096, "learning_rate": 5.239631353974774e-06, "loss": 0.6492, "step": 1153 }, { "epoch": 1.6110206928621251, "grad_norm": 2.5784901475258235, "learning_rate": 5.231514169430403e-06, "loss": 0.5905, "step": 1154 }, { "epoch": 1.6124157172750522, "grad_norm": 2.4509989070494123, "learning_rate": 5.223396373362013e-06, "loss": 0.5563, "step": 1155 }, { "epoch": 1.6138107416879794, "grad_norm": 2.1404611374890163, "learning_rate": 5.215277987212041e-06, "loss": 0.645, "step": 1156 }, { "epoch": 1.6152057661009067, "grad_norm": 2.4228987332356713, "learning_rate": 5.207159032424478e-06, "loss": 0.6214, "step": 1157 }, { "epoch": 1.616600790513834, "grad_norm": 2.3593332448394975, "learning_rate": 5.199039530444819e-06, "loss": 0.5676, "step": 1158 }, { "epoch": 1.6179958149267613, "grad_norm": 2.367284874301585, "learning_rate": 5.1909195027200055e-06, "loss": 0.569, "step": 1159 }, { "epoch": 1.6193908393396885, "grad_norm": 2.275980262513176, "learning_rate": 5.182798970698361e-06, "loss": 0.5829, "step": 1160 }, { "epoch": 1.6207858637526158, "grad_norm": 2.447127239668667, "learning_rate": 5.174677955829551e-06, "loss": 0.5944, "step": 1161 }, { "epoch": 1.6221808881655428, "grad_norm": 2.2982968592461614, "learning_rate": 5.166556479564511e-06, "loss": 0.6247, "step": 1162 }, { "epoch": 1.6235759125784701, "grad_norm": 2.3327525783774417, "learning_rate": 5.158434563355392e-06, "loss": 0.6295, "step": 1163 }, { "epoch": 1.6249709369913974, "grad_norm": 2.393474226945324, "learning_rate": 5.150312228655515e-06, "loss": 0.5273, "step": 1164 }, { "epoch": 1.6263659614043244, "grad_norm": 2.2714026298001366, "learning_rate": 5.142189496919302e-06, "loss": 0.5573, "step": 1165 }, { "epoch": 1.6277609858172517, "grad_norm": 2.2626894746274298, "learning_rate": 5.1340663896022206e-06, "loss": 0.5721, "step": 1166 }, { "epoch": 1.629156010230179, "grad_norm": 2.2841637321177335, "learning_rate": 5.125942928160736e-06, "loss": 0.5755, "step": 1167 }, { "epoch": 1.6305510346431062, "grad_norm": 2.4462554966164447, "learning_rate": 5.117819134052246e-06, "loss": 0.6053, "step": 1168 }, { "epoch": 1.6319460590560335, "grad_norm": 2.364623878380785, "learning_rate": 5.10969502873503e-06, "loss": 0.5872, "step": 1169 }, { "epoch": 1.6333410834689608, "grad_norm": 2.6451345741785697, "learning_rate": 5.101570633668185e-06, "loss": 0.5875, "step": 1170 }, { "epoch": 1.634736107881888, "grad_norm": 2.5097413865863487, "learning_rate": 5.093445970311576e-06, "loss": 0.5611, "step": 1171 }, { "epoch": 1.636131132294815, "grad_norm": 2.490032641726588, "learning_rate": 5.085321060125775e-06, "loss": 0.5563, "step": 1172 }, { "epoch": 1.6375261567077424, "grad_norm": 2.3653899334732182, "learning_rate": 5.07719592457201e-06, "loss": 0.572, "step": 1173 }, { "epoch": 1.6389211811206696, "grad_norm": 2.586135926604021, "learning_rate": 5.069070585112097e-06, "loss": 0.6475, "step": 1174 }, { "epoch": 1.6403162055335967, "grad_norm": 2.3114223989907896, "learning_rate": 5.060945063208399e-06, "loss": 0.5717, "step": 1175 }, { "epoch": 1.641711229946524, "grad_norm": 2.5887542203415723, "learning_rate": 5.052819380323757e-06, "loss": 0.5857, "step": 1176 }, { "epoch": 1.6431062543594512, "grad_norm": 2.4070589065845565, "learning_rate": 5.044693557921434e-06, "loss": 0.5732, "step": 1177 }, { "epoch": 1.6445012787723785, "grad_norm": 2.5220026612023494, "learning_rate": 5.036567617465067e-06, "loss": 0.5971, "step": 1178 }, { "epoch": 1.6458963031853058, "grad_norm": 2.575655534900257, "learning_rate": 5.0284415804186025e-06, "loss": 0.5759, "step": 1179 }, { "epoch": 1.647291327598233, "grad_norm": 2.234171036706054, "learning_rate": 5.02031546824624e-06, "loss": 0.5587, "step": 1180 }, { "epoch": 1.6486863520111603, "grad_norm": 2.415823268996213, "learning_rate": 5.012189302412383e-06, "loss": 0.5938, "step": 1181 }, { "epoch": 1.6500813764240876, "grad_norm": 2.5094561247547613, "learning_rate": 5.0040631043815715e-06, "loss": 0.6122, "step": 1182 }, { "epoch": 1.6514764008370146, "grad_norm": 2.4762564181637736, "learning_rate": 4.99593689561843e-06, "loss": 0.586, "step": 1183 }, { "epoch": 1.652871425249942, "grad_norm": 2.2873534265131035, "learning_rate": 4.987810697587618e-06, "loss": 0.6207, "step": 1184 }, { "epoch": 1.654266449662869, "grad_norm": 2.5294064730611305, "learning_rate": 4.979684531753761e-06, "loss": 0.5669, "step": 1185 }, { "epoch": 1.6556614740757962, "grad_norm": 2.414968007554238, "learning_rate": 4.971558419581398e-06, "loss": 0.591, "step": 1186 }, { "epoch": 1.6570564984887235, "grad_norm": 2.3878939099648115, "learning_rate": 4.963432382534933e-06, "loss": 0.6452, "step": 1187 }, { "epoch": 1.6584515229016508, "grad_norm": 2.6257006750262515, "learning_rate": 4.955306442078568e-06, "loss": 0.6357, "step": 1188 }, { "epoch": 1.659846547314578, "grad_norm": 2.454312724998025, "learning_rate": 4.947180619676244e-06, "loss": 0.5361, "step": 1189 }, { "epoch": 1.6612415717275053, "grad_norm": 2.207956178143718, "learning_rate": 4.9390549367916004e-06, "loss": 0.5485, "step": 1190 }, { "epoch": 1.6626365961404326, "grad_norm": 2.101314598964255, "learning_rate": 4.930929414887904e-06, "loss": 0.5596, "step": 1191 }, { "epoch": 1.6640316205533598, "grad_norm": 2.4887536922671742, "learning_rate": 4.9228040754279915e-06, "loss": 0.5895, "step": 1192 }, { "epoch": 1.665426644966287, "grad_norm": 2.3311691629188216, "learning_rate": 4.914678939874225e-06, "loss": 0.6494, "step": 1193 }, { "epoch": 1.6668216693792142, "grad_norm": 2.561389643540223, "learning_rate": 4.906554029688427e-06, "loss": 0.5579, "step": 1194 }, { "epoch": 1.6682166937921412, "grad_norm": 2.1562687525919793, "learning_rate": 4.898429366331815e-06, "loss": 0.5827, "step": 1195 }, { "epoch": 1.6696117182050685, "grad_norm": 2.4542028018498407, "learning_rate": 4.8903049712649705e-06, "loss": 0.6127, "step": 1196 }, { "epoch": 1.6710067426179958, "grad_norm": 2.4338992274015454, "learning_rate": 4.8821808659477544e-06, "loss": 0.5426, "step": 1197 }, { "epoch": 1.672401767030923, "grad_norm": 2.3229635943909885, "learning_rate": 4.874057071839265e-06, "loss": 0.5562, "step": 1198 }, { "epoch": 1.6737967914438503, "grad_norm": 2.3266906354188013, "learning_rate": 4.86593361039778e-06, "loss": 0.6242, "step": 1199 }, { "epoch": 1.6751918158567776, "grad_norm": 2.529319982033688, "learning_rate": 4.857810503080701e-06, "loss": 0.614, "step": 1200 }, { "epoch": 1.6765868402697048, "grad_norm": 2.5304868111212495, "learning_rate": 4.849687771344487e-06, "loss": 0.6133, "step": 1201 }, { "epoch": 1.677981864682632, "grad_norm": 2.750263244496998, "learning_rate": 4.841565436644609e-06, "loss": 0.6299, "step": 1202 }, { "epoch": 1.6793768890955592, "grad_norm": 2.4288995683233874, "learning_rate": 4.8334435204354915e-06, "loss": 0.5804, "step": 1203 }, { "epoch": 1.6807719135084864, "grad_norm": 2.4933356815842296, "learning_rate": 4.825322044170451e-06, "loss": 0.6398, "step": 1204 }, { "epoch": 1.6821669379214135, "grad_norm": 2.2631109424568785, "learning_rate": 4.81720102930164e-06, "loss": 0.5834, "step": 1205 }, { "epoch": 1.6835619623343407, "grad_norm": 2.4961481434112947, "learning_rate": 4.809080497279998e-06, "loss": 0.6128, "step": 1206 }, { "epoch": 1.684956986747268, "grad_norm": 2.5817256711085212, "learning_rate": 4.800960469555183e-06, "loss": 0.5954, "step": 1207 }, { "epoch": 1.6863520111601953, "grad_norm": 2.273098616780608, "learning_rate": 4.792840967575523e-06, "loss": 0.5972, "step": 1208 }, { "epoch": 1.6877470355731226, "grad_norm": 2.3419373518832582, "learning_rate": 4.784722012787961e-06, "loss": 0.6055, "step": 1209 }, { "epoch": 1.6891420599860498, "grad_norm": 2.6104495375458785, "learning_rate": 4.776603626637988e-06, "loss": 0.5832, "step": 1210 }, { "epoch": 1.690537084398977, "grad_norm": 2.3104248828934897, "learning_rate": 4.768485830569598e-06, "loss": 0.5694, "step": 1211 }, { "epoch": 1.6919321088119044, "grad_norm": 2.3710095254332786, "learning_rate": 4.7603686460252265e-06, "loss": 0.6172, "step": 1212 }, { "epoch": 1.6933271332248314, "grad_norm": 2.5854470055873087, "learning_rate": 4.75225209444569e-06, "loss": 0.5669, "step": 1213 }, { "epoch": 1.6947221576377587, "grad_norm": 2.377445348110348, "learning_rate": 4.744136197270135e-06, "loss": 0.629, "step": 1214 }, { "epoch": 1.6961171820506857, "grad_norm": 2.497918160166489, "learning_rate": 4.736020975935981e-06, "loss": 0.5792, "step": 1215 }, { "epoch": 1.697512206463613, "grad_norm": 2.266275968009398, "learning_rate": 4.72790645187886e-06, "loss": 0.5375, "step": 1216 }, { "epoch": 1.6989072308765403, "grad_norm": 2.249520022475878, "learning_rate": 4.7197926465325626e-06, "loss": 0.5639, "step": 1217 }, { "epoch": 1.7003022552894675, "grad_norm": 2.492815939226133, "learning_rate": 4.711679581328983e-06, "loss": 0.6064, "step": 1218 }, { "epoch": 1.7016972797023948, "grad_norm": 2.590634490769363, "learning_rate": 4.703567277698058e-06, "loss": 0.6563, "step": 1219 }, { "epoch": 1.703092304115322, "grad_norm": 2.6656004577766956, "learning_rate": 4.695455757067712e-06, "loss": 0.5726, "step": 1220 }, { "epoch": 1.7044873285282494, "grad_norm": 2.1411772843194865, "learning_rate": 4.687345040863808e-06, "loss": 0.5817, "step": 1221 }, { "epoch": 1.7058823529411766, "grad_norm": 2.534486553597627, "learning_rate": 4.679235150510072e-06, "loss": 0.547, "step": 1222 }, { "epoch": 1.7072773773541037, "grad_norm": 2.199289555076591, "learning_rate": 4.671126107428061e-06, "loss": 0.5908, "step": 1223 }, { "epoch": 1.708672401767031, "grad_norm": 2.185281300191968, "learning_rate": 4.663017933037087e-06, "loss": 0.5798, "step": 1224 }, { "epoch": 1.710067426179958, "grad_norm": 2.399273686006833, "learning_rate": 4.6549106487541666e-06, "loss": 0.6004, "step": 1225 }, { "epoch": 1.7114624505928853, "grad_norm": 2.549388993820596, "learning_rate": 4.646804275993971e-06, "loss": 0.5644, "step": 1226 }, { "epoch": 1.7128574750058125, "grad_norm": 2.6503577876599973, "learning_rate": 4.63869883616876e-06, "loss": 0.5701, "step": 1227 }, { "epoch": 1.7142524994187398, "grad_norm": 2.2492034862832355, "learning_rate": 4.630594350688327e-06, "loss": 0.6037, "step": 1228 }, { "epoch": 1.715647523831667, "grad_norm": 2.5777981773494516, "learning_rate": 4.62249084095995e-06, "loss": 0.564, "step": 1229 }, { "epoch": 1.7170425482445943, "grad_norm": 2.4017212265967864, "learning_rate": 4.614388328388327e-06, "loss": 0.6284, "step": 1230 }, { "epoch": 1.7184375726575216, "grad_norm": 2.579088458003428, "learning_rate": 4.606286834375517e-06, "loss": 0.6132, "step": 1231 }, { "epoch": 1.7198325970704489, "grad_norm": 2.4476115883997243, "learning_rate": 4.598186380320899e-06, "loss": 0.5377, "step": 1232 }, { "epoch": 1.721227621483376, "grad_norm": 2.4361114012584273, "learning_rate": 4.5900869876210986e-06, "loss": 0.5784, "step": 1233 }, { "epoch": 1.7226226458963032, "grad_norm": 2.4097849085194087, "learning_rate": 4.581988677669935e-06, "loss": 0.6156, "step": 1234 }, { "epoch": 1.7240176703092303, "grad_norm": 2.3492288074653382, "learning_rate": 4.573891471858375e-06, "loss": 0.5844, "step": 1235 }, { "epoch": 1.7254126947221575, "grad_norm": 2.375146834968793, "learning_rate": 4.565795391574465e-06, "loss": 0.5824, "step": 1236 }, { "epoch": 1.7268077191350848, "grad_norm": 2.3857773266512794, "learning_rate": 4.5577004582032745e-06, "loss": 0.5526, "step": 1237 }, { "epoch": 1.728202743548012, "grad_norm": 2.46880377952201, "learning_rate": 4.549606693126851e-06, "loss": 0.5642, "step": 1238 }, { "epoch": 1.7295977679609393, "grad_norm": 2.431870930201487, "learning_rate": 4.541514117724155e-06, "loss": 0.6463, "step": 1239 }, { "epoch": 1.7309927923738666, "grad_norm": 2.411409490540053, "learning_rate": 4.533422753370995e-06, "loss": 0.5589, "step": 1240 }, { "epoch": 1.7323878167867939, "grad_norm": 2.368908029530818, "learning_rate": 4.525332621439995e-06, "loss": 0.6066, "step": 1241 }, { "epoch": 1.7337828411997211, "grad_norm": 2.4314655050718956, "learning_rate": 4.517243743300513e-06, "loss": 0.5386, "step": 1242 }, { "epoch": 1.7351778656126482, "grad_norm": 2.4970645052454428, "learning_rate": 4.5091561403185976e-06, "loss": 0.6268, "step": 1243 }, { "epoch": 1.7365728900255755, "grad_norm": 2.471609734111054, "learning_rate": 4.501069833856934e-06, "loss": 0.5515, "step": 1244 }, { "epoch": 1.7379679144385025, "grad_norm": 2.456241192134647, "learning_rate": 4.492984845274774e-06, "loss": 0.5451, "step": 1245 }, { "epoch": 1.7393629388514298, "grad_norm": 2.4396835247722666, "learning_rate": 4.484901195927901e-06, "loss": 0.5978, "step": 1246 }, { "epoch": 1.740757963264357, "grad_norm": 2.3340804580663312, "learning_rate": 4.476818907168545e-06, "loss": 0.5237, "step": 1247 }, { "epoch": 1.7421529876772843, "grad_norm": 2.3342716588679453, "learning_rate": 4.4687380003453555e-06, "loss": 0.5547, "step": 1248 }, { "epoch": 1.7435480120902116, "grad_norm": 2.4775827017919494, "learning_rate": 4.460658496803327e-06, "loss": 0.578, "step": 1249 }, { "epoch": 1.7449430365031389, "grad_norm": 2.4067171290740395, "learning_rate": 4.4525804178837425e-06, "loss": 0.5828, "step": 1250 }, { "epoch": 1.7463380609160661, "grad_norm": 2.6474413917667943, "learning_rate": 4.4445037849241305e-06, "loss": 0.6141, "step": 1251 }, { "epoch": 1.7477330853289934, "grad_norm": 2.4243705538983082, "learning_rate": 4.436428619258196e-06, "loss": 0.5803, "step": 1252 }, { "epoch": 1.7491281097419205, "grad_norm": 2.5275335358942197, "learning_rate": 4.428354942215766e-06, "loss": 0.5589, "step": 1253 }, { "epoch": 1.7505231341548477, "grad_norm": 2.386712241438412, "learning_rate": 4.42028277512274e-06, "loss": 0.5648, "step": 1254 }, { "epoch": 1.7519181585677748, "grad_norm": 2.2739679855414017, "learning_rate": 4.412212139301027e-06, "loss": 0.5923, "step": 1255 }, { "epoch": 1.753313182980702, "grad_norm": 2.4782528850013015, "learning_rate": 4.404143056068489e-06, "loss": 0.515, "step": 1256 }, { "epoch": 1.7547082073936293, "grad_norm": 2.2331347135884894, "learning_rate": 4.3960755467388916e-06, "loss": 0.5983, "step": 1257 }, { "epoch": 1.7561032318065566, "grad_norm": 2.6136593637351275, "learning_rate": 4.388009632621841e-06, "loss": 0.6423, "step": 1258 }, { "epoch": 1.7574982562194839, "grad_norm": 2.47308784916259, "learning_rate": 4.379945335022727e-06, "loss": 0.5798, "step": 1259 }, { "epoch": 1.7588932806324111, "grad_norm": 2.6120891420258796, "learning_rate": 4.371882675242674e-06, "loss": 0.622, "step": 1260 }, { "epoch": 1.7602883050453384, "grad_norm": 2.647718677631742, "learning_rate": 4.363821674578479e-06, "loss": 0.6373, "step": 1261 }, { "epoch": 1.7616833294582657, "grad_norm": 2.4263369721366406, "learning_rate": 4.355762354322552e-06, "loss": 0.5545, "step": 1262 }, { "epoch": 1.7630783538711927, "grad_norm": 2.321902996963418, "learning_rate": 4.347704735762872e-06, "loss": 0.5954, "step": 1263 }, { "epoch": 1.76447337828412, "grad_norm": 2.335339765418175, "learning_rate": 4.339648840182919e-06, "loss": 0.5785, "step": 1264 }, { "epoch": 1.765868402697047, "grad_norm": 2.3334525278302554, "learning_rate": 4.331594688861619e-06, "loss": 0.5819, "step": 1265 }, { "epoch": 1.7672634271099743, "grad_norm": 2.376638198907719, "learning_rate": 4.323542303073297e-06, "loss": 0.5351, "step": 1266 }, { "epoch": 1.7686584515229016, "grad_norm": 2.3758700172495404, "learning_rate": 4.315491704087613e-06, "loss": 0.5804, "step": 1267 }, { "epoch": 1.7700534759358288, "grad_norm": 2.329166154953201, "learning_rate": 4.3074429131695e-06, "loss": 0.5814, "step": 1268 }, { "epoch": 1.7714485003487561, "grad_norm": 2.5173321180522277, "learning_rate": 4.299395951579126e-06, "loss": 0.5736, "step": 1269 }, { "epoch": 1.7728435247616834, "grad_norm": 2.340985243306275, "learning_rate": 4.291350840571821e-06, "loss": 0.552, "step": 1270 }, { "epoch": 1.7742385491746107, "grad_norm": 2.311455400144604, "learning_rate": 4.283307601398026e-06, "loss": 0.5969, "step": 1271 }, { "epoch": 1.775633573587538, "grad_norm": 2.5089863412108016, "learning_rate": 4.275266255303238e-06, "loss": 0.5831, "step": 1272 }, { "epoch": 1.777028598000465, "grad_norm": 2.672360134839007, "learning_rate": 4.2672268235279616e-06, "loss": 0.5774, "step": 1273 }, { "epoch": 1.7784236224133922, "grad_norm": 2.2293033360373915, "learning_rate": 4.259189327307632e-06, "loss": 0.5897, "step": 1274 }, { "epoch": 1.7798186468263193, "grad_norm": 2.602966230519582, "learning_rate": 4.251153787872579e-06, "loss": 0.5843, "step": 1275 }, { "epoch": 1.7812136712392466, "grad_norm": 2.279642216410745, "learning_rate": 4.2431202264479665e-06, "loss": 0.5719, "step": 1276 }, { "epoch": 1.7826086956521738, "grad_norm": 2.5434728463544194, "learning_rate": 4.235088664253726e-06, "loss": 0.6239, "step": 1277 }, { "epoch": 1.784003720065101, "grad_norm": 2.6226135700938573, "learning_rate": 4.227059122504514e-06, "loss": 0.5896, "step": 1278 }, { "epoch": 1.7853987444780284, "grad_norm": 2.193671208212967, "learning_rate": 4.21903162240965e-06, "loss": 0.5374, "step": 1279 }, { "epoch": 1.7867937688909556, "grad_norm": 2.4133443786463173, "learning_rate": 4.211006185173056e-06, "loss": 0.6142, "step": 1280 }, { "epoch": 1.788188793303883, "grad_norm": 2.3891831018556746, "learning_rate": 4.20298283199321e-06, "loss": 0.5496, "step": 1281 }, { "epoch": 1.7895838177168102, "grad_norm": 2.495191272985779, "learning_rate": 4.1949615840630845e-06, "loss": 0.5908, "step": 1282 }, { "epoch": 1.7909788421297372, "grad_norm": 2.5766063766595746, "learning_rate": 4.186942462570087e-06, "loss": 0.6178, "step": 1283 }, { "epoch": 1.7923738665426645, "grad_norm": 2.551899482228534, "learning_rate": 4.178925488696012e-06, "loss": 0.5896, "step": 1284 }, { "epoch": 1.7937688909555918, "grad_norm": 2.4222576489735523, "learning_rate": 4.170910683616985e-06, "loss": 0.5697, "step": 1285 }, { "epoch": 1.7951639153685188, "grad_norm": 2.4294816894186244, "learning_rate": 4.1628980685033914e-06, "loss": 0.6203, "step": 1286 }, { "epoch": 1.796558939781446, "grad_norm": 2.461819180902959, "learning_rate": 4.154887664519842e-06, "loss": 0.6028, "step": 1287 }, { "epoch": 1.7979539641943734, "grad_norm": 2.4127818561530283, "learning_rate": 4.1468794928251064e-06, "loss": 0.5866, "step": 1288 }, { "epoch": 1.7993489886073006, "grad_norm": 2.455880254315481, "learning_rate": 4.138873574572053e-06, "loss": 0.5774, "step": 1289 }, { "epoch": 1.800744013020228, "grad_norm": 2.3922001685426766, "learning_rate": 4.130869930907599e-06, "loss": 0.5207, "step": 1290 }, { "epoch": 1.8021390374331552, "grad_norm": 2.1421249026637432, "learning_rate": 4.122868582972659e-06, "loss": 0.5809, "step": 1291 }, { "epoch": 1.8035340618460824, "grad_norm": 2.4069010885176985, "learning_rate": 4.114869551902075e-06, "loss": 0.5207, "step": 1292 }, { "epoch": 1.8049290862590095, "grad_norm": 2.3482221825958822, "learning_rate": 4.106872858824576e-06, "loss": 0.5729, "step": 1293 }, { "epoch": 1.8063241106719368, "grad_norm": 2.369748734633305, "learning_rate": 4.098878524862715e-06, "loss": 0.6243, "step": 1294 }, { "epoch": 1.807719135084864, "grad_norm": 2.611717258789472, "learning_rate": 4.090886571132807e-06, "loss": 0.6891, "step": 1295 }, { "epoch": 1.809114159497791, "grad_norm": 2.626761527096334, "learning_rate": 4.082897018744887e-06, "loss": 0.5976, "step": 1296 }, { "epoch": 1.8105091839107184, "grad_norm": 2.7566505445874796, "learning_rate": 4.074909888802648e-06, "loss": 0.6084, "step": 1297 }, { "epoch": 1.8119042083236456, "grad_norm": 2.0911505684928193, "learning_rate": 4.066925202403374e-06, "loss": 0.5396, "step": 1298 }, { "epoch": 1.813299232736573, "grad_norm": 2.3862111167297493, "learning_rate": 4.058942980637906e-06, "loss": 0.604, "step": 1299 }, { "epoch": 1.8146942571495002, "grad_norm": 2.3415026787372675, "learning_rate": 4.050963244590571e-06, "loss": 0.5513, "step": 1300 }, { "epoch": 1.8160892815624274, "grad_norm": 2.4903380561285604, "learning_rate": 4.042986015339126e-06, "loss": 0.5758, "step": 1301 }, { "epoch": 1.8174843059753547, "grad_norm": 2.307915629535402, "learning_rate": 4.035011313954713e-06, "loss": 0.5874, "step": 1302 }, { "epoch": 1.8188793303882818, "grad_norm": 2.476803671387306, "learning_rate": 4.027039161501795e-06, "loss": 0.5081, "step": 1303 }, { "epoch": 1.820274354801209, "grad_norm": 2.1862149210828647, "learning_rate": 4.019069579038096e-06, "loss": 0.5792, "step": 1304 }, { "epoch": 1.8216693792141363, "grad_norm": 2.200254955685143, "learning_rate": 4.011102587614563e-06, "loss": 0.5445, "step": 1305 }, { "epoch": 1.8230644036270633, "grad_norm": 2.550245715742735, "learning_rate": 4.00313820827529e-06, "loss": 0.618, "step": 1306 }, { "epoch": 1.8244594280399906, "grad_norm": 2.531233735139702, "learning_rate": 3.995176462057473e-06, "loss": 0.6428, "step": 1307 }, { "epoch": 1.8258544524529179, "grad_norm": 2.654997789715292, "learning_rate": 3.987217369991357e-06, "loss": 0.5684, "step": 1308 }, { "epoch": 1.8272494768658452, "grad_norm": 2.331740679934541, "learning_rate": 3.979260953100169e-06, "loss": 0.5724, "step": 1309 }, { "epoch": 1.8286445012787724, "grad_norm": 2.4051695039898546, "learning_rate": 3.97130723240008e-06, "loss": 0.5637, "step": 1310 }, { "epoch": 1.8300395256916997, "grad_norm": 2.7228635799614285, "learning_rate": 3.96335622890013e-06, "loss": 0.6573, "step": 1311 }, { "epoch": 1.831434550104627, "grad_norm": 1.9353078431595583, "learning_rate": 3.955407963602184e-06, "loss": 0.5788, "step": 1312 }, { "epoch": 1.832829574517554, "grad_norm": 2.316004343819745, "learning_rate": 3.94746245750088e-06, "loss": 0.5665, "step": 1313 }, { "epoch": 1.8342245989304813, "grad_norm": 2.5376783787299124, "learning_rate": 3.939519731583557e-06, "loss": 0.6118, "step": 1314 }, { "epoch": 1.8356196233434086, "grad_norm": 2.176344188186547, "learning_rate": 3.9315798068302214e-06, "loss": 0.5494, "step": 1315 }, { "epoch": 1.8370146477563356, "grad_norm": 2.1981846955673556, "learning_rate": 3.923642704213475e-06, "loss": 0.6032, "step": 1316 }, { "epoch": 1.8384096721692629, "grad_norm": 2.433565043584574, "learning_rate": 3.915708444698465e-06, "loss": 0.5987, "step": 1317 }, { "epoch": 1.8398046965821901, "grad_norm": 2.5049927584921, "learning_rate": 3.907777049242828e-06, "loss": 0.5723, "step": 1318 }, { "epoch": 1.8411997209951174, "grad_norm": 2.3765307463226564, "learning_rate": 3.899848538796643e-06, "loss": 0.5835, "step": 1319 }, { "epoch": 1.8425947454080447, "grad_norm": 2.323485514881854, "learning_rate": 3.891922934302356e-06, "loss": 0.6025, "step": 1320 }, { "epoch": 1.843989769820972, "grad_norm": 2.457060614268573, "learning_rate": 3.884000256694749e-06, "loss": 0.5851, "step": 1321 }, { "epoch": 1.8453847942338992, "grad_norm": 2.404063710116219, "learning_rate": 3.876080526900867e-06, "loss": 0.6344, "step": 1322 }, { "epoch": 1.8467798186468263, "grad_norm": 2.4423761005110713, "learning_rate": 3.868163765839966e-06, "loss": 0.5864, "step": 1323 }, { "epoch": 1.8481748430597535, "grad_norm": 2.3949292753394413, "learning_rate": 3.860249994423467e-06, "loss": 0.5808, "step": 1324 }, { "epoch": 1.8495698674726808, "grad_norm": 2.547744802112933, "learning_rate": 3.852339233554891e-06, "loss": 0.5584, "step": 1325 }, { "epoch": 1.8509648918856079, "grad_norm": 2.4834153020535936, "learning_rate": 3.844431504129804e-06, "loss": 0.6263, "step": 1326 }, { "epoch": 1.8523599162985351, "grad_norm": 2.476959740856088, "learning_rate": 3.8365268270357715e-06, "loss": 0.6004, "step": 1327 }, { "epoch": 1.8537549407114624, "grad_norm": 2.2822538837305064, "learning_rate": 3.828625223152291e-06, "loss": 0.5715, "step": 1328 }, { "epoch": 1.8551499651243897, "grad_norm": 2.547907379402697, "learning_rate": 3.820726713350742e-06, "loss": 0.5691, "step": 1329 }, { "epoch": 1.856544989537317, "grad_norm": 2.5112331176698173, "learning_rate": 3.812831318494335e-06, "loss": 0.587, "step": 1330 }, { "epoch": 1.8579400139502442, "grad_norm": 2.4285431524772387, "learning_rate": 3.804939059438052e-06, "loss": 0.5873, "step": 1331 }, { "epoch": 1.8593350383631715, "grad_norm": 2.474215348053435, "learning_rate": 3.797049957028588e-06, "loss": 0.5805, "step": 1332 }, { "epoch": 1.8607300627760985, "grad_norm": 2.458561063328716, "learning_rate": 3.7891640321043054e-06, "loss": 0.5278, "step": 1333 }, { "epoch": 1.8621250871890258, "grad_norm": 2.61124586493259, "learning_rate": 3.781281305495171e-06, "loss": 0.5352, "step": 1334 }, { "epoch": 1.863520111601953, "grad_norm": 2.2258120089192936, "learning_rate": 3.773401798022701e-06, "loss": 0.5519, "step": 1335 }, { "epoch": 1.8649151360148801, "grad_norm": 2.573674579335456, "learning_rate": 3.765525530499915e-06, "loss": 0.5675, "step": 1336 }, { "epoch": 1.8663101604278074, "grad_norm": 2.444142877525606, "learning_rate": 3.757652523731269e-06, "loss": 0.539, "step": 1337 }, { "epoch": 1.8677051848407347, "grad_norm": 2.3479096570637474, "learning_rate": 3.7497827985126054e-06, "loss": 0.5834, "step": 1338 }, { "epoch": 1.869100209253662, "grad_norm": 2.391238646279421, "learning_rate": 3.741916375631105e-06, "loss": 0.5424, "step": 1339 }, { "epoch": 1.8704952336665892, "grad_norm": 2.6117557071144293, "learning_rate": 3.7340532758652217e-06, "loss": 0.6332, "step": 1340 }, { "epoch": 1.8718902580795165, "grad_norm": 2.3773105416779585, "learning_rate": 3.7261935199846266e-06, "loss": 0.5607, "step": 1341 }, { "epoch": 1.8732852824924437, "grad_norm": 2.5748934797508367, "learning_rate": 3.7183371287501684e-06, "loss": 0.5698, "step": 1342 }, { "epoch": 1.8746803069053708, "grad_norm": 2.2124553232048507, "learning_rate": 3.7104841229138034e-06, "loss": 0.5842, "step": 1343 }, { "epoch": 1.876075331318298, "grad_norm": 2.5445624998871224, "learning_rate": 3.7026345232185416e-06, "loss": 0.5461, "step": 1344 }, { "epoch": 1.8774703557312253, "grad_norm": 2.292197245788284, "learning_rate": 3.6947883503984037e-06, "loss": 0.5464, "step": 1345 }, { "epoch": 1.8788653801441524, "grad_norm": 2.439180732430635, "learning_rate": 3.686945625178356e-06, "loss": 0.5607, "step": 1346 }, { "epoch": 1.8802604045570797, "grad_norm": 2.5085267187457085, "learning_rate": 3.6791063682742535e-06, "loss": 0.5991, "step": 1347 }, { "epoch": 1.881655428970007, "grad_norm": 2.3769709372484553, "learning_rate": 3.6712706003927937e-06, "loss": 0.5622, "step": 1348 }, { "epoch": 1.8830504533829342, "grad_norm": 2.31434064502011, "learning_rate": 3.6634383422314622e-06, "loss": 0.5601, "step": 1349 }, { "epoch": 1.8844454777958615, "grad_norm": 2.4663077476409865, "learning_rate": 3.655609614478467e-06, "loss": 0.6358, "step": 1350 }, { "epoch": 1.8858405022087887, "grad_norm": 2.599416231845492, "learning_rate": 3.647784437812693e-06, "loss": 0.5945, "step": 1351 }, { "epoch": 1.887235526621716, "grad_norm": 2.4806490095213736, "learning_rate": 3.6399628329036496e-06, "loss": 0.6242, "step": 1352 }, { "epoch": 1.888630551034643, "grad_norm": 2.477850138672849, "learning_rate": 3.632144820411405e-06, "loss": 0.5293, "step": 1353 }, { "epoch": 1.8900255754475703, "grad_norm": 2.2176520800215123, "learning_rate": 3.624330420986541e-06, "loss": 0.5241, "step": 1354 }, { "epoch": 1.8914205998604976, "grad_norm": 2.2974977192391046, "learning_rate": 3.6165196552701e-06, "loss": 0.5695, "step": 1355 }, { "epoch": 1.8928156242734246, "grad_norm": 2.27082611324128, "learning_rate": 3.6087125438935187e-06, "loss": 0.5723, "step": 1356 }, { "epoch": 1.894210648686352, "grad_norm": 2.3967696700286187, "learning_rate": 3.6009091074785853e-06, "loss": 0.5595, "step": 1357 }, { "epoch": 1.8956056730992792, "grad_norm": 2.4463247460483246, "learning_rate": 3.5931093666373845e-06, "loss": 0.6032, "step": 1358 }, { "epoch": 1.8970006975122065, "grad_norm": 2.3916992600442555, "learning_rate": 3.585313341972232e-06, "loss": 0.5201, "step": 1359 }, { "epoch": 1.8983957219251337, "grad_norm": 2.394957682541409, "learning_rate": 3.577521054075631e-06, "loss": 0.548, "step": 1360 }, { "epoch": 1.899790746338061, "grad_norm": 2.5560245870377205, "learning_rate": 3.5697325235302183e-06, "loss": 0.5367, "step": 1361 }, { "epoch": 1.9011857707509883, "grad_norm": 2.420954511211148, "learning_rate": 3.5619477709086982e-06, "loss": 0.5361, "step": 1362 }, { "epoch": 1.9025807951639153, "grad_norm": 2.509041174402962, "learning_rate": 3.5541668167738003e-06, "loss": 0.6669, "step": 1363 }, { "epoch": 1.9039758195768426, "grad_norm": 2.6578984012433113, "learning_rate": 3.546389681678224e-06, "loss": 0.5576, "step": 1364 }, { "epoch": 1.9053708439897699, "grad_norm": 2.2953801230408915, "learning_rate": 3.538616386164575e-06, "loss": 0.5571, "step": 1365 }, { "epoch": 1.906765868402697, "grad_norm": 2.44565460739909, "learning_rate": 3.530846950765318e-06, "loss": 0.5998, "step": 1366 }, { "epoch": 1.9081608928156242, "grad_norm": 2.4773338970237844, "learning_rate": 3.5230813960027275e-06, "loss": 0.592, "step": 1367 }, { "epoch": 1.9095559172285514, "grad_norm": 2.6529708989496363, "learning_rate": 3.5153197423888206e-06, "loss": 0.5937, "step": 1368 }, { "epoch": 1.9109509416414787, "grad_norm": 2.302983246543046, "learning_rate": 3.5075620104253123e-06, "loss": 0.536, "step": 1369 }, { "epoch": 1.912345966054406, "grad_norm": 2.4003891152155616, "learning_rate": 3.4998082206035606e-06, "loss": 0.5649, "step": 1370 }, { "epoch": 1.9137409904673333, "grad_norm": 2.2843163631729393, "learning_rate": 3.492058393404509e-06, "loss": 0.6197, "step": 1371 }, { "epoch": 1.9151360148802605, "grad_norm": 2.452437695636041, "learning_rate": 3.4843125492986345e-06, "loss": 0.5522, "step": 1372 }, { "epoch": 1.9165310392931876, "grad_norm": 2.2973233273270948, "learning_rate": 3.4765707087458912e-06, "loss": 0.6033, "step": 1373 }, { "epoch": 1.9179260637061148, "grad_norm": 2.464616612951487, "learning_rate": 3.468832892195664e-06, "loss": 0.6113, "step": 1374 }, { "epoch": 1.9193210881190421, "grad_norm": 2.4354839974590052, "learning_rate": 3.4610991200867006e-06, "loss": 0.5785, "step": 1375 }, { "epoch": 1.9207161125319692, "grad_norm": 2.3155774551311854, "learning_rate": 3.453369412847071e-06, "loss": 0.5086, "step": 1376 }, { "epoch": 1.9221111369448964, "grad_norm": 2.3690068499310883, "learning_rate": 3.445643790894109e-06, "loss": 0.54, "step": 1377 }, { "epoch": 1.9235061613578237, "grad_norm": 2.4287304982026545, "learning_rate": 3.4379222746343534e-06, "loss": 0.5838, "step": 1378 }, { "epoch": 1.924901185770751, "grad_norm": 2.4729059197566725, "learning_rate": 3.4302048844634995e-06, "loss": 0.5771, "step": 1379 }, { "epoch": 1.9262962101836782, "grad_norm": 2.2782611372607646, "learning_rate": 3.4224916407663484e-06, "loss": 0.5246, "step": 1380 }, { "epoch": 1.9276912345966055, "grad_norm": 2.371129482557867, "learning_rate": 3.414782563916742e-06, "loss": 0.61, "step": 1381 }, { "epoch": 1.9290862590095328, "grad_norm": 2.539776568836371, "learning_rate": 3.407077674277518e-06, "loss": 0.5726, "step": 1382 }, { "epoch": 1.93048128342246, "grad_norm": 2.5220848663873774, "learning_rate": 3.3993769922004584e-06, "loss": 0.6462, "step": 1383 }, { "epoch": 1.931876307835387, "grad_norm": 2.6516138414633543, "learning_rate": 3.391680538026224e-06, "loss": 0.5452, "step": 1384 }, { "epoch": 1.9332713322483144, "grad_norm": 2.427947010175842, "learning_rate": 3.3839883320843125e-06, "loss": 0.5714, "step": 1385 }, { "epoch": 1.9346663566612414, "grad_norm": 2.295195949355666, "learning_rate": 3.3763003946930023e-06, "loss": 0.5685, "step": 1386 }, { "epoch": 1.9360613810741687, "grad_norm": 2.423858581032542, "learning_rate": 3.36861674615929e-06, "loss": 0.5684, "step": 1387 }, { "epoch": 1.937456405487096, "grad_norm": 2.552124758674149, "learning_rate": 3.360937406778849e-06, "loss": 0.6081, "step": 1388 }, { "epoch": 1.9388514299000232, "grad_norm": 2.410802484024832, "learning_rate": 3.35326239683597e-06, "loss": 0.5349, "step": 1389 }, { "epoch": 1.9402464543129505, "grad_norm": 2.4699200492200957, "learning_rate": 3.3455917366035058e-06, "loss": 0.5793, "step": 1390 }, { "epoch": 1.9416414787258778, "grad_norm": 2.42697172037929, "learning_rate": 3.337925446342819e-06, "loss": 0.5459, "step": 1391 }, { "epoch": 1.943036503138805, "grad_norm": 2.1599493334100375, "learning_rate": 3.3302635463037352e-06, "loss": 0.4917, "step": 1392 }, { "epoch": 1.9444315275517323, "grad_norm": 2.4699911880029695, "learning_rate": 3.3226060567244767e-06, "loss": 0.5703, "step": 1393 }, { "epoch": 1.9458265519646594, "grad_norm": 2.358107224318486, "learning_rate": 3.314952997831618e-06, "loss": 0.5217, "step": 1394 }, { "epoch": 1.9472215763775866, "grad_norm": 2.081823849180352, "learning_rate": 3.307304389840036e-06, "loss": 0.5441, "step": 1395 }, { "epoch": 1.9486166007905137, "grad_norm": 2.3011841178781505, "learning_rate": 3.29966025295284e-06, "loss": 0.5523, "step": 1396 }, { "epoch": 1.950011625203441, "grad_norm": 2.4495679580518264, "learning_rate": 3.292020607361337e-06, "loss": 0.6918, "step": 1397 }, { "epoch": 1.9514066496163682, "grad_norm": 2.4603351352332057, "learning_rate": 3.284385473244974e-06, "loss": 0.5463, "step": 1398 }, { "epoch": 1.9528016740292955, "grad_norm": 2.4633595440493545, "learning_rate": 3.2767548707712693e-06, "loss": 0.591, "step": 1399 }, { "epoch": 1.9541966984422228, "grad_norm": 2.514155529883468, "learning_rate": 3.2691288200957826e-06, "loss": 0.6064, "step": 1400 }, { "epoch": 1.95559172285515, "grad_norm": 2.6433064258018137, "learning_rate": 3.2615073413620467e-06, "loss": 0.6019, "step": 1401 }, { "epoch": 1.9569867472680773, "grad_norm": 2.476462865436784, "learning_rate": 3.2538904547015137e-06, "loss": 0.6206, "step": 1402 }, { "epoch": 1.9583817716810046, "grad_norm": 2.427639957084644, "learning_rate": 3.2462781802335124e-06, "loss": 0.5966, "step": 1403 }, { "epoch": 1.9597767960939316, "grad_norm": 2.399446464350375, "learning_rate": 3.2386705380651877e-06, "loss": 0.5765, "step": 1404 }, { "epoch": 1.961171820506859, "grad_norm": 2.2545518564960587, "learning_rate": 3.2310675482914444e-06, "loss": 0.5959, "step": 1405 }, { "epoch": 1.962566844919786, "grad_norm": 2.502162766396442, "learning_rate": 3.2234692309949034e-06, "loss": 0.579, "step": 1406 }, { "epoch": 1.9639618693327132, "grad_norm": 2.3796305945442766, "learning_rate": 3.2158756062458422e-06, "loss": 0.5782, "step": 1407 }, { "epoch": 1.9653568937456405, "grad_norm": 2.3649376487115434, "learning_rate": 3.208286694102141e-06, "loss": 0.6307, "step": 1408 }, { "epoch": 1.9667519181585678, "grad_norm": 2.5886144876157293, "learning_rate": 3.2007025146092345e-06, "loss": 0.5541, "step": 1409 }, { "epoch": 1.968146942571495, "grad_norm": 2.1415801894250537, "learning_rate": 3.1931230878000586e-06, "loss": 0.5597, "step": 1410 }, { "epoch": 1.9695419669844223, "grad_norm": 2.396910676349146, "learning_rate": 3.1855484336949876e-06, "loss": 0.5639, "step": 1411 }, { "epoch": 1.9709369913973496, "grad_norm": 2.230763393247396, "learning_rate": 3.1779785723017988e-06, "loss": 0.6035, "step": 1412 }, { "epoch": 1.9723320158102768, "grad_norm": 2.392234933401445, "learning_rate": 3.170413523615605e-06, "loss": 0.5561, "step": 1413 }, { "epoch": 1.9737270402232039, "grad_norm": 2.4754979656680156, "learning_rate": 3.162853307618805e-06, "loss": 0.53, "step": 1414 }, { "epoch": 1.9751220646361312, "grad_norm": 2.2102784376798956, "learning_rate": 3.155297944281036e-06, "loss": 0.5648, "step": 1415 }, { "epoch": 1.9765170890490582, "grad_norm": 2.542609226538574, "learning_rate": 3.1477474535591167e-06, "loss": 0.5813, "step": 1416 }, { "epoch": 1.9779121134619855, "grad_norm": 2.4403359009661543, "learning_rate": 3.1402018553969917e-06, "loss": 0.6038, "step": 1417 }, { "epoch": 1.9793071378749127, "grad_norm": 2.320567534117926, "learning_rate": 3.132661169725688e-06, "loss": 0.5734, "step": 1418 }, { "epoch": 1.98070216228784, "grad_norm": 2.2119913026007376, "learning_rate": 3.125125416463254e-06, "loss": 0.5305, "step": 1419 }, { "epoch": 1.9820971867007673, "grad_norm": 2.4242151125059546, "learning_rate": 3.1175946155147064e-06, "loss": 0.578, "step": 1420 }, { "epoch": 1.9834922111136946, "grad_norm": 2.54729641210588, "learning_rate": 3.110068786771987e-06, "loss": 0.5817, "step": 1421 }, { "epoch": 1.9848872355266218, "grad_norm": 1.9244618605053603, "learning_rate": 3.1025479501139e-06, "loss": 0.5958, "step": 1422 }, { "epoch": 1.986282259939549, "grad_norm": 2.525675526559715, "learning_rate": 3.095032125406062e-06, "loss": 0.5444, "step": 1423 }, { "epoch": 1.9876772843524761, "grad_norm": 2.4747533250757283, "learning_rate": 3.0875213325008548e-06, "loss": 0.6296, "step": 1424 }, { "epoch": 1.9890723087654034, "grad_norm": 2.580628395908184, "learning_rate": 3.0800155912373696e-06, "loss": 0.5731, "step": 1425 }, { "epoch": 1.9904673331783305, "grad_norm": 2.5501404904105347, "learning_rate": 3.0725149214413487e-06, "loss": 0.5297, "step": 1426 }, { "epoch": 1.9918623575912577, "grad_norm": 2.324724016420583, "learning_rate": 3.065019342925143e-06, "loss": 0.5835, "step": 1427 }, { "epoch": 1.993257382004185, "grad_norm": 2.604270750619532, "learning_rate": 3.0575288754876565e-06, "loss": 0.5894, "step": 1428 }, { "epoch": 1.9946524064171123, "grad_norm": 2.2036597831728337, "learning_rate": 3.0500435389142867e-06, "loss": 0.5328, "step": 1429 }, { "epoch": 1.9960474308300395, "grad_norm": 2.600976919765329, "learning_rate": 3.042563352976884e-06, "loss": 0.5562, "step": 1430 }, { "epoch": 1.9974424552429668, "grad_norm": 2.497008237706695, "learning_rate": 3.035088337433694e-06, "loss": 0.6124, "step": 1431 }, { "epoch": 1.998837479655894, "grad_norm": 2.5882984852008666, "learning_rate": 3.0276185120292996e-06, "loss": 0.5979, "step": 1432 }, { "epoch": 2.0013950244129273, "grad_norm": 2.5214254592959224, "learning_rate": 3.0201538964945787e-06, "loss": 0.9592, "step": 1433 }, { "epoch": 2.0027900488258545, "grad_norm": 2.3242323613938956, "learning_rate": 3.0126945105466486e-06, "loss": 0.3727, "step": 1434 }, { "epoch": 2.004185073238782, "grad_norm": 2.190519265268296, "learning_rate": 3.005240373888812e-06, "loss": 0.4247, "step": 1435 }, { "epoch": 2.005580097651709, "grad_norm": 2.3219158314168573, "learning_rate": 2.9977915062105023e-06, "loss": 0.3726, "step": 1436 }, { "epoch": 2.0069751220646364, "grad_norm": 2.1431280290898904, "learning_rate": 2.9903479271872416e-06, "loss": 0.3639, "step": 1437 }, { "epoch": 2.008370146477563, "grad_norm": 2.3623291412185683, "learning_rate": 2.9829096564805804e-06, "loss": 0.3749, "step": 1438 }, { "epoch": 2.0097651708904904, "grad_norm": 2.1855499052774823, "learning_rate": 2.975476713738043e-06, "loss": 0.3742, "step": 1439 }, { "epoch": 2.0111601953034177, "grad_norm": 2.0858177666906115, "learning_rate": 2.9680491185930877e-06, "loss": 0.3814, "step": 1440 }, { "epoch": 2.012555219716345, "grad_norm": 2.2588712346494204, "learning_rate": 2.960626890665044e-06, "loss": 0.3697, "step": 1441 }, { "epoch": 2.0139502441292723, "grad_norm": 2.170807844500238, "learning_rate": 2.953210049559062e-06, "loss": 0.3538, "step": 1442 }, { "epoch": 2.0153452685421995, "grad_norm": 2.5423059774453245, "learning_rate": 2.945798614866068e-06, "loss": 0.4175, "step": 1443 }, { "epoch": 2.016740292955127, "grad_norm": 2.5158546286380057, "learning_rate": 2.9383926061627055e-06, "loss": 0.3804, "step": 1444 }, { "epoch": 2.018135317368054, "grad_norm": 2.6703341812592725, "learning_rate": 2.9309920430112825e-06, "loss": 0.4089, "step": 1445 }, { "epoch": 2.0195303417809813, "grad_norm": 2.599284947461446, "learning_rate": 2.92359694495973e-06, "loss": 0.3844, "step": 1446 }, { "epoch": 2.0209253661939086, "grad_norm": 2.385463387472501, "learning_rate": 2.9162073315415384e-06, "loss": 0.3838, "step": 1447 }, { "epoch": 2.0223203906068354, "grad_norm": 2.8267466863405213, "learning_rate": 2.9088232222757085e-06, "loss": 0.3765, "step": 1448 }, { "epoch": 2.0237154150197627, "grad_norm": 2.4998301846038142, "learning_rate": 2.9014446366667115e-06, "loss": 0.3976, "step": 1449 }, { "epoch": 2.02511043943269, "grad_norm": 2.679989278366601, "learning_rate": 2.8940715942044204e-06, "loss": 0.3863, "step": 1450 }, { "epoch": 2.0265054638456172, "grad_norm": 2.6992825087748957, "learning_rate": 2.8867041143640663e-06, "loss": 0.3672, "step": 1451 }, { "epoch": 2.0279004882585445, "grad_norm": 2.705813618377396, "learning_rate": 2.8793422166061918e-06, "loss": 0.4115, "step": 1452 }, { "epoch": 2.029295512671472, "grad_norm": 2.7331587495808516, "learning_rate": 2.8719859203765955e-06, "loss": 0.3534, "step": 1453 }, { "epoch": 2.030690537084399, "grad_norm": 2.7429708891278706, "learning_rate": 2.864635245106272e-06, "loss": 0.384, "step": 1454 }, { "epoch": 2.0320855614973263, "grad_norm": 2.567419886051019, "learning_rate": 2.8572902102113788e-06, "loss": 0.3627, "step": 1455 }, { "epoch": 2.0334805859102536, "grad_norm": 2.7119425415478022, "learning_rate": 2.849950835093168e-06, "loss": 0.3774, "step": 1456 }, { "epoch": 2.034875610323181, "grad_norm": 2.324398514703736, "learning_rate": 2.8426171391379433e-06, "loss": 0.3752, "step": 1457 }, { "epoch": 2.0362706347361077, "grad_norm": 2.2283421291671295, "learning_rate": 2.835289141717008e-06, "loss": 0.3573, "step": 1458 }, { "epoch": 2.037665659149035, "grad_norm": 2.6230366556881846, "learning_rate": 2.827966862186616e-06, "loss": 0.3765, "step": 1459 }, { "epoch": 2.0390606835619622, "grad_norm": 2.526326153911664, "learning_rate": 2.820650319887911e-06, "loss": 0.3761, "step": 1460 }, { "epoch": 2.0404557079748895, "grad_norm": 2.554070884489334, "learning_rate": 2.8133395341468915e-06, "loss": 0.3801, "step": 1461 }, { "epoch": 2.041850732387817, "grad_norm": 2.6278494667294545, "learning_rate": 2.8060345242743427e-06, "loss": 0.3556, "step": 1462 }, { "epoch": 2.043245756800744, "grad_norm": 2.333880470286764, "learning_rate": 2.7987353095657944e-06, "loss": 0.4165, "step": 1463 }, { "epoch": 2.0446407812136713, "grad_norm": 2.7181615581035836, "learning_rate": 2.7914419093014734e-06, "loss": 0.3881, "step": 1464 }, { "epoch": 2.0460358056265986, "grad_norm": 2.497765508335459, "learning_rate": 2.784154342746246e-06, "loss": 0.3654, "step": 1465 }, { "epoch": 2.047430830039526, "grad_norm": 2.725771213333872, "learning_rate": 2.7768726291495667e-06, "loss": 0.3872, "step": 1466 }, { "epoch": 2.048825854452453, "grad_norm": 2.670409440110483, "learning_rate": 2.7695967877454356e-06, "loss": 0.3984, "step": 1467 }, { "epoch": 2.05022087886538, "grad_norm": 2.581812362345877, "learning_rate": 2.7623268377523356e-06, "loss": 0.3305, "step": 1468 }, { "epoch": 2.0516159032783072, "grad_norm": 2.480694647002634, "learning_rate": 2.755062798373189e-06, "loss": 0.3994, "step": 1469 }, { "epoch": 2.0530109276912345, "grad_norm": 2.7491899914731333, "learning_rate": 2.747804688795311e-06, "loss": 0.3924, "step": 1470 }, { "epoch": 2.0544059521041618, "grad_norm": 2.575052554412191, "learning_rate": 2.7405525281903506e-06, "loss": 0.35, "step": 1471 }, { "epoch": 2.055800976517089, "grad_norm": 2.59506066381637, "learning_rate": 2.7333063357142414e-06, "loss": 0.373, "step": 1472 }, { "epoch": 2.0571960009300163, "grad_norm": 2.3480036780956826, "learning_rate": 2.7260661305071523e-06, "loss": 0.3514, "step": 1473 }, { "epoch": 2.0585910253429436, "grad_norm": 2.5530929259527357, "learning_rate": 2.718831931693443e-06, "loss": 0.3834, "step": 1474 }, { "epoch": 2.059986049755871, "grad_norm": 2.4654938844634016, "learning_rate": 2.7116037583816e-06, "loss": 0.3841, "step": 1475 }, { "epoch": 2.061381074168798, "grad_norm": 2.724119129361741, "learning_rate": 2.7043816296642005e-06, "loss": 0.3473, "step": 1476 }, { "epoch": 2.0627760985817254, "grad_norm": 2.6260922512615137, "learning_rate": 2.6971655646178544e-06, "loss": 0.3845, "step": 1477 }, { "epoch": 2.064171122994652, "grad_norm": 2.2473447213318587, "learning_rate": 2.689955582303152e-06, "loss": 0.3422, "step": 1478 }, { "epoch": 2.0655661474075795, "grad_norm": 2.319530981350255, "learning_rate": 2.6827517017646154e-06, "loss": 0.3576, "step": 1479 }, { "epoch": 2.0669611718205068, "grad_norm": 2.726860310897311, "learning_rate": 2.6755539420306565e-06, "loss": 0.3772, "step": 1480 }, { "epoch": 2.068356196233434, "grad_norm": 2.2429497479293543, "learning_rate": 2.668362322113512e-06, "loss": 0.3649, "step": 1481 }, { "epoch": 2.0697512206463613, "grad_norm": 2.378148861802425, "learning_rate": 2.661176861009205e-06, "loss": 0.3794, "step": 1482 }, { "epoch": 2.0711462450592886, "grad_norm": 2.70154385791087, "learning_rate": 2.6539975776974926e-06, "loss": 0.3587, "step": 1483 }, { "epoch": 2.072541269472216, "grad_norm": 2.6434233460434355, "learning_rate": 2.646824491141807e-06, "loss": 0.3573, "step": 1484 }, { "epoch": 2.073936293885143, "grad_norm": 2.451534451495633, "learning_rate": 2.6396576202892176e-06, "loss": 0.3736, "step": 1485 }, { "epoch": 2.0753313182980704, "grad_norm": 2.352322755516322, "learning_rate": 2.632496984070375e-06, "loss": 0.3509, "step": 1486 }, { "epoch": 2.0767263427109977, "grad_norm": 2.5988475024935145, "learning_rate": 2.6253426013994586e-06, "loss": 0.3696, "step": 1487 }, { "epoch": 2.0781213671239245, "grad_norm": 2.6378162951797473, "learning_rate": 2.6181944911741333e-06, "loss": 0.3785, "step": 1488 }, { "epoch": 2.0795163915368517, "grad_norm": 2.6018188335164862, "learning_rate": 2.6110526722754955e-06, "loss": 0.3963, "step": 1489 }, { "epoch": 2.080911415949779, "grad_norm": 2.7160197328732107, "learning_rate": 2.603917163568021e-06, "loss": 0.3963, "step": 1490 }, { "epoch": 2.0823064403627063, "grad_norm": 2.3442592598785255, "learning_rate": 2.5967879838995176e-06, "loss": 0.373, "step": 1491 }, { "epoch": 2.0837014647756336, "grad_norm": 2.661644652387228, "learning_rate": 2.589665152101081e-06, "loss": 0.3752, "step": 1492 }, { "epoch": 2.085096489188561, "grad_norm": 2.505540042893153, "learning_rate": 2.582548686987031e-06, "loss": 0.3708, "step": 1493 }, { "epoch": 2.086491513601488, "grad_norm": 2.597438401894174, "learning_rate": 2.5754386073548775e-06, "loss": 0.36, "step": 1494 }, { "epoch": 2.0878865380144154, "grad_norm": 2.4745024525570676, "learning_rate": 2.5683349319852647e-06, "loss": 0.3535, "step": 1495 }, { "epoch": 2.0892815624273426, "grad_norm": 2.4138827198001223, "learning_rate": 2.5612376796419126e-06, "loss": 0.3845, "step": 1496 }, { "epoch": 2.09067658684027, "grad_norm": 2.7625272767075573, "learning_rate": 2.5541468690715797e-06, "loss": 0.4151, "step": 1497 }, { "epoch": 2.0920716112531967, "grad_norm": 2.660477032207717, "learning_rate": 2.5470625190040105e-06, "loss": 0.3779, "step": 1498 }, { "epoch": 2.093466635666124, "grad_norm": 2.762539404217291, "learning_rate": 2.5399846481518857e-06, "loss": 0.3895, "step": 1499 }, { "epoch": 2.0948616600790513, "grad_norm": 2.4357291257550124, "learning_rate": 2.5329132752107675e-06, "loss": 0.4121, "step": 1500 }, { "epoch": 2.0962566844919786, "grad_norm": 2.6511341339428056, "learning_rate": 2.525848418859055e-06, "loss": 0.3746, "step": 1501 }, { "epoch": 2.097651708904906, "grad_norm": 2.2989369671194506, "learning_rate": 2.518790097757938e-06, "loss": 0.3633, "step": 1502 }, { "epoch": 2.099046733317833, "grad_norm": 2.521936267727281, "learning_rate": 2.51173833055134e-06, "loss": 0.3521, "step": 1503 }, { "epoch": 2.1004417577307604, "grad_norm": 2.634801238533228, "learning_rate": 2.504693135865875e-06, "loss": 0.3908, "step": 1504 }, { "epoch": 2.1018367821436876, "grad_norm": 2.427438334120426, "learning_rate": 2.497654532310799e-06, "loss": 0.3488, "step": 1505 }, { "epoch": 2.103231806556615, "grad_norm": 2.4053065632801522, "learning_rate": 2.490622538477952e-06, "loss": 0.3678, "step": 1506 }, { "epoch": 2.104626830969542, "grad_norm": 2.7257724790634725, "learning_rate": 2.483597172941718e-06, "loss": 0.3597, "step": 1507 }, { "epoch": 2.106021855382469, "grad_norm": 2.7067736530976867, "learning_rate": 2.4765784542589754e-06, "loss": 0.3942, "step": 1508 }, { "epoch": 2.1074168797953963, "grad_norm": 2.7056974647390057, "learning_rate": 2.46956640096904e-06, "loss": 0.3423, "step": 1509 }, { "epoch": 2.1088119042083235, "grad_norm": 2.512580679693648, "learning_rate": 2.4625610315936267e-06, "loss": 0.3902, "step": 1510 }, { "epoch": 2.110206928621251, "grad_norm": 2.6120205213000145, "learning_rate": 2.4555623646367952e-06, "loss": 0.4171, "step": 1511 }, { "epoch": 2.111601953034178, "grad_norm": 2.57427830161093, "learning_rate": 2.448570418584898e-06, "loss": 0.3968, "step": 1512 }, { "epoch": 2.1129969774471054, "grad_norm": 2.4580718340674097, "learning_rate": 2.4415852119065343e-06, "loss": 0.3736, "step": 1513 }, { "epoch": 2.1143920018600326, "grad_norm": 2.5268872843301122, "learning_rate": 2.4346067630525084e-06, "loss": 0.3713, "step": 1514 }, { "epoch": 2.11578702627296, "grad_norm": 2.58637036082415, "learning_rate": 2.427635090455766e-06, "loss": 0.3539, "step": 1515 }, { "epoch": 2.117182050685887, "grad_norm": 2.6866751237965296, "learning_rate": 2.42067021253136e-06, "loss": 0.37, "step": 1516 }, { "epoch": 2.1185770750988144, "grad_norm": 2.440328422865511, "learning_rate": 2.4137121476763965e-06, "loss": 0.3441, "step": 1517 }, { "epoch": 2.1199720995117413, "grad_norm": 2.549914156912127, "learning_rate": 2.4067609142699798e-06, "loss": 0.3472, "step": 1518 }, { "epoch": 2.1213671239246685, "grad_norm": 2.5988535940531867, "learning_rate": 2.3998165306731713e-06, "loss": 0.376, "step": 1519 }, { "epoch": 2.122762148337596, "grad_norm": 2.6070060981923664, "learning_rate": 2.3928790152289443e-06, "loss": 0.3351, "step": 1520 }, { "epoch": 2.124157172750523, "grad_norm": 2.568263848437461, "learning_rate": 2.385948386262123e-06, "loss": 0.379, "step": 1521 }, { "epoch": 2.1255521971634503, "grad_norm": 2.7356099856129052, "learning_rate": 2.3790246620793466e-06, "loss": 0.3802, "step": 1522 }, { "epoch": 2.1269472215763776, "grad_norm": 2.726315474435242, "learning_rate": 2.372107860969019e-06, "loss": 0.3533, "step": 1523 }, { "epoch": 2.128342245989305, "grad_norm": 2.510361159689518, "learning_rate": 2.3651980012012454e-06, "loss": 0.3379, "step": 1524 }, { "epoch": 2.129737270402232, "grad_norm": 2.4508695584388143, "learning_rate": 2.358295101027807e-06, "loss": 0.3394, "step": 1525 }, { "epoch": 2.1311322948151594, "grad_norm": 2.3672692063899223, "learning_rate": 2.351399178682101e-06, "loss": 0.3439, "step": 1526 }, { "epoch": 2.1325273192280867, "grad_norm": 2.7239258697590167, "learning_rate": 2.3445102523790876e-06, "loss": 0.345, "step": 1527 }, { "epoch": 2.1339223436410135, "grad_norm": 2.4560026869731386, "learning_rate": 2.3376283403152527e-06, "loss": 0.3501, "step": 1528 }, { "epoch": 2.135317368053941, "grad_norm": 2.5486507622785326, "learning_rate": 2.330753460668553e-06, "loss": 0.355, "step": 1529 }, { "epoch": 2.136712392466868, "grad_norm": 2.43913870561611, "learning_rate": 2.323885631598366e-06, "loss": 0.3926, "step": 1530 }, { "epoch": 2.1381074168797953, "grad_norm": 2.497894602939468, "learning_rate": 2.3170248712454525e-06, "loss": 0.3781, "step": 1531 }, { "epoch": 2.1395024412927226, "grad_norm": 2.5723470929814702, "learning_rate": 2.3101711977318995e-06, "loss": 0.4079, "step": 1532 }, { "epoch": 2.14089746570565, "grad_norm": 2.776452262159786, "learning_rate": 2.3033246291610717e-06, "loss": 0.3701, "step": 1533 }, { "epoch": 2.142292490118577, "grad_norm": 2.582363452032768, "learning_rate": 2.2964851836175705e-06, "loss": 0.3803, "step": 1534 }, { "epoch": 2.1436875145315044, "grad_norm": 2.6980623592928934, "learning_rate": 2.2896528791671807e-06, "loss": 0.3682, "step": 1535 }, { "epoch": 2.1450825389444317, "grad_norm": 2.5567811091282175, "learning_rate": 2.2828277338568226e-06, "loss": 0.3405, "step": 1536 }, { "epoch": 2.146477563357359, "grad_norm": 2.5896917096125947, "learning_rate": 2.2760097657145096e-06, "loss": 0.3684, "step": 1537 }, { "epoch": 2.147872587770286, "grad_norm": 2.4822446486898877, "learning_rate": 2.2691989927492984e-06, "loss": 0.3813, "step": 1538 }, { "epoch": 2.149267612183213, "grad_norm": 2.7213079715818043, "learning_rate": 2.262395432951235e-06, "loss": 0.3976, "step": 1539 }, { "epoch": 2.1506626365961403, "grad_norm": 2.6150187171344594, "learning_rate": 2.2555991042913177e-06, "loss": 0.3712, "step": 1540 }, { "epoch": 2.1520576610090676, "grad_norm": 2.6855506018110256, "learning_rate": 2.248810024721441e-06, "loss": 0.3485, "step": 1541 }, { "epoch": 2.153452685421995, "grad_norm": 2.5672329835298915, "learning_rate": 2.2420282121743513e-06, "loss": 0.3561, "step": 1542 }, { "epoch": 2.154847709834922, "grad_norm": 2.5179794237548925, "learning_rate": 2.235253684563602e-06, "loss": 0.3319, "step": 1543 }, { "epoch": 2.1562427342478494, "grad_norm": 2.580793346821453, "learning_rate": 2.228486459783506e-06, "loss": 0.3469, "step": 1544 }, { "epoch": 2.1576377586607767, "grad_norm": 2.335371518855133, "learning_rate": 2.221726555709079e-06, "loss": 0.3441, "step": 1545 }, { "epoch": 2.159032783073704, "grad_norm": 2.693191736998869, "learning_rate": 2.2149739901960088e-06, "loss": 0.3737, "step": 1546 }, { "epoch": 2.160427807486631, "grad_norm": 2.6455671401482586, "learning_rate": 2.208228781080592e-06, "loss": 0.3582, "step": 1547 }, { "epoch": 2.161822831899558, "grad_norm": 2.601319145030401, "learning_rate": 2.201490946179696e-06, "loss": 0.3417, "step": 1548 }, { "epoch": 2.1632178563124853, "grad_norm": 2.6039055287110973, "learning_rate": 2.19476050329071e-06, "loss": 0.355, "step": 1549 }, { "epoch": 2.1646128807254126, "grad_norm": 2.6204748870860333, "learning_rate": 2.188037470191502e-06, "loss": 0.366, "step": 1550 }, { "epoch": 2.16600790513834, "grad_norm": 2.384307413657059, "learning_rate": 2.181321864640362e-06, "loss": 0.3671, "step": 1551 }, { "epoch": 2.167402929551267, "grad_norm": 2.8119295684924115, "learning_rate": 2.1746137043759594e-06, "loss": 0.351, "step": 1552 }, { "epoch": 2.1687979539641944, "grad_norm": 2.3876003541324997, "learning_rate": 2.167913007117306e-06, "loss": 0.3581, "step": 1553 }, { "epoch": 2.1701929783771217, "grad_norm": 2.6405030145993473, "learning_rate": 2.1612197905636913e-06, "loss": 0.399, "step": 1554 }, { "epoch": 2.171588002790049, "grad_norm": 2.649281849826764, "learning_rate": 2.154534072394651e-06, "loss": 0.3543, "step": 1555 }, { "epoch": 2.172983027202976, "grad_norm": 2.688879491092561, "learning_rate": 2.147855870269916e-06, "loss": 0.3669, "step": 1556 }, { "epoch": 2.1743780516159035, "grad_norm": 2.545377452985502, "learning_rate": 2.1411852018293583e-06, "loss": 0.4235, "step": 1557 }, { "epoch": 2.1757730760288303, "grad_norm": 2.7617396360599065, "learning_rate": 2.1345220846929514e-06, "loss": 0.3735, "step": 1558 }, { "epoch": 2.1771681004417576, "grad_norm": 2.7183247500406678, "learning_rate": 2.127866536460727e-06, "loss": 0.3724, "step": 1559 }, { "epoch": 2.178563124854685, "grad_norm": 2.5020200205801455, "learning_rate": 2.1212185747127235e-06, "loss": 0.3872, "step": 1560 }, { "epoch": 2.179958149267612, "grad_norm": 2.509561393691642, "learning_rate": 2.1145782170089346e-06, "loss": 0.366, "step": 1561 }, { "epoch": 2.1813531736805394, "grad_norm": 2.6527274470517597, "learning_rate": 2.107945480889276e-06, "loss": 0.3558, "step": 1562 }, { "epoch": 2.1827481980934667, "grad_norm": 2.728645780451789, "learning_rate": 2.1013203838735273e-06, "loss": 0.3771, "step": 1563 }, { "epoch": 2.184143222506394, "grad_norm": 2.4977977216032645, "learning_rate": 2.094702943461289e-06, "loss": 0.3465, "step": 1564 }, { "epoch": 2.185538246919321, "grad_norm": 2.8169142439799066, "learning_rate": 2.0880931771319395e-06, "loss": 0.4153, "step": 1565 }, { "epoch": 2.1869332713322485, "grad_norm": 2.703701247881902, "learning_rate": 2.0814911023445904e-06, "loss": 0.329, "step": 1566 }, { "epoch": 2.1883282957451757, "grad_norm": 2.570951929782088, "learning_rate": 2.0748967365380292e-06, "loss": 0.3606, "step": 1567 }, { "epoch": 2.1897233201581026, "grad_norm": 2.6072511840707304, "learning_rate": 2.0683100971306873e-06, "loss": 0.3749, "step": 1568 }, { "epoch": 2.19111834457103, "grad_norm": 2.7388972862082257, "learning_rate": 2.0617312015205844e-06, "loss": 0.3792, "step": 1569 }, { "epoch": 2.192513368983957, "grad_norm": 2.5786522543445516, "learning_rate": 2.055160067085283e-06, "loss": 0.352, "step": 1570 }, { "epoch": 2.1939083933968844, "grad_norm": 2.688555719144535, "learning_rate": 2.0485967111818506e-06, "loss": 0.3884, "step": 1571 }, { "epoch": 2.1953034178098116, "grad_norm": 2.441471460614826, "learning_rate": 2.0420411511468086e-06, "loss": 0.3491, "step": 1572 }, { "epoch": 2.196698442222739, "grad_norm": 2.3808175135390197, "learning_rate": 2.0354934042960804e-06, "loss": 0.4161, "step": 1573 }, { "epoch": 2.198093466635666, "grad_norm": 2.615121032493676, "learning_rate": 2.0289534879249544e-06, "loss": 0.3507, "step": 1574 }, { "epoch": 2.1994884910485935, "grad_norm": 2.7207401153310533, "learning_rate": 2.0224214193080394e-06, "loss": 0.3625, "step": 1575 }, { "epoch": 2.2008835154615207, "grad_norm": 2.5841902068855442, "learning_rate": 2.015897215699208e-06, "loss": 0.3527, "step": 1576 }, { "epoch": 2.202278539874448, "grad_norm": 2.538104634426136, "learning_rate": 2.0093808943315636e-06, "loss": 0.3423, "step": 1577 }, { "epoch": 2.2036735642873753, "grad_norm": 2.5205684862541693, "learning_rate": 2.0028724724173886e-06, "loss": 0.3486, "step": 1578 }, { "epoch": 2.205068588700302, "grad_norm": 2.5553767387455495, "learning_rate": 1.996371967148098e-06, "loss": 0.354, "step": 1579 }, { "epoch": 2.2064636131132294, "grad_norm": 2.7348868370636232, "learning_rate": 1.989879395694194e-06, "loss": 0.4158, "step": 1580 }, { "epoch": 2.2078586375261566, "grad_norm": 2.4990716391647174, "learning_rate": 1.9833947752052286e-06, "loss": 0.3512, "step": 1581 }, { "epoch": 2.209253661939084, "grad_norm": 2.6967852036575795, "learning_rate": 1.976918122809744e-06, "loss": 0.3812, "step": 1582 }, { "epoch": 2.210648686352011, "grad_norm": 2.4778704676078447, "learning_rate": 1.9704494556152413e-06, "loss": 0.3773, "step": 1583 }, { "epoch": 2.2120437107649384, "grad_norm": 2.372789425033178, "learning_rate": 1.9639887907081297e-06, "loss": 0.3363, "step": 1584 }, { "epoch": 2.2134387351778657, "grad_norm": 2.6532138976398585, "learning_rate": 1.9575361451536772e-06, "loss": 0.3685, "step": 1585 }, { "epoch": 2.214833759590793, "grad_norm": 2.504816717396523, "learning_rate": 1.9510915359959694e-06, "loss": 0.3386, "step": 1586 }, { "epoch": 2.2162287840037203, "grad_norm": 2.292410183382475, "learning_rate": 1.944654980257869e-06, "loss": 0.3473, "step": 1587 }, { "epoch": 2.217623808416647, "grad_norm": 2.562641828248574, "learning_rate": 1.9382264949409614e-06, "loss": 0.3454, "step": 1588 }, { "epoch": 2.2190188328295744, "grad_norm": 2.5699177197615652, "learning_rate": 1.931806097025517e-06, "loss": 0.3645, "step": 1589 }, { "epoch": 2.2204138572425016, "grad_norm": 2.6112618531663307, "learning_rate": 1.925393803470447e-06, "loss": 0.3367, "step": 1590 }, { "epoch": 2.221808881655429, "grad_norm": 2.5569442289606865, "learning_rate": 1.9189896312132506e-06, "loss": 0.3787, "step": 1591 }, { "epoch": 2.223203906068356, "grad_norm": 2.584036012745022, "learning_rate": 1.912593597169975e-06, "loss": 0.3556, "step": 1592 }, { "epoch": 2.2245989304812834, "grad_norm": 2.636971916019812, "learning_rate": 1.9062057182351768e-06, "loss": 0.3776, "step": 1593 }, { "epoch": 2.2259939548942107, "grad_norm": 2.6329969382237226, "learning_rate": 1.899826011281865e-06, "loss": 0.3996, "step": 1594 }, { "epoch": 2.227388979307138, "grad_norm": 2.66855782824547, "learning_rate": 1.893454493161468e-06, "loss": 0.3461, "step": 1595 }, { "epoch": 2.2287840037200652, "grad_norm": 2.477020855703558, "learning_rate": 1.8870911807037856e-06, "loss": 0.3658, "step": 1596 }, { "epoch": 2.2301790281329925, "grad_norm": 2.7508524189860077, "learning_rate": 1.8807360907169326e-06, "loss": 0.3836, "step": 1597 }, { "epoch": 2.23157405254592, "grad_norm": 2.6877388583473976, "learning_rate": 1.8743892399873154e-06, "loss": 0.3718, "step": 1598 }, { "epoch": 2.2329690769588466, "grad_norm": 2.7410918375321596, "learning_rate": 1.868050645279576e-06, "loss": 0.3844, "step": 1599 }, { "epoch": 2.234364101371774, "grad_norm": 2.5525203670258634, "learning_rate": 1.8617203233365427e-06, "loss": 0.3477, "step": 1600 }, { "epoch": 2.235759125784701, "grad_norm": 2.5058386128012695, "learning_rate": 1.8553982908792e-06, "loss": 0.384, "step": 1601 }, { "epoch": 2.2371541501976284, "grad_norm": 2.7626254933018943, "learning_rate": 1.8490845646066303e-06, "loss": 0.4157, "step": 1602 }, { "epoch": 2.2385491746105557, "grad_norm": 2.843334768341675, "learning_rate": 1.8427791611959762e-06, "loss": 0.4019, "step": 1603 }, { "epoch": 2.239944199023483, "grad_norm": 2.4927788556663586, "learning_rate": 1.8364820973024e-06, "loss": 0.3274, "step": 1604 }, { "epoch": 2.2413392234364102, "grad_norm": 2.75021773744222, "learning_rate": 1.8301933895590362e-06, "loss": 0.3902, "step": 1605 }, { "epoch": 2.2427342478493375, "grad_norm": 2.5366276809092554, "learning_rate": 1.8239130545769408e-06, "loss": 0.3621, "step": 1606 }, { "epoch": 2.2441292722622648, "grad_norm": 2.5689817476181664, "learning_rate": 1.8176411089450618e-06, "loss": 0.3899, "step": 1607 }, { "epoch": 2.2455242966751916, "grad_norm": 2.4084699491900667, "learning_rate": 1.8113775692301822e-06, "loss": 0.3707, "step": 1608 }, { "epoch": 2.246919321088119, "grad_norm": 2.6634336900884117, "learning_rate": 1.8051224519768817e-06, "loss": 0.3548, "step": 1609 }, { "epoch": 2.248314345501046, "grad_norm": 2.502912579812478, "learning_rate": 1.7988757737074959e-06, "loss": 0.3443, "step": 1610 }, { "epoch": 2.2497093699139734, "grad_norm": 2.459030896848924, "learning_rate": 1.7926375509220695e-06, "loss": 0.3801, "step": 1611 }, { "epoch": 2.2511043943269007, "grad_norm": 2.6667907745675272, "learning_rate": 1.7864078000983076e-06, "loss": 0.3539, "step": 1612 }, { "epoch": 2.252499418739828, "grad_norm": 2.780457421601445, "learning_rate": 1.7801865376915451e-06, "loss": 0.3724, "step": 1613 }, { "epoch": 2.2538944431527552, "grad_norm": 2.7872874934509464, "learning_rate": 1.7739737801346895e-06, "loss": 0.3961, "step": 1614 }, { "epoch": 2.2552894675656825, "grad_norm": 2.3932089646793187, "learning_rate": 1.7677695438381831e-06, "loss": 0.3875, "step": 1615 }, { "epoch": 2.2566844919786098, "grad_norm": 2.5436980933215962, "learning_rate": 1.761573845189965e-06, "loss": 0.3675, "step": 1616 }, { "epoch": 2.258079516391537, "grad_norm": 2.446200754717377, "learning_rate": 1.7553867005554215e-06, "loss": 0.3372, "step": 1617 }, { "epoch": 2.2594745408044643, "grad_norm": 2.5961265814193073, "learning_rate": 1.7492081262773397e-06, "loss": 0.3575, "step": 1618 }, { "epoch": 2.260869565217391, "grad_norm": 2.680274599084196, "learning_rate": 1.7430381386758748e-06, "loss": 0.3695, "step": 1619 }, { "epoch": 2.2622645896303184, "grad_norm": 2.5797029583717133, "learning_rate": 1.7368767540484965e-06, "loss": 0.3693, "step": 1620 }, { "epoch": 2.2636596140432457, "grad_norm": 2.477418184713373, "learning_rate": 1.7307239886699546e-06, "loss": 0.4019, "step": 1621 }, { "epoch": 2.265054638456173, "grad_norm": 2.8020410183195326, "learning_rate": 1.7245798587922263e-06, "loss": 0.3831, "step": 1622 }, { "epoch": 2.2664496628691, "grad_norm": 2.510503085145707, "learning_rate": 1.7184443806444851e-06, "loss": 0.355, "step": 1623 }, { "epoch": 2.2678446872820275, "grad_norm": 2.372996696833314, "learning_rate": 1.7123175704330514e-06, "loss": 0.3398, "step": 1624 }, { "epoch": 2.2692397116949548, "grad_norm": 2.4256197934327783, "learning_rate": 1.706199444341341e-06, "loss": 0.3591, "step": 1625 }, { "epoch": 2.270634736107882, "grad_norm": 2.7151286179851737, "learning_rate": 1.7000900185298418e-06, "loss": 0.4181, "step": 1626 }, { "epoch": 2.2720297605208093, "grad_norm": 2.8044140181623707, "learning_rate": 1.6939893091360577e-06, "loss": 0.417, "step": 1627 }, { "epoch": 2.273424784933736, "grad_norm": 2.730094943121316, "learning_rate": 1.6878973322744658e-06, "loss": 0.3516, "step": 1628 }, { "epoch": 2.2748198093466634, "grad_norm": 2.5506676001098985, "learning_rate": 1.6818141040364816e-06, "loss": 0.377, "step": 1629 }, { "epoch": 2.2762148337595907, "grad_norm": 2.653839704157034, "learning_rate": 1.6757396404904087e-06, "loss": 0.3517, "step": 1630 }, { "epoch": 2.277609858172518, "grad_norm": 2.4617233330515123, "learning_rate": 1.6696739576813981e-06, "loss": 0.3597, "step": 1631 }, { "epoch": 2.279004882585445, "grad_norm": 2.5570103219153104, "learning_rate": 1.6636170716314114e-06, "loss": 0.4184, "step": 1632 }, { "epoch": 2.2803999069983725, "grad_norm": 2.671908519337253, "learning_rate": 1.657568998339175e-06, "loss": 0.3941, "step": 1633 }, { "epoch": 2.2817949314112997, "grad_norm": 2.9709000928843814, "learning_rate": 1.6515297537801305e-06, "loss": 0.3927, "step": 1634 }, { "epoch": 2.283189955824227, "grad_norm": 2.6725714643103613, "learning_rate": 1.6454993539064075e-06, "loss": 0.3562, "step": 1635 }, { "epoch": 2.2845849802371543, "grad_norm": 2.549547670330065, "learning_rate": 1.6394778146467672e-06, "loss": 0.4068, "step": 1636 }, { "epoch": 2.2859800046500816, "grad_norm": 2.719115497930459, "learning_rate": 1.6334651519065658e-06, "loss": 0.3962, "step": 1637 }, { "epoch": 2.287375029063009, "grad_norm": 2.749026350933844, "learning_rate": 1.6274613815677176e-06, "loss": 0.384, "step": 1638 }, { "epoch": 2.2887700534759357, "grad_norm": 2.692095067175911, "learning_rate": 1.6214665194886474e-06, "loss": 0.3111, "step": 1639 }, { "epoch": 2.290165077888863, "grad_norm": 2.3792849433263004, "learning_rate": 1.6154805815042457e-06, "loss": 0.3655, "step": 1640 }, { "epoch": 2.29156010230179, "grad_norm": 2.9120180222369068, "learning_rate": 1.6095035834258365e-06, "loss": 0.379, "step": 1641 }, { "epoch": 2.2929551267147175, "grad_norm": 2.553480354210846, "learning_rate": 1.6035355410411252e-06, "loss": 0.3524, "step": 1642 }, { "epoch": 2.2943501511276447, "grad_norm": 2.7151113072847113, "learning_rate": 1.5975764701141611e-06, "loss": 0.3612, "step": 1643 }, { "epoch": 2.295745175540572, "grad_norm": 2.652440672837758, "learning_rate": 1.5916263863853e-06, "loss": 0.3696, "step": 1644 }, { "epoch": 2.2971401999534993, "grad_norm": 2.6621041632025917, "learning_rate": 1.585685305571159e-06, "loss": 0.3807, "step": 1645 }, { "epoch": 2.2985352243664265, "grad_norm": 2.8812225427762304, "learning_rate": 1.5797532433645696e-06, "loss": 0.3621, "step": 1646 }, { "epoch": 2.299930248779354, "grad_norm": 2.7487630275314614, "learning_rate": 1.5738302154345475e-06, "loss": 0.3578, "step": 1647 }, { "epoch": 2.3013252731922806, "grad_norm": 2.712389418470319, "learning_rate": 1.5679162374262414e-06, "loss": 0.3725, "step": 1648 }, { "epoch": 2.302720297605208, "grad_norm": 2.5314718444711675, "learning_rate": 1.5620113249608943e-06, "loss": 0.4083, "step": 1649 }, { "epoch": 2.304115322018135, "grad_norm": 2.71749229725184, "learning_rate": 1.5561154936358069e-06, "loss": 0.3573, "step": 1650 }, { "epoch": 2.3055103464310625, "grad_norm": 2.8492353511993036, "learning_rate": 1.5502287590242942e-06, "loss": 0.3605, "step": 1651 }, { "epoch": 2.3069053708439897, "grad_norm": 2.404593126347923, "learning_rate": 1.5443511366756375e-06, "loss": 0.35, "step": 1652 }, { "epoch": 2.308300395256917, "grad_norm": 2.5448461971596514, "learning_rate": 1.53848264211505e-06, "loss": 0.3983, "step": 1653 }, { "epoch": 2.3096954196698443, "grad_norm": 2.3091021687195683, "learning_rate": 1.5326232908436405e-06, "loss": 0.3534, "step": 1654 }, { "epoch": 2.3110904440827715, "grad_norm": 2.620061723423142, "learning_rate": 1.526773098338359e-06, "loss": 0.3849, "step": 1655 }, { "epoch": 2.312485468495699, "grad_norm": 2.767261586613823, "learning_rate": 1.5209320800519683e-06, "loss": 0.3743, "step": 1656 }, { "epoch": 2.313880492908626, "grad_norm": 2.672347100918888, "learning_rate": 1.515100251412998e-06, "loss": 0.3768, "step": 1657 }, { "epoch": 2.3152755173215533, "grad_norm": 2.68923702999827, "learning_rate": 1.5092776278257027e-06, "loss": 0.3733, "step": 1658 }, { "epoch": 2.31667054173448, "grad_norm": 2.739014233161832, "learning_rate": 1.5034642246700203e-06, "loss": 0.3667, "step": 1659 }, { "epoch": 2.3180655661474074, "grad_norm": 2.7478370131477643, "learning_rate": 1.4976600573015398e-06, "loss": 0.3835, "step": 1660 }, { "epoch": 2.3194605905603347, "grad_norm": 2.752957846619066, "learning_rate": 1.4918651410514479e-06, "loss": 0.3849, "step": 1661 }, { "epoch": 2.320855614973262, "grad_norm": 2.614400765608651, "learning_rate": 1.486079491226501e-06, "loss": 0.3387, "step": 1662 }, { "epoch": 2.3222506393861893, "grad_norm": 2.634523503162356, "learning_rate": 1.4803031231089782e-06, "loss": 0.3661, "step": 1663 }, { "epoch": 2.3236456637991165, "grad_norm": 2.521459881351909, "learning_rate": 1.4745360519566382e-06, "loss": 0.3431, "step": 1664 }, { "epoch": 2.325040688212044, "grad_norm": 2.683879773628941, "learning_rate": 1.4687782930026833e-06, "loss": 0.3784, "step": 1665 }, { "epoch": 2.326435712624971, "grad_norm": 2.644476677634626, "learning_rate": 1.4630298614557236e-06, "loss": 0.3508, "step": 1666 }, { "epoch": 2.3278307370378983, "grad_norm": 2.6452431291055407, "learning_rate": 1.4572907724997249e-06, "loss": 0.4205, "step": 1667 }, { "epoch": 2.329225761450825, "grad_norm": 2.494414837424218, "learning_rate": 1.4515610412939791e-06, "loss": 0.3285, "step": 1668 }, { "epoch": 2.3306207858637524, "grad_norm": 2.469457378559849, "learning_rate": 1.445840682973062e-06, "loss": 0.3714, "step": 1669 }, { "epoch": 2.3320158102766797, "grad_norm": 2.7637245897998284, "learning_rate": 1.4401297126467884e-06, "loss": 0.3363, "step": 1670 }, { "epoch": 2.333410834689607, "grad_norm": 2.465535140664513, "learning_rate": 1.4344281454001751e-06, "loss": 0.318, "step": 1671 }, { "epoch": 2.3348058591025342, "grad_norm": 2.8183776771028692, "learning_rate": 1.4287359962934055e-06, "loss": 0.3698, "step": 1672 }, { "epoch": 2.3362008835154615, "grad_norm": 2.7053397982828553, "learning_rate": 1.4230532803617814e-06, "loss": 0.3985, "step": 1673 }, { "epoch": 2.337595907928389, "grad_norm": 2.64126392702144, "learning_rate": 1.4173800126156916e-06, "loss": 0.3688, "step": 1674 }, { "epoch": 2.338990932341316, "grad_norm": 2.5204983652566297, "learning_rate": 1.411716208040566e-06, "loss": 0.3734, "step": 1675 }, { "epoch": 2.3403859567542433, "grad_norm": 2.737115050304645, "learning_rate": 1.4060618815968375e-06, "loss": 0.3676, "step": 1676 }, { "epoch": 2.3417809811671706, "grad_norm": 2.549167402396859, "learning_rate": 1.4004170482199054e-06, "loss": 0.4148, "step": 1677 }, { "epoch": 2.343176005580098, "grad_norm": 2.5043455837705335, "learning_rate": 1.3947817228200956e-06, "loss": 0.3453, "step": 1678 }, { "epoch": 2.3445710299930247, "grad_norm": 2.577140131448287, "learning_rate": 1.3891559202826133e-06, "loss": 0.4072, "step": 1679 }, { "epoch": 2.345966054405952, "grad_norm": 2.823606075201277, "learning_rate": 1.3835396554675179e-06, "loss": 0.3632, "step": 1680 }, { "epoch": 2.3473610788188792, "grad_norm": 2.6518675864455648, "learning_rate": 1.37793294320967e-06, "loss": 0.3954, "step": 1681 }, { "epoch": 2.3487561032318065, "grad_norm": 2.5359682657391183, "learning_rate": 1.3723357983186974e-06, "loss": 0.3433, "step": 1682 }, { "epoch": 2.3501511276447338, "grad_norm": 2.576399973472235, "learning_rate": 1.3667482355789607e-06, "loss": 0.3635, "step": 1683 }, { "epoch": 2.351546152057661, "grad_norm": 2.6491892058594155, "learning_rate": 1.3611702697495088e-06, "loss": 0.332, "step": 1684 }, { "epoch": 2.3529411764705883, "grad_norm": 2.4219518229808066, "learning_rate": 1.3556019155640416e-06, "loss": 0.3583, "step": 1685 }, { "epoch": 2.3543362008835156, "grad_norm": 2.2600789673905357, "learning_rate": 1.350043187730868e-06, "loss": 0.3552, "step": 1686 }, { "epoch": 2.355731225296443, "grad_norm": 2.8721140501125615, "learning_rate": 1.34449410093287e-06, "loss": 0.3708, "step": 1687 }, { "epoch": 2.3571262497093697, "grad_norm": 2.4725009553795485, "learning_rate": 1.3389546698274686e-06, "loss": 0.35, "step": 1688 }, { "epoch": 2.358521274122297, "grad_norm": 2.682880966441649, "learning_rate": 1.333424909046574e-06, "loss": 0.4007, "step": 1689 }, { "epoch": 2.359916298535224, "grad_norm": 2.7862341896965708, "learning_rate": 1.327904833196556e-06, "loss": 0.4008, "step": 1690 }, { "epoch": 2.3613113229481515, "grad_norm": 2.476153813376653, "learning_rate": 1.3223944568582047e-06, "loss": 0.3464, "step": 1691 }, { "epoch": 2.3627063473610788, "grad_norm": 2.6459227028849654, "learning_rate": 1.3168937945866861e-06, "loss": 0.3625, "step": 1692 }, { "epoch": 2.364101371774006, "grad_norm": 2.748330057386969, "learning_rate": 1.311402860911507e-06, "loss": 0.3613, "step": 1693 }, { "epoch": 2.3654963961869333, "grad_norm": 2.575623668715176, "learning_rate": 1.3059216703364814e-06, "loss": 0.3615, "step": 1694 }, { "epoch": 2.3668914205998606, "grad_norm": 2.5918149647113182, "learning_rate": 1.3004502373396821e-06, "loss": 0.3565, "step": 1695 }, { "epoch": 2.368286445012788, "grad_norm": 2.6746759805152642, "learning_rate": 1.2949885763734127e-06, "loss": 0.3808, "step": 1696 }, { "epoch": 2.369681469425715, "grad_norm": 2.605861476386467, "learning_rate": 1.2895367018641658e-06, "loss": 0.3522, "step": 1697 }, { "epoch": 2.3710764938386424, "grad_norm": 2.3115889993411805, "learning_rate": 1.284094628212576e-06, "loss": 0.3694, "step": 1698 }, { "epoch": 2.372471518251569, "grad_norm": 2.586539926195426, "learning_rate": 1.278662369793398e-06, "loss": 0.3672, "step": 1699 }, { "epoch": 2.3738665426644965, "grad_norm": 2.678429200006771, "learning_rate": 1.273239940955459e-06, "loss": 0.3958, "step": 1700 }, { "epoch": 2.3752615670774238, "grad_norm": 2.635943529770674, "learning_rate": 1.267827356021618e-06, "loss": 0.383, "step": 1701 }, { "epoch": 2.376656591490351, "grad_norm": 2.8174113556770726, "learning_rate": 1.2624246292887377e-06, "loss": 0.3666, "step": 1702 }, { "epoch": 2.3780516159032783, "grad_norm": 2.551862967853263, "learning_rate": 1.2570317750276374e-06, "loss": 0.3843, "step": 1703 }, { "epoch": 2.3794466403162056, "grad_norm": 2.686569798529484, "learning_rate": 1.2516488074830586e-06, "loss": 0.3622, "step": 1704 }, { "epoch": 2.380841664729133, "grad_norm": 2.3597911342714997, "learning_rate": 1.246275740873631e-06, "loss": 0.3604, "step": 1705 }, { "epoch": 2.38223668914206, "grad_norm": 3.0721927564125964, "learning_rate": 1.2409125893918329e-06, "loss": 0.4, "step": 1706 }, { "epoch": 2.3836317135549874, "grad_norm": 2.7377974285778692, "learning_rate": 1.2355593672039462e-06, "loss": 0.3476, "step": 1707 }, { "epoch": 2.385026737967914, "grad_norm": 2.6851823826372776, "learning_rate": 1.2302160884500337e-06, "loss": 0.4052, "step": 1708 }, { "epoch": 2.3864217623808415, "grad_norm": 2.7466451584190335, "learning_rate": 1.2248827672438868e-06, "loss": 0.3315, "step": 1709 }, { "epoch": 2.3878167867937687, "grad_norm": 2.6776139566549553, "learning_rate": 1.2195594176729963e-06, "loss": 0.3775, "step": 1710 }, { "epoch": 2.389211811206696, "grad_norm": 2.7366763938795167, "learning_rate": 1.2142460537985168e-06, "loss": 0.374, "step": 1711 }, { "epoch": 2.3906068356196233, "grad_norm": 2.6797523563790313, "learning_rate": 1.2089426896552265e-06, "loss": 0.3441, "step": 1712 }, { "epoch": 2.3920018600325506, "grad_norm": 2.3738502203003295, "learning_rate": 1.2036493392514847e-06, "loss": 0.4275, "step": 1713 }, { "epoch": 2.393396884445478, "grad_norm": 2.701127044656414, "learning_rate": 1.1983660165692078e-06, "loss": 0.3445, "step": 1714 }, { "epoch": 2.394791908858405, "grad_norm": 2.7721933927483042, "learning_rate": 1.1930927355638189e-06, "loss": 0.3545, "step": 1715 }, { "epoch": 2.3961869332713324, "grad_norm": 2.400685659800295, "learning_rate": 1.1878295101642185e-06, "loss": 0.359, "step": 1716 }, { "epoch": 2.3975819576842596, "grad_norm": 2.629468257857636, "learning_rate": 1.182576354272748e-06, "loss": 0.3485, "step": 1717 }, { "epoch": 2.398976982097187, "grad_norm": 2.5673704298827142, "learning_rate": 1.1773332817651512e-06, "loss": 0.361, "step": 1718 }, { "epoch": 2.400372006510114, "grad_norm": 2.4561086309950473, "learning_rate": 1.1721003064905329e-06, "loss": 0.3376, "step": 1719 }, { "epoch": 2.401767030923041, "grad_norm": 2.5547325889982124, "learning_rate": 1.1668774422713336e-06, "loss": 0.3753, "step": 1720 }, { "epoch": 2.4031620553359683, "grad_norm": 2.4962840616062265, "learning_rate": 1.1616647029032818e-06, "loss": 0.356, "step": 1721 }, { "epoch": 2.4045570797488955, "grad_norm": 2.8557169626850536, "learning_rate": 1.1564621021553617e-06, "loss": 0.372, "step": 1722 }, { "epoch": 2.405952104161823, "grad_norm": 2.6618301879848785, "learning_rate": 1.1512696537697804e-06, "loss": 0.39, "step": 1723 }, { "epoch": 2.40734712857475, "grad_norm": 3.0534950245042576, "learning_rate": 1.1460873714619275e-06, "loss": 0.4285, "step": 1724 }, { "epoch": 2.4087421529876774, "grad_norm": 2.522325949132638, "learning_rate": 1.140915268920339e-06, "loss": 0.3603, "step": 1725 }, { "epoch": 2.4101371774006046, "grad_norm": 2.513733552995916, "learning_rate": 1.13575335980666e-06, "loss": 0.3595, "step": 1726 }, { "epoch": 2.411532201813532, "grad_norm": 2.85838281810729, "learning_rate": 1.130601657755616e-06, "loss": 0.363, "step": 1727 }, { "epoch": 2.4129272262264587, "grad_norm": 2.704472497164617, "learning_rate": 1.125460176374965e-06, "loss": 0.3573, "step": 1728 }, { "epoch": 2.414322250639386, "grad_norm": 2.35958128172627, "learning_rate": 1.1203289292454728e-06, "loss": 0.3799, "step": 1729 }, { "epoch": 2.4157172750523133, "grad_norm": 2.933296681498885, "learning_rate": 1.1152079299208724e-06, "loss": 0.3925, "step": 1730 }, { "epoch": 2.4171122994652405, "grad_norm": 2.597579120972063, "learning_rate": 1.1100971919278247e-06, "loss": 0.3527, "step": 1731 }, { "epoch": 2.418507323878168, "grad_norm": 2.5588768750386786, "learning_rate": 1.104996728765887e-06, "loss": 0.378, "step": 1732 }, { "epoch": 2.419902348291095, "grad_norm": 2.8655822645269398, "learning_rate": 1.0999065539074793e-06, "loss": 0.3931, "step": 1733 }, { "epoch": 2.4212973727040223, "grad_norm": 2.659714882480564, "learning_rate": 1.094826680797843e-06, "loss": 0.3671, "step": 1734 }, { "epoch": 2.4226923971169496, "grad_norm": 2.661113594821726, "learning_rate": 1.0897571228550097e-06, "loss": 0.3798, "step": 1735 }, { "epoch": 2.424087421529877, "grad_norm": 2.6003792343250036, "learning_rate": 1.0846978934697666e-06, "loss": 0.3758, "step": 1736 }, { "epoch": 2.425482445942804, "grad_norm": 2.56616591891264, "learning_rate": 1.0796490060056142e-06, "loss": 0.3758, "step": 1737 }, { "epoch": 2.4268774703557314, "grad_norm": 2.606522055031232, "learning_rate": 1.074610473798738e-06, "loss": 0.3728, "step": 1738 }, { "epoch": 2.4282724947686587, "grad_norm": 2.78032747911159, "learning_rate": 1.0695823101579728e-06, "loss": 0.3647, "step": 1739 }, { "epoch": 2.4296675191815855, "grad_norm": 2.577427401374459, "learning_rate": 1.0645645283647616e-06, "loss": 0.3635, "step": 1740 }, { "epoch": 2.431062543594513, "grad_norm": 2.61235914368249, "learning_rate": 1.0595571416731293e-06, "loss": 0.3633, "step": 1741 }, { "epoch": 2.43245756800744, "grad_norm": 2.5986587509515333, "learning_rate": 1.0545601633096414e-06, "loss": 0.3377, "step": 1742 }, { "epoch": 2.4338525924203673, "grad_norm": 2.750142076620141, "learning_rate": 1.049573606473369e-06, "loss": 0.3964, "step": 1743 }, { "epoch": 2.4352476168332946, "grad_norm": 2.679857745296654, "learning_rate": 1.0445974843358563e-06, "loss": 0.3336, "step": 1744 }, { "epoch": 2.436642641246222, "grad_norm": 2.510932730460405, "learning_rate": 1.0396318100410868e-06, "loss": 0.3665, "step": 1745 }, { "epoch": 2.438037665659149, "grad_norm": 2.6895500160853505, "learning_rate": 1.0346765967054472e-06, "loss": 0.3886, "step": 1746 }, { "epoch": 2.4394326900720764, "grad_norm": 2.6899293188290643, "learning_rate": 1.029731857417689e-06, "loss": 0.3943, "step": 1747 }, { "epoch": 2.4408277144850032, "grad_norm": 2.6740116396381213, "learning_rate": 1.0247976052389018e-06, "loss": 0.3665, "step": 1748 }, { "epoch": 2.4422227388979305, "grad_norm": 2.516546561886342, "learning_rate": 1.0198738532024715e-06, "loss": 0.326, "step": 1749 }, { "epoch": 2.443617763310858, "grad_norm": 2.489135842519529, "learning_rate": 1.0149606143140484e-06, "loss": 0.379, "step": 1750 }, { "epoch": 2.445012787723785, "grad_norm": 2.794094450669819, "learning_rate": 1.0100579015515156e-06, "loss": 0.3952, "step": 1751 }, { "epoch": 2.4464078121367123, "grad_norm": 2.945373689158043, "learning_rate": 1.005165727864953e-06, "loss": 0.364, "step": 1752 }, { "epoch": 2.4478028365496396, "grad_norm": 2.603329525317409, "learning_rate": 1.0002841061765989e-06, "loss": 0.3444, "step": 1753 }, { "epoch": 2.449197860962567, "grad_norm": 2.376814808243512, "learning_rate": 9.954130493808201e-07, "loss": 0.3608, "step": 1754 }, { "epoch": 2.450592885375494, "grad_norm": 2.7547222173372066, "learning_rate": 9.905525703440815e-07, "loss": 0.3484, "step": 1755 }, { "epoch": 2.4519879097884214, "grad_norm": 2.7456293001599676, "learning_rate": 9.85702681904902e-07, "loss": 0.3445, "step": 1756 }, { "epoch": 2.4533829342013487, "grad_norm": 2.650233775790436, "learning_rate": 9.808633968738297e-07, "loss": 0.349, "step": 1757 }, { "epoch": 2.454777958614276, "grad_norm": 2.6210051955587224, "learning_rate": 9.760347280334064e-07, "loss": 0.3891, "step": 1758 }, { "epoch": 2.456172983027203, "grad_norm": 2.557078486385038, "learning_rate": 9.712166881381279e-07, "loss": 0.3652, "step": 1759 }, { "epoch": 2.45756800744013, "grad_norm": 2.627856120263576, "learning_rate": 9.664092899144156e-07, "loss": 0.3689, "step": 1760 }, { "epoch": 2.4589630318530573, "grad_norm": 2.5794362940583193, "learning_rate": 9.616125460605857e-07, "loss": 0.349, "step": 1761 }, { "epoch": 2.4603580562659846, "grad_norm": 2.622303543350245, "learning_rate": 9.56826469246806e-07, "loss": 0.3481, "step": 1762 }, { "epoch": 2.461753080678912, "grad_norm": 2.7397756853626505, "learning_rate": 9.520510721150722e-07, "loss": 0.3429, "step": 1763 }, { "epoch": 2.463148105091839, "grad_norm": 2.7202665311722503, "learning_rate": 9.472863672791721e-07, "loss": 0.4126, "step": 1764 }, { "epoch": 2.4645431295047664, "grad_norm": 2.897322747559073, "learning_rate": 9.425323673246461e-07, "loss": 0.3878, "step": 1765 }, { "epoch": 2.4659381539176937, "grad_norm": 2.7313356618656166, "learning_rate": 9.377890848087595e-07, "loss": 0.3856, "step": 1766 }, { "epoch": 2.467333178330621, "grad_norm": 2.6981212724847157, "learning_rate": 9.330565322604729e-07, "loss": 0.3827, "step": 1767 }, { "epoch": 2.468728202743548, "grad_norm": 2.7448118234549828, "learning_rate": 9.283347221803985e-07, "loss": 0.3528, "step": 1768 }, { "epoch": 2.470123227156475, "grad_norm": 2.7531749376664396, "learning_rate": 9.236236670407772e-07, "loss": 0.3851, "step": 1769 }, { "epoch": 2.4715182515694023, "grad_norm": 2.9146141182521186, "learning_rate": 9.189233792854424e-07, "loss": 0.3928, "step": 1770 }, { "epoch": 2.4729132759823296, "grad_norm": 2.618382930701811, "learning_rate": 9.142338713297838e-07, "loss": 0.3648, "step": 1771 }, { "epoch": 2.474308300395257, "grad_norm": 2.6910866719684083, "learning_rate": 9.095551555607169e-07, "loss": 0.3989, "step": 1772 }, { "epoch": 2.475703324808184, "grad_norm": 2.8061128619447553, "learning_rate": 9.048872443366529e-07, "loss": 0.3599, "step": 1773 }, { "epoch": 2.4770983492211114, "grad_norm": 2.5170693112405877, "learning_rate": 9.002301499874622e-07, "loss": 0.3587, "step": 1774 }, { "epoch": 2.4784933736340387, "grad_norm": 2.719817697937222, "learning_rate": 8.955838848144449e-07, "loss": 0.3381, "step": 1775 }, { "epoch": 2.479888398046966, "grad_norm": 2.948302908879379, "learning_rate": 8.909484610902958e-07, "loss": 0.3951, "step": 1776 }, { "epoch": 2.481283422459893, "grad_norm": 2.6860235252242184, "learning_rate": 8.863238910590704e-07, "loss": 0.3598, "step": 1777 }, { "epoch": 2.4826784468728205, "grad_norm": 2.5719386435847094, "learning_rate": 8.817101869361599e-07, "loss": 0.3863, "step": 1778 }, { "epoch": 2.4840734712857477, "grad_norm": 2.8543620424643086, "learning_rate": 8.77107360908253e-07, "loss": 0.4165, "step": 1779 }, { "epoch": 2.4854684956986746, "grad_norm": 2.863626423480456, "learning_rate": 8.725154251333012e-07, "loss": 0.3476, "step": 1780 }, { "epoch": 2.486863520111602, "grad_norm": 2.8825773779158594, "learning_rate": 8.679343917404959e-07, "loss": 0.4695, "step": 1781 }, { "epoch": 2.488258544524529, "grad_norm": 2.373024307687679, "learning_rate": 8.633642728302266e-07, "loss": 0.3479, "step": 1782 }, { "epoch": 2.4896535689374564, "grad_norm": 2.6804007440321724, "learning_rate": 8.588050804740527e-07, "loss": 0.368, "step": 1783 }, { "epoch": 2.4910485933503836, "grad_norm": 2.7241377421501154, "learning_rate": 8.542568267146761e-07, "loss": 0.346, "step": 1784 }, { "epoch": 2.492443617763311, "grad_norm": 2.407145604194694, "learning_rate": 8.49719523565904e-07, "loss": 0.3562, "step": 1785 }, { "epoch": 2.493838642176238, "grad_norm": 2.782348216120243, "learning_rate": 8.451931830126148e-07, "loss": 0.367, "step": 1786 }, { "epoch": 2.4952336665891655, "grad_norm": 2.6372739006379864, "learning_rate": 8.40677817010736e-07, "loss": 0.3598, "step": 1787 }, { "epoch": 2.4966286910020927, "grad_norm": 2.6786747910439073, "learning_rate": 8.361734374872032e-07, "loss": 0.3661, "step": 1788 }, { "epoch": 2.4980237154150196, "grad_norm": 2.342552438467361, "learning_rate": 8.316800563399307e-07, "loss": 0.3368, "step": 1789 }, { "epoch": 2.499418739827947, "grad_norm": 2.6899679174639157, "learning_rate": 8.271976854377861e-07, "loss": 0.3496, "step": 1790 }, { "epoch": 2.500813764240874, "grad_norm": 2.740596100282574, "learning_rate": 8.227263366205523e-07, "loss": 0.4318, "step": 1791 }, { "epoch": 2.5022087886538014, "grad_norm": 2.4790194763787263, "learning_rate": 8.182660216988964e-07, "loss": 0.496, "step": 1792 }, { "epoch": 2.5036038130667286, "grad_norm": 3.0359859168964993, "learning_rate": 8.138167524543445e-07, "loss": 0.3857, "step": 1793 }, { "epoch": 2.504998837479656, "grad_norm": 2.9216902372615996, "learning_rate": 8.09378540639243e-07, "loss": 0.3777, "step": 1794 }, { "epoch": 2.506393861892583, "grad_norm": 2.6786172340667926, "learning_rate": 8.049513979767304e-07, "loss": 0.3837, "step": 1795 }, { "epoch": 2.5077888863055104, "grad_norm": 2.591419198678466, "learning_rate": 8.00535336160711e-07, "loss": 0.3257, "step": 1796 }, { "epoch": 2.5091839107184377, "grad_norm": 2.7708421481189407, "learning_rate": 7.96130366855819e-07, "loss": 0.3744, "step": 1797 }, { "epoch": 2.510578935131365, "grad_norm": 2.613860569890381, "learning_rate": 7.917365016973866e-07, "loss": 0.3601, "step": 1798 }, { "epoch": 2.5119739595442923, "grad_norm": 2.7701003013814027, "learning_rate": 7.873537522914155e-07, "loss": 0.3927, "step": 1799 }, { "epoch": 2.5133689839572195, "grad_norm": 2.784042420188742, "learning_rate": 7.829821302145485e-07, "loss": 0.4029, "step": 1800 }, { "epoch": 2.5147640083701464, "grad_norm": 2.615517808358739, "learning_rate": 7.786216470140334e-07, "loss": 0.3576, "step": 1801 }, { "epoch": 2.5161590327830736, "grad_norm": 2.7818293466460733, "learning_rate": 7.742723142076991e-07, "loss": 0.3844, "step": 1802 }, { "epoch": 2.517554057196001, "grad_norm": 2.6655467308791896, "learning_rate": 7.699341432839203e-07, "loss": 0.3842, "step": 1803 }, { "epoch": 2.518949081608928, "grad_norm": 2.8110874624548456, "learning_rate": 7.656071457015879e-07, "loss": 0.3419, "step": 1804 }, { "epoch": 2.5203441060218554, "grad_norm": 2.606920538314061, "learning_rate": 7.612913328900784e-07, "loss": 0.376, "step": 1805 }, { "epoch": 2.5217391304347827, "grad_norm": 2.8231480701227945, "learning_rate": 7.569867162492283e-07, "loss": 0.3475, "step": 1806 }, { "epoch": 2.52313415484771, "grad_norm": 2.659504060357367, "learning_rate": 7.526933071492959e-07, "loss": 0.3899, "step": 1807 }, { "epoch": 2.524529179260637, "grad_norm": 2.8773075333576954, "learning_rate": 7.484111169309399e-07, "loss": 0.3768, "step": 1808 }, { "epoch": 2.525924203673564, "grad_norm": 2.5697887693601635, "learning_rate": 7.441401569051848e-07, "loss": 0.3521, "step": 1809 }, { "epoch": 2.5273192280864913, "grad_norm": 2.765225823747802, "learning_rate": 7.398804383533886e-07, "loss": 0.3635, "step": 1810 }, { "epoch": 2.5287142524994186, "grad_norm": 2.740390186293855, "learning_rate": 7.356319725272165e-07, "loss": 0.3635, "step": 1811 }, { "epoch": 2.530109276912346, "grad_norm": 2.6665076691478307, "learning_rate": 7.313947706486136e-07, "loss": 0.3791, "step": 1812 }, { "epoch": 2.531504301325273, "grad_norm": 2.761373937061098, "learning_rate": 7.271688439097713e-07, "loss": 0.4012, "step": 1813 }, { "epoch": 2.5328993257382004, "grad_norm": 2.662649654462204, "learning_rate": 7.229542034730952e-07, "loss": 0.3116, "step": 1814 }, { "epoch": 2.5342943501511277, "grad_norm": 2.8719912457496513, "learning_rate": 7.187508604711851e-07, "loss": 0.3886, "step": 1815 }, { "epoch": 2.535689374564055, "grad_norm": 2.6138814266175596, "learning_rate": 7.145588260067943e-07, "loss": 0.3637, "step": 1816 }, { "epoch": 2.5370843989769822, "grad_norm": 2.540783537800378, "learning_rate": 7.103781111528074e-07, "loss": 0.3836, "step": 1817 }, { "epoch": 2.5384794233899095, "grad_norm": 2.698731916490577, "learning_rate": 7.062087269522105e-07, "loss": 0.3533, "step": 1818 }, { "epoch": 2.5398744478028368, "grad_norm": 2.4943579639494406, "learning_rate": 7.020506844180608e-07, "loss": 0.3889, "step": 1819 }, { "epoch": 2.541269472215764, "grad_norm": 2.8825223958679866, "learning_rate": 6.979039945334543e-07, "loss": 0.3627, "step": 1820 }, { "epoch": 2.542664496628691, "grad_norm": 2.514764083413655, "learning_rate": 6.937686682515044e-07, "loss": 0.3712, "step": 1821 }, { "epoch": 2.544059521041618, "grad_norm": 2.9557667799498355, "learning_rate": 6.896447164953057e-07, "loss": 0.402, "step": 1822 }, { "epoch": 2.5454545454545454, "grad_norm": 2.782165037955925, "learning_rate": 6.855321501579077e-07, "loss": 0.3682, "step": 1823 }, { "epoch": 2.5468495698674727, "grad_norm": 2.5954127629039507, "learning_rate": 6.814309801022873e-07, "loss": 0.3563, "step": 1824 }, { "epoch": 2.5482445942804, "grad_norm": 2.56191756773095, "learning_rate": 6.77341217161322e-07, "loss": 0.4087, "step": 1825 }, { "epoch": 2.5496396186933272, "grad_norm": 2.794974384602701, "learning_rate": 6.732628721377533e-07, "loss": 0.3698, "step": 1826 }, { "epoch": 2.5510346431062545, "grad_norm": 2.687559395667503, "learning_rate": 6.69195955804165e-07, "loss": 0.4582, "step": 1827 }, { "epoch": 2.5524296675191813, "grad_norm": 2.74641830957282, "learning_rate": 6.651404789029553e-07, "loss": 0.3584, "step": 1828 }, { "epoch": 2.5538246919321086, "grad_norm": 2.215945110919057, "learning_rate": 6.610964521463032e-07, "loss": 0.3637, "step": 1829 }, { "epoch": 2.555219716345036, "grad_norm": 2.8547508574424603, "learning_rate": 6.570638862161449e-07, "loss": 0.3536, "step": 1830 }, { "epoch": 2.556614740757963, "grad_norm": 2.7559615276452436, "learning_rate": 6.530427917641447e-07, "loss": 0.3425, "step": 1831 }, { "epoch": 2.5580097651708904, "grad_norm": 2.61498528851166, "learning_rate": 6.490331794116633e-07, "loss": 0.3763, "step": 1832 }, { "epoch": 2.5594047895838177, "grad_norm": 2.265397084840117, "learning_rate": 6.450350597497335e-07, "loss": 0.3499, "step": 1833 }, { "epoch": 2.560799813996745, "grad_norm": 2.608910969782399, "learning_rate": 6.410484433390335e-07, "loss": 0.3821, "step": 1834 }, { "epoch": 2.562194838409672, "grad_norm": 2.624424743290255, "learning_rate": 6.370733407098517e-07, "loss": 0.3236, "step": 1835 }, { "epoch": 2.5635898628225995, "grad_norm": 2.6170461681085793, "learning_rate": 6.331097623620697e-07, "loss": 0.366, "step": 1836 }, { "epoch": 2.5649848872355268, "grad_norm": 2.607944399462541, "learning_rate": 6.291577187651255e-07, "loss": 0.3582, "step": 1837 }, { "epoch": 2.566379911648454, "grad_norm": 2.7233390809570017, "learning_rate": 6.252172203579892e-07, "loss": 0.3754, "step": 1838 }, { "epoch": 2.5677749360613813, "grad_norm": 2.6508808626312352, "learning_rate": 6.212882775491352e-07, "loss": 0.4209, "step": 1839 }, { "epoch": 2.5691699604743086, "grad_norm": 2.640847255817163, "learning_rate": 6.173709007165158e-07, "loss": 0.378, "step": 1840 }, { "epoch": 2.5705649848872354, "grad_norm": 2.750685081100617, "learning_rate": 6.134651002075315e-07, "loss": 0.3709, "step": 1841 }, { "epoch": 2.5719600093001627, "grad_norm": 2.790131299638279, "learning_rate": 6.095708863390065e-07, "loss": 0.3882, "step": 1842 }, { "epoch": 2.57335503371309, "grad_norm": 2.5582714370114736, "learning_rate": 6.056882693971605e-07, "loss": 0.3511, "step": 1843 }, { "epoch": 2.574750058126017, "grad_norm": 2.7231029497838324, "learning_rate": 6.018172596375776e-07, "loss": 0.3937, "step": 1844 }, { "epoch": 2.5761450825389445, "grad_norm": 2.6320834366134513, "learning_rate": 5.979578672851843e-07, "loss": 0.3877, "step": 1845 }, { "epoch": 2.5775401069518717, "grad_norm": 2.6492569283504452, "learning_rate": 5.941101025342239e-07, "loss": 0.3727, "step": 1846 }, { "epoch": 2.578935131364799, "grad_norm": 2.9977172920283763, "learning_rate": 5.902739755482201e-07, "loss": 0.4302, "step": 1847 }, { "epoch": 2.580330155777726, "grad_norm": 2.5317465582134626, "learning_rate": 5.864494964599615e-07, "loss": 0.3364, "step": 1848 }, { "epoch": 2.581725180190653, "grad_norm": 2.5577541399129267, "learning_rate": 5.826366753714707e-07, "loss": 0.3652, "step": 1849 }, { "epoch": 2.5831202046035804, "grad_norm": 2.3558703804893084, "learning_rate": 5.788355223539698e-07, "loss": 0.3656, "step": 1850 }, { "epoch": 2.5845152290165077, "grad_norm": 2.67383588562154, "learning_rate": 5.750460474478675e-07, "loss": 0.3557, "step": 1851 }, { "epoch": 2.585910253429435, "grad_norm": 2.39540863320921, "learning_rate": 5.712682606627251e-07, "loss": 0.3485, "step": 1852 }, { "epoch": 2.587305277842362, "grad_norm": 2.7392064764484494, "learning_rate": 5.675021719772262e-07, "loss": 0.3884, "step": 1853 }, { "epoch": 2.5887003022552895, "grad_norm": 2.6104064273892043, "learning_rate": 5.637477913391604e-07, "loss": 0.3541, "step": 1854 }, { "epoch": 2.5900953266682167, "grad_norm": 2.5284619947081666, "learning_rate": 5.600051286653884e-07, "loss": 0.402, "step": 1855 }, { "epoch": 2.591490351081144, "grad_norm": 2.879840715740937, "learning_rate": 5.562741938418187e-07, "loss": 0.3566, "step": 1856 }, { "epoch": 2.5928853754940713, "grad_norm": 2.638018780553815, "learning_rate": 5.525549967233829e-07, "loss": 0.3808, "step": 1857 }, { "epoch": 2.5942803999069985, "grad_norm": 2.661848785529433, "learning_rate": 5.488475471340099e-07, "loss": 0.3636, "step": 1858 }, { "epoch": 2.595675424319926, "grad_norm": 2.582227544146746, "learning_rate": 5.451518548665946e-07, "loss": 0.3708, "step": 1859 }, { "epoch": 2.597070448732853, "grad_norm": 2.3190964016444657, "learning_rate": 5.414679296829806e-07, "loss": 0.3553, "step": 1860 }, { "epoch": 2.59846547314578, "grad_norm": 2.426516767011846, "learning_rate": 5.377957813139262e-07, "loss": 0.3284, "step": 1861 }, { "epoch": 2.599860497558707, "grad_norm": 2.6662961476720226, "learning_rate": 5.341354194590831e-07, "loss": 0.4481, "step": 1862 }, { "epoch": 2.6012555219716345, "grad_norm": 2.7838357283487287, "learning_rate": 5.304868537869706e-07, "loss": 0.3658, "step": 1863 }, { "epoch": 2.6026505463845617, "grad_norm": 2.35605954171155, "learning_rate": 5.268500939349514e-07, "loss": 0.3607, "step": 1864 }, { "epoch": 2.604045570797489, "grad_norm": 2.5179601857001823, "learning_rate": 5.232251495091989e-07, "loss": 0.3417, "step": 1865 }, { "epoch": 2.6054405952104163, "grad_norm": 2.5542587397601735, "learning_rate": 5.196120300846835e-07, "loss": 0.3562, "step": 1866 }, { "epoch": 2.6068356196233435, "grad_norm": 2.4959462257132, "learning_rate": 5.160107452051361e-07, "loss": 0.3578, "step": 1867 }, { "epoch": 2.6082306440362704, "grad_norm": 2.6936321538546757, "learning_rate": 5.124213043830278e-07, "loss": 0.3333, "step": 1868 }, { "epoch": 2.6096256684491976, "grad_norm": 2.6496189022056655, "learning_rate": 5.088437170995481e-07, "loss": 0.3514, "step": 1869 }, { "epoch": 2.611020692862125, "grad_norm": 2.5772857414447468, "learning_rate": 5.052779928045737e-07, "loss": 0.3689, "step": 1870 }, { "epoch": 2.612415717275052, "grad_norm": 2.7901053520024868, "learning_rate": 5.01724140916649e-07, "loss": 0.3778, "step": 1871 }, { "epoch": 2.6138107416879794, "grad_norm": 2.703816409050737, "learning_rate": 4.981821708229545e-07, "loss": 0.385, "step": 1872 }, { "epoch": 2.6152057661009067, "grad_norm": 2.6568921120743583, "learning_rate": 4.946520918792886e-07, "loss": 0.36, "step": 1873 }, { "epoch": 2.616600790513834, "grad_norm": 2.694154546037388, "learning_rate": 4.911339134100401e-07, "loss": 0.3699, "step": 1874 }, { "epoch": 2.6179958149267613, "grad_norm": 2.6548646230996487, "learning_rate": 4.87627644708163e-07, "loss": 0.3686, "step": 1875 }, { "epoch": 2.6193908393396885, "grad_norm": 2.6975123266988414, "learning_rate": 4.841332950351535e-07, "loss": 0.3776, "step": 1876 }, { "epoch": 2.620785863752616, "grad_norm": 2.6643432700274636, "learning_rate": 4.806508736210253e-07, "loss": 0.3699, "step": 1877 }, { "epoch": 2.622180888165543, "grad_norm": 2.406681000263141, "learning_rate": 4.771803896642812e-07, "loss": 0.3547, "step": 1878 }, { "epoch": 2.6235759125784703, "grad_norm": 2.536328324888638, "learning_rate": 4.737218523318965e-07, "loss": 0.3942, "step": 1879 }, { "epoch": 2.6249709369913976, "grad_norm": 2.6350073981219646, "learning_rate": 4.7027527075929e-07, "loss": 0.3597, "step": 1880 }, { "epoch": 2.6263659614043244, "grad_norm": 2.716521796381302, "learning_rate": 4.6684065405029677e-07, "loss": 0.3733, "step": 1881 }, { "epoch": 2.6277609858172517, "grad_norm": 2.6086481625399323, "learning_rate": 4.6341801127715303e-07, "loss": 0.3728, "step": 1882 }, { "epoch": 2.629156010230179, "grad_norm": 2.7741188322322574, "learning_rate": 4.6000735148046316e-07, "loss": 0.4013, "step": 1883 }, { "epoch": 2.6305510346431062, "grad_norm": 2.7869444024104273, "learning_rate": 4.566086836691791e-07, "loss": 0.3589, "step": 1884 }, { "epoch": 2.6319460590560335, "grad_norm": 2.473499696580084, "learning_rate": 4.532220168205798e-07, "loss": 0.3665, "step": 1885 }, { "epoch": 2.633341083468961, "grad_norm": 2.681197842050239, "learning_rate": 4.498473598802444e-07, "loss": 0.3509, "step": 1886 }, { "epoch": 2.634736107881888, "grad_norm": 2.8507623078173174, "learning_rate": 4.464847217620266e-07, "loss": 0.3617, "step": 1887 }, { "epoch": 2.636131132294815, "grad_norm": 2.6337656977278345, "learning_rate": 4.4313411134803584e-07, "loss": 0.3693, "step": 1888 }, { "epoch": 2.637526156707742, "grad_norm": 2.624876774464376, "learning_rate": 4.397955374886104e-07, "loss": 0.3455, "step": 1889 }, { "epoch": 2.6389211811206694, "grad_norm": 2.6974043720035077, "learning_rate": 4.364690090022938e-07, "loss": 0.3672, "step": 1890 }, { "epoch": 2.6403162055335967, "grad_norm": 2.586596680940854, "learning_rate": 4.331545346758159e-07, "loss": 0.3788, "step": 1891 }, { "epoch": 2.641711229946524, "grad_norm": 2.6336126233291037, "learning_rate": 4.2985212326406456e-07, "loss": 0.3218, "step": 1892 }, { "epoch": 2.6431062543594512, "grad_norm": 2.6844074671603626, "learning_rate": 4.265617834900637e-07, "loss": 0.3635, "step": 1893 }, { "epoch": 2.6445012787723785, "grad_norm": 2.5855332565201703, "learning_rate": 4.2328352404495346e-07, "loss": 0.3285, "step": 1894 }, { "epoch": 2.6458963031853058, "grad_norm": 2.578012852510128, "learning_rate": 4.2001735358796316e-07, "loss": 0.342, "step": 1895 }, { "epoch": 2.647291327598233, "grad_norm": 2.3499442924220095, "learning_rate": 4.167632807463895e-07, "loss": 0.3665, "step": 1896 }, { "epoch": 2.6486863520111603, "grad_norm": 2.5617317834674798, "learning_rate": 4.135213141155769e-07, "loss": 0.3462, "step": 1897 }, { "epoch": 2.6500813764240876, "grad_norm": 2.861248121434713, "learning_rate": 4.1029146225889103e-07, "loss": 0.3873, "step": 1898 }, { "epoch": 2.651476400837015, "grad_norm": 2.5681757006992867, "learning_rate": 4.0707373370769634e-07, "loss": 0.3648, "step": 1899 }, { "epoch": 2.652871425249942, "grad_norm": 2.637045003128861, "learning_rate": 4.0386813696133564e-07, "loss": 0.3718, "step": 1900 }, { "epoch": 2.654266449662869, "grad_norm": 2.663483788099685, "learning_rate": 4.0067468048710756e-07, "loss": 0.3589, "step": 1901 }, { "epoch": 2.6556614740757962, "grad_norm": 2.586561089948279, "learning_rate": 3.974933727202412e-07, "loss": 0.3634, "step": 1902 }, { "epoch": 2.6570564984887235, "grad_norm": 2.4663605260486765, "learning_rate": 3.943242220638777e-07, "loss": 0.3575, "step": 1903 }, { "epoch": 2.6584515229016508, "grad_norm": 2.557787126469995, "learning_rate": 3.911672368890462e-07, "loss": 0.3825, "step": 1904 }, { "epoch": 2.659846547314578, "grad_norm": 2.729278229028777, "learning_rate": 3.8802242553464096e-07, "loss": 0.376, "step": 1905 }, { "epoch": 2.6612415717275053, "grad_norm": 2.726608479992513, "learning_rate": 3.8488979630739996e-07, "loss": 0.3549, "step": 1906 }, { "epoch": 2.6626365961404326, "grad_norm": 2.6479547195709534, "learning_rate": 3.8176935748188425e-07, "loss": 0.3281, "step": 1907 }, { "epoch": 2.66403162055336, "grad_norm": 2.3088021015614064, "learning_rate": 3.78661117300454e-07, "loss": 0.3237, "step": 1908 }, { "epoch": 2.6654266449662867, "grad_norm": 2.523615301896734, "learning_rate": 3.755650839732489e-07, "loss": 0.3682, "step": 1909 }, { "epoch": 2.666821669379214, "grad_norm": 2.4319855141075593, "learning_rate": 3.7248126567816454e-07, "loss": 0.4036, "step": 1910 }, { "epoch": 2.668216693792141, "grad_norm": 2.6624423284752745, "learning_rate": 3.694096705608319e-07, "loss": 0.3617, "step": 1911 }, { "epoch": 2.6696117182050685, "grad_norm": 2.8072217080982984, "learning_rate": 3.6635030673459413e-07, "loss": 0.3567, "step": 1912 }, { "epoch": 2.6710067426179958, "grad_norm": 2.632365445318044, "learning_rate": 3.6330318228049e-07, "loss": 0.3977, "step": 1913 }, { "epoch": 2.672401767030923, "grad_norm": 2.5575974073520777, "learning_rate": 3.6026830524722443e-07, "loss": 0.3722, "step": 1914 }, { "epoch": 2.6737967914438503, "grad_norm": 2.481713487253495, "learning_rate": 3.572456836511551e-07, "loss": 0.3704, "step": 1915 }, { "epoch": 2.6751918158567776, "grad_norm": 2.462330259764663, "learning_rate": 3.5423532547626816e-07, "loss": 0.3418, "step": 1916 }, { "epoch": 2.676586840269705, "grad_norm": 2.3817511737169297, "learning_rate": 3.5123723867415527e-07, "loss": 0.3373, "step": 1917 }, { "epoch": 2.677981864682632, "grad_norm": 2.6369519623918407, "learning_rate": 3.4825143116399454e-07, "loss": 0.3606, "step": 1918 }, { "epoch": 2.6793768890955594, "grad_norm": 2.806490568230642, "learning_rate": 3.452779108325316e-07, "loss": 0.343, "step": 1919 }, { "epoch": 2.6807719135084866, "grad_norm": 2.568727872569913, "learning_rate": 3.4231668553405316e-07, "loss": 0.3387, "step": 1920 }, { "epoch": 2.6821669379214135, "grad_norm": 2.6863125625362363, "learning_rate": 3.39367763090373e-07, "loss": 0.3615, "step": 1921 }, { "epoch": 2.6835619623343407, "grad_norm": 2.679585653955118, "learning_rate": 3.3643115129080695e-07, "loss": 0.3642, "step": 1922 }, { "epoch": 2.684956986747268, "grad_norm": 2.7268490806844103, "learning_rate": 3.3350685789215133e-07, "loss": 0.344, "step": 1923 }, { "epoch": 2.6863520111601953, "grad_norm": 2.7635266728341707, "learning_rate": 3.3059489061866625e-07, "loss": 0.3619, "step": 1924 }, { "epoch": 2.6877470355731226, "grad_norm": 2.6823457795052428, "learning_rate": 3.276952571620556e-07, "loss": 0.4022, "step": 1925 }, { "epoch": 2.68914205998605, "grad_norm": 2.801507150873791, "learning_rate": 3.248079651814395e-07, "loss": 0.3797, "step": 1926 }, { "epoch": 2.690537084398977, "grad_norm": 2.467938098313279, "learning_rate": 3.2193302230334455e-07, "loss": 0.3305, "step": 1927 }, { "epoch": 2.6919321088119044, "grad_norm": 2.4429631300073607, "learning_rate": 3.190704361216751e-07, "loss": 0.3528, "step": 1928 }, { "epoch": 2.693327133224831, "grad_norm": 2.7192444191011087, "learning_rate": 3.162202141976956e-07, "loss": 0.4004, "step": 1929 }, { "epoch": 2.6947221576377585, "grad_norm": 2.891261364457905, "learning_rate": 3.133823640600137e-07, "loss": 0.394, "step": 1930 }, { "epoch": 2.6961171820506857, "grad_norm": 2.5395580985447705, "learning_rate": 3.105568932045577e-07, "loss": 0.3857, "step": 1931 }, { "epoch": 2.697512206463613, "grad_norm": 2.726124133060591, "learning_rate": 3.077438090945573e-07, "loss": 0.3416, "step": 1932 }, { "epoch": 2.6989072308765403, "grad_norm": 2.575374814322836, "learning_rate": 3.0494311916052234e-07, "loss": 0.378, "step": 1933 }, { "epoch": 2.7003022552894675, "grad_norm": 2.8054179200677667, "learning_rate": 3.021548308002248e-07, "loss": 0.3943, "step": 1934 }, { "epoch": 2.701697279702395, "grad_norm": 2.5546878022821873, "learning_rate": 2.9937895137868046e-07, "loss": 0.3594, "step": 1935 }, { "epoch": 2.703092304115322, "grad_norm": 2.9222924735820617, "learning_rate": 2.9661548822812636e-07, "loss": 0.3532, "step": 1936 }, { "epoch": 2.7044873285282494, "grad_norm": 2.354285724403774, "learning_rate": 2.9386444864800355e-07, "loss": 0.3882, "step": 1937 }, { "epoch": 2.7058823529411766, "grad_norm": 2.721448645374333, "learning_rate": 2.911258399049394e-07, "loss": 0.3974, "step": 1938 }, { "epoch": 2.707277377354104, "grad_norm": 2.7919955134214534, "learning_rate": 2.8839966923272286e-07, "loss": 0.378, "step": 1939 }, { "epoch": 2.708672401767031, "grad_norm": 2.691238117901245, "learning_rate": 2.8568594383229067e-07, "loss": 0.3729, "step": 1940 }, { "epoch": 2.710067426179958, "grad_norm": 2.7278028868049535, "learning_rate": 2.8298467087170655e-07, "loss": 0.3878, "step": 1941 }, { "epoch": 2.7114624505928853, "grad_norm": 2.6978939921514384, "learning_rate": 2.8029585748614196e-07, "loss": 0.3756, "step": 1942 }, { "epoch": 2.7128574750058125, "grad_norm": 2.694552600413923, "learning_rate": 2.7761951077785676e-07, "loss": 0.3847, "step": 1943 }, { "epoch": 2.71425249941874, "grad_norm": 2.5444870367269172, "learning_rate": 2.749556378161833e-07, "loss": 0.3567, "step": 1944 }, { "epoch": 2.715647523831667, "grad_norm": 2.6733853000908625, "learning_rate": 2.723042456375036e-07, "loss": 0.3601, "step": 1945 }, { "epoch": 2.7170425482445943, "grad_norm": 2.6111728885615806, "learning_rate": 2.696653412452327e-07, "loss": 0.3743, "step": 1946 }, { "epoch": 2.7184375726575216, "grad_norm": 2.521109332655959, "learning_rate": 2.6703893160980266e-07, "loss": 0.3677, "step": 1947 }, { "epoch": 2.719832597070449, "grad_norm": 2.5412167289238283, "learning_rate": 2.6442502366863854e-07, "loss": 0.3874, "step": 1948 }, { "epoch": 2.7212276214833757, "grad_norm": 2.7388813603693287, "learning_rate": 2.618236243261452e-07, "loss": 0.3546, "step": 1949 }, { "epoch": 2.722622645896303, "grad_norm": 2.718487520574506, "learning_rate": 2.592347404536888e-07, "loss": 0.3641, "step": 1950 }, { "epoch": 2.7240176703092303, "grad_norm": 2.841115954990652, "learning_rate": 2.566583788895721e-07, "loss": 0.3691, "step": 1951 }, { "epoch": 2.7254126947221575, "grad_norm": 2.550690740033465, "learning_rate": 2.5409454643902543e-07, "loss": 0.3498, "step": 1952 }, { "epoch": 2.726807719135085, "grad_norm": 2.5934585981756277, "learning_rate": 2.5154324987418434e-07, "loss": 0.3647, "step": 1953 }, { "epoch": 2.728202743548012, "grad_norm": 2.6746930688295234, "learning_rate": 2.4900449593406984e-07, "loss": 0.3503, "step": 1954 }, { "epoch": 2.7295977679609393, "grad_norm": 2.4497948026213137, "learning_rate": 2.4647829132457446e-07, "loss": 0.3748, "step": 1955 }, { "epoch": 2.7309927923738666, "grad_norm": 2.4056368751082418, "learning_rate": 2.439646427184428e-07, "loss": 0.3588, "step": 1956 }, { "epoch": 2.732387816786794, "grad_norm": 2.7969608908878683, "learning_rate": 2.4146355675525145e-07, "loss": 0.3663, "step": 1957 }, { "epoch": 2.733782841199721, "grad_norm": 2.4712871964905307, "learning_rate": 2.389750400413965e-07, "loss": 0.3867, "step": 1958 }, { "epoch": 2.7351778656126484, "grad_norm": 2.5643375161216038, "learning_rate": 2.364990991500743e-07, "loss": 0.3539, "step": 1959 }, { "epoch": 2.7365728900255757, "grad_norm": 2.6512150641250716, "learning_rate": 2.340357406212601e-07, "loss": 0.3368, "step": 1960 }, { "epoch": 2.7379679144385025, "grad_norm": 2.5374109766083164, "learning_rate": 2.315849709616963e-07, "loss": 0.317, "step": 1961 }, { "epoch": 2.73936293885143, "grad_norm": 2.6059540514417883, "learning_rate": 2.2914679664487237e-07, "loss": 0.3352, "step": 1962 }, { "epoch": 2.740757963264357, "grad_norm": 2.609274507744809, "learning_rate": 2.2672122411100727e-07, "loss": 0.365, "step": 1963 }, { "epoch": 2.7421529876772843, "grad_norm": 2.587346415135245, "learning_rate": 2.2430825976703485e-07, "loss": 0.3378, "step": 1964 }, { "epoch": 2.7435480120902116, "grad_norm": 2.5892502989822557, "learning_rate": 2.2190790998658561e-07, "loss": 0.403, "step": 1965 }, { "epoch": 2.744943036503139, "grad_norm": 2.8075462789188013, "learning_rate": 2.1952018110996843e-07, "loss": 0.3571, "step": 1966 }, { "epoch": 2.746338060916066, "grad_norm": 2.6611057924192925, "learning_rate": 2.1714507944415708e-07, "loss": 0.3934, "step": 1967 }, { "epoch": 2.7477330853289934, "grad_norm": 2.5516998415715553, "learning_rate": 2.1478261126276989e-07, "loss": 0.3481, "step": 1968 }, { "epoch": 2.7491281097419202, "grad_norm": 2.6908797598883707, "learning_rate": 2.1243278280605517e-07, "loss": 0.373, "step": 1969 }, { "epoch": 2.7505231341548475, "grad_norm": 2.8458037192488193, "learning_rate": 2.1009560028087627e-07, "loss": 0.3657, "step": 1970 }, { "epoch": 2.7519181585677748, "grad_norm": 2.623034153285088, "learning_rate": 2.0777106986069162e-07, "loss": 0.3865, "step": 1971 }, { "epoch": 2.753313182980702, "grad_norm": 2.6029551466846015, "learning_rate": 2.0545919768554078e-07, "loss": 0.3432, "step": 1972 }, { "epoch": 2.7547082073936293, "grad_norm": 2.8324423714446145, "learning_rate": 2.0315998986202902e-07, "loss": 0.4016, "step": 1973 }, { "epoch": 2.7561032318065566, "grad_norm": 2.609113693890731, "learning_rate": 2.0087345246330714e-07, "loss": 0.3681, "step": 1974 }, { "epoch": 2.757498256219484, "grad_norm": 2.774896709711885, "learning_rate": 1.985995915290595e-07, "loss": 0.3641, "step": 1975 }, { "epoch": 2.758893280632411, "grad_norm": 2.5597565213561806, "learning_rate": 1.9633841306548717e-07, "loss": 0.3423, "step": 1976 }, { "epoch": 2.7602883050453384, "grad_norm": 2.647972820626671, "learning_rate": 1.9408992304529252e-07, "loss": 0.3865, "step": 1977 }, { "epoch": 2.7616833294582657, "grad_norm": 2.5671850542000287, "learning_rate": 1.9185412740765962e-07, "loss": 0.3448, "step": 1978 }, { "epoch": 2.763078353871193, "grad_norm": 2.5080042009276684, "learning_rate": 1.8963103205824397e-07, "loss": 0.3374, "step": 1979 }, { "epoch": 2.76447337828412, "grad_norm": 2.7259725442900185, "learning_rate": 1.8742064286915329e-07, "loss": 0.3575, "step": 1980 }, { "epoch": 2.765868402697047, "grad_norm": 2.5839764767772966, "learning_rate": 1.8522296567893282e-07, "loss": 0.3742, "step": 1981 }, { "epoch": 2.7672634271099743, "grad_norm": 2.82049636286655, "learning_rate": 1.830380062925513e-07, "loss": 0.3563, "step": 1982 }, { "epoch": 2.7686584515229016, "grad_norm": 2.8440205176666504, "learning_rate": 1.8086577048138432e-07, "loss": 0.3827, "step": 1983 }, { "epoch": 2.770053475935829, "grad_norm": 2.4775389094825147, "learning_rate": 1.787062639831988e-07, "loss": 0.3403, "step": 1984 }, { "epoch": 2.771448500348756, "grad_norm": 2.8458479075751812, "learning_rate": 1.7655949250213743e-07, "loss": 0.368, "step": 1985 }, { "epoch": 2.7728435247616834, "grad_norm": 2.7735108476197405, "learning_rate": 1.7442546170870654e-07, "loss": 0.3491, "step": 1986 }, { "epoch": 2.7742385491746107, "grad_norm": 2.666826412809611, "learning_rate": 1.7230417723975766e-07, "loss": 0.372, "step": 1987 }, { "epoch": 2.775633573587538, "grad_norm": 2.5217264207112606, "learning_rate": 1.7019564469847372e-07, "loss": 0.3339, "step": 1988 }, { "epoch": 2.7770285980004648, "grad_norm": 2.454423681191967, "learning_rate": 1.6809986965435675e-07, "loss": 0.3347, "step": 1989 }, { "epoch": 2.778423622413392, "grad_norm": 2.6505425401161675, "learning_rate": 1.660168576432092e-07, "loss": 0.3874, "step": 1990 }, { "epoch": 2.7798186468263193, "grad_norm": 2.6748367915083526, "learning_rate": 1.6394661416711977e-07, "loss": 0.3741, "step": 1991 }, { "epoch": 2.7812136712392466, "grad_norm": 2.5544865537239163, "learning_rate": 1.6188914469445372e-07, "loss": 0.3559, "step": 1992 }, { "epoch": 2.782608695652174, "grad_norm": 2.572945333113618, "learning_rate": 1.5984445465983156e-07, "loss": 0.3814, "step": 1993 }, { "epoch": 2.784003720065101, "grad_norm": 2.5921826631458518, "learning_rate": 1.5781254946412029e-07, "loss": 0.3664, "step": 1994 }, { "epoch": 2.7853987444780284, "grad_norm": 2.5546503071586586, "learning_rate": 1.5579343447441663e-07, "loss": 0.3534, "step": 1995 }, { "epoch": 2.7867937688909556, "grad_norm": 2.58572104124402, "learning_rate": 1.5378711502403164e-07, "loss": 0.3433, "step": 1996 }, { "epoch": 2.788188793303883, "grad_norm": 2.5593555195474225, "learning_rate": 1.5179359641247948e-07, "loss": 0.3713, "step": 1997 }, { "epoch": 2.78958381771681, "grad_norm": 2.5369406839019373, "learning_rate": 1.4981288390546188e-07, "loss": 0.3732, "step": 1998 }, { "epoch": 2.7909788421297375, "grad_norm": 2.771520733827256, "learning_rate": 1.4784498273485436e-07, "loss": 0.3726, "step": 1999 }, { "epoch": 2.7923738665426647, "grad_norm": 2.6658815709848613, "learning_rate": 1.458898980986917e-07, "loss": 0.3792, "step": 2000 }, { "epoch": 2.793768890955592, "grad_norm": 2.9021975614858326, "learning_rate": 1.4394763516115573e-07, "loss": 0.3744, "step": 2001 }, { "epoch": 2.795163915368519, "grad_norm": 2.9047653612094733, "learning_rate": 1.4201819905256043e-07, "loss": 0.382, "step": 2002 }, { "epoch": 2.796558939781446, "grad_norm": 2.7588220849795317, "learning_rate": 1.4010159486933906e-07, "loss": 0.3953, "step": 2003 }, { "epoch": 2.7979539641943734, "grad_norm": 2.8233759987721703, "learning_rate": 1.3819782767403034e-07, "loss": 0.3712, "step": 2004 }, { "epoch": 2.7993489886073006, "grad_norm": 2.533038743186819, "learning_rate": 1.363069024952668e-07, "loss": 0.3615, "step": 2005 }, { "epoch": 2.800744013020228, "grad_norm": 2.580382614360014, "learning_rate": 1.344288243277575e-07, "loss": 0.3833, "step": 2006 }, { "epoch": 2.802139037433155, "grad_norm": 2.6561286020889665, "learning_rate": 1.3256359813227758e-07, "loss": 0.3581, "step": 2007 }, { "epoch": 2.8035340618460824, "grad_norm": 2.4862935131563138, "learning_rate": 1.3071122883565657e-07, "loss": 0.3833, "step": 2008 }, { "epoch": 2.8049290862590093, "grad_norm": 2.6169341270360493, "learning_rate": 1.288717213307622e-07, "loss": 0.3759, "step": 2009 }, { "epoch": 2.8063241106719365, "grad_norm": 2.7488952767172794, "learning_rate": 1.2704508047649e-07, "loss": 0.3866, "step": 2010 }, { "epoch": 2.807719135084864, "grad_norm": 2.566666887250899, "learning_rate": 1.2523131109774822e-07, "loss": 0.3521, "step": 2011 }, { "epoch": 2.809114159497791, "grad_norm": 2.288347083948126, "learning_rate": 1.234304179854473e-07, "loss": 0.3565, "step": 2012 }, { "epoch": 2.8105091839107184, "grad_norm": 2.8963414816812265, "learning_rate": 1.2164240589648436e-07, "loss": 0.3848, "step": 2013 }, { "epoch": 2.8119042083236456, "grad_norm": 2.797649460356226, "learning_rate": 1.1986727955373588e-07, "loss": 0.3627, "step": 2014 }, { "epoch": 2.813299232736573, "grad_norm": 2.5755351375722944, "learning_rate": 1.1810504364603737e-07, "loss": 0.3835, "step": 2015 }, { "epoch": 2.8146942571495, "grad_norm": 2.6993126246441785, "learning_rate": 1.163557028281792e-07, "loss": 0.3448, "step": 2016 }, { "epoch": 2.8160892815624274, "grad_norm": 2.835572814411952, "learning_rate": 1.146192617208891e-07, "loss": 0.3312, "step": 2017 }, { "epoch": 2.8174843059753547, "grad_norm": 2.5915158941050747, "learning_rate": 1.128957249108209e-07, "loss": 0.3446, "step": 2018 }, { "epoch": 2.818879330388282, "grad_norm": 2.701464863191518, "learning_rate": 1.1118509695054236e-07, "loss": 0.344, "step": 2019 }, { "epoch": 2.8202743548012092, "grad_norm": 2.604077633908004, "learning_rate": 1.094873823585263e-07, "loss": 0.3449, "step": 2020 }, { "epoch": 2.8216693792141365, "grad_norm": 2.5122109960318286, "learning_rate": 1.0780258561913281e-07, "loss": 0.3824, "step": 2021 }, { "epoch": 2.8230644036270633, "grad_norm": 2.943690126059559, "learning_rate": 1.0613071118260321e-07, "loss": 0.3879, "step": 2022 }, { "epoch": 2.8244594280399906, "grad_norm": 2.879585309997412, "learning_rate": 1.0447176346504439e-07, "loss": 0.415, "step": 2023 }, { "epoch": 2.825854452452918, "grad_norm": 2.56709219376867, "learning_rate": 1.0282574684841784e-07, "loss": 0.3883, "step": 2024 }, { "epoch": 2.827249476865845, "grad_norm": 2.669541689813132, "learning_rate": 1.011926656805301e-07, "loss": 0.3449, "step": 2025 }, { "epoch": 2.8286445012787724, "grad_norm": 2.396681985624105, "learning_rate": 9.957252427501951e-08, "loss": 0.3342, "step": 2026 }, { "epoch": 2.8300395256916997, "grad_norm": 2.6285527595453706, "learning_rate": 9.796532691134453e-08, "loss": 0.3698, "step": 2027 }, { "epoch": 2.831434550104627, "grad_norm": 2.380558912493783, "learning_rate": 9.637107783477484e-08, "loss": 0.3643, "step": 2028 }, { "epoch": 2.832829574517554, "grad_norm": 2.613720533749851, "learning_rate": 9.478978125637583e-08, "loss": 0.327, "step": 2029 }, { "epoch": 2.834224598930481, "grad_norm": 2.7107420886912097, "learning_rate": 9.322144135300137e-08, "loss": 0.367, "step": 2030 }, { "epoch": 2.8356196233434083, "grad_norm": 2.7070804925071235, "learning_rate": 9.166606226728103e-08, "loss": 0.3485, "step": 2031 }, { "epoch": 2.8370146477563356, "grad_norm": 2.4160902135745768, "learning_rate": 9.012364810761121e-08, "loss": 0.4132, "step": 2032 }, { "epoch": 2.838409672169263, "grad_norm": 2.7339263864590895, "learning_rate": 8.859420294814014e-08, "loss": 0.3845, "step": 2033 }, { "epoch": 2.83980469658219, "grad_norm": 2.6115175050253483, "learning_rate": 8.70777308287618e-08, "loss": 0.374, "step": 2034 }, { "epoch": 2.8411997209951174, "grad_norm": 2.9180260372625737, "learning_rate": 8.557423575510037e-08, "loss": 0.4111, "step": 2035 }, { "epoch": 2.8425947454080447, "grad_norm": 2.9635716961841596, "learning_rate": 8.408372169850521e-08, "loss": 0.4118, "step": 2036 }, { "epoch": 2.843989769820972, "grad_norm": 2.797807500928576, "learning_rate": 8.26061925960353e-08, "loss": 0.3974, "step": 2037 }, { "epoch": 2.8453847942338992, "grad_norm": 2.866396636768858, "learning_rate": 8.114165235045268e-08, "loss": 0.3657, "step": 2038 }, { "epoch": 2.8467798186468265, "grad_norm": 2.8025313933486893, "learning_rate": 7.969010483020845e-08, "loss": 0.3701, "step": 2039 }, { "epoch": 2.8481748430597538, "grad_norm": 2.4751707032526635, "learning_rate": 7.825155386943784e-08, "loss": 0.3668, "step": 2040 }, { "epoch": 2.849569867472681, "grad_norm": 2.7147612535717167, "learning_rate": 7.682600326794353e-08, "loss": 0.3768, "step": 2041 }, { "epoch": 2.850964891885608, "grad_norm": 2.4724809889134325, "learning_rate": 7.541345679118961e-08, "loss": 0.3632, "step": 2042 }, { "epoch": 2.852359916298535, "grad_norm": 2.7931666820075387, "learning_rate": 7.401391817029257e-08, "loss": 0.3577, "step": 2043 }, { "epoch": 2.8537549407114624, "grad_norm": 2.6287668601855776, "learning_rate": 7.262739110200923e-08, "loss": 0.343, "step": 2044 }, { "epoch": 2.8551499651243897, "grad_norm": 2.750936176792546, "learning_rate": 7.125387924872552e-08, "loss": 0.3746, "step": 2045 }, { "epoch": 2.856544989537317, "grad_norm": 2.5209387139989436, "learning_rate": 6.98933862384521e-08, "loss": 0.3583, "step": 2046 }, { "epoch": 2.857940013950244, "grad_norm": 2.6488935830462577, "learning_rate": 6.854591566480884e-08, "loss": 0.3091, "step": 2047 }, { "epoch": 2.8593350383631715, "grad_norm": 2.584991891321586, "learning_rate": 6.721147108701864e-08, "loss": 0.3738, "step": 2048 }, { "epoch": 2.8607300627760983, "grad_norm": 2.7522178647359796, "learning_rate": 6.589005602989862e-08, "loss": 0.3762, "step": 2049 }, { "epoch": 2.8621250871890256, "grad_norm": 2.834222669570978, "learning_rate": 6.458167398384896e-08, "loss": 0.3753, "step": 2050 }, { "epoch": 2.863520111601953, "grad_norm": 2.6543614929371766, "learning_rate": 6.328632840484294e-08, "loss": 0.3308, "step": 2051 }, { "epoch": 2.86491513601488, "grad_norm": 2.416955656324384, "learning_rate": 6.200402271442085e-08, "loss": 0.3447, "step": 2052 }, { "epoch": 2.8663101604278074, "grad_norm": 2.7191646500262565, "learning_rate": 6.073476029967884e-08, "loss": 0.3626, "step": 2053 }, { "epoch": 2.8677051848407347, "grad_norm": 2.5970270010095575, "learning_rate": 5.947854451326007e-08, "loss": 0.3888, "step": 2054 }, { "epoch": 2.869100209253662, "grad_norm": 2.559923163467664, "learning_rate": 5.823537867334694e-08, "loss": 0.3221, "step": 2055 }, { "epoch": 2.870495233666589, "grad_norm": 2.4952646682480144, "learning_rate": 5.7005266063650534e-08, "loss": 0.3419, "step": 2056 }, { "epoch": 2.8718902580795165, "grad_norm": 2.7414865825304617, "learning_rate": 5.5788209933403944e-08, "loss": 0.4039, "step": 2057 }, { "epoch": 2.8732852824924437, "grad_norm": 2.577990898772062, "learning_rate": 5.4584213497351766e-08, "loss": 0.3622, "step": 2058 }, { "epoch": 2.874680306905371, "grad_norm": 2.7232343070102503, "learning_rate": 5.339327993574339e-08, "loss": 0.3508, "step": 2059 }, { "epoch": 2.8760753313182983, "grad_norm": 2.532685117703517, "learning_rate": 5.221541239432415e-08, "loss": 0.3309, "step": 2060 }, { "epoch": 2.8774703557312256, "grad_norm": 2.7444987435313455, "learning_rate": 5.1050613984324756e-08, "loss": 0.3501, "step": 2061 }, { "epoch": 2.8788653801441524, "grad_norm": 2.9483441311446215, "learning_rate": 4.989888778245744e-08, "loss": 0.4031, "step": 2062 }, { "epoch": 2.8802604045570797, "grad_norm": 2.536273947372249, "learning_rate": 4.8760236830903697e-08, "loss": 0.3736, "step": 2063 }, { "epoch": 2.881655428970007, "grad_norm": 2.6846065464133133, "learning_rate": 4.763466413730822e-08, "loss": 0.3703, "step": 2064 }, { "epoch": 2.883050453382934, "grad_norm": 2.6929686362471923, "learning_rate": 4.65221726747711e-08, "loss": 0.3615, "step": 2065 }, { "epoch": 2.8844454777958615, "grad_norm": 2.795350010904855, "learning_rate": 4.542276538183954e-08, "loss": 0.3619, "step": 2066 }, { "epoch": 2.8858405022087887, "grad_norm": 2.8463422450135414, "learning_rate": 4.433644516249891e-08, "loss": 0.3702, "step": 2067 }, { "epoch": 2.887235526621716, "grad_norm": 2.6850145868720925, "learning_rate": 4.326321488616836e-08, "loss": 0.4077, "step": 2068 }, { "epoch": 2.888630551034643, "grad_norm": 2.817186461820522, "learning_rate": 4.220307738768859e-08, "loss": 0.3453, "step": 2069 }, { "epoch": 2.89002557544757, "grad_norm": 2.6301488722722617, "learning_rate": 4.11560354673185e-08, "loss": 0.3729, "step": 2070 }, { "epoch": 2.8914205998604974, "grad_norm": 2.495923079125419, "learning_rate": 4.0122091890726354e-08, "loss": 0.3727, "step": 2071 }, { "epoch": 2.8928156242734246, "grad_norm": 2.6072704420104884, "learning_rate": 3.9101249388981965e-08, "loss": 0.323, "step": 2072 }, { "epoch": 2.894210648686352, "grad_norm": 2.6691096376028804, "learning_rate": 3.809351065854894e-08, "loss": 0.3943, "step": 2073 }, { "epoch": 2.895605673099279, "grad_norm": 2.8620337192496117, "learning_rate": 3.709887836128023e-08, "loss": 0.3987, "step": 2074 }, { "epoch": 2.8970006975122065, "grad_norm": 2.5711577765655544, "learning_rate": 3.611735512440706e-08, "loss": 0.3768, "step": 2075 }, { "epoch": 2.8983957219251337, "grad_norm": 2.7555507422072325, "learning_rate": 3.5148943540536105e-08, "loss": 0.3567, "step": 2076 }, { "epoch": 2.899790746338061, "grad_norm": 2.6110894335359025, "learning_rate": 3.4193646167640646e-08, "loss": 0.3981, "step": 2077 }, { "epoch": 2.9011857707509883, "grad_norm": 3.007391508069282, "learning_rate": 3.325146552905223e-08, "loss": 0.3516, "step": 2078 }, { "epoch": 2.9025807951639155, "grad_norm": 2.4374814165540575, "learning_rate": 3.2322404113457886e-08, "loss": 0.3754, "step": 2079 }, { "epoch": 2.903975819576843, "grad_norm": 2.7403605243580067, "learning_rate": 3.1406464374890144e-08, "loss": 0.3688, "step": 2080 }, { "epoch": 2.90537084398977, "grad_norm": 2.7485844544406315, "learning_rate": 3.0503648732722046e-08, "loss": 0.378, "step": 2081 }, { "epoch": 2.906765868402697, "grad_norm": 2.844824702159113, "learning_rate": 2.9613959571660468e-08, "loss": 0.3447, "step": 2082 }, { "epoch": 2.908160892815624, "grad_norm": 2.5930250541069753, "learning_rate": 2.8737399241740016e-08, "loss": 0.3538, "step": 2083 }, { "epoch": 2.9095559172285514, "grad_norm": 2.7086658783332287, "learning_rate": 2.7873970058316934e-08, "loss": 0.3644, "step": 2084 }, { "epoch": 2.9109509416414787, "grad_norm": 2.4952247440400015, "learning_rate": 2.7023674302061875e-08, "loss": 0.3482, "step": 2085 }, { "epoch": 2.912345966054406, "grad_norm": 2.5180924294960887, "learning_rate": 2.6186514218954905e-08, "loss": 0.3433, "step": 2086 }, { "epoch": 2.9137409904673333, "grad_norm": 2.7800665253021455, "learning_rate": 2.5362492020280517e-08, "loss": 0.3552, "step": 2087 }, { "epoch": 2.9151360148802605, "grad_norm": 2.563569115588271, "learning_rate": 2.4551609882619288e-08, "loss": 0.384, "step": 2088 }, { "epoch": 2.9165310392931874, "grad_norm": 2.90719751076978, "learning_rate": 2.3753869947843457e-08, "loss": 0.3583, "step": 2089 }, { "epoch": 2.9179260637061146, "grad_norm": 2.653553722453103, "learning_rate": 2.296927432311358e-08, "loss": 0.3878, "step": 2090 }, { "epoch": 2.919321088119042, "grad_norm": 2.718000690462481, "learning_rate": 2.2197825080867432e-08, "loss": 0.3611, "step": 2091 }, { "epoch": 2.920716112531969, "grad_norm": 2.927993429687993, "learning_rate": 2.1439524258819456e-08, "loss": 0.382, "step": 2092 }, { "epoch": 2.9221111369448964, "grad_norm": 2.637121563447791, "learning_rate": 2.0694373859954653e-08, "loss": 0.3485, "step": 2093 }, { "epoch": 2.9235061613578237, "grad_norm": 2.631673082137937, "learning_rate": 1.99623758525197e-08, "loss": 0.3763, "step": 2094 }, { "epoch": 2.924901185770751, "grad_norm": 2.572904967099451, "learning_rate": 1.9243532170023504e-08, "loss": 0.3356, "step": 2095 }, { "epoch": 2.9262962101836782, "grad_norm": 2.526550125653735, "learning_rate": 1.8537844711227215e-08, "loss": 0.3176, "step": 2096 }, { "epoch": 2.9276912345966055, "grad_norm": 2.446573071821584, "learning_rate": 1.7845315340140334e-08, "loss": 0.3227, "step": 2097 }, { "epoch": 2.929086259009533, "grad_norm": 2.376437480501846, "learning_rate": 1.7165945886018498e-08, "loss": 0.3111, "step": 2098 }, { "epoch": 2.93048128342246, "grad_norm": 2.655448692152345, "learning_rate": 1.6499738143354594e-08, "loss": 0.3311, "step": 2099 }, { "epoch": 2.9318763078353873, "grad_norm": 2.667998282668599, "learning_rate": 1.584669387187765e-08, "loss": 0.3741, "step": 2100 }, { "epoch": 2.9332713322483146, "grad_norm": 2.518231877676796, "learning_rate": 1.520681479654562e-08, "loss": 0.3681, "step": 2101 }, { "epoch": 2.9346663566612414, "grad_norm": 2.7475229977430846, "learning_rate": 1.4580102607541502e-08, "loss": 0.3706, "step": 2102 }, { "epoch": 2.9360613810741687, "grad_norm": 2.537385114679741, "learning_rate": 1.3966558960269994e-08, "loss": 0.3608, "step": 2103 }, { "epoch": 2.937456405487096, "grad_norm": 2.280558540728219, "learning_rate": 1.3366185475351957e-08, "loss": 0.335, "step": 2104 }, { "epoch": 2.9388514299000232, "grad_norm": 2.4277057587949855, "learning_rate": 1.2778983738620521e-08, "loss": 0.3803, "step": 2105 }, { "epoch": 2.9402464543129505, "grad_norm": 2.8217220802810323, "learning_rate": 1.2204955301116095e-08, "loss": 0.3356, "step": 2106 }, { "epoch": 2.941641478725878, "grad_norm": 2.513328499269433, "learning_rate": 1.164410167908414e-08, "loss": 0.3525, "step": 2107 }, { "epoch": 2.943036503138805, "grad_norm": 2.7557561856910366, "learning_rate": 1.109642435396907e-08, "loss": 0.347, "step": 2108 }, { "epoch": 2.9444315275517323, "grad_norm": 2.579423374561251, "learning_rate": 1.0561924772412024e-08, "loss": 0.365, "step": 2109 }, { "epoch": 2.945826551964659, "grad_norm": 3.0318790410757037, "learning_rate": 1.0040604346245319e-08, "loss": 0.4255, "step": 2110 }, { "epoch": 2.9472215763775864, "grad_norm": 2.351738277268536, "learning_rate": 9.532464452491341e-09, "loss": 0.3458, "step": 2111 }, { "epoch": 2.9486166007905137, "grad_norm": 2.687540854869892, "learning_rate": 9.037506433355325e-09, "loss": 0.3598, "step": 2112 }, { "epoch": 2.950011625203441, "grad_norm": 2.8865613071451963, "learning_rate": 8.555731596224803e-09, "loss": 0.3358, "step": 2113 }, { "epoch": 2.9514066496163682, "grad_norm": 2.6558846374984384, "learning_rate": 8.087141213665717e-09, "loss": 0.377, "step": 2114 }, { "epoch": 2.9528016740292955, "grad_norm": 2.8196661160204375, "learning_rate": 7.631736523416867e-09, "loss": 0.3618, "step": 2115 }, { "epoch": 2.9541966984422228, "grad_norm": 2.7302953850838083, "learning_rate": 7.1895187283899104e-09, "loss": 0.3107, "step": 2116 }, { "epoch": 2.95559172285515, "grad_norm": 2.70246295378061, "learning_rate": 6.760488996662706e-09, "loss": 0.3754, "step": 2117 }, { "epoch": 2.9569867472680773, "grad_norm": 2.633243298071103, "learning_rate": 6.3446484614798635e-09, "loss": 0.338, "step": 2118 }, { "epoch": 2.9583817716810046, "grad_norm": 2.5178818374783303, "learning_rate": 5.941998221247192e-09, "loss": 0.3798, "step": 2119 }, { "epoch": 2.959776796093932, "grad_norm": 2.7644530096513518, "learning_rate": 5.552539339528373e-09, "loss": 0.4235, "step": 2120 }, { "epoch": 2.961171820506859, "grad_norm": 2.478640037329969, "learning_rate": 5.176272845045516e-09, "loss": 0.3607, "step": 2121 }, { "epoch": 2.962566844919786, "grad_norm": 2.533005522993558, "learning_rate": 4.813199731671381e-09, "loss": 0.3293, "step": 2122 }, { "epoch": 2.963961869332713, "grad_norm": 2.4146265368983606, "learning_rate": 4.463320958432716e-09, "loss": 0.3106, "step": 2123 }, { "epoch": 2.9653568937456405, "grad_norm": 2.5410876913692575, "learning_rate": 4.1266374495024795e-09, "loss": 0.338, "step": 2124 }, { "epoch": 2.9667519181585678, "grad_norm": 2.577984672033682, "learning_rate": 3.803150094200403e-09, "loss": 0.3356, "step": 2125 }, { "epoch": 2.968146942571495, "grad_norm": 2.637218188643197, "learning_rate": 3.4928597469885416e-09, "loss": 0.3421, "step": 2126 }, { "epoch": 2.9695419669844223, "grad_norm": 2.698864549842431, "learning_rate": 3.1957672274723907e-09, "loss": 0.3808, "step": 2127 }, { "epoch": 2.9709369913973496, "grad_norm": 2.7637500987409047, "learning_rate": 2.9118733203942207e-09, "loss": 0.3599, "step": 2128 }, { "epoch": 2.972332015810277, "grad_norm": 2.8427058128213507, "learning_rate": 2.6411787756353e-09, "loss": 0.3911, "step": 2129 }, { "epoch": 2.9737270402232037, "grad_norm": 2.446243966324141, "learning_rate": 2.3836843082108987e-09, "loss": 0.3867, "step": 2130 }, { "epoch": 2.975122064636131, "grad_norm": 2.4182336576743757, "learning_rate": 2.1393905982691752e-09, "loss": 0.3496, "step": 2131 }, { "epoch": 2.976517089049058, "grad_norm": 2.603850540185471, "learning_rate": 1.9082982910911817e-09, "loss": 0.3463, "step": 2132 }, { "epoch": 2.9779121134619855, "grad_norm": 2.5473548790439913, "learning_rate": 1.6904079970853083e-09, "loss": 0.3434, "step": 2133 }, { "epoch": 2.9793071378749127, "grad_norm": 2.464169364061302, "learning_rate": 1.4857202917900604e-09, "loss": 0.3765, "step": 2134 }, { "epoch": 2.98070216228784, "grad_norm": 2.865171168162494, "learning_rate": 1.2942357158701734e-09, "loss": 0.3341, "step": 2135 }, { "epoch": 2.9820971867007673, "grad_norm": 2.713552211745049, "learning_rate": 1.1159547751143918e-09, "loss": 0.3891, "step": 2136 }, { "epoch": 2.9834922111136946, "grad_norm": 2.576583825865272, "learning_rate": 9.508779404360235e-10, "loss": 0.3718, "step": 2137 }, { "epoch": 2.984887235526622, "grad_norm": 2.7872469337474772, "learning_rate": 7.990056478707209e-10, "loss": 0.3578, "step": 2138 }, { "epoch": 2.986282259939549, "grad_norm": 2.6296439012309274, "learning_rate": 6.603382985759244e-10, "loss": 0.367, "step": 2139 }, { "epoch": 2.9876772843524764, "grad_norm": 2.858142144880672, "learning_rate": 5.348762588286427e-10, "loss": 0.4021, "step": 2140 }, { "epoch": 2.9890723087654036, "grad_norm": 2.831351479545325, "learning_rate": 4.2261986002600783e-10, "loss": 0.3727, "step": 2141 }, { "epoch": 2.9904673331783305, "grad_norm": 2.7411843572898897, "learning_rate": 3.235693986830546e-10, "loss": 0.4118, "step": 2142 }, { "epoch": 2.9918623575912577, "grad_norm": 2.793238204844537, "learning_rate": 2.3772513643327555e-10, "loss": 0.3702, "step": 2143 }, { "epoch": 2.993257382004185, "grad_norm": 2.510662619113388, "learning_rate": 1.650873000258457e-10, "loss": 0.3671, "step": 2144 }, { "epoch": 2.9946524064171123, "grad_norm": 2.67582466626832, "learning_rate": 1.0565608132728778e-10, "loss": 0.3827, "step": 2145 }, { "epoch": 2.9960474308300395, "grad_norm": 2.716018419783856, "learning_rate": 5.943163732036183e-11, "loss": 0.3797, "step": 2146 }, { "epoch": 2.997442455242967, "grad_norm": 2.7356474153116843, "learning_rate": 2.6414090102400147e-11, "loss": 0.3871, "step": 2147 }, { "epoch": 2.998837479655894, "grad_norm": 2.7283702306955204, "learning_rate": 6.6035268864173e-12, "loss": 0.3639, "step": 2148 }, { "epoch": 2.998837479655894, "step": 2148, "total_flos": 88194404745216.0, "train_loss": 0.6016569009234564, "train_runtime": 11197.5198, "train_samples_per_second": 4.609, "train_steps_per_second": 0.192 } ], "logging_steps": 1, "max_steps": 2148, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 88194404745216.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }