{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5703, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 5.123785379357265, "learning_rate": 1.1627906976744187e-07, "loss": 0.9373, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.2104847663651706, "learning_rate": 2.3255813953488374e-07, "loss": 0.9208, "step": 2 }, { "epoch": 0.0, "grad_norm": 4.756083943450628, "learning_rate": 3.488372093023256e-07, "loss": 0.9866, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.055372869916307, "learning_rate": 4.651162790697675e-07, "loss": 0.9716, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.469421246664214, "learning_rate": 5.813953488372094e-07, "loss": 0.9276, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.928697884589698, "learning_rate": 6.976744186046513e-07, "loss": 0.9851, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.62560547269135, "learning_rate": 8.139534883720931e-07, "loss": 0.9533, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.089097037044055, "learning_rate": 9.30232558139535e-07, "loss": 0.9517, "step": 8 }, { "epoch": 0.0, "grad_norm": 4.100439409219032, "learning_rate": 1.0465116279069768e-06, "loss": 0.9363, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.2556002889568596, "learning_rate": 1.1627906976744188e-06, "loss": 0.9148, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.306207404930218, "learning_rate": 1.2790697674418605e-06, "loss": 0.901, "step": 11 }, { "epoch": 0.0, "grad_norm": 2.5389847167804622, "learning_rate": 1.3953488372093025e-06, "loss": 0.9759, "step": 12 }, { "epoch": 0.0, "grad_norm": 2.700435045141162, "learning_rate": 1.5116279069767443e-06, "loss": 0.872, "step": 13 }, { "epoch": 0.0, "grad_norm": 1.5980686872778085, "learning_rate": 1.6279069767441862e-06, "loss": 0.9175, "step": 14 }, { "epoch": 0.0, "grad_norm": 1.7142623633539618, "learning_rate": 1.7441860465116282e-06, "loss": 0.8917, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.4801917390433823, "learning_rate": 1.86046511627907e-06, "loss": 0.8673, "step": 16 }, { "epoch": 0.0, "grad_norm": 1.3793021087188682, "learning_rate": 1.976744186046512e-06, "loss": 0.8718, "step": 17 }, { "epoch": 0.0, "grad_norm": 1.3846309953436386, "learning_rate": 2.0930232558139536e-06, "loss": 0.8575, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.2066656185882516, "learning_rate": 2.2093023255813954e-06, "loss": 0.8925, "step": 19 }, { "epoch": 0.0, "grad_norm": 1.308663206951885, "learning_rate": 2.3255813953488376e-06, "loss": 0.8719, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.2934313950714285, "learning_rate": 2.4418604651162793e-06, "loss": 0.8592, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.4417810067967454, "learning_rate": 2.558139534883721e-06, "loss": 0.9104, "step": 22 }, { "epoch": 0.0, "grad_norm": 1.1126123999161326, "learning_rate": 2.674418604651163e-06, "loss": 0.8737, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.9485540137003432, "learning_rate": 2.790697674418605e-06, "loss": 0.8499, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.9395766006297345, "learning_rate": 2.9069767441860468e-06, "loss": 0.8718, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.1275164173107504, "learning_rate": 3.0232558139534885e-06, "loss": 0.8482, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.3490148785483373, "learning_rate": 3.1395348837209307e-06, "loss": 0.8689, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.4580563449584825, "learning_rate": 3.2558139534883724e-06, "loss": 0.8501, "step": 28 }, { "epoch": 0.01, "grad_norm": 1.2435468146143105, "learning_rate": 3.372093023255814e-06, "loss": 0.8764, "step": 29 }, { "epoch": 0.01, "grad_norm": 0.8934267531893595, "learning_rate": 3.4883720930232564e-06, "loss": 0.8838, "step": 30 }, { "epoch": 0.01, "grad_norm": 1.3006625933904927, "learning_rate": 3.6046511627906977e-06, "loss": 0.8207, "step": 31 }, { "epoch": 0.01, "grad_norm": 2.068620976009512, "learning_rate": 3.72093023255814e-06, "loss": 0.8439, "step": 32 }, { "epoch": 0.01, "grad_norm": 1.2248888843967605, "learning_rate": 3.837209302325582e-06, "loss": 0.8551, "step": 33 }, { "epoch": 0.01, "grad_norm": 0.8121311628094761, "learning_rate": 3.953488372093024e-06, "loss": 0.833, "step": 34 }, { "epoch": 0.01, "grad_norm": 1.030558761815383, "learning_rate": 4.0697674418604655e-06, "loss": 0.8945, "step": 35 }, { "epoch": 0.01, "grad_norm": 0.9549142917110657, "learning_rate": 4.186046511627907e-06, "loss": 0.8378, "step": 36 }, { "epoch": 0.01, "grad_norm": 0.9718144089369075, "learning_rate": 4.302325581395349e-06, "loss": 0.8695, "step": 37 }, { "epoch": 0.01, "grad_norm": 0.7907654079361924, "learning_rate": 4.418604651162791e-06, "loss": 0.8331, "step": 38 }, { "epoch": 0.01, "grad_norm": 0.8442743610877188, "learning_rate": 4.5348837209302326e-06, "loss": 0.8723, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.7766847990968313, "learning_rate": 4.651162790697675e-06, "loss": 0.8855, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.9559345520186775, "learning_rate": 4.767441860465117e-06, "loss": 0.854, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.9522718342241372, "learning_rate": 4.883720930232559e-06, "loss": 0.8512, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.8334348584109049, "learning_rate": 5e-06, "loss": 0.8251, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.840605010141594, "learning_rate": 5.116279069767442e-06, "loss": 0.8316, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.0288583399717293, "learning_rate": 5.232558139534885e-06, "loss": 0.9028, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.8941486793545607, "learning_rate": 5.348837209302326e-06, "loss": 0.8032, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.8500052678848606, "learning_rate": 5.465116279069767e-06, "loss": 0.8378, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.8622886023968779, "learning_rate": 5.58139534883721e-06, "loss": 0.8262, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.8764643614331641, "learning_rate": 5.697674418604652e-06, "loss": 0.8419, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.9001363079908395, "learning_rate": 5.8139534883720935e-06, "loss": 0.839, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.8799921404678155, "learning_rate": 5.930232558139536e-06, "loss": 0.8182, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.8897719952696889, "learning_rate": 6.046511627906977e-06, "loss": 0.8226, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.7064051831978361, "learning_rate": 6.162790697674419e-06, "loss": 0.8021, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.8800348720980075, "learning_rate": 6.279069767441861e-06, "loss": 0.8479, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.0428466307026807, "learning_rate": 6.395348837209303e-06, "loss": 0.8349, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.8749314512723254, "learning_rate": 6.511627906976745e-06, "loss": 0.7966, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.8342355709261795, "learning_rate": 6.627906976744186e-06, "loss": 0.8457, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.137035048551639, "learning_rate": 6.744186046511628e-06, "loss": 0.8196, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.006545003268565, "learning_rate": 6.86046511627907e-06, "loss": 0.7932, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.7800085871164048, "learning_rate": 6.976744186046513e-06, "loss": 0.7929, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.9073128060160859, "learning_rate": 7.0930232558139545e-06, "loss": 0.8921, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.063750441910888, "learning_rate": 7.209302325581395e-06, "loss": 0.8774, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.9209960563331431, "learning_rate": 7.325581395348837e-06, "loss": 0.8758, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.856257812647561, "learning_rate": 7.44186046511628e-06, "loss": 0.795, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.9936447395297762, "learning_rate": 7.5581395348837215e-06, "loss": 0.8115, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.7629412397459028, "learning_rate": 7.674418604651164e-06, "loss": 0.7675, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.0661033667469286, "learning_rate": 7.790697674418605e-06, "loss": 0.8123, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.8604499475884759, "learning_rate": 7.906976744186048e-06, "loss": 0.8149, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.7564891340841373, "learning_rate": 8.023255813953488e-06, "loss": 0.7682, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.7329540140667775, "learning_rate": 8.139534883720931e-06, "loss": 0.8067, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.864079170484272, "learning_rate": 8.255813953488374e-06, "loss": 0.7829, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.8870411371499851, "learning_rate": 8.372093023255815e-06, "loss": 0.8029, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.8732154888643351, "learning_rate": 8.488372093023256e-06, "loss": 0.8097, "step": 73 }, { "epoch": 0.01, "grad_norm": 0.8458286344598275, "learning_rate": 8.604651162790698e-06, "loss": 0.8176, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.8824779035363114, "learning_rate": 8.72093023255814e-06, "loss": 0.8433, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.8608279610568753, "learning_rate": 8.837209302325582e-06, "loss": 0.8023, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.7973380533785053, "learning_rate": 8.953488372093024e-06, "loss": 0.7997, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.8671163651991461, "learning_rate": 9.069767441860465e-06, "loss": 0.8309, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.7844803100354425, "learning_rate": 9.186046511627908e-06, "loss": 0.7867, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.7635054950236818, "learning_rate": 9.30232558139535e-06, "loss": 0.8057, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.8701139655534844, "learning_rate": 9.418604651162791e-06, "loss": 0.7853, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.0703238628963716, "learning_rate": 9.534883720930234e-06, "loss": 0.7759, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.9077817194450286, "learning_rate": 9.651162790697676e-06, "loss": 0.8182, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.8572394984813722, "learning_rate": 9.767441860465117e-06, "loss": 0.8238, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.7659803184623406, "learning_rate": 9.883720930232558e-06, "loss": 0.8, "step": 85 }, { "epoch": 0.02, "grad_norm": 0.9352670822175781, "learning_rate": 1e-05, "loss": 0.855, "step": 86 }, { "epoch": 0.02, "grad_norm": 0.9193380883373826, "learning_rate": 1.0116279069767443e-05, "loss": 0.8384, "step": 87 }, { "epoch": 0.02, "grad_norm": 0.9343933190951808, "learning_rate": 1.0232558139534884e-05, "loss": 0.7619, "step": 88 }, { "epoch": 0.02, "grad_norm": 0.8273336986325632, "learning_rate": 1.0348837209302327e-05, "loss": 0.8076, "step": 89 }, { "epoch": 0.02, "grad_norm": 1.0578461473727447, "learning_rate": 1.046511627906977e-05, "loss": 0.7684, "step": 90 }, { "epoch": 0.02, "grad_norm": 0.7567381473047289, "learning_rate": 1.058139534883721e-05, "loss": 0.7767, "step": 91 }, { "epoch": 0.02, "grad_norm": 1.1415926352256296, "learning_rate": 1.0697674418604651e-05, "loss": 0.8329, "step": 92 }, { "epoch": 0.02, "grad_norm": 0.8480414141922111, "learning_rate": 1.0813953488372094e-05, "loss": 0.7703, "step": 93 }, { "epoch": 0.02, "grad_norm": 0.9889127034483878, "learning_rate": 1.0930232558139535e-05, "loss": 0.8019, "step": 94 }, { "epoch": 0.02, "grad_norm": 0.8455987929389573, "learning_rate": 1.1046511627906977e-05, "loss": 0.7766, "step": 95 }, { "epoch": 0.02, "grad_norm": 0.9684126560558763, "learning_rate": 1.116279069767442e-05, "loss": 0.7498, "step": 96 }, { "epoch": 0.02, "grad_norm": 0.9664833907312704, "learning_rate": 1.1279069767441861e-05, "loss": 0.7774, "step": 97 }, { "epoch": 0.02, "grad_norm": 0.8962118721026515, "learning_rate": 1.1395348837209304e-05, "loss": 0.7786, "step": 98 }, { "epoch": 0.02, "grad_norm": 0.8716529667780261, "learning_rate": 1.1511627906976746e-05, "loss": 0.7645, "step": 99 }, { "epoch": 0.02, "grad_norm": 0.8321559671738246, "learning_rate": 1.1627906976744187e-05, "loss": 0.7883, "step": 100 }, { "epoch": 0.02, "grad_norm": 0.7765342321373482, "learning_rate": 1.174418604651163e-05, "loss": 0.777, "step": 101 }, { "epoch": 0.02, "grad_norm": 0.8352328783831124, "learning_rate": 1.1860465116279072e-05, "loss": 0.8032, "step": 102 }, { "epoch": 0.02, "grad_norm": 0.8553830386635267, "learning_rate": 1.1976744186046511e-05, "loss": 0.8029, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.8331691093786251, "learning_rate": 1.2093023255813954e-05, "loss": 0.7697, "step": 104 }, { "epoch": 0.02, "grad_norm": 0.8582555508381573, "learning_rate": 1.2209302325581397e-05, "loss": 0.761, "step": 105 }, { "epoch": 0.02, "grad_norm": 0.945727532510146, "learning_rate": 1.2325581395348838e-05, "loss": 0.7407, "step": 106 }, { "epoch": 0.02, "grad_norm": 1.1228885803788857, "learning_rate": 1.244186046511628e-05, "loss": 0.7679, "step": 107 }, { "epoch": 0.02, "grad_norm": 1.0553926705081207, "learning_rate": 1.2558139534883723e-05, "loss": 0.8289, "step": 108 }, { "epoch": 0.02, "grad_norm": 0.8743878267280357, "learning_rate": 1.2674418604651164e-05, "loss": 0.75, "step": 109 }, { "epoch": 0.02, "grad_norm": 1.1077406594698664, "learning_rate": 1.2790697674418606e-05, "loss": 0.7768, "step": 110 }, { "epoch": 0.02, "grad_norm": 1.2088716666843915, "learning_rate": 1.2906976744186049e-05, "loss": 0.7541, "step": 111 }, { "epoch": 0.02, "grad_norm": 0.8312347487235417, "learning_rate": 1.302325581395349e-05, "loss": 0.8121, "step": 112 }, { "epoch": 0.02, "grad_norm": 1.542526862944443, "learning_rate": 1.313953488372093e-05, "loss": 0.7811, "step": 113 }, { "epoch": 0.02, "grad_norm": 1.1706123996049158, "learning_rate": 1.3255813953488372e-05, "loss": 0.7555, "step": 114 }, { "epoch": 0.02, "grad_norm": 1.2410765301353446, "learning_rate": 1.3372093023255814e-05, "loss": 0.7938, "step": 115 }, { "epoch": 0.02, "grad_norm": 1.2000010215731538, "learning_rate": 1.3488372093023257e-05, "loss": 0.7951, "step": 116 }, { "epoch": 0.02, "grad_norm": 1.0226069037947592, "learning_rate": 1.36046511627907e-05, "loss": 0.7782, "step": 117 }, { "epoch": 0.02, "grad_norm": 0.9796355316289806, "learning_rate": 1.372093023255814e-05, "loss": 0.795, "step": 118 }, { "epoch": 0.02, "grad_norm": 0.9267209270966663, "learning_rate": 1.3837209302325583e-05, "loss": 0.7891, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.9779477032784839, "learning_rate": 1.3953488372093025e-05, "loss": 0.7788, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.8904927671811802, "learning_rate": 1.4069767441860466e-05, "loss": 0.7429, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.9183065702790141, "learning_rate": 1.4186046511627909e-05, "loss": 0.7828, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.9709459698603393, "learning_rate": 1.4302325581395352e-05, "loss": 0.7643, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.871184707079809, "learning_rate": 1.441860465116279e-05, "loss": 0.7941, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.9882279740390536, "learning_rate": 1.4534883720930233e-05, "loss": 0.8095, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.9060620720106839, "learning_rate": 1.4651162790697674e-05, "loss": 0.7606, "step": 126 }, { "epoch": 0.02, "grad_norm": 1.090577371819845, "learning_rate": 1.4767441860465117e-05, "loss": 0.8068, "step": 127 }, { "epoch": 0.02, "grad_norm": 1.044351289978564, "learning_rate": 1.488372093023256e-05, "loss": 0.7929, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.9242872736838782, "learning_rate": 1.5000000000000002e-05, "loss": 0.7838, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.8764152218050635, "learning_rate": 1.5116279069767443e-05, "loss": 0.7847, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.9479988426643892, "learning_rate": 1.5232558139534886e-05, "loss": 0.7933, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.8689459495669496, "learning_rate": 1.5348837209302328e-05, "loss": 0.7742, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.8150883583722773, "learning_rate": 1.546511627906977e-05, "loss": 0.7605, "step": 133 }, { "epoch": 0.02, "grad_norm": 1.1355257517954898, "learning_rate": 1.558139534883721e-05, "loss": 0.762, "step": 134 }, { "epoch": 0.02, "grad_norm": 1.7885906456493739, "learning_rate": 1.569767441860465e-05, "loss": 0.7522, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.9396233416716444, "learning_rate": 1.5813953488372095e-05, "loss": 0.8062, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.9485299970804153, "learning_rate": 1.5930232558139536e-05, "loss": 0.7657, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.9085988577288342, "learning_rate": 1.6046511627906977e-05, "loss": 0.7817, "step": 138 }, { "epoch": 0.02, "grad_norm": 0.8531883087308522, "learning_rate": 1.616279069767442e-05, "loss": 0.8217, "step": 139 }, { "epoch": 0.02, "grad_norm": 1.0903517858998402, "learning_rate": 1.6279069767441862e-05, "loss": 0.7782, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.1622160508217594, "learning_rate": 1.6395348837209303e-05, "loss": 0.8127, "step": 141 }, { "epoch": 0.02, "grad_norm": 0.7379747675787053, "learning_rate": 1.6511627906976747e-05, "loss": 0.7893, "step": 142 }, { "epoch": 0.03, "grad_norm": 1.2466289162813258, "learning_rate": 1.6627906976744188e-05, "loss": 0.781, "step": 143 }, { "epoch": 0.03, "grad_norm": 0.9815360848678397, "learning_rate": 1.674418604651163e-05, "loss": 0.7656, "step": 144 }, { "epoch": 0.03, "grad_norm": 0.9100632675623148, "learning_rate": 1.6860465116279073e-05, "loss": 0.7856, "step": 145 }, { "epoch": 0.03, "grad_norm": 0.9184642118227849, "learning_rate": 1.697674418604651e-05, "loss": 0.8565, "step": 146 }, { "epoch": 0.03, "grad_norm": 0.9829759948127988, "learning_rate": 1.7093023255813955e-05, "loss": 0.7965, "step": 147 }, { "epoch": 0.03, "grad_norm": 0.9845096516856808, "learning_rate": 1.7209302325581396e-05, "loss": 0.8274, "step": 148 }, { "epoch": 0.03, "grad_norm": 0.9646717199605087, "learning_rate": 1.7325581395348837e-05, "loss": 0.7782, "step": 149 }, { "epoch": 0.03, "grad_norm": 1.0009121781924153, "learning_rate": 1.744186046511628e-05, "loss": 0.748, "step": 150 }, { "epoch": 0.03, "grad_norm": 1.0269906040520693, "learning_rate": 1.7558139534883722e-05, "loss": 0.7815, "step": 151 }, { "epoch": 0.03, "grad_norm": 0.9517325046922676, "learning_rate": 1.7674418604651163e-05, "loss": 0.7967, "step": 152 }, { "epoch": 0.03, "grad_norm": 1.0839055345049087, "learning_rate": 1.7790697674418608e-05, "loss": 0.7454, "step": 153 }, { "epoch": 0.03, "grad_norm": 1.0098002446046428, "learning_rate": 1.790697674418605e-05, "loss": 0.7805, "step": 154 }, { "epoch": 0.03, "grad_norm": 0.9083954364222501, "learning_rate": 1.802325581395349e-05, "loss": 0.7764, "step": 155 }, { "epoch": 0.03, "grad_norm": 0.8646007137960834, "learning_rate": 1.813953488372093e-05, "loss": 0.7589, "step": 156 }, { "epoch": 0.03, "grad_norm": 1.0064014609093634, "learning_rate": 1.825581395348837e-05, "loss": 0.8105, "step": 157 }, { "epoch": 0.03, "grad_norm": 0.8870022502113245, "learning_rate": 1.8372093023255815e-05, "loss": 0.7466, "step": 158 }, { "epoch": 0.03, "grad_norm": 1.1312001191012668, "learning_rate": 1.8488372093023256e-05, "loss": 0.7894, "step": 159 }, { "epoch": 0.03, "grad_norm": 0.9461657639704655, "learning_rate": 1.86046511627907e-05, "loss": 0.7697, "step": 160 }, { "epoch": 0.03, "grad_norm": 1.0045162986154175, "learning_rate": 1.872093023255814e-05, "loss": 0.7955, "step": 161 }, { "epoch": 0.03, "grad_norm": 1.0554113428684353, "learning_rate": 1.8837209302325582e-05, "loss": 0.8149, "step": 162 }, { "epoch": 0.03, "grad_norm": 0.8521179240773811, "learning_rate": 1.8953488372093027e-05, "loss": 0.7948, "step": 163 }, { "epoch": 0.03, "grad_norm": 1.134501033841664, "learning_rate": 1.9069767441860468e-05, "loss": 0.7128, "step": 164 }, { "epoch": 0.03, "grad_norm": 1.1595736891729245, "learning_rate": 1.918604651162791e-05, "loss": 0.742, "step": 165 }, { "epoch": 0.03, "grad_norm": 1.9255921128494717, "learning_rate": 1.9302325581395353e-05, "loss": 0.8051, "step": 166 }, { "epoch": 0.03, "grad_norm": 0.9628573872156717, "learning_rate": 1.941860465116279e-05, "loss": 0.7713, "step": 167 }, { "epoch": 0.03, "grad_norm": 0.8667585216690853, "learning_rate": 1.9534883720930235e-05, "loss": 0.7503, "step": 168 }, { "epoch": 0.03, "grad_norm": 0.8654572010488044, "learning_rate": 1.9651162790697676e-05, "loss": 0.7353, "step": 169 }, { "epoch": 0.03, "grad_norm": 0.846386262484889, "learning_rate": 1.9767441860465116e-05, "loss": 0.7586, "step": 170 }, { "epoch": 0.03, "grad_norm": 1.065216121506597, "learning_rate": 1.988372093023256e-05, "loss": 0.7871, "step": 171 }, { "epoch": 0.03, "grad_norm": 0.9186522312367869, "learning_rate": 2e-05, "loss": 0.7805, "step": 172 }, { "epoch": 0.03, "grad_norm": 0.819316228728869, "learning_rate": 1.9999998386895803e-05, "loss": 0.7551, "step": 173 }, { "epoch": 0.03, "grad_norm": 0.9368514869213895, "learning_rate": 1.9999993547583733e-05, "loss": 0.7881, "step": 174 }, { "epoch": 0.03, "grad_norm": 0.9177713416924522, "learning_rate": 1.9999985482065353e-05, "loss": 0.7561, "step": 175 }, { "epoch": 0.03, "grad_norm": 1.1149571243095722, "learning_rate": 1.9999974190343262e-05, "loss": 0.8042, "step": 176 }, { "epoch": 0.03, "grad_norm": 1.0494497937988716, "learning_rate": 1.9999959672421103e-05, "loss": 0.8022, "step": 177 }, { "epoch": 0.03, "grad_norm": 1.0821242698974232, "learning_rate": 1.999994192830356e-05, "loss": 0.781, "step": 178 }, { "epoch": 0.03, "grad_norm": 1.0399749902397244, "learning_rate": 1.999992095799636e-05, "loss": 0.7213, "step": 179 }, { "epoch": 0.03, "grad_norm": 1.0674293781287443, "learning_rate": 1.9999896761506266e-05, "loss": 0.773, "step": 180 }, { "epoch": 0.03, "grad_norm": 1.1205184297543629, "learning_rate": 1.9999869338841083e-05, "loss": 0.8365, "step": 181 }, { "epoch": 0.03, "grad_norm": 0.940462266214596, "learning_rate": 1.9999838690009663e-05, "loss": 0.8175, "step": 182 }, { "epoch": 0.03, "grad_norm": 0.9586178313591558, "learning_rate": 1.9999804815021894e-05, "loss": 0.8002, "step": 183 }, { "epoch": 0.03, "grad_norm": 1.1853140101841466, "learning_rate": 1.9999767713888696e-05, "loss": 0.8205, "step": 184 }, { "epoch": 0.03, "grad_norm": 0.8404858936923402, "learning_rate": 1.999972738662205e-05, "loss": 0.754, "step": 185 }, { "epoch": 0.03, "grad_norm": 1.0476356458171743, "learning_rate": 1.9999683833234958e-05, "loss": 0.8009, "step": 186 }, { "epoch": 0.03, "grad_norm": 0.8547048434533767, "learning_rate": 1.9999637053741474e-05, "loss": 0.7827, "step": 187 }, { "epoch": 0.03, "grad_norm": 0.9612261183509333, "learning_rate": 1.9999587048156695e-05, "loss": 0.7448, "step": 188 }, { "epoch": 0.03, "grad_norm": 0.8713776593262271, "learning_rate": 1.9999533816496743e-05, "loss": 0.8141, "step": 189 }, { "epoch": 0.03, "grad_norm": 0.924141504031057, "learning_rate": 1.9999477358778804e-05, "loss": 0.772, "step": 190 }, { "epoch": 0.03, "grad_norm": 0.8409559421889676, "learning_rate": 1.999941767502108e-05, "loss": 0.7602, "step": 191 }, { "epoch": 0.03, "grad_norm": 0.9036075054131932, "learning_rate": 1.9999354765242837e-05, "loss": 0.8341, "step": 192 }, { "epoch": 0.03, "grad_norm": 0.8040838241173592, "learning_rate": 1.9999288629464363e-05, "loss": 0.7871, "step": 193 }, { "epoch": 0.03, "grad_norm": 1.0427603979545126, "learning_rate": 1.9999219267707e-05, "loss": 0.852, "step": 194 }, { "epoch": 0.03, "grad_norm": 0.9818504369921642, "learning_rate": 1.9999146679993126e-05, "loss": 0.7201, "step": 195 }, { "epoch": 0.03, "grad_norm": 0.8114142280251863, "learning_rate": 1.9999070866346153e-05, "loss": 0.826, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.8514323155229955, "learning_rate": 1.9998991826790545e-05, "loss": 0.7885, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.8395821368673114, "learning_rate": 1.99989095613518e-05, "loss": 0.8039, "step": 198 }, { "epoch": 0.03, "grad_norm": 0.8093999016507611, "learning_rate": 1.9998824070056467e-05, "loss": 0.731, "step": 199 }, { "epoch": 0.04, "grad_norm": 0.8715220714703587, "learning_rate": 1.999873535293211e-05, "loss": 0.7973, "step": 200 }, { "epoch": 0.04, "grad_norm": 0.898257936460322, "learning_rate": 1.999864341000737e-05, "loss": 0.7264, "step": 201 }, { "epoch": 0.04, "grad_norm": 2.340789629109145, "learning_rate": 1.9998548241311894e-05, "loss": 0.7481, "step": 202 }, { "epoch": 0.04, "grad_norm": 1.0215234515481244, "learning_rate": 1.9998449846876394e-05, "loss": 0.7929, "step": 203 }, { "epoch": 0.04, "grad_norm": 0.8627709078225212, "learning_rate": 1.999834822673261e-05, "loss": 0.7666, "step": 204 }, { "epoch": 0.04, "grad_norm": 1.0513377027280966, "learning_rate": 1.999824338091333e-05, "loss": 0.8362, "step": 205 }, { "epoch": 0.04, "grad_norm": 1.078386385595274, "learning_rate": 1.999813530945238e-05, "loss": 0.7825, "step": 206 }, { "epoch": 0.04, "grad_norm": 0.9419822243365016, "learning_rate": 1.9998024012384624e-05, "loss": 0.8214, "step": 207 }, { "epoch": 0.04, "grad_norm": 0.9938271788307446, "learning_rate": 1.9997909489745968e-05, "loss": 0.7638, "step": 208 }, { "epoch": 0.04, "grad_norm": 1.2577641280915903, "learning_rate": 1.999779174157336e-05, "loss": 0.7775, "step": 209 }, { "epoch": 0.04, "grad_norm": 0.9003331273859478, "learning_rate": 1.999767076790479e-05, "loss": 0.8154, "step": 210 }, { "epoch": 0.04, "grad_norm": 1.0364535526225775, "learning_rate": 1.9997546568779285e-05, "loss": 0.7551, "step": 211 }, { "epoch": 0.04, "grad_norm": 1.0373226009046603, "learning_rate": 1.9997419144236915e-05, "loss": 0.7534, "step": 212 }, { "epoch": 0.04, "grad_norm": 0.862550525263606, "learning_rate": 1.999728849431879e-05, "loss": 0.7338, "step": 213 }, { "epoch": 0.04, "grad_norm": 0.8328091621275889, "learning_rate": 1.9997154619067053e-05, "loss": 0.7857, "step": 214 }, { "epoch": 0.04, "grad_norm": 0.820477444157098, "learning_rate": 1.9997017518524905e-05, "loss": 0.7635, "step": 215 }, { "epoch": 0.04, "grad_norm": 0.8631528968456693, "learning_rate": 1.999687719273658e-05, "loss": 0.8215, "step": 216 }, { "epoch": 0.04, "grad_norm": 0.8175923141427798, "learning_rate": 1.9996733641747338e-05, "loss": 0.7715, "step": 217 }, { "epoch": 0.04, "grad_norm": 0.8389290084531597, "learning_rate": 1.9996586865603497e-05, "loss": 0.7487, "step": 218 }, { "epoch": 0.04, "grad_norm": 0.8473347766678769, "learning_rate": 1.9996436864352412e-05, "loss": 0.7623, "step": 219 }, { "epoch": 0.04, "grad_norm": 0.9220854282369375, "learning_rate": 1.9996283638042476e-05, "loss": 0.7892, "step": 220 }, { "epoch": 0.04, "grad_norm": 0.8957531715904382, "learning_rate": 1.999612718672312e-05, "loss": 0.8036, "step": 221 }, { "epoch": 0.04, "grad_norm": 0.8336341338989092, "learning_rate": 1.999596751044482e-05, "loss": 0.7353, "step": 222 }, { "epoch": 0.04, "grad_norm": 0.8290740180784058, "learning_rate": 1.9995804609259097e-05, "loss": 0.7795, "step": 223 }, { "epoch": 0.04, "grad_norm": 1.5743955140257853, "learning_rate": 1.9995638483218496e-05, "loss": 0.7633, "step": 224 }, { "epoch": 0.04, "grad_norm": 1.377001229281052, "learning_rate": 1.999546913237662e-05, "loss": 0.7561, "step": 225 }, { "epoch": 0.04, "grad_norm": 1.015801156503745, "learning_rate": 1.9995296556788103e-05, "loss": 0.7541, "step": 226 }, { "epoch": 0.04, "grad_norm": 1.9245041698060197, "learning_rate": 1.9995120756508618e-05, "loss": 0.7959, "step": 227 }, { "epoch": 0.04, "grad_norm": 0.8585043020713611, "learning_rate": 1.9994941731594885e-05, "loss": 0.7599, "step": 228 }, { "epoch": 0.04, "grad_norm": 1.1417323217173783, "learning_rate": 1.9994759482104663e-05, "loss": 0.7527, "step": 229 }, { "epoch": 0.04, "grad_norm": 0.9263994592982827, "learning_rate": 1.999457400809675e-05, "loss": 0.7263, "step": 230 }, { "epoch": 0.04, "grad_norm": 1.0436046293322503, "learning_rate": 1.9994385309630978e-05, "loss": 0.83, "step": 231 }, { "epoch": 0.04, "grad_norm": 1.213552856830767, "learning_rate": 1.9994193386768227e-05, "loss": 0.765, "step": 232 }, { "epoch": 0.04, "grad_norm": 0.8634205049079668, "learning_rate": 1.999399823957042e-05, "loss": 0.809, "step": 233 }, { "epoch": 0.04, "grad_norm": 1.0431394588261844, "learning_rate": 1.999379986810051e-05, "loss": 0.7803, "step": 234 }, { "epoch": 0.04, "grad_norm": 0.895987795424016, "learning_rate": 1.9993598272422502e-05, "loss": 0.7998, "step": 235 }, { "epoch": 0.04, "grad_norm": 0.8286901900927107, "learning_rate": 1.9993393452601425e-05, "loss": 0.7865, "step": 236 }, { "epoch": 0.04, "grad_norm": 0.9209354194400868, "learning_rate": 1.9993185408703368e-05, "loss": 0.8013, "step": 237 }, { "epoch": 0.04, "grad_norm": 0.9667449027740166, "learning_rate": 1.9992974140795448e-05, "loss": 0.7627, "step": 238 }, { "epoch": 0.04, "grad_norm": 0.9176102878127571, "learning_rate": 1.999275964894582e-05, "loss": 0.7815, "step": 239 }, { "epoch": 0.04, "grad_norm": 0.86787831216263, "learning_rate": 1.9992541933223687e-05, "loss": 0.7911, "step": 240 }, { "epoch": 0.04, "grad_norm": 0.7548241105185353, "learning_rate": 1.9992320993699287e-05, "loss": 0.773, "step": 241 }, { "epoch": 0.04, "grad_norm": 0.8006933134137093, "learning_rate": 1.9992096830443902e-05, "loss": 0.7242, "step": 242 }, { "epoch": 0.04, "grad_norm": 0.8437419207241251, "learning_rate": 1.9991869443529852e-05, "loss": 0.7756, "step": 243 }, { "epoch": 0.04, "grad_norm": 0.9351477453120055, "learning_rate": 1.9991638833030494e-05, "loss": 0.818, "step": 244 }, { "epoch": 0.04, "grad_norm": 0.9755931268438567, "learning_rate": 1.9991404999020227e-05, "loss": 0.7877, "step": 245 }, { "epoch": 0.04, "grad_norm": 0.8372108035891156, "learning_rate": 1.9991167941574495e-05, "loss": 0.7792, "step": 246 }, { "epoch": 0.04, "grad_norm": 0.9703093159535989, "learning_rate": 1.9990927660769778e-05, "loss": 0.7585, "step": 247 }, { "epoch": 0.04, "grad_norm": 0.9203540833502468, "learning_rate": 1.999068415668359e-05, "loss": 0.7765, "step": 248 }, { "epoch": 0.04, "grad_norm": 0.9518414010605435, "learning_rate": 1.999043742939449e-05, "loss": 0.7981, "step": 249 }, { "epoch": 0.04, "grad_norm": 0.8519326286020753, "learning_rate": 1.9990187478982087e-05, "loss": 0.815, "step": 250 }, { "epoch": 0.04, "grad_norm": 0.9523585409271742, "learning_rate": 1.9989934305527013e-05, "loss": 0.793, "step": 251 }, { "epoch": 0.04, "grad_norm": 0.8340686806473012, "learning_rate": 1.9989677909110945e-05, "loss": 0.7284, "step": 252 }, { "epoch": 0.04, "grad_norm": 1.069071784104213, "learning_rate": 1.9989418289816607e-05, "loss": 0.7673, "step": 253 }, { "epoch": 0.04, "grad_norm": 0.8340618308721339, "learning_rate": 1.9989155447727754e-05, "loss": 0.7373, "step": 254 }, { "epoch": 0.04, "grad_norm": 0.9351766593879471, "learning_rate": 1.9988889382929187e-05, "loss": 0.7861, "step": 255 }, { "epoch": 0.04, "grad_norm": 0.8712153138269318, "learning_rate": 1.9988620095506744e-05, "loss": 0.7587, "step": 256 }, { "epoch": 0.05, "grad_norm": 1.0777813136466325, "learning_rate": 1.9988347585547302e-05, "loss": 0.7406, "step": 257 }, { "epoch": 0.05, "grad_norm": 0.7976190552531871, "learning_rate": 1.9988071853138774e-05, "loss": 0.7471, "step": 258 }, { "epoch": 0.05, "grad_norm": 1.0013488659065124, "learning_rate": 1.9987792898370123e-05, "loss": 0.743, "step": 259 }, { "epoch": 0.05, "grad_norm": 0.9095152315741585, "learning_rate": 1.9987510721331346e-05, "loss": 0.7157, "step": 260 }, { "epoch": 0.05, "grad_norm": 0.9231724552716354, "learning_rate": 1.9987225322113476e-05, "loss": 0.739, "step": 261 }, { "epoch": 0.05, "grad_norm": 1.0587665132783133, "learning_rate": 1.9986936700808588e-05, "loss": 0.7918, "step": 262 }, { "epoch": 0.05, "grad_norm": 0.8705718111607289, "learning_rate": 1.99866448575098e-05, "loss": 0.7969, "step": 263 }, { "epoch": 0.05, "grad_norm": 0.9949534490931345, "learning_rate": 1.9986349792311267e-05, "loss": 0.7578, "step": 264 }, { "epoch": 0.05, "grad_norm": 0.9429815289656218, "learning_rate": 1.9986051505308178e-05, "loss": 0.7559, "step": 265 }, { "epoch": 0.05, "grad_norm": 0.8877005800096698, "learning_rate": 1.998574999659677e-05, "loss": 0.7624, "step": 266 }, { "epoch": 0.05, "grad_norm": 0.8425293399970402, "learning_rate": 1.9985445266274324e-05, "loss": 0.81, "step": 267 }, { "epoch": 0.05, "grad_norm": 0.9086558182062692, "learning_rate": 1.998513731443914e-05, "loss": 0.7633, "step": 268 }, { "epoch": 0.05, "grad_norm": 0.8091245178099699, "learning_rate": 1.9984826141190573e-05, "loss": 0.7797, "step": 269 }, { "epoch": 0.05, "grad_norm": 1.0378748780190088, "learning_rate": 1.9984511746629014e-05, "loss": 0.8724, "step": 270 }, { "epoch": 0.05, "grad_norm": 0.8033986223947497, "learning_rate": 1.99841941308559e-05, "loss": 0.7452, "step": 271 }, { "epoch": 0.05, "grad_norm": 0.9722143219130681, "learning_rate": 1.998387329397369e-05, "loss": 0.7479, "step": 272 }, { "epoch": 0.05, "grad_norm": 0.8458199690853218, "learning_rate": 1.99835492360859e-05, "loss": 0.7774, "step": 273 }, { "epoch": 0.05, "grad_norm": 0.8658253596584664, "learning_rate": 1.9983221957297077e-05, "loss": 0.763, "step": 274 }, { "epoch": 0.05, "grad_norm": 1.0087246895887887, "learning_rate": 1.9982891457712803e-05, "loss": 0.8572, "step": 275 }, { "epoch": 0.05, "grad_norm": 1.1239509421315388, "learning_rate": 1.998255773743971e-05, "loss": 0.7457, "step": 276 }, { "epoch": 0.05, "grad_norm": 1.0995916315318452, "learning_rate": 1.9982220796585462e-05, "loss": 0.8132, "step": 277 }, { "epoch": 0.05, "grad_norm": 0.9207859696334993, "learning_rate": 1.998188063525876e-05, "loss": 0.8008, "step": 278 }, { "epoch": 0.05, "grad_norm": 0.8878327693625236, "learning_rate": 1.9981537253569348e-05, "loss": 0.7577, "step": 279 }, { "epoch": 0.05, "grad_norm": 0.7378026901793506, "learning_rate": 1.998119065162801e-05, "loss": 0.743, "step": 280 }, { "epoch": 0.05, "grad_norm": 0.9501418037176955, "learning_rate": 1.9980840829546565e-05, "loss": 0.7465, "step": 281 }, { "epoch": 0.05, "grad_norm": 0.7715683560714279, "learning_rate": 1.9980487787437878e-05, "loss": 0.7865, "step": 282 }, { "epoch": 0.05, "grad_norm": 0.8762521911207162, "learning_rate": 1.998013152541584e-05, "loss": 0.7979, "step": 283 }, { "epoch": 0.05, "grad_norm": 0.8218406621660014, "learning_rate": 1.9979772043595395e-05, "loss": 0.7664, "step": 284 }, { "epoch": 0.05, "grad_norm": 1.046360493938903, "learning_rate": 1.9979409342092513e-05, "loss": 0.7815, "step": 285 }, { "epoch": 0.05, "grad_norm": 0.8276805510024934, "learning_rate": 1.9979043421024212e-05, "loss": 0.8151, "step": 286 }, { "epoch": 0.05, "grad_norm": 0.8767561772737308, "learning_rate": 1.997867428050855e-05, "loss": 0.7496, "step": 287 }, { "epoch": 0.05, "grad_norm": 0.7991384269575817, "learning_rate": 1.9978301920664614e-05, "loss": 0.7604, "step": 288 }, { "epoch": 0.05, "grad_norm": 0.9627758043286011, "learning_rate": 1.997792634161254e-05, "loss": 0.7212, "step": 289 }, { "epoch": 0.05, "grad_norm": 0.8313341870334467, "learning_rate": 1.997754754347349e-05, "loss": 0.8002, "step": 290 }, { "epoch": 0.05, "grad_norm": 0.9555627885827778, "learning_rate": 1.997716552636968e-05, "loss": 0.7351, "step": 291 }, { "epoch": 0.05, "grad_norm": 0.7830815896990123, "learning_rate": 1.9976780290424352e-05, "loss": 0.7359, "step": 292 }, { "epoch": 0.05, "grad_norm": 1.1000021983227037, "learning_rate": 1.997639183576179e-05, "loss": 0.7432, "step": 293 }, { "epoch": 0.05, "grad_norm": 0.8685840664699176, "learning_rate": 1.9976000162507326e-05, "loss": 0.8275, "step": 294 }, { "epoch": 0.05, "grad_norm": 0.9157713381323864, "learning_rate": 1.997560527078731e-05, "loss": 0.7604, "step": 295 }, { "epoch": 0.05, "grad_norm": 0.814227580245308, "learning_rate": 1.9975207160729154e-05, "loss": 0.7383, "step": 296 }, { "epoch": 0.05, "grad_norm": 0.989453163648394, "learning_rate": 1.9974805832461288e-05, "loss": 0.7784, "step": 297 }, { "epoch": 0.05, "grad_norm": 0.8448621584638188, "learning_rate": 1.9974401286113194e-05, "loss": 0.7214, "step": 298 }, { "epoch": 0.05, "grad_norm": 0.8785773248851321, "learning_rate": 1.997399352181538e-05, "loss": 0.7801, "step": 299 }, { "epoch": 0.05, "grad_norm": 0.8868001892666376, "learning_rate": 1.9973582539699404e-05, "loss": 0.8167, "step": 300 }, { "epoch": 0.05, "grad_norm": 0.8820995686540625, "learning_rate": 1.9973168339897865e-05, "loss": 0.7775, "step": 301 }, { "epoch": 0.05, "grad_norm": 0.832669543573008, "learning_rate": 1.997275092254438e-05, "loss": 0.7378, "step": 302 }, { "epoch": 0.05, "grad_norm": 0.8448692859390643, "learning_rate": 1.997233028777362e-05, "loss": 0.7576, "step": 303 }, { "epoch": 0.05, "grad_norm": 1.0542039260107812, "learning_rate": 1.9971906435721294e-05, "loss": 0.7901, "step": 304 }, { "epoch": 0.05, "grad_norm": 0.9139397781566436, "learning_rate": 1.9971479366524146e-05, "loss": 0.8019, "step": 305 }, { "epoch": 0.05, "grad_norm": 0.7902959740515295, "learning_rate": 1.997104908031995e-05, "loss": 0.7454, "step": 306 }, { "epoch": 0.05, "grad_norm": 0.769975450426412, "learning_rate": 1.9970615577247535e-05, "loss": 0.7231, "step": 307 }, { "epoch": 0.05, "grad_norm": 0.9101874234956655, "learning_rate": 1.997017885744675e-05, "loss": 0.7629, "step": 308 }, { "epoch": 0.05, "grad_norm": 0.7715395681360205, "learning_rate": 1.9969738921058494e-05, "loss": 0.771, "step": 309 }, { "epoch": 0.05, "grad_norm": 0.844319543712736, "learning_rate": 1.9969295768224702e-05, "loss": 0.7845, "step": 310 }, { "epoch": 0.05, "grad_norm": 0.7697226656126701, "learning_rate": 1.9968849399088337e-05, "loss": 0.7527, "step": 311 }, { "epoch": 0.05, "grad_norm": 0.8167977871166724, "learning_rate": 1.9968399813793417e-05, "loss": 0.7744, "step": 312 }, { "epoch": 0.05, "grad_norm": 0.9896642855018024, "learning_rate": 1.9967947012484975e-05, "loss": 0.7833, "step": 313 }, { "epoch": 0.06, "grad_norm": 0.9971706294712228, "learning_rate": 1.9967490995309108e-05, "loss": 0.7036, "step": 314 }, { "epoch": 0.06, "grad_norm": 0.7856243798764908, "learning_rate": 1.9967031762412925e-05, "loss": 0.7907, "step": 315 }, { "epoch": 0.06, "grad_norm": 0.9312618524614489, "learning_rate": 1.9966569313944588e-05, "loss": 0.7229, "step": 316 }, { "epoch": 0.06, "grad_norm": 0.8984908625114154, "learning_rate": 1.9966103650053297e-05, "loss": 0.7629, "step": 317 }, { "epoch": 0.06, "grad_norm": 0.9710960606111945, "learning_rate": 1.9965634770889277e-05, "loss": 0.7671, "step": 318 }, { "epoch": 0.06, "grad_norm": 0.807178389447196, "learning_rate": 1.9965162676603805e-05, "loss": 0.7614, "step": 319 }, { "epoch": 0.06, "grad_norm": 0.784297509161817, "learning_rate": 1.9964687367349187e-05, "loss": 0.7393, "step": 320 }, { "epoch": 0.06, "grad_norm": 0.8672054216755936, "learning_rate": 1.9964208843278765e-05, "loss": 0.8031, "step": 321 }, { "epoch": 0.06, "grad_norm": 0.7130877418362815, "learning_rate": 1.9963727104546922e-05, "loss": 0.702, "step": 322 }, { "epoch": 0.06, "grad_norm": 0.7848897553341526, "learning_rate": 1.996324215130908e-05, "loss": 0.774, "step": 323 }, { "epoch": 0.06, "grad_norm": 0.9420426860081439, "learning_rate": 1.996275398372169e-05, "loss": 0.8005, "step": 324 }, { "epoch": 0.06, "grad_norm": 0.8109668808453452, "learning_rate": 1.9962262601942246e-05, "loss": 0.716, "step": 325 }, { "epoch": 0.06, "grad_norm": 0.8652841290608206, "learning_rate": 1.996176800612928e-05, "loss": 0.7623, "step": 326 }, { "epoch": 0.06, "grad_norm": 0.8647543828842572, "learning_rate": 1.9961270196442363e-05, "loss": 0.7724, "step": 327 }, { "epoch": 0.06, "grad_norm": 0.8225613364996616, "learning_rate": 1.9960769173042088e-05, "loss": 0.732, "step": 328 }, { "epoch": 0.06, "grad_norm": 0.8374863438475668, "learning_rate": 1.9960264936090104e-05, "loss": 0.7783, "step": 329 }, { "epoch": 0.06, "grad_norm": 0.8559114817150187, "learning_rate": 1.9959757485749086e-05, "loss": 0.7536, "step": 330 }, { "epoch": 0.06, "grad_norm": 0.918035836669111, "learning_rate": 1.9959246822182745e-05, "loss": 0.778, "step": 331 }, { "epoch": 0.06, "grad_norm": 0.9962416832969171, "learning_rate": 1.995873294555584e-05, "loss": 0.824, "step": 332 }, { "epoch": 0.06, "grad_norm": 0.7921623544359869, "learning_rate": 1.9958215856034152e-05, "loss": 0.7722, "step": 333 }, { "epoch": 0.06, "grad_norm": 0.8715120967252615, "learning_rate": 1.9957695553784503e-05, "loss": 0.7305, "step": 334 }, { "epoch": 0.06, "grad_norm": 0.8694787913057882, "learning_rate": 1.9957172038974757e-05, "loss": 0.7193, "step": 335 }, { "epoch": 0.06, "grad_norm": 0.7872389514657487, "learning_rate": 1.995664531177381e-05, "loss": 0.7354, "step": 336 }, { "epoch": 0.06, "grad_norm": 0.9075911684539105, "learning_rate": 1.9956115372351595e-05, "loss": 0.8674, "step": 337 }, { "epoch": 0.06, "grad_norm": 0.8261141748179646, "learning_rate": 1.9955582220879088e-05, "loss": 0.7656, "step": 338 }, { "epoch": 0.06, "grad_norm": 0.8469148468776204, "learning_rate": 1.9955045857528283e-05, "loss": 0.7537, "step": 339 }, { "epoch": 0.06, "grad_norm": 0.8157021647950843, "learning_rate": 1.9954506282472223e-05, "loss": 0.7749, "step": 340 }, { "epoch": 0.06, "grad_norm": 0.7869720124218599, "learning_rate": 1.9953963495884995e-05, "loss": 0.7683, "step": 341 }, { "epoch": 0.06, "grad_norm": 0.8943010778175937, "learning_rate": 1.9953417497941712e-05, "loss": 0.7592, "step": 342 }, { "epoch": 0.06, "grad_norm": 0.8623201822368534, "learning_rate": 1.995286828881852e-05, "loss": 0.7512, "step": 343 }, { "epoch": 0.06, "grad_norm": 0.8574321345995202, "learning_rate": 1.9952315868692606e-05, "loss": 0.7974, "step": 344 }, { "epoch": 0.06, "grad_norm": 0.7564317619211998, "learning_rate": 1.995176023774219e-05, "loss": 0.7454, "step": 345 }, { "epoch": 0.06, "grad_norm": 0.8503093675017667, "learning_rate": 1.9951201396146534e-05, "loss": 0.7402, "step": 346 }, { "epoch": 0.06, "grad_norm": 0.771837491947763, "learning_rate": 1.995063934408593e-05, "loss": 0.7534, "step": 347 }, { "epoch": 0.06, "grad_norm": 0.9595748943674599, "learning_rate": 1.995007408174171e-05, "loss": 0.8111, "step": 348 }, { "epoch": 0.06, "grad_norm": 0.734000528350161, "learning_rate": 1.994950560929624e-05, "loss": 0.7163, "step": 349 }, { "epoch": 0.06, "grad_norm": 0.8740766781878783, "learning_rate": 1.9948933926932916e-05, "loss": 0.8094, "step": 350 }, { "epoch": 0.06, "grad_norm": 0.7622317864689694, "learning_rate": 1.994835903483618e-05, "loss": 0.716, "step": 351 }, { "epoch": 0.06, "grad_norm": 0.7347040067995632, "learning_rate": 1.99477809331915e-05, "loss": 0.7185, "step": 352 }, { "epoch": 0.06, "grad_norm": 0.8457297668942003, "learning_rate": 1.9947199622185387e-05, "loss": 0.751, "step": 353 }, { "epoch": 0.06, "grad_norm": 0.8556086269954896, "learning_rate": 1.9946615102005383e-05, "loss": 0.7432, "step": 354 }, { "epoch": 0.06, "grad_norm": 0.9005994494688468, "learning_rate": 1.9946027372840063e-05, "loss": 0.8161, "step": 355 }, { "epoch": 0.06, "grad_norm": 0.9491489451996729, "learning_rate": 1.9945436434879047e-05, "loss": 0.7599, "step": 356 }, { "epoch": 0.06, "grad_norm": 0.861158770692281, "learning_rate": 1.9944842288312977e-05, "loss": 0.7116, "step": 357 }, { "epoch": 0.06, "grad_norm": 0.9272666090955257, "learning_rate": 1.9944244933333544e-05, "loss": 0.8222, "step": 358 }, { "epoch": 0.06, "grad_norm": 0.9305043800370174, "learning_rate": 1.9943644370133462e-05, "loss": 0.8527, "step": 359 }, { "epoch": 0.06, "grad_norm": 0.9521778042789389, "learning_rate": 1.9943040598906488e-05, "loss": 0.7681, "step": 360 }, { "epoch": 0.06, "grad_norm": 0.7384444440360638, "learning_rate": 1.9942433619847406e-05, "loss": 0.7523, "step": 361 }, { "epoch": 0.06, "grad_norm": 0.8583175064258568, "learning_rate": 1.994182343315205e-05, "loss": 0.7533, "step": 362 }, { "epoch": 0.06, "grad_norm": 0.7179991195152676, "learning_rate": 1.994121003901727e-05, "loss": 0.7446, "step": 363 }, { "epoch": 0.06, "grad_norm": 0.9557388532877195, "learning_rate": 1.994059343764096e-05, "loss": 0.8019, "step": 364 }, { "epoch": 0.06, "grad_norm": 0.8583829789184324, "learning_rate": 1.9939973629222055e-05, "loss": 0.7084, "step": 365 }, { "epoch": 0.06, "grad_norm": 0.8272373294060107, "learning_rate": 1.9939350613960516e-05, "loss": 0.7592, "step": 366 }, { "epoch": 0.06, "grad_norm": 0.9824414897937457, "learning_rate": 1.993872439205734e-05, "loss": 0.7335, "step": 367 }, { "epoch": 0.06, "grad_norm": 0.7938265329299767, "learning_rate": 1.9938094963714553e-05, "loss": 0.8011, "step": 368 }, { "epoch": 0.06, "grad_norm": 0.923988488558115, "learning_rate": 1.9937462329135227e-05, "loss": 0.7264, "step": 369 }, { "epoch": 0.06, "grad_norm": 0.8656076587440547, "learning_rate": 1.9936826488523467e-05, "loss": 0.7495, "step": 370 }, { "epoch": 0.07, "grad_norm": 0.826998137916326, "learning_rate": 1.9936187442084403e-05, "loss": 0.7137, "step": 371 }, { "epoch": 0.07, "grad_norm": 1.0083657975564335, "learning_rate": 1.9935545190024207e-05, "loss": 0.7462, "step": 372 }, { "epoch": 0.07, "grad_norm": 0.9325360962571568, "learning_rate": 1.9934899732550083e-05, "loss": 0.7322, "step": 373 }, { "epoch": 0.07, "grad_norm": 0.8982281423171012, "learning_rate": 1.9934251069870266e-05, "loss": 0.7942, "step": 374 }, { "epoch": 0.07, "grad_norm": 0.9146718590995555, "learning_rate": 1.9933599202194034e-05, "loss": 0.7078, "step": 375 }, { "epoch": 0.07, "grad_norm": 0.7938957997303373, "learning_rate": 1.9932944129731685e-05, "loss": 0.7456, "step": 376 }, { "epoch": 0.07, "grad_norm": 0.8880164388752135, "learning_rate": 1.9932285852694568e-05, "loss": 0.7589, "step": 377 }, { "epoch": 0.07, "grad_norm": 0.810794679871374, "learning_rate": 1.993162437129505e-05, "loss": 0.7099, "step": 378 }, { "epoch": 0.07, "grad_norm": 0.8454310040042922, "learning_rate": 1.9930959685746538e-05, "loss": 0.7832, "step": 379 }, { "epoch": 0.07, "grad_norm": 0.8912986008610637, "learning_rate": 1.993029179626348e-05, "loss": 0.7418, "step": 380 }, { "epoch": 0.07, "grad_norm": 0.8470391014842785, "learning_rate": 1.992962070306135e-05, "loss": 0.7583, "step": 381 }, { "epoch": 0.07, "grad_norm": 1.0360726770067683, "learning_rate": 1.9928946406356647e-05, "loss": 0.7439, "step": 382 }, { "epoch": 0.07, "grad_norm": 1.0306337985978555, "learning_rate": 1.9928268906366927e-05, "loss": 0.8297, "step": 383 }, { "epoch": 0.07, "grad_norm": 0.9021026321439637, "learning_rate": 1.9927588203310756e-05, "loss": 0.7411, "step": 384 }, { "epoch": 0.07, "grad_norm": 0.8868620811200735, "learning_rate": 1.9926904297407747e-05, "loss": 0.7996, "step": 385 }, { "epoch": 0.07, "grad_norm": 0.8484605009705304, "learning_rate": 1.992621718887854e-05, "loss": 0.7376, "step": 386 }, { "epoch": 0.07, "grad_norm": 0.7850003860205665, "learning_rate": 1.992552687794481e-05, "loss": 0.7804, "step": 387 }, { "epoch": 0.07, "grad_norm": 0.8460827136194967, "learning_rate": 1.992483336482927e-05, "loss": 0.7548, "step": 388 }, { "epoch": 0.07, "grad_norm": 0.7644667952173284, "learning_rate": 1.992413664975566e-05, "loss": 0.7184, "step": 389 }, { "epoch": 0.07, "grad_norm": 0.8314675703242032, "learning_rate": 1.9923436732948754e-05, "loss": 0.7919, "step": 390 }, { "epoch": 0.07, "grad_norm": 0.7657328303259305, "learning_rate": 1.992273361463436e-05, "loss": 0.7722, "step": 391 }, { "epoch": 0.07, "grad_norm": 0.7578391599930594, "learning_rate": 1.9922027295039313e-05, "loss": 0.7517, "step": 392 }, { "epoch": 0.07, "grad_norm": 0.8284505662895363, "learning_rate": 1.9921317774391498e-05, "loss": 0.7432, "step": 393 }, { "epoch": 0.07, "grad_norm": 0.7731279063887819, "learning_rate": 1.9920605052919814e-05, "loss": 0.7387, "step": 394 }, { "epoch": 0.07, "grad_norm": 0.7714109616958139, "learning_rate": 1.99198891308542e-05, "loss": 0.7537, "step": 395 }, { "epoch": 0.07, "grad_norm": 1.0090158026406266, "learning_rate": 1.9919170008425625e-05, "loss": 0.813, "step": 396 }, { "epoch": 0.07, "grad_norm": 0.7322779955161317, "learning_rate": 1.99184476858661e-05, "loss": 0.7172, "step": 397 }, { "epoch": 0.07, "grad_norm": 0.976781307941285, "learning_rate": 1.991772216340866e-05, "loss": 0.7537, "step": 398 }, { "epoch": 0.07, "grad_norm": 0.8289065081846941, "learning_rate": 1.9916993441287364e-05, "loss": 0.7631, "step": 399 }, { "epoch": 0.07, "grad_norm": 0.8998868961840657, "learning_rate": 1.9916261519737327e-05, "loss": 0.7358, "step": 400 }, { "epoch": 0.07, "grad_norm": 0.8459547761018223, "learning_rate": 1.991552639899467e-05, "loss": 0.7603, "step": 401 }, { "epoch": 0.07, "grad_norm": 0.7562186861419029, "learning_rate": 1.9914788079296567e-05, "loss": 0.7565, "step": 402 }, { "epoch": 0.07, "grad_norm": 0.7623008085932237, "learning_rate": 1.9914046560881205e-05, "loss": 0.7548, "step": 403 }, { "epoch": 0.07, "grad_norm": 0.9138181335230982, "learning_rate": 1.9913301843987827e-05, "loss": 0.7612, "step": 404 }, { "epoch": 0.07, "grad_norm": 0.864228632110966, "learning_rate": 1.9912553928856684e-05, "loss": 0.7695, "step": 405 }, { "epoch": 0.07, "grad_norm": 0.8783501356809255, "learning_rate": 1.9911802815729073e-05, "loss": 0.7335, "step": 406 }, { "epoch": 0.07, "grad_norm": 0.7917636386168562, "learning_rate": 1.9911048504847318e-05, "loss": 0.727, "step": 407 }, { "epoch": 0.07, "grad_norm": 0.8683136780698806, "learning_rate": 1.9910290996454774e-05, "loss": 0.7228, "step": 408 }, { "epoch": 0.07, "grad_norm": 0.7900846602192605, "learning_rate": 1.990953029079583e-05, "loss": 0.7645, "step": 409 }, { "epoch": 0.07, "grad_norm": 0.8795520473014077, "learning_rate": 1.990876638811591e-05, "loss": 0.769, "step": 410 }, { "epoch": 0.07, "grad_norm": 0.8052553599439309, "learning_rate": 1.990799928866146e-05, "loss": 0.7672, "step": 411 }, { "epoch": 0.07, "grad_norm": 0.8943023523091413, "learning_rate": 1.990722899267996e-05, "loss": 0.7731, "step": 412 }, { "epoch": 0.07, "grad_norm": 0.8210085958183659, "learning_rate": 1.9906455500419927e-05, "loss": 0.7742, "step": 413 }, { "epoch": 0.07, "grad_norm": 0.9530034559960427, "learning_rate": 1.9905678812130905e-05, "loss": 0.8918, "step": 414 }, { "epoch": 0.07, "grad_norm": 0.8809080640391547, "learning_rate": 1.990489892806347e-05, "loss": 0.7658, "step": 415 }, { "epoch": 0.07, "grad_norm": 0.9537989601075222, "learning_rate": 1.9904115848469233e-05, "loss": 0.7189, "step": 416 }, { "epoch": 0.07, "grad_norm": 0.8187661717570067, "learning_rate": 1.9903329573600824e-05, "loss": 0.7968, "step": 417 }, { "epoch": 0.07, "grad_norm": 0.8168613508439478, "learning_rate": 1.9902540103711918e-05, "loss": 0.7649, "step": 418 }, { "epoch": 0.07, "grad_norm": 1.636125536859928, "learning_rate": 1.990174743905721e-05, "loss": 0.8074, "step": 419 }, { "epoch": 0.07, "grad_norm": 0.8905003794291997, "learning_rate": 1.990095157989243e-05, "loss": 0.7944, "step": 420 }, { "epoch": 0.07, "grad_norm": 0.9369478551297701, "learning_rate": 1.9900152526474343e-05, "loss": 0.7799, "step": 421 }, { "epoch": 0.07, "grad_norm": 0.7771516898357167, "learning_rate": 1.989935027906074e-05, "loss": 0.7505, "step": 422 }, { "epoch": 0.07, "grad_norm": 0.8091789992360671, "learning_rate": 1.9898544837910436e-05, "loss": 0.7241, "step": 423 }, { "epoch": 0.07, "grad_norm": 0.7261528562651761, "learning_rate": 1.9897736203283292e-05, "loss": 0.6922, "step": 424 }, { "epoch": 0.07, "grad_norm": 0.8741728311777652, "learning_rate": 1.9896924375440186e-05, "loss": 0.7745, "step": 425 }, { "epoch": 0.07, "grad_norm": 0.841021792599743, "learning_rate": 1.989610935464303e-05, "loss": 0.777, "step": 426 }, { "epoch": 0.07, "grad_norm": 0.7926417425519007, "learning_rate": 1.9895291141154765e-05, "loss": 0.7671, "step": 427 }, { "epoch": 0.08, "grad_norm": 0.8311163606898547, "learning_rate": 1.9894469735239366e-05, "loss": 0.7696, "step": 428 }, { "epoch": 0.08, "grad_norm": 0.7673111350715692, "learning_rate": 1.9893645137161843e-05, "loss": 0.7743, "step": 429 }, { "epoch": 0.08, "grad_norm": 0.7687679476412361, "learning_rate": 1.9892817347188217e-05, "loss": 0.7597, "step": 430 }, { "epoch": 0.08, "grad_norm": 0.7739636805899913, "learning_rate": 1.9891986365585553e-05, "loss": 0.7223, "step": 431 }, { "epoch": 0.08, "grad_norm": 0.7605372233607582, "learning_rate": 1.989115219262195e-05, "loss": 0.7558, "step": 432 }, { "epoch": 0.08, "grad_norm": 0.8369358213813649, "learning_rate": 1.9890314828566518e-05, "loss": 0.7651, "step": 433 }, { "epoch": 0.08, "grad_norm": 0.9763288847852094, "learning_rate": 1.9889474273689415e-05, "loss": 0.8098, "step": 434 }, { "epoch": 0.08, "grad_norm": 0.8494432800083765, "learning_rate": 1.988863052826182e-05, "loss": 0.7765, "step": 435 }, { "epoch": 0.08, "grad_norm": 0.822785302887837, "learning_rate": 1.9887783592555952e-05, "loss": 0.7559, "step": 436 }, { "epoch": 0.08, "grad_norm": 1.0736366843271696, "learning_rate": 1.9886933466845036e-05, "loss": 0.7799, "step": 437 }, { "epoch": 0.08, "grad_norm": 0.8080504153989466, "learning_rate": 1.9886080151403346e-05, "loss": 0.7429, "step": 438 }, { "epoch": 0.08, "grad_norm": 0.876086703306627, "learning_rate": 1.988522364650618e-05, "loss": 0.777, "step": 439 }, { "epoch": 0.08, "grad_norm": 0.8272949669824579, "learning_rate": 1.9884363952429864e-05, "loss": 0.7747, "step": 440 }, { "epoch": 0.08, "grad_norm": 0.9823591733255342, "learning_rate": 1.988350106945175e-05, "loss": 0.7587, "step": 441 }, { "epoch": 0.08, "grad_norm": 0.8127856459024151, "learning_rate": 1.988263499785023e-05, "loss": 0.7373, "step": 442 }, { "epoch": 0.08, "grad_norm": 0.8017213817615765, "learning_rate": 1.988176573790471e-05, "loss": 0.7076, "step": 443 }, { "epoch": 0.08, "grad_norm": 0.7426799116288191, "learning_rate": 1.9880893289895633e-05, "loss": 0.734, "step": 444 }, { "epoch": 0.08, "grad_norm": 0.8452937731367595, "learning_rate": 1.988001765410447e-05, "loss": 0.7426, "step": 445 }, { "epoch": 0.08, "grad_norm": 0.8397641582221184, "learning_rate": 1.9879138830813714e-05, "loss": 0.7522, "step": 446 }, { "epoch": 0.08, "grad_norm": 0.7600164271271203, "learning_rate": 1.98782568203069e-05, "loss": 0.7436, "step": 447 }, { "epoch": 0.08, "grad_norm": 0.8185411315627878, "learning_rate": 1.9877371622868583e-05, "loss": 0.758, "step": 448 }, { "epoch": 0.08, "grad_norm": 1.0284687280247709, "learning_rate": 1.9876483238784336e-05, "loss": 0.7569, "step": 449 }, { "epoch": 0.08, "grad_norm": 0.9164245825664988, "learning_rate": 1.9875591668340778e-05, "loss": 0.7656, "step": 450 }, { "epoch": 0.08, "grad_norm": 0.8324203564469429, "learning_rate": 1.9874696911825547e-05, "loss": 0.7961, "step": 451 }, { "epoch": 0.08, "grad_norm": 0.7818976206391318, "learning_rate": 1.9873798969527312e-05, "loss": 0.7293, "step": 452 }, { "epoch": 0.08, "grad_norm": 0.981812234349996, "learning_rate": 1.9872897841735765e-05, "loss": 0.799, "step": 453 }, { "epoch": 0.08, "grad_norm": 0.8992813985209943, "learning_rate": 1.9871993528741628e-05, "loss": 0.7848, "step": 454 }, { "epoch": 0.08, "grad_norm": 0.8367612847979895, "learning_rate": 1.9871086030836653e-05, "loss": 0.7858, "step": 455 }, { "epoch": 0.08, "grad_norm": 0.8865629152536015, "learning_rate": 1.9870175348313616e-05, "loss": 0.768, "step": 456 }, { "epoch": 0.08, "grad_norm": 0.7426504573237722, "learning_rate": 1.9869261481466324e-05, "loss": 0.7386, "step": 457 }, { "epoch": 0.08, "grad_norm": 0.8759786129633722, "learning_rate": 1.986834443058961e-05, "loss": 0.7355, "step": 458 }, { "epoch": 0.08, "grad_norm": 0.8660193152214409, "learning_rate": 1.9867424195979332e-05, "loss": 0.8011, "step": 459 }, { "epoch": 0.08, "grad_norm": 0.9071492120681568, "learning_rate": 1.986650077793238e-05, "loss": 0.8044, "step": 460 }, { "epoch": 0.08, "grad_norm": 0.7967859426296018, "learning_rate": 1.986557417674666e-05, "loss": 0.7292, "step": 461 }, { "epoch": 0.08, "grad_norm": 0.7457601622086821, "learning_rate": 1.986464439272112e-05, "loss": 0.6852, "step": 462 }, { "epoch": 0.08, "grad_norm": 0.9414665968555452, "learning_rate": 1.986371142615573e-05, "loss": 0.7992, "step": 463 }, { "epoch": 0.08, "grad_norm": 0.7306111133938757, "learning_rate": 1.9862775277351476e-05, "loss": 0.7596, "step": 464 }, { "epoch": 0.08, "grad_norm": 0.8551152737599159, "learning_rate": 1.9861835946610384e-05, "loss": 0.7015, "step": 465 }, { "epoch": 0.08, "grad_norm": 0.8092924797973844, "learning_rate": 1.9860893434235505e-05, "loss": 0.8068, "step": 466 }, { "epoch": 0.08, "grad_norm": 0.8698022481986285, "learning_rate": 1.9859947740530905e-05, "loss": 0.808, "step": 467 }, { "epoch": 0.08, "grad_norm": 0.9749619014880784, "learning_rate": 1.9858998865801688e-05, "loss": 0.7755, "step": 468 }, { "epoch": 0.08, "grad_norm": 1.11134160481534, "learning_rate": 1.9858046810353986e-05, "loss": 0.8088, "step": 469 }, { "epoch": 0.08, "grad_norm": 0.7682795774878506, "learning_rate": 1.9857091574494945e-05, "loss": 0.7222, "step": 470 }, { "epoch": 0.08, "grad_norm": 0.8675966306309555, "learning_rate": 1.9856133158532748e-05, "loss": 0.7609, "step": 471 }, { "epoch": 0.08, "grad_norm": 0.7387688872329765, "learning_rate": 1.9855171562776595e-05, "loss": 0.7208, "step": 472 }, { "epoch": 0.08, "grad_norm": 0.9017004746211407, "learning_rate": 1.9854206787536726e-05, "loss": 0.7713, "step": 473 }, { "epoch": 0.08, "grad_norm": 0.8158422135046964, "learning_rate": 1.9853238833124387e-05, "loss": 0.7806, "step": 474 }, { "epoch": 0.08, "grad_norm": 0.77504415793435, "learning_rate": 1.9852267699851865e-05, "loss": 0.7439, "step": 475 }, { "epoch": 0.08, "grad_norm": 0.8016697296863999, "learning_rate": 1.985129338803247e-05, "loss": 0.7428, "step": 476 }, { "epoch": 0.08, "grad_norm": 0.8294942204513672, "learning_rate": 1.9850315897980535e-05, "loss": 0.7705, "step": 477 }, { "epoch": 0.08, "grad_norm": 0.9505801828633351, "learning_rate": 1.9849335230011416e-05, "loss": 0.7746, "step": 478 }, { "epoch": 0.08, "grad_norm": 0.7615045565813228, "learning_rate": 1.9848351384441496e-05, "loss": 0.7325, "step": 479 }, { "epoch": 0.08, "grad_norm": 0.7511009681517573, "learning_rate": 1.9847364361588184e-05, "loss": 0.7464, "step": 480 }, { "epoch": 0.08, "grad_norm": 0.7719958102551266, "learning_rate": 1.984637416176992e-05, "loss": 0.7817, "step": 481 }, { "epoch": 0.08, "grad_norm": 0.8672223716889659, "learning_rate": 1.9845380785306157e-05, "loss": 0.7744, "step": 482 }, { "epoch": 0.08, "grad_norm": 0.876084822004357, "learning_rate": 1.9844384232517383e-05, "loss": 0.7196, "step": 483 }, { "epoch": 0.08, "grad_norm": 0.7897094965928901, "learning_rate": 1.9843384503725103e-05, "loss": 0.7537, "step": 484 }, { "epoch": 0.09, "grad_norm": 1.007353347021726, "learning_rate": 1.9842381599251856e-05, "loss": 0.8155, "step": 485 }, { "epoch": 0.09, "grad_norm": 0.8551700035708317, "learning_rate": 1.9841375519421194e-05, "loss": 0.7715, "step": 486 }, { "epoch": 0.09, "grad_norm": 0.858289473444655, "learning_rate": 1.9840366264557698e-05, "loss": 0.7405, "step": 487 }, { "epoch": 0.09, "grad_norm": 0.7180244214938318, "learning_rate": 1.983935383498698e-05, "loss": 0.6964, "step": 488 }, { "epoch": 0.09, "grad_norm": 0.7872777840043885, "learning_rate": 1.983833823103567e-05, "loss": 0.7447, "step": 489 }, { "epoch": 0.09, "grad_norm": 1.2969134807391185, "learning_rate": 1.9837319453031424e-05, "loss": 0.817, "step": 490 }, { "epoch": 0.09, "grad_norm": 0.7351416613557185, "learning_rate": 1.9836297501302915e-05, "loss": 0.7454, "step": 491 }, { "epoch": 0.09, "grad_norm": 0.748920966824341, "learning_rate": 1.983527237617985e-05, "loss": 0.7598, "step": 492 }, { "epoch": 0.09, "grad_norm": 0.9328531178908089, "learning_rate": 1.983424407799296e-05, "loss": 0.7741, "step": 493 }, { "epoch": 0.09, "grad_norm": 0.9215025507437631, "learning_rate": 1.9833212607073986e-05, "loss": 0.766, "step": 494 }, { "epoch": 0.09, "grad_norm": 0.8769164776130071, "learning_rate": 1.9832177963755707e-05, "loss": 0.7573, "step": 495 }, { "epoch": 0.09, "grad_norm": 0.8463112346786238, "learning_rate": 1.9831140148371926e-05, "loss": 0.7854, "step": 496 }, { "epoch": 0.09, "grad_norm": 0.6919258968671547, "learning_rate": 1.9830099161257454e-05, "loss": 0.7302, "step": 497 }, { "epoch": 0.09, "grad_norm": 1.2197117410460798, "learning_rate": 1.9829055002748137e-05, "loss": 0.7621, "step": 498 }, { "epoch": 0.09, "grad_norm": 0.7543575518054616, "learning_rate": 1.9828007673180847e-05, "loss": 0.7412, "step": 499 }, { "epoch": 0.09, "grad_norm": 0.798829068522451, "learning_rate": 1.9826957172893476e-05, "loss": 0.7331, "step": 500 }, { "epoch": 0.09, "grad_norm": 0.7096492144982032, "learning_rate": 1.982590350222493e-05, "loss": 0.7748, "step": 501 }, { "epoch": 0.09, "grad_norm": 0.8134383572789833, "learning_rate": 1.982484666151515e-05, "loss": 0.7632, "step": 502 }, { "epoch": 0.09, "grad_norm": 0.8899575060756721, "learning_rate": 1.9823786651105092e-05, "loss": 0.7386, "step": 503 }, { "epoch": 0.09, "grad_norm": 0.9127782595419466, "learning_rate": 1.982272347133674e-05, "loss": 0.7254, "step": 504 }, { "epoch": 0.09, "grad_norm": 0.7934277100899534, "learning_rate": 1.9821657122553097e-05, "loss": 0.7818, "step": 505 }, { "epoch": 0.09, "grad_norm": 0.808164797893132, "learning_rate": 1.982058760509819e-05, "loss": 0.7683, "step": 506 }, { "epoch": 0.09, "grad_norm": 0.7438308989496081, "learning_rate": 1.9819514919317066e-05, "loss": 0.7872, "step": 507 }, { "epoch": 0.09, "grad_norm": 0.7304858374019926, "learning_rate": 1.9818439065555793e-05, "loss": 0.7384, "step": 508 }, { "epoch": 0.09, "grad_norm": 0.6799981406023019, "learning_rate": 1.981736004416147e-05, "loss": 0.7152, "step": 509 }, { "epoch": 0.09, "grad_norm": 0.7389603698567997, "learning_rate": 1.9816277855482212e-05, "loss": 0.7058, "step": 510 }, { "epoch": 0.09, "grad_norm": 0.8802999081463164, "learning_rate": 1.9815192499867147e-05, "loss": 0.768, "step": 511 }, { "epoch": 0.09, "grad_norm": 0.9615673058983571, "learning_rate": 1.9814103977666443e-05, "loss": 0.7832, "step": 512 }, { "epoch": 0.09, "grad_norm": 0.8060413149655544, "learning_rate": 1.9813012289231276e-05, "loss": 0.7504, "step": 513 }, { "epoch": 0.09, "grad_norm": 0.834870347708771, "learning_rate": 1.9811917434913846e-05, "loss": 0.7684, "step": 514 }, { "epoch": 0.09, "grad_norm": 0.7635428155047254, "learning_rate": 1.981081941506738e-05, "loss": 0.7821, "step": 515 }, { "epoch": 0.09, "grad_norm": 0.7296487907535214, "learning_rate": 1.9809718230046116e-05, "loss": 0.7558, "step": 516 }, { "epoch": 0.09, "grad_norm": 0.730311596329511, "learning_rate": 1.9808613880205327e-05, "loss": 0.7576, "step": 517 }, { "epoch": 0.09, "grad_norm": 0.7068888939033778, "learning_rate": 1.980750636590129e-05, "loss": 0.7512, "step": 518 }, { "epoch": 0.09, "grad_norm": 0.9266662842456305, "learning_rate": 1.980639568749132e-05, "loss": 0.7906, "step": 519 }, { "epoch": 0.09, "grad_norm": 0.8243061561499516, "learning_rate": 1.980528184533374e-05, "loss": 0.7636, "step": 520 }, { "epoch": 0.09, "grad_norm": 0.9154261911917633, "learning_rate": 1.9804164839787903e-05, "loss": 0.7405, "step": 521 }, { "epoch": 0.09, "grad_norm": 0.895555976839802, "learning_rate": 1.9803044671214176e-05, "loss": 0.7686, "step": 522 }, { "epoch": 0.09, "grad_norm": 0.8138936857909488, "learning_rate": 1.9801921339973948e-05, "loss": 0.7744, "step": 523 }, { "epoch": 0.09, "grad_norm": 0.7553442575492129, "learning_rate": 1.9800794846429628e-05, "loss": 0.7523, "step": 524 }, { "epoch": 0.09, "grad_norm": 0.8503077479276419, "learning_rate": 1.9799665190944652e-05, "loss": 0.7511, "step": 525 }, { "epoch": 0.09, "grad_norm": 0.7661257217724603, "learning_rate": 1.9798532373883465e-05, "loss": 0.7131, "step": 526 }, { "epoch": 0.09, "grad_norm": 0.7548176801634663, "learning_rate": 1.979739639561154e-05, "loss": 0.7635, "step": 527 }, { "epoch": 0.09, "grad_norm": 0.7674808531883642, "learning_rate": 1.9796257256495364e-05, "loss": 0.7521, "step": 528 }, { "epoch": 0.09, "grad_norm": 0.6984249999527656, "learning_rate": 1.979511495690245e-05, "loss": 0.7319, "step": 529 }, { "epoch": 0.09, "grad_norm": 0.7249978207077354, "learning_rate": 1.9793969497201326e-05, "loss": 0.7049, "step": 530 }, { "epoch": 0.09, "grad_norm": 0.7573373837073646, "learning_rate": 1.9792820877761543e-05, "loss": 0.7192, "step": 531 }, { "epoch": 0.09, "grad_norm": 0.8512096684940289, "learning_rate": 1.979166909895367e-05, "loss": 0.7772, "step": 532 }, { "epoch": 0.09, "grad_norm": 0.8324671333554267, "learning_rate": 1.979051416114929e-05, "loss": 0.7989, "step": 533 }, { "epoch": 0.09, "grad_norm": 0.752902450756574, "learning_rate": 1.978935606472102e-05, "loss": 0.7667, "step": 534 }, { "epoch": 0.09, "grad_norm": 0.6731486246281305, "learning_rate": 1.9788194810042475e-05, "loss": 0.7119, "step": 535 }, { "epoch": 0.09, "grad_norm": 0.8012960721883187, "learning_rate": 1.9787030397488304e-05, "loss": 0.7982, "step": 536 }, { "epoch": 0.09, "grad_norm": 0.7651528887298984, "learning_rate": 1.978586282743417e-05, "loss": 0.7437, "step": 537 }, { "epoch": 0.09, "grad_norm": 0.7568889598773988, "learning_rate": 1.978469210025676e-05, "loss": 0.7144, "step": 538 }, { "epoch": 0.09, "grad_norm": 0.7595812835377417, "learning_rate": 1.978351821633377e-05, "loss": 0.7699, "step": 539 }, { "epoch": 0.09, "grad_norm": 0.7343889364111497, "learning_rate": 1.9782341176043923e-05, "loss": 0.7532, "step": 540 }, { "epoch": 0.09, "grad_norm": 0.8440756234466729, "learning_rate": 1.978116097976695e-05, "loss": 0.7404, "step": 541 }, { "epoch": 0.1, "grad_norm": 0.8447806000017273, "learning_rate": 1.9779977627883617e-05, "loss": 0.7887, "step": 542 }, { "epoch": 0.1, "grad_norm": 0.8206617830071264, "learning_rate": 1.977879112077569e-05, "loss": 0.7449, "step": 543 }, { "epoch": 0.1, "grad_norm": 0.7832428751682677, "learning_rate": 1.9777601458825965e-05, "loss": 0.7348, "step": 544 }, { "epoch": 0.1, "grad_norm": 0.8250984571494574, "learning_rate": 1.977640864241825e-05, "loss": 0.7297, "step": 545 }, { "epoch": 0.1, "grad_norm": 0.848589405714994, "learning_rate": 1.977521267193737e-05, "loss": 0.7857, "step": 546 }, { "epoch": 0.1, "grad_norm": 0.8540680848380097, "learning_rate": 1.9774013547769176e-05, "loss": 0.7579, "step": 547 }, { "epoch": 0.1, "grad_norm": 0.7537770840647847, "learning_rate": 1.9772811270300524e-05, "loss": 0.7605, "step": 548 }, { "epoch": 0.1, "grad_norm": 0.9481246855098646, "learning_rate": 1.97716058399193e-05, "loss": 0.7842, "step": 549 }, { "epoch": 0.1, "grad_norm": 0.7055489329336314, "learning_rate": 1.97703972570144e-05, "loss": 0.7562, "step": 550 }, { "epoch": 0.1, "grad_norm": 1.278727040807141, "learning_rate": 1.9769185521975732e-05, "loss": 0.7407, "step": 551 }, { "epoch": 0.1, "grad_norm": 0.8975871699480714, "learning_rate": 1.976797063519423e-05, "loss": 0.7953, "step": 552 }, { "epoch": 0.1, "grad_norm": 0.8759273543389297, "learning_rate": 1.9766752597061846e-05, "loss": 0.704, "step": 553 }, { "epoch": 0.1, "grad_norm": 0.8627404756637789, "learning_rate": 1.9765531407971543e-05, "loss": 0.7834, "step": 554 }, { "epoch": 0.1, "grad_norm": 1.099407660421469, "learning_rate": 1.9764307068317296e-05, "loss": 0.828, "step": 555 }, { "epoch": 0.1, "grad_norm": 0.8775521752411964, "learning_rate": 1.9763079578494107e-05, "loss": 0.7792, "step": 556 }, { "epoch": 0.1, "grad_norm": 0.994142090843372, "learning_rate": 1.9761848938897993e-05, "loss": 0.7312, "step": 557 }, { "epoch": 0.1, "grad_norm": 0.6975387788418437, "learning_rate": 1.9760615149925976e-05, "loss": 0.7712, "step": 558 }, { "epoch": 0.1, "grad_norm": 0.826134829728442, "learning_rate": 1.975937821197611e-05, "loss": 0.7346, "step": 559 }, { "epoch": 0.1, "grad_norm": 0.7875180667482798, "learning_rate": 1.975813812544745e-05, "loss": 0.7686, "step": 560 }, { "epoch": 0.1, "grad_norm": 0.7354877703931323, "learning_rate": 1.9756894890740082e-05, "loss": 0.7973, "step": 561 }, { "epoch": 0.1, "grad_norm": 0.8293674848055362, "learning_rate": 1.9755648508255092e-05, "loss": 0.7446, "step": 562 }, { "epoch": 0.1, "grad_norm": 0.7684531184354124, "learning_rate": 1.975439897839459e-05, "loss": 0.7609, "step": 563 }, { "epoch": 0.1, "grad_norm": 0.8954610427291553, "learning_rate": 1.9753146301561705e-05, "loss": 0.7614, "step": 564 }, { "epoch": 0.1, "grad_norm": 0.8010195021422107, "learning_rate": 1.9751890478160574e-05, "loss": 0.738, "step": 565 }, { "epoch": 0.1, "grad_norm": 0.7264467803660443, "learning_rate": 1.975063150859635e-05, "loss": 0.7107, "step": 566 }, { "epoch": 0.1, "grad_norm": 0.8829667202655547, "learning_rate": 1.9749369393275205e-05, "loss": 0.7554, "step": 567 }, { "epoch": 0.1, "grad_norm": 0.8035079738073652, "learning_rate": 1.974810413260432e-05, "loss": 0.776, "step": 568 }, { "epoch": 0.1, "grad_norm": 0.7253334673464773, "learning_rate": 1.9746835726991903e-05, "loss": 0.7558, "step": 569 }, { "epoch": 0.1, "grad_norm": 0.8081973302113362, "learning_rate": 1.974556417684716e-05, "loss": 0.7897, "step": 570 }, { "epoch": 0.1, "grad_norm": 0.9210247034691154, "learning_rate": 1.974428948258032e-05, "loss": 0.8322, "step": 571 }, { "epoch": 0.1, "grad_norm": 0.7684248969009789, "learning_rate": 1.974301164460263e-05, "loss": 0.7743, "step": 572 }, { "epoch": 0.1, "grad_norm": 0.7970642772088219, "learning_rate": 1.9741730663326348e-05, "loss": 0.755, "step": 573 }, { "epoch": 0.1, "grad_norm": 0.7464238330575304, "learning_rate": 1.9740446539164735e-05, "loss": 0.7544, "step": 574 }, { "epoch": 0.1, "grad_norm": 0.7143368781786331, "learning_rate": 1.9739159272532092e-05, "loss": 0.7544, "step": 575 }, { "epoch": 0.1, "grad_norm": 0.747353967939556, "learning_rate": 1.9737868863843704e-05, "loss": 0.7223, "step": 576 }, { "epoch": 0.1, "grad_norm": 0.9214918530383034, "learning_rate": 1.9736575313515894e-05, "loss": 0.7747, "step": 577 }, { "epoch": 0.1, "grad_norm": 0.7107374738748354, "learning_rate": 1.973527862196598e-05, "loss": 0.7374, "step": 578 }, { "epoch": 0.1, "grad_norm": 0.8011748004640891, "learning_rate": 1.973397878961231e-05, "loss": 0.7286, "step": 579 }, { "epoch": 0.1, "grad_norm": 0.8285663050307236, "learning_rate": 1.9732675816874227e-05, "loss": 0.7548, "step": 580 }, { "epoch": 0.1, "grad_norm": 0.7828747912904963, "learning_rate": 1.9731369704172107e-05, "loss": 0.719, "step": 581 }, { "epoch": 0.1, "grad_norm": 0.7483416817493264, "learning_rate": 1.973006045192732e-05, "loss": 0.7485, "step": 582 }, { "epoch": 0.1, "grad_norm": 0.7312356993669044, "learning_rate": 1.9728748060562267e-05, "loss": 0.7283, "step": 583 }, { "epoch": 0.1, "grad_norm": 0.7933031514826875, "learning_rate": 1.9727432530500348e-05, "loss": 0.7504, "step": 584 }, { "epoch": 0.1, "grad_norm": 0.7615497135585345, "learning_rate": 1.972611386216598e-05, "loss": 0.7187, "step": 585 }, { "epoch": 0.1, "grad_norm": 0.8639071240418686, "learning_rate": 1.9724792055984595e-05, "loss": 0.7777, "step": 586 }, { "epoch": 0.1, "grad_norm": 0.8002198283210307, "learning_rate": 1.9723467112382636e-05, "loss": 0.7645, "step": 587 }, { "epoch": 0.1, "grad_norm": 0.8362466292171453, "learning_rate": 1.9722139031787553e-05, "loss": 0.7227, "step": 588 }, { "epoch": 0.1, "grad_norm": 0.7712010037315337, "learning_rate": 1.9720807814627813e-05, "loss": 0.7359, "step": 589 }, { "epoch": 0.1, "grad_norm": 0.8938564116000176, "learning_rate": 1.97194734613329e-05, "loss": 0.8188, "step": 590 }, { "epoch": 0.1, "grad_norm": 0.7237717415515224, "learning_rate": 1.97181359723333e-05, "loss": 0.7559, "step": 591 }, { "epoch": 0.1, "grad_norm": 0.7494573536745601, "learning_rate": 1.9716795348060516e-05, "loss": 0.7477, "step": 592 }, { "epoch": 0.1, "grad_norm": 0.8552300762061101, "learning_rate": 1.971545158894706e-05, "loss": 0.7683, "step": 593 }, { "epoch": 0.1, "grad_norm": 0.7120110971434459, "learning_rate": 1.971410469542646e-05, "loss": 0.7302, "step": 594 }, { "epoch": 0.1, "grad_norm": 0.8043073429320737, "learning_rate": 1.9712754667933244e-05, "loss": 0.763, "step": 595 }, { "epoch": 0.1, "grad_norm": 0.7470812265707065, "learning_rate": 1.971140150690297e-05, "loss": 0.7326, "step": 596 }, { "epoch": 0.1, "grad_norm": 0.8086367737940354, "learning_rate": 1.9710045212772185e-05, "loss": 0.7478, "step": 597 }, { "epoch": 0.1, "grad_norm": 0.8718288877423573, "learning_rate": 1.970868578597847e-05, "loss": 0.7927, "step": 598 }, { "epoch": 0.11, "grad_norm": 0.7201783156529487, "learning_rate": 1.9707323226960394e-05, "loss": 0.7642, "step": 599 }, { "epoch": 0.11, "grad_norm": 0.8453456694905983, "learning_rate": 1.9705957536157547e-05, "loss": 0.7426, "step": 600 }, { "epoch": 0.11, "grad_norm": 0.6504997895248144, "learning_rate": 1.9704588714010538e-05, "loss": 0.7183, "step": 601 }, { "epoch": 0.11, "grad_norm": 0.8442584615492408, "learning_rate": 1.9703216760960968e-05, "loss": 0.7812, "step": 602 }, { "epoch": 0.11, "grad_norm": 1.1478460843256624, "learning_rate": 1.9701841677451466e-05, "loss": 0.8095, "step": 603 }, { "epoch": 0.11, "grad_norm": 0.7509800902611968, "learning_rate": 1.970046346392566e-05, "loss": 0.7817, "step": 604 }, { "epoch": 0.11, "grad_norm": 0.8294249423089739, "learning_rate": 1.9699082120828183e-05, "loss": 0.7741, "step": 605 }, { "epoch": 0.11, "grad_norm": 0.8580915826763138, "learning_rate": 1.9697697648604693e-05, "loss": 0.8049, "step": 606 }, { "epoch": 0.11, "grad_norm": 0.8700373624399388, "learning_rate": 1.969631004770185e-05, "loss": 0.7656, "step": 607 }, { "epoch": 0.11, "grad_norm": 0.7331766941105411, "learning_rate": 1.969491931856732e-05, "loss": 0.7097, "step": 608 }, { "epoch": 0.11, "grad_norm": 0.7606472505422243, "learning_rate": 1.969352546164978e-05, "loss": 0.7751, "step": 609 }, { "epoch": 0.11, "grad_norm": 0.9157016529821895, "learning_rate": 1.969212847739892e-05, "loss": 0.7825, "step": 610 }, { "epoch": 0.11, "grad_norm": 0.7427043187520821, "learning_rate": 1.9690728366265436e-05, "loss": 0.7383, "step": 611 }, { "epoch": 0.11, "grad_norm": 0.8440968548634247, "learning_rate": 1.9689325128701032e-05, "loss": 0.7463, "step": 612 }, { "epoch": 0.11, "grad_norm": 1.0512847090819606, "learning_rate": 1.968791876515842e-05, "loss": 0.7456, "step": 613 }, { "epoch": 0.11, "grad_norm": 0.9279768250432433, "learning_rate": 1.968650927609133e-05, "loss": 0.8418, "step": 614 }, { "epoch": 0.11, "grad_norm": 0.9232500044599615, "learning_rate": 1.9685096661954484e-05, "loss": 0.7551, "step": 615 }, { "epoch": 0.11, "grad_norm": 0.8313123568887517, "learning_rate": 1.968368092320362e-05, "loss": 0.7337, "step": 616 }, { "epoch": 0.11, "grad_norm": 0.7947495306550826, "learning_rate": 1.968226206029549e-05, "loss": 0.768, "step": 617 }, { "epoch": 0.11, "grad_norm": 0.8112831330039124, "learning_rate": 1.968084007368785e-05, "loss": 0.7343, "step": 618 }, { "epoch": 0.11, "grad_norm": 0.9562244824745674, "learning_rate": 1.967941496383946e-05, "loss": 0.8379, "step": 619 }, { "epoch": 0.11, "grad_norm": 0.7685555770256285, "learning_rate": 1.9677986731210084e-05, "loss": 0.7205, "step": 620 }, { "epoch": 0.11, "grad_norm": 0.7835450000812526, "learning_rate": 1.967655537626051e-05, "loss": 0.707, "step": 621 }, { "epoch": 0.11, "grad_norm": 0.6953327701313811, "learning_rate": 1.967512089945252e-05, "loss": 0.7384, "step": 622 }, { "epoch": 0.11, "grad_norm": 0.7287826917576935, "learning_rate": 1.9673683301248902e-05, "loss": 0.7547, "step": 623 }, { "epoch": 0.11, "grad_norm": 0.7057142511695544, "learning_rate": 1.9672242582113455e-05, "loss": 0.7418, "step": 624 }, { "epoch": 0.11, "grad_norm": 0.8040691085311827, "learning_rate": 1.9670798742510992e-05, "loss": 0.7754, "step": 625 }, { "epoch": 0.11, "grad_norm": 0.7674799730270281, "learning_rate": 1.966935178290732e-05, "loss": 0.7417, "step": 626 }, { "epoch": 0.11, "grad_norm": 0.8283420820415435, "learning_rate": 1.9667901703769255e-05, "loss": 0.773, "step": 627 }, { "epoch": 0.11, "grad_norm": 0.6931479143804953, "learning_rate": 1.966644850556463e-05, "loss": 0.7683, "step": 628 }, { "epoch": 0.11, "grad_norm": 0.7353388102914924, "learning_rate": 1.966499218876228e-05, "loss": 0.7203, "step": 629 }, { "epoch": 0.11, "grad_norm": 0.8451437998099038, "learning_rate": 1.966353275383203e-05, "loss": 0.7857, "step": 630 }, { "epoch": 0.11, "grad_norm": 0.7369261343897988, "learning_rate": 1.9662070201244736e-05, "loss": 0.7248, "step": 631 }, { "epoch": 0.11, "grad_norm": 0.7920298731653357, "learning_rate": 1.9660604531472246e-05, "loss": 0.7072, "step": 632 }, { "epoch": 0.11, "grad_norm": 0.9252563348725994, "learning_rate": 1.9659135744987407e-05, "loss": 0.7841, "step": 633 }, { "epoch": 0.11, "grad_norm": 0.7654436430622215, "learning_rate": 1.965766384226409e-05, "loss": 0.7244, "step": 634 }, { "epoch": 0.11, "grad_norm": 0.8820748665482847, "learning_rate": 1.9656188823777157e-05, "loss": 0.7678, "step": 635 }, { "epoch": 0.11, "grad_norm": 0.8368030923895007, "learning_rate": 1.9654710690002482e-05, "loss": 0.7588, "step": 636 }, { "epoch": 0.11, "grad_norm": 1.0590018104074201, "learning_rate": 1.965322944141694e-05, "loss": 0.7939, "step": 637 }, { "epoch": 0.11, "grad_norm": 0.7592766902851685, "learning_rate": 1.9651745078498412e-05, "loss": 0.7781, "step": 638 }, { "epoch": 0.11, "grad_norm": 0.895929559055156, "learning_rate": 1.9650257601725785e-05, "loss": 0.7506, "step": 639 }, { "epoch": 0.11, "grad_norm": 0.7295566450768064, "learning_rate": 1.9648767011578948e-05, "loss": 0.7844, "step": 640 }, { "epoch": 0.11, "grad_norm": 0.807988857781752, "learning_rate": 1.9647273308538802e-05, "loss": 0.75, "step": 641 }, { "epoch": 0.11, "grad_norm": 0.8490997395185098, "learning_rate": 1.9645776493087245e-05, "loss": 0.7306, "step": 642 }, { "epoch": 0.11, "grad_norm": 0.8397730910026341, "learning_rate": 1.9644276565707178e-05, "loss": 0.7558, "step": 643 }, { "epoch": 0.11, "grad_norm": 0.8939470484990434, "learning_rate": 1.9642773526882507e-05, "loss": 0.8794, "step": 644 }, { "epoch": 0.11, "grad_norm": 0.7261418426578365, "learning_rate": 1.9641267377098154e-05, "loss": 0.7551, "step": 645 }, { "epoch": 0.11, "grad_norm": 0.6995202321887527, "learning_rate": 1.9639758116840023e-05, "loss": 0.7722, "step": 646 }, { "epoch": 0.11, "grad_norm": 0.6989431530364268, "learning_rate": 1.9638245746595034e-05, "loss": 0.7138, "step": 647 }, { "epoch": 0.11, "grad_norm": 0.8154473439498702, "learning_rate": 1.9636730266851116e-05, "loss": 0.7433, "step": 648 }, { "epoch": 0.11, "grad_norm": 0.8657530144502702, "learning_rate": 1.963521167809719e-05, "loss": 0.8163, "step": 649 }, { "epoch": 0.11, "grad_norm": 0.8551800237869719, "learning_rate": 1.963368998082319e-05, "loss": 0.7954, "step": 650 }, { "epoch": 0.11, "grad_norm": 0.6814325641177337, "learning_rate": 1.9632165175520032e-05, "loss": 0.7582, "step": 651 }, { "epoch": 0.11, "grad_norm": 0.7356024059731523, "learning_rate": 1.9630637262679668e-05, "loss": 0.7561, "step": 652 }, { "epoch": 0.11, "grad_norm": 0.8023383413152883, "learning_rate": 1.9629106242795023e-05, "loss": 0.7432, "step": 653 }, { "epoch": 0.11, "grad_norm": 0.7554885855796972, "learning_rate": 1.9627572116360038e-05, "loss": 0.7736, "step": 654 }, { "epoch": 0.11, "grad_norm": 0.8638127320968847, "learning_rate": 1.962603488386966e-05, "loss": 0.7621, "step": 655 }, { "epoch": 0.12, "grad_norm": 0.7773569396694973, "learning_rate": 1.9624494545819827e-05, "loss": 0.7447, "step": 656 }, { "epoch": 0.12, "grad_norm": 0.7847415265722286, "learning_rate": 1.9622951102707482e-05, "loss": 0.7831, "step": 657 }, { "epoch": 0.12, "grad_norm": 0.7698427549120112, "learning_rate": 1.962140455503058e-05, "loss": 0.7451, "step": 658 }, { "epoch": 0.12, "grad_norm": 0.7467606009243652, "learning_rate": 1.961985490328806e-05, "loss": 0.7395, "step": 659 }, { "epoch": 0.12, "grad_norm": 0.7300580463927786, "learning_rate": 1.961830214797988e-05, "loss": 0.7243, "step": 660 }, { "epoch": 0.12, "grad_norm": 0.7384858315371257, "learning_rate": 1.9616746289606986e-05, "loss": 0.7352, "step": 661 }, { "epoch": 0.12, "grad_norm": 0.7445357765241579, "learning_rate": 1.961518732867133e-05, "loss": 0.7451, "step": 662 }, { "epoch": 0.12, "grad_norm": 1.5796015588976786, "learning_rate": 1.961362526567587e-05, "loss": 0.7695, "step": 663 }, { "epoch": 0.12, "grad_norm": 0.7815653887355843, "learning_rate": 1.961206010112456e-05, "loss": 0.755, "step": 664 }, { "epoch": 0.12, "grad_norm": 0.7638337574993752, "learning_rate": 1.9610491835522346e-05, "loss": 0.7973, "step": 665 }, { "epoch": 0.12, "grad_norm": 0.7321082351469685, "learning_rate": 1.9608920469375192e-05, "loss": 0.7144, "step": 666 }, { "epoch": 0.12, "grad_norm": 0.8531574315626369, "learning_rate": 1.9607346003190052e-05, "loss": 0.742, "step": 667 }, { "epoch": 0.12, "grad_norm": 0.8335275663836073, "learning_rate": 1.960576843747488e-05, "loss": 0.7607, "step": 668 }, { "epoch": 0.12, "grad_norm": 0.784554373827368, "learning_rate": 1.960418777273863e-05, "loss": 0.7261, "step": 669 }, { "epoch": 0.12, "grad_norm": 0.9066622216989532, "learning_rate": 1.960260400949126e-05, "loss": 0.7651, "step": 670 }, { "epoch": 0.12, "grad_norm": 0.9190190474226622, "learning_rate": 1.9601017148243726e-05, "loss": 0.7179, "step": 671 }, { "epoch": 0.12, "grad_norm": 0.7924137729218209, "learning_rate": 1.959942718950798e-05, "loss": 0.6988, "step": 672 }, { "epoch": 0.12, "grad_norm": 0.8257709273400585, "learning_rate": 1.9597834133796976e-05, "loss": 0.7122, "step": 673 }, { "epoch": 0.12, "grad_norm": 0.8175005931278989, "learning_rate": 1.9596237981624668e-05, "loss": 0.7416, "step": 674 }, { "epoch": 0.12, "grad_norm": 0.7124860939812055, "learning_rate": 1.959463873350601e-05, "loss": 0.7311, "step": 675 }, { "epoch": 0.12, "grad_norm": 0.9841833044953345, "learning_rate": 1.9593036389956946e-05, "loss": 0.7356, "step": 676 }, { "epoch": 0.12, "grad_norm": 0.8238346751303917, "learning_rate": 1.9591430951494433e-05, "loss": 0.7534, "step": 677 }, { "epoch": 0.12, "grad_norm": 0.9825072154236664, "learning_rate": 1.9589822418636415e-05, "loss": 0.7675, "step": 678 }, { "epoch": 0.12, "grad_norm": 0.8426416905435004, "learning_rate": 1.9588210791901837e-05, "loss": 0.8224, "step": 679 }, { "epoch": 0.12, "grad_norm": 1.1256165577415655, "learning_rate": 1.9586596071810646e-05, "loss": 0.7697, "step": 680 }, { "epoch": 0.12, "grad_norm": 0.8404138012298858, "learning_rate": 1.9584978258883785e-05, "loss": 0.7922, "step": 681 }, { "epoch": 0.12, "grad_norm": 0.872651223839841, "learning_rate": 1.9583357353643193e-05, "loss": 0.7974, "step": 682 }, { "epoch": 0.12, "grad_norm": 0.71865562316907, "learning_rate": 1.9581733356611807e-05, "loss": 0.7329, "step": 683 }, { "epoch": 0.12, "grad_norm": 0.9248065792866831, "learning_rate": 1.958010626831356e-05, "loss": 0.7668, "step": 684 }, { "epoch": 0.12, "grad_norm": 0.8140289463733111, "learning_rate": 1.957847608927339e-05, "loss": 0.7458, "step": 685 }, { "epoch": 0.12, "grad_norm": 0.7264401345131417, "learning_rate": 1.9576842820017225e-05, "loss": 0.724, "step": 686 }, { "epoch": 0.12, "grad_norm": 0.7490432584858902, "learning_rate": 1.9575206461071992e-05, "loss": 0.7222, "step": 687 }, { "epoch": 0.12, "grad_norm": 0.7035512587696805, "learning_rate": 1.957356701296561e-05, "loss": 0.7103, "step": 688 }, { "epoch": 0.12, "grad_norm": 0.8689293144057544, "learning_rate": 1.9571924476227006e-05, "loss": 0.808, "step": 689 }, { "epoch": 0.12, "grad_norm": 0.8065527820963424, "learning_rate": 1.9570278851386088e-05, "loss": 0.7789, "step": 690 }, { "epoch": 0.12, "grad_norm": 0.8302363927323284, "learning_rate": 1.956863013897378e-05, "loss": 0.7414, "step": 691 }, { "epoch": 0.12, "grad_norm": 0.8570518702750012, "learning_rate": 1.9566978339521984e-05, "loss": 0.7338, "step": 692 }, { "epoch": 0.12, "grad_norm": 0.7384544401793969, "learning_rate": 1.9565323453563602e-05, "loss": 0.7451, "step": 693 }, { "epoch": 0.12, "grad_norm": 0.7161989924977574, "learning_rate": 1.956366548163254e-05, "loss": 0.7487, "step": 694 }, { "epoch": 0.12, "grad_norm": 0.7862804700556284, "learning_rate": 1.9562004424263693e-05, "loss": 0.7637, "step": 695 }, { "epoch": 0.12, "grad_norm": 0.7773685204415005, "learning_rate": 1.9560340281992954e-05, "loss": 0.7619, "step": 696 }, { "epoch": 0.12, "grad_norm": 0.7185524365580993, "learning_rate": 1.955867305535721e-05, "loss": 0.7138, "step": 697 }, { "epoch": 0.12, "grad_norm": 0.7147330285486103, "learning_rate": 1.9557002744894338e-05, "loss": 0.692, "step": 698 }, { "epoch": 0.12, "grad_norm": 0.7765990724842751, "learning_rate": 1.955532935114322e-05, "loss": 0.7562, "step": 699 }, { "epoch": 0.12, "grad_norm": 0.7477897940338193, "learning_rate": 1.9553652874643725e-05, "loss": 0.7234, "step": 700 }, { "epoch": 0.12, "grad_norm": 0.7692264007380458, "learning_rate": 1.9551973315936724e-05, "loss": 0.7534, "step": 701 }, { "epoch": 0.12, "grad_norm": 0.7198750131790563, "learning_rate": 1.955029067556407e-05, "loss": 0.7226, "step": 702 }, { "epoch": 0.12, "grad_norm": 0.8907995645265948, "learning_rate": 1.9548604954068624e-05, "loss": 0.7991, "step": 703 }, { "epoch": 0.12, "grad_norm": 0.7552763487598365, "learning_rate": 1.9546916151994233e-05, "loss": 0.73, "step": 704 }, { "epoch": 0.12, "grad_norm": 0.8877639651286385, "learning_rate": 1.954522426988574e-05, "loss": 0.7977, "step": 705 }, { "epoch": 0.12, "grad_norm": 0.9157546694203894, "learning_rate": 1.954352930828898e-05, "loss": 0.8348, "step": 706 }, { "epoch": 0.12, "grad_norm": 0.7408630699192361, "learning_rate": 1.9541831267750785e-05, "loss": 0.7415, "step": 707 }, { "epoch": 0.12, "grad_norm": 0.7624770731898197, "learning_rate": 1.9540130148818978e-05, "loss": 0.7471, "step": 708 }, { "epoch": 0.12, "grad_norm": 0.854274021124812, "learning_rate": 1.9538425952042374e-05, "loss": 0.7341, "step": 709 }, { "epoch": 0.12, "grad_norm": 0.7932717273672455, "learning_rate": 1.953671867797078e-05, "loss": 0.745, "step": 710 }, { "epoch": 0.12, "grad_norm": 0.8141962686252763, "learning_rate": 1.9535008327155004e-05, "loss": 0.8173, "step": 711 }, { "epoch": 0.12, "grad_norm": 0.8454792388378248, "learning_rate": 1.953329490014684e-05, "loss": 0.8177, "step": 712 }, { "epoch": 0.13, "grad_norm": 0.8729256941465339, "learning_rate": 1.953157839749907e-05, "loss": 0.7204, "step": 713 }, { "epoch": 0.13, "grad_norm": 0.7521297236552301, "learning_rate": 1.952985881976548e-05, "loss": 0.713, "step": 714 }, { "epoch": 0.13, "grad_norm": 0.7510456939157029, "learning_rate": 1.9528136167500835e-05, "loss": 0.7852, "step": 715 }, { "epoch": 0.13, "grad_norm": 0.6729388972273128, "learning_rate": 1.9526410441260906e-05, "loss": 0.72, "step": 716 }, { "epoch": 0.13, "grad_norm": 0.7379414203538865, "learning_rate": 1.9524681641602444e-05, "loss": 0.7328, "step": 717 }, { "epoch": 0.13, "grad_norm": 0.7475296515458226, "learning_rate": 1.952294976908319e-05, "loss": 0.7284, "step": 718 }, { "epoch": 0.13, "grad_norm": 0.8809137616029937, "learning_rate": 1.9521214824261893e-05, "loss": 0.7764, "step": 719 }, { "epoch": 0.13, "grad_norm": 0.839816618387551, "learning_rate": 1.9519476807698278e-05, "loss": 0.7519, "step": 720 }, { "epoch": 0.13, "grad_norm": 0.8354399746459169, "learning_rate": 1.9517735719953065e-05, "loss": 0.7372, "step": 721 }, { "epoch": 0.13, "grad_norm": 0.7858516516804052, "learning_rate": 1.9515991561587963e-05, "loss": 0.7552, "step": 722 }, { "epoch": 0.13, "grad_norm": 0.7191735057111376, "learning_rate": 1.951424433316568e-05, "loss": 0.715, "step": 723 }, { "epoch": 0.13, "grad_norm": 0.9158915621330485, "learning_rate": 1.95124940352499e-05, "loss": 0.8205, "step": 724 }, { "epoch": 0.13, "grad_norm": 0.7188878904804176, "learning_rate": 1.9510740668405312e-05, "loss": 0.7509, "step": 725 }, { "epoch": 0.13, "grad_norm": 0.8230621847778415, "learning_rate": 1.9508984233197585e-05, "loss": 0.7522, "step": 726 }, { "epoch": 0.13, "grad_norm": 0.718567251274073, "learning_rate": 1.9507224730193383e-05, "loss": 0.7329, "step": 727 }, { "epoch": 0.13, "grad_norm": 0.8877738548602122, "learning_rate": 1.950546215996036e-05, "loss": 0.7737, "step": 728 }, { "epoch": 0.13, "grad_norm": 0.9040907673922772, "learning_rate": 1.9503696523067158e-05, "loss": 0.7244, "step": 729 }, { "epoch": 0.13, "grad_norm": 0.8208644014983358, "learning_rate": 1.9501927820083402e-05, "loss": 0.7938, "step": 730 }, { "epoch": 0.13, "grad_norm": 0.8340577123038697, "learning_rate": 1.950015605157972e-05, "loss": 0.7235, "step": 731 }, { "epoch": 0.13, "grad_norm": 0.7219745089325351, "learning_rate": 1.949838121812772e-05, "loss": 0.7084, "step": 732 }, { "epoch": 0.13, "grad_norm": 0.8742552440358988, "learning_rate": 1.9496603320299994e-05, "loss": 0.7772, "step": 733 }, { "epoch": 0.13, "grad_norm": 0.8386051024464336, "learning_rate": 1.9494822358670137e-05, "loss": 0.7605, "step": 734 }, { "epoch": 0.13, "grad_norm": 0.7695677692481361, "learning_rate": 1.9493038333812722e-05, "loss": 0.7919, "step": 735 }, { "epoch": 0.13, "grad_norm": 0.7942857330069666, "learning_rate": 1.949125124630331e-05, "loss": 0.76, "step": 736 }, { "epoch": 0.13, "grad_norm": 0.7982203347699682, "learning_rate": 1.9489461096718454e-05, "loss": 0.723, "step": 737 }, { "epoch": 0.13, "grad_norm": 0.8386424212268134, "learning_rate": 1.9487667885635694e-05, "loss": 0.7864, "step": 738 }, { "epoch": 0.13, "grad_norm": 0.7881743307849663, "learning_rate": 1.9485871613633557e-05, "loss": 0.766, "step": 739 }, { "epoch": 0.13, "grad_norm": 0.790584643651024, "learning_rate": 1.9484072281291556e-05, "loss": 0.7759, "step": 740 }, { "epoch": 0.13, "grad_norm": 0.7718971327759712, "learning_rate": 1.94822698891902e-05, "loss": 0.8162, "step": 741 }, { "epoch": 0.13, "grad_norm": 0.7453757747439862, "learning_rate": 1.948046443791097e-05, "loss": 0.718, "step": 742 }, { "epoch": 0.13, "grad_norm": 0.7796142164839395, "learning_rate": 1.9478655928036345e-05, "loss": 0.7532, "step": 743 }, { "epoch": 0.13, "grad_norm": 0.7926341783376187, "learning_rate": 1.947684436014979e-05, "loss": 0.7298, "step": 744 }, { "epoch": 0.13, "grad_norm": 0.8331962199825166, "learning_rate": 1.9475029734835754e-05, "loss": 0.7497, "step": 745 }, { "epoch": 0.13, "grad_norm": 0.8560071728488087, "learning_rate": 1.9473212052679667e-05, "loss": 0.775, "step": 746 }, { "epoch": 0.13, "grad_norm": 0.7580470207996753, "learning_rate": 1.947139131426796e-05, "loss": 0.7784, "step": 747 }, { "epoch": 0.13, "grad_norm": 0.7265208121720204, "learning_rate": 1.9469567520188036e-05, "loss": 0.7186, "step": 748 }, { "epoch": 0.13, "grad_norm": 0.872027530368671, "learning_rate": 1.9467740671028292e-05, "loss": 0.783, "step": 749 }, { "epoch": 0.13, "grad_norm": 0.7397228284248673, "learning_rate": 1.94659107673781e-05, "loss": 0.7359, "step": 750 }, { "epoch": 0.13, "grad_norm": 0.816133148117238, "learning_rate": 1.9464077809827837e-05, "loss": 0.7529, "step": 751 }, { "epoch": 0.13, "grad_norm": 0.7244030072944136, "learning_rate": 1.9462241798968846e-05, "loss": 0.7454, "step": 752 }, { "epoch": 0.13, "grad_norm": 0.7734347050455592, "learning_rate": 1.9460402735393466e-05, "loss": 0.751, "step": 753 }, { "epoch": 0.13, "grad_norm": 0.7560133100468683, "learning_rate": 1.9458560619695007e-05, "loss": 0.7856, "step": 754 }, { "epoch": 0.13, "grad_norm": 0.8123980274469631, "learning_rate": 1.945671545246779e-05, "loss": 0.7598, "step": 755 }, { "epoch": 0.13, "grad_norm": 2.7008438923259495, "learning_rate": 1.945486723430709e-05, "loss": 0.8014, "step": 756 }, { "epoch": 0.13, "grad_norm": 0.7646662650994435, "learning_rate": 1.945301596580919e-05, "loss": 0.7459, "step": 757 }, { "epoch": 0.13, "grad_norm": 0.7795209198239761, "learning_rate": 1.9451161647571343e-05, "loss": 0.7619, "step": 758 }, { "epoch": 0.13, "grad_norm": 0.7768205249404685, "learning_rate": 1.944930428019179e-05, "loss": 0.7332, "step": 759 }, { "epoch": 0.13, "grad_norm": 0.8237898771009718, "learning_rate": 1.9447443864269763e-05, "loss": 0.7851, "step": 760 }, { "epoch": 0.13, "grad_norm": 0.8103895441857897, "learning_rate": 1.944558040040547e-05, "loss": 0.7862, "step": 761 }, { "epoch": 0.13, "grad_norm": 0.7534274269896507, "learning_rate": 1.944371388920009e-05, "loss": 0.7616, "step": 762 }, { "epoch": 0.13, "grad_norm": 0.9002157667942096, "learning_rate": 1.9441844331255813e-05, "loss": 0.7875, "step": 763 }, { "epoch": 0.13, "grad_norm": 0.7006669745734746, "learning_rate": 1.9439971727175794e-05, "loss": 0.7264, "step": 764 }, { "epoch": 0.13, "grad_norm": 0.7296396161037644, "learning_rate": 1.943809607756417e-05, "loss": 0.7699, "step": 765 }, { "epoch": 0.13, "grad_norm": 0.7469177482199345, "learning_rate": 1.943621738302607e-05, "loss": 0.7323, "step": 766 }, { "epoch": 0.13, "grad_norm": 0.9077071654215104, "learning_rate": 1.9434335644167598e-05, "loss": 0.7725, "step": 767 }, { "epoch": 0.13, "grad_norm": 0.7901608032563896, "learning_rate": 1.9432450861595843e-05, "loss": 0.7942, "step": 768 }, { "epoch": 0.13, "grad_norm": 0.8117943689875206, "learning_rate": 1.9430563035918868e-05, "loss": 0.7849, "step": 769 }, { "epoch": 0.14, "grad_norm": 0.7588552667889448, "learning_rate": 1.9428672167745733e-05, "loss": 0.7424, "step": 770 }, { "epoch": 0.14, "grad_norm": 0.8513413621989011, "learning_rate": 1.9426778257686472e-05, "loss": 0.7216, "step": 771 }, { "epoch": 0.14, "grad_norm": 0.8090413132225497, "learning_rate": 1.9424881306352094e-05, "loss": 0.7585, "step": 772 }, { "epoch": 0.14, "grad_norm": 0.8784820810242319, "learning_rate": 1.94229813143546e-05, "loss": 0.7521, "step": 773 }, { "epoch": 0.14, "grad_norm": 0.7398735117337873, "learning_rate": 1.9421078282306963e-05, "loss": 0.7639, "step": 774 }, { "epoch": 0.14, "grad_norm": 0.8495484744404064, "learning_rate": 1.941917221082314e-05, "loss": 0.7823, "step": 775 }, { "epoch": 0.14, "grad_norm": 0.858956843203514, "learning_rate": 1.9417263100518078e-05, "loss": 0.7676, "step": 776 }, { "epoch": 0.14, "grad_norm": 0.8282704026170725, "learning_rate": 1.941535095200769e-05, "loss": 0.7773, "step": 777 }, { "epoch": 0.14, "grad_norm": 0.9000269084278699, "learning_rate": 1.941343576590887e-05, "loss": 0.8096, "step": 778 }, { "epoch": 0.14, "grad_norm": 0.7604085718112832, "learning_rate": 1.9411517542839504e-05, "loss": 0.7395, "step": 779 }, { "epoch": 0.14, "grad_norm": 1.4311214763645568, "learning_rate": 1.9409596283418448e-05, "loss": 0.7236, "step": 780 }, { "epoch": 0.14, "grad_norm": 0.792624391359825, "learning_rate": 1.9407671988265545e-05, "loss": 0.7642, "step": 781 }, { "epoch": 0.14, "grad_norm": 0.8111003365983842, "learning_rate": 1.9405744658001604e-05, "loss": 0.7476, "step": 782 }, { "epoch": 0.14, "grad_norm": 0.7500700751680208, "learning_rate": 1.9403814293248428e-05, "loss": 0.7125, "step": 783 }, { "epoch": 0.14, "grad_norm": 1.1113287864741606, "learning_rate": 1.940188089462879e-05, "loss": 0.8121, "step": 784 }, { "epoch": 0.14, "grad_norm": 0.8236277295548237, "learning_rate": 1.939994446276645e-05, "loss": 0.7391, "step": 785 }, { "epoch": 0.14, "grad_norm": 0.8007889008792068, "learning_rate": 1.9398004998286134e-05, "loss": 0.7506, "step": 786 }, { "epoch": 0.14, "grad_norm": 0.9102340130789494, "learning_rate": 1.9396062501813558e-05, "loss": 0.7343, "step": 787 }, { "epoch": 0.14, "grad_norm": 0.759345897689732, "learning_rate": 1.9394116973975412e-05, "loss": 0.7454, "step": 788 }, { "epoch": 0.14, "grad_norm": 0.8582563179273709, "learning_rate": 1.9392168415399363e-05, "loss": 0.7431, "step": 789 }, { "epoch": 0.14, "grad_norm": 0.8504392937405674, "learning_rate": 1.9390216826714053e-05, "loss": 0.7447, "step": 790 }, { "epoch": 0.14, "grad_norm": 0.8954385083962727, "learning_rate": 1.938826220854911e-05, "loss": 0.7799, "step": 791 }, { "epoch": 0.14, "grad_norm": 0.7631232540298577, "learning_rate": 1.9386304561535138e-05, "loss": 0.8075, "step": 792 }, { "epoch": 0.14, "grad_norm": 0.8289200292552591, "learning_rate": 1.9384343886303703e-05, "loss": 0.7562, "step": 793 }, { "epoch": 0.14, "grad_norm": 0.7018660991577796, "learning_rate": 1.9382380183487368e-05, "loss": 0.7532, "step": 794 }, { "epoch": 0.14, "grad_norm": 0.7233056938549726, "learning_rate": 1.9380413453719663e-05, "loss": 0.7648, "step": 795 }, { "epoch": 0.14, "grad_norm": 0.8015718214497052, "learning_rate": 1.9378443697635097e-05, "loss": 0.7597, "step": 796 }, { "epoch": 0.14, "grad_norm": 0.8440941191253214, "learning_rate": 1.9376470915869154e-05, "loss": 0.7838, "step": 797 }, { "epoch": 0.14, "grad_norm": 0.8320170127288685, "learning_rate": 1.9374495109058292e-05, "loss": 0.7636, "step": 798 }, { "epoch": 0.14, "grad_norm": 0.7695677281204358, "learning_rate": 1.937251627783995e-05, "loss": 0.7327, "step": 799 }, { "epoch": 0.14, "grad_norm": 0.7819767644828991, "learning_rate": 1.9370534422852542e-05, "loss": 0.7805, "step": 800 }, { "epoch": 0.14, "grad_norm": 0.8072888364761335, "learning_rate": 1.936854954473545e-05, "loss": 0.8035, "step": 801 }, { "epoch": 0.14, "grad_norm": 0.7824185432744247, "learning_rate": 1.936656164412904e-05, "loss": 0.765, "step": 802 }, { "epoch": 0.14, "grad_norm": 0.7766383803496726, "learning_rate": 1.9364570721674655e-05, "loss": 0.7489, "step": 803 }, { "epoch": 0.14, "grad_norm": 0.8617386097675106, "learning_rate": 1.93625767780146e-05, "loss": 0.7648, "step": 804 }, { "epoch": 0.14, "grad_norm": 0.7035311182657393, "learning_rate": 1.9360579813792164e-05, "loss": 0.7718, "step": 805 }, { "epoch": 0.14, "grad_norm": 1.0800389717614205, "learning_rate": 1.9358579829651615e-05, "loss": 0.8152, "step": 806 }, { "epoch": 0.14, "grad_norm": 0.7233906074340729, "learning_rate": 1.935657682623818e-05, "loss": 0.7107, "step": 807 }, { "epoch": 0.14, "grad_norm": 0.8074134267248764, "learning_rate": 1.9354570804198082e-05, "loss": 0.741, "step": 808 }, { "epoch": 0.14, "grad_norm": 0.867407865602712, "learning_rate": 1.93525617641785e-05, "loss": 0.764, "step": 809 }, { "epoch": 0.14, "grad_norm": 0.784323951946584, "learning_rate": 1.935054970682759e-05, "loss": 0.774, "step": 810 }, { "epoch": 0.14, "grad_norm": 1.0083089734445698, "learning_rate": 1.9348534632794482e-05, "loss": 0.7885, "step": 811 }, { "epoch": 0.14, "grad_norm": 0.6525840058604645, "learning_rate": 1.9346516542729285e-05, "loss": 0.7364, "step": 812 }, { "epoch": 0.14, "grad_norm": 0.8440195230431399, "learning_rate": 1.9344495437283077e-05, "loss": 0.75, "step": 813 }, { "epoch": 0.14, "grad_norm": 0.820408254975336, "learning_rate": 1.934247131710791e-05, "loss": 0.7782, "step": 814 }, { "epoch": 0.14, "grad_norm": 1.0846188371461798, "learning_rate": 1.93404441828568e-05, "loss": 0.8234, "step": 815 }, { "epoch": 0.14, "grad_norm": 0.7532549953034324, "learning_rate": 1.933841403518375e-05, "loss": 0.8048, "step": 816 }, { "epoch": 0.14, "grad_norm": 1.157263055837497, "learning_rate": 1.9336380874743727e-05, "loss": 0.7603, "step": 817 }, { "epoch": 0.14, "grad_norm": 0.665783758333789, "learning_rate": 1.9334344702192664e-05, "loss": 0.7104, "step": 818 }, { "epoch": 0.14, "grad_norm": 0.7902015752994779, "learning_rate": 1.9332305518187484e-05, "loss": 0.7882, "step": 819 }, { "epoch": 0.14, "grad_norm": 0.7205975266826072, "learning_rate": 1.9330263323386062e-05, "loss": 0.7037, "step": 820 }, { "epoch": 0.14, "grad_norm": 0.8331106660089228, "learning_rate": 1.9328218118447256e-05, "loss": 0.8102, "step": 821 }, { "epoch": 0.14, "grad_norm": 0.7768458513257069, "learning_rate": 1.9326169904030892e-05, "loss": 0.742, "step": 822 }, { "epoch": 0.14, "grad_norm": 0.8589847493899629, "learning_rate": 1.9324118680797764e-05, "loss": 0.7637, "step": 823 }, { "epoch": 0.14, "grad_norm": 0.7455110524239575, "learning_rate": 1.932206444940964e-05, "loss": 0.7815, "step": 824 }, { "epoch": 0.14, "grad_norm": 0.7752154334865642, "learning_rate": 1.932000721052926e-05, "loss": 0.8228, "step": 825 }, { "epoch": 0.14, "grad_norm": 0.6950538041620148, "learning_rate": 1.9317946964820332e-05, "loss": 0.72, "step": 826 }, { "epoch": 0.15, "grad_norm": 0.7191744915084023, "learning_rate": 1.931588371294753e-05, "loss": 0.7537, "step": 827 }, { "epoch": 0.15, "grad_norm": 0.8039591028516956, "learning_rate": 1.9313817455576506e-05, "loss": 0.7657, "step": 828 }, { "epoch": 0.15, "grad_norm": 0.7798978170399651, "learning_rate": 1.931174819337388e-05, "loss": 0.7458, "step": 829 }, { "epoch": 0.15, "grad_norm": 0.7674825966855309, "learning_rate": 1.930967592700723e-05, "loss": 0.7746, "step": 830 }, { "epoch": 0.15, "grad_norm": 0.7945981779230291, "learning_rate": 1.930760065714512e-05, "loss": 0.7562, "step": 831 }, { "epoch": 0.15, "grad_norm": 0.8359334482782033, "learning_rate": 1.9305522384457078e-05, "loss": 0.7861, "step": 832 }, { "epoch": 0.15, "grad_norm": 0.7447684704685391, "learning_rate": 1.9303441109613586e-05, "loss": 0.7469, "step": 833 }, { "epoch": 0.15, "grad_norm": 0.6888261289758142, "learning_rate": 1.9301356833286122e-05, "loss": 0.7148, "step": 834 }, { "epoch": 0.15, "grad_norm": 0.8092300689068944, "learning_rate": 1.9299269556147103e-05, "loss": 0.787, "step": 835 }, { "epoch": 0.15, "grad_norm": 0.7294126998828971, "learning_rate": 1.9297179278869936e-05, "loss": 0.7284, "step": 836 }, { "epoch": 0.15, "grad_norm": 0.7232498765268374, "learning_rate": 1.929508600212899e-05, "loss": 0.7365, "step": 837 }, { "epoch": 0.15, "grad_norm": 0.788980131910967, "learning_rate": 1.9292989726599593e-05, "loss": 0.7659, "step": 838 }, { "epoch": 0.15, "grad_norm": 0.7573175249747485, "learning_rate": 1.9290890452958053e-05, "loss": 0.7699, "step": 839 }, { "epoch": 0.15, "grad_norm": 0.8582941584183753, "learning_rate": 1.928878818188163e-05, "loss": 0.7479, "step": 840 }, { "epoch": 0.15, "grad_norm": 0.7537484735060604, "learning_rate": 1.9286682914048575e-05, "loss": 0.7689, "step": 841 }, { "epoch": 0.15, "grad_norm": 0.7831395298302953, "learning_rate": 1.9284574650138076e-05, "loss": 0.7506, "step": 842 }, { "epoch": 0.15, "grad_norm": 0.7572542961114399, "learning_rate": 1.9282463390830315e-05, "loss": 0.7599, "step": 843 }, { "epoch": 0.15, "grad_norm": 0.8139424792363875, "learning_rate": 1.9280349136806422e-05, "loss": 0.7821, "step": 844 }, { "epoch": 0.15, "grad_norm": 0.7594542181624855, "learning_rate": 1.9278231888748503e-05, "loss": 0.7117, "step": 845 }, { "epoch": 0.15, "grad_norm": 0.9224467690471277, "learning_rate": 1.927611164733962e-05, "loss": 0.7157, "step": 846 }, { "epoch": 0.15, "grad_norm": 0.9090997891576609, "learning_rate": 1.9273988413263813e-05, "loss": 0.7277, "step": 847 }, { "epoch": 0.15, "grad_norm": 0.8884519667842612, "learning_rate": 1.927186218720608e-05, "loss": 0.7977, "step": 848 }, { "epoch": 0.15, "grad_norm": 1.1079482778534167, "learning_rate": 1.9269732969852386e-05, "loss": 0.7639, "step": 849 }, { "epoch": 0.15, "grad_norm": 0.8103074282439823, "learning_rate": 1.926760076188966e-05, "loss": 0.7327, "step": 850 }, { "epoch": 0.15, "grad_norm": 0.8040022881186183, "learning_rate": 1.9265465564005797e-05, "loss": 0.7955, "step": 851 }, { "epoch": 0.15, "grad_norm": 0.833055586335902, "learning_rate": 1.9263327376889657e-05, "loss": 0.7145, "step": 852 }, { "epoch": 0.15, "grad_norm": 0.8263639003531955, "learning_rate": 1.9261186201231067e-05, "loss": 0.7512, "step": 853 }, { "epoch": 0.15, "grad_norm": 0.9727627936896989, "learning_rate": 1.9259042037720804e-05, "loss": 0.745, "step": 854 }, { "epoch": 0.15, "grad_norm": 0.8379804970410246, "learning_rate": 1.925689488705063e-05, "loss": 0.734, "step": 855 }, { "epoch": 0.15, "grad_norm": 0.906483411849566, "learning_rate": 1.925474474991326e-05, "loss": 0.7952, "step": 856 }, { "epoch": 0.15, "grad_norm": 0.8423369691673629, "learning_rate": 1.925259162700237e-05, "loss": 0.7421, "step": 857 }, { "epoch": 0.15, "grad_norm": 0.8188238329385958, "learning_rate": 1.92504355190126e-05, "loss": 0.7805, "step": 858 }, { "epoch": 0.15, "grad_norm": 0.9276514842367802, "learning_rate": 1.924827642663956e-05, "loss": 0.7748, "step": 859 }, { "epoch": 0.15, "grad_norm": 0.6887265948885554, "learning_rate": 1.924611435057982e-05, "loss": 0.743, "step": 860 }, { "epoch": 0.15, "grad_norm": 0.836629908101969, "learning_rate": 1.9243949291530903e-05, "loss": 0.7467, "step": 861 }, { "epoch": 0.15, "grad_norm": 0.8346739159040738, "learning_rate": 1.9241781250191307e-05, "loss": 0.7669, "step": 862 }, { "epoch": 0.15, "grad_norm": 0.7299294606141791, "learning_rate": 1.9239610227260487e-05, "loss": 0.7283, "step": 863 }, { "epoch": 0.15, "grad_norm": 0.8070819975563868, "learning_rate": 1.9237436223438863e-05, "loss": 0.7492, "step": 864 }, { "epoch": 0.15, "grad_norm": 0.7037457805950145, "learning_rate": 1.9235259239427808e-05, "loss": 0.7692, "step": 865 }, { "epoch": 0.15, "grad_norm": 0.6847608898504713, "learning_rate": 1.9233079275929666e-05, "loss": 0.7216, "step": 866 }, { "epoch": 0.15, "grad_norm": 0.7136076750277384, "learning_rate": 1.9230896333647736e-05, "loss": 0.7518, "step": 867 }, { "epoch": 0.15, "grad_norm": 0.7929694558791852, "learning_rate": 1.9228710413286286e-05, "loss": 0.7419, "step": 868 }, { "epoch": 0.15, "grad_norm": 0.7561940517977154, "learning_rate": 1.9226521515550534e-05, "loss": 0.7497, "step": 869 }, { "epoch": 0.15, "grad_norm": 0.8035011967558341, "learning_rate": 1.9224329641146666e-05, "loss": 0.7885, "step": 870 }, { "epoch": 0.15, "grad_norm": 0.7142304109677082, "learning_rate": 1.9222134790781828e-05, "loss": 0.7405, "step": 871 }, { "epoch": 0.15, "grad_norm": 0.7951102149966983, "learning_rate": 1.9219936965164123e-05, "loss": 0.7801, "step": 872 }, { "epoch": 0.15, "grad_norm": 0.8025763565424999, "learning_rate": 1.9217736165002617e-05, "loss": 0.78, "step": 873 }, { "epoch": 0.15, "grad_norm": 0.7361433065492453, "learning_rate": 1.921553239100733e-05, "loss": 0.7001, "step": 874 }, { "epoch": 0.15, "grad_norm": 0.7608899584273666, "learning_rate": 1.9213325643889245e-05, "loss": 0.754, "step": 875 }, { "epoch": 0.15, "grad_norm": 0.7170264306828357, "learning_rate": 1.9211115924360314e-05, "loss": 0.726, "step": 876 }, { "epoch": 0.15, "grad_norm": 0.8463444497918639, "learning_rate": 1.9208903233133428e-05, "loss": 0.7118, "step": 877 }, { "epoch": 0.15, "grad_norm": 0.8859603837273686, "learning_rate": 1.920668757092245e-05, "loss": 0.7687, "step": 878 }, { "epoch": 0.15, "grad_norm": 0.7287779158112355, "learning_rate": 1.9204468938442203e-05, "loss": 0.7558, "step": 879 }, { "epoch": 0.15, "grad_norm": 0.8600728982836613, "learning_rate": 1.920224733640846e-05, "loss": 0.7004, "step": 880 }, { "epoch": 0.15, "grad_norm": 0.8420167203443023, "learning_rate": 1.920002276553796e-05, "loss": 0.763, "step": 881 }, { "epoch": 0.15, "grad_norm": 0.8061249942932334, "learning_rate": 1.9197795226548388e-05, "loss": 0.727, "step": 882 }, { "epoch": 0.15, "grad_norm": 0.7880858185107402, "learning_rate": 1.91955647201584e-05, "loss": 0.7131, "step": 883 }, { "epoch": 0.16, "grad_norm": 0.7722174178916775, "learning_rate": 1.9193331247087608e-05, "loss": 0.7319, "step": 884 }, { "epoch": 0.16, "grad_norm": 0.7459374791986275, "learning_rate": 1.919109480805657e-05, "loss": 0.7477, "step": 885 }, { "epoch": 0.16, "grad_norm": 0.8659092577812452, "learning_rate": 1.9188855403786812e-05, "loss": 0.7839, "step": 886 }, { "epoch": 0.16, "grad_norm": 0.7688099953982104, "learning_rate": 1.9186613035000807e-05, "loss": 0.7401, "step": 887 }, { "epoch": 0.16, "grad_norm": 0.85749015139832, "learning_rate": 1.9184367702422e-05, "loss": 0.7548, "step": 888 }, { "epoch": 0.16, "grad_norm": 0.7870278686646514, "learning_rate": 1.918211940677477e-05, "loss": 0.801, "step": 889 }, { "epoch": 0.16, "grad_norm": 0.8373341969001753, "learning_rate": 1.917986814878447e-05, "loss": 0.7551, "step": 890 }, { "epoch": 0.16, "grad_norm": 0.8182074550747407, "learning_rate": 1.9177613929177403e-05, "loss": 0.763, "step": 891 }, { "epoch": 0.16, "grad_norm": 0.8179863357862194, "learning_rate": 1.917535674868083e-05, "loss": 0.7169, "step": 892 }, { "epoch": 0.16, "grad_norm": 0.7867996136578702, "learning_rate": 1.9173096608022957e-05, "loss": 0.7409, "step": 893 }, { "epoch": 0.16, "grad_norm": 0.8272934519340924, "learning_rate": 1.917083350793296e-05, "loss": 0.7999, "step": 894 }, { "epoch": 0.16, "grad_norm": 0.7663960919871204, "learning_rate": 1.9168567449140956e-05, "loss": 0.7126, "step": 895 }, { "epoch": 0.16, "grad_norm": 0.7053123988118982, "learning_rate": 1.9166298432378028e-05, "loss": 0.7181, "step": 896 }, { "epoch": 0.16, "grad_norm": 0.9769764163935666, "learning_rate": 1.9164026458376205e-05, "loss": 0.7445, "step": 897 }, { "epoch": 0.16, "grad_norm": 0.7518515259449893, "learning_rate": 1.9161751527868476e-05, "loss": 0.7312, "step": 898 }, { "epoch": 0.16, "grad_norm": 0.7027655536927091, "learning_rate": 1.9159473641588777e-05, "loss": 0.7588, "step": 899 }, { "epoch": 0.16, "grad_norm": 0.9931930026733069, "learning_rate": 1.9157192800272004e-05, "loss": 0.7668, "step": 900 }, { "epoch": 0.16, "grad_norm": 0.8965269331410464, "learning_rate": 1.9154909004654002e-05, "loss": 0.7577, "step": 901 }, { "epoch": 0.16, "grad_norm": 0.7834497563834214, "learning_rate": 1.9152622255471575e-05, "loss": 0.7979, "step": 902 }, { "epoch": 0.16, "grad_norm": 0.8908444605401595, "learning_rate": 1.915033255346247e-05, "loss": 0.775, "step": 903 }, { "epoch": 0.16, "grad_norm": 0.8721048566377623, "learning_rate": 1.91480398993654e-05, "loss": 0.7885, "step": 904 }, { "epoch": 0.16, "grad_norm": 0.8570576782375199, "learning_rate": 1.9145744293920015e-05, "loss": 0.8156, "step": 905 }, { "epoch": 0.16, "grad_norm": 0.8038807834227626, "learning_rate": 1.9143445737866935e-05, "loss": 0.7359, "step": 906 }, { "epoch": 0.16, "grad_norm": 0.7656298678246016, "learning_rate": 1.9141144231947713e-05, "loss": 0.777, "step": 907 }, { "epoch": 0.16, "grad_norm": 0.8047501932722437, "learning_rate": 1.9138839776904867e-05, "loss": 0.772, "step": 908 }, { "epoch": 0.16, "grad_norm": 0.8219674474485854, "learning_rate": 1.913653237348186e-05, "loss": 0.768, "step": 909 }, { "epoch": 0.16, "grad_norm": 0.7779492156971773, "learning_rate": 1.913422202242311e-05, "loss": 0.7458, "step": 910 }, { "epoch": 0.16, "grad_norm": 0.7776521934679539, "learning_rate": 1.9131908724473986e-05, "loss": 0.7771, "step": 911 }, { "epoch": 0.16, "grad_norm": 0.755133634968864, "learning_rate": 1.9129592480380803e-05, "loss": 0.7408, "step": 912 }, { "epoch": 0.16, "grad_norm": 0.8764062860760832, "learning_rate": 1.912727329089083e-05, "loss": 0.746, "step": 913 }, { "epoch": 0.16, "grad_norm": 0.8153012681362456, "learning_rate": 1.9124951156752287e-05, "loss": 0.8116, "step": 914 }, { "epoch": 0.16, "grad_norm": 0.7259146115470386, "learning_rate": 1.9122626078714345e-05, "loss": 0.7543, "step": 915 }, { "epoch": 0.16, "grad_norm": 0.7629615666477462, "learning_rate": 1.912029805752712e-05, "loss": 0.7481, "step": 916 }, { "epoch": 0.16, "grad_norm": 0.6615569565581164, "learning_rate": 1.9117967093941675e-05, "loss": 0.7273, "step": 917 }, { "epoch": 0.16, "grad_norm": 0.8513538113748502, "learning_rate": 1.9115633188710037e-05, "loss": 0.7687, "step": 918 }, { "epoch": 0.16, "grad_norm": 0.6512544680818377, "learning_rate": 1.9113296342585167e-05, "loss": 0.7087, "step": 919 }, { "epoch": 0.16, "grad_norm": 0.6964543208760609, "learning_rate": 1.9110956556320984e-05, "loss": 0.7102, "step": 920 }, { "epoch": 0.16, "grad_norm": 0.751428492862186, "learning_rate": 1.9108613830672348e-05, "loss": 0.7522, "step": 921 }, { "epoch": 0.16, "grad_norm": 0.7564227458532978, "learning_rate": 1.9106268166395068e-05, "loss": 0.7627, "step": 922 }, { "epoch": 0.16, "grad_norm": 0.8280094105655101, "learning_rate": 1.9103919564245916e-05, "loss": 0.7129, "step": 923 }, { "epoch": 0.16, "grad_norm": 0.7637378833479229, "learning_rate": 1.910156802498259e-05, "loss": 0.7511, "step": 924 }, { "epoch": 0.16, "grad_norm": 0.8375981917708896, "learning_rate": 1.9099213549363747e-05, "loss": 0.7672, "step": 925 }, { "epoch": 0.16, "grad_norm": 1.262098952805081, "learning_rate": 1.9096856138148993e-05, "loss": 0.7731, "step": 926 }, { "epoch": 0.16, "grad_norm": 0.7622351876585717, "learning_rate": 1.9094495792098874e-05, "loss": 0.7569, "step": 927 }, { "epoch": 0.16, "grad_norm": 0.7411815225959385, "learning_rate": 1.909213251197489e-05, "loss": 0.7893, "step": 928 }, { "epoch": 0.16, "grad_norm": 0.9021166604799222, "learning_rate": 1.908976629853948e-05, "loss": 0.7639, "step": 929 }, { "epoch": 0.16, "grad_norm": 0.7747097126805144, "learning_rate": 1.9087397152556043e-05, "loss": 0.7538, "step": 930 }, { "epoch": 0.16, "grad_norm": 0.7302733758959011, "learning_rate": 1.9085025074788906e-05, "loss": 0.7451, "step": 931 }, { "epoch": 0.16, "grad_norm": 0.6463356593459757, "learning_rate": 1.9082650066003354e-05, "loss": 0.7401, "step": 932 }, { "epoch": 0.16, "grad_norm": 0.6861210910823492, "learning_rate": 1.9080272126965615e-05, "loss": 0.7278, "step": 933 }, { "epoch": 0.16, "grad_norm": 0.7772105185097679, "learning_rate": 1.907789125844286e-05, "loss": 0.7591, "step": 934 }, { "epoch": 0.16, "grad_norm": 0.6987431579702154, "learning_rate": 1.9075507461203205e-05, "loss": 0.7307, "step": 935 }, { "epoch": 0.16, "grad_norm": 0.6962196728408497, "learning_rate": 1.9073120736015717e-05, "loss": 0.734, "step": 936 }, { "epoch": 0.16, "grad_norm": 0.7587350915361053, "learning_rate": 1.90707310836504e-05, "loss": 0.742, "step": 937 }, { "epoch": 0.16, "grad_norm": 0.6976547365783834, "learning_rate": 1.9068338504878212e-05, "loss": 0.7323, "step": 938 }, { "epoch": 0.16, "grad_norm": 0.6646864214774134, "learning_rate": 1.9065943000471043e-05, "loss": 0.723, "step": 939 }, { "epoch": 0.16, "grad_norm": 0.7176582209203917, "learning_rate": 1.9063544571201728e-05, "loss": 0.7376, "step": 940 }, { "epoch": 0.17, "grad_norm": 0.7522869789298734, "learning_rate": 1.906114321784406e-05, "loss": 0.7401, "step": 941 }, { "epoch": 0.17, "grad_norm": 0.7187092748948243, "learning_rate": 1.905873894117276e-05, "loss": 0.7341, "step": 942 }, { "epoch": 0.17, "grad_norm": 0.817231955034069, "learning_rate": 1.90563317419635e-05, "loss": 0.7004, "step": 943 }, { "epoch": 0.17, "grad_norm": 0.6429286435648299, "learning_rate": 1.9053921620992893e-05, "loss": 0.7156, "step": 944 }, { "epoch": 0.17, "grad_norm": 0.8984477121175869, "learning_rate": 1.905150857903849e-05, "loss": 0.7177, "step": 945 }, { "epoch": 0.17, "grad_norm": 0.7329864201047219, "learning_rate": 1.9049092616878796e-05, "loss": 0.7554, "step": 946 }, { "epoch": 0.17, "grad_norm": 0.8243376867286705, "learning_rate": 1.9046673735293243e-05, "loss": 0.7257, "step": 947 }, { "epoch": 0.17, "grad_norm": 0.7319216880636694, "learning_rate": 1.9044251935062214e-05, "loss": 0.7753, "step": 948 }, { "epoch": 0.17, "grad_norm": 0.9010190608851502, "learning_rate": 1.904182721696704e-05, "loss": 0.762, "step": 949 }, { "epoch": 0.17, "grad_norm": 0.677916422753305, "learning_rate": 1.9039399581789974e-05, "loss": 0.7002, "step": 950 }, { "epoch": 0.17, "grad_norm": 0.8427688153711639, "learning_rate": 1.903696903031423e-05, "loss": 0.7188, "step": 951 }, { "epoch": 0.17, "grad_norm": 0.7522658842867034, "learning_rate": 1.9034535563323952e-05, "loss": 0.7383, "step": 952 }, { "epoch": 0.17, "grad_norm": 0.8503630445241487, "learning_rate": 1.9032099181604227e-05, "loss": 0.7628, "step": 953 }, { "epoch": 0.17, "grad_norm": 0.5834438000726055, "learning_rate": 1.9029659885941087e-05, "loss": 0.6949, "step": 954 }, { "epoch": 0.17, "grad_norm": 0.7852526356256073, "learning_rate": 1.902721767712149e-05, "loss": 0.7271, "step": 955 }, { "epoch": 0.17, "grad_norm": 0.7826033250144177, "learning_rate": 1.902477255593335e-05, "loss": 0.7639, "step": 956 }, { "epoch": 0.17, "grad_norm": 0.6529926545591902, "learning_rate": 1.9022324523165514e-05, "loss": 0.7019, "step": 957 }, { "epoch": 0.17, "grad_norm": 0.7384990764356315, "learning_rate": 1.901987357960777e-05, "loss": 0.7142, "step": 958 }, { "epoch": 0.17, "grad_norm": 0.686698164827462, "learning_rate": 1.9017419726050836e-05, "loss": 0.7428, "step": 959 }, { "epoch": 0.17, "grad_norm": 0.8041961544156786, "learning_rate": 1.9014962963286382e-05, "loss": 0.7655, "step": 960 }, { "epoch": 0.17, "grad_norm": 0.770307268549736, "learning_rate": 1.9012503292107013e-05, "loss": 0.743, "step": 961 }, { "epoch": 0.17, "grad_norm": 0.756426893089863, "learning_rate": 1.9010040713306264e-05, "loss": 0.7684, "step": 962 }, { "epoch": 0.17, "grad_norm": 0.8025947695175756, "learning_rate": 1.9007575227678616e-05, "loss": 0.7751, "step": 963 }, { "epoch": 0.17, "grad_norm": 0.8518443881004355, "learning_rate": 1.9005106836019488e-05, "loss": 0.8192, "step": 964 }, { "epoch": 0.17, "grad_norm": 0.7801440564595743, "learning_rate": 1.9002635539125238e-05, "loss": 0.7254, "step": 965 }, { "epoch": 0.17, "grad_norm": 0.8276075912400426, "learning_rate": 1.900016133779315e-05, "loss": 0.7566, "step": 966 }, { "epoch": 0.17, "grad_norm": 0.7231568049127243, "learning_rate": 1.8997684232821458e-05, "loss": 0.7079, "step": 967 }, { "epoch": 0.17, "grad_norm": 1.0161463327612232, "learning_rate": 1.8995204225009324e-05, "loss": 0.804, "step": 968 }, { "epoch": 0.17, "grad_norm": 0.8081847931035346, "learning_rate": 1.8992721315156856e-05, "loss": 0.7402, "step": 969 }, { "epoch": 0.17, "grad_norm": 0.7394328229481172, "learning_rate": 1.8990235504065088e-05, "loss": 0.7361, "step": 970 }, { "epoch": 0.17, "grad_norm": 0.7510931496194859, "learning_rate": 1.8987746792535996e-05, "loss": 0.7567, "step": 971 }, { "epoch": 0.17, "grad_norm": 0.7698441318061824, "learning_rate": 1.898525518137249e-05, "loss": 0.7622, "step": 972 }, { "epoch": 0.17, "grad_norm": 0.7516001251616994, "learning_rate": 1.8982760671378414e-05, "loss": 0.7474, "step": 973 }, { "epoch": 0.17, "grad_norm": 0.7514682719257135, "learning_rate": 1.8980263263358553e-05, "loss": 0.7426, "step": 974 }, { "epoch": 0.17, "grad_norm": 0.8230886657040679, "learning_rate": 1.8977762958118617e-05, "loss": 0.7475, "step": 975 }, { "epoch": 0.17, "grad_norm": 0.7083414112539748, "learning_rate": 1.8975259756465263e-05, "loss": 0.7229, "step": 976 }, { "epoch": 0.17, "grad_norm": 0.8274865185364843, "learning_rate": 1.8972753659206068e-05, "loss": 0.7545, "step": 977 }, { "epoch": 0.17, "grad_norm": 0.6447154249448948, "learning_rate": 1.8970244667149564e-05, "loss": 0.6958, "step": 978 }, { "epoch": 0.17, "grad_norm": 0.8492939925579488, "learning_rate": 1.8967732781105187e-05, "loss": 0.7193, "step": 979 }, { "epoch": 0.17, "grad_norm": 0.870283062522345, "learning_rate": 1.896521800188334e-05, "loss": 0.7519, "step": 980 }, { "epoch": 0.17, "grad_norm": 0.7744260348816637, "learning_rate": 1.896270033029533e-05, "loss": 0.7154, "step": 981 }, { "epoch": 0.17, "grad_norm": 0.6813451466157958, "learning_rate": 1.896017976715342e-05, "loss": 0.7248, "step": 982 }, { "epoch": 0.17, "grad_norm": 0.9162455030471247, "learning_rate": 1.8957656313270795e-05, "loss": 0.7079, "step": 983 }, { "epoch": 0.17, "grad_norm": 0.6865379122114941, "learning_rate": 1.8955129969461565e-05, "loss": 0.726, "step": 984 }, { "epoch": 0.17, "grad_norm": 0.8292064838325677, "learning_rate": 1.8952600736540795e-05, "loss": 0.775, "step": 985 }, { "epoch": 0.17, "grad_norm": 0.7755617987802609, "learning_rate": 1.8950068615324455e-05, "loss": 0.7578, "step": 986 }, { "epoch": 0.17, "grad_norm": 0.8047560511183307, "learning_rate": 1.894753360662947e-05, "loss": 0.7652, "step": 987 }, { "epoch": 0.17, "grad_norm": 0.8945851859913141, "learning_rate": 1.894499571127368e-05, "loss": 0.7476, "step": 988 }, { "epoch": 0.17, "grad_norm": 0.6342112086924154, "learning_rate": 1.8942454930075865e-05, "loss": 0.7119, "step": 989 }, { "epoch": 0.17, "grad_norm": 0.7919195887711313, "learning_rate": 1.8939911263855735e-05, "loss": 0.6995, "step": 990 }, { "epoch": 0.17, "grad_norm": 0.7190522172356664, "learning_rate": 1.893736471343393e-05, "loss": 0.7791, "step": 991 }, { "epoch": 0.17, "grad_norm": 0.7601627989320285, "learning_rate": 1.8934815279632018e-05, "loss": 0.6913, "step": 992 }, { "epoch": 0.17, "grad_norm": 0.8591806248906775, "learning_rate": 1.8932262963272503e-05, "loss": 0.7168, "step": 993 }, { "epoch": 0.17, "grad_norm": 0.808678543129201, "learning_rate": 1.8929707765178812e-05, "loss": 0.7432, "step": 994 }, { "epoch": 0.17, "grad_norm": 0.8509940980601528, "learning_rate": 1.8927149686175307e-05, "loss": 0.7268, "step": 995 }, { "epoch": 0.17, "grad_norm": 0.7077077677884844, "learning_rate": 1.8924588727087277e-05, "loss": 0.7557, "step": 996 }, { "epoch": 0.17, "grad_norm": 0.8487266355769154, "learning_rate": 1.8922024888740943e-05, "loss": 0.7656, "step": 997 }, { "epoch": 0.17, "grad_norm": 0.8639486568099437, "learning_rate": 1.891945817196345e-05, "loss": 0.7415, "step": 998 }, { "epoch": 0.18, "grad_norm": 0.7587480718536903, "learning_rate": 1.8916888577582872e-05, "loss": 0.7527, "step": 999 }, { "epoch": 0.18, "grad_norm": 0.8199348371213521, "learning_rate": 1.8914316106428216e-05, "loss": 0.7245, "step": 1000 }, { "epoch": 0.18, "grad_norm": 0.6554188728525571, "learning_rate": 1.8911740759329418e-05, "loss": 0.7108, "step": 1001 }, { "epoch": 0.18, "grad_norm": 0.7689017669473027, "learning_rate": 1.8909162537117335e-05, "loss": 0.7544, "step": 1002 }, { "epoch": 0.18, "grad_norm": 0.8552415772067072, "learning_rate": 1.890658144062376e-05, "loss": 0.763, "step": 1003 }, { "epoch": 0.18, "grad_norm": 0.694806353649481, "learning_rate": 1.89039974706814e-05, "loss": 0.7659, "step": 1004 }, { "epoch": 0.18, "grad_norm": 0.7180637337651611, "learning_rate": 1.8901410628123898e-05, "loss": 0.6894, "step": 1005 }, { "epoch": 0.18, "grad_norm": 0.826263746783263, "learning_rate": 1.8898820913785836e-05, "loss": 0.7613, "step": 1006 }, { "epoch": 0.18, "grad_norm": 0.9093297638977191, "learning_rate": 1.8896228328502698e-05, "loss": 0.8418, "step": 1007 }, { "epoch": 0.18, "grad_norm": 0.8224237528482481, "learning_rate": 1.889363287311091e-05, "loss": 0.7317, "step": 1008 }, { "epoch": 0.18, "grad_norm": 0.7215284287695675, "learning_rate": 1.8891034548447818e-05, "loss": 0.7161, "step": 1009 }, { "epoch": 0.18, "grad_norm": 0.8085375144418608, "learning_rate": 1.8888433355351698e-05, "loss": 0.7952, "step": 1010 }, { "epoch": 0.18, "grad_norm": 0.6399051064975003, "learning_rate": 1.888582929466175e-05, "loss": 0.7051, "step": 1011 }, { "epoch": 0.18, "grad_norm": 0.9036463537607607, "learning_rate": 1.8883222367218096e-05, "loss": 0.7245, "step": 1012 }, { "epoch": 0.18, "grad_norm": 0.7778998486017087, "learning_rate": 1.8880612573861782e-05, "loss": 0.7716, "step": 1013 }, { "epoch": 0.18, "grad_norm": 0.724619401459826, "learning_rate": 1.887799991543479e-05, "loss": 0.7508, "step": 1014 }, { "epoch": 0.18, "grad_norm": 0.8688635413900918, "learning_rate": 1.8875384392780008e-05, "loss": 0.7864, "step": 1015 }, { "epoch": 0.18, "grad_norm": 0.7612905637798392, "learning_rate": 1.8872766006741268e-05, "loss": 0.6932, "step": 1016 }, { "epoch": 0.18, "grad_norm": 0.7140515559579382, "learning_rate": 1.8870144758163312e-05, "loss": 0.6759, "step": 1017 }, { "epoch": 0.18, "grad_norm": 0.8289097063133362, "learning_rate": 1.8867520647891805e-05, "loss": 0.7509, "step": 1018 }, { "epoch": 0.18, "grad_norm": 0.7657938277184583, "learning_rate": 1.8864893676773345e-05, "loss": 0.746, "step": 1019 }, { "epoch": 0.18, "grad_norm": 0.70392613763524, "learning_rate": 1.8862263845655447e-05, "loss": 0.7082, "step": 1020 }, { "epoch": 0.18, "grad_norm": 0.8238449752437228, "learning_rate": 1.8859631155386547e-05, "loss": 0.7405, "step": 1021 }, { "epoch": 0.18, "grad_norm": 0.8398859081436716, "learning_rate": 1.8856995606816008e-05, "loss": 0.7454, "step": 1022 }, { "epoch": 0.18, "grad_norm": 0.7732393324034595, "learning_rate": 1.8854357200794113e-05, "loss": 0.7521, "step": 1023 }, { "epoch": 0.18, "grad_norm": 0.7897478340880087, "learning_rate": 1.8851715938172065e-05, "loss": 0.7329, "step": 1024 }, { "epoch": 0.18, "grad_norm": 0.7988683864649565, "learning_rate": 1.8849071819801993e-05, "loss": 0.7469, "step": 1025 }, { "epoch": 0.18, "grad_norm": 0.7850658366065575, "learning_rate": 1.884642484653694e-05, "loss": 0.754, "step": 1026 }, { "epoch": 0.18, "grad_norm": 0.7746931234387092, "learning_rate": 1.884377501923088e-05, "loss": 0.7387, "step": 1027 }, { "epoch": 0.18, "grad_norm": 0.6594406892901932, "learning_rate": 1.8841122338738696e-05, "loss": 0.6995, "step": 1028 }, { "epoch": 0.18, "grad_norm": 0.7096631281418979, "learning_rate": 1.8838466805916206e-05, "loss": 0.6991, "step": 1029 }, { "epoch": 0.18, "grad_norm": 0.7052926059592697, "learning_rate": 1.8835808421620135e-05, "loss": 0.727, "step": 1030 }, { "epoch": 0.18, "grad_norm": 0.7320198293043083, "learning_rate": 1.8833147186708135e-05, "loss": 0.6832, "step": 1031 }, { "epoch": 0.18, "grad_norm": 0.7929733699258934, "learning_rate": 1.8830483102038775e-05, "loss": 0.7304, "step": 1032 }, { "epoch": 0.18, "grad_norm": 0.826544873404198, "learning_rate": 1.8827816168471543e-05, "loss": 0.7479, "step": 1033 }, { "epoch": 0.18, "grad_norm": 0.7401731442335532, "learning_rate": 1.8825146386866852e-05, "loss": 0.7654, "step": 1034 }, { "epoch": 0.18, "grad_norm": 0.75880469066902, "learning_rate": 1.8822473758086025e-05, "loss": 0.7717, "step": 1035 }, { "epoch": 0.18, "grad_norm": 0.7995993208437712, "learning_rate": 1.8819798282991306e-05, "loss": 0.7701, "step": 1036 }, { "epoch": 0.18, "grad_norm": 0.8781634256468713, "learning_rate": 1.8817119962445867e-05, "loss": 0.8118, "step": 1037 }, { "epoch": 0.18, "grad_norm": 0.6775719829055298, "learning_rate": 1.881443879731378e-05, "loss": 0.709, "step": 1038 }, { "epoch": 0.18, "grad_norm": 0.7773378792412322, "learning_rate": 1.8811754788460052e-05, "loss": 0.7664, "step": 1039 }, { "epoch": 0.18, "grad_norm": 0.8004800357283477, "learning_rate": 1.88090679367506e-05, "loss": 0.7229, "step": 1040 }, { "epoch": 0.18, "grad_norm": 0.790520995350165, "learning_rate": 1.8806378243052252e-05, "loss": 0.7731, "step": 1041 }, { "epoch": 0.18, "grad_norm": 0.7282058318688287, "learning_rate": 1.8803685708232765e-05, "loss": 0.729, "step": 1042 }, { "epoch": 0.18, "grad_norm": 0.687385947363831, "learning_rate": 1.8800990333160807e-05, "loss": 0.7423, "step": 1043 }, { "epoch": 0.18, "grad_norm": 0.7763744055278012, "learning_rate": 1.879829211870596e-05, "loss": 0.7366, "step": 1044 }, { "epoch": 0.18, "grad_norm": 0.7586032142231582, "learning_rate": 1.8795591065738724e-05, "loss": 0.7551, "step": 1045 }, { "epoch": 0.18, "grad_norm": 0.8203984806213649, "learning_rate": 1.8792887175130515e-05, "loss": 0.7729, "step": 1046 }, { "epoch": 0.18, "grad_norm": 0.7146994089510392, "learning_rate": 1.8790180447753665e-05, "loss": 0.7545, "step": 1047 }, { "epoch": 0.18, "grad_norm": 1.005920316434439, "learning_rate": 1.878747088448142e-05, "loss": 0.829, "step": 1048 }, { "epoch": 0.18, "grad_norm": 0.7466283309060033, "learning_rate": 1.8784758486187945e-05, "loss": 0.7199, "step": 1049 }, { "epoch": 0.18, "grad_norm": 0.8340868543267653, "learning_rate": 1.878204325374831e-05, "loss": 0.7505, "step": 1050 }, { "epoch": 0.18, "grad_norm": 0.7217488281059433, "learning_rate": 1.8779325188038512e-05, "loss": 0.7731, "step": 1051 }, { "epoch": 0.18, "grad_norm": 0.8272923207399109, "learning_rate": 1.8776604289935454e-05, "loss": 0.7131, "step": 1052 }, { "epoch": 0.18, "grad_norm": 0.7986994266661872, "learning_rate": 1.877388056031695e-05, "loss": 0.8062, "step": 1053 }, { "epoch": 0.18, "grad_norm": 0.9742922415213283, "learning_rate": 1.8771154000061737e-05, "loss": 0.8021, "step": 1054 }, { "epoch": 0.18, "grad_norm": 0.795567333161792, "learning_rate": 1.876842461004946e-05, "loss": 0.7638, "step": 1055 }, { "epoch": 0.19, "grad_norm": 0.7445791266647259, "learning_rate": 1.876569239116067e-05, "loss": 0.773, "step": 1056 }, { "epoch": 0.19, "grad_norm": 0.7498335542901929, "learning_rate": 1.8762957344276845e-05, "loss": 0.7432, "step": 1057 }, { "epoch": 0.19, "grad_norm": 0.8128771880420195, "learning_rate": 1.876021947028037e-05, "loss": 0.7516, "step": 1058 }, { "epoch": 0.19, "grad_norm": 0.9884523731944245, "learning_rate": 1.875747877005453e-05, "loss": 0.7271, "step": 1059 }, { "epoch": 0.19, "grad_norm": 0.7142486526605412, "learning_rate": 1.8754735244483545e-05, "loss": 0.7511, "step": 1060 }, { "epoch": 0.19, "grad_norm": 0.7525881845004174, "learning_rate": 1.8751988894452525e-05, "loss": 0.7397, "step": 1061 }, { "epoch": 0.19, "grad_norm": 0.6596001478219687, "learning_rate": 1.8749239720847498e-05, "loss": 0.7065, "step": 1062 }, { "epoch": 0.19, "grad_norm": 0.7291042821195997, "learning_rate": 1.874648772455541e-05, "loss": 0.7179, "step": 1063 }, { "epoch": 0.19, "grad_norm": 0.8321581029258853, "learning_rate": 1.8743732906464115e-05, "loss": 0.8006, "step": 1064 }, { "epoch": 0.19, "grad_norm": 0.7854433987044337, "learning_rate": 1.8740975267462366e-05, "loss": 0.734, "step": 1065 }, { "epoch": 0.19, "grad_norm": 0.8379442188587956, "learning_rate": 1.873821480843984e-05, "loss": 0.7636, "step": 1066 }, { "epoch": 0.19, "grad_norm": 0.7220817863154819, "learning_rate": 1.873545153028712e-05, "loss": 0.7164, "step": 1067 }, { "epoch": 0.19, "grad_norm": 0.8612358088199471, "learning_rate": 1.8732685433895694e-05, "loss": 0.7745, "step": 1068 }, { "epoch": 0.19, "grad_norm": 0.8342722027912547, "learning_rate": 1.872991652015796e-05, "loss": 0.758, "step": 1069 }, { "epoch": 0.19, "grad_norm": 0.7561759379402743, "learning_rate": 1.872714478996724e-05, "loss": 0.7261, "step": 1070 }, { "epoch": 0.19, "grad_norm": 0.7904136222151001, "learning_rate": 1.8724370244217733e-05, "loss": 0.7179, "step": 1071 }, { "epoch": 0.19, "grad_norm": 0.7788415420297166, "learning_rate": 1.8721592883804577e-05, "loss": 0.7668, "step": 1072 }, { "epoch": 0.19, "grad_norm": 0.7002275265438141, "learning_rate": 1.8718812709623807e-05, "loss": 0.735, "step": 1073 }, { "epoch": 0.19, "grad_norm": 0.7866693478140266, "learning_rate": 1.871602972257236e-05, "loss": 0.7653, "step": 1074 }, { "epoch": 0.19, "grad_norm": 0.7045143041547405, "learning_rate": 1.871324392354809e-05, "loss": 0.7204, "step": 1075 }, { "epoch": 0.19, "grad_norm": 0.7176371337372711, "learning_rate": 1.871045531344975e-05, "loss": 0.7071, "step": 1076 }, { "epoch": 0.19, "grad_norm": 0.6704843849536632, "learning_rate": 1.8707663893177008e-05, "loss": 0.6973, "step": 1077 }, { "epoch": 0.19, "grad_norm": 0.8169559515511249, "learning_rate": 1.870486966363043e-05, "loss": 0.762, "step": 1078 }, { "epoch": 0.19, "grad_norm": 0.6835049487325052, "learning_rate": 1.870207262571149e-05, "loss": 0.73, "step": 1079 }, { "epoch": 0.19, "grad_norm": 0.9193107320053966, "learning_rate": 1.8699272780322583e-05, "loss": 0.7957, "step": 1080 }, { "epoch": 0.19, "grad_norm": 0.791157337891335, "learning_rate": 1.8696470128366987e-05, "loss": 0.7754, "step": 1081 }, { "epoch": 0.19, "grad_norm": 0.7713739099587669, "learning_rate": 1.8693664670748896e-05, "loss": 0.7676, "step": 1082 }, { "epoch": 0.19, "grad_norm": 0.6775678001624642, "learning_rate": 1.8690856408373416e-05, "loss": 0.7212, "step": 1083 }, { "epoch": 0.19, "grad_norm": 0.7635197377519451, "learning_rate": 1.8688045342146543e-05, "loss": 0.7497, "step": 1084 }, { "epoch": 0.19, "grad_norm": 0.7261459214377627, "learning_rate": 1.8685231472975188e-05, "loss": 0.7682, "step": 1085 }, { "epoch": 0.19, "grad_norm": 0.7492633144516981, "learning_rate": 1.868241480176717e-05, "loss": 0.7582, "step": 1086 }, { "epoch": 0.19, "grad_norm": 0.7480154425531788, "learning_rate": 1.86795953294312e-05, "loss": 0.7322, "step": 1087 }, { "epoch": 0.19, "grad_norm": 0.7796121705441973, "learning_rate": 1.8676773056876897e-05, "loss": 0.768, "step": 1088 }, { "epoch": 0.19, "grad_norm": 0.7138960968376197, "learning_rate": 1.867394798501479e-05, "loss": 0.7124, "step": 1089 }, { "epoch": 0.19, "grad_norm": 0.8954399765973846, "learning_rate": 1.86711201147563e-05, "loss": 0.7162, "step": 1090 }, { "epoch": 0.19, "grad_norm": 0.7287715745907276, "learning_rate": 1.8668289447013763e-05, "loss": 0.7853, "step": 1091 }, { "epoch": 0.19, "grad_norm": 0.6790889970593916, "learning_rate": 1.8665455982700408e-05, "loss": 0.69, "step": 1092 }, { "epoch": 0.19, "grad_norm": 0.72421883581688, "learning_rate": 1.866261972273037e-05, "loss": 0.7284, "step": 1093 }, { "epoch": 0.19, "grad_norm": 0.7155023886217237, "learning_rate": 1.8659780668018686e-05, "loss": 0.7432, "step": 1094 }, { "epoch": 0.19, "grad_norm": 0.676722533101432, "learning_rate": 1.8656938819481295e-05, "loss": 0.674, "step": 1095 }, { "epoch": 0.19, "grad_norm": 0.7895272299387159, "learning_rate": 1.8654094178035035e-05, "loss": 0.7536, "step": 1096 }, { "epoch": 0.19, "grad_norm": 0.7269624511037626, "learning_rate": 1.865124674459765e-05, "loss": 0.7035, "step": 1097 }, { "epoch": 0.19, "grad_norm": 0.8212924929874763, "learning_rate": 1.8648396520087772e-05, "loss": 0.7273, "step": 1098 }, { "epoch": 0.19, "grad_norm": 0.8670277069807177, "learning_rate": 1.8645543505424953e-05, "loss": 0.7905, "step": 1099 }, { "epoch": 0.19, "grad_norm": 0.6688313434095239, "learning_rate": 1.8642687701529632e-05, "loss": 0.683, "step": 1100 }, { "epoch": 0.19, "grad_norm": 0.7433830345674834, "learning_rate": 1.863982910932315e-05, "loss": 0.743, "step": 1101 }, { "epoch": 0.19, "grad_norm": 0.7086023983734153, "learning_rate": 1.8636967729727752e-05, "loss": 0.6822, "step": 1102 }, { "epoch": 0.19, "grad_norm": 0.7920437851347683, "learning_rate": 1.8634103563666572e-05, "loss": 0.7902, "step": 1103 }, { "epoch": 0.19, "grad_norm": 0.6780854996028355, "learning_rate": 1.8631236612063654e-05, "loss": 0.7543, "step": 1104 }, { "epoch": 0.19, "grad_norm": 0.7403138659684492, "learning_rate": 1.8628366875843932e-05, "loss": 0.7662, "step": 1105 }, { "epoch": 0.19, "grad_norm": 0.7966639567611423, "learning_rate": 1.862549435593325e-05, "loss": 0.7237, "step": 1106 }, { "epoch": 0.19, "grad_norm": 0.7267141062375568, "learning_rate": 1.862261905325834e-05, "loss": 0.7669, "step": 1107 }, { "epoch": 0.19, "grad_norm": 0.8044752044740007, "learning_rate": 1.861974096874683e-05, "loss": 0.7442, "step": 1108 }, { "epoch": 0.19, "grad_norm": 0.7653869278481256, "learning_rate": 1.8616860103327257e-05, "loss": 0.7685, "step": 1109 }, { "epoch": 0.19, "grad_norm": 0.7153680352220333, "learning_rate": 1.8613976457929043e-05, "loss": 0.7186, "step": 1110 }, { "epoch": 0.19, "grad_norm": 0.784961117091671, "learning_rate": 1.8611090033482516e-05, "loss": 0.7517, "step": 1111 }, { "epoch": 0.19, "grad_norm": 0.8872916770530554, "learning_rate": 1.860820083091889e-05, "loss": 0.7416, "step": 1112 }, { "epoch": 0.2, "grad_norm": 0.7724169899398557, "learning_rate": 1.8605308851170288e-05, "loss": 0.7431, "step": 1113 }, { "epoch": 0.2, "grad_norm": 0.865956376166026, "learning_rate": 1.8602414095169723e-05, "loss": 0.7552, "step": 1114 }, { "epoch": 0.2, "grad_norm": 1.5111325096346635, "learning_rate": 1.85995165638511e-05, "loss": 0.7324, "step": 1115 }, { "epoch": 0.2, "grad_norm": 0.8000503097362549, "learning_rate": 1.8596616258149226e-05, "loss": 0.7333, "step": 1116 }, { "epoch": 0.2, "grad_norm": 0.79382718885793, "learning_rate": 1.85937131789998e-05, "loss": 0.7751, "step": 1117 }, { "epoch": 0.2, "grad_norm": 0.7975521406987497, "learning_rate": 1.8590807327339414e-05, "loss": 0.7356, "step": 1118 }, { "epoch": 0.2, "grad_norm": 0.7356012933174411, "learning_rate": 1.8587898704105556e-05, "loss": 0.7393, "step": 1119 }, { "epoch": 0.2, "grad_norm": 0.9600454055332855, "learning_rate": 1.8584987310236612e-05, "loss": 0.7722, "step": 1120 }, { "epoch": 0.2, "grad_norm": 0.9138688064164845, "learning_rate": 1.8582073146671855e-05, "loss": 0.8032, "step": 1121 }, { "epoch": 0.2, "grad_norm": 0.8574357712093279, "learning_rate": 1.8579156214351458e-05, "loss": 0.7711, "step": 1122 }, { "epoch": 0.2, "grad_norm": 0.7506882538445572, "learning_rate": 1.8576236514216476e-05, "loss": 0.7749, "step": 1123 }, { "epoch": 0.2, "grad_norm": 0.8058908461813106, "learning_rate": 1.857331404720888e-05, "loss": 0.7361, "step": 1124 }, { "epoch": 0.2, "grad_norm": 0.7750591484505364, "learning_rate": 1.8570388814271503e-05, "loss": 0.7341, "step": 1125 }, { "epoch": 0.2, "grad_norm": 0.7548345996859028, "learning_rate": 1.8567460816348097e-05, "loss": 0.7628, "step": 1126 }, { "epoch": 0.2, "grad_norm": 0.7049993230135037, "learning_rate": 1.8564530054383286e-05, "loss": 0.7045, "step": 1127 }, { "epoch": 0.2, "grad_norm": 0.7534955822899544, "learning_rate": 1.8561596529322603e-05, "loss": 0.7682, "step": 1128 }, { "epoch": 0.2, "grad_norm": 0.8468395611674842, "learning_rate": 1.855866024211246e-05, "loss": 0.7359, "step": 1129 }, { "epoch": 0.2, "grad_norm": 0.7410208670381517, "learning_rate": 1.8555721193700172e-05, "loss": 0.725, "step": 1130 }, { "epoch": 0.2, "grad_norm": 0.9523654290676339, "learning_rate": 1.8552779385033926e-05, "loss": 0.8114, "step": 1131 }, { "epoch": 0.2, "grad_norm": 0.9090904513328822, "learning_rate": 1.8549834817062814e-05, "loss": 0.7244, "step": 1132 }, { "epoch": 0.2, "grad_norm": 0.742170646651039, "learning_rate": 1.8546887490736823e-05, "loss": 0.7663, "step": 1133 }, { "epoch": 0.2, "grad_norm": 0.7670274170420512, "learning_rate": 1.854393740700681e-05, "loss": 0.7728, "step": 1134 }, { "epoch": 0.2, "grad_norm": 0.7525763697658396, "learning_rate": 1.8540984566824542e-05, "loss": 0.7264, "step": 1135 }, { "epoch": 0.2, "grad_norm": 0.8325851286588626, "learning_rate": 1.8538028971142662e-05, "loss": 0.7504, "step": 1136 }, { "epoch": 0.2, "grad_norm": 0.7178448566967377, "learning_rate": 1.8535070620914713e-05, "loss": 0.6925, "step": 1137 }, { "epoch": 0.2, "grad_norm": 0.8167619808334763, "learning_rate": 1.853210951709511e-05, "loss": 0.7542, "step": 1138 }, { "epoch": 0.2, "grad_norm": 0.7675766657035403, "learning_rate": 1.8529145660639178e-05, "loss": 0.7534, "step": 1139 }, { "epoch": 0.2, "grad_norm": 0.7520300964094615, "learning_rate": 1.8526179052503112e-05, "loss": 0.762, "step": 1140 }, { "epoch": 0.2, "grad_norm": 0.7600231379061789, "learning_rate": 1.8523209693644002e-05, "loss": 0.7255, "step": 1141 }, { "epoch": 0.2, "grad_norm": 0.7922333456566035, "learning_rate": 1.852023758501983e-05, "loss": 0.7528, "step": 1142 }, { "epoch": 0.2, "grad_norm": 0.6610261135108605, "learning_rate": 1.8517262727589456e-05, "loss": 0.7104, "step": 1143 }, { "epoch": 0.2, "grad_norm": 0.739445347299883, "learning_rate": 1.8514285122312627e-05, "loss": 0.7722, "step": 1144 }, { "epoch": 0.2, "grad_norm": 0.7562998344550865, "learning_rate": 1.851130477014999e-05, "loss": 0.7777, "step": 1145 }, { "epoch": 0.2, "grad_norm": 0.7957853276525217, "learning_rate": 1.8508321672063063e-05, "loss": 0.7827, "step": 1146 }, { "epoch": 0.2, "grad_norm": 0.8236704432229167, "learning_rate": 1.8505335829014255e-05, "loss": 0.7696, "step": 1147 }, { "epoch": 0.2, "grad_norm": 0.6860995542674312, "learning_rate": 1.8502347241966867e-05, "loss": 0.726, "step": 1148 }, { "epoch": 0.2, "grad_norm": 0.6864653913119477, "learning_rate": 1.849935591188507e-05, "loss": 0.7276, "step": 1149 }, { "epoch": 0.2, "grad_norm": 0.7427981296847243, "learning_rate": 1.8496361839733937e-05, "loss": 0.7865, "step": 1150 }, { "epoch": 0.2, "grad_norm": 0.8146723224369857, "learning_rate": 1.8493365026479418e-05, "loss": 0.7273, "step": 1151 }, { "epoch": 0.2, "grad_norm": 0.7931644103464452, "learning_rate": 1.8490365473088344e-05, "loss": 0.7112, "step": 1152 }, { "epoch": 0.2, "grad_norm": 0.719571154541641, "learning_rate": 1.848736318052843e-05, "loss": 0.728, "step": 1153 }, { "epoch": 0.2, "grad_norm": 0.7639824329665406, "learning_rate": 1.8484358149768287e-05, "loss": 0.7202, "step": 1154 }, { "epoch": 0.2, "grad_norm": 0.8393648200975895, "learning_rate": 1.8481350381777396e-05, "loss": 0.7926, "step": 1155 }, { "epoch": 0.2, "grad_norm": 0.8551869889945061, "learning_rate": 1.8478339877526122e-05, "loss": 0.7945, "step": 1156 }, { "epoch": 0.2, "grad_norm": 0.7841890578224007, "learning_rate": 1.8475326637985724e-05, "loss": 0.7554, "step": 1157 }, { "epoch": 0.2, "grad_norm": 0.8356327319823276, "learning_rate": 1.8472310664128332e-05, "loss": 0.7564, "step": 1158 }, { "epoch": 0.2, "grad_norm": 0.700222506613228, "learning_rate": 1.8469291956926963e-05, "loss": 0.733, "step": 1159 }, { "epoch": 0.2, "grad_norm": 0.7847504540165661, "learning_rate": 1.8466270517355508e-05, "loss": 0.7226, "step": 1160 }, { "epoch": 0.2, "grad_norm": 0.7415664534788005, "learning_rate": 1.846324634638876e-05, "loss": 0.6755, "step": 1161 }, { "epoch": 0.2, "grad_norm": 0.838838142120453, "learning_rate": 1.8460219445002367e-05, "loss": 0.8006, "step": 1162 }, { "epoch": 0.2, "grad_norm": 0.9562356493422661, "learning_rate": 1.8457189814172877e-05, "loss": 0.8256, "step": 1163 }, { "epoch": 0.2, "grad_norm": 0.7547285365755554, "learning_rate": 1.8454157454877712e-05, "loss": 0.7198, "step": 1164 }, { "epoch": 0.2, "grad_norm": 0.860736105722227, "learning_rate": 1.8451122368095173e-05, "loss": 0.7562, "step": 1165 }, { "epoch": 0.2, "grad_norm": 1.0339469658980582, "learning_rate": 1.844808455480444e-05, "loss": 0.7283, "step": 1166 }, { "epoch": 0.2, "grad_norm": 0.8365158191052576, "learning_rate": 1.8445044015985576e-05, "loss": 0.7544, "step": 1167 }, { "epoch": 0.2, "grad_norm": 0.7373342235515724, "learning_rate": 1.8442000752619525e-05, "loss": 0.7384, "step": 1168 }, { "epoch": 0.2, "grad_norm": 0.8023254237608446, "learning_rate": 1.8438954765688103e-05, "loss": 0.7475, "step": 1169 }, { "epoch": 0.21, "grad_norm": 0.7460343740361145, "learning_rate": 1.8435906056174016e-05, "loss": 0.7566, "step": 1170 }, { "epoch": 0.21, "grad_norm": 0.7281426639780573, "learning_rate": 1.8432854625060833e-05, "loss": 0.7893, "step": 1171 }, { "epoch": 0.21, "grad_norm": 0.7692775259693888, "learning_rate": 1.842980047333301e-05, "loss": 0.7277, "step": 1172 }, { "epoch": 0.21, "grad_norm": 0.73906504565724, "learning_rate": 1.8426743601975885e-05, "loss": 0.7329, "step": 1173 }, { "epoch": 0.21, "grad_norm": 0.663705456260769, "learning_rate": 1.8423684011975668e-05, "loss": 0.6882, "step": 1174 }, { "epoch": 0.21, "grad_norm": 0.7490089205498403, "learning_rate": 1.842062170431944e-05, "loss": 0.7155, "step": 1175 }, { "epoch": 0.21, "grad_norm": 0.6991318873470129, "learning_rate": 1.8417556679995175e-05, "loss": 0.7309, "step": 1176 }, { "epoch": 0.21, "grad_norm": 0.9019826167082213, "learning_rate": 1.8414488939991705e-05, "loss": 0.7056, "step": 1177 }, { "epoch": 0.21, "grad_norm": 0.7293755990544968, "learning_rate": 1.8411418485298752e-05, "loss": 0.757, "step": 1178 }, { "epoch": 0.21, "grad_norm": 0.73572001045144, "learning_rate": 1.8408345316906906e-05, "loss": 0.7124, "step": 1179 }, { "epoch": 0.21, "grad_norm": 0.8321679851581225, "learning_rate": 1.8405269435807637e-05, "loss": 0.8055, "step": 1180 }, { "epoch": 0.21, "grad_norm": 0.8579398581447657, "learning_rate": 1.8402190842993285e-05, "loss": 0.7264, "step": 1181 }, { "epoch": 0.21, "grad_norm": 0.7649053687704958, "learning_rate": 1.8399109539457072e-05, "loss": 0.7407, "step": 1182 }, { "epoch": 0.21, "grad_norm": 0.6603645165421631, "learning_rate": 1.8396025526193092e-05, "loss": 0.6866, "step": 1183 }, { "epoch": 0.21, "grad_norm": 0.7508880388119596, "learning_rate": 1.8392938804196308e-05, "loss": 0.8138, "step": 1184 }, { "epoch": 0.21, "grad_norm": 0.6841239052541156, "learning_rate": 1.8389849374462558e-05, "loss": 0.7498, "step": 1185 }, { "epoch": 0.21, "grad_norm": 0.8019286178727234, "learning_rate": 1.8386757237988568e-05, "loss": 0.7763, "step": 1186 }, { "epoch": 0.21, "grad_norm": 0.7142250087634718, "learning_rate": 1.8383662395771914e-05, "loss": 0.7828, "step": 1187 }, { "epoch": 0.21, "grad_norm": 0.7451091230936435, "learning_rate": 1.838056484881106e-05, "loss": 0.7623, "step": 1188 }, { "epoch": 0.21, "grad_norm": 0.7423323690566539, "learning_rate": 1.8377464598105342e-05, "loss": 0.734, "step": 1189 }, { "epoch": 0.21, "grad_norm": 0.7911803095528491, "learning_rate": 1.8374361644654965e-05, "loss": 0.7522, "step": 1190 }, { "epoch": 0.21, "grad_norm": 0.7021884025086534, "learning_rate": 1.8371255989461e-05, "loss": 0.7051, "step": 1191 }, { "epoch": 0.21, "grad_norm": 0.6978594144254934, "learning_rate": 1.8368147633525404e-05, "loss": 0.7576, "step": 1192 }, { "epoch": 0.21, "grad_norm": 0.8443328888376684, "learning_rate": 1.8365036577850996e-05, "loss": 0.7272, "step": 1193 }, { "epoch": 0.21, "grad_norm": 0.7399638278436399, "learning_rate": 1.8361922823441465e-05, "loss": 0.7155, "step": 1194 }, { "epoch": 0.21, "grad_norm": 0.8317299455036548, "learning_rate": 1.8358806371301372e-05, "loss": 0.773, "step": 1195 }, { "epoch": 0.21, "grad_norm": 0.7335366996803406, "learning_rate": 1.8355687222436157e-05, "loss": 0.7264, "step": 1196 }, { "epoch": 0.21, "grad_norm": 0.6532243860511874, "learning_rate": 1.835256537785211e-05, "loss": 0.7334, "step": 1197 }, { "epoch": 0.21, "grad_norm": 0.7434833523532612, "learning_rate": 1.8349440838556414e-05, "loss": 0.7745, "step": 1198 }, { "epoch": 0.21, "grad_norm": 0.7836879233659609, "learning_rate": 1.8346313605557105e-05, "loss": 0.6887, "step": 1199 }, { "epoch": 0.21, "grad_norm": 0.7479406901686436, "learning_rate": 1.8343183679863095e-05, "loss": 0.7431, "step": 1200 }, { "epoch": 0.21, "grad_norm": 0.6957101022413071, "learning_rate": 1.8340051062484164e-05, "loss": 0.7502, "step": 1201 }, { "epoch": 0.21, "grad_norm": 0.9097577767285466, "learning_rate": 1.8336915754430957e-05, "loss": 0.7794, "step": 1202 }, { "epoch": 0.21, "grad_norm": 0.8404625268273798, "learning_rate": 1.833377775671499e-05, "loss": 0.8093, "step": 1203 }, { "epoch": 0.21, "grad_norm": 0.7254139083492468, "learning_rate": 1.8330637070348653e-05, "loss": 0.7156, "step": 1204 }, { "epoch": 0.21, "grad_norm": 0.8504067950454907, "learning_rate": 1.8327493696345187e-05, "loss": 0.7054, "step": 1205 }, { "epoch": 0.21, "grad_norm": 0.8155655766315796, "learning_rate": 1.832434763571872e-05, "loss": 0.8113, "step": 1206 }, { "epoch": 0.21, "grad_norm": 0.8158315188993155, "learning_rate": 1.8321198889484227e-05, "loss": 0.746, "step": 1207 }, { "epoch": 0.21, "grad_norm": 0.7111083972240433, "learning_rate": 1.831804745865757e-05, "loss": 0.7433, "step": 1208 }, { "epoch": 0.21, "grad_norm": 0.8803373109402814, "learning_rate": 1.8314893344255454e-05, "loss": 0.7501, "step": 1209 }, { "epoch": 0.21, "grad_norm": 0.7776113912372178, "learning_rate": 1.831173654729547e-05, "loss": 0.7531, "step": 1210 }, { "epoch": 0.21, "grad_norm": 0.6929440798891552, "learning_rate": 1.8308577068796067e-05, "loss": 0.7107, "step": 1211 }, { "epoch": 0.21, "grad_norm": 0.7199069168372261, "learning_rate": 1.8305414909776548e-05, "loss": 0.757, "step": 1212 }, { "epoch": 0.21, "grad_norm": 0.6829538620226446, "learning_rate": 1.8302250071257107e-05, "loss": 0.7262, "step": 1213 }, { "epoch": 0.21, "grad_norm": 0.8213791813083442, "learning_rate": 1.8299082554258777e-05, "loss": 0.7735, "step": 1214 }, { "epoch": 0.21, "grad_norm": 0.724535308994745, "learning_rate": 1.8295912359803467e-05, "loss": 0.7113, "step": 1215 }, { "epoch": 0.21, "grad_norm": 0.7992369518707358, "learning_rate": 1.8292739488913948e-05, "loss": 0.7807, "step": 1216 }, { "epoch": 0.21, "grad_norm": 0.7282972294964696, "learning_rate": 1.8289563942613857e-05, "loss": 0.7706, "step": 1217 }, { "epoch": 0.21, "grad_norm": 0.6611005327968683, "learning_rate": 1.8286385721927686e-05, "loss": 0.6926, "step": 1218 }, { "epoch": 0.21, "grad_norm": 0.7397523476002679, "learning_rate": 1.82832048278808e-05, "loss": 0.7367, "step": 1219 }, { "epoch": 0.21, "grad_norm": 0.7538518709455814, "learning_rate": 1.828002126149942e-05, "loss": 0.7563, "step": 1220 }, { "epoch": 0.21, "grad_norm": 0.7669858847343568, "learning_rate": 1.8276835023810625e-05, "loss": 0.7093, "step": 1221 }, { "epoch": 0.21, "grad_norm": 0.7587673947430758, "learning_rate": 1.8273646115842374e-05, "loss": 0.7583, "step": 1222 }, { "epoch": 0.21, "grad_norm": 0.6848040767252197, "learning_rate": 1.8270454538623465e-05, "loss": 0.7513, "step": 1223 }, { "epoch": 0.21, "grad_norm": 0.7194737494867387, "learning_rate": 1.8267260293183573e-05, "loss": 0.7085, "step": 1224 }, { "epoch": 0.21, "grad_norm": 0.7182178069618557, "learning_rate": 1.8264063380553227e-05, "loss": 0.7433, "step": 1225 }, { "epoch": 0.21, "grad_norm": 0.7264556909933896, "learning_rate": 1.8260863801763812e-05, "loss": 0.7689, "step": 1226 }, { "epoch": 0.22, "grad_norm": 0.705776808918832, "learning_rate": 1.8257661557847588e-05, "loss": 0.7526, "step": 1227 }, { "epoch": 0.22, "grad_norm": 0.7692923025587816, "learning_rate": 1.825445664983766e-05, "loss": 0.7454, "step": 1228 }, { "epoch": 0.22, "grad_norm": 0.7301198095985111, "learning_rate": 1.8251249078767996e-05, "loss": 0.7407, "step": 1229 }, { "epoch": 0.22, "grad_norm": 0.6549011105365348, "learning_rate": 1.824803884567343e-05, "loss": 0.7444, "step": 1230 }, { "epoch": 0.22, "grad_norm": 0.8004584411844881, "learning_rate": 1.8244825951589648e-05, "loss": 0.7617, "step": 1231 }, { "epoch": 0.22, "grad_norm": 0.7087208375381548, "learning_rate": 1.82416103975532e-05, "loss": 0.7072, "step": 1232 }, { "epoch": 0.22, "grad_norm": 0.7180301320674589, "learning_rate": 1.8238392184601483e-05, "loss": 0.7287, "step": 1233 }, { "epoch": 0.22, "grad_norm": 0.7651916536351687, "learning_rate": 1.823517131377277e-05, "loss": 0.7765, "step": 1234 }, { "epoch": 0.22, "grad_norm": 0.7285825654059312, "learning_rate": 1.823194778610617e-05, "loss": 0.7108, "step": 1235 }, { "epoch": 0.22, "grad_norm": 0.8770598659240827, "learning_rate": 1.822872160264167e-05, "loss": 0.7912, "step": 1236 }, { "epoch": 0.22, "grad_norm": 0.9252546127532932, "learning_rate": 1.8225492764420098e-05, "loss": 0.7516, "step": 1237 }, { "epoch": 0.22, "grad_norm": 0.7622263116740613, "learning_rate": 1.8222261272483143e-05, "loss": 0.7117, "step": 1238 }, { "epoch": 0.22, "grad_norm": 0.7626335308570392, "learning_rate": 1.821902712787336e-05, "loss": 0.732, "step": 1239 }, { "epoch": 0.22, "grad_norm": 1.2319612251373464, "learning_rate": 1.8215790331634143e-05, "loss": 0.7203, "step": 1240 }, { "epoch": 0.22, "grad_norm": 0.8015420185275878, "learning_rate": 1.8212550884809755e-05, "loss": 0.7267, "step": 1241 }, { "epoch": 0.22, "grad_norm": 0.7937391439948012, "learning_rate": 1.8209308788445305e-05, "loss": 0.757, "step": 1242 }, { "epoch": 0.22, "grad_norm": 0.8503042426536581, "learning_rate": 1.8206064043586764e-05, "loss": 0.7347, "step": 1243 }, { "epoch": 0.22, "grad_norm": 0.7521065848829939, "learning_rate": 1.8202816651280955e-05, "loss": 0.7715, "step": 1244 }, { "epoch": 0.22, "grad_norm": 0.7950547015282948, "learning_rate": 1.819956661257555e-05, "loss": 0.7483, "step": 1245 }, { "epoch": 0.22, "grad_norm": 0.7604698733764718, "learning_rate": 1.819631392851908e-05, "loss": 0.756, "step": 1246 }, { "epoch": 0.22, "grad_norm": 0.7994890274825242, "learning_rate": 1.8193058600160935e-05, "loss": 0.7771, "step": 1247 }, { "epoch": 0.22, "grad_norm": 0.73204016561875, "learning_rate": 1.818980062855135e-05, "loss": 0.7197, "step": 1248 }, { "epoch": 0.22, "grad_norm": 0.8038335427677767, "learning_rate": 1.8186540014741405e-05, "loss": 0.7859, "step": 1249 }, { "epoch": 0.22, "grad_norm": 0.8002415011656815, "learning_rate": 1.8183276759783055e-05, "loss": 0.7071, "step": 1250 }, { "epoch": 0.22, "grad_norm": 0.6975202112031618, "learning_rate": 1.8180010864729084e-05, "loss": 0.7282, "step": 1251 }, { "epoch": 0.22, "grad_norm": 0.8111671126622318, "learning_rate": 1.8176742330633144e-05, "loss": 0.7889, "step": 1252 }, { "epoch": 0.22, "grad_norm": 0.8763200607581269, "learning_rate": 1.8173471158549726e-05, "loss": 0.7584, "step": 1253 }, { "epoch": 0.22, "grad_norm": 0.7654476033783075, "learning_rate": 1.8170197349534186e-05, "loss": 0.7011, "step": 1254 }, { "epoch": 0.22, "grad_norm": 0.6847552826540045, "learning_rate": 1.816692090464272e-05, "loss": 0.7167, "step": 1255 }, { "epoch": 0.22, "grad_norm": 0.7578340230680185, "learning_rate": 1.8163641824932376e-05, "loss": 0.733, "step": 1256 }, { "epoch": 0.22, "grad_norm": 0.8924980122387501, "learning_rate": 1.816036011146105e-05, "loss": 0.8162, "step": 1257 }, { "epoch": 0.22, "grad_norm": 0.7336200928274452, "learning_rate": 1.81570757652875e-05, "loss": 0.7426, "step": 1258 }, { "epoch": 0.22, "grad_norm": 0.7612483759890136, "learning_rate": 1.815378878747132e-05, "loss": 0.6903, "step": 1259 }, { "epoch": 0.22, "grad_norm": 0.7468983068022509, "learning_rate": 1.8150499179072952e-05, "loss": 0.7269, "step": 1260 }, { "epoch": 0.22, "grad_norm": 0.7472558044497042, "learning_rate": 1.8147206941153704e-05, "loss": 0.7907, "step": 1261 }, { "epoch": 0.22, "grad_norm": 0.7440712023076372, "learning_rate": 1.814391207477571e-05, "loss": 0.761, "step": 1262 }, { "epoch": 0.22, "grad_norm": 0.7602263049915268, "learning_rate": 1.814061458100197e-05, "loss": 0.7293, "step": 1263 }, { "epoch": 0.22, "grad_norm": 0.72859921136607, "learning_rate": 1.8137314460896317e-05, "loss": 0.7471, "step": 1264 }, { "epoch": 0.22, "grad_norm": 0.7794013856925844, "learning_rate": 1.813401171552345e-05, "loss": 0.7744, "step": 1265 }, { "epoch": 0.22, "grad_norm": 0.7243632209440674, "learning_rate": 1.8130706345948888e-05, "loss": 0.7138, "step": 1266 }, { "epoch": 0.22, "grad_norm": 0.8673439268302481, "learning_rate": 1.812739835323902e-05, "loss": 0.7643, "step": 1267 }, { "epoch": 0.22, "grad_norm": 0.6616654129079438, "learning_rate": 1.8124087738461076e-05, "loss": 0.7157, "step": 1268 }, { "epoch": 0.22, "grad_norm": 0.7909280323975353, "learning_rate": 1.812077450268313e-05, "loss": 0.7532, "step": 1269 }, { "epoch": 0.22, "grad_norm": 0.735728087949736, "learning_rate": 1.811745864697409e-05, "loss": 0.6987, "step": 1270 }, { "epoch": 0.22, "grad_norm": 0.8179221824382982, "learning_rate": 1.8114140172403734e-05, "loss": 0.785, "step": 1271 }, { "epoch": 0.22, "grad_norm": 0.7006981695040264, "learning_rate": 1.8110819080042663e-05, "loss": 0.7652, "step": 1272 }, { "epoch": 0.22, "grad_norm": 0.6795942924457778, "learning_rate": 1.810749537096233e-05, "loss": 0.6811, "step": 1273 }, { "epoch": 0.22, "grad_norm": 0.6453429426400221, "learning_rate": 1.810416904623504e-05, "loss": 0.7228, "step": 1274 }, { "epoch": 0.22, "grad_norm": 0.7851020832157586, "learning_rate": 1.810084010693393e-05, "loss": 0.7247, "step": 1275 }, { "epoch": 0.22, "grad_norm": 0.7329777615705523, "learning_rate": 1.809750855413298e-05, "loss": 0.7096, "step": 1276 }, { "epoch": 0.22, "grad_norm": 0.8210209483941616, "learning_rate": 1.8094174388907025e-05, "loss": 0.7493, "step": 1277 }, { "epoch": 0.22, "grad_norm": 0.7852878181391496, "learning_rate": 1.8090837612331736e-05, "loss": 0.735, "step": 1278 }, { "epoch": 0.22, "grad_norm": 0.6872835087165584, "learning_rate": 1.8087498225483627e-05, "loss": 0.7333, "step": 1279 }, { "epoch": 0.22, "grad_norm": 0.7956201815298437, "learning_rate": 1.8084156229440048e-05, "loss": 0.7531, "step": 1280 }, { "epoch": 0.22, "grad_norm": 0.8577791636942308, "learning_rate": 1.80808116252792e-05, "loss": 0.7422, "step": 1281 }, { "epoch": 0.22, "grad_norm": 0.8106992600665621, "learning_rate": 1.807746441408013e-05, "loss": 0.7821, "step": 1282 }, { "epoch": 0.22, "grad_norm": 0.7465904036540154, "learning_rate": 1.8074114596922704e-05, "loss": 0.7524, "step": 1283 }, { "epoch": 0.23, "grad_norm": 0.7612898461424514, "learning_rate": 1.8070762174887654e-05, "loss": 0.6547, "step": 1284 }, { "epoch": 0.23, "grad_norm": 0.7768227559907114, "learning_rate": 1.8067407149056533e-05, "loss": 0.7781, "step": 1285 }, { "epoch": 0.23, "grad_norm": 0.6791021681202012, "learning_rate": 1.8064049520511747e-05, "loss": 0.7218, "step": 1286 }, { "epoch": 0.23, "grad_norm": 0.8006722459955136, "learning_rate": 1.8060689290336536e-05, "loss": 0.7326, "step": 1287 }, { "epoch": 0.23, "grad_norm": 0.8476169162885574, "learning_rate": 1.805732645961498e-05, "loss": 0.7968, "step": 1288 }, { "epoch": 0.23, "grad_norm": 0.7056573209214967, "learning_rate": 1.8053961029432e-05, "loss": 0.7386, "step": 1289 }, { "epoch": 0.23, "grad_norm": 0.6832871326025702, "learning_rate": 1.8050593000873352e-05, "loss": 0.7089, "step": 1290 }, { "epoch": 0.23, "grad_norm": 0.7628705195109529, "learning_rate": 1.8047222375025632e-05, "loss": 0.7151, "step": 1291 }, { "epoch": 0.23, "grad_norm": 0.7262441986219392, "learning_rate": 1.8043849152976277e-05, "loss": 0.7346, "step": 1292 }, { "epoch": 0.23, "grad_norm": 0.7544242084154198, "learning_rate": 1.8040473335813555e-05, "loss": 0.7144, "step": 1293 }, { "epoch": 0.23, "grad_norm": 0.6441637814363119, "learning_rate": 1.8037094924626573e-05, "loss": 0.6749, "step": 1294 }, { "epoch": 0.23, "grad_norm": 0.7478308657903865, "learning_rate": 1.8033713920505285e-05, "loss": 0.7286, "step": 1295 }, { "epoch": 0.23, "grad_norm": 0.7624574060661394, "learning_rate": 1.8030330324540468e-05, "loss": 0.7545, "step": 1296 }, { "epoch": 0.23, "grad_norm": 0.6973477203656536, "learning_rate": 1.8026944137823742e-05, "loss": 0.7022, "step": 1297 }, { "epoch": 0.23, "grad_norm": 0.7851650339707589, "learning_rate": 1.8023555361447558e-05, "loss": 0.7729, "step": 1298 }, { "epoch": 0.23, "grad_norm": 0.7874601140473092, "learning_rate": 1.802016399650521e-05, "loss": 0.7752, "step": 1299 }, { "epoch": 0.23, "grad_norm": 0.7100548619818904, "learning_rate": 1.801677004409082e-05, "loss": 0.7331, "step": 1300 }, { "epoch": 0.23, "grad_norm": 0.7342342824177541, "learning_rate": 1.8013373505299353e-05, "loss": 0.7697, "step": 1301 }, { "epoch": 0.23, "grad_norm": 0.8143582657752351, "learning_rate": 1.8009974381226596e-05, "loss": 0.7222, "step": 1302 }, { "epoch": 0.23, "grad_norm": 0.8876348759525704, "learning_rate": 1.8006572672969182e-05, "loss": 0.771, "step": 1303 }, { "epoch": 0.23, "grad_norm": 0.8803522770625064, "learning_rate": 1.8003168381624572e-05, "loss": 0.7658, "step": 1304 }, { "epoch": 0.23, "grad_norm": 0.7432589768071872, "learning_rate": 1.799976150829106e-05, "loss": 0.7577, "step": 1305 }, { "epoch": 0.23, "grad_norm": 0.676948216048753, "learning_rate": 1.7996352054067778e-05, "loss": 0.735, "step": 1306 }, { "epoch": 0.23, "grad_norm": 0.7193076482738695, "learning_rate": 1.7992940020054682e-05, "loss": 0.7173, "step": 1307 }, { "epoch": 0.23, "grad_norm": 0.8086535623434694, "learning_rate": 1.7989525407352567e-05, "loss": 0.7124, "step": 1308 }, { "epoch": 0.23, "grad_norm": 0.8369103815286871, "learning_rate": 1.7986108217063064e-05, "loss": 0.8428, "step": 1309 }, { "epoch": 0.23, "grad_norm": 0.6925851777058056, "learning_rate": 1.798268845028862e-05, "loss": 0.7408, "step": 1310 }, { "epoch": 0.23, "grad_norm": 0.73359677106135, "learning_rate": 1.797926610813253e-05, "loss": 0.7607, "step": 1311 }, { "epoch": 0.23, "grad_norm": 0.708032767663543, "learning_rate": 1.7975841191698908e-05, "loss": 0.7291, "step": 1312 }, { "epoch": 0.23, "grad_norm": 0.7569330135053018, "learning_rate": 1.7972413702092708e-05, "loss": 0.7268, "step": 1313 }, { "epoch": 0.23, "grad_norm": 0.8627782538004081, "learning_rate": 1.7968983640419708e-05, "loss": 0.7255, "step": 1314 }, { "epoch": 0.23, "grad_norm": 0.7442692027232115, "learning_rate": 1.7965551007786515e-05, "loss": 0.7137, "step": 1315 }, { "epoch": 0.23, "grad_norm": 0.8159752903584095, "learning_rate": 1.7962115805300572e-05, "loss": 0.7682, "step": 1316 }, { "epoch": 0.23, "grad_norm": 0.778330746358816, "learning_rate": 1.7958678034070143e-05, "loss": 0.7724, "step": 1317 }, { "epoch": 0.23, "grad_norm": 0.7670129919634199, "learning_rate": 1.795523769520433e-05, "loss": 0.7078, "step": 1318 }, { "epoch": 0.23, "grad_norm": 0.8445475111875871, "learning_rate": 1.795179478981305e-05, "loss": 0.7877, "step": 1319 }, { "epoch": 0.23, "grad_norm": 0.7180548074022983, "learning_rate": 1.7948349319007065e-05, "loss": 0.7286, "step": 1320 }, { "epoch": 0.23, "grad_norm": 0.6874728123523715, "learning_rate": 1.7944901283897947e-05, "loss": 0.7044, "step": 1321 }, { "epoch": 0.23, "grad_norm": 0.8703204213937699, "learning_rate": 1.7941450685598108e-05, "loss": 0.7332, "step": 1322 }, { "epoch": 0.23, "grad_norm": 0.7154973458444274, "learning_rate": 1.7937997525220785e-05, "loss": 0.7101, "step": 1323 }, { "epoch": 0.23, "grad_norm": 0.702549518142173, "learning_rate": 1.7934541803880035e-05, "loss": 0.746, "step": 1324 }, { "epoch": 0.23, "grad_norm": 0.7457648322831936, "learning_rate": 1.7931083522690752e-05, "loss": 0.6896, "step": 1325 }, { "epoch": 0.23, "grad_norm": 0.638618156794349, "learning_rate": 1.7927622682768645e-05, "loss": 0.6674, "step": 1326 }, { "epoch": 0.23, "grad_norm": 0.7668558452203226, "learning_rate": 1.792415928523025e-05, "loss": 0.787, "step": 1327 }, { "epoch": 0.23, "grad_norm": 0.7214678759960359, "learning_rate": 1.792069333119294e-05, "loss": 0.7492, "step": 1328 }, { "epoch": 0.23, "grad_norm": 0.8973117234821939, "learning_rate": 1.7917224821774897e-05, "loss": 0.8509, "step": 1329 }, { "epoch": 0.23, "grad_norm": 0.8711969361929509, "learning_rate": 1.7913753758095138e-05, "loss": 0.7517, "step": 1330 }, { "epoch": 0.23, "grad_norm": 0.6716969978518681, "learning_rate": 1.7910280141273494e-05, "loss": 0.7126, "step": 1331 }, { "epoch": 0.23, "grad_norm": 0.6532971788014965, "learning_rate": 1.7906803972430636e-05, "loss": 0.7351, "step": 1332 }, { "epoch": 0.23, "grad_norm": 0.7532776857812551, "learning_rate": 1.790332525268804e-05, "loss": 0.7166, "step": 1333 }, { "epoch": 0.23, "grad_norm": 0.6057226564298777, "learning_rate": 1.789984398316802e-05, "loss": 0.6843, "step": 1334 }, { "epoch": 0.23, "grad_norm": 0.860729529505972, "learning_rate": 1.7896360164993705e-05, "loss": 0.8262, "step": 1335 }, { "epoch": 0.23, "grad_norm": 0.8081331136515698, "learning_rate": 1.7892873799289043e-05, "loss": 0.7786, "step": 1336 }, { "epoch": 0.23, "grad_norm": 0.8619067158805226, "learning_rate": 1.788938488717881e-05, "loss": 0.7036, "step": 1337 }, { "epoch": 0.23, "grad_norm": 0.8073997590577728, "learning_rate": 1.7885893429788605e-05, "loss": 0.7463, "step": 1338 }, { "epoch": 0.23, "grad_norm": 0.6868919022058878, "learning_rate": 1.7882399428244843e-05, "loss": 0.7343, "step": 1339 }, { "epoch": 0.23, "grad_norm": 0.8163188637738732, "learning_rate": 1.7878902883674757e-05, "loss": 0.6951, "step": 1340 }, { "epoch": 0.24, "grad_norm": 0.6872713967586096, "learning_rate": 1.7875403797206414e-05, "loss": 0.7147, "step": 1341 }, { "epoch": 0.24, "grad_norm": 0.7784020422528409, "learning_rate": 1.7871902169968683e-05, "loss": 0.6985, "step": 1342 }, { "epoch": 0.24, "grad_norm": 0.8416105288983569, "learning_rate": 1.786839800309127e-05, "loss": 0.7378, "step": 1343 }, { "epoch": 0.24, "grad_norm": 0.6592747520206479, "learning_rate": 1.786489129770469e-05, "loss": 0.71, "step": 1344 }, { "epoch": 0.24, "grad_norm": 0.8334875104087311, "learning_rate": 1.7861382054940274e-05, "loss": 0.7458, "step": 1345 }, { "epoch": 0.24, "grad_norm": 0.8364986903326895, "learning_rate": 1.7857870275930183e-05, "loss": 0.7799, "step": 1346 }, { "epoch": 0.24, "grad_norm": 0.8323425589240953, "learning_rate": 1.785435596180739e-05, "loss": 0.7594, "step": 1347 }, { "epoch": 0.24, "grad_norm": 0.786406269725757, "learning_rate": 1.785083911370568e-05, "loss": 0.7178, "step": 1348 }, { "epoch": 0.24, "grad_norm": 0.7441814899849001, "learning_rate": 1.7847319732759668e-05, "loss": 0.7491, "step": 1349 }, { "epoch": 0.24, "grad_norm": 0.791867606080257, "learning_rate": 1.7843797820104772e-05, "loss": 0.7014, "step": 1350 }, { "epoch": 0.24, "grad_norm": 0.7772383184903296, "learning_rate": 1.7840273376877246e-05, "loss": 0.7509, "step": 1351 }, { "epoch": 0.24, "grad_norm": 0.8263562164278344, "learning_rate": 1.7836746404214142e-05, "loss": 0.7514, "step": 1352 }, { "epoch": 0.24, "grad_norm": 0.8188149967432777, "learning_rate": 1.783321690325333e-05, "loss": 0.7735, "step": 1353 }, { "epoch": 0.24, "grad_norm": 0.7462170264569716, "learning_rate": 1.7829684875133512e-05, "loss": 0.6955, "step": 1354 }, { "epoch": 0.24, "grad_norm": 0.7023853277446592, "learning_rate": 1.7826150320994183e-05, "loss": 0.6945, "step": 1355 }, { "epoch": 0.24, "grad_norm": 0.7036000313166134, "learning_rate": 1.782261324197567e-05, "loss": 0.7472, "step": 1356 }, { "epoch": 0.24, "grad_norm": 0.7783184652440365, "learning_rate": 1.7819073639219105e-05, "loss": 0.7782, "step": 1357 }, { "epoch": 0.24, "grad_norm": 0.8159968185967744, "learning_rate": 1.781553151386644e-05, "loss": 0.7714, "step": 1358 }, { "epoch": 0.24, "grad_norm": 1.0762621694906045, "learning_rate": 1.781198686706044e-05, "loss": 0.759, "step": 1359 }, { "epoch": 0.24, "grad_norm": 0.7880196513435874, "learning_rate": 1.780843969994468e-05, "loss": 0.7735, "step": 1360 }, { "epoch": 0.24, "grad_norm": 0.696004489591867, "learning_rate": 1.7804890013663545e-05, "loss": 0.7255, "step": 1361 }, { "epoch": 0.24, "grad_norm": 0.7149908158700372, "learning_rate": 1.7801337809362248e-05, "loss": 0.7309, "step": 1362 }, { "epoch": 0.24, "grad_norm": 0.7246305802818837, "learning_rate": 1.7797783088186797e-05, "loss": 0.7262, "step": 1363 }, { "epoch": 0.24, "grad_norm": 0.6650281131988163, "learning_rate": 1.779422585128402e-05, "loss": 0.6894, "step": 1364 }, { "epoch": 0.24, "grad_norm": 0.7592314954204435, "learning_rate": 1.7790666099801558e-05, "loss": 0.7394, "step": 1365 }, { "epoch": 0.24, "grad_norm": 0.7906348464038496, "learning_rate": 1.7787103834887856e-05, "loss": 0.7616, "step": 1366 }, { "epoch": 0.24, "grad_norm": 0.9259755791647426, "learning_rate": 1.7783539057692187e-05, "loss": 0.8053, "step": 1367 }, { "epoch": 0.24, "grad_norm": 0.7719806815437071, "learning_rate": 1.7779971769364603e-05, "loss": 0.7691, "step": 1368 }, { "epoch": 0.24, "grad_norm": 0.7435901382467853, "learning_rate": 1.7776401971056004e-05, "loss": 0.7448, "step": 1369 }, { "epoch": 0.24, "grad_norm": 0.7765686569657504, "learning_rate": 1.777282966391807e-05, "loss": 0.7525, "step": 1370 }, { "epoch": 0.24, "grad_norm": 0.7673111669109495, "learning_rate": 1.776925484910331e-05, "loss": 0.7358, "step": 1371 }, { "epoch": 0.24, "grad_norm": 0.7551828757526, "learning_rate": 1.7765677527765025e-05, "loss": 0.7237, "step": 1372 }, { "epoch": 0.24, "grad_norm": 0.8605191428215649, "learning_rate": 1.7762097701057338e-05, "loss": 0.7997, "step": 1373 }, { "epoch": 0.24, "grad_norm": 0.8449519260512914, "learning_rate": 1.7758515370135177e-05, "loss": 0.7754, "step": 1374 }, { "epoch": 0.24, "grad_norm": 0.7211630251945444, "learning_rate": 1.7754930536154275e-05, "loss": 0.7202, "step": 1375 }, { "epoch": 0.24, "grad_norm": 0.8668652903356483, "learning_rate": 1.7751343200271174e-05, "loss": 0.8472, "step": 1376 }, { "epoch": 0.24, "grad_norm": 0.6951045242773715, "learning_rate": 1.7747753363643226e-05, "loss": 0.7363, "step": 1377 }, { "epoch": 0.24, "grad_norm": 0.8142740569334616, "learning_rate": 1.774416102742858e-05, "loss": 0.7678, "step": 1378 }, { "epoch": 0.24, "grad_norm": 0.7067243069068384, "learning_rate": 1.7740566192786208e-05, "loss": 0.7614, "step": 1379 }, { "epoch": 0.24, "grad_norm": 0.685028911861874, "learning_rate": 1.7736968860875867e-05, "loss": 0.7317, "step": 1380 }, { "epoch": 0.24, "grad_norm": 0.7856902872162957, "learning_rate": 1.7733369032858143e-05, "loss": 0.7631, "step": 1381 }, { "epoch": 0.24, "grad_norm": 0.642469927060816, "learning_rate": 1.7729766709894408e-05, "loss": 0.6801, "step": 1382 }, { "epoch": 0.24, "grad_norm": 0.7793534204073017, "learning_rate": 1.772616189314685e-05, "loss": 0.7475, "step": 1383 }, { "epoch": 0.24, "grad_norm": 0.8192174351740954, "learning_rate": 1.772255458377846e-05, "loss": 0.7736, "step": 1384 }, { "epoch": 0.24, "grad_norm": 0.6508202834454628, "learning_rate": 1.7718944782953025e-05, "loss": 0.688, "step": 1385 }, { "epoch": 0.24, "grad_norm": 0.7174294367253088, "learning_rate": 1.7715332491835146e-05, "loss": 0.6965, "step": 1386 }, { "epoch": 0.24, "grad_norm": 0.7335128139729328, "learning_rate": 1.771171771159022e-05, "loss": 0.7285, "step": 1387 }, { "epoch": 0.24, "grad_norm": 0.7518910787841253, "learning_rate": 1.7708100443384455e-05, "loss": 0.7526, "step": 1388 }, { "epoch": 0.24, "grad_norm": 0.7973656881399546, "learning_rate": 1.7704480688384854e-05, "loss": 0.7715, "step": 1389 }, { "epoch": 0.24, "grad_norm": 0.6532681164566032, "learning_rate": 1.7700858447759228e-05, "loss": 0.6964, "step": 1390 }, { "epoch": 0.24, "grad_norm": 0.8504845256564982, "learning_rate": 1.7697233722676185e-05, "loss": 0.7305, "step": 1391 }, { "epoch": 0.24, "grad_norm": 0.716412340130392, "learning_rate": 1.769360651430514e-05, "loss": 0.7369, "step": 1392 }, { "epoch": 0.24, "grad_norm": 0.7158785448744391, "learning_rate": 1.76899768238163e-05, "loss": 0.6794, "step": 1393 }, { "epoch": 0.24, "grad_norm": 0.7737870669286501, "learning_rate": 1.7686344652380684e-05, "loss": 0.7419, "step": 1394 }, { "epoch": 0.24, "grad_norm": 0.7432099600317918, "learning_rate": 1.76827100011701e-05, "loss": 0.7095, "step": 1395 }, { "epoch": 0.24, "grad_norm": 0.779994432275517, "learning_rate": 1.7679072871357174e-05, "loss": 0.6995, "step": 1396 }, { "epoch": 0.24, "grad_norm": 0.7062657899227921, "learning_rate": 1.767543326411531e-05, "loss": 0.7347, "step": 1397 }, { "epoch": 0.25, "grad_norm": 0.711440750315502, "learning_rate": 1.7671791180618722e-05, "loss": 0.7066, "step": 1398 }, { "epoch": 0.25, "grad_norm": 0.711399017771291, "learning_rate": 1.7668146622042426e-05, "loss": 0.6788, "step": 1399 }, { "epoch": 0.25, "grad_norm": 0.6939842121445274, "learning_rate": 1.7664499589562227e-05, "loss": 0.6925, "step": 1400 }, { "epoch": 0.25, "grad_norm": 0.6988167337609682, "learning_rate": 1.7660850084354743e-05, "loss": 0.7288, "step": 1401 }, { "epoch": 0.25, "grad_norm": 0.680263999877132, "learning_rate": 1.765719810759737e-05, "loss": 0.6947, "step": 1402 }, { "epoch": 0.25, "grad_norm": 0.6634028373507175, "learning_rate": 1.7653543660468317e-05, "loss": 0.7522, "step": 1403 }, { "epoch": 0.25, "grad_norm": 0.6758769361931172, "learning_rate": 1.7649886744146586e-05, "loss": 0.7111, "step": 1404 }, { "epoch": 0.25, "grad_norm": 1.3084487256320256, "learning_rate": 1.764622735981197e-05, "loss": 0.7981, "step": 1405 }, { "epoch": 0.25, "grad_norm": 0.7596811747259102, "learning_rate": 1.7642565508645067e-05, "loss": 0.7671, "step": 1406 }, { "epoch": 0.25, "grad_norm": 0.7378567413149598, "learning_rate": 1.7638901191827264e-05, "loss": 0.7324, "step": 1407 }, { "epoch": 0.25, "grad_norm": 0.8268960065672961, "learning_rate": 1.7635234410540745e-05, "loss": 0.8255, "step": 1408 }, { "epoch": 0.25, "grad_norm": 0.723060352221315, "learning_rate": 1.7631565165968493e-05, "loss": 0.7367, "step": 1409 }, { "epoch": 0.25, "grad_norm": 0.700112633798048, "learning_rate": 1.7627893459294283e-05, "loss": 0.7082, "step": 1410 }, { "epoch": 0.25, "grad_norm": 0.7760275506025097, "learning_rate": 1.7624219291702682e-05, "loss": 0.7784, "step": 1411 }, { "epoch": 0.25, "grad_norm": 0.8094556514712448, "learning_rate": 1.7620542664379054e-05, "loss": 0.7596, "step": 1412 }, { "epoch": 0.25, "grad_norm": 0.8192017153548703, "learning_rate": 1.7616863578509553e-05, "loss": 0.8516, "step": 1413 }, { "epoch": 0.25, "grad_norm": 0.7990276515867887, "learning_rate": 1.761318203528113e-05, "loss": 0.7636, "step": 1414 }, { "epoch": 0.25, "grad_norm": 0.7694498696659816, "learning_rate": 1.760949803588153e-05, "loss": 0.6742, "step": 1415 }, { "epoch": 0.25, "grad_norm": 1.1304965669801186, "learning_rate": 1.760581158149929e-05, "loss": 0.8138, "step": 1416 }, { "epoch": 0.25, "grad_norm": 0.8766000289876036, "learning_rate": 1.760212267332373e-05, "loss": 0.7356, "step": 1417 }, { "epoch": 0.25, "grad_norm": 0.68347727879849, "learning_rate": 1.759843131254497e-05, "loss": 0.7186, "step": 1418 }, { "epoch": 0.25, "grad_norm": 0.7945192234227846, "learning_rate": 1.7594737500353926e-05, "loss": 0.7614, "step": 1419 }, { "epoch": 0.25, "grad_norm": 0.7511923148479042, "learning_rate": 1.7591041237942296e-05, "loss": 0.6801, "step": 1420 }, { "epoch": 0.25, "grad_norm": 0.6304698331729877, "learning_rate": 1.7587342526502566e-05, "loss": 0.7167, "step": 1421 }, { "epoch": 0.25, "grad_norm": 0.8180611544702946, "learning_rate": 1.7583641367228025e-05, "loss": 0.7789, "step": 1422 }, { "epoch": 0.25, "grad_norm": 1.5676947671667003, "learning_rate": 1.7579937761312737e-05, "loss": 0.8696, "step": 1423 }, { "epoch": 0.25, "grad_norm": 0.6760830481925173, "learning_rate": 1.757623170995157e-05, "loss": 0.7381, "step": 1424 }, { "epoch": 0.25, "grad_norm": 0.7548844441624366, "learning_rate": 1.7572523214340167e-05, "loss": 0.7331, "step": 1425 }, { "epoch": 0.25, "grad_norm": 0.6579158736727312, "learning_rate": 1.756881227567497e-05, "loss": 0.7348, "step": 1426 }, { "epoch": 0.25, "grad_norm": 0.6385721005315309, "learning_rate": 1.75650988951532e-05, "loss": 0.7379, "step": 1427 }, { "epoch": 0.25, "grad_norm": 0.7333982742965844, "learning_rate": 1.7561383073972876e-05, "loss": 0.7045, "step": 1428 }, { "epoch": 0.25, "grad_norm": 0.6935378406121746, "learning_rate": 1.75576648133328e-05, "loss": 0.722, "step": 1429 }, { "epoch": 0.25, "grad_norm": 0.7841624143823144, "learning_rate": 1.7553944114432557e-05, "loss": 0.7872, "step": 1430 }, { "epoch": 0.25, "grad_norm": 0.767486433293147, "learning_rate": 1.7550220978472526e-05, "loss": 0.8257, "step": 1431 }, { "epoch": 0.25, "grad_norm": 1.5031421649920633, "learning_rate": 1.7546495406653862e-05, "loss": 0.8719, "step": 1432 }, { "epoch": 0.25, "grad_norm": 0.7075344210563942, "learning_rate": 1.7542767400178514e-05, "loss": 0.7474, "step": 1433 }, { "epoch": 0.25, "grad_norm": 0.8169103130355345, "learning_rate": 1.7539036960249222e-05, "loss": 0.815, "step": 1434 }, { "epoch": 0.25, "grad_norm": 0.744616773273098, "learning_rate": 1.7535304088069494e-05, "loss": 0.7509, "step": 1435 }, { "epoch": 0.25, "grad_norm": 0.8361518892010283, "learning_rate": 1.7531568784843635e-05, "loss": 0.7544, "step": 1436 }, { "epoch": 0.25, "grad_norm": 0.6717744171383363, "learning_rate": 1.7527831051776735e-05, "loss": 0.7096, "step": 1437 }, { "epoch": 0.25, "grad_norm": 0.8507608848407532, "learning_rate": 1.7524090890074662e-05, "loss": 0.7287, "step": 1438 }, { "epoch": 0.25, "grad_norm": 0.7874230578052511, "learning_rate": 1.752034830094407e-05, "loss": 0.7509, "step": 1439 }, { "epoch": 0.25, "grad_norm": 0.7042124281091713, "learning_rate": 1.75166032855924e-05, "loss": 0.7067, "step": 1440 }, { "epoch": 0.25, "grad_norm": 0.6006767592364758, "learning_rate": 1.7512855845227866e-05, "loss": 0.7018, "step": 1441 }, { "epoch": 0.25, "grad_norm": 0.8688799257925469, "learning_rate": 1.7509105981059474e-05, "loss": 0.7065, "step": 1442 }, { "epoch": 0.25, "grad_norm": 0.7316939076536134, "learning_rate": 1.7505353694297005e-05, "loss": 0.7569, "step": 1443 }, { "epoch": 0.25, "grad_norm": 1.0070711470836196, "learning_rate": 1.750159898615103e-05, "loss": 0.7549, "step": 1444 }, { "epoch": 0.25, "grad_norm": 0.6515367006280961, "learning_rate": 1.7497841857832894e-05, "loss": 0.7347, "step": 1445 }, { "epoch": 0.25, "grad_norm": 0.729092086694196, "learning_rate": 1.749408231055472e-05, "loss": 0.7665, "step": 1446 }, { "epoch": 0.25, "grad_norm": 0.7218456259209016, "learning_rate": 1.7490320345529424e-05, "loss": 0.7365, "step": 1447 }, { "epoch": 0.25, "grad_norm": 0.7305659888375168, "learning_rate": 1.7486555963970694e-05, "loss": 0.7652, "step": 1448 }, { "epoch": 0.25, "grad_norm": 0.7221108838989603, "learning_rate": 1.748278916709299e-05, "loss": 0.7519, "step": 1449 }, { "epoch": 0.25, "grad_norm": 0.6397289545699121, "learning_rate": 1.747901995611157e-05, "loss": 0.6791, "step": 1450 }, { "epoch": 0.25, "grad_norm": 0.7289007090050836, "learning_rate": 1.7475248332242446e-05, "loss": 0.7663, "step": 1451 }, { "epoch": 0.25, "grad_norm": 0.6686665266815089, "learning_rate": 1.7471474296702434e-05, "loss": 0.7058, "step": 1452 }, { "epoch": 0.25, "grad_norm": 0.8615963485012111, "learning_rate": 1.7467697850709115e-05, "loss": 0.757, "step": 1453 }, { "epoch": 0.25, "grad_norm": 0.8304739329628903, "learning_rate": 1.7463918995480845e-05, "loss": 0.7768, "step": 1454 }, { "epoch": 0.26, "grad_norm": 0.6432408500504139, "learning_rate": 1.7460137732236764e-05, "loss": 0.7246, "step": 1455 }, { "epoch": 0.26, "grad_norm": 0.8379590775150785, "learning_rate": 1.745635406219679e-05, "loss": 0.7184, "step": 1456 }, { "epoch": 0.26, "grad_norm": 0.6657795757082339, "learning_rate": 1.7452567986581604e-05, "loss": 0.7116, "step": 1457 }, { "epoch": 0.26, "grad_norm": 0.6824887887293511, "learning_rate": 1.7448779506612683e-05, "loss": 0.686, "step": 1458 }, { "epoch": 0.26, "grad_norm": 0.6970780568658766, "learning_rate": 1.7444988623512262e-05, "loss": 0.765, "step": 1459 }, { "epoch": 0.26, "grad_norm": 0.8707025924201838, "learning_rate": 1.7441195338503366e-05, "loss": 0.8073, "step": 1460 }, { "epoch": 0.26, "grad_norm": 0.7123686495216246, "learning_rate": 1.743739965280978e-05, "loss": 0.7148, "step": 1461 }, { "epoch": 0.26, "grad_norm": 0.9662516661056841, "learning_rate": 1.743360156765608e-05, "loss": 0.7765, "step": 1462 }, { "epoch": 0.26, "grad_norm": 0.636747213059274, "learning_rate": 1.7429801084267598e-05, "loss": 0.7273, "step": 1463 }, { "epoch": 0.26, "grad_norm": 0.7353149815865959, "learning_rate": 1.7425998203870453e-05, "loss": 0.7092, "step": 1464 }, { "epoch": 0.26, "grad_norm": 0.6784037121940032, "learning_rate": 1.742219292769154e-05, "loss": 0.691, "step": 1465 }, { "epoch": 0.26, "grad_norm": 0.7545530837439987, "learning_rate": 1.7418385256958508e-05, "loss": 0.7191, "step": 1466 }, { "epoch": 0.26, "grad_norm": 0.7586222432041834, "learning_rate": 1.7414575192899803e-05, "loss": 0.7821, "step": 1467 }, { "epoch": 0.26, "grad_norm": 0.8244516088003997, "learning_rate": 1.7410762736744625e-05, "loss": 0.7298, "step": 1468 }, { "epoch": 0.26, "grad_norm": 0.7100044462035451, "learning_rate": 1.740694788972295e-05, "loss": 0.7201, "step": 1469 }, { "epoch": 0.26, "grad_norm": 0.7879301108613668, "learning_rate": 1.740313065306553e-05, "loss": 0.7265, "step": 1470 }, { "epoch": 0.26, "grad_norm": 0.7961056542810394, "learning_rate": 1.7399311028003886e-05, "loss": 0.7593, "step": 1471 }, { "epoch": 0.26, "grad_norm": 0.743778166660551, "learning_rate": 1.739548901577031e-05, "loss": 0.76, "step": 1472 }, { "epoch": 0.26, "grad_norm": 0.6526869723745664, "learning_rate": 1.7391664617597858e-05, "loss": 0.7815, "step": 1473 }, { "epoch": 0.26, "grad_norm": 0.6923517147388485, "learning_rate": 1.738783783472036e-05, "loss": 0.7398, "step": 1474 }, { "epoch": 0.26, "grad_norm": 0.7575313659472268, "learning_rate": 1.738400866837242e-05, "loss": 0.7744, "step": 1475 }, { "epoch": 0.26, "grad_norm": 0.6951544159623877, "learning_rate": 1.738017711978941e-05, "loss": 0.7542, "step": 1476 }, { "epoch": 0.26, "grad_norm": 0.6723936270744089, "learning_rate": 1.7376343190207458e-05, "loss": 0.6883, "step": 1477 }, { "epoch": 0.26, "grad_norm": 0.7036440872851514, "learning_rate": 1.7372506880863477e-05, "loss": 0.7161, "step": 1478 }, { "epoch": 0.26, "grad_norm": 0.6667971966932783, "learning_rate": 1.7368668192995133e-05, "loss": 0.7484, "step": 1479 }, { "epoch": 0.26, "grad_norm": 0.7219492508319645, "learning_rate": 1.7364827127840874e-05, "loss": 0.7351, "step": 1480 }, { "epoch": 0.26, "grad_norm": 0.7487171665168834, "learning_rate": 1.736098368663991e-05, "loss": 0.7805, "step": 1481 }, { "epoch": 0.26, "grad_norm": 0.7247557930953763, "learning_rate": 1.7357137870632205e-05, "loss": 0.7304, "step": 1482 }, { "epoch": 0.26, "grad_norm": 0.7402013443662868, "learning_rate": 1.7353289681058507e-05, "loss": 0.7771, "step": 1483 }, { "epoch": 0.26, "grad_norm": 0.7127527478159407, "learning_rate": 1.734943911916032e-05, "loss": 0.7257, "step": 1484 }, { "epoch": 0.26, "grad_norm": 0.6852785725963915, "learning_rate": 1.7345586186179916e-05, "loss": 0.7263, "step": 1485 }, { "epoch": 0.26, "grad_norm": 0.7848548447243251, "learning_rate": 1.734173088336033e-05, "loss": 0.7196, "step": 1486 }, { "epoch": 0.26, "grad_norm": 0.7431362557862778, "learning_rate": 1.7337873211945365e-05, "loss": 0.7107, "step": 1487 }, { "epoch": 0.26, "grad_norm": 0.7268800605774455, "learning_rate": 1.7334013173179582e-05, "loss": 0.7431, "step": 1488 }, { "epoch": 0.26, "grad_norm": 0.7632035959108727, "learning_rate": 1.7330150768308315e-05, "loss": 0.7436, "step": 1489 }, { "epoch": 0.26, "grad_norm": 0.6297407699039074, "learning_rate": 1.7326285998577654e-05, "loss": 0.6915, "step": 1490 }, { "epoch": 0.26, "grad_norm": 0.7546392128514601, "learning_rate": 1.7322418865234457e-05, "loss": 0.7496, "step": 1491 }, { "epoch": 0.26, "grad_norm": 0.7251017285381203, "learning_rate": 1.7318549369526337e-05, "loss": 0.7714, "step": 1492 }, { "epoch": 0.26, "grad_norm": 0.7043883304667362, "learning_rate": 1.731467751270168e-05, "loss": 0.7225, "step": 1493 }, { "epoch": 0.26, "grad_norm": 0.7621117538423045, "learning_rate": 1.731080329600962e-05, "loss": 0.7649, "step": 1494 }, { "epoch": 0.26, "grad_norm": 0.9163464853690335, "learning_rate": 1.7306926720700067e-05, "loss": 0.778, "step": 1495 }, { "epoch": 0.26, "grad_norm": 0.7962765373934559, "learning_rate": 1.730304778802368e-05, "loss": 0.7391, "step": 1496 }, { "epoch": 0.26, "grad_norm": 0.8583136525221953, "learning_rate": 1.7299166499231884e-05, "loss": 0.7528, "step": 1497 }, { "epoch": 0.26, "grad_norm": 0.7206375854505027, "learning_rate": 1.7295282855576868e-05, "loss": 0.6785, "step": 1498 }, { "epoch": 0.26, "grad_norm": 0.7542682760335205, "learning_rate": 1.7291396858311575e-05, "loss": 0.7291, "step": 1499 }, { "epoch": 0.26, "grad_norm": 0.8541144003587724, "learning_rate": 1.7287508508689708e-05, "loss": 0.827, "step": 1500 }, { "epoch": 0.26, "grad_norm": 0.6916554120817306, "learning_rate": 1.7283617807965726e-05, "loss": 0.6948, "step": 1501 }, { "epoch": 0.26, "grad_norm": 0.8299235180873263, "learning_rate": 1.7279724757394854e-05, "loss": 0.7624, "step": 1502 }, { "epoch": 0.26, "grad_norm": 0.6455443779786671, "learning_rate": 1.727582935823307e-05, "loss": 0.7226, "step": 1503 }, { "epoch": 0.26, "grad_norm": 0.7320481035635267, "learning_rate": 1.7271931611737113e-05, "loss": 0.7222, "step": 1504 }, { "epoch": 0.26, "grad_norm": 0.7189353796916486, "learning_rate": 1.7268031519164474e-05, "loss": 0.7393, "step": 1505 }, { "epoch": 0.26, "grad_norm": 0.7280545859817267, "learning_rate": 1.7264129081773407e-05, "loss": 0.7347, "step": 1506 }, { "epoch": 0.26, "grad_norm": 0.7217587999977617, "learning_rate": 1.7260224300822918e-05, "loss": 0.7353, "step": 1507 }, { "epoch": 0.26, "grad_norm": 0.7222852385484946, "learning_rate": 1.725631717757277e-05, "loss": 0.7069, "step": 1508 }, { "epoch": 0.26, "grad_norm": 0.7801373838885238, "learning_rate": 1.7252407713283484e-05, "loss": 0.7344, "step": 1509 }, { "epoch": 0.26, "grad_norm": 0.7483089903681078, "learning_rate": 1.7248495909216335e-05, "loss": 0.7335, "step": 1510 }, { "epoch": 0.26, "grad_norm": 0.8668139504178476, "learning_rate": 1.724458176663335e-05, "loss": 0.7807, "step": 1511 }, { "epoch": 0.27, "grad_norm": 0.734562701634865, "learning_rate": 1.7240665286797316e-05, "loss": 0.6879, "step": 1512 }, { "epoch": 0.27, "grad_norm": 0.748912431094992, "learning_rate": 1.723674647097177e-05, "loss": 0.7597, "step": 1513 }, { "epoch": 0.27, "grad_norm": 0.7889585383725976, "learning_rate": 1.7232825320421e-05, "loss": 0.7579, "step": 1514 }, { "epoch": 0.27, "grad_norm": 0.6857193605390461, "learning_rate": 1.7228901836410056e-05, "loss": 0.6958, "step": 1515 }, { "epoch": 0.27, "grad_norm": 0.7357683315130081, "learning_rate": 1.7224976020204737e-05, "loss": 0.7551, "step": 1516 }, { "epoch": 0.27, "grad_norm": 0.7650144125312769, "learning_rate": 1.7221047873071586e-05, "loss": 0.7381, "step": 1517 }, { "epoch": 0.27, "grad_norm": 0.7420287304762396, "learning_rate": 1.721711739627791e-05, "loss": 0.7238, "step": 1518 }, { "epoch": 0.27, "grad_norm": 0.7903512760107481, "learning_rate": 1.7213184591091764e-05, "loss": 0.7728, "step": 1519 }, { "epoch": 0.27, "grad_norm": 0.7134402656927645, "learning_rate": 1.7209249458781947e-05, "loss": 0.733, "step": 1520 }, { "epoch": 0.27, "grad_norm": 0.8251416033025214, "learning_rate": 1.720531200061802e-05, "loss": 0.7556, "step": 1521 }, { "epoch": 0.27, "grad_norm": 0.6209190614076074, "learning_rate": 1.7201372217870285e-05, "loss": 0.7153, "step": 1522 }, { "epoch": 0.27, "grad_norm": 0.6286523997128947, "learning_rate": 1.7197430111809804e-05, "loss": 0.7209, "step": 1523 }, { "epoch": 0.27, "grad_norm": 0.8024434804836762, "learning_rate": 1.719348568370838e-05, "loss": 0.7226, "step": 1524 }, { "epoch": 0.27, "grad_norm": 0.7071049685593067, "learning_rate": 1.7189538934838564e-05, "loss": 0.7621, "step": 1525 }, { "epoch": 0.27, "grad_norm": 0.6736551339443139, "learning_rate": 1.7185589866473665e-05, "loss": 0.6625, "step": 1526 }, { "epoch": 0.27, "grad_norm": 0.8045672955446157, "learning_rate": 1.7181638479887728e-05, "loss": 0.8059, "step": 1527 }, { "epoch": 0.27, "grad_norm": 0.7662848558795228, "learning_rate": 1.717768477635556e-05, "loss": 0.7643, "step": 1528 }, { "epoch": 0.27, "grad_norm": 0.6912249897609034, "learning_rate": 1.71737287571527e-05, "loss": 0.6877, "step": 1529 }, { "epoch": 0.27, "grad_norm": 0.7271238711369477, "learning_rate": 1.716977042355545e-05, "loss": 0.734, "step": 1530 }, { "epoch": 0.27, "grad_norm": 0.7525342754130883, "learning_rate": 1.716580977684085e-05, "loss": 0.7845, "step": 1531 }, { "epoch": 0.27, "grad_norm": 0.7540505833186464, "learning_rate": 1.7161846818286683e-05, "loss": 0.7516, "step": 1532 }, { "epoch": 0.27, "grad_norm": 0.6680501741589535, "learning_rate": 1.7157881549171486e-05, "loss": 0.6817, "step": 1533 }, { "epoch": 0.27, "grad_norm": 0.7404765692461728, "learning_rate": 1.715391397077453e-05, "loss": 0.6957, "step": 1534 }, { "epoch": 0.27, "grad_norm": 0.8007240234311871, "learning_rate": 1.714994408437585e-05, "loss": 0.7321, "step": 1535 }, { "epoch": 0.27, "grad_norm": 0.7229562884201313, "learning_rate": 1.7145971891256204e-05, "loss": 0.7371, "step": 1536 }, { "epoch": 0.27, "grad_norm": 0.6782597653035877, "learning_rate": 1.714199739269711e-05, "loss": 0.7161, "step": 1537 }, { "epoch": 0.27, "grad_norm": 0.8553495508070331, "learning_rate": 1.7138020589980825e-05, "loss": 0.7204, "step": 1538 }, { "epoch": 0.27, "grad_norm": 0.8306299475507694, "learning_rate": 1.7134041484390344e-05, "loss": 0.7451, "step": 1539 }, { "epoch": 0.27, "grad_norm": 0.890056971865775, "learning_rate": 1.713006007720941e-05, "loss": 0.7611, "step": 1540 }, { "epoch": 0.27, "grad_norm": 0.7745538095095827, "learning_rate": 1.712607636972251e-05, "loss": 0.7664, "step": 1541 }, { "epoch": 0.27, "grad_norm": 0.6895327327656987, "learning_rate": 1.7122090363214866e-05, "loss": 0.7138, "step": 1542 }, { "epoch": 0.27, "grad_norm": 0.758549585398954, "learning_rate": 1.7118102058972457e-05, "loss": 0.7206, "step": 1543 }, { "epoch": 0.27, "grad_norm": 0.6964162002770254, "learning_rate": 1.7114111458281985e-05, "loss": 0.7063, "step": 1544 }, { "epoch": 0.27, "grad_norm": 0.700207282771962, "learning_rate": 1.7110118562430904e-05, "loss": 0.7376, "step": 1545 }, { "epoch": 0.27, "grad_norm": 0.7433365429234048, "learning_rate": 1.71061233727074e-05, "loss": 0.7276, "step": 1546 }, { "epoch": 0.27, "grad_norm": 0.6464340858684804, "learning_rate": 1.7102125890400413e-05, "loss": 0.6865, "step": 1547 }, { "epoch": 0.27, "grad_norm": 0.7665288530040991, "learning_rate": 1.709812611679961e-05, "loss": 0.7471, "step": 1548 }, { "epoch": 0.27, "grad_norm": 0.7421583994573777, "learning_rate": 1.70941240531954e-05, "loss": 0.7584, "step": 1549 }, { "epoch": 0.27, "grad_norm": 0.8547421715330368, "learning_rate": 1.7090119700878933e-05, "loss": 0.7331, "step": 1550 }, { "epoch": 0.27, "grad_norm": 0.8372133047433974, "learning_rate": 1.70861130611421e-05, "loss": 0.8148, "step": 1551 }, { "epoch": 0.27, "grad_norm": 0.7226686043268731, "learning_rate": 1.708210413527752e-05, "loss": 0.7316, "step": 1552 }, { "epoch": 0.27, "grad_norm": 0.758660056219762, "learning_rate": 1.707809292457856e-05, "loss": 0.7266, "step": 1553 }, { "epoch": 0.27, "grad_norm": 0.7193904993436278, "learning_rate": 1.707407943033932e-05, "loss": 0.7566, "step": 1554 }, { "epoch": 0.27, "grad_norm": 0.7581086000331252, "learning_rate": 1.7070063653854636e-05, "loss": 0.7103, "step": 1555 }, { "epoch": 0.27, "grad_norm": 0.8081601423926386, "learning_rate": 1.706604559642008e-05, "loss": 0.7526, "step": 1556 }, { "epoch": 0.27, "grad_norm": 0.7301041310752289, "learning_rate": 1.7062025259331966e-05, "loss": 0.7353, "step": 1557 }, { "epoch": 0.27, "grad_norm": 0.7321380109435941, "learning_rate": 1.7058002643887332e-05, "loss": 0.7104, "step": 1558 }, { "epoch": 0.27, "grad_norm": 0.7536206072522272, "learning_rate": 1.7053977751383964e-05, "loss": 0.7541, "step": 1559 }, { "epoch": 0.27, "grad_norm": 0.7054130770681657, "learning_rate": 1.704995058312037e-05, "loss": 0.6941, "step": 1560 }, { "epoch": 0.27, "grad_norm": 0.7590284871449714, "learning_rate": 1.7045921140395802e-05, "loss": 0.7389, "step": 1561 }, { "epoch": 0.27, "grad_norm": 0.7319187593901468, "learning_rate": 1.7041889424510243e-05, "loss": 0.7325, "step": 1562 }, { "epoch": 0.27, "grad_norm": 0.6653347004724218, "learning_rate": 1.7037855436764406e-05, "loss": 0.7346, "step": 1563 }, { "epoch": 0.27, "grad_norm": 0.6981961953324062, "learning_rate": 1.7033819178459743e-05, "loss": 0.7235, "step": 1564 }, { "epoch": 0.27, "grad_norm": 0.7247546511623132, "learning_rate": 1.702978065089843e-05, "loss": 0.7066, "step": 1565 }, { "epoch": 0.27, "grad_norm": 0.6918828974743905, "learning_rate": 1.7025739855383383e-05, "loss": 0.7572, "step": 1566 }, { "epoch": 0.27, "grad_norm": 0.6937841563521224, "learning_rate": 1.7021696793218246e-05, "loss": 0.7432, "step": 1567 }, { "epoch": 0.27, "grad_norm": 0.784615985951674, "learning_rate": 1.7017651465707395e-05, "loss": 0.7377, "step": 1568 }, { "epoch": 0.28, "grad_norm": 0.7013296654359493, "learning_rate": 1.7013603874155942e-05, "loss": 0.7398, "step": 1569 }, { "epoch": 0.28, "grad_norm": 0.7423723772177212, "learning_rate": 1.7009554019869717e-05, "loss": 0.7698, "step": 1570 }, { "epoch": 0.28, "grad_norm": 0.7948108554763507, "learning_rate": 1.7005501904155292e-05, "loss": 0.7305, "step": 1571 }, { "epoch": 0.28, "grad_norm": 0.8489380796884386, "learning_rate": 1.7001447528319963e-05, "loss": 0.8044, "step": 1572 }, { "epoch": 0.28, "grad_norm": 0.7281821434393893, "learning_rate": 1.6997390893671754e-05, "loss": 0.7071, "step": 1573 }, { "epoch": 0.28, "grad_norm": 0.7651058298432656, "learning_rate": 1.699333200151942e-05, "loss": 0.763, "step": 1574 }, { "epoch": 0.28, "grad_norm": 0.9546614282269983, "learning_rate": 1.698927085317245e-05, "loss": 0.8292, "step": 1575 }, { "epoch": 0.28, "grad_norm": 0.7817371300241337, "learning_rate": 1.698520744994105e-05, "loss": 0.7872, "step": 1576 }, { "epoch": 0.28, "grad_norm": 0.8989404692789652, "learning_rate": 1.6981141793136154e-05, "loss": 0.8026, "step": 1577 }, { "epoch": 0.28, "grad_norm": 0.7620658871259062, "learning_rate": 1.697707388406944e-05, "loss": 0.7733, "step": 1578 }, { "epoch": 0.28, "grad_norm": 0.8129242871816393, "learning_rate": 1.697300372405329e-05, "loss": 0.7668, "step": 1579 }, { "epoch": 0.28, "grad_norm": 0.7359578211060912, "learning_rate": 1.6968931314400826e-05, "loss": 0.769, "step": 1580 }, { "epoch": 0.28, "grad_norm": 0.712899315146925, "learning_rate": 1.696485665642589e-05, "loss": 0.7317, "step": 1581 }, { "epoch": 0.28, "grad_norm": 0.6265476721346054, "learning_rate": 1.6960779751443054e-05, "loss": 0.7135, "step": 1582 }, { "epoch": 0.28, "grad_norm": 0.7544325638661844, "learning_rate": 1.695670060076761e-05, "loss": 0.732, "step": 1583 }, { "epoch": 0.28, "grad_norm": 0.7512562215975038, "learning_rate": 1.6952619205715585e-05, "loss": 0.7855, "step": 1584 }, { "epoch": 0.28, "grad_norm": 0.6774587821326736, "learning_rate": 1.6948535567603715e-05, "loss": 0.7076, "step": 1585 }, { "epoch": 0.28, "grad_norm": 0.8321531111005945, "learning_rate": 1.6944449687749466e-05, "loss": 0.7391, "step": 1586 }, { "epoch": 0.28, "grad_norm": 0.8495041154632778, "learning_rate": 1.6940361567471025e-05, "loss": 0.7141, "step": 1587 }, { "epoch": 0.28, "grad_norm": 0.712436706450894, "learning_rate": 1.6936271208087318e-05, "loss": 0.7659, "step": 1588 }, { "epoch": 0.28, "grad_norm": 0.7550323797703236, "learning_rate": 1.6932178610917963e-05, "loss": 0.7659, "step": 1589 }, { "epoch": 0.28, "grad_norm": 0.7527852037790559, "learning_rate": 1.6928083777283333e-05, "loss": 0.733, "step": 1590 }, { "epoch": 0.28, "grad_norm": 0.7230096601539913, "learning_rate": 1.6923986708504498e-05, "loss": 0.7558, "step": 1591 }, { "epoch": 0.28, "grad_norm": 0.6258243169085509, "learning_rate": 1.691988740590326e-05, "loss": 0.7106, "step": 1592 }, { "epoch": 0.28, "grad_norm": 1.619949302715282, "learning_rate": 1.6915785870802136e-05, "loss": 0.7262, "step": 1593 }, { "epoch": 0.28, "grad_norm": 0.7872727538942906, "learning_rate": 1.691168210452437e-05, "loss": 0.745, "step": 1594 }, { "epoch": 0.28, "grad_norm": 0.6603543471535815, "learning_rate": 1.690757610839392e-05, "loss": 0.7467, "step": 1595 }, { "epoch": 0.28, "grad_norm": 0.7260225816880552, "learning_rate": 1.6903467883735472e-05, "loss": 0.6925, "step": 1596 }, { "epoch": 0.28, "grad_norm": 0.7368036723039798, "learning_rate": 1.689935743187442e-05, "loss": 0.7974, "step": 1597 }, { "epoch": 0.28, "grad_norm": 0.7954667292508565, "learning_rate": 1.6895244754136883e-05, "loss": 0.7652, "step": 1598 }, { "epoch": 0.28, "grad_norm": 0.694328979335732, "learning_rate": 1.6891129851849696e-05, "loss": 0.7105, "step": 1599 }, { "epoch": 0.28, "grad_norm": 0.6654913170059589, "learning_rate": 1.6887012726340407e-05, "loss": 0.7248, "step": 1600 }, { "epoch": 0.28, "grad_norm": 0.694392719957424, "learning_rate": 1.6882893378937297e-05, "loss": 0.7721, "step": 1601 }, { "epoch": 0.28, "grad_norm": 0.7540668455625681, "learning_rate": 1.6878771810969343e-05, "loss": 0.8082, "step": 1602 }, { "epoch": 0.28, "grad_norm": 0.6982971455473405, "learning_rate": 1.6874648023766258e-05, "loss": 0.7779, "step": 1603 }, { "epoch": 0.28, "grad_norm": 0.6690353006124236, "learning_rate": 1.6870522018658456e-05, "loss": 0.7587, "step": 1604 }, { "epoch": 0.28, "grad_norm": 0.7163624224667181, "learning_rate": 1.686639379697707e-05, "loss": 0.7587, "step": 1605 }, { "epoch": 0.28, "grad_norm": 0.6462083995142982, "learning_rate": 1.6862263360053958e-05, "loss": 0.7206, "step": 1606 }, { "epoch": 0.28, "grad_norm": 0.6791577162484901, "learning_rate": 1.6858130709221682e-05, "loss": 0.7363, "step": 1607 }, { "epoch": 0.28, "grad_norm": 0.6897676835754933, "learning_rate": 1.6853995845813516e-05, "loss": 0.7462, "step": 1608 }, { "epoch": 0.28, "grad_norm": 0.6483923738025086, "learning_rate": 1.6849858771163455e-05, "loss": 0.7514, "step": 1609 }, { "epoch": 0.28, "grad_norm": 0.6441693814943756, "learning_rate": 1.6845719486606217e-05, "loss": 0.7423, "step": 1610 }, { "epoch": 0.28, "grad_norm": 0.6966798815405627, "learning_rate": 1.6841577993477204e-05, "loss": 0.7273, "step": 1611 }, { "epoch": 0.28, "grad_norm": 0.681205348671442, "learning_rate": 1.683743429311256e-05, "loss": 0.7338, "step": 1612 }, { "epoch": 0.28, "grad_norm": 0.8014487703446517, "learning_rate": 1.6833288386849123e-05, "loss": 0.7042, "step": 1613 }, { "epoch": 0.28, "grad_norm": 0.5943056853235076, "learning_rate": 1.682914027602445e-05, "loss": 0.6861, "step": 1614 }, { "epoch": 0.28, "grad_norm": 0.7556655695095701, "learning_rate": 1.682498996197681e-05, "loss": 0.7351, "step": 1615 }, { "epoch": 0.28, "grad_norm": 0.6920968197267094, "learning_rate": 1.6820837446045183e-05, "loss": 0.702, "step": 1616 }, { "epoch": 0.28, "grad_norm": 0.630298328636567, "learning_rate": 1.6816682729569253e-05, "loss": 0.7136, "step": 1617 }, { "epoch": 0.28, "grad_norm": 0.7391889108092748, "learning_rate": 1.681252581388942e-05, "loss": 0.7087, "step": 1618 }, { "epoch": 0.28, "grad_norm": 0.7444445257799376, "learning_rate": 1.6808366700346787e-05, "loss": 0.7437, "step": 1619 }, { "epoch": 0.28, "grad_norm": 0.7355356271507535, "learning_rate": 1.6804205390283174e-05, "loss": 0.7501, "step": 1620 }, { "epoch": 0.28, "grad_norm": 0.7101921758292455, "learning_rate": 1.680004188504111e-05, "loss": 0.7358, "step": 1621 }, { "epoch": 0.28, "grad_norm": 0.8904989296285332, "learning_rate": 1.6795876185963824e-05, "loss": 0.8251, "step": 1622 }, { "epoch": 0.28, "grad_norm": 0.7896349901388369, "learning_rate": 1.6791708294395258e-05, "loss": 0.7631, "step": 1623 }, { "epoch": 0.28, "grad_norm": 0.7552480957372827, "learning_rate": 1.678753821168006e-05, "loss": 0.803, "step": 1624 }, { "epoch": 0.28, "grad_norm": 0.7763280897667751, "learning_rate": 1.6783365939163593e-05, "loss": 0.7836, "step": 1625 }, { "epoch": 0.29, "grad_norm": 0.6642419333742102, "learning_rate": 1.6779191478191904e-05, "loss": 0.736, "step": 1626 }, { "epoch": 0.29, "grad_norm": 0.7126651687337622, "learning_rate": 1.6775014830111776e-05, "loss": 0.7208, "step": 1627 }, { "epoch": 0.29, "grad_norm": 0.6963881073043047, "learning_rate": 1.6770835996270677e-05, "loss": 0.7298, "step": 1628 }, { "epoch": 0.29, "grad_norm": 0.6240590960553469, "learning_rate": 1.6766654978016785e-05, "loss": 0.7166, "step": 1629 }, { "epoch": 0.29, "grad_norm": 0.7201863433825337, "learning_rate": 1.6762471776698982e-05, "loss": 0.7122, "step": 1630 }, { "epoch": 0.29, "grad_norm": 0.7402702031759963, "learning_rate": 1.675828639366686e-05, "loss": 0.7916, "step": 1631 }, { "epoch": 0.29, "grad_norm": 0.7473629766098271, "learning_rate": 1.6754098830270708e-05, "loss": 0.7277, "step": 1632 }, { "epoch": 0.29, "grad_norm": 0.7348579180440732, "learning_rate": 1.6749909087861523e-05, "loss": 0.7306, "step": 1633 }, { "epoch": 0.29, "grad_norm": 0.7101696649633948, "learning_rate": 1.6745717167791004e-05, "loss": 0.7144, "step": 1634 }, { "epoch": 0.29, "grad_norm": 0.7258142783869698, "learning_rate": 1.6741523071411547e-05, "loss": 0.7011, "step": 1635 }, { "epoch": 0.29, "grad_norm": 0.734560223125416, "learning_rate": 1.673732680007626e-05, "loss": 0.7505, "step": 1636 }, { "epoch": 0.29, "grad_norm": 0.7120035540876785, "learning_rate": 1.6733128355138943e-05, "loss": 0.7394, "step": 1637 }, { "epoch": 0.29, "grad_norm": 0.7271378541363523, "learning_rate": 1.672892773795411e-05, "loss": 0.6997, "step": 1638 }, { "epoch": 0.29, "grad_norm": 0.7024325870954093, "learning_rate": 1.672472494987696e-05, "loss": 0.687, "step": 1639 }, { "epoch": 0.29, "grad_norm": 0.7579682067576949, "learning_rate": 1.67205199922634e-05, "loss": 0.743, "step": 1640 }, { "epoch": 0.29, "grad_norm": 0.8125761797191711, "learning_rate": 1.671631286647004e-05, "loss": 0.8209, "step": 1641 }, { "epoch": 0.29, "grad_norm": 0.6781132540256432, "learning_rate": 1.6712103573854187e-05, "loss": 0.7049, "step": 1642 }, { "epoch": 0.29, "grad_norm": 0.7535275523031032, "learning_rate": 1.6707892115773843e-05, "loss": 0.6668, "step": 1643 }, { "epoch": 0.29, "grad_norm": 0.6609082144171392, "learning_rate": 1.6703678493587715e-05, "loss": 0.7463, "step": 1644 }, { "epoch": 0.29, "grad_norm": 0.7038413993922519, "learning_rate": 1.6699462708655202e-05, "loss": 0.7179, "step": 1645 }, { "epoch": 0.29, "grad_norm": 0.6971056159646477, "learning_rate": 1.669524476233641e-05, "loss": 0.6888, "step": 1646 }, { "epoch": 0.29, "grad_norm": 0.772959231337151, "learning_rate": 1.6691024655992132e-05, "loss": 0.7505, "step": 1647 }, { "epoch": 0.29, "grad_norm": 0.7801239305515716, "learning_rate": 1.6686802390983862e-05, "loss": 0.7623, "step": 1648 }, { "epoch": 0.29, "grad_norm": 0.7595964038715058, "learning_rate": 1.6682577968673796e-05, "loss": 0.7141, "step": 1649 }, { "epoch": 0.29, "grad_norm": 0.7260209836075087, "learning_rate": 1.6678351390424813e-05, "loss": 0.7226, "step": 1650 }, { "epoch": 0.29, "grad_norm": 0.7766267572614689, "learning_rate": 1.6674122657600504e-05, "loss": 0.6778, "step": 1651 }, { "epoch": 0.29, "grad_norm": 0.7568440786939926, "learning_rate": 1.666989177156513e-05, "loss": 0.6943, "step": 1652 }, { "epoch": 0.29, "grad_norm": 0.6487677046115865, "learning_rate": 1.6665658733683683e-05, "loss": 0.7474, "step": 1653 }, { "epoch": 0.29, "grad_norm": 0.6928402635360607, "learning_rate": 1.6661423545321816e-05, "loss": 0.7263, "step": 1654 }, { "epoch": 0.29, "grad_norm": 0.8233435789549827, "learning_rate": 1.66571862078459e-05, "loss": 0.7067, "step": 1655 }, { "epoch": 0.29, "grad_norm": 0.6821024543915482, "learning_rate": 1.6652946722622978e-05, "loss": 0.7709, "step": 1656 }, { "epoch": 0.29, "grad_norm": 0.8410084692521922, "learning_rate": 1.6648705091020798e-05, "loss": 0.7679, "step": 1657 }, { "epoch": 0.29, "grad_norm": 0.7426333303358318, "learning_rate": 1.6644461314407805e-05, "loss": 0.729, "step": 1658 }, { "epoch": 0.29, "grad_norm": 0.7889411508218517, "learning_rate": 1.6640215394153122e-05, "loss": 0.7413, "step": 1659 }, { "epoch": 0.29, "grad_norm": 0.6917348594152969, "learning_rate": 1.6635967331626573e-05, "loss": 0.7208, "step": 1660 }, { "epoch": 0.29, "grad_norm": 0.704703011565691, "learning_rate": 1.663171712819868e-05, "loss": 0.7256, "step": 1661 }, { "epoch": 0.29, "grad_norm": 0.7063055871961792, "learning_rate": 1.6627464785240635e-05, "loss": 0.7474, "step": 1662 }, { "epoch": 0.29, "grad_norm": 0.6224048624233247, "learning_rate": 1.6623210304124338e-05, "loss": 0.6814, "step": 1663 }, { "epoch": 0.29, "grad_norm": 0.768407013325527, "learning_rate": 1.661895368622238e-05, "loss": 0.7453, "step": 1664 }, { "epoch": 0.29, "grad_norm": 0.8390401987266851, "learning_rate": 1.6614694932908023e-05, "loss": 0.7501, "step": 1665 }, { "epoch": 0.29, "grad_norm": 0.773486895801635, "learning_rate": 1.6610434045555233e-05, "loss": 0.7707, "step": 1666 }, { "epoch": 0.29, "grad_norm": 0.8478274760007114, "learning_rate": 1.6606171025538668e-05, "loss": 0.7331, "step": 1667 }, { "epoch": 0.29, "grad_norm": 0.7425157618372791, "learning_rate": 1.6601905874233657e-05, "loss": 0.724, "step": 1668 }, { "epoch": 0.29, "grad_norm": 0.7584963882422961, "learning_rate": 1.6597638593016232e-05, "loss": 0.7767, "step": 1669 }, { "epoch": 0.29, "grad_norm": 0.7240817320587802, "learning_rate": 1.6593369183263108e-05, "loss": 0.6879, "step": 1670 }, { "epoch": 0.29, "grad_norm": 0.7682152552110898, "learning_rate": 1.6589097646351684e-05, "loss": 0.7812, "step": 1671 }, { "epoch": 0.29, "grad_norm": 0.740321364874834, "learning_rate": 1.6584823983660046e-05, "loss": 0.7367, "step": 1672 }, { "epoch": 0.29, "grad_norm": 0.7130933325771466, "learning_rate": 1.6580548196566968e-05, "loss": 0.7348, "step": 1673 }, { "epoch": 0.29, "grad_norm": 0.71650464744774, "learning_rate": 1.6576270286451906e-05, "loss": 0.7212, "step": 1674 }, { "epoch": 0.29, "grad_norm": 0.7221531594872036, "learning_rate": 1.6571990254695004e-05, "loss": 0.7238, "step": 1675 }, { "epoch": 0.29, "grad_norm": 0.752270630844889, "learning_rate": 1.656770810267709e-05, "loss": 0.7667, "step": 1676 }, { "epoch": 0.29, "grad_norm": 1.1728964849047407, "learning_rate": 1.6563423831779677e-05, "loss": 0.8088, "step": 1677 }, { "epoch": 0.29, "grad_norm": 0.7287433116129227, "learning_rate": 1.6559137443384953e-05, "loss": 0.7027, "step": 1678 }, { "epoch": 0.29, "grad_norm": 0.6328130486829807, "learning_rate": 1.655484893887581e-05, "loss": 0.7031, "step": 1679 }, { "epoch": 0.29, "grad_norm": 0.6598246551596837, "learning_rate": 1.6550558319635793e-05, "loss": 0.7291, "step": 1680 }, { "epoch": 0.29, "grad_norm": 0.6714068608599545, "learning_rate": 1.6546265587049156e-05, "loss": 0.7277, "step": 1681 }, { "epoch": 0.29, "grad_norm": 0.7443357850753283, "learning_rate": 1.654197074250082e-05, "loss": 0.764, "step": 1682 }, { "epoch": 0.3, "grad_norm": 0.6498067150362423, "learning_rate": 1.6537673787376395e-05, "loss": 0.717, "step": 1683 }, { "epoch": 0.3, "grad_norm": 0.6778213292011972, "learning_rate": 1.6533374723062163e-05, "loss": 0.7467, "step": 1684 }, { "epoch": 0.3, "grad_norm": 0.7315422993731058, "learning_rate": 1.652907355094509e-05, "loss": 0.756, "step": 1685 }, { "epoch": 0.3, "grad_norm": 0.7227595232780747, "learning_rate": 1.652477027241283e-05, "loss": 0.7405, "step": 1686 }, { "epoch": 0.3, "grad_norm": 0.7367975061886926, "learning_rate": 1.652046488885371e-05, "loss": 0.7187, "step": 1687 }, { "epoch": 0.3, "grad_norm": 0.7966224112175703, "learning_rate": 1.6516157401656734e-05, "loss": 0.7864, "step": 1688 }, { "epoch": 0.3, "grad_norm": 0.8004063644313205, "learning_rate": 1.6511847812211584e-05, "loss": 0.7136, "step": 1689 }, { "epoch": 0.3, "grad_norm": 0.6686875947192994, "learning_rate": 1.650753612190863e-05, "loss": 0.7125, "step": 1690 }, { "epoch": 0.3, "grad_norm": 0.666600877653839, "learning_rate": 1.650322233213891e-05, "loss": 0.7145, "step": 1691 }, { "epoch": 0.3, "grad_norm": 0.693964098426029, "learning_rate": 1.6498906444294146e-05, "loss": 0.7187, "step": 1692 }, { "epoch": 0.3, "grad_norm": 0.7207561822274463, "learning_rate": 1.6494588459766726e-05, "loss": 0.7481, "step": 1693 }, { "epoch": 0.3, "grad_norm": 0.6890281770516284, "learning_rate": 1.6490268379949723e-05, "loss": 0.6678, "step": 1694 }, { "epoch": 0.3, "grad_norm": 0.7150397868941992, "learning_rate": 1.648594620623689e-05, "loss": 0.6987, "step": 1695 }, { "epoch": 0.3, "grad_norm": 0.7655246240205092, "learning_rate": 1.648162194002265e-05, "loss": 0.7136, "step": 1696 }, { "epoch": 0.3, "grad_norm": 0.7042608858700523, "learning_rate": 1.6477295582702098e-05, "loss": 0.7379, "step": 1697 }, { "epoch": 0.3, "grad_norm": 0.7191012484424234, "learning_rate": 1.6472967135671008e-05, "loss": 0.6722, "step": 1698 }, { "epoch": 0.3, "grad_norm": 0.7925961584562499, "learning_rate": 1.6468636600325828e-05, "loss": 0.719, "step": 1699 }, { "epoch": 0.3, "grad_norm": 0.7559729973916803, "learning_rate": 1.6464303978063675e-05, "loss": 0.7063, "step": 1700 }, { "epoch": 0.3, "grad_norm": 0.6648583697672539, "learning_rate": 1.6459969270282352e-05, "loss": 0.74, "step": 1701 }, { "epoch": 0.3, "grad_norm": 0.8605287542460726, "learning_rate": 1.6455632478380322e-05, "loss": 0.8358, "step": 1702 }, { "epoch": 0.3, "grad_norm": 0.7140178239722209, "learning_rate": 1.6451293603756716e-05, "loss": 0.725, "step": 1703 }, { "epoch": 0.3, "grad_norm": 0.6863114051040952, "learning_rate": 1.644695264781136e-05, "loss": 0.7607, "step": 1704 }, { "epoch": 0.3, "grad_norm": 0.788387083753253, "learning_rate": 1.644260961194472e-05, "loss": 0.7554, "step": 1705 }, { "epoch": 0.3, "grad_norm": 0.6330643588837368, "learning_rate": 1.6438264497557968e-05, "loss": 0.6553, "step": 1706 }, { "epoch": 0.3, "grad_norm": 0.7587706413945704, "learning_rate": 1.6433917306052918e-05, "loss": 0.7357, "step": 1707 }, { "epoch": 0.3, "grad_norm": 0.6162192453933829, "learning_rate": 1.642956803883206e-05, "loss": 0.7095, "step": 1708 }, { "epoch": 0.3, "grad_norm": 0.7225334300031716, "learning_rate": 1.6425216697298573e-05, "loss": 0.775, "step": 1709 }, { "epoch": 0.3, "grad_norm": 0.7697485214289547, "learning_rate": 1.6420863282856278e-05, "loss": 0.7431, "step": 1710 }, { "epoch": 0.3, "grad_norm": 0.6993716286086523, "learning_rate": 1.641650779690968e-05, "loss": 0.7189, "step": 1711 }, { "epoch": 0.3, "grad_norm": 0.8108728136119241, "learning_rate": 1.6412150240863954e-05, "loss": 0.7692, "step": 1712 }, { "epoch": 0.3, "grad_norm": 0.7325571249128215, "learning_rate": 1.6407790616124934e-05, "loss": 0.7721, "step": 1713 }, { "epoch": 0.3, "grad_norm": 0.6597150967883952, "learning_rate": 1.6403428924099125e-05, "loss": 0.7727, "step": 1714 }, { "epoch": 0.3, "grad_norm": 0.640553891083603, "learning_rate": 1.6399065166193703e-05, "loss": 0.7493, "step": 1715 }, { "epoch": 0.3, "grad_norm": 0.7416221478991782, "learning_rate": 1.6394699343816506e-05, "loss": 0.7499, "step": 1716 }, { "epoch": 0.3, "grad_norm": 0.7640916454024901, "learning_rate": 1.639033145837604e-05, "loss": 0.7118, "step": 1717 }, { "epoch": 0.3, "grad_norm": 0.7124771334698424, "learning_rate": 1.6385961511281473e-05, "loss": 0.7162, "step": 1718 }, { "epoch": 0.3, "grad_norm": 0.7116583009797277, "learning_rate": 1.6381589503942644e-05, "loss": 0.7479, "step": 1719 }, { "epoch": 0.3, "grad_norm": 0.715054055589945, "learning_rate": 1.6377215437770057e-05, "loss": 0.7698, "step": 1720 }, { "epoch": 0.3, "grad_norm": 0.6569380574670167, "learning_rate": 1.6372839314174868e-05, "loss": 0.7244, "step": 1721 }, { "epoch": 0.3, "grad_norm": 0.6506291012265675, "learning_rate": 1.6368461134568908e-05, "loss": 0.7007, "step": 1722 }, { "epoch": 0.3, "grad_norm": 0.7619721550678522, "learning_rate": 1.6364080900364676e-05, "loss": 0.7927, "step": 1723 }, { "epoch": 0.3, "grad_norm": 0.6502591319375385, "learning_rate": 1.6359698612975318e-05, "loss": 0.7115, "step": 1724 }, { "epoch": 0.3, "grad_norm": 0.7182127829095467, "learning_rate": 1.635531427381466e-05, "loss": 0.7118, "step": 1725 }, { "epoch": 0.3, "grad_norm": 0.6711344114378908, "learning_rate": 1.6350927884297173e-05, "loss": 0.7851, "step": 1726 }, { "epoch": 0.3, "grad_norm": 0.6289528604737881, "learning_rate": 1.6346539445838004e-05, "loss": 0.7232, "step": 1727 }, { "epoch": 0.3, "grad_norm": 0.7947749214133506, "learning_rate": 1.634214895985295e-05, "loss": 0.7247, "step": 1728 }, { "epoch": 0.3, "grad_norm": 0.7330115254407826, "learning_rate": 1.6337756427758477e-05, "loss": 0.7799, "step": 1729 }, { "epoch": 0.3, "grad_norm": 0.6931376718343705, "learning_rate": 1.6333361850971703e-05, "loss": 0.7136, "step": 1730 }, { "epoch": 0.3, "grad_norm": 0.6913732038156273, "learning_rate": 1.6328965230910414e-05, "loss": 0.7309, "step": 1731 }, { "epoch": 0.3, "grad_norm": 0.7060402029736395, "learning_rate": 1.6324566568993047e-05, "loss": 0.6872, "step": 1732 }, { "epoch": 0.3, "grad_norm": 0.7493340581250791, "learning_rate": 1.6320165866638708e-05, "loss": 0.7165, "step": 1733 }, { "epoch": 0.3, "grad_norm": 0.6930763793306046, "learning_rate": 1.6315763125267153e-05, "loss": 0.7072, "step": 1734 }, { "epoch": 0.3, "grad_norm": 0.7448265361045131, "learning_rate": 1.6311358346298795e-05, "loss": 0.7251, "step": 1735 }, { "epoch": 0.3, "grad_norm": 0.7381536068940797, "learning_rate": 1.630695153115471e-05, "loss": 0.7044, "step": 1736 }, { "epoch": 0.3, "grad_norm": 0.6770168900725423, "learning_rate": 1.630254268125663e-05, "loss": 0.719, "step": 1737 }, { "epoch": 0.3, "grad_norm": 0.7592210220469693, "learning_rate": 1.629813179802694e-05, "loss": 0.7115, "step": 1738 }, { "epoch": 0.3, "grad_norm": 0.7779660715209328, "learning_rate": 1.6293718882888684e-05, "loss": 0.7322, "step": 1739 }, { "epoch": 0.31, "grad_norm": 0.7178854885322038, "learning_rate": 1.6289303937265556e-05, "loss": 0.7194, "step": 1740 }, { "epoch": 0.31, "grad_norm": 0.7604733486913685, "learning_rate": 1.628488696258191e-05, "loss": 0.7295, "step": 1741 }, { "epoch": 0.31, "grad_norm": 0.7463515595636703, "learning_rate": 1.628046796026276e-05, "loss": 0.7379, "step": 1742 }, { "epoch": 0.31, "grad_norm": 0.7732800282356235, "learning_rate": 1.627604693173377e-05, "loss": 0.7192, "step": 1743 }, { "epoch": 0.31, "grad_norm": 0.6913571439044297, "learning_rate": 1.6271623878421242e-05, "loss": 0.71, "step": 1744 }, { "epoch": 0.31, "grad_norm": 0.6913839824125327, "learning_rate": 1.626719880175216e-05, "loss": 0.7234, "step": 1745 }, { "epoch": 0.31, "grad_norm": 0.6836355493156644, "learning_rate": 1.626277170315414e-05, "loss": 0.7289, "step": 1746 }, { "epoch": 0.31, "grad_norm": 0.7860085010800116, "learning_rate": 1.6258342584055453e-05, "loss": 0.7495, "step": 1747 }, { "epoch": 0.31, "grad_norm": 0.6672694102002024, "learning_rate": 1.6253911445885032e-05, "loss": 0.7175, "step": 1748 }, { "epoch": 0.31, "grad_norm": 0.8107172178981801, "learning_rate": 1.6249478290072447e-05, "loss": 0.7551, "step": 1749 }, { "epoch": 0.31, "grad_norm": 0.7223613050272006, "learning_rate": 1.6245043118047935e-05, "loss": 0.741, "step": 1750 }, { "epoch": 0.31, "grad_norm": 0.6603456255676778, "learning_rate": 1.624060593124237e-05, "loss": 0.7123, "step": 1751 }, { "epoch": 0.31, "grad_norm": 0.6434660468058411, "learning_rate": 1.623616673108728e-05, "loss": 0.6837, "step": 1752 }, { "epoch": 0.31, "grad_norm": 0.6897128818735523, "learning_rate": 1.623172551901484e-05, "loss": 0.7361, "step": 1753 }, { "epoch": 0.31, "grad_norm": 0.7524854793400989, "learning_rate": 1.622728229645789e-05, "loss": 0.6818, "step": 1754 }, { "epoch": 0.31, "grad_norm": 0.7306830353415686, "learning_rate": 1.6222837064849894e-05, "loss": 0.7638, "step": 1755 }, { "epoch": 0.31, "grad_norm": 0.8101543819411311, "learning_rate": 1.6218389825624983e-05, "loss": 0.7805, "step": 1756 }, { "epoch": 0.31, "grad_norm": 0.7293511334904239, "learning_rate": 1.6213940580217925e-05, "loss": 0.7424, "step": 1757 }, { "epoch": 0.31, "grad_norm": 0.8009363405311527, "learning_rate": 1.6209489330064143e-05, "loss": 0.7927, "step": 1758 }, { "epoch": 0.31, "grad_norm": 0.6663656805281886, "learning_rate": 1.62050360765997e-05, "loss": 0.7326, "step": 1759 }, { "epoch": 0.31, "grad_norm": 0.6302519211047121, "learning_rate": 1.620058082126131e-05, "loss": 0.7065, "step": 1760 }, { "epoch": 0.31, "grad_norm": 0.75955129822271, "learning_rate": 1.6196123565486333e-05, "loss": 0.7256, "step": 1761 }, { "epoch": 0.31, "grad_norm": 0.7000663692555863, "learning_rate": 1.619166431071277e-05, "loss": 0.731, "step": 1762 }, { "epoch": 0.31, "grad_norm": 0.9874024724611322, "learning_rate": 1.6187203058379267e-05, "loss": 0.7939, "step": 1763 }, { "epoch": 0.31, "grad_norm": 0.7570545706045478, "learning_rate": 1.6182739809925122e-05, "loss": 0.7517, "step": 1764 }, { "epoch": 0.31, "grad_norm": 0.8623351233128626, "learning_rate": 1.617827456679027e-05, "loss": 0.7095, "step": 1765 }, { "epoch": 0.31, "grad_norm": 0.6606789719937634, "learning_rate": 1.617380733041529e-05, "loss": 0.7731, "step": 1766 }, { "epoch": 0.31, "grad_norm": 0.7799008742784628, "learning_rate": 1.616933810224141e-05, "loss": 0.713, "step": 1767 }, { "epoch": 0.31, "grad_norm": 0.8010628399392395, "learning_rate": 1.616486688371049e-05, "loss": 0.7376, "step": 1768 }, { "epoch": 0.31, "grad_norm": 0.7052688932719632, "learning_rate": 1.616039367626504e-05, "loss": 0.7311, "step": 1769 }, { "epoch": 0.31, "grad_norm": 0.7867384094525514, "learning_rate": 1.6155918481348215e-05, "loss": 0.7413, "step": 1770 }, { "epoch": 0.31, "grad_norm": 0.7488765095728127, "learning_rate": 1.61514413004038e-05, "loss": 0.7149, "step": 1771 }, { "epoch": 0.31, "grad_norm": 0.7306998037045603, "learning_rate": 1.6146962134876233e-05, "loss": 0.7056, "step": 1772 }, { "epoch": 0.31, "grad_norm": 0.8402467445612952, "learning_rate": 1.6142480986210576e-05, "loss": 0.7052, "step": 1773 }, { "epoch": 0.31, "grad_norm": 0.7405972340899858, "learning_rate": 1.6137997855852555e-05, "loss": 0.7326, "step": 1774 }, { "epoch": 0.31, "grad_norm": 0.696102308494348, "learning_rate": 1.6133512745248508e-05, "loss": 0.6985, "step": 1775 }, { "epoch": 0.31, "grad_norm": 0.6783367011186321, "learning_rate": 1.6129025655845433e-05, "loss": 0.7351, "step": 1776 }, { "epoch": 0.31, "grad_norm": 0.753424798346466, "learning_rate": 1.6124536589090957e-05, "loss": 0.749, "step": 1777 }, { "epoch": 0.31, "grad_norm": 0.7063233657980029, "learning_rate": 1.6120045546433346e-05, "loss": 0.7216, "step": 1778 }, { "epoch": 0.31, "grad_norm": 0.6888697756106884, "learning_rate": 1.61155525293215e-05, "loss": 0.7481, "step": 1779 }, { "epoch": 0.31, "grad_norm": 0.757688665550209, "learning_rate": 1.611105753920497e-05, "loss": 0.7202, "step": 1780 }, { "epoch": 0.31, "grad_norm": 0.7288909804705143, "learning_rate": 1.6106560577533924e-05, "loss": 0.7308, "step": 1781 }, { "epoch": 0.31, "grad_norm": 0.746386787838614, "learning_rate": 1.610206164575918e-05, "loss": 0.7809, "step": 1782 }, { "epoch": 0.31, "grad_norm": 0.7914935554844562, "learning_rate": 1.6097560745332185e-05, "loss": 0.7916, "step": 1783 }, { "epoch": 0.31, "grad_norm": 0.6158347930678214, "learning_rate": 1.6093057877705022e-05, "loss": 0.7014, "step": 1784 }, { "epoch": 0.31, "grad_norm": 0.6579478939132576, "learning_rate": 1.6088553044330413e-05, "loss": 0.7371, "step": 1785 }, { "epoch": 0.31, "grad_norm": 0.7348489574326306, "learning_rate": 1.6084046246661713e-05, "loss": 0.6912, "step": 1786 }, { "epoch": 0.31, "grad_norm": 0.7602788909693398, "learning_rate": 1.6079537486152905e-05, "loss": 0.7739, "step": 1787 }, { "epoch": 0.31, "grad_norm": 0.7950431895820315, "learning_rate": 1.607502676425861e-05, "loss": 0.8039, "step": 1788 }, { "epoch": 0.31, "grad_norm": 0.6996545782145634, "learning_rate": 1.607051408243408e-05, "loss": 0.7114, "step": 1789 }, { "epoch": 0.31, "grad_norm": 0.7570830982769285, "learning_rate": 1.6065999442135205e-05, "loss": 0.735, "step": 1790 }, { "epoch": 0.31, "grad_norm": 0.7196048047770995, "learning_rate": 1.6061482844818494e-05, "loss": 0.7146, "step": 1791 }, { "epoch": 0.31, "grad_norm": 0.7409519032180568, "learning_rate": 1.60569642919411e-05, "loss": 0.7427, "step": 1792 }, { "epoch": 0.31, "grad_norm": 0.7705215863517672, "learning_rate": 1.6052443784960808e-05, "loss": 0.7093, "step": 1793 }, { "epoch": 0.31, "grad_norm": 0.7397477327670193, "learning_rate": 1.6047921325336014e-05, "loss": 0.7174, "step": 1794 }, { "epoch": 0.31, "grad_norm": 0.6877335715071068, "learning_rate": 1.604339691452577e-05, "loss": 0.6613, "step": 1795 }, { "epoch": 0.31, "grad_norm": 0.7630886499275451, "learning_rate": 1.603887055398974e-05, "loss": 0.7512, "step": 1796 }, { "epoch": 0.32, "grad_norm": 0.7121124545711951, "learning_rate": 1.6034342245188223e-05, "loss": 0.6916, "step": 1797 }, { "epoch": 0.32, "grad_norm": 0.6903376607364442, "learning_rate": 1.602981198958215e-05, "loss": 0.7209, "step": 1798 }, { "epoch": 0.32, "grad_norm": 0.7123685524298092, "learning_rate": 1.6025279788633064e-05, "loss": 0.7056, "step": 1799 }, { "epoch": 0.32, "grad_norm": 0.7208122684814061, "learning_rate": 1.602074564380316e-05, "loss": 0.7485, "step": 1800 }, { "epoch": 0.32, "grad_norm": 0.7070969046975417, "learning_rate": 1.6016209556555238e-05, "loss": 0.6748, "step": 1801 }, { "epoch": 0.32, "grad_norm": 0.7213650416987847, "learning_rate": 1.6011671528352747e-05, "loss": 0.7486, "step": 1802 }, { "epoch": 0.32, "grad_norm": 0.7424783522380406, "learning_rate": 1.6007131560659733e-05, "loss": 0.707, "step": 1803 }, { "epoch": 0.32, "grad_norm": 0.7838450411753418, "learning_rate": 1.60025896549409e-05, "loss": 0.7372, "step": 1804 }, { "epoch": 0.32, "grad_norm": 0.7481797504678842, "learning_rate": 1.5998045812661548e-05, "loss": 0.7737, "step": 1805 }, { "epoch": 0.32, "grad_norm": 0.7708799889714664, "learning_rate": 1.5993500035287625e-05, "loss": 0.8077, "step": 1806 }, { "epoch": 0.32, "grad_norm": 0.6450368806488394, "learning_rate": 1.598895232428569e-05, "loss": 0.7101, "step": 1807 }, { "epoch": 0.32, "grad_norm": 0.7050060470066803, "learning_rate": 1.5984402681122928e-05, "loss": 0.7393, "step": 1808 }, { "epoch": 0.32, "grad_norm": 0.6980612517234095, "learning_rate": 1.597985110726715e-05, "loss": 0.7365, "step": 1809 }, { "epoch": 0.32, "grad_norm": 0.7003942417784108, "learning_rate": 1.597529760418679e-05, "loss": 0.7067, "step": 1810 }, { "epoch": 0.32, "grad_norm": 0.7220506087331539, "learning_rate": 1.59707421733509e-05, "loss": 0.6977, "step": 1811 }, { "epoch": 0.32, "grad_norm": 0.6927502013402784, "learning_rate": 1.5966184816229157e-05, "loss": 0.7299, "step": 1812 }, { "epoch": 0.32, "grad_norm": 0.6954087513604053, "learning_rate": 1.5961625534291864e-05, "loss": 0.733, "step": 1813 }, { "epoch": 0.32, "grad_norm": 0.7894605940368379, "learning_rate": 1.5957064329009933e-05, "loss": 0.7254, "step": 1814 }, { "epoch": 0.32, "grad_norm": 0.7507154203872005, "learning_rate": 1.595250120185491e-05, "loss": 0.7562, "step": 1815 }, { "epoch": 0.32, "grad_norm": 0.7039814712983276, "learning_rate": 1.5947936154298956e-05, "loss": 0.7391, "step": 1816 }, { "epoch": 0.32, "grad_norm": 0.6913019691460912, "learning_rate": 1.5943369187814844e-05, "loss": 0.7194, "step": 1817 }, { "epoch": 0.32, "grad_norm": 0.7604233172401739, "learning_rate": 1.5938800303875977e-05, "loss": 0.7916, "step": 1818 }, { "epoch": 0.32, "grad_norm": 0.6959374460045886, "learning_rate": 1.593422950395637e-05, "loss": 0.7229, "step": 1819 }, { "epoch": 0.32, "grad_norm": 0.874538464836645, "learning_rate": 1.592965678953066e-05, "loss": 0.8021, "step": 1820 }, { "epoch": 0.32, "grad_norm": 0.7899716059360099, "learning_rate": 1.5925082162074098e-05, "loss": 0.7266, "step": 1821 }, { "epoch": 0.32, "grad_norm": 0.7566789049973051, "learning_rate": 1.592050562306256e-05, "loss": 0.6934, "step": 1822 }, { "epoch": 0.32, "grad_norm": 1.046205518056806, "learning_rate": 1.5915927173972522e-05, "loss": 0.6899, "step": 1823 }, { "epoch": 0.32, "grad_norm": 0.8462858148517756, "learning_rate": 1.5911346816281098e-05, "loss": 0.7433, "step": 1824 }, { "epoch": 0.32, "grad_norm": 0.6686454640592628, "learning_rate": 1.5906764551466004e-05, "loss": 0.7625, "step": 1825 }, { "epoch": 0.32, "grad_norm": 0.8001985458688817, "learning_rate": 1.5902180381005567e-05, "loss": 0.7442, "step": 1826 }, { "epoch": 0.32, "grad_norm": 0.8469033585100983, "learning_rate": 1.5897594306378748e-05, "loss": 0.741, "step": 1827 }, { "epoch": 0.32, "grad_norm": 0.6878540434542597, "learning_rate": 1.58930063290651e-05, "loss": 0.7228, "step": 1828 }, { "epoch": 0.32, "grad_norm": 0.8069132408469945, "learning_rate": 1.5888416450544803e-05, "loss": 0.7069, "step": 1829 }, { "epoch": 0.32, "grad_norm": 0.9409174251735448, "learning_rate": 1.588382467229865e-05, "loss": 0.8068, "step": 1830 }, { "epoch": 0.32, "grad_norm": 0.7057450979027633, "learning_rate": 1.587923099580804e-05, "loss": 0.728, "step": 1831 }, { "epoch": 0.32, "grad_norm": 0.6786751503702344, "learning_rate": 1.587463542255499e-05, "loss": 0.732, "step": 1832 }, { "epoch": 0.32, "grad_norm": 0.7261416670575954, "learning_rate": 1.5870037954022133e-05, "loss": 0.7232, "step": 1833 }, { "epoch": 0.32, "grad_norm": 0.8303550128306896, "learning_rate": 1.5865438591692703e-05, "loss": 0.7004, "step": 1834 }, { "epoch": 0.32, "grad_norm": 0.6673332073985466, "learning_rate": 1.586083733705055e-05, "loss": 0.7075, "step": 1835 }, { "epoch": 0.32, "grad_norm": 0.6614710822670398, "learning_rate": 1.585623419158014e-05, "loss": 0.7283, "step": 1836 }, { "epoch": 0.32, "grad_norm": 0.6825363944859301, "learning_rate": 1.5851629156766532e-05, "loss": 0.6827, "step": 1837 }, { "epoch": 0.32, "grad_norm": 0.6882758456232992, "learning_rate": 1.584702223409542e-05, "loss": 0.7116, "step": 1838 }, { "epoch": 0.32, "grad_norm": 0.9166312494604072, "learning_rate": 1.5842413425053088e-05, "loss": 0.759, "step": 1839 }, { "epoch": 0.32, "grad_norm": 0.6223715244954526, "learning_rate": 1.5837802731126427e-05, "loss": 0.7012, "step": 1840 }, { "epoch": 0.32, "grad_norm": 0.6672993505492546, "learning_rate": 1.5833190153802954e-05, "loss": 0.7204, "step": 1841 }, { "epoch": 0.32, "grad_norm": 0.7491882954663822, "learning_rate": 1.5828575694570772e-05, "loss": 0.7871, "step": 1842 }, { "epoch": 0.32, "grad_norm": 0.6993911648053436, "learning_rate": 1.5823959354918613e-05, "loss": 0.7204, "step": 1843 }, { "epoch": 0.32, "grad_norm": 0.7548555853583049, "learning_rate": 1.5819341136335796e-05, "loss": 0.7434, "step": 1844 }, { "epoch": 0.32, "grad_norm": 0.6480823123626158, "learning_rate": 1.5814721040312257e-05, "loss": 0.7134, "step": 1845 }, { "epoch": 0.32, "grad_norm": 0.6712698209810227, "learning_rate": 1.5810099068338534e-05, "loss": 0.7335, "step": 1846 }, { "epoch": 0.32, "grad_norm": 0.7135434205251908, "learning_rate": 1.5805475221905774e-05, "loss": 0.7389, "step": 1847 }, { "epoch": 0.32, "grad_norm": 0.675496143415033, "learning_rate": 1.5800849502505725e-05, "loss": 0.6774, "step": 1848 }, { "epoch": 0.32, "grad_norm": 0.7074830547186598, "learning_rate": 1.579622191163074e-05, "loss": 0.7689, "step": 1849 }, { "epoch": 0.32, "grad_norm": 0.6653635070100244, "learning_rate": 1.5791592450773778e-05, "loss": 0.685, "step": 1850 }, { "epoch": 0.32, "grad_norm": 0.666433714010464, "learning_rate": 1.5786961121428397e-05, "loss": 0.7351, "step": 1851 }, { "epoch": 0.32, "grad_norm": 0.6992225669256741, "learning_rate": 1.578232792508876e-05, "loss": 0.7196, "step": 1852 }, { "epoch": 0.32, "grad_norm": 0.7382585380595815, "learning_rate": 1.5777692863249638e-05, "loss": 0.7444, "step": 1853 }, { "epoch": 0.33, "grad_norm": 0.6451709835997319, "learning_rate": 1.5773055937406392e-05, "loss": 0.6944, "step": 1854 }, { "epoch": 0.33, "grad_norm": 0.7311375018139137, "learning_rate": 1.5768417149054996e-05, "loss": 0.7676, "step": 1855 }, { "epoch": 0.33, "grad_norm": 0.7348730114509042, "learning_rate": 1.5763776499692018e-05, "loss": 0.7454, "step": 1856 }, { "epoch": 0.33, "grad_norm": 0.7344992071653852, "learning_rate": 1.5759133990814625e-05, "loss": 0.7286, "step": 1857 }, { "epoch": 0.33, "grad_norm": 0.6633425650443611, "learning_rate": 1.5754489623920593e-05, "loss": 0.7243, "step": 1858 }, { "epoch": 0.33, "grad_norm": 0.6468479022523999, "learning_rate": 1.5749843400508284e-05, "loss": 0.6727, "step": 1859 }, { "epoch": 0.33, "grad_norm": 0.6726295576995787, "learning_rate": 1.5745195322076674e-05, "loss": 0.7175, "step": 1860 }, { "epoch": 0.33, "grad_norm": 0.6324150461623673, "learning_rate": 1.5740545390125326e-05, "loss": 0.7005, "step": 1861 }, { "epoch": 0.33, "grad_norm": 0.658118438880032, "learning_rate": 1.5735893606154406e-05, "loss": 0.6631, "step": 1862 }, { "epoch": 0.33, "grad_norm": 0.6844170147641135, "learning_rate": 1.5731239971664677e-05, "loss": 0.7419, "step": 1863 }, { "epoch": 0.33, "grad_norm": 0.7115752301142595, "learning_rate": 1.5726584488157497e-05, "loss": 0.7699, "step": 1864 }, { "epoch": 0.33, "grad_norm": 0.7685728732666665, "learning_rate": 1.5721927157134823e-05, "loss": 0.7474, "step": 1865 }, { "epoch": 0.33, "grad_norm": 0.6774957363450368, "learning_rate": 1.5717267980099203e-05, "loss": 0.7398, "step": 1866 }, { "epoch": 0.33, "grad_norm": 0.6060622974264642, "learning_rate": 1.5712606958553792e-05, "loss": 0.6749, "step": 1867 }, { "epoch": 0.33, "grad_norm": 0.7848923765487986, "learning_rate": 1.5707944094002334e-05, "loss": 0.7753, "step": 1868 }, { "epoch": 0.33, "grad_norm": 0.7708765768277887, "learning_rate": 1.5703279387949152e-05, "loss": 0.7646, "step": 1869 }, { "epoch": 0.33, "grad_norm": 0.7780204285966534, "learning_rate": 1.569861284189919e-05, "loss": 0.7392, "step": 1870 }, { "epoch": 0.33, "grad_norm": 0.7618156791908864, "learning_rate": 1.5693944457357973e-05, "loss": 0.7788, "step": 1871 }, { "epoch": 0.33, "grad_norm": 0.8346527135282461, "learning_rate": 1.5689274235831612e-05, "loss": 0.7454, "step": 1872 }, { "epoch": 0.33, "grad_norm": 0.7124248824883883, "learning_rate": 1.5684602178826823e-05, "loss": 0.7001, "step": 1873 }, { "epoch": 0.33, "grad_norm": 0.7484721132085644, "learning_rate": 1.567992828785091e-05, "loss": 0.79, "step": 1874 }, { "epoch": 0.33, "grad_norm": 0.7496242005665792, "learning_rate": 1.5675252564411762e-05, "loss": 0.7227, "step": 1875 }, { "epoch": 0.33, "grad_norm": 0.6991159475958557, "learning_rate": 1.567057501001787e-05, "loss": 0.7676, "step": 1876 }, { "epoch": 0.33, "grad_norm": 0.7329870888734562, "learning_rate": 1.5665895626178307e-05, "loss": 0.7119, "step": 1877 }, { "epoch": 0.33, "grad_norm": 0.6806241618633562, "learning_rate": 1.566121441440274e-05, "loss": 0.7216, "step": 1878 }, { "epoch": 0.33, "grad_norm": 0.6442127958060552, "learning_rate": 1.5656531376201426e-05, "loss": 0.6832, "step": 1879 }, { "epoch": 0.33, "grad_norm": 0.653422083928669, "learning_rate": 1.5651846513085218e-05, "loss": 0.7043, "step": 1880 }, { "epoch": 0.33, "grad_norm": 0.6886485136558183, "learning_rate": 1.5647159826565537e-05, "loss": 0.738, "step": 1881 }, { "epoch": 0.33, "grad_norm": 0.7666683793588472, "learning_rate": 1.5642471318154417e-05, "loss": 0.7386, "step": 1882 }, { "epoch": 0.33, "grad_norm": 0.633202347078734, "learning_rate": 1.5637780989364464e-05, "loss": 0.7106, "step": 1883 }, { "epoch": 0.33, "grad_norm": 0.8231829086653383, "learning_rate": 1.5633088841708876e-05, "loss": 0.7438, "step": 1884 }, { "epoch": 0.33, "grad_norm": 0.7229258886509021, "learning_rate": 1.5628394876701436e-05, "loss": 0.7482, "step": 1885 }, { "epoch": 0.33, "grad_norm": 0.7432100224579861, "learning_rate": 1.5623699095856518e-05, "loss": 0.7342, "step": 1886 }, { "epoch": 0.33, "grad_norm": 0.7020970741848643, "learning_rate": 1.5619001500689076e-05, "loss": 0.7249, "step": 1887 }, { "epoch": 0.33, "grad_norm": 0.6758750289680571, "learning_rate": 1.5614302092714654e-05, "loss": 0.724, "step": 1888 }, { "epoch": 0.33, "grad_norm": 0.767881074936369, "learning_rate": 1.5609600873449378e-05, "loss": 0.7817, "step": 1889 }, { "epoch": 0.33, "grad_norm": 0.8017386450213722, "learning_rate": 1.560489784440996e-05, "loss": 0.8112, "step": 1890 }, { "epoch": 0.33, "grad_norm": 0.7554469046243286, "learning_rate": 1.5600193007113695e-05, "loss": 0.7322, "step": 1891 }, { "epoch": 0.33, "grad_norm": 0.8102137822116793, "learning_rate": 1.559548636307846e-05, "loss": 0.822, "step": 1892 }, { "epoch": 0.33, "grad_norm": 0.7266622504409334, "learning_rate": 1.559077791382272e-05, "loss": 0.7331, "step": 1893 }, { "epoch": 0.33, "grad_norm": 0.8236314696978291, "learning_rate": 1.5586067660865517e-05, "loss": 0.7315, "step": 1894 }, { "epoch": 0.33, "grad_norm": 0.9073719111798381, "learning_rate": 1.5581355605726478e-05, "loss": 0.7265, "step": 1895 }, { "epoch": 0.33, "grad_norm": 0.6673654520266233, "learning_rate": 1.5576641749925802e-05, "loss": 0.7335, "step": 1896 }, { "epoch": 0.33, "grad_norm": 0.6551505165165572, "learning_rate": 1.557192609498429e-05, "loss": 0.714, "step": 1897 }, { "epoch": 0.33, "grad_norm": 0.7127133672735998, "learning_rate": 1.55672086424233e-05, "loss": 0.7067, "step": 1898 }, { "epoch": 0.33, "grad_norm": 0.5771659618549808, "learning_rate": 1.556248939376479e-05, "loss": 0.6791, "step": 1899 }, { "epoch": 0.33, "grad_norm": 0.6591285618444185, "learning_rate": 1.5557768350531278e-05, "loss": 0.6851, "step": 1900 }, { "epoch": 0.33, "grad_norm": 0.7129118595899143, "learning_rate": 1.5553045514245877e-05, "loss": 0.7347, "step": 1901 }, { "epoch": 0.33, "grad_norm": 0.7404110339990851, "learning_rate": 1.5548320886432274e-05, "loss": 0.7126, "step": 1902 }, { "epoch": 0.33, "grad_norm": 0.7820583609241064, "learning_rate": 1.5543594468614725e-05, "loss": 0.7365, "step": 1903 }, { "epoch": 0.33, "grad_norm": 0.685974100484121, "learning_rate": 1.553886626231808e-05, "loss": 0.6807, "step": 1904 }, { "epoch": 0.33, "grad_norm": 0.8421931952642077, "learning_rate": 1.5534136269067747e-05, "loss": 0.7466, "step": 1905 }, { "epoch": 0.33, "grad_norm": 0.6567549530648689, "learning_rate": 1.5529404490389727e-05, "loss": 0.7198, "step": 1906 }, { "epoch": 0.33, "grad_norm": 0.7951364700305693, "learning_rate": 1.5524670927810595e-05, "loss": 0.708, "step": 1907 }, { "epoch": 0.33, "grad_norm": 0.7444582016187891, "learning_rate": 1.5519935582857485e-05, "loss": 0.7661, "step": 1908 }, { "epoch": 0.33, "grad_norm": 0.7175075079424397, "learning_rate": 1.5515198457058126e-05, "loss": 0.7772, "step": 1909 }, { "epoch": 0.33, "grad_norm": 0.7536624564683143, "learning_rate": 1.551045955194081e-05, "loss": 0.7065, "step": 1910 }, { "epoch": 0.34, "grad_norm": 0.6984653265623155, "learning_rate": 1.5505718869034413e-05, "loss": 0.6951, "step": 1911 }, { "epoch": 0.34, "grad_norm": 0.7357531590512498, "learning_rate": 1.5500976409868368e-05, "loss": 0.7222, "step": 1912 }, { "epoch": 0.34, "grad_norm": 0.6373570860441494, "learning_rate": 1.5496232175972697e-05, "loss": 0.7594, "step": 1913 }, { "epoch": 0.34, "grad_norm": 0.7154995245785827, "learning_rate": 1.549148616887799e-05, "loss": 0.7486, "step": 1914 }, { "epoch": 0.34, "grad_norm": 0.6961924538924301, "learning_rate": 1.5486738390115402e-05, "loss": 0.7369, "step": 1915 }, { "epoch": 0.34, "grad_norm": 0.7508730863096518, "learning_rate": 1.5481988841216676e-05, "loss": 0.7383, "step": 1916 }, { "epoch": 0.34, "grad_norm": 0.6618676905826198, "learning_rate": 1.54772375237141e-05, "loss": 0.7226, "step": 1917 }, { "epoch": 0.34, "grad_norm": 0.7043906666140048, "learning_rate": 1.5472484439140566e-05, "loss": 0.7459, "step": 1918 }, { "epoch": 0.34, "grad_norm": 0.6796200011015335, "learning_rate": 1.54677295890295e-05, "loss": 0.7125, "step": 1919 }, { "epoch": 0.34, "grad_norm": 0.7180529013515841, "learning_rate": 1.546297297491493e-05, "loss": 0.6882, "step": 1920 }, { "epoch": 0.34, "grad_norm": 0.651686568274942, "learning_rate": 1.5458214598331435e-05, "loss": 0.7207, "step": 1921 }, { "epoch": 0.34, "grad_norm": 0.7944072657511791, "learning_rate": 1.5453454460814158e-05, "loss": 0.7021, "step": 1922 }, { "epoch": 0.34, "grad_norm": 0.6558657641758138, "learning_rate": 1.544869256389883e-05, "loss": 0.7264, "step": 1923 }, { "epoch": 0.34, "grad_norm": 0.6866109950791599, "learning_rate": 1.5443928909121732e-05, "loss": 0.7259, "step": 1924 }, { "epoch": 0.34, "grad_norm": 0.7134935733351867, "learning_rate": 1.543916349801972e-05, "loss": 0.6804, "step": 1925 }, { "epoch": 0.34, "grad_norm": 0.7166327369195801, "learning_rate": 1.5434396332130216e-05, "loss": 0.7399, "step": 1926 }, { "epoch": 0.34, "grad_norm": 0.7337132084009084, "learning_rate": 1.5429627412991208e-05, "loss": 0.707, "step": 1927 }, { "epoch": 0.34, "grad_norm": 0.9392821243663715, "learning_rate": 1.542485674214124e-05, "loss": 0.7547, "step": 1928 }, { "epoch": 0.34, "grad_norm": 0.682557266283373, "learning_rate": 1.542008432111944e-05, "loss": 0.7237, "step": 1929 }, { "epoch": 0.34, "grad_norm": 0.7116895072851667, "learning_rate": 1.5415310151465492e-05, "loss": 0.7576, "step": 1930 }, { "epoch": 0.34, "grad_norm": 0.6816209601869964, "learning_rate": 1.541053423471963e-05, "loss": 0.7074, "step": 1931 }, { "epoch": 0.34, "grad_norm": 0.7524473093059351, "learning_rate": 1.5405756572422674e-05, "loss": 0.7369, "step": 1932 }, { "epoch": 0.34, "grad_norm": 0.6521897815958001, "learning_rate": 1.5400977166115996e-05, "loss": 0.7522, "step": 1933 }, { "epoch": 0.34, "grad_norm": 0.8770470777387647, "learning_rate": 1.5396196017341528e-05, "loss": 0.7762, "step": 1934 }, { "epoch": 0.34, "grad_norm": 0.6871408964915432, "learning_rate": 1.5391413127641775e-05, "loss": 0.7036, "step": 1935 }, { "epoch": 0.34, "grad_norm": 0.7642985635266702, "learning_rate": 1.5386628498559794e-05, "loss": 0.6925, "step": 1936 }, { "epoch": 0.34, "grad_norm": 0.7377978677191588, "learning_rate": 1.5381842131639204e-05, "loss": 0.7665, "step": 1937 }, { "epoch": 0.34, "grad_norm": 0.6122595266159292, "learning_rate": 1.537705402842419e-05, "loss": 0.6924, "step": 1938 }, { "epoch": 0.34, "grad_norm": 0.695378768107626, "learning_rate": 1.5372264190459492e-05, "loss": 0.6968, "step": 1939 }, { "epoch": 0.34, "grad_norm": 0.5879554397453552, "learning_rate": 1.5367472619290407e-05, "loss": 0.6567, "step": 1940 }, { "epoch": 0.34, "grad_norm": 0.6850838425723593, "learning_rate": 1.5362679316462802e-05, "loss": 0.6961, "step": 1941 }, { "epoch": 0.34, "grad_norm": 0.7161293879110254, "learning_rate": 1.5357884283523097e-05, "loss": 0.7495, "step": 1942 }, { "epoch": 0.34, "grad_norm": 0.7621098860228991, "learning_rate": 1.5353087522018265e-05, "loss": 0.7762, "step": 1943 }, { "epoch": 0.34, "grad_norm": 0.7655694904627991, "learning_rate": 1.5348289033495843e-05, "loss": 0.7777, "step": 1944 }, { "epoch": 0.34, "grad_norm": 0.692966152190597, "learning_rate": 1.534348881950393e-05, "loss": 0.7319, "step": 1945 }, { "epoch": 0.34, "grad_norm": 0.732390788169402, "learning_rate": 1.533868688159116e-05, "loss": 0.7397, "step": 1946 }, { "epoch": 0.34, "grad_norm": 0.6486022347816912, "learning_rate": 1.5333883221306754e-05, "loss": 0.6892, "step": 1947 }, { "epoch": 0.34, "grad_norm": 0.6902685651215071, "learning_rate": 1.5329077840200463e-05, "loss": 0.7267, "step": 1948 }, { "epoch": 0.34, "grad_norm": 0.7869055847431982, "learning_rate": 1.5324270739822608e-05, "loss": 0.7514, "step": 1949 }, { "epoch": 0.34, "grad_norm": 0.8074973866712688, "learning_rate": 1.5319461921724056e-05, "loss": 0.8226, "step": 1950 }, { "epoch": 0.34, "grad_norm": 0.6901162234274941, "learning_rate": 1.531465138745623e-05, "loss": 0.7356, "step": 1951 }, { "epoch": 0.34, "grad_norm": 0.7409716231300452, "learning_rate": 1.5309839138571118e-05, "loss": 0.7516, "step": 1952 }, { "epoch": 0.34, "grad_norm": 0.6865949896345983, "learning_rate": 1.5305025176621245e-05, "loss": 0.7535, "step": 1953 }, { "epoch": 0.34, "grad_norm": 0.8733130435967762, "learning_rate": 1.5300209503159696e-05, "loss": 0.8532, "step": 1954 }, { "epoch": 0.34, "grad_norm": 0.7045106633901236, "learning_rate": 1.529539211974011e-05, "loss": 0.7424, "step": 1955 }, { "epoch": 0.34, "grad_norm": 0.6676309071028051, "learning_rate": 1.5290573027916673e-05, "loss": 0.7505, "step": 1956 }, { "epoch": 0.34, "grad_norm": 0.7258665677157482, "learning_rate": 1.5285752229244125e-05, "loss": 0.7441, "step": 1957 }, { "epoch": 0.34, "grad_norm": 0.71288460958766, "learning_rate": 1.5280929725277752e-05, "loss": 0.7374, "step": 1958 }, { "epoch": 0.34, "grad_norm": 0.6292389235946626, "learning_rate": 1.5276105517573405e-05, "loss": 0.679, "step": 1959 }, { "epoch": 0.34, "grad_norm": 0.7200624771969867, "learning_rate": 1.527127960768746e-05, "loss": 0.7216, "step": 1960 }, { "epoch": 0.34, "grad_norm": 0.6299620320716252, "learning_rate": 1.526645199717687e-05, "loss": 0.6836, "step": 1961 }, { "epoch": 0.34, "grad_norm": 0.693988325740059, "learning_rate": 1.5261622687599117e-05, "loss": 0.7608, "step": 1962 }, { "epoch": 0.34, "grad_norm": 0.7423686705313406, "learning_rate": 1.5256791680512231e-05, "loss": 0.7322, "step": 1963 }, { "epoch": 0.34, "grad_norm": 0.7280501651932013, "learning_rate": 1.52519589774748e-05, "loss": 0.7856, "step": 1964 }, { "epoch": 0.34, "grad_norm": 0.7332830845612114, "learning_rate": 1.5247124580045957e-05, "loss": 0.7077, "step": 1965 }, { "epoch": 0.34, "grad_norm": 0.7331775789142313, "learning_rate": 1.5242288489785379e-05, "loss": 0.7624, "step": 1966 }, { "epoch": 0.34, "grad_norm": 0.6568811547893852, "learning_rate": 1.5237450708253288e-05, "loss": 0.7139, "step": 1967 }, { "epoch": 0.35, "grad_norm": 0.7726159435544646, "learning_rate": 1.5232611237010455e-05, "loss": 0.6817, "step": 1968 }, { "epoch": 0.35, "grad_norm": 0.620721580798881, "learning_rate": 1.5227770077618189e-05, "loss": 0.6838, "step": 1969 }, { "epoch": 0.35, "grad_norm": 0.6780444030256918, "learning_rate": 1.5222927231638354e-05, "loss": 0.7186, "step": 1970 }, { "epoch": 0.35, "grad_norm": 0.6619323604231557, "learning_rate": 1.5218082700633355e-05, "loss": 0.7341, "step": 1971 }, { "epoch": 0.35, "grad_norm": 0.7446727691438678, "learning_rate": 1.521323648616613e-05, "loss": 0.7245, "step": 1972 }, { "epoch": 0.35, "grad_norm": 0.7705211639128885, "learning_rate": 1.5208388589800173e-05, "loss": 0.7904, "step": 1973 }, { "epoch": 0.35, "grad_norm": 0.7212536243366315, "learning_rate": 1.5203539013099521e-05, "loss": 0.6996, "step": 1974 }, { "epoch": 0.35, "grad_norm": 0.6512083849489966, "learning_rate": 1.5198687757628745e-05, "loss": 0.7081, "step": 1975 }, { "epoch": 0.35, "grad_norm": 0.6917407660518554, "learning_rate": 1.5193834824952961e-05, "loss": 0.7375, "step": 1976 }, { "epoch": 0.35, "grad_norm": 0.6985719695593829, "learning_rate": 1.5188980216637828e-05, "loss": 0.7136, "step": 1977 }, { "epoch": 0.35, "grad_norm": 0.7345751227142048, "learning_rate": 1.5184123934249537e-05, "loss": 0.7394, "step": 1978 }, { "epoch": 0.35, "grad_norm": 0.7029171904807912, "learning_rate": 1.5179265979354837e-05, "loss": 0.7283, "step": 1979 }, { "epoch": 0.35, "grad_norm": 0.7027320843672292, "learning_rate": 1.5174406353520996e-05, "loss": 0.743, "step": 1980 }, { "epoch": 0.35, "grad_norm": 0.762395911310599, "learning_rate": 1.516954505831583e-05, "loss": 0.7128, "step": 1981 }, { "epoch": 0.35, "grad_norm": 0.7437376659551443, "learning_rate": 1.5164682095307703e-05, "loss": 0.7282, "step": 1982 }, { "epoch": 0.35, "grad_norm": 0.6179752965097325, "learning_rate": 1.5159817466065503e-05, "loss": 0.7174, "step": 1983 }, { "epoch": 0.35, "grad_norm": 0.8037840910471826, "learning_rate": 1.5154951172158661e-05, "loss": 0.7903, "step": 1984 }, { "epoch": 0.35, "grad_norm": 0.7592002485243258, "learning_rate": 1.5150083215157146e-05, "loss": 0.7216, "step": 1985 }, { "epoch": 0.35, "grad_norm": 0.6808264159889833, "learning_rate": 1.5145213596631459e-05, "loss": 0.7217, "step": 1986 }, { "epoch": 0.35, "grad_norm": 0.8333529468090015, "learning_rate": 1.514034231815264e-05, "loss": 0.7734, "step": 1987 }, { "epoch": 0.35, "grad_norm": 0.7433670222150692, "learning_rate": 1.5135469381292275e-05, "loss": 0.7335, "step": 1988 }, { "epoch": 0.35, "grad_norm": 0.6930811609387546, "learning_rate": 1.5130594787622462e-05, "loss": 0.7566, "step": 1989 }, { "epoch": 0.35, "grad_norm": 0.7129616707476818, "learning_rate": 1.5125718538715854e-05, "loss": 0.7107, "step": 1990 }, { "epoch": 0.35, "grad_norm": 0.6451015336109553, "learning_rate": 1.512084063614563e-05, "loss": 0.7077, "step": 1991 }, { "epoch": 0.35, "grad_norm": 0.7599356304266249, "learning_rate": 1.51159610814855e-05, "loss": 0.7221, "step": 1992 }, { "epoch": 0.35, "grad_norm": 0.6239136181747024, "learning_rate": 1.511107987630971e-05, "loss": 0.6853, "step": 1993 }, { "epoch": 0.35, "grad_norm": 0.7284843098405713, "learning_rate": 1.5106197022193047e-05, "loss": 0.7195, "step": 1994 }, { "epoch": 0.35, "grad_norm": 0.7247203459335336, "learning_rate": 1.5101312520710809e-05, "loss": 0.753, "step": 1995 }, { "epoch": 0.35, "grad_norm": 0.6706448073996855, "learning_rate": 1.5096426373438843e-05, "loss": 0.7011, "step": 1996 }, { "epoch": 0.35, "grad_norm": 0.7712467016321712, "learning_rate": 1.5091538581953526e-05, "loss": 0.7225, "step": 1997 }, { "epoch": 0.35, "grad_norm": 0.6279182877069985, "learning_rate": 1.5086649147831755e-05, "loss": 0.6933, "step": 1998 }, { "epoch": 0.35, "grad_norm": 0.7456078684377504, "learning_rate": 1.5081758072650967e-05, "loss": 0.7225, "step": 1999 }, { "epoch": 0.35, "grad_norm": 0.6902512183781513, "learning_rate": 1.5076865357989126e-05, "loss": 0.7169, "step": 2000 }, { "epoch": 0.35, "grad_norm": 0.7360224789904406, "learning_rate": 1.5071971005424719e-05, "loss": 0.7055, "step": 2001 }, { "epoch": 0.35, "grad_norm": 0.6948627412854241, "learning_rate": 1.506707501653677e-05, "loss": 0.7145, "step": 2002 }, { "epoch": 0.35, "grad_norm": 0.7182584676931578, "learning_rate": 1.5062177392904832e-05, "loss": 0.7496, "step": 2003 }, { "epoch": 0.35, "grad_norm": 0.7018152334935637, "learning_rate": 1.5057278136108968e-05, "loss": 0.6791, "step": 2004 }, { "epoch": 0.35, "grad_norm": 0.7123042427514472, "learning_rate": 1.5052377247729785e-05, "loss": 0.7321, "step": 2005 }, { "epoch": 0.35, "grad_norm": 0.7454731513669736, "learning_rate": 1.5047474729348417e-05, "loss": 0.6868, "step": 2006 }, { "epoch": 0.35, "grad_norm": 0.8440948790909236, "learning_rate": 1.5042570582546513e-05, "loss": 0.8385, "step": 2007 }, { "epoch": 0.35, "grad_norm": 0.7242557393493116, "learning_rate": 1.5037664808906258e-05, "loss": 0.7433, "step": 2008 }, { "epoch": 0.35, "grad_norm": 0.7977179279542953, "learning_rate": 1.5032757410010354e-05, "loss": 0.7773, "step": 2009 }, { "epoch": 0.35, "grad_norm": 0.7733499272337097, "learning_rate": 1.5027848387442028e-05, "loss": 0.6933, "step": 2010 }, { "epoch": 0.35, "grad_norm": 0.7552280327114969, "learning_rate": 1.5022937742785036e-05, "loss": 0.7112, "step": 2011 }, { "epoch": 0.35, "grad_norm": 0.7435606031348168, "learning_rate": 1.5018025477623655e-05, "loss": 0.7183, "step": 2012 }, { "epoch": 0.35, "grad_norm": 0.7619123063517244, "learning_rate": 1.501311159354268e-05, "loss": 0.7329, "step": 2013 }, { "epoch": 0.35, "grad_norm": 0.8500076652794074, "learning_rate": 1.5008196092127434e-05, "loss": 0.7235, "step": 2014 }, { "epoch": 0.35, "grad_norm": 0.7646327733119529, "learning_rate": 1.5003278974963764e-05, "loss": 0.7583, "step": 2015 }, { "epoch": 0.35, "grad_norm": 0.8347281690094923, "learning_rate": 1.4998360243638029e-05, "loss": 0.8075, "step": 2016 }, { "epoch": 0.35, "grad_norm": 0.7181108800612663, "learning_rate": 1.4993439899737117e-05, "loss": 0.6934, "step": 2017 }, { "epoch": 0.35, "grad_norm": 0.6005504771116612, "learning_rate": 1.4988517944848436e-05, "loss": 0.6886, "step": 2018 }, { "epoch": 0.35, "grad_norm": 0.7123322540531, "learning_rate": 1.4983594380559904e-05, "loss": 0.7296, "step": 2019 }, { "epoch": 0.35, "grad_norm": 0.8085352961142085, "learning_rate": 1.4978669208459973e-05, "loss": 0.7458, "step": 2020 }, { "epoch": 0.35, "grad_norm": 0.5910297931904384, "learning_rate": 1.4973742430137603e-05, "loss": 0.7106, "step": 2021 }, { "epoch": 0.35, "grad_norm": 0.7889342445642351, "learning_rate": 1.4968814047182273e-05, "loss": 0.778, "step": 2022 }, { "epoch": 0.35, "grad_norm": 0.6790172595532437, "learning_rate": 1.4963884061183984e-05, "loss": 0.7014, "step": 2023 }, { "epoch": 0.35, "grad_norm": 0.648634528993748, "learning_rate": 1.4958952473733256e-05, "loss": 0.7358, "step": 2024 }, { "epoch": 0.36, "grad_norm": 0.760720580994878, "learning_rate": 1.4954019286421116e-05, "loss": 0.7264, "step": 2025 }, { "epoch": 0.36, "grad_norm": 0.7103172811363859, "learning_rate": 1.4949084500839116e-05, "loss": 0.7517, "step": 2026 }, { "epoch": 0.36, "grad_norm": 0.7296891252485188, "learning_rate": 1.4944148118579325e-05, "loss": 0.7338, "step": 2027 }, { "epoch": 0.36, "grad_norm": 0.7040217439456037, "learning_rate": 1.4939210141234312e-05, "loss": 0.7086, "step": 2028 }, { "epoch": 0.36, "grad_norm": 0.7632603892221763, "learning_rate": 1.493427057039718e-05, "loss": 0.7127, "step": 2029 }, { "epoch": 0.36, "grad_norm": 0.7934579763254618, "learning_rate": 1.4929329407661532e-05, "loss": 0.7636, "step": 2030 }, { "epoch": 0.36, "grad_norm": 0.6990705450006336, "learning_rate": 1.4924386654621496e-05, "loss": 0.7746, "step": 2031 }, { "epoch": 0.36, "grad_norm": 0.7554774241085195, "learning_rate": 1.4919442312871705e-05, "loss": 0.7401, "step": 2032 }, { "epoch": 0.36, "grad_norm": 0.7649190545633071, "learning_rate": 1.4914496384007304e-05, "loss": 0.7977, "step": 2033 }, { "epoch": 0.36, "grad_norm": 0.6990341985996713, "learning_rate": 1.4909548869623953e-05, "loss": 0.694, "step": 2034 }, { "epoch": 0.36, "grad_norm": 0.7221761774059925, "learning_rate": 1.490459977131783e-05, "loss": 0.7218, "step": 2035 }, { "epoch": 0.36, "grad_norm": 0.7729379267063865, "learning_rate": 1.4899649090685607e-05, "loss": 0.6897, "step": 2036 }, { "epoch": 0.36, "grad_norm": 0.7801234097557065, "learning_rate": 1.4894696829324482e-05, "loss": 0.7582, "step": 2037 }, { "epoch": 0.36, "grad_norm": 0.6695622867487041, "learning_rate": 1.4889742988832159e-05, "loss": 0.7244, "step": 2038 }, { "epoch": 0.36, "grad_norm": 0.7398609398499874, "learning_rate": 1.4884787570806846e-05, "loss": 0.7439, "step": 2039 }, { "epoch": 0.36, "grad_norm": 0.7357663488314146, "learning_rate": 1.4879830576847272e-05, "loss": 0.6893, "step": 2040 }, { "epoch": 0.36, "grad_norm": 0.7107642538850485, "learning_rate": 1.4874872008552658e-05, "loss": 0.7287, "step": 2041 }, { "epoch": 0.36, "grad_norm": 0.7353527214684926, "learning_rate": 1.4869911867522747e-05, "loss": 0.7498, "step": 2042 }, { "epoch": 0.36, "grad_norm": 0.7006849693599703, "learning_rate": 1.4864950155357778e-05, "loss": 0.7322, "step": 2043 }, { "epoch": 0.36, "grad_norm": 0.7070320079119091, "learning_rate": 1.485998687365851e-05, "loss": 0.7136, "step": 2044 }, { "epoch": 0.36, "grad_norm": 0.7044983040980524, "learning_rate": 1.4855022024026196e-05, "loss": 0.722, "step": 2045 }, { "epoch": 0.36, "grad_norm": 0.7527161481774529, "learning_rate": 1.4850055608062599e-05, "loss": 0.8111, "step": 2046 }, { "epoch": 0.36, "grad_norm": 0.7790965295818147, "learning_rate": 1.4845087627369995e-05, "loss": 0.7022, "step": 2047 }, { "epoch": 0.36, "grad_norm": 0.6189982856207513, "learning_rate": 1.484011808355115e-05, "loss": 0.6529, "step": 2048 }, { "epoch": 0.36, "grad_norm": 0.6956495446104659, "learning_rate": 1.4835146978209349e-05, "loss": 0.753, "step": 2049 }, { "epoch": 0.36, "grad_norm": 0.7101180494177186, "learning_rate": 1.4830174312948372e-05, "loss": 0.7383, "step": 2050 }, { "epoch": 0.36, "grad_norm": 0.7178519768122911, "learning_rate": 1.48252000893725e-05, "loss": 0.7113, "step": 2051 }, { "epoch": 0.36, "grad_norm": 0.6697359623447462, "learning_rate": 1.4820224309086527e-05, "loss": 0.6736, "step": 2052 }, { "epoch": 0.36, "grad_norm": 0.690395694620125, "learning_rate": 1.4815246973695744e-05, "loss": 0.6806, "step": 2053 }, { "epoch": 0.36, "grad_norm": 0.7121209197364909, "learning_rate": 1.4810268084805937e-05, "loss": 0.7234, "step": 2054 }, { "epoch": 0.36, "grad_norm": 0.725958024859341, "learning_rate": 1.4805287644023404e-05, "loss": 0.7114, "step": 2055 }, { "epoch": 0.36, "grad_norm": 0.6461711715685327, "learning_rate": 1.4800305652954937e-05, "loss": 0.6752, "step": 2056 }, { "epoch": 0.36, "grad_norm": 0.7593961033084474, "learning_rate": 1.4795322113207833e-05, "loss": 0.7188, "step": 2057 }, { "epoch": 0.36, "grad_norm": 0.7825458957238552, "learning_rate": 1.4790337026389883e-05, "loss": 0.7412, "step": 2058 }, { "epoch": 0.36, "grad_norm": 0.6830042399290495, "learning_rate": 1.4785350394109381e-05, "loss": 0.7163, "step": 2059 }, { "epoch": 0.36, "grad_norm": 0.5803241050703224, "learning_rate": 1.478036221797512e-05, "loss": 0.6713, "step": 2060 }, { "epoch": 0.36, "grad_norm": 0.698100802058542, "learning_rate": 1.4775372499596383e-05, "loss": 0.7129, "step": 2061 }, { "epoch": 0.36, "grad_norm": 0.7425326382574537, "learning_rate": 1.4770381240582964e-05, "loss": 0.7583, "step": 2062 }, { "epoch": 0.36, "grad_norm": 1.8973600442849898, "learning_rate": 1.4765388442545148e-05, "loss": 0.7995, "step": 2063 }, { "epoch": 0.36, "grad_norm": 0.7069040713128016, "learning_rate": 1.4760394107093708e-05, "loss": 0.726, "step": 2064 }, { "epoch": 0.36, "grad_norm": 0.699625996695417, "learning_rate": 1.4755398235839931e-05, "loss": 0.7498, "step": 2065 }, { "epoch": 0.36, "grad_norm": 0.6387265056322635, "learning_rate": 1.4750400830395577e-05, "loss": 0.6931, "step": 2066 }, { "epoch": 0.36, "grad_norm": 0.6319723969968118, "learning_rate": 1.4745401892372924e-05, "loss": 0.7306, "step": 2067 }, { "epoch": 0.36, "grad_norm": 0.6895581082203649, "learning_rate": 1.474040142338473e-05, "loss": 0.7345, "step": 2068 }, { "epoch": 0.36, "grad_norm": 0.6062998484630243, "learning_rate": 1.4735399425044248e-05, "loss": 0.7048, "step": 2069 }, { "epoch": 0.36, "grad_norm": 0.7257633669915735, "learning_rate": 1.4730395898965229e-05, "loss": 0.7467, "step": 2070 }, { "epoch": 0.36, "grad_norm": 0.649202311402616, "learning_rate": 1.4725390846761913e-05, "loss": 0.7382, "step": 2071 }, { "epoch": 0.36, "grad_norm": 0.7113348502503187, "learning_rate": 1.472038427004904e-05, "loss": 0.746, "step": 2072 }, { "epoch": 0.36, "grad_norm": 0.6559122353389562, "learning_rate": 1.4715376170441829e-05, "loss": 0.7704, "step": 2073 }, { "epoch": 0.36, "grad_norm": 0.6453421828503672, "learning_rate": 1.4710366549555999e-05, "loss": 0.7236, "step": 2074 }, { "epoch": 0.36, "grad_norm": 0.6611303225878586, "learning_rate": 1.4705355409007758e-05, "loss": 0.7425, "step": 2075 }, { "epoch": 0.36, "grad_norm": 0.6919090912832708, "learning_rate": 1.470034275041381e-05, "loss": 0.724, "step": 2076 }, { "epoch": 0.36, "grad_norm": 0.7552045533960824, "learning_rate": 1.4695328575391337e-05, "loss": 0.7148, "step": 2077 }, { "epoch": 0.36, "grad_norm": 0.6814202582594446, "learning_rate": 1.4690312885558013e-05, "loss": 0.7156, "step": 2078 }, { "epoch": 0.36, "grad_norm": 0.7205892078663084, "learning_rate": 1.4685295682532015e-05, "loss": 0.7193, "step": 2079 }, { "epoch": 0.36, "grad_norm": 0.6578369714613123, "learning_rate": 1.4680276967931988e-05, "loss": 0.7143, "step": 2080 }, { "epoch": 0.36, "grad_norm": 0.791678825656993, "learning_rate": 1.4675256743377078e-05, "loss": 0.712, "step": 2081 }, { "epoch": 0.37, "grad_norm": 0.7037080030073689, "learning_rate": 1.4670235010486915e-05, "loss": 0.7164, "step": 2082 }, { "epoch": 0.37, "grad_norm": 0.8333742208162048, "learning_rate": 1.4665211770881609e-05, "loss": 0.7469, "step": 2083 }, { "epoch": 0.37, "grad_norm": 0.7435400897767561, "learning_rate": 1.4660187026181766e-05, "loss": 0.7516, "step": 2084 }, { "epoch": 0.37, "grad_norm": 0.6897898505683674, "learning_rate": 1.4655160778008478e-05, "loss": 0.6828, "step": 2085 }, { "epoch": 0.37, "grad_norm": 0.7312156866714145, "learning_rate": 1.4650133027983308e-05, "loss": 0.7124, "step": 2086 }, { "epoch": 0.37, "grad_norm": 0.6410438746305827, "learning_rate": 1.464510377772832e-05, "loss": 0.7295, "step": 2087 }, { "epoch": 0.37, "grad_norm": 0.6987845399657587, "learning_rate": 1.4640073028866051e-05, "loss": 0.6831, "step": 2088 }, { "epoch": 0.37, "grad_norm": 0.8269171543888246, "learning_rate": 1.4635040783019526e-05, "loss": 0.7546, "step": 2089 }, { "epoch": 0.37, "grad_norm": 0.7348168344345398, "learning_rate": 1.4630007041812253e-05, "loss": 0.7167, "step": 2090 }, { "epoch": 0.37, "grad_norm": 0.6604725255771705, "learning_rate": 1.4624971806868224e-05, "loss": 0.7052, "step": 2091 }, { "epoch": 0.37, "grad_norm": 0.685288551038183, "learning_rate": 1.4619935079811906e-05, "loss": 0.7465, "step": 2092 }, { "epoch": 0.37, "grad_norm": 0.9715181108376021, "learning_rate": 1.4614896862268255e-05, "loss": 0.7435, "step": 2093 }, { "epoch": 0.37, "grad_norm": 0.7541000429819498, "learning_rate": 1.4609857155862705e-05, "loss": 0.7327, "step": 2094 }, { "epoch": 0.37, "grad_norm": 0.64307130244925, "learning_rate": 1.4604815962221173e-05, "loss": 0.6824, "step": 2095 }, { "epoch": 0.37, "grad_norm": 0.7246425792713121, "learning_rate": 1.4599773282970047e-05, "loss": 0.774, "step": 2096 }, { "epoch": 0.37, "grad_norm": 0.8242266242818043, "learning_rate": 1.4594729119736203e-05, "loss": 0.8015, "step": 2097 }, { "epoch": 0.37, "grad_norm": 0.8282775310077742, "learning_rate": 1.4589683474146994e-05, "loss": 0.7386, "step": 2098 }, { "epoch": 0.37, "grad_norm": 0.6444631569232817, "learning_rate": 1.4584636347830252e-05, "loss": 0.7115, "step": 2099 }, { "epoch": 0.37, "grad_norm": 0.7413012375516175, "learning_rate": 1.4579587742414285e-05, "loss": 0.7572, "step": 2100 }, { "epoch": 0.37, "grad_norm": 0.7414763020558572, "learning_rate": 1.4574537659527871e-05, "loss": 0.7878, "step": 2101 }, { "epoch": 0.37, "grad_norm": 0.7112403030047766, "learning_rate": 1.456948610080028e-05, "loss": 0.7637, "step": 2102 }, { "epoch": 0.37, "grad_norm": 0.8100998752716444, "learning_rate": 1.4564433067861248e-05, "loss": 0.7854, "step": 2103 }, { "epoch": 0.37, "grad_norm": 0.7184707305179868, "learning_rate": 1.455937856234099e-05, "loss": 0.7535, "step": 2104 }, { "epoch": 0.37, "grad_norm": 0.6921407049473625, "learning_rate": 1.4554322585870191e-05, "loss": 0.7062, "step": 2105 }, { "epoch": 0.37, "grad_norm": 0.752560886208899, "learning_rate": 1.4549265140080017e-05, "loss": 0.7281, "step": 2106 }, { "epoch": 0.37, "grad_norm": 0.7335720821344569, "learning_rate": 1.4544206226602105e-05, "loss": 0.7442, "step": 2107 }, { "epoch": 0.37, "grad_norm": 0.819115612290902, "learning_rate": 1.4539145847068568e-05, "loss": 0.7224, "step": 2108 }, { "epoch": 0.37, "grad_norm": 0.676087446659887, "learning_rate": 1.4534084003111985e-05, "loss": 0.7149, "step": 2109 }, { "epoch": 0.37, "grad_norm": 0.8698679623577381, "learning_rate": 1.4529020696365417e-05, "loss": 0.7366, "step": 2110 }, { "epoch": 0.37, "grad_norm": 0.7296822718264654, "learning_rate": 1.452395592846239e-05, "loss": 0.7553, "step": 2111 }, { "epoch": 0.37, "grad_norm": 0.7226862940834334, "learning_rate": 1.4518889701036903e-05, "loss": 0.7011, "step": 2112 }, { "epoch": 0.37, "grad_norm": 0.774451364885461, "learning_rate": 1.4513822015723429e-05, "loss": 0.6952, "step": 2113 }, { "epoch": 0.37, "grad_norm": 0.6474507781777041, "learning_rate": 1.4508752874156908e-05, "loss": 0.6955, "step": 2114 }, { "epoch": 0.37, "grad_norm": 0.7150905352099216, "learning_rate": 1.4503682277972748e-05, "loss": 0.706, "step": 2115 }, { "epoch": 0.37, "grad_norm": 0.7320017860895511, "learning_rate": 1.4498610228806831e-05, "loss": 0.7738, "step": 2116 }, { "epoch": 0.37, "grad_norm": 0.7283444232943792, "learning_rate": 1.4493536728295512e-05, "loss": 0.7216, "step": 2117 }, { "epoch": 0.37, "grad_norm": 0.8490950344561371, "learning_rate": 1.4488461778075595e-05, "loss": 0.7999, "step": 2118 }, { "epoch": 0.37, "grad_norm": 0.7612990881546638, "learning_rate": 1.4483385379784374e-05, "loss": 0.7671, "step": 2119 }, { "epoch": 0.37, "grad_norm": 0.670153630504126, "learning_rate": 1.4478307535059599e-05, "loss": 0.6728, "step": 2120 }, { "epoch": 0.37, "grad_norm": 0.6763711162476099, "learning_rate": 1.4473228245539485e-05, "loss": 0.7434, "step": 2121 }, { "epoch": 0.37, "grad_norm": 0.6794612391489825, "learning_rate": 1.4468147512862725e-05, "loss": 0.6968, "step": 2122 }, { "epoch": 0.37, "grad_norm": 0.6693408464934483, "learning_rate": 1.4463065338668463e-05, "loss": 0.7183, "step": 2123 }, { "epoch": 0.37, "grad_norm": 0.7656847148217901, "learning_rate": 1.4457981724596311e-05, "loss": 0.7301, "step": 2124 }, { "epoch": 0.37, "grad_norm": 0.6751716409677763, "learning_rate": 1.4452896672286355e-05, "loss": 0.7048, "step": 2125 }, { "epoch": 0.37, "grad_norm": 0.6759682343424311, "learning_rate": 1.4447810183379138e-05, "loss": 0.7134, "step": 2126 }, { "epoch": 0.37, "grad_norm": 0.7636872107858956, "learning_rate": 1.4442722259515667e-05, "loss": 0.7361, "step": 2127 }, { "epoch": 0.37, "grad_norm": 0.671640834117611, "learning_rate": 1.443763290233741e-05, "loss": 0.675, "step": 2128 }, { "epoch": 0.37, "grad_norm": 0.7687935930024894, "learning_rate": 1.4432542113486304e-05, "loss": 0.7719, "step": 2129 }, { "epoch": 0.37, "grad_norm": 0.7669356707115488, "learning_rate": 1.4427449894604738e-05, "loss": 0.7552, "step": 2130 }, { "epoch": 0.37, "grad_norm": 0.6981495109688766, "learning_rate": 1.4422356247335571e-05, "loss": 0.7415, "step": 2131 }, { "epoch": 0.37, "grad_norm": 0.7738253301810663, "learning_rate": 1.4417261173322121e-05, "loss": 0.7625, "step": 2132 }, { "epoch": 0.37, "grad_norm": 0.6991950735793045, "learning_rate": 1.441216467420816e-05, "loss": 0.6858, "step": 2133 }, { "epoch": 0.37, "grad_norm": 0.7361296106109229, "learning_rate": 1.440706675163793e-05, "loss": 0.7407, "step": 2134 }, { "epoch": 0.37, "grad_norm": 0.7182765708144832, "learning_rate": 1.4401967407256128e-05, "loss": 0.7075, "step": 2135 }, { "epoch": 0.37, "grad_norm": 0.7110897853428468, "learning_rate": 1.4396866642707905e-05, "loss": 0.7154, "step": 2136 }, { "epoch": 0.37, "grad_norm": 0.7615873112146907, "learning_rate": 1.4391764459638873e-05, "loss": 0.6979, "step": 2137 }, { "epoch": 0.37, "grad_norm": 0.6774618438561336, "learning_rate": 1.4386660859695107e-05, "loss": 0.6944, "step": 2138 }, { "epoch": 0.38, "grad_norm": 0.710427162542709, "learning_rate": 1.4381555844523133e-05, "loss": 0.6523, "step": 2139 }, { "epoch": 0.38, "grad_norm": 0.7965061247270306, "learning_rate": 1.4376449415769933e-05, "loss": 0.7493, "step": 2140 }, { "epoch": 0.38, "grad_norm": 0.7362826737814067, "learning_rate": 1.437134157508295e-05, "loss": 0.7573, "step": 2141 }, { "epoch": 0.38, "grad_norm": 0.7379877485179942, "learning_rate": 1.4366232324110077e-05, "loss": 0.7281, "step": 2142 }, { "epoch": 0.38, "grad_norm": 0.7323668228199085, "learning_rate": 1.436112166449967e-05, "loss": 0.7151, "step": 2143 }, { "epoch": 0.38, "grad_norm": 0.8197054712167071, "learning_rate": 1.4356009597900526e-05, "loss": 0.7302, "step": 2144 }, { "epoch": 0.38, "grad_norm": 0.7771985844535266, "learning_rate": 1.4350896125961915e-05, "loss": 0.6657, "step": 2145 }, { "epoch": 0.38, "grad_norm": 0.6406337548806971, "learning_rate": 1.434578125033354e-05, "loss": 0.7141, "step": 2146 }, { "epoch": 0.38, "grad_norm": 0.7458534633175371, "learning_rate": 1.4340664972665572e-05, "loss": 0.6845, "step": 2147 }, { "epoch": 0.38, "grad_norm": 1.0118349968298501, "learning_rate": 1.4335547294608625e-05, "loss": 0.8524, "step": 2148 }, { "epoch": 0.38, "grad_norm": 0.6823615432896413, "learning_rate": 1.4330428217813773e-05, "loss": 0.707, "step": 2149 }, { "epoch": 0.38, "grad_norm": 0.8965102553002238, "learning_rate": 1.4325307743932535e-05, "loss": 0.6945, "step": 2150 }, { "epoch": 0.38, "grad_norm": 0.8369569635647724, "learning_rate": 1.4320185874616881e-05, "loss": 0.7249, "step": 2151 }, { "epoch": 0.38, "grad_norm": 0.615975614873533, "learning_rate": 1.4315062611519235e-05, "loss": 0.7059, "step": 2152 }, { "epoch": 0.38, "grad_norm": 0.8279652513650405, "learning_rate": 1.4309937956292464e-05, "loss": 0.7274, "step": 2153 }, { "epoch": 0.38, "grad_norm": 0.810607831456142, "learning_rate": 1.4304811910589896e-05, "loss": 0.7208, "step": 2154 }, { "epoch": 0.38, "grad_norm": 0.8649595502575466, "learning_rate": 1.4299684476065294e-05, "loss": 0.8114, "step": 2155 }, { "epoch": 0.38, "grad_norm": 0.8958670051889356, "learning_rate": 1.4294555654372876e-05, "loss": 0.7735, "step": 2156 }, { "epoch": 0.38, "grad_norm": 0.7543620629254074, "learning_rate": 1.4289425447167308e-05, "loss": 0.7523, "step": 2157 }, { "epoch": 0.38, "grad_norm": 0.8028198830588136, "learning_rate": 1.4284293856103703e-05, "loss": 0.7603, "step": 2158 }, { "epoch": 0.38, "grad_norm": 0.8255733469081601, "learning_rate": 1.4279160882837622e-05, "loss": 0.7385, "step": 2159 }, { "epoch": 0.38, "grad_norm": 0.6577618161735683, "learning_rate": 1.427402652902506e-05, "loss": 0.6811, "step": 2160 }, { "epoch": 0.38, "grad_norm": 0.7454285647093175, "learning_rate": 1.4268890796322475e-05, "loss": 0.7536, "step": 2161 }, { "epoch": 0.38, "grad_norm": 1.1067409559462653, "learning_rate": 1.4263753686386754e-05, "loss": 0.804, "step": 2162 }, { "epoch": 0.38, "grad_norm": 0.6878657536264345, "learning_rate": 1.4258615200875243e-05, "loss": 0.7047, "step": 2163 }, { "epoch": 0.38, "grad_norm": 0.6927159931381263, "learning_rate": 1.4253475341445722e-05, "loss": 0.7115, "step": 2164 }, { "epoch": 0.38, "grad_norm": 0.6792245111177933, "learning_rate": 1.4248334109756414e-05, "loss": 0.7355, "step": 2165 }, { "epoch": 0.38, "grad_norm": 0.9211252310706203, "learning_rate": 1.424319150746599e-05, "loss": 0.8263, "step": 2166 }, { "epoch": 0.38, "grad_norm": 0.6694415499479912, "learning_rate": 1.4238047536233559e-05, "loss": 0.7339, "step": 2167 }, { "epoch": 0.38, "grad_norm": 0.6727164376016272, "learning_rate": 1.4232902197718679e-05, "loss": 0.7057, "step": 2168 }, { "epoch": 0.38, "grad_norm": 0.725865785069817, "learning_rate": 1.4227755493581335e-05, "loss": 0.7412, "step": 2169 }, { "epoch": 0.38, "grad_norm": 0.7236970601136149, "learning_rate": 1.4222607425481967e-05, "loss": 0.753, "step": 2170 }, { "epoch": 0.38, "grad_norm": 0.736402521612048, "learning_rate": 1.4217457995081445e-05, "loss": 0.7275, "step": 2171 }, { "epoch": 0.38, "grad_norm": 0.6927836227479977, "learning_rate": 1.4212307204041088e-05, "loss": 0.7081, "step": 2172 }, { "epoch": 0.38, "grad_norm": 0.7013498501092824, "learning_rate": 1.4207155054022644e-05, "loss": 0.7207, "step": 2173 }, { "epoch": 0.38, "grad_norm": 0.7476268084614962, "learning_rate": 1.4202001546688302e-05, "loss": 0.7543, "step": 2174 }, { "epoch": 0.38, "grad_norm": 0.713014867693021, "learning_rate": 1.4196846683700694e-05, "loss": 0.7427, "step": 2175 }, { "epoch": 0.38, "grad_norm": 0.6502133540350608, "learning_rate": 1.419169046672289e-05, "loss": 0.6913, "step": 2176 }, { "epoch": 0.38, "grad_norm": 0.7023877971994874, "learning_rate": 1.4186532897418388e-05, "loss": 0.6989, "step": 2177 }, { "epoch": 0.38, "grad_norm": 0.6749938253721791, "learning_rate": 1.4181373977451127e-05, "loss": 0.7242, "step": 2178 }, { "epoch": 0.38, "grad_norm": 0.6873977249395283, "learning_rate": 1.4176213708485486e-05, "loss": 0.728, "step": 2179 }, { "epoch": 0.38, "grad_norm": 0.6639708022496743, "learning_rate": 1.4171052092186271e-05, "loss": 0.671, "step": 2180 }, { "epoch": 0.38, "grad_norm": 1.558632361017741, "learning_rate": 1.4165889130218731e-05, "loss": 0.733, "step": 2181 }, { "epoch": 0.38, "grad_norm": 0.6548470158696784, "learning_rate": 1.4160724824248542e-05, "loss": 0.7067, "step": 2182 }, { "epoch": 0.38, "grad_norm": 0.6774548391307514, "learning_rate": 1.4155559175941818e-05, "loss": 0.7326, "step": 2183 }, { "epoch": 0.38, "grad_norm": 0.6490306070569153, "learning_rate": 1.4150392186965105e-05, "loss": 0.7141, "step": 2184 }, { "epoch": 0.38, "grad_norm": 0.6703306060815707, "learning_rate": 1.414522385898538e-05, "loss": 0.682, "step": 2185 }, { "epoch": 0.38, "grad_norm": 0.7164795312899472, "learning_rate": 1.4140054193670054e-05, "loss": 0.7331, "step": 2186 }, { "epoch": 0.38, "grad_norm": 0.814294821071235, "learning_rate": 1.413488319268697e-05, "loss": 0.7581, "step": 2187 }, { "epoch": 0.38, "grad_norm": 0.7127136595570086, "learning_rate": 1.4129710857704399e-05, "loss": 0.7514, "step": 2188 }, { "epoch": 0.38, "grad_norm": 0.7234094790129846, "learning_rate": 1.4124537190391043e-05, "loss": 0.7453, "step": 2189 }, { "epoch": 0.38, "grad_norm": 0.7689176100932479, "learning_rate": 1.411936219241604e-05, "loss": 0.6853, "step": 2190 }, { "epoch": 0.38, "grad_norm": 0.8394851424791324, "learning_rate": 1.4114185865448948e-05, "loss": 0.7476, "step": 2191 }, { "epoch": 0.38, "grad_norm": 0.6662651229452621, "learning_rate": 1.4109008211159757e-05, "loss": 0.7457, "step": 2192 }, { "epoch": 0.38, "grad_norm": 0.8229621000244826, "learning_rate": 1.4103829231218891e-05, "loss": 0.6943, "step": 2193 }, { "epoch": 0.38, "grad_norm": 0.7392247100360231, "learning_rate": 1.409864892729719e-05, "loss": 0.7397, "step": 2194 }, { "epoch": 0.38, "grad_norm": 0.7519823621503527, "learning_rate": 1.4093467301065934e-05, "loss": 0.6767, "step": 2195 }, { "epoch": 0.39, "grad_norm": 0.8366143938971783, "learning_rate": 1.4088284354196823e-05, "loss": 0.7293, "step": 2196 }, { "epoch": 0.39, "grad_norm": 0.6400409102172189, "learning_rate": 1.4083100088361978e-05, "loss": 0.7282, "step": 2197 }, { "epoch": 0.39, "grad_norm": 0.7966635013337957, "learning_rate": 1.4077914505233958e-05, "loss": 0.7735, "step": 2198 }, { "epoch": 0.39, "grad_norm": 0.8070313179849543, "learning_rate": 1.4072727606485738e-05, "loss": 0.7778, "step": 2199 }, { "epoch": 0.39, "grad_norm": 0.8117857249524921, "learning_rate": 1.4067539393790718e-05, "loss": 0.7778, "step": 2200 }, { "epoch": 0.39, "grad_norm": 0.6737261158845725, "learning_rate": 1.4062349868822724e-05, "loss": 0.7174, "step": 2201 }, { "epoch": 0.39, "grad_norm": 0.7779700365313287, "learning_rate": 1.4057159033256007e-05, "loss": 0.7756, "step": 2202 }, { "epoch": 0.39, "grad_norm": 0.6471716813349117, "learning_rate": 1.4051966888765236e-05, "loss": 0.6882, "step": 2203 }, { "epoch": 0.39, "grad_norm": 0.7719981424125798, "learning_rate": 1.4046773437025508e-05, "loss": 0.6995, "step": 2204 }, { "epoch": 0.39, "grad_norm": 0.7014290824295639, "learning_rate": 1.4041578679712335e-05, "loss": 0.7308, "step": 2205 }, { "epoch": 0.39, "grad_norm": 0.6722708372442774, "learning_rate": 1.4036382618501656e-05, "loss": 0.7172, "step": 2206 }, { "epoch": 0.39, "grad_norm": 0.8289432702898906, "learning_rate": 1.4031185255069828e-05, "loss": 0.724, "step": 2207 }, { "epoch": 0.39, "grad_norm": 0.6613573593154712, "learning_rate": 1.402598659109363e-05, "loss": 0.7254, "step": 2208 }, { "epoch": 0.39, "grad_norm": 0.7072426905624368, "learning_rate": 1.402078662825026e-05, "loss": 0.7515, "step": 2209 }, { "epoch": 0.39, "grad_norm": 0.744069982082698, "learning_rate": 1.4015585368217332e-05, "loss": 0.7401, "step": 2210 }, { "epoch": 0.39, "grad_norm": 0.6443364128101892, "learning_rate": 1.4010382812672884e-05, "loss": 0.7147, "step": 2211 }, { "epoch": 0.39, "grad_norm": 0.8001396555127299, "learning_rate": 1.4005178963295362e-05, "loss": 0.7931, "step": 2212 }, { "epoch": 0.39, "grad_norm": 0.7497790115729023, "learning_rate": 1.3999973821763648e-05, "loss": 0.7163, "step": 2213 }, { "epoch": 0.39, "grad_norm": 1.0831385866205998, "learning_rate": 1.3994767389757018e-05, "loss": 0.7411, "step": 2214 }, { "epoch": 0.39, "grad_norm": 0.7579261282091356, "learning_rate": 1.398955966895518e-05, "loss": 0.708, "step": 2215 }, { "epoch": 0.39, "grad_norm": 0.7636624948343397, "learning_rate": 1.398435066103825e-05, "loss": 0.7267, "step": 2216 }, { "epoch": 0.39, "grad_norm": 0.7288568140404238, "learning_rate": 1.397914036768677e-05, "loss": 0.7495, "step": 2217 }, { "epoch": 0.39, "grad_norm": 0.7858518041283405, "learning_rate": 1.3973928790581683e-05, "loss": 0.7775, "step": 2218 }, { "epoch": 0.39, "grad_norm": 0.7331830269329576, "learning_rate": 1.3968715931404352e-05, "loss": 0.7199, "step": 2219 }, { "epoch": 0.39, "grad_norm": 0.6926937823937076, "learning_rate": 1.396350179183656e-05, "loss": 0.7105, "step": 2220 }, { "epoch": 0.39, "grad_norm": 0.7310060836181793, "learning_rate": 1.395828637356049e-05, "loss": 0.7555, "step": 2221 }, { "epoch": 0.39, "grad_norm": 0.7457909996185009, "learning_rate": 1.3953069678258748e-05, "loss": 0.7396, "step": 2222 }, { "epoch": 0.39, "grad_norm": 0.7623829229826191, "learning_rate": 1.394785170761435e-05, "loss": 0.7242, "step": 2223 }, { "epoch": 0.39, "grad_norm": 0.8162033561282758, "learning_rate": 1.3942632463310719e-05, "loss": 0.7506, "step": 2224 }, { "epoch": 0.39, "grad_norm": 0.8172242138626911, "learning_rate": 1.3937411947031697e-05, "loss": 0.7495, "step": 2225 }, { "epoch": 0.39, "grad_norm": 0.6872305997182108, "learning_rate": 1.3932190160461521e-05, "loss": 0.7067, "step": 2226 }, { "epoch": 0.39, "grad_norm": 0.6056912379667825, "learning_rate": 1.3926967105284859e-05, "loss": 0.6701, "step": 2227 }, { "epoch": 0.39, "grad_norm": 0.6734506037071749, "learning_rate": 1.3921742783186774e-05, "loss": 0.6637, "step": 2228 }, { "epoch": 0.39, "grad_norm": 0.7997869417281829, "learning_rate": 1.3916517195852736e-05, "loss": 0.7459, "step": 2229 }, { "epoch": 0.39, "grad_norm": 0.7006362701919371, "learning_rate": 1.3911290344968636e-05, "loss": 0.7293, "step": 2230 }, { "epoch": 0.39, "grad_norm": 0.6930022120424849, "learning_rate": 1.3906062232220762e-05, "loss": 0.7173, "step": 2231 }, { "epoch": 0.39, "grad_norm": 0.7477081356784656, "learning_rate": 1.3900832859295814e-05, "loss": 0.787, "step": 2232 }, { "epoch": 0.39, "grad_norm": 0.7293468746249121, "learning_rate": 1.389560222788089e-05, "loss": 0.6971, "step": 2233 }, { "epoch": 0.39, "grad_norm": 0.7819266141437773, "learning_rate": 1.389037033966351e-05, "loss": 0.7386, "step": 2234 }, { "epoch": 0.39, "grad_norm": 0.6405687236251681, "learning_rate": 1.3885137196331581e-05, "loss": 0.7174, "step": 2235 }, { "epoch": 0.39, "grad_norm": 0.6657764882117962, "learning_rate": 1.3879902799573435e-05, "loss": 0.7326, "step": 2236 }, { "epoch": 0.39, "grad_norm": 0.7238342099991129, "learning_rate": 1.3874667151077787e-05, "loss": 0.7364, "step": 2237 }, { "epoch": 0.39, "grad_norm": 0.8041460106161582, "learning_rate": 1.386943025253377e-05, "loss": 0.7612, "step": 2238 }, { "epoch": 0.39, "grad_norm": 0.6775569987776883, "learning_rate": 1.3864192105630916e-05, "loss": 0.7435, "step": 2239 }, { "epoch": 0.39, "grad_norm": 0.688475620165104, "learning_rate": 1.3858952712059166e-05, "loss": 0.7232, "step": 2240 }, { "epoch": 0.39, "grad_norm": 0.730748122481352, "learning_rate": 1.385371207350885e-05, "loss": 0.7609, "step": 2241 }, { "epoch": 0.39, "grad_norm": 0.6721459689899024, "learning_rate": 1.3848470191670709e-05, "loss": 0.7521, "step": 2242 }, { "epoch": 0.39, "grad_norm": 0.6829281111220477, "learning_rate": 1.3843227068235887e-05, "loss": 0.7246, "step": 2243 }, { "epoch": 0.39, "grad_norm": 0.719431487294415, "learning_rate": 1.383798270489592e-05, "loss": 0.7001, "step": 2244 }, { "epoch": 0.39, "grad_norm": 0.7704146085467647, "learning_rate": 1.3832737103342753e-05, "loss": 0.7399, "step": 2245 }, { "epoch": 0.39, "grad_norm": 0.7172149167407539, "learning_rate": 1.3827490265268724e-05, "loss": 0.691, "step": 2246 }, { "epoch": 0.39, "grad_norm": 0.8023821990002282, "learning_rate": 1.3822242192366572e-05, "loss": 0.7545, "step": 2247 }, { "epoch": 0.39, "grad_norm": 0.7251059398331451, "learning_rate": 1.3816992886329436e-05, "loss": 0.7152, "step": 2248 }, { "epoch": 0.39, "grad_norm": 0.7070403647928714, "learning_rate": 1.3811742348850853e-05, "loss": 0.7104, "step": 2249 }, { "epoch": 0.39, "grad_norm": 0.806052418890586, "learning_rate": 1.3806490581624755e-05, "loss": 0.7226, "step": 2250 }, { "epoch": 0.39, "grad_norm": 0.6489841013927836, "learning_rate": 1.3801237586345466e-05, "loss": 0.707, "step": 2251 }, { "epoch": 0.39, "grad_norm": 0.6884232682118497, "learning_rate": 1.379598336470772e-05, "loss": 0.6928, "step": 2252 }, { "epoch": 0.4, "grad_norm": 0.7338242566353534, "learning_rate": 1.3790727918406632e-05, "loss": 0.7354, "step": 2253 }, { "epoch": 0.4, "grad_norm": 0.7402276163874215, "learning_rate": 1.3785471249137722e-05, "loss": 0.7325, "step": 2254 }, { "epoch": 0.4, "grad_norm": 0.7846923505488829, "learning_rate": 1.3780213358596903e-05, "loss": 0.7059, "step": 2255 }, { "epoch": 0.4, "grad_norm": 0.6588803769851069, "learning_rate": 1.3774954248480473e-05, "loss": 0.6978, "step": 2256 }, { "epoch": 0.4, "grad_norm": 0.7617396194165048, "learning_rate": 1.3769693920485136e-05, "loss": 0.7222, "step": 2257 }, { "epoch": 0.4, "grad_norm": 0.6644974386060717, "learning_rate": 1.3764432376307984e-05, "loss": 0.6724, "step": 2258 }, { "epoch": 0.4, "grad_norm": 0.681132455837786, "learning_rate": 1.3759169617646497e-05, "loss": 0.7602, "step": 2259 }, { "epoch": 0.4, "grad_norm": 0.6829738574675814, "learning_rate": 1.3753905646198553e-05, "loss": 0.6938, "step": 2260 }, { "epoch": 0.4, "grad_norm": 0.6444548185000165, "learning_rate": 1.3748640463662419e-05, "loss": 0.7201, "step": 2261 }, { "epoch": 0.4, "grad_norm": 0.7077586839512241, "learning_rate": 1.374337407173675e-05, "loss": 0.6681, "step": 2262 }, { "epoch": 0.4, "grad_norm": 0.7298465407193, "learning_rate": 1.3738106472120601e-05, "loss": 0.7302, "step": 2263 }, { "epoch": 0.4, "grad_norm": 0.7799323196081233, "learning_rate": 1.37328376665134e-05, "loss": 0.7835, "step": 2264 }, { "epoch": 0.4, "grad_norm": 0.7033756057283252, "learning_rate": 1.3727567656614979e-05, "loss": 0.677, "step": 2265 }, { "epoch": 0.4, "grad_norm": 0.6719613792347849, "learning_rate": 1.3722296444125554e-05, "loss": 0.6985, "step": 2266 }, { "epoch": 0.4, "grad_norm": 0.7293412746896747, "learning_rate": 1.3717024030745722e-05, "loss": 0.7312, "step": 2267 }, { "epoch": 0.4, "grad_norm": 0.7098142813694521, "learning_rate": 1.371175041817648e-05, "loss": 0.746, "step": 2268 }, { "epoch": 0.4, "grad_norm": 0.6944520675415881, "learning_rate": 1.3706475608119203e-05, "loss": 0.7456, "step": 2269 }, { "epoch": 0.4, "grad_norm": 0.6773032248286235, "learning_rate": 1.370119960227565e-05, "loss": 0.7023, "step": 2270 }, { "epoch": 0.4, "grad_norm": 0.6314803769392858, "learning_rate": 1.3695922402347976e-05, "loss": 0.7066, "step": 2271 }, { "epoch": 0.4, "grad_norm": 0.7221805694145836, "learning_rate": 1.3690644010038719e-05, "loss": 0.6949, "step": 2272 }, { "epoch": 0.4, "grad_norm": 0.7182170098088343, "learning_rate": 1.368536442705079e-05, "loss": 0.7066, "step": 2273 }, { "epoch": 0.4, "grad_norm": 0.6796548462858892, "learning_rate": 1.3680083655087495e-05, "loss": 0.7155, "step": 2274 }, { "epoch": 0.4, "grad_norm": 0.7176345852811481, "learning_rate": 1.3674801695852524e-05, "loss": 0.7408, "step": 2275 }, { "epoch": 0.4, "grad_norm": 0.7005792145822987, "learning_rate": 1.3669518551049945e-05, "loss": 0.7345, "step": 2276 }, { "epoch": 0.4, "grad_norm": 0.6845887182008077, "learning_rate": 1.3664234222384211e-05, "loss": 0.7373, "step": 2277 }, { "epoch": 0.4, "grad_norm": 0.6719471178066246, "learning_rate": 1.3658948711560158e-05, "loss": 0.7587, "step": 2278 }, { "epoch": 0.4, "grad_norm": 0.636217088170897, "learning_rate": 1.3653662020282999e-05, "loss": 0.7262, "step": 2279 }, { "epoch": 0.4, "grad_norm": 0.7570233619485549, "learning_rate": 1.364837415025833e-05, "loss": 0.753, "step": 2280 }, { "epoch": 0.4, "grad_norm": 0.7141796310135061, "learning_rate": 1.3643085103192136e-05, "loss": 0.7267, "step": 2281 }, { "epoch": 0.4, "grad_norm": 0.602311214798034, "learning_rate": 1.3637794880790765e-05, "loss": 0.7017, "step": 2282 }, { "epoch": 0.4, "grad_norm": 0.6807265309764913, "learning_rate": 1.3632503484760956e-05, "loss": 0.7241, "step": 2283 }, { "epoch": 0.4, "grad_norm": 0.6818456355111752, "learning_rate": 1.3627210916809824e-05, "loss": 0.7025, "step": 2284 }, { "epoch": 0.4, "grad_norm": 0.8326223882686892, "learning_rate": 1.3621917178644858e-05, "loss": 0.7359, "step": 2285 }, { "epoch": 0.4, "grad_norm": 0.6916724191951673, "learning_rate": 1.3616622271973934e-05, "loss": 0.7221, "step": 2286 }, { "epoch": 0.4, "grad_norm": 0.7317106665765071, "learning_rate": 1.3611326198505301e-05, "loss": 0.8041, "step": 2287 }, { "epoch": 0.4, "grad_norm": 0.7541448686933699, "learning_rate": 1.3606028959947575e-05, "loss": 0.7426, "step": 2288 }, { "epoch": 0.4, "grad_norm": 0.6134635636320508, "learning_rate": 1.3600730558009758e-05, "loss": 0.6492, "step": 2289 }, { "epoch": 0.4, "grad_norm": 0.7289292781280018, "learning_rate": 1.359543099440123e-05, "loss": 0.7116, "step": 2290 }, { "epoch": 0.4, "grad_norm": 0.6889961392992617, "learning_rate": 1.3590130270831733e-05, "loss": 0.7212, "step": 2291 }, { "epoch": 0.4, "grad_norm": 0.6527038240610591, "learning_rate": 1.3584828389011395e-05, "loss": 0.7257, "step": 2292 }, { "epoch": 0.4, "grad_norm": 0.6565879509312308, "learning_rate": 1.3579525350650714e-05, "loss": 0.6902, "step": 2293 }, { "epoch": 0.4, "grad_norm": 0.7761210606859039, "learning_rate": 1.3574221157460559e-05, "loss": 0.7505, "step": 2294 }, { "epoch": 0.4, "grad_norm": 0.8139854287245177, "learning_rate": 1.3568915811152177e-05, "loss": 0.7086, "step": 2295 }, { "epoch": 0.4, "grad_norm": 0.6847015351290453, "learning_rate": 1.3563609313437178e-05, "loss": 0.6622, "step": 2296 }, { "epoch": 0.4, "grad_norm": 0.6887425131409302, "learning_rate": 1.355830166602755e-05, "loss": 0.6938, "step": 2297 }, { "epoch": 0.4, "grad_norm": 0.6980458836383652, "learning_rate": 1.3552992870635653e-05, "loss": 0.7285, "step": 2298 }, { "epoch": 0.4, "grad_norm": 0.7195627159181451, "learning_rate": 1.3547682928974215e-05, "loss": 0.7694, "step": 2299 }, { "epoch": 0.4, "grad_norm": 0.7320559431071314, "learning_rate": 1.3542371842756332e-05, "loss": 0.7744, "step": 2300 }, { "epoch": 0.4, "grad_norm": 0.7299450934004963, "learning_rate": 1.353705961369547e-05, "loss": 0.7392, "step": 2301 }, { "epoch": 0.4, "grad_norm": 0.7400449050016393, "learning_rate": 1.353174624350547e-05, "loss": 0.6959, "step": 2302 }, { "epoch": 0.4, "grad_norm": 0.6394718401734768, "learning_rate": 1.352643173390053e-05, "loss": 0.7255, "step": 2303 }, { "epoch": 0.4, "grad_norm": 0.8205883510853714, "learning_rate": 1.3521116086595223e-05, "loss": 0.7543, "step": 2304 }, { "epoch": 0.4, "grad_norm": 0.7102579530228583, "learning_rate": 1.3515799303304491e-05, "loss": 0.7343, "step": 2305 }, { "epoch": 0.4, "grad_norm": 0.6681380752265207, "learning_rate": 1.3510481385743634e-05, "loss": 0.7182, "step": 2306 }, { "epoch": 0.4, "grad_norm": 0.6993928777601985, "learning_rate": 1.350516233562833e-05, "loss": 0.7038, "step": 2307 }, { "epoch": 0.4, "grad_norm": 0.6497288147542657, "learning_rate": 1.3499842154674607e-05, "loss": 0.6774, "step": 2308 }, { "epoch": 0.4, "grad_norm": 0.6921281276940463, "learning_rate": 1.3494520844598873e-05, "loss": 0.727, "step": 2309 }, { "epoch": 0.41, "grad_norm": 0.6676333724645953, "learning_rate": 1.3489198407117891e-05, "loss": 0.7673, "step": 2310 }, { "epoch": 0.41, "grad_norm": 0.6814929789318541, "learning_rate": 1.3483874843948787e-05, "loss": 0.7342, "step": 2311 }, { "epoch": 0.41, "grad_norm": 0.6755613185210761, "learning_rate": 1.3478550156809058e-05, "loss": 0.7031, "step": 2312 }, { "epoch": 0.41, "grad_norm": 0.7640544258591926, "learning_rate": 1.3473224347416561e-05, "loss": 0.7551, "step": 2313 }, { "epoch": 0.41, "grad_norm": 0.7768728497742771, "learning_rate": 1.3467897417489506e-05, "loss": 0.7529, "step": 2314 }, { "epoch": 0.41, "grad_norm": 0.7851402742716136, "learning_rate": 1.3462569368746477e-05, "loss": 0.8019, "step": 2315 }, { "epoch": 0.41, "grad_norm": 0.7376089125873085, "learning_rate": 1.3457240202906412e-05, "loss": 0.7009, "step": 2316 }, { "epoch": 0.41, "grad_norm": 0.6305607068461869, "learning_rate": 1.3451909921688609e-05, "loss": 0.6988, "step": 2317 }, { "epoch": 0.41, "grad_norm": 0.6802142969079492, "learning_rate": 1.344657852681273e-05, "loss": 0.7048, "step": 2318 }, { "epoch": 0.41, "grad_norm": 0.642361316837244, "learning_rate": 1.3441246019998797e-05, "loss": 0.7455, "step": 2319 }, { "epoch": 0.41, "grad_norm": 0.6109534143501324, "learning_rate": 1.3435912402967182e-05, "loss": 0.7062, "step": 2320 }, { "epoch": 0.41, "grad_norm": 0.6987157452615502, "learning_rate": 1.3430577677438622e-05, "loss": 0.716, "step": 2321 }, { "epoch": 0.41, "grad_norm": 0.7232775043139741, "learning_rate": 1.3425241845134213e-05, "loss": 0.739, "step": 2322 }, { "epoch": 0.41, "grad_norm": 0.708108096369702, "learning_rate": 1.3419904907775407e-05, "loss": 0.7533, "step": 2323 }, { "epoch": 0.41, "grad_norm": 0.7270532314010189, "learning_rate": 1.3414566867084007e-05, "loss": 0.7363, "step": 2324 }, { "epoch": 0.41, "grad_norm": 0.7265672226783425, "learning_rate": 1.3409227724782179e-05, "loss": 0.7297, "step": 2325 }, { "epoch": 0.41, "grad_norm": 0.7183417346709867, "learning_rate": 1.340388748259244e-05, "loss": 0.7611, "step": 2326 }, { "epoch": 0.41, "grad_norm": 0.8350082376224714, "learning_rate": 1.3398546142237665e-05, "loss": 0.7932, "step": 2327 }, { "epoch": 0.41, "grad_norm": 0.6678368715800845, "learning_rate": 1.3393203705441082e-05, "loss": 0.6724, "step": 2328 }, { "epoch": 0.41, "grad_norm": 0.65571157845097, "learning_rate": 1.338786017392627e-05, "loss": 0.6844, "step": 2329 }, { "epoch": 0.41, "grad_norm": 0.7221769744702788, "learning_rate": 1.3382515549417166e-05, "loss": 0.6925, "step": 2330 }, { "epoch": 0.41, "grad_norm": 0.6747626071102608, "learning_rate": 1.3377169833638054e-05, "loss": 0.7325, "step": 2331 }, { "epoch": 0.41, "grad_norm": 0.7086110941316548, "learning_rate": 1.337182302831358e-05, "loss": 0.7092, "step": 2332 }, { "epoch": 0.41, "grad_norm": 0.6898999615026636, "learning_rate": 1.3366475135168726e-05, "loss": 0.6918, "step": 2333 }, { "epoch": 0.41, "grad_norm": 0.9455048736264099, "learning_rate": 1.3361126155928843e-05, "loss": 0.7754, "step": 2334 }, { "epoch": 0.41, "grad_norm": 0.6959739710528368, "learning_rate": 1.3355776092319613e-05, "loss": 0.7313, "step": 2335 }, { "epoch": 0.41, "grad_norm": 0.7120140040097467, "learning_rate": 1.3350424946067086e-05, "loss": 0.7081, "step": 2336 }, { "epoch": 0.41, "grad_norm": 0.6958876601767419, "learning_rate": 1.334507271889765e-05, "loss": 0.6935, "step": 2337 }, { "epoch": 0.41, "grad_norm": 0.7715670441318386, "learning_rate": 1.3339719412538046e-05, "loss": 0.7411, "step": 2338 }, { "epoch": 0.41, "grad_norm": 0.7045963985282403, "learning_rate": 1.3334365028715364e-05, "loss": 0.7576, "step": 2339 }, { "epoch": 0.41, "grad_norm": 0.7812009781144019, "learning_rate": 1.3329009569157035e-05, "loss": 0.8024, "step": 2340 }, { "epoch": 0.41, "grad_norm": 0.7219785546088793, "learning_rate": 1.3323653035590844e-05, "loss": 0.7399, "step": 2341 }, { "epoch": 0.41, "grad_norm": 0.7569365508190538, "learning_rate": 1.3318295429744921e-05, "loss": 0.785, "step": 2342 }, { "epoch": 0.41, "grad_norm": 0.7034514282212251, "learning_rate": 1.3312936753347745e-05, "loss": 0.6875, "step": 2343 }, { "epoch": 0.41, "grad_norm": 0.6776200322476766, "learning_rate": 1.3307577008128128e-05, "loss": 0.6981, "step": 2344 }, { "epoch": 0.41, "grad_norm": 0.6865018279972217, "learning_rate": 1.3302216195815244e-05, "loss": 0.6841, "step": 2345 }, { "epoch": 0.41, "grad_norm": 0.6699932323048641, "learning_rate": 1.3296854318138596e-05, "loss": 0.6978, "step": 2346 }, { "epoch": 0.41, "grad_norm": 0.641702278651155, "learning_rate": 1.3291491376828043e-05, "loss": 0.7442, "step": 2347 }, { "epoch": 0.41, "grad_norm": 0.7289600109619448, "learning_rate": 1.328612737361378e-05, "loss": 0.703, "step": 2348 }, { "epoch": 0.41, "grad_norm": 0.6891202411670234, "learning_rate": 1.328076231022634e-05, "loss": 0.7304, "step": 2349 }, { "epoch": 0.41, "grad_norm": 0.7023429402195688, "learning_rate": 1.3275396188396615e-05, "loss": 0.6693, "step": 2350 }, { "epoch": 0.41, "grad_norm": 0.7099527556660274, "learning_rate": 1.3270029009855822e-05, "loss": 0.7245, "step": 2351 }, { "epoch": 0.41, "grad_norm": 0.8222040338014409, "learning_rate": 1.3264660776335523e-05, "loss": 0.8539, "step": 2352 }, { "epoch": 0.41, "grad_norm": 0.7646090840450225, "learning_rate": 1.3259291489567626e-05, "loss": 0.752, "step": 2353 }, { "epoch": 0.41, "grad_norm": 0.7077004489708681, "learning_rate": 1.325392115128437e-05, "loss": 0.6861, "step": 2354 }, { "epoch": 0.41, "grad_norm": 0.8989779757035115, "learning_rate": 1.3248549763218345e-05, "loss": 0.8174, "step": 2355 }, { "epoch": 0.41, "grad_norm": 0.7185680083867938, "learning_rate": 1.3243177327102465e-05, "loss": 0.7093, "step": 2356 }, { "epoch": 0.41, "grad_norm": 0.758154145144408, "learning_rate": 1.3237803844669997e-05, "loss": 0.7485, "step": 2357 }, { "epoch": 0.41, "grad_norm": 0.7089891436576027, "learning_rate": 1.3232429317654531e-05, "loss": 0.7692, "step": 2358 }, { "epoch": 0.41, "grad_norm": 0.6541759137963825, "learning_rate": 1.3227053747790006e-05, "loss": 0.6973, "step": 2359 }, { "epoch": 0.41, "grad_norm": 0.8877253688383792, "learning_rate": 1.3221677136810698e-05, "loss": 0.7476, "step": 2360 }, { "epoch": 0.41, "grad_norm": 0.7286850949706205, "learning_rate": 1.3216299486451201e-05, "loss": 0.719, "step": 2361 }, { "epoch": 0.41, "grad_norm": 0.5874420332451844, "learning_rate": 1.3210920798446467e-05, "loss": 0.6889, "step": 2362 }, { "epoch": 0.41, "grad_norm": 0.6677226251942857, "learning_rate": 1.3205541074531769e-05, "loss": 0.6966, "step": 2363 }, { "epoch": 0.41, "grad_norm": 0.7296709813106913, "learning_rate": 1.320016031644272e-05, "loss": 0.7243, "step": 2364 }, { "epoch": 0.41, "grad_norm": 0.7234033603808813, "learning_rate": 1.3194778525915262e-05, "loss": 0.7068, "step": 2365 }, { "epoch": 0.41, "grad_norm": 0.6704867789154204, "learning_rate": 1.3189395704685677e-05, "loss": 0.7263, "step": 2366 }, { "epoch": 0.42, "grad_norm": 0.6477864199136458, "learning_rate": 1.3184011854490569e-05, "loss": 0.69, "step": 2367 }, { "epoch": 0.42, "grad_norm": 0.6293910138917186, "learning_rate": 1.3178626977066885e-05, "loss": 0.663, "step": 2368 }, { "epoch": 0.42, "grad_norm": 0.689976345552514, "learning_rate": 1.3173241074151902e-05, "loss": 0.7348, "step": 2369 }, { "epoch": 0.42, "grad_norm": 0.9752829974008477, "learning_rate": 1.3167854147483214e-05, "loss": 0.7333, "step": 2370 }, { "epoch": 0.42, "grad_norm": 0.7598553515762735, "learning_rate": 1.3162466198798765e-05, "loss": 0.731, "step": 2371 }, { "epoch": 0.42, "grad_norm": 0.7712509447109908, "learning_rate": 1.3157077229836817e-05, "loss": 0.7273, "step": 2372 }, { "epoch": 0.42, "grad_norm": 0.6633454891202795, "learning_rate": 1.3151687242335963e-05, "loss": 0.6978, "step": 2373 }, { "epoch": 0.42, "grad_norm": 0.7074520448408939, "learning_rate": 1.3146296238035124e-05, "loss": 0.7067, "step": 2374 }, { "epoch": 0.42, "grad_norm": 0.6986050377459303, "learning_rate": 1.3140904218673552e-05, "loss": 0.7302, "step": 2375 }, { "epoch": 0.42, "grad_norm": 0.6510284247257021, "learning_rate": 1.3135511185990825e-05, "loss": 0.683, "step": 2376 }, { "epoch": 0.42, "grad_norm": 0.7454568775892058, "learning_rate": 1.3130117141726848e-05, "loss": 0.7203, "step": 2377 }, { "epoch": 0.42, "grad_norm": 0.7621891335488251, "learning_rate": 1.312472208762185e-05, "loss": 0.7597, "step": 2378 }, { "epoch": 0.42, "grad_norm": 0.7190573198464152, "learning_rate": 1.311932602541639e-05, "loss": 0.7367, "step": 2379 }, { "epoch": 0.42, "grad_norm": 0.6289054364108255, "learning_rate": 1.3113928956851351e-05, "loss": 0.7137, "step": 2380 }, { "epoch": 0.42, "grad_norm": 0.6148626430202413, "learning_rate": 1.3108530883667936e-05, "loss": 0.6635, "step": 2381 }, { "epoch": 0.42, "grad_norm": 0.6970307614387126, "learning_rate": 1.3103131807607677e-05, "loss": 0.7539, "step": 2382 }, { "epoch": 0.42, "grad_norm": 0.67890119872192, "learning_rate": 1.3097731730412433e-05, "loss": 0.7284, "step": 2383 }, { "epoch": 0.42, "grad_norm": 0.64659510831162, "learning_rate": 1.3092330653824377e-05, "loss": 0.704, "step": 2384 }, { "epoch": 0.42, "grad_norm": 0.7098230002104676, "learning_rate": 1.308692857958601e-05, "loss": 0.7357, "step": 2385 }, { "epoch": 0.42, "grad_norm": 0.6781010871287947, "learning_rate": 1.3081525509440151e-05, "loss": 0.6705, "step": 2386 }, { "epoch": 0.42, "grad_norm": 0.6539325876131098, "learning_rate": 1.3076121445129948e-05, "loss": 0.7217, "step": 2387 }, { "epoch": 0.42, "grad_norm": 0.6299518019084265, "learning_rate": 1.307071638839886e-05, "loss": 0.7032, "step": 2388 }, { "epoch": 0.42, "grad_norm": 0.6140045561885484, "learning_rate": 1.3065310340990678e-05, "loss": 0.6959, "step": 2389 }, { "epoch": 0.42, "grad_norm": 0.7841518040239945, "learning_rate": 1.3059903304649497e-05, "loss": 0.7269, "step": 2390 }, { "epoch": 0.42, "grad_norm": 0.7584955920690053, "learning_rate": 1.3054495281119744e-05, "loss": 0.7716, "step": 2391 }, { "epoch": 0.42, "grad_norm": 1.002300221513755, "learning_rate": 1.304908627214616e-05, "loss": 0.7336, "step": 2392 }, { "epoch": 0.42, "grad_norm": 0.6660963648320816, "learning_rate": 1.3043676279473804e-05, "loss": 0.6923, "step": 2393 }, { "epoch": 0.42, "grad_norm": 0.7142337165407261, "learning_rate": 1.303826530484805e-05, "loss": 0.6882, "step": 2394 }, { "epoch": 0.42, "grad_norm": 0.6923757537343708, "learning_rate": 1.3032853350014597e-05, "loss": 0.6851, "step": 2395 }, { "epoch": 0.42, "grad_norm": 0.7201847977142652, "learning_rate": 1.3027440416719449e-05, "loss": 0.7366, "step": 2396 }, { "epoch": 0.42, "grad_norm": 0.752411971654645, "learning_rate": 1.3022026506708932e-05, "loss": 0.736, "step": 2397 }, { "epoch": 0.42, "grad_norm": 0.764779364947937, "learning_rate": 1.3016611621729686e-05, "loss": 0.7379, "step": 2398 }, { "epoch": 0.42, "grad_norm": 0.693408803568944, "learning_rate": 1.3011195763528666e-05, "loss": 0.7272, "step": 2399 }, { "epoch": 0.42, "grad_norm": 0.6614633779721318, "learning_rate": 1.3005778933853142e-05, "loss": 0.7094, "step": 2400 }, { "epoch": 0.42, "grad_norm": 0.6938519997650747, "learning_rate": 1.3000361134450699e-05, "loss": 0.6871, "step": 2401 }, { "epoch": 0.42, "grad_norm": 0.6828814091619929, "learning_rate": 1.2994942367069224e-05, "loss": 0.7223, "step": 2402 }, { "epoch": 0.42, "grad_norm": 0.7413651800094773, "learning_rate": 1.2989522633456929e-05, "loss": 0.7404, "step": 2403 }, { "epoch": 0.42, "grad_norm": 0.78029943101753, "learning_rate": 1.2984101935362335e-05, "loss": 0.8184, "step": 2404 }, { "epoch": 0.42, "grad_norm": 0.6601770425725246, "learning_rate": 1.2978680274534268e-05, "loss": 0.6828, "step": 2405 }, { "epoch": 0.42, "grad_norm": 0.6426819580390787, "learning_rate": 1.2973257652721869e-05, "loss": 0.6713, "step": 2406 }, { "epoch": 0.42, "grad_norm": 0.6494402765431891, "learning_rate": 1.2967834071674594e-05, "loss": 0.6904, "step": 2407 }, { "epoch": 0.42, "grad_norm": 0.5751640122241534, "learning_rate": 1.2962409533142198e-05, "loss": 0.6739, "step": 2408 }, { "epoch": 0.42, "grad_norm": 0.7067622840667034, "learning_rate": 1.295698403887475e-05, "loss": 0.7574, "step": 2409 }, { "epoch": 0.42, "grad_norm": 0.796337010696576, "learning_rate": 1.2951557590622634e-05, "loss": 0.7512, "step": 2410 }, { "epoch": 0.42, "grad_norm": 0.6287948195615669, "learning_rate": 1.2946130190136528e-05, "loss": 0.6689, "step": 2411 }, { "epoch": 0.42, "grad_norm": 0.6901663629242247, "learning_rate": 1.2940701839167424e-05, "loss": 0.7173, "step": 2412 }, { "epoch": 0.42, "grad_norm": 0.7811199472969944, "learning_rate": 1.293527253946663e-05, "loss": 0.7005, "step": 2413 }, { "epoch": 0.42, "grad_norm": 0.8029462283079076, "learning_rate": 1.2929842292785742e-05, "loss": 0.7232, "step": 2414 }, { "epoch": 0.42, "grad_norm": 0.7275829239096612, "learning_rate": 1.2924411100876677e-05, "loss": 0.7507, "step": 2415 }, { "epoch": 0.42, "grad_norm": 0.7409321440367378, "learning_rate": 1.2918978965491646e-05, "loss": 0.6871, "step": 2416 }, { "epoch": 0.42, "grad_norm": 0.7757034558830632, "learning_rate": 1.291354588838317e-05, "loss": 0.7232, "step": 2417 }, { "epoch": 0.42, "grad_norm": 0.6757682004760615, "learning_rate": 1.2908111871304076e-05, "loss": 0.7054, "step": 2418 }, { "epoch": 0.42, "grad_norm": 0.7105448079641924, "learning_rate": 1.2902676916007491e-05, "loss": 0.7081, "step": 2419 }, { "epoch": 0.42, "grad_norm": 0.6842502169586996, "learning_rate": 1.2897241024246838e-05, "loss": 0.7021, "step": 2420 }, { "epoch": 0.42, "grad_norm": 0.7104564301303338, "learning_rate": 1.2891804197775859e-05, "loss": 0.7475, "step": 2421 }, { "epoch": 0.42, "grad_norm": 0.713608135949888, "learning_rate": 1.2886366438348579e-05, "loss": 0.6939, "step": 2422 }, { "epoch": 0.42, "grad_norm": 0.7570979133305654, "learning_rate": 1.2880927747719333e-05, "loss": 0.6776, "step": 2423 }, { "epoch": 0.43, "grad_norm": 0.9041649931138871, "learning_rate": 1.2875488127642765e-05, "loss": 0.8052, "step": 2424 }, { "epoch": 0.43, "grad_norm": 0.7910652776050097, "learning_rate": 1.2870047579873801e-05, "loss": 0.7609, "step": 2425 }, { "epoch": 0.43, "grad_norm": 0.6779380963168343, "learning_rate": 1.2864606106167679e-05, "loss": 0.7071, "step": 2426 }, { "epoch": 0.43, "grad_norm": 0.6709239562566711, "learning_rate": 1.2859163708279929e-05, "loss": 0.6886, "step": 2427 }, { "epoch": 0.43, "grad_norm": 0.7485432973989077, "learning_rate": 1.2853720387966386e-05, "loss": 0.7264, "step": 2428 }, { "epoch": 0.43, "grad_norm": 0.7509066306920159, "learning_rate": 1.2848276146983172e-05, "loss": 0.7525, "step": 2429 }, { "epoch": 0.43, "grad_norm": 0.6191768795054967, "learning_rate": 1.284283098708672e-05, "loss": 0.7084, "step": 2430 }, { "epoch": 0.43, "grad_norm": 0.7278428127082829, "learning_rate": 1.2837384910033748e-05, "loss": 0.7076, "step": 2431 }, { "epoch": 0.43, "grad_norm": 0.6809586130116474, "learning_rate": 1.2831937917581272e-05, "loss": 0.7115, "step": 2432 }, { "epoch": 0.43, "grad_norm": 0.7222698473575949, "learning_rate": 1.2826490011486615e-05, "loss": 0.7184, "step": 2433 }, { "epoch": 0.43, "grad_norm": 0.6527514472304208, "learning_rate": 1.282104119350737e-05, "loss": 0.7034, "step": 2434 }, { "epoch": 0.43, "grad_norm": 0.664847461203532, "learning_rate": 1.2815591465401452e-05, "loss": 0.7024, "step": 2435 }, { "epoch": 0.43, "grad_norm": 0.6553442758486085, "learning_rate": 1.2810140828927051e-05, "loss": 0.6794, "step": 2436 }, { "epoch": 0.43, "grad_norm": 0.6785737349385125, "learning_rate": 1.2804689285842657e-05, "loss": 0.7043, "step": 2437 }, { "epoch": 0.43, "grad_norm": 0.7017255079249497, "learning_rate": 1.279923683790705e-05, "loss": 0.7652, "step": 2438 }, { "epoch": 0.43, "grad_norm": 0.8252900337510232, "learning_rate": 1.2793783486879308e-05, "loss": 0.7272, "step": 2439 }, { "epoch": 0.43, "grad_norm": 0.5848309798228564, "learning_rate": 1.2788329234518788e-05, "loss": 0.6491, "step": 2440 }, { "epoch": 0.43, "grad_norm": 0.6897299214375676, "learning_rate": 1.2782874082585153e-05, "loss": 0.7262, "step": 2441 }, { "epoch": 0.43, "grad_norm": 0.7193473638161236, "learning_rate": 1.2777418032838348e-05, "loss": 0.7284, "step": 2442 }, { "epoch": 0.43, "grad_norm": 0.7108282565069285, "learning_rate": 1.27719610870386e-05, "loss": 0.7192, "step": 2443 }, { "epoch": 0.43, "grad_norm": 0.703275366549796, "learning_rate": 1.2766503246946442e-05, "loss": 0.7321, "step": 2444 }, { "epoch": 0.43, "grad_norm": 0.6863627906528372, "learning_rate": 1.2761044514322685e-05, "loss": 0.7106, "step": 2445 }, { "epoch": 0.43, "grad_norm": 0.6763514553462665, "learning_rate": 1.2755584890928427e-05, "loss": 0.7242, "step": 2446 }, { "epoch": 0.43, "grad_norm": 0.8751340099590099, "learning_rate": 1.2750124378525062e-05, "loss": 0.7907, "step": 2447 }, { "epoch": 0.43, "grad_norm": 0.7095203325185853, "learning_rate": 1.2744662978874262e-05, "loss": 0.7013, "step": 2448 }, { "epoch": 0.43, "grad_norm": 0.62491551319793, "learning_rate": 1.2739200693737985e-05, "loss": 0.7283, "step": 2449 }, { "epoch": 0.43, "grad_norm": 0.7932121180983142, "learning_rate": 1.273373752487848e-05, "loss": 0.7401, "step": 2450 }, { "epoch": 0.43, "grad_norm": 0.6962371703502113, "learning_rate": 1.2728273474058285e-05, "loss": 0.7297, "step": 2451 }, { "epoch": 0.43, "grad_norm": 0.6891702322059048, "learning_rate": 1.2722808543040208e-05, "loss": 0.7398, "step": 2452 }, { "epoch": 0.43, "grad_norm": 0.7043401245707164, "learning_rate": 1.2717342733587354e-05, "loss": 0.6975, "step": 2453 }, { "epoch": 0.43, "grad_norm": 0.5976413747727971, "learning_rate": 1.2711876047463107e-05, "loss": 0.7013, "step": 2454 }, { "epoch": 0.43, "grad_norm": 0.7863354526885893, "learning_rate": 1.2706408486431133e-05, "loss": 0.8211, "step": 2455 }, { "epoch": 0.43, "grad_norm": 0.7646308610693818, "learning_rate": 1.2700940052255382e-05, "loss": 0.7129, "step": 2456 }, { "epoch": 0.43, "grad_norm": 0.7476039486790339, "learning_rate": 1.2695470746700086e-05, "loss": 0.7464, "step": 2457 }, { "epoch": 0.43, "grad_norm": 0.6474497106702266, "learning_rate": 1.269000057152975e-05, "loss": 0.7013, "step": 2458 }, { "epoch": 0.43, "grad_norm": 0.7779635627083211, "learning_rate": 1.2684529528509177e-05, "loss": 0.7412, "step": 2459 }, { "epoch": 0.43, "grad_norm": 0.7143040459083574, "learning_rate": 1.2679057619403432e-05, "loss": 0.7708, "step": 2460 }, { "epoch": 0.43, "grad_norm": 0.670716356244487, "learning_rate": 1.2673584845977869e-05, "loss": 0.7141, "step": 2461 }, { "epoch": 0.43, "grad_norm": 0.6656855592435986, "learning_rate": 1.266811120999812e-05, "loss": 0.7027, "step": 2462 }, { "epoch": 0.43, "grad_norm": 0.7153128025534002, "learning_rate": 1.266263671323009e-05, "loss": 0.7264, "step": 2463 }, { "epoch": 0.43, "grad_norm": 0.6655824733270616, "learning_rate": 1.2657161357439968e-05, "loss": 0.6666, "step": 2464 }, { "epoch": 0.43, "grad_norm": 0.6892386964277015, "learning_rate": 1.2651685144394223e-05, "loss": 0.7601, "step": 2465 }, { "epoch": 0.43, "grad_norm": 0.7884999340297445, "learning_rate": 1.264620807585959e-05, "loss": 0.7382, "step": 2466 }, { "epoch": 0.43, "grad_norm": 0.6686061403235437, "learning_rate": 1.2640730153603085e-05, "loss": 0.6828, "step": 2467 }, { "epoch": 0.43, "grad_norm": 0.7267559789314942, "learning_rate": 1.2635251379392002e-05, "loss": 0.7704, "step": 2468 }, { "epoch": 0.43, "grad_norm": 0.6944426675103238, "learning_rate": 1.262977175499391e-05, "loss": 0.7149, "step": 2469 }, { "epoch": 0.43, "grad_norm": 0.6575656047329317, "learning_rate": 1.2624291282176642e-05, "loss": 0.6805, "step": 2470 }, { "epoch": 0.43, "grad_norm": 0.7329960806648298, "learning_rate": 1.2618809962708318e-05, "loss": 0.742, "step": 2471 }, { "epoch": 0.43, "grad_norm": 0.6864808929277261, "learning_rate": 1.2613327798357329e-05, "loss": 0.7083, "step": 2472 }, { "epoch": 0.43, "grad_norm": 0.6192826611099626, "learning_rate": 1.2607844790892327e-05, "loss": 0.7078, "step": 2473 }, { "epoch": 0.43, "grad_norm": 1.0670318830492338, "learning_rate": 1.2602360942082257e-05, "loss": 0.7239, "step": 2474 }, { "epoch": 0.43, "grad_norm": 0.774072163861016, "learning_rate": 1.259687625369631e-05, "loss": 0.7466, "step": 2475 }, { "epoch": 0.43, "grad_norm": 0.685327555032074, "learning_rate": 1.2591390727503968e-05, "loss": 0.7375, "step": 2476 }, { "epoch": 0.43, "grad_norm": 0.6822488486839149, "learning_rate": 1.2585904365274975e-05, "loss": 0.7162, "step": 2477 }, { "epoch": 0.43, "grad_norm": 0.6784356654988051, "learning_rate": 1.2580417168779342e-05, "loss": 0.6932, "step": 2478 }, { "epoch": 0.43, "grad_norm": 0.6158365500872892, "learning_rate": 1.257492913978736e-05, "loss": 0.7029, "step": 2479 }, { "epoch": 0.43, "grad_norm": 0.6915595913338037, "learning_rate": 1.2569440280069575e-05, "loss": 0.7338, "step": 2480 }, { "epoch": 0.44, "grad_norm": 0.6586618401317746, "learning_rate": 1.2563950591396812e-05, "loss": 0.6989, "step": 2481 }, { "epoch": 0.44, "grad_norm": 0.7111800024197478, "learning_rate": 1.2558460075540156e-05, "loss": 0.7424, "step": 2482 }, { "epoch": 0.44, "grad_norm": 0.6475301333781439, "learning_rate": 1.2552968734270965e-05, "loss": 0.6745, "step": 2483 }, { "epoch": 0.44, "grad_norm": 0.6692668182724142, "learning_rate": 1.2547476569360857e-05, "loss": 0.7285, "step": 2484 }, { "epoch": 0.44, "grad_norm": 0.7296983960430479, "learning_rate": 1.2541983582581715e-05, "loss": 0.7787, "step": 2485 }, { "epoch": 0.44, "grad_norm": 0.7264489839817135, "learning_rate": 1.2536489775705703e-05, "loss": 0.7287, "step": 2486 }, { "epoch": 0.44, "grad_norm": 0.6802378360820478, "learning_rate": 1.2530995150505226e-05, "loss": 0.7086, "step": 2487 }, { "epoch": 0.44, "grad_norm": 0.622803983944456, "learning_rate": 1.2525499708752973e-05, "loss": 0.6972, "step": 2488 }, { "epoch": 0.44, "grad_norm": 0.8630893302316937, "learning_rate": 1.2520003452221883e-05, "loss": 0.742, "step": 2489 }, { "epoch": 0.44, "grad_norm": 0.735497470392842, "learning_rate": 1.251450638268516e-05, "loss": 0.7208, "step": 2490 }, { "epoch": 0.44, "grad_norm": 0.7203888095629136, "learning_rate": 1.250900850191628e-05, "loss": 0.7342, "step": 2491 }, { "epoch": 0.44, "grad_norm": 0.6020587197769756, "learning_rate": 1.2503509811688974e-05, "loss": 0.724, "step": 2492 }, { "epoch": 0.44, "grad_norm": 0.6524918479875016, "learning_rate": 1.2498010313777226e-05, "loss": 0.693, "step": 2493 }, { "epoch": 0.44, "grad_norm": 0.5524960103447677, "learning_rate": 1.2492510009955297e-05, "loss": 0.6886, "step": 2494 }, { "epoch": 0.44, "grad_norm": 0.6622245952843252, "learning_rate": 1.2487008901997697e-05, "loss": 0.7361, "step": 2495 }, { "epoch": 0.44, "grad_norm": 0.7414523400119793, "learning_rate": 1.2481506991679195e-05, "loss": 0.73, "step": 2496 }, { "epoch": 0.44, "grad_norm": 0.8726833780272764, "learning_rate": 1.2476004280774827e-05, "loss": 0.7189, "step": 2497 }, { "epoch": 0.44, "grad_norm": 0.6829724368792278, "learning_rate": 1.2470500771059879e-05, "loss": 0.7225, "step": 2498 }, { "epoch": 0.44, "grad_norm": 0.7074358755209815, "learning_rate": 1.2464996464309898e-05, "loss": 0.688, "step": 2499 }, { "epoch": 0.44, "grad_norm": 0.8180899802094852, "learning_rate": 1.2459491362300688e-05, "loss": 0.7938, "step": 2500 }, { "epoch": 0.44, "grad_norm": 0.7046634826341415, "learning_rate": 1.2453985466808311e-05, "loss": 0.7202, "step": 2501 }, { "epoch": 0.44, "grad_norm": 0.7010341634084777, "learning_rate": 1.2448478779609083e-05, "loss": 0.6906, "step": 2502 }, { "epoch": 0.44, "grad_norm": 0.7776221721541776, "learning_rate": 1.2442971302479577e-05, "loss": 0.7598, "step": 2503 }, { "epoch": 0.44, "grad_norm": 0.6661280350835119, "learning_rate": 1.2437463037196616e-05, "loss": 0.6738, "step": 2504 }, { "epoch": 0.44, "grad_norm": 0.6802776639569043, "learning_rate": 1.2431953985537283e-05, "loss": 0.6907, "step": 2505 }, { "epoch": 0.44, "grad_norm": 0.8610463120026057, "learning_rate": 1.2426444149278917e-05, "loss": 0.7603, "step": 2506 }, { "epoch": 0.44, "grad_norm": 0.7208543597417296, "learning_rate": 1.2420933530199105e-05, "loss": 0.7053, "step": 2507 }, { "epoch": 0.44, "grad_norm": 0.7001479309862667, "learning_rate": 1.241542213007568e-05, "loss": 0.6695, "step": 2508 }, { "epoch": 0.44, "grad_norm": 0.663029923358737, "learning_rate": 1.2409909950686745e-05, "loss": 0.6895, "step": 2509 }, { "epoch": 0.44, "grad_norm": 0.7794785163611814, "learning_rate": 1.2404396993810637e-05, "loss": 0.7572, "step": 2510 }, { "epoch": 0.44, "grad_norm": 0.7011457488843825, "learning_rate": 1.2398883261225951e-05, "loss": 0.7315, "step": 2511 }, { "epoch": 0.44, "grad_norm": 0.7754329988013688, "learning_rate": 1.2393368754711536e-05, "loss": 0.7472, "step": 2512 }, { "epoch": 0.44, "grad_norm": 0.768832665022927, "learning_rate": 1.2387853476046482e-05, "loss": 0.7536, "step": 2513 }, { "epoch": 0.44, "grad_norm": 0.6457364246219567, "learning_rate": 1.2382337427010138e-05, "loss": 0.7221, "step": 2514 }, { "epoch": 0.44, "grad_norm": 0.7056032959553097, "learning_rate": 1.2376820609382094e-05, "loss": 0.6827, "step": 2515 }, { "epoch": 0.44, "grad_norm": 0.7463769884403302, "learning_rate": 1.2371303024942187e-05, "loss": 0.7542, "step": 2516 }, { "epoch": 0.44, "grad_norm": 0.7878769975086084, "learning_rate": 1.2365784675470509e-05, "loss": 0.7815, "step": 2517 }, { "epoch": 0.44, "grad_norm": 0.8768499996005572, "learning_rate": 1.2360265562747393e-05, "loss": 0.7787, "step": 2518 }, { "epoch": 0.44, "grad_norm": 0.7519926283466613, "learning_rate": 1.235474568855342e-05, "loss": 0.7399, "step": 2519 }, { "epoch": 0.44, "grad_norm": 0.6619560525283733, "learning_rate": 1.2349225054669418e-05, "loss": 0.6986, "step": 2520 }, { "epoch": 0.44, "grad_norm": 0.718500298315222, "learning_rate": 1.2343703662876456e-05, "loss": 0.7392, "step": 2521 }, { "epoch": 0.44, "grad_norm": 0.7088046898065011, "learning_rate": 1.233818151495585e-05, "loss": 0.7362, "step": 2522 }, { "epoch": 0.44, "grad_norm": 0.734182344516928, "learning_rate": 1.2332658612689161e-05, "loss": 0.7215, "step": 2523 }, { "epoch": 0.44, "grad_norm": 0.8219387282270771, "learning_rate": 1.2327134957858195e-05, "loss": 0.8145, "step": 2524 }, { "epoch": 0.44, "grad_norm": 0.7165244986984408, "learning_rate": 1.2321610552244993e-05, "loss": 0.7457, "step": 2525 }, { "epoch": 0.44, "grad_norm": 0.7218319491623219, "learning_rate": 1.2316085397631848e-05, "loss": 0.7303, "step": 2526 }, { "epoch": 0.44, "grad_norm": 0.6934877205913756, "learning_rate": 1.2310559495801286e-05, "loss": 0.7028, "step": 2527 }, { "epoch": 0.44, "grad_norm": 0.7479928418726803, "learning_rate": 1.2305032848536081e-05, "loss": 0.7396, "step": 2528 }, { "epoch": 0.44, "grad_norm": 0.681079183008768, "learning_rate": 1.2299505457619243e-05, "loss": 0.723, "step": 2529 }, { "epoch": 0.44, "grad_norm": 0.7168967524005019, "learning_rate": 1.2293977324834025e-05, "loss": 0.7139, "step": 2530 }, { "epoch": 0.44, "grad_norm": 0.6743956997022388, "learning_rate": 1.2288448451963916e-05, "loss": 0.7197, "step": 2531 }, { "epoch": 0.44, "grad_norm": 0.6350306318516435, "learning_rate": 1.2282918840792645e-05, "loss": 0.7043, "step": 2532 }, { "epoch": 0.44, "grad_norm": 0.646922193244222, "learning_rate": 1.2277388493104187e-05, "loss": 0.6886, "step": 2533 }, { "epoch": 0.44, "grad_norm": 0.6665080608332145, "learning_rate": 1.227185741068274e-05, "loss": 0.7015, "step": 2534 }, { "epoch": 0.44, "grad_norm": 0.679666228161228, "learning_rate": 1.2266325595312744e-05, "loss": 0.6997, "step": 2535 }, { "epoch": 0.44, "grad_norm": 0.6095118414312688, "learning_rate": 1.2260793048778885e-05, "loss": 0.7194, "step": 2536 }, { "epoch": 0.44, "grad_norm": 0.68700037576717, "learning_rate": 1.2255259772866075e-05, "loss": 0.6963, "step": 2537 }, { "epoch": 0.45, "grad_norm": 0.7555858452187916, "learning_rate": 1.2249725769359464e-05, "loss": 0.7733, "step": 2538 }, { "epoch": 0.45, "grad_norm": 0.740603757799908, "learning_rate": 1.2244191040044437e-05, "loss": 0.6651, "step": 2539 }, { "epoch": 0.45, "grad_norm": 0.646690173861469, "learning_rate": 1.223865558670661e-05, "loss": 0.7115, "step": 2540 }, { "epoch": 0.45, "grad_norm": 0.7526135824296872, "learning_rate": 1.2233119411131844e-05, "loss": 0.7367, "step": 2541 }, { "epoch": 0.45, "grad_norm": 0.6816787312856507, "learning_rate": 1.2227582515106216e-05, "loss": 0.7151, "step": 2542 }, { "epoch": 0.45, "grad_norm": 0.7725925108758829, "learning_rate": 1.2222044900416048e-05, "loss": 0.7551, "step": 2543 }, { "epoch": 0.45, "grad_norm": 0.6402996661487249, "learning_rate": 1.221650656884789e-05, "loss": 0.7081, "step": 2544 }, { "epoch": 0.45, "grad_norm": 0.7239523764799829, "learning_rate": 1.221096752218852e-05, "loss": 0.7184, "step": 2545 }, { "epoch": 0.45, "grad_norm": 0.7076494064446679, "learning_rate": 1.2205427762224951e-05, "loss": 0.6854, "step": 2546 }, { "epoch": 0.45, "grad_norm": 0.683663937893182, "learning_rate": 1.219988729074443e-05, "loss": 0.7306, "step": 2547 }, { "epoch": 0.45, "grad_norm": 0.7039974706884854, "learning_rate": 1.2194346109534423e-05, "loss": 0.6937, "step": 2548 }, { "epoch": 0.45, "grad_norm": 0.6355524793693244, "learning_rate": 1.2188804220382633e-05, "loss": 0.7115, "step": 2549 }, { "epoch": 0.45, "grad_norm": 0.6282312369399005, "learning_rate": 1.2183261625076987e-05, "loss": 0.6944, "step": 2550 }, { "epoch": 0.45, "grad_norm": 0.760342800134097, "learning_rate": 1.2177718325405643e-05, "loss": 0.7454, "step": 2551 }, { "epoch": 0.45, "grad_norm": 0.7146595443991985, "learning_rate": 1.2172174323156987e-05, "loss": 0.7488, "step": 2552 }, { "epoch": 0.45, "grad_norm": 0.7454778261369719, "learning_rate": 1.2166629620119628e-05, "loss": 0.715, "step": 2553 }, { "epoch": 0.45, "grad_norm": 0.7658573992469987, "learning_rate": 1.2161084218082397e-05, "loss": 0.7187, "step": 2554 }, { "epoch": 0.45, "grad_norm": 0.6496028719755729, "learning_rate": 1.2155538118834365e-05, "loss": 0.7267, "step": 2555 }, { "epoch": 0.45, "grad_norm": 0.6881325787929229, "learning_rate": 1.2149991324164817e-05, "loss": 0.7062, "step": 2556 }, { "epoch": 0.45, "grad_norm": 0.7949114009021576, "learning_rate": 1.2144443835863262e-05, "loss": 0.803, "step": 2557 }, { "epoch": 0.45, "grad_norm": 0.7576903604605215, "learning_rate": 1.2138895655719436e-05, "loss": 0.7178, "step": 2558 }, { "epoch": 0.45, "grad_norm": 0.6917941454317039, "learning_rate": 1.21333467855233e-05, "loss": 0.6927, "step": 2559 }, { "epoch": 0.45, "grad_norm": 0.7533633155090416, "learning_rate": 1.2127797227065033e-05, "loss": 0.7636, "step": 2560 }, { "epoch": 0.45, "grad_norm": 0.7377718468739967, "learning_rate": 1.212224698213504e-05, "loss": 0.7211, "step": 2561 }, { "epoch": 0.45, "grad_norm": 0.7315583117889375, "learning_rate": 1.2116696052523942e-05, "loss": 0.7288, "step": 2562 }, { "epoch": 0.45, "grad_norm": 0.737956817925368, "learning_rate": 1.2111144440022586e-05, "loss": 0.7333, "step": 2563 }, { "epoch": 0.45, "grad_norm": 0.767513998244305, "learning_rate": 1.2105592146422038e-05, "loss": 0.7425, "step": 2564 }, { "epoch": 0.45, "grad_norm": 0.6999872220391549, "learning_rate": 1.210003917351359e-05, "loss": 0.7128, "step": 2565 }, { "epoch": 0.45, "grad_norm": 0.6624028282168123, "learning_rate": 1.2094485523088737e-05, "loss": 0.6576, "step": 2566 }, { "epoch": 0.45, "grad_norm": 0.743906630058857, "learning_rate": 1.2088931196939206e-05, "loss": 0.719, "step": 2567 }, { "epoch": 0.45, "grad_norm": 0.6884374646372952, "learning_rate": 1.208337619685694e-05, "loss": 0.7395, "step": 2568 }, { "epoch": 0.45, "grad_norm": 0.6696075366423262, "learning_rate": 1.2077820524634096e-05, "loss": 0.71, "step": 2569 }, { "epoch": 0.45, "grad_norm": 0.6107427026391251, "learning_rate": 1.2072264182063052e-05, "loss": 0.7166, "step": 2570 }, { "epoch": 0.45, "grad_norm": 0.7517931603895875, "learning_rate": 1.2066707170936399e-05, "loss": 0.7456, "step": 2571 }, { "epoch": 0.45, "grad_norm": 0.7457088259946913, "learning_rate": 1.206114949304694e-05, "loss": 0.7253, "step": 2572 }, { "epoch": 0.45, "grad_norm": 0.7028783749191799, "learning_rate": 1.2055591150187704e-05, "loss": 0.7505, "step": 2573 }, { "epoch": 0.45, "grad_norm": 0.7471134441203098, "learning_rate": 1.2050032144151923e-05, "loss": 0.7584, "step": 2574 }, { "epoch": 0.45, "grad_norm": 0.651752204585861, "learning_rate": 1.2044472476733053e-05, "loss": 0.6906, "step": 2575 }, { "epoch": 0.45, "grad_norm": 0.7090877633496344, "learning_rate": 1.2038912149724758e-05, "loss": 0.7096, "step": 2576 }, { "epoch": 0.45, "grad_norm": 0.7427004438124023, "learning_rate": 1.2033351164920913e-05, "loss": 0.751, "step": 2577 }, { "epoch": 0.45, "grad_norm": 0.6848322227757028, "learning_rate": 1.2027789524115606e-05, "loss": 0.7129, "step": 2578 }, { "epoch": 0.45, "grad_norm": 0.6607336321808883, "learning_rate": 1.2022227229103143e-05, "loss": 0.6939, "step": 2579 }, { "epoch": 0.45, "grad_norm": 0.7035241041500059, "learning_rate": 1.2016664281678035e-05, "loss": 0.711, "step": 2580 }, { "epoch": 0.45, "grad_norm": 0.6688081097273204, "learning_rate": 1.2011100683635e-05, "loss": 0.7273, "step": 2581 }, { "epoch": 0.45, "grad_norm": 0.7041046573662716, "learning_rate": 1.2005536436768976e-05, "loss": 0.7322, "step": 2582 }, { "epoch": 0.45, "grad_norm": 0.6869850752571246, "learning_rate": 1.1999971542875104e-05, "loss": 0.678, "step": 2583 }, { "epoch": 0.45, "grad_norm": 0.7109285629516063, "learning_rate": 1.1994406003748735e-05, "loss": 0.6461, "step": 2584 }, { "epoch": 0.45, "grad_norm": 0.6589737109288186, "learning_rate": 1.1988839821185428e-05, "loss": 0.6779, "step": 2585 }, { "epoch": 0.45, "grad_norm": 0.7384845526534729, "learning_rate": 1.1983272996980943e-05, "loss": 0.7321, "step": 2586 }, { "epoch": 0.45, "grad_norm": 0.7572532422449944, "learning_rate": 1.1977705532931263e-05, "loss": 0.7412, "step": 2587 }, { "epoch": 0.45, "grad_norm": 0.8448848077135841, "learning_rate": 1.1972137430832565e-05, "loss": 0.7121, "step": 2588 }, { "epoch": 0.45, "grad_norm": 0.6861605419218091, "learning_rate": 1.1966568692481233e-05, "loss": 0.6748, "step": 2589 }, { "epoch": 0.45, "grad_norm": 0.6902295505413473, "learning_rate": 1.1960999319673856e-05, "loss": 0.7151, "step": 2590 }, { "epoch": 0.45, "grad_norm": 0.7281280262049067, "learning_rate": 1.1955429314207234e-05, "loss": 0.6748, "step": 2591 }, { "epoch": 0.45, "grad_norm": 0.6495165295299163, "learning_rate": 1.1949858677878366e-05, "loss": 0.7117, "step": 2592 }, { "epoch": 0.45, "grad_norm": 0.6456428714810494, "learning_rate": 1.1944287412484454e-05, "loss": 0.7089, "step": 2593 }, { "epoch": 0.45, "grad_norm": 0.6417431530289259, "learning_rate": 1.1938715519822908e-05, "loss": 0.6894, "step": 2594 }, { "epoch": 0.46, "grad_norm": 0.6844240989248419, "learning_rate": 1.193314300169133e-05, "loss": 0.7259, "step": 2595 }, { "epoch": 0.46, "grad_norm": 0.7079904295108993, "learning_rate": 1.1927569859887534e-05, "loss": 0.7116, "step": 2596 }, { "epoch": 0.46, "grad_norm": 0.6542047457686662, "learning_rate": 1.1921996096209535e-05, "loss": 0.6846, "step": 2597 }, { "epoch": 0.46, "grad_norm": 0.7408392500909895, "learning_rate": 1.1916421712455542e-05, "loss": 0.7288, "step": 2598 }, { "epoch": 0.46, "grad_norm": 0.6637681474814193, "learning_rate": 1.1910846710423962e-05, "loss": 0.7202, "step": 2599 }, { "epoch": 0.46, "grad_norm": 0.7130399900391309, "learning_rate": 1.1905271091913417e-05, "loss": 0.7475, "step": 2600 }, { "epoch": 0.46, "grad_norm": 0.6766186647989126, "learning_rate": 1.1899694858722713e-05, "loss": 0.7281, "step": 2601 }, { "epoch": 0.46, "grad_norm": 0.6482351630359908, "learning_rate": 1.189411801265086e-05, "loss": 0.6768, "step": 2602 }, { "epoch": 0.46, "grad_norm": 0.6818706520526164, "learning_rate": 1.1888540555497064e-05, "loss": 0.7134, "step": 2603 }, { "epoch": 0.46, "grad_norm": 0.7905151730624117, "learning_rate": 1.1882962489060726e-05, "loss": 0.7437, "step": 2604 }, { "epoch": 0.46, "grad_norm": 0.6514308996172898, "learning_rate": 1.1877383815141448e-05, "loss": 0.73, "step": 2605 }, { "epoch": 0.46, "grad_norm": 0.6996204533261572, "learning_rate": 1.187180453553903e-05, "loss": 0.7033, "step": 2606 }, { "epoch": 0.46, "grad_norm": 0.687804306063394, "learning_rate": 1.1866224652053465e-05, "loss": 0.7226, "step": 2607 }, { "epoch": 0.46, "grad_norm": 0.759762122584781, "learning_rate": 1.1860644166484932e-05, "loss": 0.7794, "step": 2608 }, { "epoch": 0.46, "grad_norm": 0.6338461310405153, "learning_rate": 1.1855063080633818e-05, "loss": 0.6796, "step": 2609 }, { "epoch": 0.46, "grad_norm": 0.7205391141913385, "learning_rate": 1.1849481396300692e-05, "loss": 0.6781, "step": 2610 }, { "epoch": 0.46, "grad_norm": 0.7066614022171972, "learning_rate": 1.184389911528633e-05, "loss": 0.7138, "step": 2611 }, { "epoch": 0.46, "grad_norm": 0.6943493732615446, "learning_rate": 1.183831623939169e-05, "loss": 0.6825, "step": 2612 }, { "epoch": 0.46, "grad_norm": 0.7054800409814129, "learning_rate": 1.1832732770417916e-05, "loss": 0.7746, "step": 2613 }, { "epoch": 0.46, "grad_norm": 0.7049447703131623, "learning_rate": 1.182714871016636e-05, "loss": 0.7078, "step": 2614 }, { "epoch": 0.46, "grad_norm": 0.7404410711891249, "learning_rate": 1.1821564060438552e-05, "loss": 0.7414, "step": 2615 }, { "epoch": 0.46, "grad_norm": 0.672170475571877, "learning_rate": 1.181597882303622e-05, "loss": 0.7133, "step": 2616 }, { "epoch": 0.46, "grad_norm": 0.6290630216160689, "learning_rate": 1.1810392999761273e-05, "loss": 0.7238, "step": 2617 }, { "epoch": 0.46, "grad_norm": 0.716706388694791, "learning_rate": 1.1804806592415821e-05, "loss": 0.7253, "step": 2618 }, { "epoch": 0.46, "grad_norm": 0.688756085142853, "learning_rate": 1.1799219602802148e-05, "loss": 0.7323, "step": 2619 }, { "epoch": 0.46, "grad_norm": 0.7018997618645729, "learning_rate": 1.1793632032722737e-05, "loss": 0.6838, "step": 2620 }, { "epoch": 0.46, "grad_norm": 0.6697263985106993, "learning_rate": 1.1788043883980256e-05, "loss": 0.6969, "step": 2621 }, { "epoch": 0.46, "grad_norm": 0.7534757163512334, "learning_rate": 1.1782455158377553e-05, "loss": 0.7362, "step": 2622 }, { "epoch": 0.46, "grad_norm": 0.6381662856892655, "learning_rate": 1.1776865857717675e-05, "loss": 0.6956, "step": 2623 }, { "epoch": 0.46, "grad_norm": 0.7075548371931198, "learning_rate": 1.1771275983803839e-05, "loss": 0.754, "step": 2624 }, { "epoch": 0.46, "grad_norm": 0.7641281396539292, "learning_rate": 1.176568553843946e-05, "loss": 0.7182, "step": 2625 }, { "epoch": 0.46, "grad_norm": 0.8812841570370782, "learning_rate": 1.176009452342813e-05, "loss": 0.7658, "step": 2626 }, { "epoch": 0.46, "grad_norm": 0.7060404792043924, "learning_rate": 1.1754502940573626e-05, "loss": 0.7256, "step": 2627 }, { "epoch": 0.46, "grad_norm": 0.7014813537542534, "learning_rate": 1.1748910791679911e-05, "loss": 0.7299, "step": 2628 }, { "epoch": 0.46, "grad_norm": 0.7880113623857194, "learning_rate": 1.1743318078551131e-05, "loss": 0.696, "step": 2629 }, { "epoch": 0.46, "grad_norm": 0.6907915336117115, "learning_rate": 1.1737724802991608e-05, "loss": 0.7353, "step": 2630 }, { "epoch": 0.46, "grad_norm": 0.7114927603026453, "learning_rate": 1.1732130966805848e-05, "loss": 0.7505, "step": 2631 }, { "epoch": 0.46, "grad_norm": 0.6636498623395907, "learning_rate": 1.1726536571798546e-05, "loss": 0.6768, "step": 2632 }, { "epoch": 0.46, "grad_norm": 0.7750451966017317, "learning_rate": 1.1720941619774562e-05, "loss": 0.7632, "step": 2633 }, { "epoch": 0.46, "grad_norm": 0.6529016769809391, "learning_rate": 1.171534611253895e-05, "loss": 0.6935, "step": 2634 }, { "epoch": 0.46, "grad_norm": 0.7078333077560511, "learning_rate": 1.1709750051896937e-05, "loss": 0.7146, "step": 2635 }, { "epoch": 0.46, "grad_norm": 0.7063685580861095, "learning_rate": 1.1704153439653925e-05, "loss": 0.7216, "step": 2636 }, { "epoch": 0.46, "grad_norm": 0.8639926606774052, "learning_rate": 1.1698556277615499e-05, "loss": 0.7265, "step": 2637 }, { "epoch": 0.46, "grad_norm": 0.6734112392757491, "learning_rate": 1.1692958567587423e-05, "loss": 0.6966, "step": 2638 }, { "epoch": 0.46, "grad_norm": 0.7357397585372072, "learning_rate": 1.1687360311375634e-05, "loss": 0.7277, "step": 2639 }, { "epoch": 0.46, "grad_norm": 0.7052629698501591, "learning_rate": 1.168176151078624e-05, "loss": 0.6764, "step": 2640 }, { "epoch": 0.46, "grad_norm": 0.7021018370850047, "learning_rate": 1.167616216762554e-05, "loss": 0.7005, "step": 2641 }, { "epoch": 0.46, "grad_norm": 0.7158913985640424, "learning_rate": 1.1670562283699993e-05, "loss": 0.7363, "step": 2642 }, { "epoch": 0.46, "grad_norm": 0.7433680223567416, "learning_rate": 1.1664961860816241e-05, "loss": 0.7369, "step": 2643 }, { "epoch": 0.46, "grad_norm": 0.732675730405724, "learning_rate": 1.1659360900781095e-05, "loss": 0.728, "step": 2644 }, { "epoch": 0.46, "grad_norm": 0.618294256228388, "learning_rate": 1.1653759405401544e-05, "loss": 0.6976, "step": 2645 }, { "epoch": 0.46, "grad_norm": 0.7276173270339732, "learning_rate": 1.1648157376484741e-05, "loss": 0.7399, "step": 2646 }, { "epoch": 0.46, "grad_norm": 0.6258664491414127, "learning_rate": 1.1642554815838025e-05, "loss": 0.6991, "step": 2647 }, { "epoch": 0.46, "grad_norm": 0.6864898047496029, "learning_rate": 1.16369517252689e-05, "loss": 0.6995, "step": 2648 }, { "epoch": 0.46, "grad_norm": 0.6768916759775832, "learning_rate": 1.1631348106585027e-05, "loss": 0.698, "step": 2649 }, { "epoch": 0.46, "grad_norm": 0.6333198182313837, "learning_rate": 1.1625743961594264e-05, "loss": 0.7099, "step": 2650 }, { "epoch": 0.46, "grad_norm": 0.7926335588438118, "learning_rate": 1.1620139292104617e-05, "loss": 0.7889, "step": 2651 }, { "epoch": 0.47, "grad_norm": 0.6068998776070016, "learning_rate": 1.1614534099924274e-05, "loss": 0.6625, "step": 2652 }, { "epoch": 0.47, "grad_norm": 0.6701688363025057, "learning_rate": 1.1608928386861583e-05, "loss": 0.72, "step": 2653 }, { "epoch": 0.47, "grad_norm": 0.6478605029493304, "learning_rate": 1.1603322154725065e-05, "loss": 0.6658, "step": 2654 }, { "epoch": 0.47, "grad_norm": 0.8085131580104724, "learning_rate": 1.1597715405323409e-05, "loss": 0.7334, "step": 2655 }, { "epoch": 0.47, "grad_norm": 0.7635136470741118, "learning_rate": 1.1592108140465466e-05, "loss": 0.7282, "step": 2656 }, { "epoch": 0.47, "grad_norm": 0.6931935277176683, "learning_rate": 1.158650036196026e-05, "loss": 0.7421, "step": 2657 }, { "epoch": 0.47, "grad_norm": 0.7408374553368173, "learning_rate": 1.1580892071616974e-05, "loss": 0.734, "step": 2658 }, { "epoch": 0.47, "grad_norm": 0.6587015407413689, "learning_rate": 1.1575283271244964e-05, "loss": 0.7151, "step": 2659 }, { "epoch": 0.47, "grad_norm": 0.7514840071845719, "learning_rate": 1.1569673962653737e-05, "loss": 0.7247, "step": 2660 }, { "epoch": 0.47, "grad_norm": 0.6456436899363663, "learning_rate": 1.1564064147652984e-05, "loss": 0.701, "step": 2661 }, { "epoch": 0.47, "grad_norm": 0.696971846249675, "learning_rate": 1.1558453828052542e-05, "loss": 0.7376, "step": 2662 }, { "epoch": 0.47, "grad_norm": 0.6585062240609977, "learning_rate": 1.1552843005662417e-05, "loss": 0.7031, "step": 2663 }, { "epoch": 0.47, "grad_norm": 0.6768638041333195, "learning_rate": 1.154723168229278e-05, "loss": 0.709, "step": 2664 }, { "epoch": 0.47, "grad_norm": 0.6820978094731669, "learning_rate": 1.1541619859753954e-05, "loss": 0.7133, "step": 2665 }, { "epoch": 0.47, "grad_norm": 0.6860815002061117, "learning_rate": 1.1536007539856439e-05, "loss": 0.6942, "step": 2666 }, { "epoch": 0.47, "grad_norm": 0.6897465206966269, "learning_rate": 1.1530394724410883e-05, "loss": 0.6839, "step": 2667 }, { "epoch": 0.47, "grad_norm": 0.6880398603519243, "learning_rate": 1.1524781415228095e-05, "loss": 0.7325, "step": 2668 }, { "epoch": 0.47, "grad_norm": 0.6730911071840638, "learning_rate": 1.1519167614119044e-05, "loss": 0.6841, "step": 2669 }, { "epoch": 0.47, "grad_norm": 0.7088018538989049, "learning_rate": 1.1513553322894866e-05, "loss": 0.7063, "step": 2670 }, { "epoch": 0.47, "grad_norm": 0.7455386497472378, "learning_rate": 1.1507938543366844e-05, "loss": 0.7334, "step": 2671 }, { "epoch": 0.47, "grad_norm": 0.75339073880226, "learning_rate": 1.1502323277346422e-05, "loss": 0.6953, "step": 2672 }, { "epoch": 0.47, "grad_norm": 0.702716414046401, "learning_rate": 1.1496707526645205e-05, "loss": 0.7175, "step": 2673 }, { "epoch": 0.47, "grad_norm": 0.7148043206238749, "learning_rate": 1.1491091293074948e-05, "loss": 0.7102, "step": 2674 }, { "epoch": 0.47, "grad_norm": 0.6246455615656044, "learning_rate": 1.1485474578447566e-05, "loss": 0.7115, "step": 2675 }, { "epoch": 0.47, "grad_norm": 0.7705514814151652, "learning_rate": 1.147985738457513e-05, "loss": 0.7476, "step": 2676 }, { "epoch": 0.47, "grad_norm": 0.6651928053680907, "learning_rate": 1.1474239713269861e-05, "loss": 0.6861, "step": 2677 }, { "epoch": 0.47, "grad_norm": 0.7349399223683145, "learning_rate": 1.1468621566344138e-05, "loss": 0.7209, "step": 2678 }, { "epoch": 0.47, "grad_norm": 0.7094142796858918, "learning_rate": 1.1463002945610495e-05, "loss": 0.7474, "step": 2679 }, { "epoch": 0.47, "grad_norm": 0.7232703088050717, "learning_rate": 1.1457383852881613e-05, "loss": 0.7182, "step": 2680 }, { "epoch": 0.47, "grad_norm": 0.7564387145513923, "learning_rate": 1.145176428997033e-05, "loss": 0.7339, "step": 2681 }, { "epoch": 0.47, "grad_norm": 0.696596100402127, "learning_rate": 1.1446144258689631e-05, "loss": 0.6942, "step": 2682 }, { "epoch": 0.47, "grad_norm": 0.6333098779669122, "learning_rate": 1.1440523760852657e-05, "loss": 0.7054, "step": 2683 }, { "epoch": 0.47, "grad_norm": 0.7519174767448613, "learning_rate": 1.14349027982727e-05, "loss": 0.743, "step": 2684 }, { "epoch": 0.47, "grad_norm": 0.7502795816664956, "learning_rate": 1.1429281372763198e-05, "loss": 0.7361, "step": 2685 }, { "epoch": 0.47, "grad_norm": 0.6748494808417854, "learning_rate": 1.142365948613774e-05, "loss": 0.7752, "step": 2686 }, { "epoch": 0.47, "grad_norm": 0.6619515072169204, "learning_rate": 1.1418037140210061e-05, "loss": 0.6857, "step": 2687 }, { "epoch": 0.47, "grad_norm": 0.6805484326952689, "learning_rate": 1.1412414336794052e-05, "loss": 0.7402, "step": 2688 }, { "epoch": 0.47, "grad_norm": 0.7563933808355008, "learning_rate": 1.1406791077703744e-05, "loss": 0.7722, "step": 2689 }, { "epoch": 0.47, "grad_norm": 0.7542795511356861, "learning_rate": 1.1401167364753316e-05, "loss": 0.7256, "step": 2690 }, { "epoch": 0.47, "grad_norm": 0.5878345650580514, "learning_rate": 1.1395543199757097e-05, "loss": 0.6764, "step": 2691 }, { "epoch": 0.47, "grad_norm": 0.7240953282258596, "learning_rate": 1.138991858452956e-05, "loss": 0.7368, "step": 2692 }, { "epoch": 0.47, "grad_norm": 0.618755746825381, "learning_rate": 1.1384293520885323e-05, "loss": 0.7081, "step": 2693 }, { "epoch": 0.47, "grad_norm": 0.6869105352269484, "learning_rate": 1.1378668010639151e-05, "loss": 0.716, "step": 2694 }, { "epoch": 0.47, "grad_norm": 0.7063099663811014, "learning_rate": 1.1373042055605944e-05, "loss": 0.7223, "step": 2695 }, { "epoch": 0.47, "grad_norm": 0.7148963982735701, "learning_rate": 1.1367415657600757e-05, "loss": 0.7048, "step": 2696 }, { "epoch": 0.47, "grad_norm": 0.7344694746961528, "learning_rate": 1.1361788818438784e-05, "loss": 0.7467, "step": 2697 }, { "epoch": 0.47, "grad_norm": 0.6771536607654247, "learning_rate": 1.135616153993536e-05, "loss": 0.7595, "step": 2698 }, { "epoch": 0.47, "grad_norm": 0.6919642374869757, "learning_rate": 1.1350533823905959e-05, "loss": 0.7442, "step": 2699 }, { "epoch": 0.47, "grad_norm": 0.752419772381414, "learning_rate": 1.1344905672166205e-05, "loss": 0.7531, "step": 2700 }, { "epoch": 0.47, "grad_norm": 0.6971849920813366, "learning_rate": 1.1339277086531853e-05, "loss": 0.7488, "step": 2701 }, { "epoch": 0.47, "grad_norm": 0.6222171484332547, "learning_rate": 1.1333648068818804e-05, "loss": 0.6792, "step": 2702 }, { "epoch": 0.47, "grad_norm": 0.7163940721903748, "learning_rate": 1.1328018620843095e-05, "loss": 0.739, "step": 2703 }, { "epoch": 0.47, "grad_norm": 0.6349934037502499, "learning_rate": 1.1322388744420902e-05, "loss": 0.6641, "step": 2704 }, { "epoch": 0.47, "grad_norm": 0.7162145614823003, "learning_rate": 1.1316758441368545e-05, "loss": 0.7434, "step": 2705 }, { "epoch": 0.47, "grad_norm": 0.7580632231814334, "learning_rate": 1.1311127713502472e-05, "loss": 0.7555, "step": 2706 }, { "epoch": 0.47, "grad_norm": 0.7059545826524883, "learning_rate": 1.1305496562639278e-05, "loss": 0.6864, "step": 2707 }, { "epoch": 0.47, "grad_norm": 0.7213220959021831, "learning_rate": 1.1299864990595689e-05, "loss": 0.7484, "step": 2708 }, { "epoch": 0.48, "grad_norm": 0.7647186531600428, "learning_rate": 1.129423299918856e-05, "loss": 0.7627, "step": 2709 }, { "epoch": 0.48, "grad_norm": 0.6434706841859783, "learning_rate": 1.1288600590234898e-05, "loss": 0.6903, "step": 2710 }, { "epoch": 0.48, "grad_norm": 0.7494809987721406, "learning_rate": 1.1282967765551832e-05, "loss": 0.7212, "step": 2711 }, { "epoch": 0.48, "grad_norm": 0.6823895045314353, "learning_rate": 1.1277334526956628e-05, "loss": 0.6951, "step": 2712 }, { "epoch": 0.48, "grad_norm": 0.7527909641536127, "learning_rate": 1.1271700876266684e-05, "loss": 0.753, "step": 2713 }, { "epoch": 0.48, "grad_norm": 0.6485361286589707, "learning_rate": 1.1266066815299541e-05, "loss": 0.6738, "step": 2714 }, { "epoch": 0.48, "grad_norm": 0.6368774011245344, "learning_rate": 1.1260432345872854e-05, "loss": 0.7175, "step": 2715 }, { "epoch": 0.48, "grad_norm": 0.7081370958327272, "learning_rate": 1.125479746980443e-05, "loss": 0.7307, "step": 2716 }, { "epoch": 0.48, "grad_norm": 0.706203401327245, "learning_rate": 1.1249162188912193e-05, "loss": 0.7465, "step": 2717 }, { "epoch": 0.48, "grad_norm": 0.6975261453283592, "learning_rate": 1.1243526505014199e-05, "loss": 0.6952, "step": 2718 }, { "epoch": 0.48, "grad_norm": 0.6826044363331263, "learning_rate": 1.1237890419928641e-05, "loss": 0.7068, "step": 2719 }, { "epoch": 0.48, "grad_norm": 0.6634716316928342, "learning_rate": 1.1232253935473838e-05, "loss": 0.7156, "step": 2720 }, { "epoch": 0.48, "grad_norm": 0.7242953430226801, "learning_rate": 1.1226617053468237e-05, "loss": 0.7644, "step": 2721 }, { "epoch": 0.48, "grad_norm": 0.6854092390647029, "learning_rate": 1.1220979775730409e-05, "loss": 0.6941, "step": 2722 }, { "epoch": 0.48, "grad_norm": 0.9135791294299105, "learning_rate": 1.1215342104079064e-05, "loss": 0.7824, "step": 2723 }, { "epoch": 0.48, "grad_norm": 0.7100277991685624, "learning_rate": 1.1209704040333027e-05, "loss": 0.7126, "step": 2724 }, { "epoch": 0.48, "grad_norm": 0.6995258722737833, "learning_rate": 1.1204065586311259e-05, "loss": 0.7208, "step": 2725 }, { "epoch": 0.48, "grad_norm": 0.7484784164348334, "learning_rate": 1.1198426743832842e-05, "loss": 0.7437, "step": 2726 }, { "epoch": 0.48, "grad_norm": 0.6515412274343712, "learning_rate": 1.1192787514716979e-05, "loss": 0.7125, "step": 2727 }, { "epoch": 0.48, "grad_norm": 0.7200731237638842, "learning_rate": 1.1187147900783009e-05, "loss": 0.7031, "step": 2728 }, { "epoch": 0.48, "grad_norm": 0.6836308507893978, "learning_rate": 1.1181507903850388e-05, "loss": 0.7443, "step": 2729 }, { "epoch": 0.48, "grad_norm": 0.6334066852001051, "learning_rate": 1.1175867525738696e-05, "loss": 0.6976, "step": 2730 }, { "epoch": 0.48, "grad_norm": 0.6709627310511967, "learning_rate": 1.1170226768267631e-05, "loss": 0.6903, "step": 2731 }, { "epoch": 0.48, "grad_norm": 0.6800963315094194, "learning_rate": 1.1164585633257028e-05, "loss": 0.7086, "step": 2732 }, { "epoch": 0.48, "grad_norm": 0.6305784791789842, "learning_rate": 1.1158944122526827e-05, "loss": 0.7065, "step": 2733 }, { "epoch": 0.48, "grad_norm": 0.6559470340354706, "learning_rate": 1.1153302237897105e-05, "loss": 0.7132, "step": 2734 }, { "epoch": 0.48, "grad_norm": 0.6499445821402583, "learning_rate": 1.1147659981188046e-05, "loss": 0.7145, "step": 2735 }, { "epoch": 0.48, "grad_norm": 0.7834381561894229, "learning_rate": 1.1142017354219959e-05, "loss": 0.7742, "step": 2736 }, { "epoch": 0.48, "grad_norm": 0.7311127571256035, "learning_rate": 1.1136374358813273e-05, "loss": 0.6805, "step": 2737 }, { "epoch": 0.48, "grad_norm": 0.5995382098898412, "learning_rate": 1.1130730996788539e-05, "loss": 0.6559, "step": 2738 }, { "epoch": 0.48, "grad_norm": 0.6744613621479518, "learning_rate": 1.1125087269966423e-05, "loss": 0.673, "step": 2739 }, { "epoch": 0.48, "grad_norm": 0.7357053492629709, "learning_rate": 1.1119443180167706e-05, "loss": 0.7157, "step": 2740 }, { "epoch": 0.48, "grad_norm": 0.7231688222516701, "learning_rate": 1.1113798729213292e-05, "loss": 0.7048, "step": 2741 }, { "epoch": 0.48, "grad_norm": 0.7782090160458149, "learning_rate": 1.1108153918924196e-05, "loss": 0.7101, "step": 2742 }, { "epoch": 0.48, "grad_norm": 0.7926361843350189, "learning_rate": 1.1102508751121553e-05, "loss": 0.76, "step": 2743 }, { "epoch": 0.48, "grad_norm": 0.8569783788680596, "learning_rate": 1.1096863227626612e-05, "loss": 0.795, "step": 2744 }, { "epoch": 0.48, "grad_norm": 0.6903421067384572, "learning_rate": 1.1091217350260734e-05, "loss": 0.6914, "step": 2745 }, { "epoch": 0.48, "grad_norm": 0.7256812632073264, "learning_rate": 1.10855711208454e-05, "loss": 0.7393, "step": 2746 }, { "epoch": 0.48, "grad_norm": 0.7002652711478142, "learning_rate": 1.1079924541202199e-05, "loss": 0.698, "step": 2747 }, { "epoch": 0.48, "grad_norm": 0.7861107780449234, "learning_rate": 1.1074277613152838e-05, "loss": 0.7593, "step": 2748 }, { "epoch": 0.48, "grad_norm": 0.7245040971287392, "learning_rate": 1.1068630338519131e-05, "loss": 0.7198, "step": 2749 }, { "epoch": 0.48, "grad_norm": 0.9665351099673138, "learning_rate": 1.1062982719123003e-05, "loss": 0.7463, "step": 2750 }, { "epoch": 0.48, "grad_norm": 0.7067837861862152, "learning_rate": 1.1057334756786503e-05, "loss": 0.7254, "step": 2751 }, { "epoch": 0.48, "grad_norm": 0.6517548955900009, "learning_rate": 1.1051686453331774e-05, "loss": 0.7113, "step": 2752 }, { "epoch": 0.48, "grad_norm": 0.7488072302844772, "learning_rate": 1.1046037810581081e-05, "loss": 0.752, "step": 2753 }, { "epoch": 0.48, "grad_norm": 0.8328616242451561, "learning_rate": 1.1040388830356787e-05, "loss": 0.733, "step": 2754 }, { "epoch": 0.48, "grad_norm": 0.7652846595278713, "learning_rate": 1.1034739514481378e-05, "loss": 0.7475, "step": 2755 }, { "epoch": 0.48, "grad_norm": 0.7009817011167535, "learning_rate": 1.1029089864777438e-05, "loss": 0.6949, "step": 2756 }, { "epoch": 0.48, "grad_norm": 0.7573448884557715, "learning_rate": 1.1023439883067664e-05, "loss": 0.7211, "step": 2757 }, { "epoch": 0.48, "grad_norm": 0.6459205287078026, "learning_rate": 1.1017789571174855e-05, "loss": 0.7021, "step": 2758 }, { "epoch": 0.48, "grad_norm": 0.6984512932865691, "learning_rate": 1.1012138930921918e-05, "loss": 0.6965, "step": 2759 }, { "epoch": 0.48, "grad_norm": 0.6368129837068165, "learning_rate": 1.100648796413187e-05, "loss": 0.688, "step": 2760 }, { "epoch": 0.48, "grad_norm": 0.6682001877673517, "learning_rate": 1.1000836672627833e-05, "loss": 0.6791, "step": 2761 }, { "epoch": 0.48, "grad_norm": 0.6385094970040416, "learning_rate": 1.0995185058233027e-05, "loss": 0.6929, "step": 2762 }, { "epoch": 0.48, "grad_norm": 0.6621261960761494, "learning_rate": 1.098953312277078e-05, "loss": 0.6553, "step": 2763 }, { "epoch": 0.48, "grad_norm": 0.6895998667925124, "learning_rate": 1.0983880868064532e-05, "loss": 0.6915, "step": 2764 }, { "epoch": 0.48, "grad_norm": 0.603268956456155, "learning_rate": 1.0978228295937807e-05, "loss": 0.6723, "step": 2765 }, { "epoch": 0.49, "grad_norm": 0.6537797696356261, "learning_rate": 1.0972575408214248e-05, "loss": 0.7419, "step": 2766 }, { "epoch": 0.49, "grad_norm": 0.7528935339183178, "learning_rate": 1.0966922206717602e-05, "loss": 0.7096, "step": 2767 }, { "epoch": 0.49, "grad_norm": 0.7069432454546059, "learning_rate": 1.0961268693271694e-05, "loss": 0.7206, "step": 2768 }, { "epoch": 0.49, "grad_norm": 0.7085505904308331, "learning_rate": 1.0955614869700473e-05, "loss": 0.7463, "step": 2769 }, { "epoch": 0.49, "grad_norm": 0.6634201609072256, "learning_rate": 1.0949960737827985e-05, "loss": 0.7064, "step": 2770 }, { "epoch": 0.49, "grad_norm": 0.6922443187203114, "learning_rate": 1.0944306299478367e-05, "loss": 0.7778, "step": 2771 }, { "epoch": 0.49, "grad_norm": 0.6742486405072988, "learning_rate": 1.0938651556475852e-05, "loss": 0.6887, "step": 2772 }, { "epoch": 0.49, "grad_norm": 0.792476426451177, "learning_rate": 1.093299651064479e-05, "loss": 0.7135, "step": 2773 }, { "epoch": 0.49, "grad_norm": 0.6868144478350685, "learning_rate": 1.0927341163809608e-05, "loss": 0.6906, "step": 2774 }, { "epoch": 0.49, "grad_norm": 0.7102466876419056, "learning_rate": 1.0921685517794842e-05, "loss": 0.7302, "step": 2775 }, { "epoch": 0.49, "grad_norm": 0.7106565489563269, "learning_rate": 1.0916029574425122e-05, "loss": 0.7458, "step": 2776 }, { "epoch": 0.49, "grad_norm": 0.6832813963283006, "learning_rate": 1.0910373335525169e-05, "loss": 0.6963, "step": 2777 }, { "epoch": 0.49, "grad_norm": 0.6573138664136541, "learning_rate": 1.0904716802919808e-05, "loss": 0.6976, "step": 2778 }, { "epoch": 0.49, "grad_norm": 0.6244010138581234, "learning_rate": 1.0899059978433951e-05, "loss": 0.6784, "step": 2779 }, { "epoch": 0.49, "grad_norm": 0.7066182212305232, "learning_rate": 1.0893402863892613e-05, "loss": 0.6906, "step": 2780 }, { "epoch": 0.49, "grad_norm": 0.7018365956357668, "learning_rate": 1.0887745461120888e-05, "loss": 0.7228, "step": 2781 }, { "epoch": 0.49, "grad_norm": 0.6605688333961236, "learning_rate": 1.0882087771943981e-05, "loss": 0.6922, "step": 2782 }, { "epoch": 0.49, "grad_norm": 0.7096716750067581, "learning_rate": 1.0876429798187174e-05, "loss": 0.7704, "step": 2783 }, { "epoch": 0.49, "grad_norm": 0.7204135783918414, "learning_rate": 1.087077154167585e-05, "loss": 0.7292, "step": 2784 }, { "epoch": 0.49, "grad_norm": 0.5813328926073041, "learning_rate": 1.0865113004235484e-05, "loss": 0.6706, "step": 2785 }, { "epoch": 0.49, "grad_norm": 0.6407873153016231, "learning_rate": 1.0859454187691631e-05, "loss": 0.7036, "step": 2786 }, { "epoch": 0.49, "grad_norm": 0.7507221652174928, "learning_rate": 1.0853795093869947e-05, "loss": 0.7139, "step": 2787 }, { "epoch": 0.49, "grad_norm": 0.6616117240856642, "learning_rate": 1.0848135724596174e-05, "loss": 0.6792, "step": 2788 }, { "epoch": 0.49, "grad_norm": 0.8030532447895435, "learning_rate": 1.0842476081696142e-05, "loss": 0.7651, "step": 2789 }, { "epoch": 0.49, "grad_norm": 0.7631282882485894, "learning_rate": 1.0836816166995769e-05, "loss": 0.7283, "step": 2790 }, { "epoch": 0.49, "grad_norm": 0.7527381613194298, "learning_rate": 1.083115598232106e-05, "loss": 0.7712, "step": 2791 }, { "epoch": 0.49, "grad_norm": 0.6404141167399571, "learning_rate": 1.0825495529498111e-05, "loss": 0.7113, "step": 2792 }, { "epoch": 0.49, "grad_norm": 0.7404435349239418, "learning_rate": 1.0819834810353104e-05, "loss": 0.7533, "step": 2793 }, { "epoch": 0.49, "grad_norm": 0.6881252291028457, "learning_rate": 1.0814173826712302e-05, "loss": 0.7428, "step": 2794 }, { "epoch": 0.49, "grad_norm": 0.7082043600837183, "learning_rate": 1.0808512580402054e-05, "loss": 0.717, "step": 2795 }, { "epoch": 0.49, "grad_norm": 0.6968416399751947, "learning_rate": 1.0802851073248801e-05, "loss": 0.7128, "step": 2796 }, { "epoch": 0.49, "grad_norm": 0.6646071340098443, "learning_rate": 1.0797189307079056e-05, "loss": 0.7334, "step": 2797 }, { "epoch": 0.49, "grad_norm": 0.6367644314412191, "learning_rate": 1.0791527283719432e-05, "loss": 0.7043, "step": 2798 }, { "epoch": 0.49, "grad_norm": 0.700316144454102, "learning_rate": 1.0785865004996614e-05, "loss": 0.688, "step": 2799 }, { "epoch": 0.49, "grad_norm": 0.7510593707175758, "learning_rate": 1.0780202472737363e-05, "loss": 0.7621, "step": 2800 }, { "epoch": 0.49, "grad_norm": 0.8501912818737972, "learning_rate": 1.0774539688768537e-05, "loss": 0.7736, "step": 2801 }, { "epoch": 0.49, "grad_norm": 0.7262810749907788, "learning_rate": 1.0768876654917068e-05, "loss": 0.7525, "step": 2802 }, { "epoch": 0.49, "grad_norm": 0.7373290026043067, "learning_rate": 1.0763213373009967e-05, "loss": 0.6917, "step": 2803 }, { "epoch": 0.49, "grad_norm": 0.6567365390498224, "learning_rate": 1.0757549844874325e-05, "loss": 0.7256, "step": 2804 }, { "epoch": 0.49, "grad_norm": 0.7861414882618926, "learning_rate": 1.0751886072337318e-05, "loss": 0.7541, "step": 2805 }, { "epoch": 0.49, "grad_norm": 0.7903643360213701, "learning_rate": 1.0746222057226194e-05, "loss": 0.7221, "step": 2806 }, { "epoch": 0.49, "grad_norm": 0.6719017747395004, "learning_rate": 1.0740557801368286e-05, "loss": 0.7186, "step": 2807 }, { "epoch": 0.49, "grad_norm": 0.672216821317448, "learning_rate": 1.0734893306590997e-05, "loss": 0.6889, "step": 2808 }, { "epoch": 0.49, "grad_norm": 0.7394990929007595, "learning_rate": 1.0729228574721811e-05, "loss": 0.7326, "step": 2809 }, { "epoch": 0.49, "grad_norm": 0.6995096349308622, "learning_rate": 1.072356360758829e-05, "loss": 0.7037, "step": 2810 }, { "epoch": 0.49, "grad_norm": 0.7728299609703555, "learning_rate": 1.0717898407018073e-05, "loss": 0.7563, "step": 2811 }, { "epoch": 0.49, "grad_norm": 0.7333577919287224, "learning_rate": 1.0712232974838867e-05, "loss": 0.6995, "step": 2812 }, { "epoch": 0.49, "grad_norm": 0.6441607114657054, "learning_rate": 1.0706567312878461e-05, "loss": 0.7008, "step": 2813 }, { "epoch": 0.49, "grad_norm": 0.6534031224491351, "learning_rate": 1.070090142296472e-05, "loss": 0.6724, "step": 2814 }, { "epoch": 0.49, "grad_norm": 0.6141624958067369, "learning_rate": 1.069523530692557e-05, "loss": 0.6923, "step": 2815 }, { "epoch": 0.49, "grad_norm": 0.7145784662733589, "learning_rate": 1.0689568966589021e-05, "loss": 0.7118, "step": 2816 }, { "epoch": 0.49, "grad_norm": 0.5808789622354602, "learning_rate": 1.0683902403783155e-05, "loss": 0.6558, "step": 2817 }, { "epoch": 0.49, "grad_norm": 0.7811950388151783, "learning_rate": 1.0678235620336118e-05, "loss": 0.7468, "step": 2818 }, { "epoch": 0.49, "grad_norm": 0.6855247404740413, "learning_rate": 1.067256861807614e-05, "loss": 0.672, "step": 2819 }, { "epoch": 0.49, "grad_norm": 0.6668546652477397, "learning_rate": 1.0666901398831508e-05, "loss": 0.7294, "step": 2820 }, { "epoch": 0.49, "grad_norm": 0.6869906642558223, "learning_rate": 1.0661233964430588e-05, "loss": 0.693, "step": 2821 }, { "epoch": 0.49, "grad_norm": 0.7054356429731287, "learning_rate": 1.065556631670181e-05, "loss": 0.7453, "step": 2822 }, { "epoch": 0.5, "grad_norm": 0.7063264233382386, "learning_rate": 1.0649898457473678e-05, "loss": 0.7291, "step": 2823 }, { "epoch": 0.5, "grad_norm": 0.6702885570844186, "learning_rate": 1.064423038857476e-05, "loss": 0.6612, "step": 2824 }, { "epoch": 0.5, "grad_norm": 0.6305824421012113, "learning_rate": 1.0638562111833692e-05, "loss": 0.6913, "step": 2825 }, { "epoch": 0.5, "grad_norm": 0.7013332039023127, "learning_rate": 1.0632893629079181e-05, "loss": 0.7435, "step": 2826 }, { "epoch": 0.5, "grad_norm": 0.8278239952908487, "learning_rate": 1.0627224942139993e-05, "loss": 0.7339, "step": 2827 }, { "epoch": 0.5, "grad_norm": 0.7145008070457886, "learning_rate": 1.0621556052844972e-05, "loss": 0.7033, "step": 2828 }, { "epoch": 0.5, "grad_norm": 0.7893194399052197, "learning_rate": 1.0615886963023013e-05, "loss": 0.7389, "step": 2829 }, { "epoch": 0.5, "grad_norm": 0.7053017679872559, "learning_rate": 1.0610217674503083e-05, "loss": 0.7144, "step": 2830 }, { "epoch": 0.5, "grad_norm": 0.6698175845697973, "learning_rate": 1.0604548189114222e-05, "loss": 0.6722, "step": 2831 }, { "epoch": 0.5, "grad_norm": 0.7419643381440596, "learning_rate": 1.059887850868551e-05, "loss": 0.7161, "step": 2832 }, { "epoch": 0.5, "grad_norm": 0.6836653391633116, "learning_rate": 1.0593208635046112e-05, "loss": 0.698, "step": 2833 }, { "epoch": 0.5, "grad_norm": 0.6738355971997058, "learning_rate": 1.0587538570025249e-05, "loss": 0.6719, "step": 2834 }, { "epoch": 0.5, "grad_norm": 0.844268452168889, "learning_rate": 1.0581868315452197e-05, "loss": 0.7577, "step": 2835 }, { "epoch": 0.5, "grad_norm": 0.732461338357128, "learning_rate": 1.05761978731563e-05, "loss": 0.7526, "step": 2836 }, { "epoch": 0.5, "grad_norm": 0.6775544133124999, "learning_rate": 1.0570527244966963e-05, "loss": 0.7177, "step": 2837 }, { "epoch": 0.5, "grad_norm": 0.7205145020423612, "learning_rate": 1.0564856432713646e-05, "loss": 0.6554, "step": 2838 }, { "epoch": 0.5, "grad_norm": 0.6725616801561719, "learning_rate": 1.0559185438225874e-05, "loss": 0.7085, "step": 2839 }, { "epoch": 0.5, "grad_norm": 0.999371129673703, "learning_rate": 1.0553514263333229e-05, "loss": 0.7193, "step": 2840 }, { "epoch": 0.5, "grad_norm": 0.790983199158061, "learning_rate": 1.0547842909865342e-05, "loss": 0.7216, "step": 2841 }, { "epoch": 0.5, "grad_norm": 0.6693868120926424, "learning_rate": 1.0542171379651918e-05, "loss": 0.6843, "step": 2842 }, { "epoch": 0.5, "grad_norm": 0.7092564451054137, "learning_rate": 1.0536499674522711e-05, "loss": 0.732, "step": 2843 }, { "epoch": 0.5, "grad_norm": 0.9640677462003469, "learning_rate": 1.0530827796307529e-05, "loss": 0.7001, "step": 2844 }, { "epoch": 0.5, "grad_norm": 0.7416962484065477, "learning_rate": 1.0525155746836233e-05, "loss": 0.7894, "step": 2845 }, { "epoch": 0.5, "grad_norm": 0.6987866598640389, "learning_rate": 1.0519483527938751e-05, "loss": 0.7243, "step": 2846 }, { "epoch": 0.5, "grad_norm": 0.7524603023588484, "learning_rate": 1.0513811141445059e-05, "loss": 0.7301, "step": 2847 }, { "epoch": 0.5, "grad_norm": 0.7180544456769066, "learning_rate": 1.0508138589185184e-05, "loss": 0.7176, "step": 2848 }, { "epoch": 0.5, "grad_norm": 0.612373614223005, "learning_rate": 1.0502465872989212e-05, "loss": 0.6656, "step": 2849 }, { "epoch": 0.5, "grad_norm": 0.7096786822590555, "learning_rate": 1.0496792994687276e-05, "loss": 0.7122, "step": 2850 }, { "epoch": 0.5, "grad_norm": 0.6900316203412138, "learning_rate": 1.0491119956109565e-05, "loss": 0.6955, "step": 2851 }, { "epoch": 0.5, "grad_norm": 0.6946954460604637, "learning_rate": 1.0485446759086327e-05, "loss": 0.7152, "step": 2852 }, { "epoch": 0.5, "grad_norm": 0.7584105948735997, "learning_rate": 1.0479773405447847e-05, "loss": 0.7146, "step": 2853 }, { "epoch": 0.5, "grad_norm": 0.6424878614199026, "learning_rate": 1.0474099897024465e-05, "loss": 0.7091, "step": 2854 }, { "epoch": 0.5, "grad_norm": 0.703607196044149, "learning_rate": 1.0468426235646581e-05, "loss": 0.6971, "step": 2855 }, { "epoch": 0.5, "grad_norm": 0.7741010368702056, "learning_rate": 1.0462752423144626e-05, "loss": 0.7577, "step": 2856 }, { "epoch": 0.5, "grad_norm": 1.0129679062381745, "learning_rate": 1.0457078461349099e-05, "loss": 0.67, "step": 2857 }, { "epoch": 0.5, "grad_norm": 0.679408281471285, "learning_rate": 1.0451404352090538e-05, "loss": 0.7269, "step": 2858 }, { "epoch": 0.5, "grad_norm": 0.8863361834265046, "learning_rate": 1.0445730097199522e-05, "loss": 0.7523, "step": 2859 }, { "epoch": 0.5, "grad_norm": 0.6240434662769712, "learning_rate": 1.0440055698506688e-05, "loss": 0.6915, "step": 2860 }, { "epoch": 0.5, "grad_norm": 0.72326316396151, "learning_rate": 1.0434381157842717e-05, "loss": 0.6983, "step": 2861 }, { "epoch": 0.5, "grad_norm": 0.7230700072483903, "learning_rate": 1.0428706477038328e-05, "loss": 0.757, "step": 2862 }, { "epoch": 0.5, "grad_norm": 0.6264199914087596, "learning_rate": 1.04230316579243e-05, "loss": 0.6776, "step": 2863 }, { "epoch": 0.5, "grad_norm": 0.6532831857092505, "learning_rate": 1.0417356702331445e-05, "loss": 0.6825, "step": 2864 }, { "epoch": 0.5, "grad_norm": 0.7584829761868984, "learning_rate": 1.0411681612090616e-05, "loss": 0.7217, "step": 2865 }, { "epoch": 0.5, "grad_norm": 0.6973964214390191, "learning_rate": 1.0406006389032724e-05, "loss": 0.6773, "step": 2866 }, { "epoch": 0.5, "grad_norm": 0.7460731014569532, "learning_rate": 1.0400331034988706e-05, "loss": 0.7214, "step": 2867 }, { "epoch": 0.5, "grad_norm": 0.7805049643446456, "learning_rate": 1.0394655551789555e-05, "loss": 0.7465, "step": 2868 }, { "epoch": 0.5, "grad_norm": 0.6762835611195458, "learning_rate": 1.03889799412663e-05, "loss": 0.7402, "step": 2869 }, { "epoch": 0.5, "grad_norm": 0.6617632900725374, "learning_rate": 1.0383304205250007e-05, "loss": 0.7083, "step": 2870 }, { "epoch": 0.5, "grad_norm": 0.631553373576675, "learning_rate": 1.0377628345571789e-05, "loss": 0.6919, "step": 2871 }, { "epoch": 0.5, "grad_norm": 0.6185586045461744, "learning_rate": 1.0371952364062803e-05, "loss": 0.696, "step": 2872 }, { "epoch": 0.5, "grad_norm": 0.7637138764805216, "learning_rate": 1.0366276262554227e-05, "loss": 0.7148, "step": 2873 }, { "epoch": 0.5, "grad_norm": 0.682690469240136, "learning_rate": 1.0360600042877298e-05, "loss": 0.6987, "step": 2874 }, { "epoch": 0.5, "grad_norm": 0.7970988861007818, "learning_rate": 1.035492370686328e-05, "loss": 0.7219, "step": 2875 }, { "epoch": 0.5, "grad_norm": 0.8283207576996682, "learning_rate": 1.034924725634348e-05, "loss": 0.7399, "step": 2876 }, { "epoch": 0.5, "grad_norm": 0.6665626501692733, "learning_rate": 1.0343570693149234e-05, "loss": 0.6608, "step": 2877 }, { "epoch": 0.5, "grad_norm": 0.6613340266363869, "learning_rate": 1.0337894019111925e-05, "loss": 0.707, "step": 2878 }, { "epoch": 0.5, "grad_norm": 0.7266575585928552, "learning_rate": 1.0332217236062962e-05, "loss": 0.7335, "step": 2879 }, { "epoch": 0.5, "grad_norm": 0.7143181560186473, "learning_rate": 1.0326540345833796e-05, "loss": 0.7756, "step": 2880 }, { "epoch": 0.51, "grad_norm": 0.724285193983289, "learning_rate": 1.0320863350255911e-05, "loss": 0.7236, "step": 2881 }, { "epoch": 0.51, "grad_norm": 0.6854156559488439, "learning_rate": 1.031518625116082e-05, "loss": 0.6913, "step": 2882 }, { "epoch": 0.51, "grad_norm": 0.7656531676946688, "learning_rate": 1.0309509050380078e-05, "loss": 0.7687, "step": 2883 }, { "epoch": 0.51, "grad_norm": 0.6651865018714193, "learning_rate": 1.0303831749745266e-05, "loss": 0.7274, "step": 2884 }, { "epoch": 0.51, "grad_norm": 0.772158075290913, "learning_rate": 1.0298154351088e-05, "loss": 0.7624, "step": 2885 }, { "epoch": 0.51, "grad_norm": 0.7503930461612056, "learning_rate": 1.0292476856239926e-05, "loss": 0.7755, "step": 2886 }, { "epoch": 0.51, "grad_norm": 0.6503181857034405, "learning_rate": 1.0286799267032725e-05, "loss": 0.735, "step": 2887 }, { "epoch": 0.51, "grad_norm": 0.7086870208484286, "learning_rate": 1.0281121585298101e-05, "loss": 0.7548, "step": 2888 }, { "epoch": 0.51, "grad_norm": 0.697188915173979, "learning_rate": 1.0275443812867798e-05, "loss": 0.7204, "step": 2889 }, { "epoch": 0.51, "grad_norm": 0.8171872080507754, "learning_rate": 1.026976595157358e-05, "loss": 0.7773, "step": 2890 }, { "epoch": 0.51, "grad_norm": 0.6733980374510048, "learning_rate": 1.0264088003247244e-05, "loss": 0.7023, "step": 2891 }, { "epoch": 0.51, "grad_norm": 0.6913558643610924, "learning_rate": 1.0258409969720612e-05, "loss": 0.7081, "step": 2892 }, { "epoch": 0.51, "grad_norm": 0.7094867182830448, "learning_rate": 1.0252731852825544e-05, "loss": 0.6945, "step": 2893 }, { "epoch": 0.51, "grad_norm": 0.8017292268235946, "learning_rate": 1.0247053654393908e-05, "loss": 0.7047, "step": 2894 }, { "epoch": 0.51, "grad_norm": 0.6599728505830732, "learning_rate": 1.0241375376257619e-05, "loss": 0.6883, "step": 2895 }, { "epoch": 0.51, "grad_norm": 0.6910567327367606, "learning_rate": 1.0235697020248603e-05, "loss": 0.7003, "step": 2896 }, { "epoch": 0.51, "grad_norm": 0.6853637519197242, "learning_rate": 1.0230018588198816e-05, "loss": 0.6731, "step": 2897 }, { "epoch": 0.51, "grad_norm": 0.6812911479677479, "learning_rate": 1.0224340081940238e-05, "loss": 0.6686, "step": 2898 }, { "epoch": 0.51, "grad_norm": 0.7560340506847467, "learning_rate": 1.0218661503304876e-05, "loss": 0.7374, "step": 2899 }, { "epoch": 0.51, "grad_norm": 1.1655884379596895, "learning_rate": 1.0212982854124754e-05, "loss": 0.7142, "step": 2900 }, { "epoch": 0.51, "grad_norm": 0.7118330259558254, "learning_rate": 1.0207304136231928e-05, "loss": 0.732, "step": 2901 }, { "epoch": 0.51, "grad_norm": 0.7225094186556846, "learning_rate": 1.0201625351458463e-05, "loss": 0.7095, "step": 2902 }, { "epoch": 0.51, "grad_norm": 0.6342480763357196, "learning_rate": 1.019594650163646e-05, "loss": 0.6901, "step": 2903 }, { "epoch": 0.51, "grad_norm": 0.8023184529444487, "learning_rate": 1.0190267588598033e-05, "loss": 0.7818, "step": 2904 }, { "epoch": 0.51, "grad_norm": 0.6744351529371273, "learning_rate": 1.018458861417532e-05, "loss": 0.7033, "step": 2905 }, { "epoch": 0.51, "grad_norm": 0.7723792593997167, "learning_rate": 1.0178909580200467e-05, "loss": 0.7044, "step": 2906 }, { "epoch": 0.51, "grad_norm": 0.6156399170020428, "learning_rate": 1.0173230488505659e-05, "loss": 0.6699, "step": 2907 }, { "epoch": 0.51, "grad_norm": 0.65743438542043, "learning_rate": 1.0167551340923085e-05, "loss": 0.7355, "step": 2908 }, { "epoch": 0.51, "grad_norm": 0.6752900657284924, "learning_rate": 1.0161872139284956e-05, "loss": 0.7458, "step": 2909 }, { "epoch": 0.51, "grad_norm": 0.7236034034998909, "learning_rate": 1.0156192885423502e-05, "loss": 0.6959, "step": 2910 }, { "epoch": 0.51, "grad_norm": 1.0680054973020572, "learning_rate": 1.0150513581170966e-05, "loss": 0.7032, "step": 2911 }, { "epoch": 0.51, "grad_norm": 0.6598017423207226, "learning_rate": 1.0144834228359613e-05, "loss": 0.7114, "step": 2912 }, { "epoch": 0.51, "grad_norm": 0.6109098542432276, "learning_rate": 1.0139154828821724e-05, "loss": 0.6997, "step": 2913 }, { "epoch": 0.51, "grad_norm": 0.6774155698474208, "learning_rate": 1.0133475384389584e-05, "loss": 0.6825, "step": 2914 }, { "epoch": 0.51, "grad_norm": 0.6739448718415745, "learning_rate": 1.01277958968955e-05, "loss": 0.7204, "step": 2915 }, { "epoch": 0.51, "grad_norm": 0.9150734724432642, "learning_rate": 1.0122116368171801e-05, "loss": 0.8561, "step": 2916 }, { "epoch": 0.51, "grad_norm": 0.7562458322963391, "learning_rate": 1.0116436800050817e-05, "loss": 0.715, "step": 2917 }, { "epoch": 0.51, "grad_norm": 0.7933913764609585, "learning_rate": 1.0110757194364894e-05, "loss": 0.7233, "step": 2918 }, { "epoch": 0.51, "grad_norm": 0.6647073391429036, "learning_rate": 1.0105077552946392e-05, "loss": 0.7212, "step": 2919 }, { "epoch": 0.51, "grad_norm": 0.6955659469820858, "learning_rate": 1.009939787762768e-05, "loss": 0.6931, "step": 2920 }, { "epoch": 0.51, "grad_norm": 0.698786936393277, "learning_rate": 1.0093718170241141e-05, "loss": 0.718, "step": 2921 }, { "epoch": 0.51, "grad_norm": 0.7722088505446867, "learning_rate": 1.0088038432619172e-05, "loss": 0.7232, "step": 2922 }, { "epoch": 0.51, "grad_norm": 0.7631917959056111, "learning_rate": 1.0082358666594166e-05, "loss": 0.7396, "step": 2923 }, { "epoch": 0.51, "grad_norm": 1.3764620820493751, "learning_rate": 1.0076678873998535e-05, "loss": 0.6944, "step": 2924 }, { "epoch": 0.51, "grad_norm": 0.6158398549497865, "learning_rate": 1.0070999056664708e-05, "loss": 0.6909, "step": 2925 }, { "epoch": 0.51, "grad_norm": 0.8075736471867986, "learning_rate": 1.00653192164251e-05, "loss": 0.7697, "step": 2926 }, { "epoch": 0.51, "grad_norm": 0.5997465787802853, "learning_rate": 1.0059639355112156e-05, "loss": 0.665, "step": 2927 }, { "epoch": 0.51, "grad_norm": 0.7640314526243934, "learning_rate": 1.0053959474558313e-05, "loss": 0.7511, "step": 2928 }, { "epoch": 0.51, "grad_norm": 0.7138658863607185, "learning_rate": 1.0048279576596016e-05, "loss": 0.7491, "step": 2929 }, { "epoch": 0.51, "grad_norm": 0.6528793310517919, "learning_rate": 1.0042599663057723e-05, "loss": 0.6981, "step": 2930 }, { "epoch": 0.51, "grad_norm": 0.7080994809132561, "learning_rate": 1.0036919735775892e-05, "loss": 0.7557, "step": 2931 }, { "epoch": 0.51, "grad_norm": 0.7094869592823664, "learning_rate": 1.0031239796582984e-05, "loss": 0.7051, "step": 2932 }, { "epoch": 0.51, "grad_norm": 0.6795176908263497, "learning_rate": 1.0025559847311464e-05, "loss": 0.7203, "step": 2933 }, { "epoch": 0.51, "grad_norm": 0.7142129071239831, "learning_rate": 1.001987988979381e-05, "loss": 0.7766, "step": 2934 }, { "epoch": 0.51, "grad_norm": 0.7412390373773144, "learning_rate": 1.0014199925862482e-05, "loss": 0.702, "step": 2935 }, { "epoch": 0.51, "grad_norm": 0.6592792312078375, "learning_rate": 1.0008519957349967e-05, "loss": 0.6899, "step": 2936 }, { "epoch": 0.51, "grad_norm": 0.6536505019891399, "learning_rate": 1.0002839986088737e-05, "loss": 0.6934, "step": 2937 }, { "epoch": 0.52, "grad_norm": 0.6492995912228667, "learning_rate": 9.997160013911268e-06, "loss": 0.7011, "step": 2938 }, { "epoch": 0.52, "grad_norm": 0.6989220183587851, "learning_rate": 9.991480042650034e-06, "loss": 0.696, "step": 2939 }, { "epoch": 0.52, "grad_norm": 0.7053840419896936, "learning_rate": 9.985800074137521e-06, "loss": 0.714, "step": 2940 }, { "epoch": 0.52, "grad_norm": 0.6138875064803275, "learning_rate": 9.980120110206196e-06, "loss": 0.6637, "step": 2941 }, { "epoch": 0.52, "grad_norm": 0.7712427702137129, "learning_rate": 9.97444015268854e-06, "loss": 0.7443, "step": 2942 }, { "epoch": 0.52, "grad_norm": 0.6490332986355238, "learning_rate": 9.968760203417021e-06, "loss": 0.6974, "step": 2943 }, { "epoch": 0.52, "grad_norm": 0.7154324888076159, "learning_rate": 9.963080264224111e-06, "loss": 0.7101, "step": 2944 }, { "epoch": 0.52, "grad_norm": 0.6195796607566696, "learning_rate": 9.957400336942279e-06, "loss": 0.7151, "step": 2945 }, { "epoch": 0.52, "grad_norm": 0.6539005457197838, "learning_rate": 9.951720423403986e-06, "loss": 0.7038, "step": 2946 }, { "epoch": 0.52, "grad_norm": 0.6875255046656791, "learning_rate": 9.946040525441692e-06, "loss": 0.708, "step": 2947 }, { "epoch": 0.52, "grad_norm": 0.688542262448344, "learning_rate": 9.940360644887846e-06, "loss": 0.7121, "step": 2948 }, { "epoch": 0.52, "grad_norm": 0.6699326839112691, "learning_rate": 9.934680783574901e-06, "loss": 0.6753, "step": 2949 }, { "epoch": 0.52, "grad_norm": 0.7423986853884421, "learning_rate": 9.929000943335295e-06, "loss": 0.759, "step": 2950 }, { "epoch": 0.52, "grad_norm": 0.699773237326604, "learning_rate": 9.923321126001466e-06, "loss": 0.7028, "step": 2951 }, { "epoch": 0.52, "grad_norm": 0.7165672667413334, "learning_rate": 9.917641333405838e-06, "loss": 0.7491, "step": 2952 }, { "epoch": 0.52, "grad_norm": 0.6231133927055954, "learning_rate": 9.911961567380831e-06, "loss": 0.6817, "step": 2953 }, { "epoch": 0.52, "grad_norm": 0.691130879548913, "learning_rate": 9.90628182975886e-06, "loss": 0.7358, "step": 2954 }, { "epoch": 0.52, "grad_norm": 0.7775956473464739, "learning_rate": 9.900602122372322e-06, "loss": 0.7023, "step": 2955 }, { "epoch": 0.52, "grad_norm": 0.6420070493464268, "learning_rate": 9.894922447053613e-06, "loss": 0.7088, "step": 2956 }, { "epoch": 0.52, "grad_norm": 0.7130536172252836, "learning_rate": 9.889242805635112e-06, "loss": 0.7419, "step": 2957 }, { "epoch": 0.52, "grad_norm": 0.6449896777210979, "learning_rate": 9.883563199949188e-06, "loss": 0.7167, "step": 2958 }, { "epoch": 0.52, "grad_norm": 0.8415778923886844, "learning_rate": 9.8778836318282e-06, "loss": 0.749, "step": 2959 }, { "epoch": 0.52, "grad_norm": 0.7530375984612054, "learning_rate": 9.872204103104502e-06, "loss": 0.6867, "step": 2960 }, { "epoch": 0.52, "grad_norm": 0.6981573399100688, "learning_rate": 9.86652461561042e-06, "loss": 0.758, "step": 2961 }, { "epoch": 0.52, "grad_norm": 0.7264198987805495, "learning_rate": 9.860845171178278e-06, "loss": 0.711, "step": 2962 }, { "epoch": 0.52, "grad_norm": 0.7234779361577852, "learning_rate": 9.855165771640387e-06, "loss": 0.7179, "step": 2963 }, { "epoch": 0.52, "grad_norm": 0.6372828474317224, "learning_rate": 9.849486418829033e-06, "loss": 0.7356, "step": 2964 }, { "epoch": 0.52, "grad_norm": 0.6298605437461996, "learning_rate": 9.843807114576503e-06, "loss": 0.6928, "step": 2965 }, { "epoch": 0.52, "grad_norm": 0.7446392237690583, "learning_rate": 9.83812786071505e-06, "loss": 0.7211, "step": 2966 }, { "epoch": 0.52, "grad_norm": 0.6768329564564827, "learning_rate": 9.83244865907692e-06, "loss": 0.7017, "step": 2967 }, { "epoch": 0.52, "grad_norm": 0.6052394311435881, "learning_rate": 9.826769511494344e-06, "loss": 0.7074, "step": 2968 }, { "epoch": 0.52, "grad_norm": 0.6802408041604171, "learning_rate": 9.821090419799536e-06, "loss": 0.7036, "step": 2969 }, { "epoch": 0.52, "grad_norm": 0.7474241737933711, "learning_rate": 9.815411385824684e-06, "loss": 0.7382, "step": 2970 }, { "epoch": 0.52, "grad_norm": 0.8508001613055186, "learning_rate": 9.809732411401969e-06, "loss": 0.7292, "step": 2971 }, { "epoch": 0.52, "grad_norm": 0.867113904052375, "learning_rate": 9.80405349836354e-06, "loss": 0.784, "step": 2972 }, { "epoch": 0.52, "grad_norm": 0.6992416753702727, "learning_rate": 9.798374648541537e-06, "loss": 0.6879, "step": 2973 }, { "epoch": 0.52, "grad_norm": 0.7210124637905377, "learning_rate": 9.792695863768075e-06, "loss": 0.7368, "step": 2974 }, { "epoch": 0.52, "grad_norm": 0.6148026427957894, "learning_rate": 9.787017145875251e-06, "loss": 0.6552, "step": 2975 }, { "epoch": 0.52, "grad_norm": 0.6562934738834532, "learning_rate": 9.78133849669513e-06, "loss": 0.6963, "step": 2976 }, { "epoch": 0.52, "grad_norm": 0.6767484692549744, "learning_rate": 9.775659918059765e-06, "loss": 0.7181, "step": 2977 }, { "epoch": 0.52, "grad_norm": 0.780839955364574, "learning_rate": 9.769981411801187e-06, "loss": 0.7161, "step": 2978 }, { "epoch": 0.52, "grad_norm": 0.6850406590184578, "learning_rate": 9.764302979751398e-06, "loss": 0.7179, "step": 2979 }, { "epoch": 0.52, "grad_norm": 0.6437582212725151, "learning_rate": 9.758624623742383e-06, "loss": 0.7, "step": 2980 }, { "epoch": 0.52, "grad_norm": 0.7531196109298124, "learning_rate": 9.752946345606092e-06, "loss": 0.7074, "step": 2981 }, { "epoch": 0.52, "grad_norm": 0.6847335883175085, "learning_rate": 9.747268147174458e-06, "loss": 0.7093, "step": 2982 }, { "epoch": 0.52, "grad_norm": 0.747307899107552, "learning_rate": 9.741590030279388e-06, "loss": 0.7281, "step": 2983 }, { "epoch": 0.52, "grad_norm": 0.6826748944456928, "learning_rate": 9.735911996752761e-06, "loss": 0.6852, "step": 2984 }, { "epoch": 0.52, "grad_norm": 0.6845900653856597, "learning_rate": 9.730234048426424e-06, "loss": 0.6817, "step": 2985 }, { "epoch": 0.52, "grad_norm": 0.7020294594501293, "learning_rate": 9.724556187132205e-06, "loss": 0.7215, "step": 2986 }, { "epoch": 0.52, "grad_norm": 0.7100138068692066, "learning_rate": 9.718878414701902e-06, "loss": 0.6759, "step": 2987 }, { "epoch": 0.52, "grad_norm": 0.7341443773216979, "learning_rate": 9.713200732967278e-06, "loss": 0.7221, "step": 2988 }, { "epoch": 0.52, "grad_norm": 0.6849507098575401, "learning_rate": 9.707523143760078e-06, "loss": 0.7035, "step": 2989 }, { "epoch": 0.52, "grad_norm": 0.7591893312931257, "learning_rate": 9.701845648912002e-06, "loss": 0.7071, "step": 2990 }, { "epoch": 0.52, "grad_norm": 0.7448070590923006, "learning_rate": 9.696168250254735e-06, "loss": 0.753, "step": 2991 }, { "epoch": 0.52, "grad_norm": 0.6397799174484448, "learning_rate": 9.690490949619924e-06, "loss": 0.716, "step": 2992 }, { "epoch": 0.52, "grad_norm": 0.6697661235162036, "learning_rate": 9.684813748839186e-06, "loss": 0.6987, "step": 2993 }, { "epoch": 0.52, "grad_norm": 0.6922334606829206, "learning_rate": 9.679136649744094e-06, "loss": 0.7249, "step": 2994 }, { "epoch": 0.53, "grad_norm": 0.8242404448909258, "learning_rate": 9.67345965416621e-06, "loss": 0.7388, "step": 2995 }, { "epoch": 0.53, "grad_norm": 0.7985721993149135, "learning_rate": 9.667782763937041e-06, "loss": 0.7735, "step": 2996 }, { "epoch": 0.53, "grad_norm": 0.7111169091294436, "learning_rate": 9.662105980888079e-06, "loss": 0.701, "step": 2997 }, { "epoch": 0.53, "grad_norm": 0.7262907288519612, "learning_rate": 9.656429306850769e-06, "loss": 0.7092, "step": 2998 }, { "epoch": 0.53, "grad_norm": 0.6666881904521723, "learning_rate": 9.650752743656523e-06, "loss": 0.7216, "step": 2999 }, { "epoch": 0.53, "grad_norm": 0.7136346456096978, "learning_rate": 9.645076293136722e-06, "loss": 0.7177, "step": 3000 }, { "epoch": 0.53, "grad_norm": 0.743841107797694, "learning_rate": 9.639399957122704e-06, "loss": 0.7402, "step": 3001 }, { "epoch": 0.53, "grad_norm": 0.7009448551200576, "learning_rate": 9.633723737445778e-06, "loss": 0.7022, "step": 3002 }, { "epoch": 0.53, "grad_norm": 0.7075957778393175, "learning_rate": 9.628047635937202e-06, "loss": 0.7167, "step": 3003 }, { "epoch": 0.53, "grad_norm": 0.6430648107092588, "learning_rate": 9.622371654428214e-06, "loss": 0.715, "step": 3004 }, { "epoch": 0.53, "grad_norm": 0.7479730363251198, "learning_rate": 9.616695794749996e-06, "loss": 0.7899, "step": 3005 }, { "epoch": 0.53, "grad_norm": 0.771522759702904, "learning_rate": 9.611020058733703e-06, "loss": 0.7607, "step": 3006 }, { "epoch": 0.53, "grad_norm": 0.7389932600664068, "learning_rate": 9.605344448210448e-06, "loss": 0.748, "step": 3007 }, { "epoch": 0.53, "grad_norm": 0.735763495693723, "learning_rate": 9.599668965011296e-06, "loss": 0.7075, "step": 3008 }, { "epoch": 0.53, "grad_norm": 0.7482967784184725, "learning_rate": 9.59399361096728e-06, "loss": 0.7166, "step": 3009 }, { "epoch": 0.53, "grad_norm": 0.6715893446826081, "learning_rate": 9.588318387909384e-06, "loss": 0.7149, "step": 3010 }, { "epoch": 0.53, "grad_norm": 0.7945284047323168, "learning_rate": 9.58264329766856e-06, "loss": 0.7916, "step": 3011 }, { "epoch": 0.53, "grad_norm": 0.6988582347537284, "learning_rate": 9.576968342075702e-06, "loss": 0.7112, "step": 3012 }, { "epoch": 0.53, "grad_norm": 0.7510081948915502, "learning_rate": 9.571293522961674e-06, "loss": 0.7246, "step": 3013 }, { "epoch": 0.53, "grad_norm": 0.6990000372961187, "learning_rate": 9.565618842157286e-06, "loss": 0.7141, "step": 3014 }, { "epoch": 0.53, "grad_norm": 0.6482790824051005, "learning_rate": 9.559944301493314e-06, "loss": 0.6944, "step": 3015 }, { "epoch": 0.53, "grad_norm": 0.6383739861904434, "learning_rate": 9.554269902800481e-06, "loss": 0.6814, "step": 3016 }, { "epoch": 0.53, "grad_norm": 0.6515207765237374, "learning_rate": 9.548595647909466e-06, "loss": 0.7135, "step": 3017 }, { "epoch": 0.53, "grad_norm": 0.7496496867345362, "learning_rate": 9.542921538650903e-06, "loss": 0.73, "step": 3018 }, { "epoch": 0.53, "grad_norm": 0.6296793173963948, "learning_rate": 9.537247576855374e-06, "loss": 0.6685, "step": 3019 }, { "epoch": 0.53, "grad_norm": 0.7115762220361718, "learning_rate": 9.531573764353426e-06, "loss": 0.75, "step": 3020 }, { "epoch": 0.53, "grad_norm": 0.5524735890803717, "learning_rate": 9.525900102975537e-06, "loss": 0.6824, "step": 3021 }, { "epoch": 0.53, "grad_norm": 0.6566519613027597, "learning_rate": 9.520226594552158e-06, "loss": 0.683, "step": 3022 }, { "epoch": 0.53, "grad_norm": 0.6108551516780315, "learning_rate": 9.514553240913677e-06, "loss": 0.7133, "step": 3023 }, { "epoch": 0.53, "grad_norm": 0.6723037870173245, "learning_rate": 9.508880043890437e-06, "loss": 0.7023, "step": 3024 }, { "epoch": 0.53, "grad_norm": 0.678811457726932, "learning_rate": 9.503207005312728e-06, "loss": 0.7056, "step": 3025 }, { "epoch": 0.53, "grad_norm": 0.6961579590839085, "learning_rate": 9.497534127010791e-06, "loss": 0.6881, "step": 3026 }, { "epoch": 0.53, "grad_norm": 0.6554487145713547, "learning_rate": 9.49186141081482e-06, "loss": 0.684, "step": 3027 }, { "epoch": 0.53, "grad_norm": 0.6262559225919642, "learning_rate": 9.486188858554943e-06, "loss": 0.6899, "step": 3028 }, { "epoch": 0.53, "grad_norm": 0.7349265544880905, "learning_rate": 9.480516472061254e-06, "loss": 0.6895, "step": 3029 }, { "epoch": 0.53, "grad_norm": 0.7090898211195074, "learning_rate": 9.47484425316377e-06, "loss": 0.7186, "step": 3030 }, { "epoch": 0.53, "grad_norm": 0.7320361343325081, "learning_rate": 9.469172203692478e-06, "loss": 0.6905, "step": 3031 }, { "epoch": 0.53, "grad_norm": 0.6762143033864013, "learning_rate": 9.463500325477292e-06, "loss": 0.6707, "step": 3032 }, { "epoch": 0.53, "grad_norm": 0.6923775593815482, "learning_rate": 9.457828620348083e-06, "loss": 0.6945, "step": 3033 }, { "epoch": 0.53, "grad_norm": 0.6921084529324079, "learning_rate": 9.45215709013466e-06, "loss": 0.7079, "step": 3034 }, { "epoch": 0.53, "grad_norm": 0.711796586741853, "learning_rate": 9.446485736666774e-06, "loss": 0.7183, "step": 3035 }, { "epoch": 0.53, "grad_norm": 0.7357949378729888, "learning_rate": 9.440814561774127e-06, "loss": 0.7449, "step": 3036 }, { "epoch": 0.53, "grad_norm": 0.6823207060738119, "learning_rate": 9.435143567286354e-06, "loss": 0.7057, "step": 3037 }, { "epoch": 0.53, "grad_norm": 1.1081796097596324, "learning_rate": 9.429472755033038e-06, "loss": 0.6561, "step": 3038 }, { "epoch": 0.53, "grad_norm": 0.6630637565418062, "learning_rate": 9.423802126843706e-06, "loss": 0.6818, "step": 3039 }, { "epoch": 0.53, "grad_norm": 0.6892227232047483, "learning_rate": 9.418131684547808e-06, "loss": 0.7133, "step": 3040 }, { "epoch": 0.53, "grad_norm": 0.6569931079024183, "learning_rate": 9.412461429974756e-06, "loss": 0.6926, "step": 3041 }, { "epoch": 0.53, "grad_norm": 0.7529638580380184, "learning_rate": 9.406791364953891e-06, "loss": 0.7496, "step": 3042 }, { "epoch": 0.53, "grad_norm": 0.6931603340527016, "learning_rate": 9.401121491314493e-06, "loss": 0.6872, "step": 3043 }, { "epoch": 0.53, "grad_norm": 0.7295006095036071, "learning_rate": 9.395451810885782e-06, "loss": 0.7432, "step": 3044 }, { "epoch": 0.53, "grad_norm": 0.7866564574510011, "learning_rate": 9.389782325496917e-06, "loss": 0.7135, "step": 3045 }, { "epoch": 0.53, "grad_norm": 0.6230265292206093, "learning_rate": 9.384113036976989e-06, "loss": 0.7101, "step": 3046 }, { "epoch": 0.53, "grad_norm": 0.6507705535478168, "learning_rate": 9.378443947155031e-06, "loss": 0.6892, "step": 3047 }, { "epoch": 0.53, "grad_norm": 0.7054269109307946, "learning_rate": 9.37277505786001e-06, "loss": 0.7114, "step": 3048 }, { "epoch": 0.53, "grad_norm": 0.6592927067297814, "learning_rate": 9.367106370920824e-06, "loss": 0.6899, "step": 3049 }, { "epoch": 0.53, "grad_norm": 0.7166831522121646, "learning_rate": 9.361437888166312e-06, "loss": 0.7514, "step": 3050 }, { "epoch": 0.53, "grad_norm": 0.6613154296868159, "learning_rate": 9.355769611425246e-06, "loss": 0.6744, "step": 3051 }, { "epoch": 0.54, "grad_norm": 0.7104524853595993, "learning_rate": 9.350101542526325e-06, "loss": 0.7258, "step": 3052 }, { "epoch": 0.54, "grad_norm": 0.6204271264475717, "learning_rate": 9.344433683298192e-06, "loss": 0.6959, "step": 3053 }, { "epoch": 0.54, "grad_norm": 0.6633697930964958, "learning_rate": 9.338766035569415e-06, "loss": 0.7023, "step": 3054 }, { "epoch": 0.54, "grad_norm": 0.6865966980507928, "learning_rate": 9.333098601168493e-06, "loss": 0.7224, "step": 3055 }, { "epoch": 0.54, "grad_norm": 0.6187259549406448, "learning_rate": 9.327431381923862e-06, "loss": 0.6732, "step": 3056 }, { "epoch": 0.54, "grad_norm": 0.7391154549410655, "learning_rate": 9.321764379663887e-06, "loss": 0.7269, "step": 3057 }, { "epoch": 0.54, "grad_norm": 0.7092720035855693, "learning_rate": 9.31609759621685e-06, "loss": 0.7205, "step": 3058 }, { "epoch": 0.54, "grad_norm": 0.5963724004722607, "learning_rate": 9.310431033410982e-06, "loss": 0.6869, "step": 3059 }, { "epoch": 0.54, "grad_norm": 0.6459516814894025, "learning_rate": 9.304764693074436e-06, "loss": 0.6703, "step": 3060 }, { "epoch": 0.54, "grad_norm": 0.7435337519063502, "learning_rate": 9.299098577035283e-06, "loss": 0.7116, "step": 3061 }, { "epoch": 0.54, "grad_norm": 0.6297424304402939, "learning_rate": 9.29343268712154e-06, "loss": 0.7054, "step": 3062 }, { "epoch": 0.54, "grad_norm": 0.6666435763639637, "learning_rate": 9.287767025161135e-06, "loss": 0.7343, "step": 3063 }, { "epoch": 0.54, "grad_norm": 0.806082687468403, "learning_rate": 9.282101592981928e-06, "loss": 0.8139, "step": 3064 }, { "epoch": 0.54, "grad_norm": 0.611456030890499, "learning_rate": 9.276436392411711e-06, "loss": 0.6305, "step": 3065 }, { "epoch": 0.54, "grad_norm": 0.7152449237634148, "learning_rate": 9.270771425278196e-06, "loss": 0.7293, "step": 3066 }, { "epoch": 0.54, "grad_norm": 0.6308778579934059, "learning_rate": 9.26510669340901e-06, "loss": 0.6744, "step": 3067 }, { "epoch": 0.54, "grad_norm": 0.6255926521578901, "learning_rate": 9.259442198631718e-06, "loss": 0.6743, "step": 3068 }, { "epoch": 0.54, "grad_norm": 0.7752029940568054, "learning_rate": 9.25377794277381e-06, "loss": 0.7233, "step": 3069 }, { "epoch": 0.54, "grad_norm": 0.6457299313771301, "learning_rate": 9.248113927662685e-06, "loss": 0.694, "step": 3070 }, { "epoch": 0.54, "grad_norm": 0.7385148368582829, "learning_rate": 9.242450155125678e-06, "loss": 0.7622, "step": 3071 }, { "epoch": 0.54, "grad_norm": 0.7186467096324591, "learning_rate": 9.236786626990036e-06, "loss": 0.6843, "step": 3072 }, { "epoch": 0.54, "grad_norm": 0.6591951290130035, "learning_rate": 9.231123345082934e-06, "loss": 0.7196, "step": 3073 }, { "epoch": 0.54, "grad_norm": 0.7059323823222571, "learning_rate": 9.225460311231464e-06, "loss": 0.7123, "step": 3074 }, { "epoch": 0.54, "grad_norm": 0.6319723957975051, "learning_rate": 9.219797527262642e-06, "loss": 0.7282, "step": 3075 }, { "epoch": 0.54, "grad_norm": 0.7814703148144614, "learning_rate": 9.214134995003393e-06, "loss": 0.6867, "step": 3076 }, { "epoch": 0.54, "grad_norm": 0.7456659586687702, "learning_rate": 9.208472716280571e-06, "loss": 0.7167, "step": 3077 }, { "epoch": 0.54, "grad_norm": 0.6984634206679956, "learning_rate": 9.202810692920946e-06, "loss": 0.6779, "step": 3078 }, { "epoch": 0.54, "grad_norm": 0.7397507317937629, "learning_rate": 9.197148926751202e-06, "loss": 0.7137, "step": 3079 }, { "epoch": 0.54, "grad_norm": 0.7300528526936676, "learning_rate": 9.19148741959795e-06, "loss": 0.7695, "step": 3080 }, { "epoch": 0.54, "grad_norm": 0.6652542634070704, "learning_rate": 9.185826173287702e-06, "loss": 0.7291, "step": 3081 }, { "epoch": 0.54, "grad_norm": 0.8967666860203597, "learning_rate": 9.180165189646899e-06, "loss": 0.7266, "step": 3082 }, { "epoch": 0.54, "grad_norm": 0.6351538972301775, "learning_rate": 9.174504470501889e-06, "loss": 0.6762, "step": 3083 }, { "epoch": 0.54, "grad_norm": 0.7475618421419711, "learning_rate": 9.168844017678944e-06, "loss": 0.7315, "step": 3084 }, { "epoch": 0.54, "grad_norm": 0.8017431802935718, "learning_rate": 9.163183833004236e-06, "loss": 0.7511, "step": 3085 }, { "epoch": 0.54, "grad_norm": 0.742151053015735, "learning_rate": 9.157523918303863e-06, "loss": 0.7297, "step": 3086 }, { "epoch": 0.54, "grad_norm": 0.7265632098410016, "learning_rate": 9.15186427540383e-06, "loss": 0.7628, "step": 3087 }, { "epoch": 0.54, "grad_norm": 0.7694562365947958, "learning_rate": 9.146204906130055e-06, "loss": 0.7567, "step": 3088 }, { "epoch": 0.54, "grad_norm": 0.6301976924185779, "learning_rate": 9.140545812308372e-06, "loss": 0.6617, "step": 3089 }, { "epoch": 0.54, "grad_norm": 0.7076030433964401, "learning_rate": 9.134886995764517e-06, "loss": 0.7314, "step": 3090 }, { "epoch": 0.54, "grad_norm": 0.6693339653539996, "learning_rate": 9.129228458324151e-06, "loss": 0.717, "step": 3091 }, { "epoch": 0.54, "grad_norm": 0.6960279889178507, "learning_rate": 9.123570201812826e-06, "loss": 0.7552, "step": 3092 }, { "epoch": 0.54, "grad_norm": 0.636883980356098, "learning_rate": 9.117912228056026e-06, "loss": 0.7339, "step": 3093 }, { "epoch": 0.54, "grad_norm": 0.5850882267478366, "learning_rate": 9.112254538879115e-06, "loss": 0.6593, "step": 3094 }, { "epoch": 0.54, "grad_norm": 0.6401421951529693, "learning_rate": 9.106597136107394e-06, "loss": 0.7326, "step": 3095 }, { "epoch": 0.54, "grad_norm": 0.6981521788206967, "learning_rate": 9.10094002156605e-06, "loss": 0.7493, "step": 3096 }, { "epoch": 0.54, "grad_norm": 0.5408718328043658, "learning_rate": 9.095283197080195e-06, "loss": 0.6734, "step": 3097 }, { "epoch": 0.54, "grad_norm": 0.6468691010655846, "learning_rate": 9.089626664474834e-06, "loss": 0.6829, "step": 3098 }, { "epoch": 0.54, "grad_norm": 0.744264292035705, "learning_rate": 9.083970425574882e-06, "loss": 0.7417, "step": 3099 }, { "epoch": 0.54, "grad_norm": 0.674748772303277, "learning_rate": 9.078314482205161e-06, "loss": 0.706, "step": 3100 }, { "epoch": 0.54, "grad_norm": 0.6971925976199982, "learning_rate": 9.072658836190394e-06, "loss": 0.6774, "step": 3101 }, { "epoch": 0.54, "grad_norm": 0.6561046435987615, "learning_rate": 9.067003489355211e-06, "loss": 0.6804, "step": 3102 }, { "epoch": 0.54, "grad_norm": 0.7316659147825776, "learning_rate": 9.06134844352415e-06, "loss": 0.7208, "step": 3103 }, { "epoch": 0.54, "grad_norm": 0.7371079323383048, "learning_rate": 9.05569370052164e-06, "loss": 0.7115, "step": 3104 }, { "epoch": 0.54, "grad_norm": 0.745131817960178, "learning_rate": 9.050039262172018e-06, "loss": 0.7043, "step": 3105 }, { "epoch": 0.54, "grad_norm": 0.7042761820420492, "learning_rate": 9.044385130299529e-06, "loss": 0.7482, "step": 3106 }, { "epoch": 0.54, "grad_norm": 0.6985937258758458, "learning_rate": 9.03873130672831e-06, "loss": 0.6866, "step": 3107 }, { "epoch": 0.54, "grad_norm": 0.6494181694483412, "learning_rate": 9.033077793282404e-06, "loss": 0.662, "step": 3108 }, { "epoch": 0.55, "grad_norm": 0.7372052955815905, "learning_rate": 9.027424591785754e-06, "loss": 0.7475, "step": 3109 }, { "epoch": 0.55, "grad_norm": 0.800688817454749, "learning_rate": 9.021771704062195e-06, "loss": 0.8067, "step": 3110 }, { "epoch": 0.55, "grad_norm": 0.7378659235467285, "learning_rate": 9.01611913193547e-06, "loss": 0.7275, "step": 3111 }, { "epoch": 0.55, "grad_norm": 0.7071631452458035, "learning_rate": 9.010466877229221e-06, "loss": 0.743, "step": 3112 }, { "epoch": 0.55, "grad_norm": 0.7198070198920263, "learning_rate": 9.004814941766978e-06, "loss": 0.7172, "step": 3113 }, { "epoch": 0.55, "grad_norm": 0.8151766397418856, "learning_rate": 8.99916332737217e-06, "loss": 0.8027, "step": 3114 }, { "epoch": 0.55, "grad_norm": 0.7023922289905279, "learning_rate": 8.993512035868133e-06, "loss": 0.7035, "step": 3115 }, { "epoch": 0.55, "grad_norm": 0.649465661709674, "learning_rate": 8.987861069078084e-06, "loss": 0.6934, "step": 3116 }, { "epoch": 0.55, "grad_norm": 0.6735349922785997, "learning_rate": 8.982210428825148e-06, "loss": 0.7565, "step": 3117 }, { "epoch": 0.55, "grad_norm": 0.697761043528414, "learning_rate": 8.97656011693234e-06, "loss": 0.7481, "step": 3118 }, { "epoch": 0.55, "grad_norm": 0.6471023039548565, "learning_rate": 8.970910135222562e-06, "loss": 0.7052, "step": 3119 }, { "epoch": 0.55, "grad_norm": 0.6280755943594942, "learning_rate": 8.965260485518624e-06, "loss": 0.7124, "step": 3120 }, { "epoch": 0.55, "grad_norm": 0.742338958885344, "learning_rate": 8.959611169643218e-06, "loss": 0.7351, "step": 3121 }, { "epoch": 0.55, "grad_norm": 0.6627411678685352, "learning_rate": 8.953962189418926e-06, "loss": 0.7148, "step": 3122 }, { "epoch": 0.55, "grad_norm": 0.6685110768468964, "learning_rate": 8.94831354666823e-06, "loss": 0.704, "step": 3123 }, { "epoch": 0.55, "grad_norm": 0.6784783264079932, "learning_rate": 8.942665243213502e-06, "loss": 0.7416, "step": 3124 }, { "epoch": 0.55, "grad_norm": 0.6601235429782621, "learning_rate": 8.937017280876999e-06, "loss": 0.7076, "step": 3125 }, { "epoch": 0.55, "grad_norm": 0.7068870266458557, "learning_rate": 8.931369661480874e-06, "loss": 0.7107, "step": 3126 }, { "epoch": 0.55, "grad_norm": 0.6166038300712262, "learning_rate": 8.925722386847167e-06, "loss": 0.7008, "step": 3127 }, { "epoch": 0.55, "grad_norm": 0.582282728696995, "learning_rate": 8.920075458797803e-06, "loss": 0.6711, "step": 3128 }, { "epoch": 0.55, "grad_norm": 0.6854195623966178, "learning_rate": 8.914428879154602e-06, "loss": 0.7037, "step": 3129 }, { "epoch": 0.55, "grad_norm": 0.6936965266593056, "learning_rate": 8.908782649739271e-06, "loss": 0.7059, "step": 3130 }, { "epoch": 0.55, "grad_norm": 0.7576451808314996, "learning_rate": 8.903136772373393e-06, "loss": 0.7301, "step": 3131 }, { "epoch": 0.55, "grad_norm": 0.7307473431881726, "learning_rate": 8.897491248878449e-06, "loss": 0.7482, "step": 3132 }, { "epoch": 0.55, "grad_norm": 0.6755400217106982, "learning_rate": 8.891846081075808e-06, "loss": 0.7289, "step": 3133 }, { "epoch": 0.55, "grad_norm": 0.7314381166114817, "learning_rate": 8.88620127078671e-06, "loss": 0.7219, "step": 3134 }, { "epoch": 0.55, "grad_norm": 0.6671340175121777, "learning_rate": 8.880556819832297e-06, "loss": 0.6957, "step": 3135 }, { "epoch": 0.55, "grad_norm": 0.762103585804957, "learning_rate": 8.874912730033579e-06, "loss": 0.7173, "step": 3136 }, { "epoch": 0.55, "grad_norm": 0.6289064660413481, "learning_rate": 8.869269003211461e-06, "loss": 0.7036, "step": 3137 }, { "epoch": 0.55, "grad_norm": 0.6531767565364721, "learning_rate": 8.863625641186729e-06, "loss": 0.7309, "step": 3138 }, { "epoch": 0.55, "grad_norm": 0.7587316823444283, "learning_rate": 8.857982645780048e-06, "loss": 0.711, "step": 3139 }, { "epoch": 0.55, "grad_norm": 0.7258728426171663, "learning_rate": 8.85234001881196e-06, "loss": 0.7628, "step": 3140 }, { "epoch": 0.55, "grad_norm": 0.6988599978859436, "learning_rate": 8.846697762102898e-06, "loss": 0.7023, "step": 3141 }, { "epoch": 0.55, "grad_norm": 0.6998675817446143, "learning_rate": 8.841055877473174e-06, "loss": 0.73, "step": 3142 }, { "epoch": 0.55, "grad_norm": 0.6796181797697578, "learning_rate": 8.835414366742974e-06, "loss": 0.7434, "step": 3143 }, { "epoch": 0.55, "grad_norm": 0.7313711807611419, "learning_rate": 8.82977323173237e-06, "loss": 0.7616, "step": 3144 }, { "epoch": 0.55, "grad_norm": 0.7523758809605466, "learning_rate": 8.82413247426131e-06, "loss": 0.8084, "step": 3145 }, { "epoch": 0.55, "grad_norm": 0.6981809923306003, "learning_rate": 8.818492096149614e-06, "loss": 0.7459, "step": 3146 }, { "epoch": 0.55, "grad_norm": 0.7313509622654046, "learning_rate": 8.812852099216991e-06, "loss": 0.7406, "step": 3147 }, { "epoch": 0.55, "grad_norm": 0.7368354631038556, "learning_rate": 8.807212485283025e-06, "loss": 0.7326, "step": 3148 }, { "epoch": 0.55, "grad_norm": 0.6797537358678953, "learning_rate": 8.801573256167165e-06, "loss": 0.7035, "step": 3149 }, { "epoch": 0.55, "grad_norm": 0.6656790105732313, "learning_rate": 8.795934413688746e-06, "loss": 0.7139, "step": 3150 }, { "epoch": 0.55, "grad_norm": 0.6573778300450821, "learning_rate": 8.790295959666976e-06, "loss": 0.6944, "step": 3151 }, { "epoch": 0.55, "grad_norm": 0.6647054788592275, "learning_rate": 8.784657895920938e-06, "loss": 0.6566, "step": 3152 }, { "epoch": 0.55, "grad_norm": 0.6419791638217034, "learning_rate": 8.779020224269594e-06, "loss": 0.7188, "step": 3153 }, { "epoch": 0.55, "grad_norm": 0.6843780610558504, "learning_rate": 8.773382946531767e-06, "loss": 0.6821, "step": 3154 }, { "epoch": 0.55, "grad_norm": 0.6417003731887598, "learning_rate": 8.767746064526162e-06, "loss": 0.7215, "step": 3155 }, { "epoch": 0.55, "grad_norm": 0.6854988305993699, "learning_rate": 8.76210958007136e-06, "loss": 0.7174, "step": 3156 }, { "epoch": 0.55, "grad_norm": 0.7571397275210348, "learning_rate": 8.756473494985806e-06, "loss": 0.7331, "step": 3157 }, { "epoch": 0.55, "grad_norm": 0.5986450166995977, "learning_rate": 8.750837811087812e-06, "loss": 0.7203, "step": 3158 }, { "epoch": 0.55, "grad_norm": 0.6452948777232476, "learning_rate": 8.745202530195575e-06, "loss": 0.6874, "step": 3159 }, { "epoch": 0.55, "grad_norm": 0.7986688691522695, "learning_rate": 8.739567654127148e-06, "loss": 0.7063, "step": 3160 }, { "epoch": 0.55, "grad_norm": 0.7831580752574087, "learning_rate": 8.733933184700462e-06, "loss": 0.6609, "step": 3161 }, { "epoch": 0.55, "grad_norm": 0.7047063872180486, "learning_rate": 8.728299123733317e-06, "loss": 0.7166, "step": 3162 }, { "epoch": 0.55, "grad_norm": 0.8209045209618339, "learning_rate": 8.722665473043376e-06, "loss": 0.7308, "step": 3163 }, { "epoch": 0.55, "grad_norm": 0.6960479952010561, "learning_rate": 8.717032234448173e-06, "loss": 0.7046, "step": 3164 }, { "epoch": 0.55, "grad_norm": 0.6993721779466507, "learning_rate": 8.711399409765104e-06, "loss": 0.7078, "step": 3165 }, { "epoch": 0.56, "grad_norm": 0.7206647872752087, "learning_rate": 8.705767000811441e-06, "loss": 0.7532, "step": 3166 }, { "epoch": 0.56, "grad_norm": 0.6976530127131897, "learning_rate": 8.700135009404318e-06, "loss": 0.7103, "step": 3167 }, { "epoch": 0.56, "grad_norm": 0.7172945615249098, "learning_rate": 8.694503437360727e-06, "loss": 0.7522, "step": 3168 }, { "epoch": 0.56, "grad_norm": 0.6451497333103482, "learning_rate": 8.68887228649753e-06, "loss": 0.7133, "step": 3169 }, { "epoch": 0.56, "grad_norm": 0.6445059907194665, "learning_rate": 8.683241558631457e-06, "loss": 0.6777, "step": 3170 }, { "epoch": 0.56, "grad_norm": 0.7226203952877899, "learning_rate": 8.6776112555791e-06, "loss": 0.714, "step": 3171 }, { "epoch": 0.56, "grad_norm": 0.6625151450600654, "learning_rate": 8.671981379156908e-06, "loss": 0.6964, "step": 3172 }, { "epoch": 0.56, "grad_norm": 0.7297911996582686, "learning_rate": 8.6663519311812e-06, "loss": 0.7038, "step": 3173 }, { "epoch": 0.56, "grad_norm": 0.6398076084825293, "learning_rate": 8.660722913468149e-06, "loss": 0.6913, "step": 3174 }, { "epoch": 0.56, "grad_norm": 0.8260400422013232, "learning_rate": 8.655094327833795e-06, "loss": 0.7307, "step": 3175 }, { "epoch": 0.56, "grad_norm": 0.6380375046069453, "learning_rate": 8.649466176094043e-06, "loss": 0.7265, "step": 3176 }, { "epoch": 0.56, "grad_norm": 0.8166333562636846, "learning_rate": 8.643838460064645e-06, "loss": 0.7228, "step": 3177 }, { "epoch": 0.56, "grad_norm": 0.7085817047690589, "learning_rate": 8.638211181561219e-06, "loss": 0.6745, "step": 3178 }, { "epoch": 0.56, "grad_norm": 0.696673553030968, "learning_rate": 8.632584342399244e-06, "loss": 0.7074, "step": 3179 }, { "epoch": 0.56, "grad_norm": 0.7383554238777205, "learning_rate": 8.62695794439406e-06, "loss": 0.7455, "step": 3180 }, { "epoch": 0.56, "grad_norm": 0.740365572571052, "learning_rate": 8.621331989360852e-06, "loss": 0.6976, "step": 3181 }, { "epoch": 0.56, "grad_norm": 0.6841653001555065, "learning_rate": 8.615706479114679e-06, "loss": 0.7659, "step": 3182 }, { "epoch": 0.56, "grad_norm": 0.6717665664321579, "learning_rate": 8.61008141547044e-06, "loss": 0.6822, "step": 3183 }, { "epoch": 0.56, "grad_norm": 0.7556179731585634, "learning_rate": 8.604456800242901e-06, "loss": 0.7558, "step": 3184 }, { "epoch": 0.56, "grad_norm": 0.6577868172375249, "learning_rate": 8.598832635246688e-06, "loss": 0.6993, "step": 3185 }, { "epoch": 0.56, "grad_norm": 0.7378377090719177, "learning_rate": 8.593208922296261e-06, "loss": 0.7118, "step": 3186 }, { "epoch": 0.56, "grad_norm": 0.6091642503808358, "learning_rate": 8.587585663205952e-06, "loss": 0.7049, "step": 3187 }, { "epoch": 0.56, "grad_norm": 0.7792363157844445, "learning_rate": 8.581962859789942e-06, "loss": 0.7621, "step": 3188 }, { "epoch": 0.56, "grad_norm": 0.7011440082222502, "learning_rate": 8.576340513862265e-06, "loss": 0.6922, "step": 3189 }, { "epoch": 0.56, "grad_norm": 0.6666689657074251, "learning_rate": 8.570718627236803e-06, "loss": 0.7051, "step": 3190 }, { "epoch": 0.56, "grad_norm": 0.6825854541320634, "learning_rate": 8.565097201727302e-06, "loss": 0.7224, "step": 3191 }, { "epoch": 0.56, "grad_norm": 0.6747243295010569, "learning_rate": 8.559476239147343e-06, "loss": 0.706, "step": 3192 }, { "epoch": 0.56, "grad_norm": 0.7164660017535778, "learning_rate": 8.553855741310369e-06, "loss": 0.7056, "step": 3193 }, { "epoch": 0.56, "grad_norm": 0.7445467706598539, "learning_rate": 8.548235710029674e-06, "loss": 0.6913, "step": 3194 }, { "epoch": 0.56, "grad_norm": 0.6980711289457655, "learning_rate": 8.542616147118392e-06, "loss": 0.7312, "step": 3195 }, { "epoch": 0.56, "grad_norm": 0.7062091301550835, "learning_rate": 8.536997054389508e-06, "loss": 0.7366, "step": 3196 }, { "epoch": 0.56, "grad_norm": 0.6693369307875315, "learning_rate": 8.531378433655864e-06, "loss": 0.7114, "step": 3197 }, { "epoch": 0.56, "grad_norm": 0.6275655617178844, "learning_rate": 8.52576028673014e-06, "loss": 0.7303, "step": 3198 }, { "epoch": 0.56, "grad_norm": 0.6577081223224852, "learning_rate": 8.520142615424871e-06, "loss": 0.7155, "step": 3199 }, { "epoch": 0.56, "grad_norm": 0.7010407089700124, "learning_rate": 8.514525421552436e-06, "loss": 0.7201, "step": 3200 }, { "epoch": 0.56, "grad_norm": 0.6576659084616722, "learning_rate": 8.508908706925054e-06, "loss": 0.7498, "step": 3201 }, { "epoch": 0.56, "grad_norm": 0.7064950986009241, "learning_rate": 8.503292473354798e-06, "loss": 0.6769, "step": 3202 }, { "epoch": 0.56, "grad_norm": 0.7007430352704472, "learning_rate": 8.497676722653583e-06, "loss": 0.6627, "step": 3203 }, { "epoch": 0.56, "grad_norm": 0.7946195581753643, "learning_rate": 8.492061456633161e-06, "loss": 0.7323, "step": 3204 }, { "epoch": 0.56, "grad_norm": 0.7148877994610223, "learning_rate": 8.486446677105138e-06, "loss": 0.7389, "step": 3205 }, { "epoch": 0.56, "grad_norm": 0.7669787756600648, "learning_rate": 8.48083238588096e-06, "loss": 0.6863, "step": 3206 }, { "epoch": 0.56, "grad_norm": 0.787880943976036, "learning_rate": 8.47521858477191e-06, "loss": 0.7555, "step": 3207 }, { "epoch": 0.56, "grad_norm": 0.7106808389405531, "learning_rate": 8.46960527558912e-06, "loss": 0.6964, "step": 3208 }, { "epoch": 0.56, "grad_norm": 0.6551021341406851, "learning_rate": 8.463992460143563e-06, "loss": 0.6941, "step": 3209 }, { "epoch": 0.56, "grad_norm": 0.7138018263981699, "learning_rate": 8.458380140246046e-06, "loss": 0.7305, "step": 3210 }, { "epoch": 0.56, "grad_norm": 0.7808470815975769, "learning_rate": 8.452768317707224e-06, "loss": 0.7356, "step": 3211 }, { "epoch": 0.56, "grad_norm": 0.7029351978474053, "learning_rate": 8.447156994337588e-06, "loss": 0.6878, "step": 3212 }, { "epoch": 0.56, "grad_norm": 0.7845493690192326, "learning_rate": 8.441546171947463e-06, "loss": 0.664, "step": 3213 }, { "epoch": 0.56, "grad_norm": 0.6661482892607852, "learning_rate": 8.435935852347018e-06, "loss": 0.7223, "step": 3214 }, { "epoch": 0.56, "grad_norm": 0.6023273653142036, "learning_rate": 8.430326037346264e-06, "loss": 0.6832, "step": 3215 }, { "epoch": 0.56, "grad_norm": 0.6378510889594577, "learning_rate": 8.42471672875504e-06, "loss": 0.6853, "step": 3216 }, { "epoch": 0.56, "grad_norm": 0.794579233053927, "learning_rate": 8.41910792838303e-06, "loss": 0.7467, "step": 3217 }, { "epoch": 0.56, "grad_norm": 0.6811739984579374, "learning_rate": 8.413499638039743e-06, "loss": 0.6786, "step": 3218 }, { "epoch": 0.56, "grad_norm": 0.7982091910398583, "learning_rate": 8.407891859534535e-06, "loss": 0.7365, "step": 3219 }, { "epoch": 0.56, "grad_norm": 0.7244698132795396, "learning_rate": 8.402284594676593e-06, "loss": 0.6909, "step": 3220 }, { "epoch": 0.56, "grad_norm": 0.6701861542507341, "learning_rate": 8.39667784527494e-06, "loss": 0.7136, "step": 3221 }, { "epoch": 0.56, "grad_norm": 0.7252036128662172, "learning_rate": 8.391071613138422e-06, "loss": 0.7106, "step": 3222 }, { "epoch": 0.57, "grad_norm": 0.7386453283141501, "learning_rate": 8.385465900075728e-06, "loss": 0.6946, "step": 3223 }, { "epoch": 0.57, "grad_norm": 0.6886908939249878, "learning_rate": 8.379860707895387e-06, "loss": 0.703, "step": 3224 }, { "epoch": 0.57, "grad_norm": 0.7649945953104451, "learning_rate": 8.37425603840574e-06, "loss": 0.7276, "step": 3225 }, { "epoch": 0.57, "grad_norm": 0.7116746144496365, "learning_rate": 8.368651893414975e-06, "loss": 0.7535, "step": 3226 }, { "epoch": 0.57, "grad_norm": 0.854501698911276, "learning_rate": 8.363048274731105e-06, "loss": 0.7543, "step": 3227 }, { "epoch": 0.57, "grad_norm": 0.6426234667808957, "learning_rate": 8.357445184161975e-06, "loss": 0.6931, "step": 3228 }, { "epoch": 0.57, "grad_norm": 0.665234921795987, "learning_rate": 8.351842623515259e-06, "loss": 0.695, "step": 3229 }, { "epoch": 0.57, "grad_norm": 0.6859181412862501, "learning_rate": 8.346240594598458e-06, "loss": 0.7101, "step": 3230 }, { "epoch": 0.57, "grad_norm": 0.7470088112606235, "learning_rate": 8.34063909921891e-06, "loss": 0.7394, "step": 3231 }, { "epoch": 0.57, "grad_norm": 0.7606225752701712, "learning_rate": 8.335038139183764e-06, "loss": 0.7837, "step": 3232 }, { "epoch": 0.57, "grad_norm": 0.6995240795182203, "learning_rate": 8.32943771630001e-06, "loss": 0.71, "step": 3233 }, { "epoch": 0.57, "grad_norm": 0.7046118866663207, "learning_rate": 8.323837832374462e-06, "loss": 0.7682, "step": 3234 }, { "epoch": 0.57, "grad_norm": 0.6972559200893806, "learning_rate": 8.318238489213762e-06, "loss": 0.7045, "step": 3235 }, { "epoch": 0.57, "grad_norm": 0.7065025198008461, "learning_rate": 8.312639688624372e-06, "loss": 0.7317, "step": 3236 }, { "epoch": 0.57, "grad_norm": 0.6326796555140619, "learning_rate": 8.307041432412579e-06, "loss": 0.6998, "step": 3237 }, { "epoch": 0.57, "grad_norm": 0.8137968279756399, "learning_rate": 8.301443722384503e-06, "loss": 0.75, "step": 3238 }, { "epoch": 0.57, "grad_norm": 0.6750382992963789, "learning_rate": 8.295846560346076e-06, "loss": 0.7167, "step": 3239 }, { "epoch": 0.57, "grad_norm": 0.7242324798721772, "learning_rate": 8.290249948103068e-06, "loss": 0.6696, "step": 3240 }, { "epoch": 0.57, "grad_norm": 0.6769756339221216, "learning_rate": 8.284653887461054e-06, "loss": 0.7104, "step": 3241 }, { "epoch": 0.57, "grad_norm": 0.712109780890941, "learning_rate": 8.279058380225441e-06, "loss": 0.6665, "step": 3242 }, { "epoch": 0.57, "grad_norm": 0.8386218338903201, "learning_rate": 8.273463428201457e-06, "loss": 0.7785, "step": 3243 }, { "epoch": 0.57, "grad_norm": 0.6333475066763421, "learning_rate": 8.267869033194153e-06, "loss": 0.6859, "step": 3244 }, { "epoch": 0.57, "grad_norm": 0.7383035483170511, "learning_rate": 8.262275197008395e-06, "loss": 0.7468, "step": 3245 }, { "epoch": 0.57, "grad_norm": 0.6778215029780419, "learning_rate": 8.256681921448872e-06, "loss": 0.7083, "step": 3246 }, { "epoch": 0.57, "grad_norm": 0.6423032413521454, "learning_rate": 8.25108920832009e-06, "loss": 0.6724, "step": 3247 }, { "epoch": 0.57, "grad_norm": 0.6591642560187779, "learning_rate": 8.245497059426376e-06, "loss": 0.6673, "step": 3248 }, { "epoch": 0.57, "grad_norm": 0.67531704679156, "learning_rate": 8.239905476571874e-06, "loss": 0.7191, "step": 3249 }, { "epoch": 0.57, "grad_norm": 0.6581965748534369, "learning_rate": 8.234314461560547e-06, "loss": 0.6977, "step": 3250 }, { "epoch": 0.57, "grad_norm": 0.8219008834723858, "learning_rate": 8.228724016196166e-06, "loss": 0.7266, "step": 3251 }, { "epoch": 0.57, "grad_norm": 0.7411260686964741, "learning_rate": 8.22313414228233e-06, "loss": 0.6974, "step": 3252 }, { "epoch": 0.57, "grad_norm": 0.6627503422102986, "learning_rate": 8.217544841622449e-06, "loss": 0.6965, "step": 3253 }, { "epoch": 0.57, "grad_norm": 0.7028970063862019, "learning_rate": 8.211956116019748e-06, "loss": 0.7148, "step": 3254 }, { "epoch": 0.57, "grad_norm": 0.7153949401166667, "learning_rate": 8.206367967277266e-06, "loss": 0.6694, "step": 3255 }, { "epoch": 0.57, "grad_norm": 0.706697840088249, "learning_rate": 8.200780397197854e-06, "loss": 0.7037, "step": 3256 }, { "epoch": 0.57, "grad_norm": 0.716452330353835, "learning_rate": 8.19519340758418e-06, "loss": 0.7223, "step": 3257 }, { "epoch": 0.57, "grad_norm": 0.6506163205815864, "learning_rate": 8.189607000238729e-06, "loss": 0.7007, "step": 3258 }, { "epoch": 0.57, "grad_norm": 0.638011552869826, "learning_rate": 8.184021176963784e-06, "loss": 0.7003, "step": 3259 }, { "epoch": 0.57, "grad_norm": 0.7181626753851706, "learning_rate": 8.178435939561451e-06, "loss": 0.6851, "step": 3260 }, { "epoch": 0.57, "grad_norm": 0.6904738601370856, "learning_rate": 8.172851289833641e-06, "loss": 0.727, "step": 3261 }, { "epoch": 0.57, "grad_norm": 0.7226995277920272, "learning_rate": 8.167267229582087e-06, "loss": 0.694, "step": 3262 }, { "epoch": 0.57, "grad_norm": 0.6785700836396097, "learning_rate": 8.161683760608314e-06, "loss": 0.6905, "step": 3263 }, { "epoch": 0.57, "grad_norm": 0.8118906661510069, "learning_rate": 8.156100884713672e-06, "loss": 0.6862, "step": 3264 }, { "epoch": 0.57, "grad_norm": 0.7688769381075508, "learning_rate": 8.150518603699306e-06, "loss": 0.7097, "step": 3265 }, { "epoch": 0.57, "grad_norm": 0.710139471442234, "learning_rate": 8.144936919366184e-06, "loss": 0.6963, "step": 3266 }, { "epoch": 0.57, "grad_norm": 0.6832665591366984, "learning_rate": 8.139355833515073e-06, "loss": 0.6765, "step": 3267 }, { "epoch": 0.57, "grad_norm": 0.7805647291810696, "learning_rate": 8.13377534794654e-06, "loss": 0.7682, "step": 3268 }, { "epoch": 0.57, "grad_norm": 0.7471771512132674, "learning_rate": 8.128195464460971e-06, "loss": 0.7578, "step": 3269 }, { "epoch": 0.57, "grad_norm": 0.6526251760786028, "learning_rate": 8.122616184858555e-06, "loss": 0.6927, "step": 3270 }, { "epoch": 0.57, "grad_norm": 0.6534825532105185, "learning_rate": 8.117037510939278e-06, "loss": 0.6628, "step": 3271 }, { "epoch": 0.57, "grad_norm": 0.6798668302365671, "learning_rate": 8.111459444502941e-06, "loss": 0.7003, "step": 3272 }, { "epoch": 0.57, "grad_norm": 0.8165252785006508, "learning_rate": 8.105881987349143e-06, "loss": 0.7278, "step": 3273 }, { "epoch": 0.57, "grad_norm": 0.7859173363388562, "learning_rate": 8.100305141277287e-06, "loss": 0.7417, "step": 3274 }, { "epoch": 0.57, "grad_norm": 0.7767234892147641, "learning_rate": 8.094728908086585e-06, "loss": 0.699, "step": 3275 }, { "epoch": 0.57, "grad_norm": 0.7131002083834873, "learning_rate": 8.08915328957604e-06, "loss": 0.7416, "step": 3276 }, { "epoch": 0.57, "grad_norm": 0.8947532190435318, "learning_rate": 8.083578287544465e-06, "loss": 0.7801, "step": 3277 }, { "epoch": 0.57, "grad_norm": 0.7264026718755021, "learning_rate": 8.07800390379047e-06, "loss": 0.6754, "step": 3278 }, { "epoch": 0.57, "grad_norm": 0.6932012410565431, "learning_rate": 8.07243014011247e-06, "loss": 0.7425, "step": 3279 }, { "epoch": 0.58, "grad_norm": 0.6609515728072222, "learning_rate": 8.066856998308673e-06, "loss": 0.6884, "step": 3280 }, { "epoch": 0.58, "grad_norm": 0.6372734761520102, "learning_rate": 8.061284480177096e-06, "loss": 0.7047, "step": 3281 }, { "epoch": 0.58, "grad_norm": 0.6912714503157056, "learning_rate": 8.055712587515548e-06, "loss": 0.73, "step": 3282 }, { "epoch": 0.58, "grad_norm": 0.6613742735467674, "learning_rate": 8.050141322121636e-06, "loss": 0.6591, "step": 3283 }, { "epoch": 0.58, "grad_norm": 0.7021032693080863, "learning_rate": 8.044570685792767e-06, "loss": 0.6995, "step": 3284 }, { "epoch": 0.58, "grad_norm": 0.669031570908645, "learning_rate": 8.039000680326149e-06, "loss": 0.6953, "step": 3285 }, { "epoch": 0.58, "grad_norm": 0.5918794005077738, "learning_rate": 8.033431307518772e-06, "loss": 0.6535, "step": 3286 }, { "epoch": 0.58, "grad_norm": 0.7298258370301614, "learning_rate": 8.027862569167439e-06, "loss": 0.7212, "step": 3287 }, { "epoch": 0.58, "grad_norm": 0.6945243336943374, "learning_rate": 8.02229446706874e-06, "loss": 0.7223, "step": 3288 }, { "epoch": 0.58, "grad_norm": 0.7005384598562276, "learning_rate": 8.016727003019059e-06, "loss": 0.7422, "step": 3289 }, { "epoch": 0.58, "grad_norm": 0.702001787144154, "learning_rate": 8.011160178814577e-06, "loss": 0.7214, "step": 3290 }, { "epoch": 0.58, "grad_norm": 0.7291780119404446, "learning_rate": 8.005593996251268e-06, "loss": 0.6978, "step": 3291 }, { "epoch": 0.58, "grad_norm": 0.8094114093973986, "learning_rate": 8.000028457124896e-06, "loss": 0.7143, "step": 3292 }, { "epoch": 0.58, "grad_norm": 0.5587063076635681, "learning_rate": 7.994463563231026e-06, "loss": 0.6369, "step": 3293 }, { "epoch": 0.58, "grad_norm": 0.721213306977691, "learning_rate": 7.988899316365001e-06, "loss": 0.7004, "step": 3294 }, { "epoch": 0.58, "grad_norm": 0.7513443982241949, "learning_rate": 7.983335718321972e-06, "loss": 0.684, "step": 3295 }, { "epoch": 0.58, "grad_norm": 0.6577044059879045, "learning_rate": 7.97777277089686e-06, "loss": 0.6866, "step": 3296 }, { "epoch": 0.58, "grad_norm": 0.7219185278716862, "learning_rate": 7.972210475884398e-06, "loss": 0.6902, "step": 3297 }, { "epoch": 0.58, "grad_norm": 0.6663389131653885, "learning_rate": 7.966648835079092e-06, "loss": 0.6718, "step": 3298 }, { "epoch": 0.58, "grad_norm": 0.8321714590616128, "learning_rate": 7.961087850275247e-06, "loss": 0.7497, "step": 3299 }, { "epoch": 0.58, "grad_norm": 0.681983935148707, "learning_rate": 7.955527523266949e-06, "loss": 0.6999, "step": 3300 }, { "epoch": 0.58, "grad_norm": 0.7428477613637693, "learning_rate": 7.949967855848078e-06, "loss": 0.7113, "step": 3301 }, { "epoch": 0.58, "grad_norm": 0.6821382921342434, "learning_rate": 7.9444088498123e-06, "loss": 0.7243, "step": 3302 }, { "epoch": 0.58, "grad_norm": 0.6859043293758867, "learning_rate": 7.938850506953062e-06, "loss": 0.7221, "step": 3303 }, { "epoch": 0.58, "grad_norm": 0.6987563150394814, "learning_rate": 7.933292829063608e-06, "loss": 0.7176, "step": 3304 }, { "epoch": 0.58, "grad_norm": 0.6902952164144884, "learning_rate": 7.927735817936952e-06, "loss": 0.7042, "step": 3305 }, { "epoch": 0.58, "grad_norm": 0.6641164431266118, "learning_rate": 7.922179475365908e-06, "loss": 0.7104, "step": 3306 }, { "epoch": 0.58, "grad_norm": 0.6996531588770665, "learning_rate": 7.916623803143062e-06, "loss": 0.6747, "step": 3307 }, { "epoch": 0.58, "grad_norm": 0.6942244408290257, "learning_rate": 7.911068803060797e-06, "loss": 0.702, "step": 3308 }, { "epoch": 0.58, "grad_norm": 0.7340490353744952, "learning_rate": 7.905514476911266e-06, "loss": 0.7032, "step": 3309 }, { "epoch": 0.58, "grad_norm": 0.5541476195334465, "learning_rate": 7.899960826486413e-06, "loss": 0.6873, "step": 3310 }, { "epoch": 0.58, "grad_norm": 0.6685105309475947, "learning_rate": 7.894407853577962e-06, "loss": 0.6883, "step": 3311 }, { "epoch": 0.58, "grad_norm": 0.7049948260030878, "learning_rate": 7.888855559977414e-06, "loss": 0.6801, "step": 3312 }, { "epoch": 0.58, "grad_norm": 0.7279252534332358, "learning_rate": 7.883303947476063e-06, "loss": 0.7517, "step": 3313 }, { "epoch": 0.58, "grad_norm": 0.6533821567334013, "learning_rate": 7.877753017864966e-06, "loss": 0.7114, "step": 3314 }, { "epoch": 0.58, "grad_norm": 0.6257813895268955, "learning_rate": 7.87220277293497e-06, "loss": 0.6901, "step": 3315 }, { "epoch": 0.58, "grad_norm": 0.661024090949632, "learning_rate": 7.866653214476701e-06, "loss": 0.7072, "step": 3316 }, { "epoch": 0.58, "grad_norm": 0.7507961365059667, "learning_rate": 7.861104344280566e-06, "loss": 0.7167, "step": 3317 }, { "epoch": 0.58, "grad_norm": 0.694207823276907, "learning_rate": 7.85555616413674e-06, "loss": 0.7162, "step": 3318 }, { "epoch": 0.58, "grad_norm": 0.7132938776720568, "learning_rate": 7.850008675835184e-06, "loss": 0.7213, "step": 3319 }, { "epoch": 0.58, "grad_norm": 0.8227410232181064, "learning_rate": 7.844461881165637e-06, "loss": 0.7883, "step": 3320 }, { "epoch": 0.58, "grad_norm": 0.5919458951340093, "learning_rate": 7.838915781917603e-06, "loss": 0.6569, "step": 3321 }, { "epoch": 0.58, "grad_norm": 0.6587946999841867, "learning_rate": 7.833370379880379e-06, "loss": 0.6631, "step": 3322 }, { "epoch": 0.58, "grad_norm": 0.7340108162605958, "learning_rate": 7.827825676843018e-06, "loss": 0.7358, "step": 3323 }, { "epoch": 0.58, "grad_norm": 0.7483829183102781, "learning_rate": 7.82228167459436e-06, "loss": 0.7155, "step": 3324 }, { "epoch": 0.58, "grad_norm": 0.8171915631950954, "learning_rate": 7.816738374923015e-06, "loss": 0.7238, "step": 3325 }, { "epoch": 0.58, "grad_norm": 0.7337159754725803, "learning_rate": 7.811195779617372e-06, "loss": 0.7312, "step": 3326 }, { "epoch": 0.58, "grad_norm": 0.7131469611391608, "learning_rate": 7.805653890465578e-06, "loss": 0.7021, "step": 3327 }, { "epoch": 0.58, "grad_norm": 0.8105793556859844, "learning_rate": 7.800112709255572e-06, "loss": 0.7279, "step": 3328 }, { "epoch": 0.58, "grad_norm": 0.9702632612465497, "learning_rate": 7.794572237775049e-06, "loss": 0.69, "step": 3329 }, { "epoch": 0.58, "grad_norm": 0.6988697584646006, "learning_rate": 7.78903247781148e-06, "loss": 0.7092, "step": 3330 }, { "epoch": 0.58, "grad_norm": 0.7375374287626989, "learning_rate": 7.783493431152115e-06, "loss": 0.7566, "step": 3331 }, { "epoch": 0.58, "grad_norm": 0.8258694682278745, "learning_rate": 7.777955099583957e-06, "loss": 0.7422, "step": 3332 }, { "epoch": 0.58, "grad_norm": 0.7128986834064747, "learning_rate": 7.772417484893787e-06, "loss": 0.724, "step": 3333 }, { "epoch": 0.58, "grad_norm": 0.6388438614395524, "learning_rate": 7.76688058886816e-06, "loss": 0.6807, "step": 3334 }, { "epoch": 0.58, "grad_norm": 0.6519137775122914, "learning_rate": 7.76134441329339e-06, "loss": 0.6872, "step": 3335 }, { "epoch": 0.58, "grad_norm": 0.6872791931272423, "learning_rate": 7.755808959955564e-06, "loss": 0.7032, "step": 3336 }, { "epoch": 0.59, "grad_norm": 0.6629140345821386, "learning_rate": 7.750274230640539e-06, "loss": 0.717, "step": 3337 }, { "epoch": 0.59, "grad_norm": 0.7437772509698569, "learning_rate": 7.744740227133926e-06, "loss": 0.7592, "step": 3338 }, { "epoch": 0.59, "grad_norm": 0.659420945768417, "learning_rate": 7.739206951221115e-06, "loss": 0.698, "step": 3339 }, { "epoch": 0.59, "grad_norm": 0.722833026195615, "learning_rate": 7.733674404687259e-06, "loss": 0.7056, "step": 3340 }, { "epoch": 0.59, "grad_norm": 0.6506933189213029, "learning_rate": 7.728142589317267e-06, "loss": 0.6817, "step": 3341 }, { "epoch": 0.59, "grad_norm": 0.6537942339835989, "learning_rate": 7.722611506895817e-06, "loss": 0.6868, "step": 3342 }, { "epoch": 0.59, "grad_norm": 0.627068817025234, "learning_rate": 7.717081159207356e-06, "loss": 0.7035, "step": 3343 }, { "epoch": 0.59, "grad_norm": 0.6514564804810191, "learning_rate": 7.711551548036085e-06, "loss": 0.6664, "step": 3344 }, { "epoch": 0.59, "grad_norm": 0.6953246190767878, "learning_rate": 7.706022675165977e-06, "loss": 0.7297, "step": 3345 }, { "epoch": 0.59, "grad_norm": 0.7091531229632937, "learning_rate": 7.700494542380758e-06, "loss": 0.6906, "step": 3346 }, { "epoch": 0.59, "grad_norm": 0.5626592616501356, "learning_rate": 7.69496715146392e-06, "loss": 0.6504, "step": 3347 }, { "epoch": 0.59, "grad_norm": 0.6864871751702908, "learning_rate": 7.689440504198714e-06, "loss": 0.7024, "step": 3348 }, { "epoch": 0.59, "grad_norm": 0.7336480465994055, "learning_rate": 7.683914602368156e-06, "loss": 0.7352, "step": 3349 }, { "epoch": 0.59, "grad_norm": 0.6385242639158475, "learning_rate": 7.67838944775501e-06, "loss": 0.6935, "step": 3350 }, { "epoch": 0.59, "grad_norm": 0.7145116893617449, "learning_rate": 7.672865042141808e-06, "loss": 0.7507, "step": 3351 }, { "epoch": 0.59, "grad_norm": 0.708434708567746, "learning_rate": 7.667341387310842e-06, "loss": 0.7368, "step": 3352 }, { "epoch": 0.59, "grad_norm": 0.6839714487870097, "learning_rate": 7.661818485044152e-06, "loss": 0.6919, "step": 3353 }, { "epoch": 0.59, "grad_norm": 0.656366429833009, "learning_rate": 7.656296337123545e-06, "loss": 0.713, "step": 3354 }, { "epoch": 0.59, "grad_norm": 0.6988184065273012, "learning_rate": 7.650774945330584e-06, "loss": 0.6792, "step": 3355 }, { "epoch": 0.59, "grad_norm": 0.6882306557902506, "learning_rate": 7.64525431144658e-06, "loss": 0.7047, "step": 3356 }, { "epoch": 0.59, "grad_norm": 0.6687751221239941, "learning_rate": 7.639734437252609e-06, "loss": 0.7001, "step": 3357 }, { "epoch": 0.59, "grad_norm": 0.6604456037493168, "learning_rate": 7.634215324529491e-06, "loss": 0.6731, "step": 3358 }, { "epoch": 0.59, "grad_norm": 0.7252921168305162, "learning_rate": 7.628696975057817e-06, "loss": 0.6985, "step": 3359 }, { "epoch": 0.59, "grad_norm": 0.6574128599053511, "learning_rate": 7.62317939061791e-06, "loss": 0.6631, "step": 3360 }, { "epoch": 0.59, "grad_norm": 0.7044602257643034, "learning_rate": 7.617662572989866e-06, "loss": 0.6645, "step": 3361 }, { "epoch": 0.59, "grad_norm": 0.7123473702112443, "learning_rate": 7.612146523953518e-06, "loss": 0.7046, "step": 3362 }, { "epoch": 0.59, "grad_norm": 0.7297861921296619, "learning_rate": 7.606631245288465e-06, "loss": 0.7027, "step": 3363 }, { "epoch": 0.59, "grad_norm": 0.7207058694739282, "learning_rate": 7.60111673877405e-06, "loss": 0.6803, "step": 3364 }, { "epoch": 0.59, "grad_norm": 0.6812020787979006, "learning_rate": 7.595603006189365e-06, "loss": 0.735, "step": 3365 }, { "epoch": 0.59, "grad_norm": 0.7177237489750051, "learning_rate": 7.590090049313257e-06, "loss": 0.6932, "step": 3366 }, { "epoch": 0.59, "grad_norm": 0.6260119761089193, "learning_rate": 7.584577869924319e-06, "loss": 0.7041, "step": 3367 }, { "epoch": 0.59, "grad_norm": 0.7218069804025545, "learning_rate": 7.5790664698008995e-06, "loss": 0.7149, "step": 3368 }, { "epoch": 0.59, "grad_norm": 0.6656172207618545, "learning_rate": 7.573555850721084e-06, "loss": 0.7242, "step": 3369 }, { "epoch": 0.59, "grad_norm": 0.7197817385646037, "learning_rate": 7.5680460144627185e-06, "loss": 0.6826, "step": 3370 }, { "epoch": 0.59, "grad_norm": 0.6271452150248662, "learning_rate": 7.562536962803387e-06, "loss": 0.6974, "step": 3371 }, { "epoch": 0.59, "grad_norm": 0.6768455085170326, "learning_rate": 7.557028697520426e-06, "loss": 0.6819, "step": 3372 }, { "epoch": 0.59, "grad_norm": 0.7559405757869287, "learning_rate": 7.551521220390921e-06, "loss": 0.7708, "step": 3373 }, { "epoch": 0.59, "grad_norm": 0.7250401407469317, "learning_rate": 7.54601453319169e-06, "loss": 0.7042, "step": 3374 }, { "epoch": 0.59, "grad_norm": 0.8424563516565612, "learning_rate": 7.5405086376993154e-06, "loss": 0.7869, "step": 3375 }, { "epoch": 0.59, "grad_norm": 0.7012897375453875, "learning_rate": 7.5350035356901045e-06, "loss": 0.712, "step": 3376 }, { "epoch": 0.59, "grad_norm": 0.7005549718535653, "learning_rate": 7.529499228940126e-06, "loss": 0.7148, "step": 3377 }, { "epoch": 0.59, "grad_norm": 0.6792353065174003, "learning_rate": 7.523995719225177e-06, "loss": 0.6832, "step": 3378 }, { "epoch": 0.59, "grad_norm": 0.582308812504723, "learning_rate": 7.518493008320809e-06, "loss": 0.6495, "step": 3379 }, { "epoch": 0.59, "grad_norm": 0.6543828283227073, "learning_rate": 7.512991098002307e-06, "loss": 0.7059, "step": 3380 }, { "epoch": 0.59, "grad_norm": 0.6681931858056676, "learning_rate": 7.507489990044706e-06, "loss": 0.6978, "step": 3381 }, { "epoch": 0.59, "grad_norm": 0.6266207914045867, "learning_rate": 7.501989686222775e-06, "loss": 0.7002, "step": 3382 }, { "epoch": 0.59, "grad_norm": 0.6777909580225416, "learning_rate": 7.4964901883110285e-06, "loss": 0.7095, "step": 3383 }, { "epoch": 0.59, "grad_norm": 0.7741064940769946, "learning_rate": 7.490991498083721e-06, "loss": 0.7372, "step": 3384 }, { "epoch": 0.59, "grad_norm": 0.6603597730805774, "learning_rate": 7.485493617314839e-06, "loss": 0.6848, "step": 3385 }, { "epoch": 0.59, "grad_norm": 0.6543350809272596, "learning_rate": 7.479996547778123e-06, "loss": 0.7294, "step": 3386 }, { "epoch": 0.59, "grad_norm": 0.6842411497930239, "learning_rate": 7.474500291247031e-06, "loss": 0.7122, "step": 3387 }, { "epoch": 0.59, "grad_norm": 0.6940235695139518, "learning_rate": 7.469004849494777e-06, "loss": 0.643, "step": 3388 }, { "epoch": 0.59, "grad_norm": 0.6800366345729726, "learning_rate": 7.4635102242942995e-06, "loss": 0.6837, "step": 3389 }, { "epoch": 0.59, "grad_norm": 0.713981865366141, "learning_rate": 7.458016417418285e-06, "loss": 0.7028, "step": 3390 }, { "epoch": 0.59, "grad_norm": 0.8469028451781124, "learning_rate": 7.452523430639147e-06, "loss": 0.7265, "step": 3391 }, { "epoch": 0.59, "grad_norm": 1.1720780948189036, "learning_rate": 7.4470312657290364e-06, "loss": 0.7322, "step": 3392 }, { "epoch": 0.59, "grad_norm": 0.7326868908586083, "learning_rate": 7.441539924459846e-06, "loss": 0.7279, "step": 3393 }, { "epoch": 0.6, "grad_norm": 0.7433536273760764, "learning_rate": 7.436049408603188e-06, "loss": 0.7549, "step": 3394 }, { "epoch": 0.6, "grad_norm": 0.6908312087538637, "learning_rate": 7.430559719930428e-06, "loss": 0.7297, "step": 3395 }, { "epoch": 0.6, "grad_norm": 0.7676129462334456, "learning_rate": 7.425070860212644e-06, "loss": 0.7371, "step": 3396 }, { "epoch": 0.6, "grad_norm": 0.7782911853306689, "learning_rate": 7.41958283122066e-06, "loss": 0.6827, "step": 3397 }, { "epoch": 0.6, "grad_norm": 0.6230125611341245, "learning_rate": 7.41409563472503e-06, "loss": 0.6612, "step": 3398 }, { "epoch": 0.6, "grad_norm": 0.7234226512379147, "learning_rate": 7.4086092724960355e-06, "loss": 0.7608, "step": 3399 }, { "epoch": 0.6, "grad_norm": 0.7819668498446872, "learning_rate": 7.403123746303693e-06, "loss": 0.7657, "step": 3400 }, { "epoch": 0.6, "grad_norm": 0.6480008076164091, "learning_rate": 7.397639057917746e-06, "loss": 0.686, "step": 3401 }, { "epoch": 0.6, "grad_norm": 0.7124393128284625, "learning_rate": 7.392155209107673e-06, "loss": 0.7682, "step": 3402 }, { "epoch": 0.6, "grad_norm": 0.6356451989282638, "learning_rate": 7.386672201642673e-06, "loss": 0.6992, "step": 3403 }, { "epoch": 0.6, "grad_norm": 0.6608874445200069, "learning_rate": 7.381190037291686e-06, "loss": 0.6844, "step": 3404 }, { "epoch": 0.6, "grad_norm": 0.6692640166358949, "learning_rate": 7.375708717823364e-06, "loss": 0.7042, "step": 3405 }, { "epoch": 0.6, "grad_norm": 0.6225604534711455, "learning_rate": 7.370228245006097e-06, "loss": 0.6861, "step": 3406 }, { "epoch": 0.6, "grad_norm": 0.6614000595403748, "learning_rate": 7.364748620608e-06, "loss": 0.662, "step": 3407 }, { "epoch": 0.6, "grad_norm": 0.7363304345031013, "learning_rate": 7.359269846396918e-06, "loss": 0.7403, "step": 3408 }, { "epoch": 0.6, "grad_norm": 0.6775594963886935, "learning_rate": 7.353791924140412e-06, "loss": 0.6524, "step": 3409 }, { "epoch": 0.6, "grad_norm": 0.7131112344871606, "learning_rate": 7.348314855605779e-06, "loss": 0.7134, "step": 3410 }, { "epoch": 0.6, "grad_norm": 0.6815864693237091, "learning_rate": 7.342838642560031e-06, "loss": 0.7162, "step": 3411 }, { "epoch": 0.6, "grad_norm": 0.8149662141123207, "learning_rate": 7.33736328676991e-06, "loss": 0.781, "step": 3412 }, { "epoch": 0.6, "grad_norm": 0.8110361686811369, "learning_rate": 7.331888790001886e-06, "loss": 0.7289, "step": 3413 }, { "epoch": 0.6, "grad_norm": 0.7280603067038471, "learning_rate": 7.326415154022136e-06, "loss": 0.7627, "step": 3414 }, { "epoch": 0.6, "grad_norm": 0.5318153908514162, "learning_rate": 7.320942380596572e-06, "loss": 0.6661, "step": 3415 }, { "epoch": 0.6, "grad_norm": 0.6730791718914293, "learning_rate": 7.315470471490827e-06, "loss": 0.7085, "step": 3416 }, { "epoch": 0.6, "grad_norm": 0.674065622819871, "learning_rate": 7.309999428470251e-06, "loss": 0.6667, "step": 3417 }, { "epoch": 0.6, "grad_norm": 0.6749923365765782, "learning_rate": 7.304529253299917e-06, "loss": 0.6545, "step": 3418 }, { "epoch": 0.6, "grad_norm": 0.834864880371418, "learning_rate": 7.29905994774462e-06, "loss": 0.7722, "step": 3419 }, { "epoch": 0.6, "grad_norm": 0.6485698748818973, "learning_rate": 7.293591513568867e-06, "loss": 0.7094, "step": 3420 }, { "epoch": 0.6, "grad_norm": 0.7930193121304756, "learning_rate": 7.288123952536893e-06, "loss": 0.7549, "step": 3421 }, { "epoch": 0.6, "grad_norm": 0.7230173437158197, "learning_rate": 7.282657266412647e-06, "loss": 0.7702, "step": 3422 }, { "epoch": 0.6, "grad_norm": 0.7085729166119135, "learning_rate": 7.277191456959796e-06, "loss": 0.7601, "step": 3423 }, { "epoch": 0.6, "grad_norm": 0.6649396803073762, "learning_rate": 7.271726525941719e-06, "loss": 0.6998, "step": 3424 }, { "epoch": 0.6, "grad_norm": 0.6583346588078218, "learning_rate": 7.266262475121522e-06, "loss": 0.651, "step": 3425 }, { "epoch": 0.6, "grad_norm": 0.7700045933451152, "learning_rate": 7.2607993062620184e-06, "loss": 0.7357, "step": 3426 }, { "epoch": 0.6, "grad_norm": 0.7139802577742989, "learning_rate": 7.255337021125741e-06, "loss": 0.7163, "step": 3427 }, { "epoch": 0.6, "grad_norm": 0.6386615061442688, "learning_rate": 7.24987562147494e-06, "loss": 0.6746, "step": 3428 }, { "epoch": 0.6, "grad_norm": 0.6749649793120197, "learning_rate": 7.244415109071572e-06, "loss": 0.7201, "step": 3429 }, { "epoch": 0.6, "grad_norm": 0.6725156862260886, "learning_rate": 7.2389554856773145e-06, "loss": 0.6955, "step": 3430 }, { "epoch": 0.6, "grad_norm": 0.6008361984074418, "learning_rate": 7.233496753053559e-06, "loss": 0.6741, "step": 3431 }, { "epoch": 0.6, "grad_norm": 0.6433061289594628, "learning_rate": 7.228038912961403e-06, "loss": 0.7428, "step": 3432 }, { "epoch": 0.6, "grad_norm": 0.7349096398912348, "learning_rate": 7.222581967161658e-06, "loss": 0.7597, "step": 3433 }, { "epoch": 0.6, "grad_norm": 0.7146754276840791, "learning_rate": 7.217125917414851e-06, "loss": 0.6678, "step": 3434 }, { "epoch": 0.6, "grad_norm": 0.6714425866717303, "learning_rate": 7.211670765481214e-06, "loss": 0.6631, "step": 3435 }, { "epoch": 0.6, "grad_norm": 0.6573410564893297, "learning_rate": 7.206216513120696e-06, "loss": 0.6906, "step": 3436 }, { "epoch": 0.6, "grad_norm": 0.6898246405142024, "learning_rate": 7.200763162092953e-06, "loss": 0.7485, "step": 3437 }, { "epoch": 0.6, "grad_norm": 0.6798978065895742, "learning_rate": 7.195310714157347e-06, "loss": 0.705, "step": 3438 }, { "epoch": 0.6, "grad_norm": 0.7465640579243081, "learning_rate": 7.189859171072953e-06, "loss": 0.7326, "step": 3439 }, { "epoch": 0.6, "grad_norm": 0.7717183846501479, "learning_rate": 7.184408534598551e-06, "loss": 0.7821, "step": 3440 }, { "epoch": 0.6, "grad_norm": 0.6669994874604241, "learning_rate": 7.1789588064926344e-06, "loss": 0.6496, "step": 3441 }, { "epoch": 0.6, "grad_norm": 0.6441368203176757, "learning_rate": 7.173509988513392e-06, "loss": 0.6933, "step": 3442 }, { "epoch": 0.6, "grad_norm": 0.7189328619420947, "learning_rate": 7.168062082418731e-06, "loss": 0.7077, "step": 3443 }, { "epoch": 0.6, "grad_norm": 0.76887246653179, "learning_rate": 7.162615089966256e-06, "loss": 0.7195, "step": 3444 }, { "epoch": 0.6, "grad_norm": 0.7135916601294435, "learning_rate": 7.157169012913283e-06, "loss": 0.7543, "step": 3445 }, { "epoch": 0.6, "grad_norm": 0.6491724674253859, "learning_rate": 7.15172385301683e-06, "loss": 0.6822, "step": 3446 }, { "epoch": 0.6, "grad_norm": 0.6675125356374901, "learning_rate": 7.146279612033618e-06, "loss": 0.6607, "step": 3447 }, { "epoch": 0.6, "grad_norm": 0.6598811709978598, "learning_rate": 7.140836291720074e-06, "loss": 0.6951, "step": 3448 }, { "epoch": 0.6, "grad_norm": 0.6620673306372166, "learning_rate": 7.1353938938323235e-06, "loss": 0.6803, "step": 3449 }, { "epoch": 0.6, "grad_norm": 0.6307714780644131, "learning_rate": 7.129952420126203e-06, "loss": 0.6851, "step": 3450 }, { "epoch": 0.61, "grad_norm": 0.6860317610196989, "learning_rate": 7.124511872357238e-06, "loss": 0.7255, "step": 3451 }, { "epoch": 0.61, "grad_norm": 0.729479672794354, "learning_rate": 7.119072252280668e-06, "loss": 0.7318, "step": 3452 }, { "epoch": 0.61, "grad_norm": 0.642995735574329, "learning_rate": 7.113633561651425e-06, "loss": 0.7222, "step": 3453 }, { "epoch": 0.61, "grad_norm": 0.6735412071633637, "learning_rate": 7.108195802224145e-06, "loss": 0.6687, "step": 3454 }, { "epoch": 0.61, "grad_norm": 0.6773039678044892, "learning_rate": 7.102758975753163e-06, "loss": 0.6921, "step": 3455 }, { "epoch": 0.61, "grad_norm": 0.6451129807233945, "learning_rate": 7.097323083992512e-06, "loss": 0.7054, "step": 3456 }, { "epoch": 0.61, "grad_norm": 0.6896762789599247, "learning_rate": 7.091888128695925e-06, "loss": 0.7008, "step": 3457 }, { "epoch": 0.61, "grad_norm": 0.6729868913922347, "learning_rate": 7.08645411161683e-06, "loss": 0.6762, "step": 3458 }, { "epoch": 0.61, "grad_norm": 0.5718881372551453, "learning_rate": 7.081021034508358e-06, "loss": 0.6684, "step": 3459 }, { "epoch": 0.61, "grad_norm": 0.6350990948106706, "learning_rate": 7.0755888991233264e-06, "loss": 0.6949, "step": 3460 }, { "epoch": 0.61, "grad_norm": 0.688691291140142, "learning_rate": 7.07015770721426e-06, "loss": 0.6877, "step": 3461 }, { "epoch": 0.61, "grad_norm": 0.6123814596485202, "learning_rate": 7.064727460533373e-06, "loss": 0.6795, "step": 3462 }, { "epoch": 0.61, "grad_norm": 0.6630116945720456, "learning_rate": 7.059298160832577e-06, "loss": 0.7005, "step": 3463 }, { "epoch": 0.61, "grad_norm": 0.77245909562748, "learning_rate": 7.053869809863476e-06, "loss": 0.7244, "step": 3464 }, { "epoch": 0.61, "grad_norm": 0.6618831654170163, "learning_rate": 7.0484424093773686e-06, "loss": 0.6829, "step": 3465 }, { "epoch": 0.61, "grad_norm": 0.6957384902034333, "learning_rate": 7.04301596112525e-06, "loss": 0.7269, "step": 3466 }, { "epoch": 0.61, "grad_norm": 0.6745421450234722, "learning_rate": 7.037590466857804e-06, "loss": 0.7113, "step": 3467 }, { "epoch": 0.61, "grad_norm": 0.6553567323905962, "learning_rate": 7.032165928325411e-06, "loss": 0.6554, "step": 3468 }, { "epoch": 0.61, "grad_norm": 0.6295412163332061, "learning_rate": 7.026742347278133e-06, "loss": 0.7072, "step": 3469 }, { "epoch": 0.61, "grad_norm": 0.6472255812468555, "learning_rate": 7.021319725465737e-06, "loss": 0.6978, "step": 3470 }, { "epoch": 0.61, "grad_norm": 0.6688172068315736, "learning_rate": 7.01589806463767e-06, "loss": 0.6994, "step": 3471 }, { "epoch": 0.61, "grad_norm": 0.8100666019682701, "learning_rate": 7.0104773665430755e-06, "loss": 0.7941, "step": 3472 }, { "epoch": 0.61, "grad_norm": 0.6247421055028334, "learning_rate": 7.0050576329307805e-06, "loss": 0.6703, "step": 3473 }, { "epoch": 0.61, "grad_norm": 0.7139237456451079, "learning_rate": 6.999638865549306e-06, "loss": 0.7092, "step": 3474 }, { "epoch": 0.61, "grad_norm": 0.7402746260187192, "learning_rate": 6.99422106614686e-06, "loss": 0.7441, "step": 3475 }, { "epoch": 0.61, "grad_norm": 0.7445871425521307, "learning_rate": 6.9888042364713345e-06, "loss": 0.6918, "step": 3476 }, { "epoch": 0.61, "grad_norm": 0.6916181508720427, "learning_rate": 6.9833883782703195e-06, "loss": 0.6905, "step": 3477 }, { "epoch": 0.61, "grad_norm": 0.6737336705618632, "learning_rate": 6.977973493291075e-06, "loss": 0.7121, "step": 3478 }, { "epoch": 0.61, "grad_norm": 0.6891682225069716, "learning_rate": 6.972559583280556e-06, "loss": 0.7107, "step": 3479 }, { "epoch": 0.61, "grad_norm": 0.8149934351232544, "learning_rate": 6.967146649985406e-06, "loss": 0.7669, "step": 3480 }, { "epoch": 0.61, "grad_norm": 0.6988034121613637, "learning_rate": 6.961734695151952e-06, "loss": 0.6591, "step": 3481 }, { "epoch": 0.61, "grad_norm": 0.6289297543230069, "learning_rate": 6.956323720526199e-06, "loss": 0.6978, "step": 3482 }, { "epoch": 0.61, "grad_norm": 0.7296322138610525, "learning_rate": 6.950913727853841e-06, "loss": 0.7725, "step": 3483 }, { "epoch": 0.61, "grad_norm": 0.7911227638873154, "learning_rate": 6.945504718880258e-06, "loss": 0.7295, "step": 3484 }, { "epoch": 0.61, "grad_norm": 0.6891998913419852, "learning_rate": 6.940096695350504e-06, "loss": 0.705, "step": 3485 }, { "epoch": 0.61, "grad_norm": 0.6363486569909487, "learning_rate": 6.934689659009325e-06, "loss": 0.7317, "step": 3486 }, { "epoch": 0.61, "grad_norm": 0.6929111834317353, "learning_rate": 6.929283611601143e-06, "loss": 0.7134, "step": 3487 }, { "epoch": 0.61, "grad_norm": 0.7172606059796671, "learning_rate": 6.923878554870056e-06, "loss": 0.7323, "step": 3488 }, { "epoch": 0.61, "grad_norm": 0.7639331955752773, "learning_rate": 6.918474490559851e-06, "loss": 0.7302, "step": 3489 }, { "epoch": 0.61, "grad_norm": 0.7331160663460403, "learning_rate": 6.913071420413995e-06, "loss": 0.7132, "step": 3490 }, { "epoch": 0.61, "grad_norm": 0.6619072152598279, "learning_rate": 6.907669346175626e-06, "loss": 0.744, "step": 3491 }, { "epoch": 0.61, "grad_norm": 0.7251266868036972, "learning_rate": 6.902268269587571e-06, "loss": 0.7621, "step": 3492 }, { "epoch": 0.61, "grad_norm": 0.733650102561035, "learning_rate": 6.8968681923923234e-06, "loss": 0.7078, "step": 3493 }, { "epoch": 0.61, "grad_norm": 0.7113133055117664, "learning_rate": 6.891469116332066e-06, "loss": 0.7293, "step": 3494 }, { "epoch": 0.61, "grad_norm": 0.6063167171950443, "learning_rate": 6.886071043148652e-06, "loss": 0.6639, "step": 3495 }, { "epoch": 0.61, "grad_norm": 0.6605939168415446, "learning_rate": 6.880673974583614e-06, "loss": 0.6745, "step": 3496 }, { "epoch": 0.61, "grad_norm": 0.7109782510815349, "learning_rate": 6.875277912378153e-06, "loss": 0.7517, "step": 3497 }, { "epoch": 0.61, "grad_norm": 0.6671919145522212, "learning_rate": 6.869882858273155e-06, "loss": 0.7038, "step": 3498 }, { "epoch": 0.61, "grad_norm": 0.6645865544016955, "learning_rate": 6.864488814009178e-06, "loss": 0.7111, "step": 3499 }, { "epoch": 0.61, "grad_norm": 0.7425253228876507, "learning_rate": 6.859095781326451e-06, "loss": 0.7056, "step": 3500 }, { "epoch": 0.61, "grad_norm": 0.6056756059308238, "learning_rate": 6.8537037619648795e-06, "loss": 0.6914, "step": 3501 }, { "epoch": 0.61, "grad_norm": 0.675618246345062, "learning_rate": 6.84831275766404e-06, "loss": 0.7331, "step": 3502 }, { "epoch": 0.61, "grad_norm": 0.7364072831098396, "learning_rate": 6.842922770163184e-06, "loss": 0.7321, "step": 3503 }, { "epoch": 0.61, "grad_norm": 0.9529792643942718, "learning_rate": 6.837533801201236e-06, "loss": 0.7109, "step": 3504 }, { "epoch": 0.61, "grad_norm": 0.6606730556334082, "learning_rate": 6.832145852516789e-06, "loss": 0.7016, "step": 3505 }, { "epoch": 0.61, "grad_norm": 0.6603722791072972, "learning_rate": 6.826758925848102e-06, "loss": 0.7172, "step": 3506 }, { "epoch": 0.61, "grad_norm": 0.6143104945676422, "learning_rate": 6.821373022933118e-06, "loss": 0.6618, "step": 3507 }, { "epoch": 0.62, "grad_norm": 0.6141769696141088, "learning_rate": 6.815988145509434e-06, "loss": 0.6547, "step": 3508 }, { "epoch": 0.62, "grad_norm": 0.717636235121684, "learning_rate": 6.810604295314327e-06, "loss": 0.7331, "step": 3509 }, { "epoch": 0.62, "grad_norm": 0.755001096689816, "learning_rate": 6.80522147408474e-06, "loss": 0.6657, "step": 3510 }, { "epoch": 0.62, "grad_norm": 0.682176525565654, "learning_rate": 6.799839683557282e-06, "loss": 0.6478, "step": 3511 }, { "epoch": 0.62, "grad_norm": 0.754598975647094, "learning_rate": 6.794458925468231e-06, "loss": 0.7393, "step": 3512 }, { "epoch": 0.62, "grad_norm": 0.7434344087292234, "learning_rate": 6.7890792015535345e-06, "loss": 0.7142, "step": 3513 }, { "epoch": 0.62, "grad_norm": 0.7211972362788732, "learning_rate": 6.783700513548803e-06, "loss": 0.737, "step": 3514 }, { "epoch": 0.62, "grad_norm": 0.7232044350767919, "learning_rate": 6.778322863189308e-06, "loss": 0.7582, "step": 3515 }, { "epoch": 0.62, "grad_norm": 0.6983250175344458, "learning_rate": 6.772946252209995e-06, "loss": 0.6797, "step": 3516 }, { "epoch": 0.62, "grad_norm": 0.6757854429444868, "learning_rate": 6.767570682345471e-06, "loss": 0.693, "step": 3517 }, { "epoch": 0.62, "grad_norm": 0.6594170481058534, "learning_rate": 6.7621961553300065e-06, "loss": 0.7385, "step": 3518 }, { "epoch": 0.62, "grad_norm": 0.6777713162121038, "learning_rate": 6.756822672897538e-06, "loss": 0.7242, "step": 3519 }, { "epoch": 0.62, "grad_norm": 0.8011934757379765, "learning_rate": 6.751450236781658e-06, "loss": 0.744, "step": 3520 }, { "epoch": 0.62, "grad_norm": 0.6499848031179054, "learning_rate": 6.746078848715631e-06, "loss": 0.7285, "step": 3521 }, { "epoch": 0.62, "grad_norm": 0.751283458057844, "learning_rate": 6.740708510432377e-06, "loss": 0.7491, "step": 3522 }, { "epoch": 0.62, "grad_norm": 0.7209982237308328, "learning_rate": 6.735339223664481e-06, "loss": 0.7266, "step": 3523 }, { "epoch": 0.62, "grad_norm": 0.6412065937271425, "learning_rate": 6.729970990144182e-06, "loss": 0.6801, "step": 3524 }, { "epoch": 0.62, "grad_norm": 0.6648004950421383, "learning_rate": 6.724603811603389e-06, "loss": 0.6723, "step": 3525 }, { "epoch": 0.62, "grad_norm": 0.6585384349158951, "learning_rate": 6.719237689773661e-06, "loss": 0.7011, "step": 3526 }, { "epoch": 0.62, "grad_norm": 0.6930230610354272, "learning_rate": 6.713872626386224e-06, "loss": 0.6624, "step": 3527 }, { "epoch": 0.62, "grad_norm": 0.7559715673618962, "learning_rate": 6.708508623171961e-06, "loss": 0.763, "step": 3528 }, { "epoch": 0.62, "grad_norm": 0.8017104494936862, "learning_rate": 6.703145681861405e-06, "loss": 0.817, "step": 3529 }, { "epoch": 0.62, "grad_norm": 0.801840792301606, "learning_rate": 6.69778380418476e-06, "loss": 0.6995, "step": 3530 }, { "epoch": 0.62, "grad_norm": 0.6618526057113431, "learning_rate": 6.692422991871873e-06, "loss": 0.6759, "step": 3531 }, { "epoch": 0.62, "grad_norm": 0.7203153467587843, "learning_rate": 6.687063246652261e-06, "loss": 0.7452, "step": 3532 }, { "epoch": 0.62, "grad_norm": 0.7317443594495058, "learning_rate": 6.681704570255081e-06, "loss": 0.7041, "step": 3533 }, { "epoch": 0.62, "grad_norm": 0.7681996356870799, "learning_rate": 6.67634696440916e-06, "loss": 0.7177, "step": 3534 }, { "epoch": 0.62, "grad_norm": 0.6555069193869607, "learning_rate": 6.670990430842969e-06, "loss": 0.7302, "step": 3535 }, { "epoch": 0.62, "grad_norm": 0.7605645099011664, "learning_rate": 6.6656349712846404e-06, "loss": 0.7572, "step": 3536 }, { "epoch": 0.62, "grad_norm": 0.6444102465912194, "learning_rate": 6.660280587461956e-06, "loss": 0.7078, "step": 3537 }, { "epoch": 0.62, "grad_norm": 0.6699350885670773, "learning_rate": 6.654927281102351e-06, "loss": 0.6931, "step": 3538 }, { "epoch": 0.62, "grad_norm": 0.6418678657321025, "learning_rate": 6.649575053932916e-06, "loss": 0.7247, "step": 3539 }, { "epoch": 0.62, "grad_norm": 0.7122046140955346, "learning_rate": 6.644223907680387e-06, "loss": 0.7377, "step": 3540 }, { "epoch": 0.62, "grad_norm": 0.6906829297510397, "learning_rate": 6.638873844071163e-06, "loss": 0.7199, "step": 3541 }, { "epoch": 0.62, "grad_norm": 0.6643639987606521, "learning_rate": 6.633524864831275e-06, "loss": 0.7248, "step": 3542 }, { "epoch": 0.62, "grad_norm": 0.815356195843113, "learning_rate": 6.628176971686424e-06, "loss": 0.7392, "step": 3543 }, { "epoch": 0.62, "grad_norm": 0.7138066339445679, "learning_rate": 6.622830166361946e-06, "loss": 0.721, "step": 3544 }, { "epoch": 0.62, "grad_norm": 0.7359853054312335, "learning_rate": 6.617484450582837e-06, "loss": 0.7293, "step": 3545 }, { "epoch": 0.62, "grad_norm": 0.6700307367957531, "learning_rate": 6.612139826073732e-06, "loss": 0.7097, "step": 3546 }, { "epoch": 0.62, "grad_norm": 0.733028994006544, "learning_rate": 6.606796294558919e-06, "loss": 0.763, "step": 3547 }, { "epoch": 0.62, "grad_norm": 0.6802847499161749, "learning_rate": 6.6014538577623365e-06, "loss": 0.7032, "step": 3548 }, { "epoch": 0.62, "grad_norm": 0.6983702893665593, "learning_rate": 6.59611251740756e-06, "loss": 0.7163, "step": 3549 }, { "epoch": 0.62, "grad_norm": 0.6391064270785828, "learning_rate": 6.590772275217823e-06, "loss": 0.7553, "step": 3550 }, { "epoch": 0.62, "grad_norm": 0.6605545836099191, "learning_rate": 6.585433132915996e-06, "loss": 0.6967, "step": 3551 }, { "epoch": 0.62, "grad_norm": 0.7616218935053916, "learning_rate": 6.580095092224598e-06, "loss": 0.6967, "step": 3552 }, { "epoch": 0.62, "grad_norm": 0.7394205109878593, "learning_rate": 6.574758154865789e-06, "loss": 0.7344, "step": 3553 }, { "epoch": 0.62, "grad_norm": 0.6863800018816526, "learning_rate": 6.569422322561382e-06, "loss": 0.7097, "step": 3554 }, { "epoch": 0.62, "grad_norm": 0.6779214882172971, "learning_rate": 6.564087597032822e-06, "loss": 0.7137, "step": 3555 }, { "epoch": 0.62, "grad_norm": 0.6730996499287865, "learning_rate": 6.558753980001207e-06, "loss": 0.7188, "step": 3556 }, { "epoch": 0.62, "grad_norm": 0.7034262067497852, "learning_rate": 6.5534214731872714e-06, "loss": 0.7033, "step": 3557 }, { "epoch": 0.62, "grad_norm": 0.6738700075722501, "learning_rate": 6.548090078311393e-06, "loss": 0.6829, "step": 3558 }, { "epoch": 0.62, "grad_norm": 0.7926751945275361, "learning_rate": 6.542759797093592e-06, "loss": 0.7035, "step": 3559 }, { "epoch": 0.62, "grad_norm": 0.6830614287649324, "learning_rate": 6.537430631253528e-06, "loss": 0.7035, "step": 3560 }, { "epoch": 0.62, "grad_norm": 0.7595964375780617, "learning_rate": 6.532102582510497e-06, "loss": 0.7086, "step": 3561 }, { "epoch": 0.62, "grad_norm": 0.7583561148587151, "learning_rate": 6.526775652583443e-06, "loss": 0.677, "step": 3562 }, { "epoch": 0.62, "grad_norm": 0.669467024504758, "learning_rate": 6.521449843190944e-06, "loss": 0.6817, "step": 3563 }, { "epoch": 0.62, "grad_norm": 0.7199453761428952, "learning_rate": 6.516125156051215e-06, "loss": 0.7413, "step": 3564 }, { "epoch": 0.63, "grad_norm": 0.6710070894575191, "learning_rate": 6.510801592882112e-06, "loss": 0.7119, "step": 3565 }, { "epoch": 0.63, "grad_norm": 0.6958176206958299, "learning_rate": 6.50547915540113e-06, "loss": 0.7147, "step": 3566 }, { "epoch": 0.63, "grad_norm": 0.782483019983758, "learning_rate": 6.500157845325395e-06, "loss": 0.7208, "step": 3567 }, { "epoch": 0.63, "grad_norm": 0.7538377682010605, "learning_rate": 6.494837664371674e-06, "loss": 0.6951, "step": 3568 }, { "epoch": 0.63, "grad_norm": 0.6811678019487833, "learning_rate": 6.489518614256369e-06, "loss": 0.6766, "step": 3569 }, { "epoch": 0.63, "grad_norm": 0.7010759008612599, "learning_rate": 6.484200696695513e-06, "loss": 0.7243, "step": 3570 }, { "epoch": 0.63, "grad_norm": 0.6335661092072302, "learning_rate": 6.478883913404779e-06, "loss": 0.6516, "step": 3571 }, { "epoch": 0.63, "grad_norm": 0.6820210057239217, "learning_rate": 6.473568266099475e-06, "loss": 0.7054, "step": 3572 }, { "epoch": 0.63, "grad_norm": 0.7082192837564041, "learning_rate": 6.468253756494533e-06, "loss": 0.6978, "step": 3573 }, { "epoch": 0.63, "grad_norm": 0.6815656053979829, "learning_rate": 6.462940386304532e-06, "loss": 0.7055, "step": 3574 }, { "epoch": 0.63, "grad_norm": 0.6844307183115025, "learning_rate": 6.45762815724367e-06, "loss": 0.7053, "step": 3575 }, { "epoch": 0.63, "grad_norm": 0.6471864662307, "learning_rate": 6.452317071025787e-06, "loss": 0.6757, "step": 3576 }, { "epoch": 0.63, "grad_norm": 0.7935411612904745, "learning_rate": 6.447007129364349e-06, "loss": 0.7608, "step": 3577 }, { "epoch": 0.63, "grad_norm": 0.6161809462635678, "learning_rate": 6.441698333972455e-06, "loss": 0.6918, "step": 3578 }, { "epoch": 0.63, "grad_norm": 0.7166901722820817, "learning_rate": 6.436390686562826e-06, "loss": 0.734, "step": 3579 }, { "epoch": 0.63, "grad_norm": 0.7363548395688689, "learning_rate": 6.431084188847827e-06, "loss": 0.6951, "step": 3580 }, { "epoch": 0.63, "grad_norm": 0.6699949289179976, "learning_rate": 6.425778842539443e-06, "loss": 0.7101, "step": 3581 }, { "epoch": 0.63, "grad_norm": 0.7201646031864687, "learning_rate": 6.420474649349289e-06, "loss": 0.6923, "step": 3582 }, { "epoch": 0.63, "grad_norm": 0.8349460435248445, "learning_rate": 6.415171610988609e-06, "loss": 0.7006, "step": 3583 }, { "epoch": 0.63, "grad_norm": 0.6684526874780929, "learning_rate": 6.40986972916827e-06, "loss": 0.7095, "step": 3584 }, { "epoch": 0.63, "grad_norm": 0.6882584420529697, "learning_rate": 6.404569005598773e-06, "loss": 0.7479, "step": 3585 }, { "epoch": 0.63, "grad_norm": 0.7265728048851733, "learning_rate": 6.399269441990244e-06, "loss": 0.6603, "step": 3586 }, { "epoch": 0.63, "grad_norm": 0.7172050180888494, "learning_rate": 6.3939710400524314e-06, "loss": 0.6972, "step": 3587 }, { "epoch": 0.63, "grad_norm": 0.7533045586491786, "learning_rate": 6.388673801494703e-06, "loss": 0.7254, "step": 3588 }, { "epoch": 0.63, "grad_norm": 0.6231632107264126, "learning_rate": 6.383377728026068e-06, "loss": 0.6989, "step": 3589 }, { "epoch": 0.63, "grad_norm": 0.7307613540245075, "learning_rate": 6.378082821355145e-06, "loss": 0.6798, "step": 3590 }, { "epoch": 0.63, "grad_norm": 0.788967482542082, "learning_rate": 6.372789083190181e-06, "loss": 0.7546, "step": 3591 }, { "epoch": 0.63, "grad_norm": 0.747798982896809, "learning_rate": 6.3674965152390486e-06, "loss": 0.7587, "step": 3592 }, { "epoch": 0.63, "grad_norm": 0.6761378842313815, "learning_rate": 6.3622051192092395e-06, "loss": 0.7007, "step": 3593 }, { "epoch": 0.63, "grad_norm": 0.8221290711441427, "learning_rate": 6.356914896807866e-06, "loss": 0.703, "step": 3594 }, { "epoch": 0.63, "grad_norm": 0.7368701363479745, "learning_rate": 6.351625849741669e-06, "loss": 0.7394, "step": 3595 }, { "epoch": 0.63, "grad_norm": 0.6932362566982139, "learning_rate": 6.346337979717005e-06, "loss": 0.7067, "step": 3596 }, { "epoch": 0.63, "grad_norm": 0.6955867002967538, "learning_rate": 6.341051288439845e-06, "loss": 0.7153, "step": 3597 }, { "epoch": 0.63, "grad_norm": 0.6487895585604765, "learning_rate": 6.335765777615792e-06, "loss": 0.6934, "step": 3598 }, { "epoch": 0.63, "grad_norm": 0.7408844018176473, "learning_rate": 6.330481448950058e-06, "loss": 0.7353, "step": 3599 }, { "epoch": 0.63, "grad_norm": 0.6547500871349252, "learning_rate": 6.3251983041474776e-06, "loss": 0.6583, "step": 3600 }, { "epoch": 0.63, "grad_norm": 0.6830161107958923, "learning_rate": 6.319916344912506e-06, "loss": 0.7159, "step": 3601 }, { "epoch": 0.63, "grad_norm": 0.6946401526592234, "learning_rate": 6.314635572949213e-06, "loss": 0.6836, "step": 3602 }, { "epoch": 0.63, "grad_norm": 0.7332321426373223, "learning_rate": 6.3093559899612854e-06, "loss": 0.7222, "step": 3603 }, { "epoch": 0.63, "grad_norm": 0.831443181345126, "learning_rate": 6.304077597652023e-06, "loss": 0.7398, "step": 3604 }, { "epoch": 0.63, "grad_norm": 0.7071637196526387, "learning_rate": 6.298800397724354e-06, "loss": 0.7037, "step": 3605 }, { "epoch": 0.63, "grad_norm": 0.6179392527680335, "learning_rate": 6.293524391880803e-06, "loss": 0.6836, "step": 3606 }, { "epoch": 0.63, "grad_norm": 0.6688082260083551, "learning_rate": 6.288249581823525e-06, "loss": 0.6972, "step": 3607 }, { "epoch": 0.63, "grad_norm": 0.734797098451643, "learning_rate": 6.282975969254281e-06, "loss": 0.6693, "step": 3608 }, { "epoch": 0.63, "grad_norm": 0.6798371728505118, "learning_rate": 6.277703555874449e-06, "loss": 0.7074, "step": 3609 }, { "epoch": 0.63, "grad_norm": 0.6762035108406397, "learning_rate": 6.2724323433850235e-06, "loss": 0.6926, "step": 3610 }, { "epoch": 0.63, "grad_norm": 0.6304093435260272, "learning_rate": 6.2671623334866005e-06, "loss": 0.6794, "step": 3611 }, { "epoch": 0.63, "grad_norm": 0.6521620958868556, "learning_rate": 6.261893527879402e-06, "loss": 0.7137, "step": 3612 }, { "epoch": 0.63, "grad_norm": 0.7234441541778581, "learning_rate": 6.256625928263249e-06, "loss": 0.7742, "step": 3613 }, { "epoch": 0.63, "grad_norm": 0.608748060829844, "learning_rate": 6.251359536337581e-06, "loss": 0.6939, "step": 3614 }, { "epoch": 0.63, "grad_norm": 0.6575050191118014, "learning_rate": 6.246094353801451e-06, "loss": 0.6931, "step": 3615 }, { "epoch": 0.63, "grad_norm": 0.607188750737669, "learning_rate": 6.2408303823535066e-06, "loss": 0.6468, "step": 3616 }, { "epoch": 0.63, "grad_norm": 0.6238861722331178, "learning_rate": 6.23556762369202e-06, "loss": 0.7116, "step": 3617 }, { "epoch": 0.63, "grad_norm": 0.6345880195038742, "learning_rate": 6.230306079514867e-06, "loss": 0.6756, "step": 3618 }, { "epoch": 0.63, "grad_norm": 0.6616477488445691, "learning_rate": 6.2250457515195295e-06, "loss": 0.6564, "step": 3619 }, { "epoch": 0.63, "grad_norm": 0.6828742644606607, "learning_rate": 6.219786641403101e-06, "loss": 0.6865, "step": 3620 }, { "epoch": 0.63, "grad_norm": 0.8754429180847298, "learning_rate": 6.214528750862279e-06, "loss": 0.6886, "step": 3621 }, { "epoch": 0.64, "grad_norm": 0.6751132846652845, "learning_rate": 6.209272081593369e-06, "loss": 0.6752, "step": 3622 }, { "epoch": 0.64, "grad_norm": 0.6129856247988313, "learning_rate": 6.204016635292281e-06, "loss": 0.6899, "step": 3623 }, { "epoch": 0.64, "grad_norm": 0.6169981480877094, "learning_rate": 6.198762413654537e-06, "loss": 0.6862, "step": 3624 }, { "epoch": 0.64, "grad_norm": 0.7189990424626926, "learning_rate": 6.193509418375251e-06, "loss": 0.7459, "step": 3625 }, { "epoch": 0.64, "grad_norm": 0.6600642934328962, "learning_rate": 6.18825765114915e-06, "loss": 0.7281, "step": 3626 }, { "epoch": 0.64, "grad_norm": 0.6903109056828168, "learning_rate": 6.183007113670566e-06, "loss": 0.7075, "step": 3627 }, { "epoch": 0.64, "grad_norm": 0.66137599014953, "learning_rate": 6.17775780763343e-06, "loss": 0.693, "step": 3628 }, { "epoch": 0.64, "grad_norm": 0.7316790811731002, "learning_rate": 6.172509734731278e-06, "loss": 0.7718, "step": 3629 }, { "epoch": 0.64, "grad_norm": 0.6663119815930032, "learning_rate": 6.167262896657249e-06, "loss": 0.6773, "step": 3630 }, { "epoch": 0.64, "grad_norm": 0.7143908614273448, "learning_rate": 6.162017295104081e-06, "loss": 0.7297, "step": 3631 }, { "epoch": 0.64, "grad_norm": 0.5936342350699189, "learning_rate": 6.156772931764117e-06, "loss": 0.6934, "step": 3632 }, { "epoch": 0.64, "grad_norm": 0.7671129026013588, "learning_rate": 6.151529808329294e-06, "loss": 0.7427, "step": 3633 }, { "epoch": 0.64, "grad_norm": 0.6483775960059862, "learning_rate": 6.146287926491155e-06, "loss": 0.7158, "step": 3634 }, { "epoch": 0.64, "grad_norm": 1.1022217526299123, "learning_rate": 6.141047287940838e-06, "loss": 0.7438, "step": 3635 }, { "epoch": 0.64, "grad_norm": 0.7082099594531301, "learning_rate": 6.135807894369086e-06, "loss": 0.7072, "step": 3636 }, { "epoch": 0.64, "grad_norm": 0.6866927448096244, "learning_rate": 6.130569747466233e-06, "loss": 0.7231, "step": 3637 }, { "epoch": 0.64, "grad_norm": 0.629497920971104, "learning_rate": 6.125332848922215e-06, "loss": 0.6758, "step": 3638 }, { "epoch": 0.64, "grad_norm": 0.6742917750098905, "learning_rate": 6.120097200426569e-06, "loss": 0.7233, "step": 3639 }, { "epoch": 0.64, "grad_norm": 0.6257111987547964, "learning_rate": 6.114862803668418e-06, "loss": 0.7057, "step": 3640 }, { "epoch": 0.64, "grad_norm": 0.6711491350432747, "learning_rate": 6.109629660336493e-06, "loss": 0.6788, "step": 3641 }, { "epoch": 0.64, "grad_norm": 0.7484272776848792, "learning_rate": 6.104397772119113e-06, "loss": 0.7504, "step": 3642 }, { "epoch": 0.64, "grad_norm": 0.7453203899629145, "learning_rate": 6.099167140704191e-06, "loss": 0.7276, "step": 3643 }, { "epoch": 0.64, "grad_norm": 0.6921357524059765, "learning_rate": 6.093937767779239e-06, "loss": 0.696, "step": 3644 }, { "epoch": 0.64, "grad_norm": 0.6887924676863367, "learning_rate": 6.088709655031366e-06, "loss": 0.6928, "step": 3645 }, { "epoch": 0.64, "grad_norm": 0.7138459063862419, "learning_rate": 6.0834828041472645e-06, "loss": 0.7109, "step": 3646 }, { "epoch": 0.64, "grad_norm": 0.7138066087524346, "learning_rate": 6.078257216813228e-06, "loss": 0.7359, "step": 3647 }, { "epoch": 0.64, "grad_norm": 0.7379793100523082, "learning_rate": 6.0730328947151425e-06, "loss": 0.7162, "step": 3648 }, { "epoch": 0.64, "grad_norm": 0.6567753122127806, "learning_rate": 6.067809839538479e-06, "loss": 0.6979, "step": 3649 }, { "epoch": 0.64, "grad_norm": 0.6508231687926482, "learning_rate": 6.062588052968308e-06, "loss": 0.713, "step": 3650 }, { "epoch": 0.64, "grad_norm": 0.6895285236265193, "learning_rate": 6.057367536689285e-06, "loss": 0.6432, "step": 3651 }, { "epoch": 0.64, "grad_norm": 0.6940450493461439, "learning_rate": 6.0521482923856536e-06, "loss": 0.6883, "step": 3652 }, { "epoch": 0.64, "grad_norm": 0.6995572525643989, "learning_rate": 6.046930321741254e-06, "loss": 0.6978, "step": 3653 }, { "epoch": 0.64, "grad_norm": 0.7060590390385089, "learning_rate": 6.041713626439514e-06, "loss": 0.7212, "step": 3654 }, { "epoch": 0.64, "grad_norm": 0.737262502778645, "learning_rate": 6.036498208163443e-06, "loss": 0.733, "step": 3655 }, { "epoch": 0.64, "grad_norm": 0.6003787124169327, "learning_rate": 6.03128406859565e-06, "loss": 0.6998, "step": 3656 }, { "epoch": 0.64, "grad_norm": 0.7914754101282354, "learning_rate": 6.02607120941832e-06, "loss": 0.722, "step": 3657 }, { "epoch": 0.64, "grad_norm": 0.7110278251680815, "learning_rate": 6.020859632313231e-06, "loss": 0.7223, "step": 3658 }, { "epoch": 0.64, "grad_norm": 0.6654708169813119, "learning_rate": 6.01564933896175e-06, "loss": 0.6938, "step": 3659 }, { "epoch": 0.64, "grad_norm": 0.7529153925828365, "learning_rate": 6.010440331044826e-06, "loss": 0.7156, "step": 3660 }, { "epoch": 0.64, "grad_norm": 0.6933489300395358, "learning_rate": 6.005232610242986e-06, "loss": 0.7471, "step": 3661 }, { "epoch": 0.64, "grad_norm": 0.6589805060592346, "learning_rate": 6.000026178236355e-06, "loss": 0.7053, "step": 3662 }, { "epoch": 0.64, "grad_norm": 0.6594017977891309, "learning_rate": 5.9948210367046385e-06, "loss": 0.7076, "step": 3663 }, { "epoch": 0.64, "grad_norm": 0.6402535494537709, "learning_rate": 5.989617187327119e-06, "loss": 0.7089, "step": 3664 }, { "epoch": 0.64, "grad_norm": 0.6063886943967922, "learning_rate": 5.98441463178267e-06, "loss": 0.6812, "step": 3665 }, { "epoch": 0.64, "grad_norm": 0.7041337579115885, "learning_rate": 5.979213371749742e-06, "loss": 0.7446, "step": 3666 }, { "epoch": 0.64, "grad_norm": 0.6945316705119915, "learning_rate": 5.9740134089063695e-06, "loss": 0.7088, "step": 3667 }, { "epoch": 0.64, "grad_norm": 0.632452059238897, "learning_rate": 5.968814744930174e-06, "loss": 0.6905, "step": 3668 }, { "epoch": 0.64, "grad_norm": 0.6734824894538969, "learning_rate": 5.963617381498349e-06, "loss": 0.7066, "step": 3669 }, { "epoch": 0.64, "grad_norm": 0.7398572956839622, "learning_rate": 5.958421320287669e-06, "loss": 0.7342, "step": 3670 }, { "epoch": 0.64, "grad_norm": 0.6113889925716475, "learning_rate": 5.9532265629744975e-06, "loss": 0.6778, "step": 3671 }, { "epoch": 0.64, "grad_norm": 0.741130099081557, "learning_rate": 5.948033111234768e-06, "loss": 0.6766, "step": 3672 }, { "epoch": 0.64, "grad_norm": 0.6191410792872402, "learning_rate": 5.9428409667439955e-06, "loss": 0.6882, "step": 3673 }, { "epoch": 0.64, "grad_norm": 0.741807500178219, "learning_rate": 5.937650131177279e-06, "loss": 0.685, "step": 3674 }, { "epoch": 0.64, "grad_norm": 1.0067471299888575, "learning_rate": 5.932460606209285e-06, "loss": 0.7071, "step": 3675 }, { "epoch": 0.64, "grad_norm": 0.5842270107364662, "learning_rate": 5.927272393514263e-06, "loss": 0.6432, "step": 3676 }, { "epoch": 0.64, "grad_norm": 0.7973439561306773, "learning_rate": 5.922085494766044e-06, "loss": 0.771, "step": 3677 }, { "epoch": 0.64, "grad_norm": 1.1000865227529635, "learning_rate": 5.916899911638021e-06, "loss": 0.6409, "step": 3678 }, { "epoch": 0.65, "grad_norm": 0.6775350796058979, "learning_rate": 5.9117156458031825e-06, "loss": 0.6935, "step": 3679 }, { "epoch": 0.65, "grad_norm": 0.7052719389841254, "learning_rate": 5.90653269893407e-06, "loss": 0.696, "step": 3680 }, { "epoch": 0.65, "grad_norm": 0.6709065349005693, "learning_rate": 5.901351072702812e-06, "loss": 0.6718, "step": 3681 }, { "epoch": 0.65, "grad_norm": 0.7315048440236811, "learning_rate": 5.896170768781112e-06, "loss": 0.6854, "step": 3682 }, { "epoch": 0.65, "grad_norm": 0.7733715810644142, "learning_rate": 5.890991788840246e-06, "loss": 0.7542, "step": 3683 }, { "epoch": 0.65, "grad_norm": 0.7497013683690918, "learning_rate": 5.8858141345510545e-06, "loss": 0.7324, "step": 3684 }, { "epoch": 0.65, "grad_norm": 0.694658361204646, "learning_rate": 5.880637807583962e-06, "loss": 0.6842, "step": 3685 }, { "epoch": 0.65, "grad_norm": 0.699531956824605, "learning_rate": 5.875462809608957e-06, "loss": 0.6772, "step": 3686 }, { "epoch": 0.65, "grad_norm": 0.687821363204963, "learning_rate": 5.870289142295602e-06, "loss": 0.6963, "step": 3687 }, { "epoch": 0.65, "grad_norm": 0.6318667121921496, "learning_rate": 5.865116807313034e-06, "loss": 0.6971, "step": 3688 }, { "epoch": 0.65, "grad_norm": 0.6488914268508297, "learning_rate": 5.859945806329951e-06, "loss": 0.7057, "step": 3689 }, { "epoch": 0.65, "grad_norm": 0.620716010092699, "learning_rate": 5.854776141014624e-06, "loss": 0.7096, "step": 3690 }, { "epoch": 0.65, "grad_norm": 0.5931234747257947, "learning_rate": 5.849607813034899e-06, "loss": 0.6742, "step": 3691 }, { "epoch": 0.65, "grad_norm": 0.6519308064787281, "learning_rate": 5.844440824058186e-06, "loss": 0.6885, "step": 3692 }, { "epoch": 0.65, "grad_norm": 0.7751570920064099, "learning_rate": 5.83927517575146e-06, "loss": 0.7016, "step": 3693 }, { "epoch": 0.65, "grad_norm": 0.667787028386904, "learning_rate": 5.834110869781272e-06, "loss": 0.6757, "step": 3694 }, { "epoch": 0.65, "grad_norm": 0.6868300588547681, "learning_rate": 5.828947907813729e-06, "loss": 0.755, "step": 3695 }, { "epoch": 0.65, "grad_norm": 0.7380963955278015, "learning_rate": 5.823786291514515e-06, "loss": 0.7488, "step": 3696 }, { "epoch": 0.65, "grad_norm": 0.6831269241333745, "learning_rate": 5.818626022548874e-06, "loss": 0.6934, "step": 3697 }, { "epoch": 0.65, "grad_norm": 0.7182640039437924, "learning_rate": 5.813467102581613e-06, "loss": 0.6878, "step": 3698 }, { "epoch": 0.65, "grad_norm": 0.7164174999071558, "learning_rate": 5.808309533277114e-06, "loss": 0.7329, "step": 3699 }, { "epoch": 0.65, "grad_norm": 0.6364515481426959, "learning_rate": 5.803153316299308e-06, "loss": 0.7232, "step": 3700 }, { "epoch": 0.65, "grad_norm": 0.6789819550865234, "learning_rate": 5.797998453311702e-06, "loss": 0.7097, "step": 3701 }, { "epoch": 0.65, "grad_norm": 0.6818255908735336, "learning_rate": 5.7928449459773605e-06, "loss": 0.6861, "step": 3702 }, { "epoch": 0.65, "grad_norm": 0.641927516860865, "learning_rate": 5.787692795958915e-06, "loss": 0.6904, "step": 3703 }, { "epoch": 0.65, "grad_norm": 0.8093988616646525, "learning_rate": 5.782542004918558e-06, "loss": 0.6979, "step": 3704 }, { "epoch": 0.65, "grad_norm": 0.6743058987883921, "learning_rate": 5.7773925745180355e-06, "loss": 0.697, "step": 3705 }, { "epoch": 0.65, "grad_norm": 0.6621115970684641, "learning_rate": 5.77224450641867e-06, "loss": 0.6704, "step": 3706 }, { "epoch": 0.65, "grad_norm": 0.6477503814253919, "learning_rate": 5.767097802281326e-06, "loss": 0.7133, "step": 3707 }, { "epoch": 0.65, "grad_norm": 0.6691966347072137, "learning_rate": 5.761952463766443e-06, "loss": 0.6902, "step": 3708 }, { "epoch": 0.65, "grad_norm": 0.7123369233876122, "learning_rate": 5.7568084925340115e-06, "loss": 0.7859, "step": 3709 }, { "epoch": 0.65, "grad_norm": 0.6584900210372273, "learning_rate": 5.751665890243587e-06, "loss": 0.7047, "step": 3710 }, { "epoch": 0.65, "grad_norm": 0.6455423863876283, "learning_rate": 5.7465246585542825e-06, "loss": 0.6844, "step": 3711 }, { "epoch": 0.65, "grad_norm": 0.7126555101231647, "learning_rate": 5.741384799124758e-06, "loss": 0.6949, "step": 3712 }, { "epoch": 0.65, "grad_norm": 0.7027563511864601, "learning_rate": 5.736246313613246e-06, "loss": 0.7445, "step": 3713 }, { "epoch": 0.65, "grad_norm": 0.6806326641535373, "learning_rate": 5.731109203677527e-06, "loss": 0.7011, "step": 3714 }, { "epoch": 0.65, "grad_norm": 0.6566685785137848, "learning_rate": 5.725973470974944e-06, "loss": 0.6925, "step": 3715 }, { "epoch": 0.65, "grad_norm": 0.6527520708156976, "learning_rate": 5.720839117162382e-06, "loss": 0.7025, "step": 3716 }, { "epoch": 0.65, "grad_norm": 0.7233925751710032, "learning_rate": 5.7157061438963e-06, "loss": 0.7407, "step": 3717 }, { "epoch": 0.65, "grad_norm": 0.7471932164376863, "learning_rate": 5.7105745528326935e-06, "loss": 0.7296, "step": 3718 }, { "epoch": 0.65, "grad_norm": 0.7302943089219985, "learning_rate": 5.705444345627127e-06, "loss": 0.6978, "step": 3719 }, { "epoch": 0.65, "grad_norm": 0.6648619023428303, "learning_rate": 5.700315523934709e-06, "loss": 0.6702, "step": 3720 }, { "epoch": 0.65, "grad_norm": 0.8410601111758593, "learning_rate": 5.695188089410107e-06, "loss": 0.8182, "step": 3721 }, { "epoch": 0.65, "grad_norm": 0.6421404082363404, "learning_rate": 5.690062043707538e-06, "loss": 0.6685, "step": 3722 }, { "epoch": 0.65, "grad_norm": 0.661929218493477, "learning_rate": 5.684937388480769e-06, "loss": 0.7111, "step": 3723 }, { "epoch": 0.65, "grad_norm": 0.6543251042184385, "learning_rate": 5.679814125383125e-06, "loss": 0.6831, "step": 3724 }, { "epoch": 0.65, "grad_norm": 0.7755641723412785, "learning_rate": 5.674692256067469e-06, "loss": 0.7089, "step": 3725 }, { "epoch": 0.65, "grad_norm": 0.705404338333374, "learning_rate": 5.66957178218623e-06, "loss": 0.6956, "step": 3726 }, { "epoch": 0.65, "grad_norm": 0.6742145969194477, "learning_rate": 5.664452705391375e-06, "loss": 0.7099, "step": 3727 }, { "epoch": 0.65, "grad_norm": 0.728275550805505, "learning_rate": 5.659335027334433e-06, "loss": 0.7588, "step": 3728 }, { "epoch": 0.65, "grad_norm": 0.8022581680370245, "learning_rate": 5.654218749666463e-06, "loss": 0.7243, "step": 3729 }, { "epoch": 0.65, "grad_norm": 0.7523058046403082, "learning_rate": 5.649103874038089e-06, "loss": 0.7648, "step": 3730 }, { "epoch": 0.65, "grad_norm": 0.7203754545851695, "learning_rate": 5.643990402099473e-06, "loss": 0.7009, "step": 3731 }, { "epoch": 0.65, "grad_norm": 0.6791343482877812, "learning_rate": 5.638878335500331e-06, "loss": 0.6986, "step": 3732 }, { "epoch": 0.65, "grad_norm": 0.6060408872049132, "learning_rate": 5.633767675889925e-06, "loss": 0.6821, "step": 3733 }, { "epoch": 0.65, "grad_norm": 0.67943236002702, "learning_rate": 5.628658424917057e-06, "loss": 0.649, "step": 3734 }, { "epoch": 0.65, "grad_norm": 0.6547715278804377, "learning_rate": 5.623550584230072e-06, "loss": 0.7202, "step": 3735 }, { "epoch": 0.66, "grad_norm": 0.708164074948917, "learning_rate": 5.618444155476872e-06, "loss": 0.724, "step": 3736 }, { "epoch": 0.66, "grad_norm": 0.6837584649586619, "learning_rate": 5.613339140304894e-06, "loss": 0.6837, "step": 3737 }, { "epoch": 0.66, "grad_norm": 0.6741510700830021, "learning_rate": 5.608235540361126e-06, "loss": 0.6838, "step": 3738 }, { "epoch": 0.66, "grad_norm": 0.6222341375132858, "learning_rate": 5.603133357292096e-06, "loss": 0.6798, "step": 3739 }, { "epoch": 0.66, "grad_norm": 0.7274487188348453, "learning_rate": 5.598032592743875e-06, "loss": 0.6816, "step": 3740 }, { "epoch": 0.66, "grad_norm": 0.7301058573908898, "learning_rate": 5.59293324836207e-06, "loss": 0.6746, "step": 3741 }, { "epoch": 0.66, "grad_norm": 0.7210013846924617, "learning_rate": 5.58783532579184e-06, "loss": 0.7302, "step": 3742 }, { "epoch": 0.66, "grad_norm": 0.6737671424979346, "learning_rate": 5.582738826677885e-06, "loss": 0.7242, "step": 3743 }, { "epoch": 0.66, "grad_norm": 0.6965375237274973, "learning_rate": 5.577643752664432e-06, "loss": 0.7059, "step": 3744 }, { "epoch": 0.66, "grad_norm": 0.6602938551039658, "learning_rate": 5.572550105395265e-06, "loss": 0.7008, "step": 3745 }, { "epoch": 0.66, "grad_norm": 0.8345682417561775, "learning_rate": 5.567457886513702e-06, "loss": 0.6922, "step": 3746 }, { "epoch": 0.66, "grad_norm": 0.6938532024747854, "learning_rate": 5.562367097662593e-06, "loss": 0.6972, "step": 3747 }, { "epoch": 0.66, "grad_norm": 0.7532647460707428, "learning_rate": 5.5572777404843346e-06, "loss": 0.6974, "step": 3748 }, { "epoch": 0.66, "grad_norm": 0.669690493356793, "learning_rate": 5.552189816620862e-06, "loss": 0.6444, "step": 3749 }, { "epoch": 0.66, "grad_norm": 0.6693500235865453, "learning_rate": 5.547103327713644e-06, "loss": 0.6643, "step": 3750 }, { "epoch": 0.66, "grad_norm": 0.7115888953258173, "learning_rate": 5.542018275403691e-06, "loss": 0.7182, "step": 3751 }, { "epoch": 0.66, "grad_norm": 0.7755743047879254, "learning_rate": 5.536934661331545e-06, "loss": 0.7678, "step": 3752 }, { "epoch": 0.66, "grad_norm": 0.7397469724041066, "learning_rate": 5.53185248713728e-06, "loss": 0.7204, "step": 3753 }, { "epoch": 0.66, "grad_norm": 0.6699288242515097, "learning_rate": 5.526771754460516e-06, "loss": 0.6893, "step": 3754 }, { "epoch": 0.66, "grad_norm": 0.5839284259075035, "learning_rate": 5.5216924649404046e-06, "loss": 0.6729, "step": 3755 }, { "epoch": 0.66, "grad_norm": 0.7370992763213352, "learning_rate": 5.516614620215628e-06, "loss": 0.7367, "step": 3756 }, { "epoch": 0.66, "grad_norm": 0.6681379174256832, "learning_rate": 5.5115382219244095e-06, "loss": 0.7054, "step": 3757 }, { "epoch": 0.66, "grad_norm": 0.6634008888504233, "learning_rate": 5.506463271704494e-06, "loss": 0.6853, "step": 3758 }, { "epoch": 0.66, "grad_norm": 0.7367793729251505, "learning_rate": 5.501389771193168e-06, "loss": 0.7087, "step": 3759 }, { "epoch": 0.66, "grad_norm": 0.6684249713566321, "learning_rate": 5.496317722027252e-06, "loss": 0.6904, "step": 3760 }, { "epoch": 0.66, "grad_norm": 0.6614854244692017, "learning_rate": 5.4912471258430966e-06, "loss": 0.6929, "step": 3761 }, { "epoch": 0.66, "grad_norm": 0.6813478616923928, "learning_rate": 5.486177984276573e-06, "loss": 0.6997, "step": 3762 }, { "epoch": 0.66, "grad_norm": 0.7603371061847558, "learning_rate": 5.4811102989630974e-06, "loss": 0.7584, "step": 3763 }, { "epoch": 0.66, "grad_norm": 0.6896333514007714, "learning_rate": 5.4760440715376136e-06, "loss": 0.6754, "step": 3764 }, { "epoch": 0.66, "grad_norm": 0.640382256081795, "learning_rate": 5.470979303634586e-06, "loss": 0.6852, "step": 3765 }, { "epoch": 0.66, "grad_norm": 0.7092592558806176, "learning_rate": 5.465915996888016e-06, "loss": 0.7371, "step": 3766 }, { "epoch": 0.66, "grad_norm": 0.6402397668420263, "learning_rate": 5.460854152931433e-06, "loss": 0.7203, "step": 3767 }, { "epoch": 0.66, "grad_norm": 0.7428702539444467, "learning_rate": 5.4557937733978926e-06, "loss": 0.7146, "step": 3768 }, { "epoch": 0.66, "grad_norm": 0.643979130413918, "learning_rate": 5.450734859919984e-06, "loss": 0.6187, "step": 3769 }, { "epoch": 0.66, "grad_norm": 0.7585789761973535, "learning_rate": 5.445677414129813e-06, "loss": 0.7635, "step": 3770 }, { "epoch": 0.66, "grad_norm": 0.7680762661699424, "learning_rate": 5.440621437659015e-06, "loss": 0.8015, "step": 3771 }, { "epoch": 0.66, "grad_norm": 0.747766405761006, "learning_rate": 5.435566932138754e-06, "loss": 0.6796, "step": 3772 }, { "epoch": 0.66, "grad_norm": 0.7051394532522703, "learning_rate": 5.430513899199721e-06, "loss": 0.7146, "step": 3773 }, { "epoch": 0.66, "grad_norm": 0.6949089847313543, "learning_rate": 5.4254623404721296e-06, "loss": 0.7344, "step": 3774 }, { "epoch": 0.66, "grad_norm": 0.6948505282090368, "learning_rate": 5.420412257585721e-06, "loss": 0.6821, "step": 3775 }, { "epoch": 0.66, "grad_norm": 0.7564222522799866, "learning_rate": 5.415363652169749e-06, "loss": 0.7476, "step": 3776 }, { "epoch": 0.66, "grad_norm": 0.7295250739501412, "learning_rate": 5.410316525853006e-06, "loss": 0.7199, "step": 3777 }, { "epoch": 0.66, "grad_norm": 0.7058426625538013, "learning_rate": 5.4052708802637965e-06, "loss": 0.6944, "step": 3778 }, { "epoch": 0.66, "grad_norm": 0.6802501352914707, "learning_rate": 5.400226717029957e-06, "loss": 0.7412, "step": 3779 }, { "epoch": 0.66, "grad_norm": 0.74595609461052, "learning_rate": 5.395184037778831e-06, "loss": 0.7002, "step": 3780 }, { "epoch": 0.66, "grad_norm": 0.6574353738669626, "learning_rate": 5.390142844137298e-06, "loss": 0.6845, "step": 3781 }, { "epoch": 0.66, "grad_norm": 0.7517762770301801, "learning_rate": 5.3851031377317485e-06, "loss": 0.7191, "step": 3782 }, { "epoch": 0.66, "grad_norm": 0.7167133352762778, "learning_rate": 5.380064920188097e-06, "loss": 0.7207, "step": 3783 }, { "epoch": 0.66, "grad_norm": 0.6665493790211523, "learning_rate": 5.375028193131779e-06, "loss": 0.6865, "step": 3784 }, { "epoch": 0.66, "grad_norm": 0.6636683951753829, "learning_rate": 5.369992958187748e-06, "loss": 0.7221, "step": 3785 }, { "epoch": 0.66, "grad_norm": 0.6083332953428028, "learning_rate": 5.364959216980478e-06, "loss": 0.7035, "step": 3786 }, { "epoch": 0.66, "grad_norm": 0.704324506285884, "learning_rate": 5.359926971133953e-06, "loss": 0.6801, "step": 3787 }, { "epoch": 0.66, "grad_norm": 0.6224286457191655, "learning_rate": 5.354896222271686e-06, "loss": 0.7015, "step": 3788 }, { "epoch": 0.66, "grad_norm": 0.6551434760176825, "learning_rate": 5.349866972016695e-06, "loss": 0.6884, "step": 3789 }, { "epoch": 0.66, "grad_norm": 0.6984569202003542, "learning_rate": 5.344839221991526e-06, "loss": 0.695, "step": 3790 }, { "epoch": 0.66, "grad_norm": 0.6914434358495585, "learning_rate": 5.339812973818233e-06, "loss": 0.6857, "step": 3791 }, { "epoch": 0.66, "grad_norm": 0.7028335553597863, "learning_rate": 5.33478822911839e-06, "loss": 0.7051, "step": 3792 }, { "epoch": 0.67, "grad_norm": 0.7326798230567361, "learning_rate": 5.3297649895130895e-06, "loss": 0.7021, "step": 3793 }, { "epoch": 0.67, "grad_norm": 0.6770921019724756, "learning_rate": 5.324743256622924e-06, "loss": 0.709, "step": 3794 }, { "epoch": 0.67, "grad_norm": 0.8490460324915713, "learning_rate": 5.319723032068014e-06, "loss": 0.7038, "step": 3795 }, { "epoch": 0.67, "grad_norm": 0.7253733612956895, "learning_rate": 5.314704317467987e-06, "loss": 0.7348, "step": 3796 }, { "epoch": 0.67, "grad_norm": 0.7781015454414068, "learning_rate": 5.309687114441988e-06, "loss": 0.7078, "step": 3797 }, { "epoch": 0.67, "grad_norm": 0.7604188371261382, "learning_rate": 5.304671424608668e-06, "loss": 0.6927, "step": 3798 }, { "epoch": 0.67, "grad_norm": 0.6925294520494099, "learning_rate": 5.2996572495861964e-06, "loss": 0.7245, "step": 3799 }, { "epoch": 0.67, "grad_norm": 0.668849326710018, "learning_rate": 5.294644590992244e-06, "loss": 0.6829, "step": 3800 }, { "epoch": 0.67, "grad_norm": 0.7854950769501681, "learning_rate": 5.289633450444005e-06, "loss": 0.7987, "step": 3801 }, { "epoch": 0.67, "grad_norm": 0.6452512361262347, "learning_rate": 5.2846238295581745e-06, "loss": 0.6825, "step": 3802 }, { "epoch": 0.67, "grad_norm": 0.6688480910420856, "learning_rate": 5.279615729950962e-06, "loss": 0.7087, "step": 3803 }, { "epoch": 0.67, "grad_norm": 0.6890494117562236, "learning_rate": 5.274609153238088e-06, "loss": 0.6891, "step": 3804 }, { "epoch": 0.67, "grad_norm": 0.6646685093384058, "learning_rate": 5.269604101034773e-06, "loss": 0.6974, "step": 3805 }, { "epoch": 0.67, "grad_norm": 0.6694631825568886, "learning_rate": 5.264600574955754e-06, "loss": 0.6627, "step": 3806 }, { "epoch": 0.67, "grad_norm": 0.6514231752706654, "learning_rate": 5.259598576615275e-06, "loss": 0.645, "step": 3807 }, { "epoch": 0.67, "grad_norm": 0.6694048366002475, "learning_rate": 5.254598107627078e-06, "loss": 0.6865, "step": 3808 }, { "epoch": 0.67, "grad_norm": 0.6708132674980001, "learning_rate": 5.249599169604423e-06, "loss": 0.6999, "step": 3809 }, { "epoch": 0.67, "grad_norm": 0.6042120229003117, "learning_rate": 5.2446017641600754e-06, "loss": 0.683, "step": 3810 }, { "epoch": 0.67, "grad_norm": 0.6164426586954925, "learning_rate": 5.239605892906294e-06, "loss": 0.672, "step": 3811 }, { "epoch": 0.67, "grad_norm": 0.6156381915418546, "learning_rate": 5.234611557454855e-06, "loss": 0.6647, "step": 3812 }, { "epoch": 0.67, "grad_norm": 0.7193681062487589, "learning_rate": 5.229618759417036e-06, "loss": 0.7196, "step": 3813 }, { "epoch": 0.67, "grad_norm": 0.6718725189307097, "learning_rate": 5.2246275004036164e-06, "loss": 0.7093, "step": 3814 }, { "epoch": 0.67, "grad_norm": 0.6948923857943885, "learning_rate": 5.219637782024884e-06, "loss": 0.7298, "step": 3815 }, { "epoch": 0.67, "grad_norm": 0.6687246630759397, "learning_rate": 5.214649605890625e-06, "loss": 0.6615, "step": 3816 }, { "epoch": 0.67, "grad_norm": 0.6104581628952634, "learning_rate": 5.209662973610121e-06, "loss": 0.697, "step": 3817 }, { "epoch": 0.67, "grad_norm": 0.6108915142527969, "learning_rate": 5.20467788679217e-06, "loss": 0.6869, "step": 3818 }, { "epoch": 0.67, "grad_norm": 0.7504762559683734, "learning_rate": 5.199694347045063e-06, "loss": 0.6941, "step": 3819 }, { "epoch": 0.67, "grad_norm": 0.7064520286747901, "learning_rate": 5.194712355976596e-06, "loss": 0.6919, "step": 3820 }, { "epoch": 0.67, "grad_norm": 0.6777608824840348, "learning_rate": 5.189731915194063e-06, "loss": 0.7041, "step": 3821 }, { "epoch": 0.67, "grad_norm": 0.7167435467179116, "learning_rate": 5.184753026304261e-06, "loss": 0.7299, "step": 3822 }, { "epoch": 0.67, "grad_norm": 0.6349898325831766, "learning_rate": 5.179775690913475e-06, "loss": 0.6881, "step": 3823 }, { "epoch": 0.67, "grad_norm": 0.6716625436872551, "learning_rate": 5.1747999106275005e-06, "loss": 0.6828, "step": 3824 }, { "epoch": 0.67, "grad_norm": 0.69521079684104, "learning_rate": 5.169825687051634e-06, "loss": 0.7098, "step": 3825 }, { "epoch": 0.67, "grad_norm": 0.6590262866444403, "learning_rate": 5.1648530217906545e-06, "loss": 0.6963, "step": 3826 }, { "epoch": 0.67, "grad_norm": 0.8104638440228294, "learning_rate": 5.159881916448851e-06, "loss": 0.762, "step": 3827 }, { "epoch": 0.67, "grad_norm": 0.7298600878178718, "learning_rate": 5.154912372630011e-06, "loss": 0.6478, "step": 3828 }, { "epoch": 0.67, "grad_norm": 0.6637284403452532, "learning_rate": 5.149944391937404e-06, "loss": 0.7008, "step": 3829 }, { "epoch": 0.67, "grad_norm": 0.7882743389908855, "learning_rate": 5.1449779759738085e-06, "loss": 0.7662, "step": 3830 }, { "epoch": 0.67, "grad_norm": 0.6434169300159732, "learning_rate": 5.140013126341492e-06, "loss": 0.6519, "step": 3831 }, { "epoch": 0.67, "grad_norm": 0.6417461916658261, "learning_rate": 5.135049844642221e-06, "loss": 0.6664, "step": 3832 }, { "epoch": 0.67, "grad_norm": 0.7674431315440703, "learning_rate": 5.130088132477258e-06, "loss": 0.7345, "step": 3833 }, { "epoch": 0.67, "grad_norm": 0.7162543286470655, "learning_rate": 5.125127991447348e-06, "loss": 0.744, "step": 3834 }, { "epoch": 0.67, "grad_norm": 0.7620752051987778, "learning_rate": 5.120169423152732e-06, "loss": 0.7667, "step": 3835 }, { "epoch": 0.67, "grad_norm": 0.6082529818119427, "learning_rate": 5.115212429193155e-06, "loss": 0.688, "step": 3836 }, { "epoch": 0.67, "grad_norm": 0.7739604581172084, "learning_rate": 5.110257011167843e-06, "loss": 0.7441, "step": 3837 }, { "epoch": 0.67, "grad_norm": 0.768242325020029, "learning_rate": 5.1053031706755184e-06, "loss": 0.7556, "step": 3838 }, { "epoch": 0.67, "grad_norm": 0.7356790349449117, "learning_rate": 5.1003509093143975e-06, "loss": 0.6876, "step": 3839 }, { "epoch": 0.67, "grad_norm": 0.6921361170478114, "learning_rate": 5.095400228682175e-06, "loss": 0.7232, "step": 3840 }, { "epoch": 0.67, "grad_norm": 0.6016952165733309, "learning_rate": 5.090451130376047e-06, "loss": 0.6578, "step": 3841 }, { "epoch": 0.67, "grad_norm": 0.5939114865889934, "learning_rate": 5.085503615992697e-06, "loss": 0.6917, "step": 3842 }, { "epoch": 0.67, "grad_norm": 0.8701947214391322, "learning_rate": 5.080557687128299e-06, "loss": 0.8256, "step": 3843 }, { "epoch": 0.67, "grad_norm": 0.7045726432348327, "learning_rate": 5.075613345378505e-06, "loss": 0.7261, "step": 3844 }, { "epoch": 0.67, "grad_norm": 0.674543894829507, "learning_rate": 5.0706705923384715e-06, "loss": 0.7179, "step": 3845 }, { "epoch": 0.67, "grad_norm": 0.9706718617995038, "learning_rate": 5.0657294296028244e-06, "loss": 0.6952, "step": 3846 }, { "epoch": 0.67, "grad_norm": 0.736243042262045, "learning_rate": 5.060789858765691e-06, "loss": 0.7167, "step": 3847 }, { "epoch": 0.67, "grad_norm": 0.7430431492984368, "learning_rate": 5.055851881420679e-06, "loss": 0.7288, "step": 3848 }, { "epoch": 0.67, "grad_norm": 0.6603799311233272, "learning_rate": 5.050915499160882e-06, "loss": 0.741, "step": 3849 }, { "epoch": 0.68, "grad_norm": 0.7843688113107631, "learning_rate": 5.045980713578882e-06, "loss": 0.7179, "step": 3850 }, { "epoch": 0.68, "grad_norm": 0.6963642129677979, "learning_rate": 5.041047526266747e-06, "loss": 0.6715, "step": 3851 }, { "epoch": 0.68, "grad_norm": 0.7180726886787616, "learning_rate": 5.03611593881602e-06, "loss": 0.7089, "step": 3852 }, { "epoch": 0.68, "grad_norm": 0.6695917716173048, "learning_rate": 5.031185952817732e-06, "loss": 0.7172, "step": 3853 }, { "epoch": 0.68, "grad_norm": 0.8447764460742302, "learning_rate": 5.0262575698624026e-06, "loss": 0.6782, "step": 3854 }, { "epoch": 0.68, "grad_norm": 0.7566683882549605, "learning_rate": 5.02133079154003e-06, "loss": 0.8001, "step": 3855 }, { "epoch": 0.68, "grad_norm": 0.6853996565554452, "learning_rate": 5.016405619440096e-06, "loss": 0.6763, "step": 3856 }, { "epoch": 0.68, "grad_norm": 0.7772617977867423, "learning_rate": 5.011482055151571e-06, "loss": 0.6975, "step": 3857 }, { "epoch": 0.68, "grad_norm": 0.6410543890495052, "learning_rate": 5.006560100262886e-06, "loss": 0.6699, "step": 3858 }, { "epoch": 0.68, "grad_norm": 0.6335982136050051, "learning_rate": 5.001639756361973e-06, "loss": 0.7087, "step": 3859 }, { "epoch": 0.68, "grad_norm": 0.7114060930853807, "learning_rate": 4.996721025036239e-06, "loss": 0.6888, "step": 3860 }, { "epoch": 0.68, "grad_norm": 0.6840949706894206, "learning_rate": 4.9918039078725685e-06, "loss": 0.7318, "step": 3861 }, { "epoch": 0.68, "grad_norm": 0.6261156380397004, "learning_rate": 4.986888406457323e-06, "loss": 0.6923, "step": 3862 }, { "epoch": 0.68, "grad_norm": 0.7257860484233973, "learning_rate": 4.981974522376352e-06, "loss": 0.7744, "step": 3863 }, { "epoch": 0.68, "grad_norm": 0.6733520041417648, "learning_rate": 4.977062257214968e-06, "loss": 0.669, "step": 3864 }, { "epoch": 0.68, "grad_norm": 0.6480139853948359, "learning_rate": 4.9721516125579735e-06, "loss": 0.6981, "step": 3865 }, { "epoch": 0.68, "grad_norm": 0.6939224944777761, "learning_rate": 4.967242589989648e-06, "loss": 0.7523, "step": 3866 }, { "epoch": 0.68, "grad_norm": 0.7119404032088039, "learning_rate": 4.962335191093741e-06, "loss": 0.6658, "step": 3867 }, { "epoch": 0.68, "grad_norm": 0.6801775365300111, "learning_rate": 4.9574294174534875e-06, "loss": 0.7009, "step": 3868 }, { "epoch": 0.68, "grad_norm": 0.7206293803736405, "learning_rate": 4.952525270651584e-06, "loss": 0.6927, "step": 3869 }, { "epoch": 0.68, "grad_norm": 0.7141212941146021, "learning_rate": 4.947622752270219e-06, "loss": 0.7342, "step": 3870 }, { "epoch": 0.68, "grad_norm": 0.735018266942994, "learning_rate": 4.9427218638910375e-06, "loss": 0.7276, "step": 3871 }, { "epoch": 0.68, "grad_norm": 0.7310767155875324, "learning_rate": 4.937822607095174e-06, "loss": 0.691, "step": 3872 }, { "epoch": 0.68, "grad_norm": 0.7632903030581712, "learning_rate": 4.93292498346323e-06, "loss": 0.7335, "step": 3873 }, { "epoch": 0.68, "grad_norm": 0.6007912544467231, "learning_rate": 4.92802899457528e-06, "loss": 0.6634, "step": 3874 }, { "epoch": 0.68, "grad_norm": 0.6955819958547845, "learning_rate": 4.923134642010878e-06, "loss": 0.6734, "step": 3875 }, { "epoch": 0.68, "grad_norm": 0.66878894379211, "learning_rate": 4.918241927349034e-06, "loss": 0.7068, "step": 3876 }, { "epoch": 0.68, "grad_norm": 0.6623088632117868, "learning_rate": 4.9133508521682464e-06, "loss": 0.7192, "step": 3877 }, { "epoch": 0.68, "grad_norm": 0.7105584456995008, "learning_rate": 4.908461418046475e-06, "loss": 0.6756, "step": 3878 }, { "epoch": 0.68, "grad_norm": 0.6753608938155389, "learning_rate": 4.903573626561156e-06, "loss": 0.7097, "step": 3879 }, { "epoch": 0.68, "grad_norm": 0.7451636552678931, "learning_rate": 4.898687479289195e-06, "loss": 0.6554, "step": 3880 }, { "epoch": 0.68, "grad_norm": 0.7753326965457921, "learning_rate": 4.89380297780696e-06, "loss": 0.7052, "step": 3881 }, { "epoch": 0.68, "grad_norm": 0.6435540677314671, "learning_rate": 4.888920123690291e-06, "loss": 0.663, "step": 3882 }, { "epoch": 0.68, "grad_norm": 0.6673020999174788, "learning_rate": 4.884038918514503e-06, "loss": 0.7143, "step": 3883 }, { "epoch": 0.68, "grad_norm": 0.7285281771550658, "learning_rate": 4.879159363854372e-06, "loss": 0.6953, "step": 3884 }, { "epoch": 0.68, "grad_norm": 0.694173608957493, "learning_rate": 4.874281461284146e-06, "loss": 0.7099, "step": 3885 }, { "epoch": 0.68, "grad_norm": 0.6950531377946103, "learning_rate": 4.8694052123775406e-06, "loss": 0.7019, "step": 3886 }, { "epoch": 0.68, "grad_norm": 0.7526954187445473, "learning_rate": 4.864530618707728e-06, "loss": 0.7104, "step": 3887 }, { "epoch": 0.68, "grad_norm": 0.5805633859847171, "learning_rate": 4.8596576818473586e-06, "loss": 0.6676, "step": 3888 }, { "epoch": 0.68, "grad_norm": 0.6013091649711352, "learning_rate": 4.854786403368545e-06, "loss": 0.6399, "step": 3889 }, { "epoch": 0.68, "grad_norm": 0.660962667044918, "learning_rate": 4.849916784842859e-06, "loss": 0.7191, "step": 3890 }, { "epoch": 0.68, "grad_norm": 0.6656048433533132, "learning_rate": 4.845048827841341e-06, "loss": 0.6844, "step": 3891 }, { "epoch": 0.68, "grad_norm": 0.8651831098950522, "learning_rate": 4.840182533934501e-06, "loss": 0.771, "step": 3892 }, { "epoch": 0.68, "grad_norm": 0.6932638465408888, "learning_rate": 4.835317904692299e-06, "loss": 0.7299, "step": 3893 }, { "epoch": 0.68, "grad_norm": 0.632700899304217, "learning_rate": 4.83045494168417e-06, "loss": 0.6838, "step": 3894 }, { "epoch": 0.68, "grad_norm": 0.8210602352660376, "learning_rate": 4.825593646479007e-06, "loss": 0.7825, "step": 3895 }, { "epoch": 0.68, "grad_norm": 0.6227599242935085, "learning_rate": 4.820734020645166e-06, "loss": 0.6811, "step": 3896 }, { "epoch": 0.68, "grad_norm": 0.6938873828681384, "learning_rate": 4.815876065750464e-06, "loss": 0.6699, "step": 3897 }, { "epoch": 0.68, "grad_norm": 0.694306498440378, "learning_rate": 4.81101978336218e-06, "loss": 0.7025, "step": 3898 }, { "epoch": 0.68, "grad_norm": 0.6868360962607555, "learning_rate": 4.806165175047043e-06, "loss": 0.6633, "step": 3899 }, { "epoch": 0.68, "grad_norm": 0.7162194389800982, "learning_rate": 4.8013122423712565e-06, "loss": 0.7634, "step": 3900 }, { "epoch": 0.68, "grad_norm": 0.6736335736670063, "learning_rate": 4.796460986900479e-06, "loss": 0.6791, "step": 3901 }, { "epoch": 0.68, "grad_norm": 0.7141194729654623, "learning_rate": 4.791611410199826e-06, "loss": 0.7263, "step": 3902 }, { "epoch": 0.68, "grad_norm": 0.7899384576745612, "learning_rate": 4.786763513833871e-06, "loss": 0.7034, "step": 3903 }, { "epoch": 0.68, "grad_norm": 0.6138629626129972, "learning_rate": 4.781917299366651e-06, "loss": 0.671, "step": 3904 }, { "epoch": 0.68, "grad_norm": 0.6725164664482314, "learning_rate": 4.7770727683616465e-06, "loss": 0.6772, "step": 3905 }, { "epoch": 0.68, "grad_norm": 0.6469233213991414, "learning_rate": 4.772229922381812e-06, "loss": 0.6898, "step": 3906 }, { "epoch": 0.69, "grad_norm": 0.7631567578271852, "learning_rate": 4.76738876298955e-06, "loss": 0.7556, "step": 3907 }, { "epoch": 0.69, "grad_norm": 0.6770158280140242, "learning_rate": 4.7625492917467145e-06, "loss": 0.7652, "step": 3908 }, { "epoch": 0.69, "grad_norm": 0.6608854088930747, "learning_rate": 4.757711510214621e-06, "loss": 0.6935, "step": 3909 }, { "epoch": 0.69, "grad_norm": 0.6833246379555615, "learning_rate": 4.752875419954046e-06, "loss": 0.683, "step": 3910 }, { "epoch": 0.69, "grad_norm": 0.6755786593392891, "learning_rate": 4.748041022525203e-06, "loss": 0.6916, "step": 3911 }, { "epoch": 0.69, "grad_norm": 0.623776793187692, "learning_rate": 4.743208319487773e-06, "loss": 0.6627, "step": 3912 }, { "epoch": 0.69, "grad_norm": 0.6986150476505548, "learning_rate": 4.738377312400887e-06, "loss": 0.6772, "step": 3913 }, { "epoch": 0.69, "grad_norm": 0.8117048394174071, "learning_rate": 4.73354800282313e-06, "loss": 0.8325, "step": 3914 }, { "epoch": 0.69, "grad_norm": 0.6494509787405786, "learning_rate": 4.72872039231254e-06, "loss": 0.6669, "step": 3915 }, { "epoch": 0.69, "grad_norm": 0.7721548115882467, "learning_rate": 4.723894482426602e-06, "loss": 0.7827, "step": 3916 }, { "epoch": 0.69, "grad_norm": 0.6374713861613419, "learning_rate": 4.71907027472225e-06, "loss": 0.6628, "step": 3917 }, { "epoch": 0.69, "grad_norm": 0.7008788347200157, "learning_rate": 4.7142477707558795e-06, "loss": 0.6929, "step": 3918 }, { "epoch": 0.69, "grad_norm": 0.719262725077465, "learning_rate": 4.7094269720833304e-06, "loss": 0.6753, "step": 3919 }, { "epoch": 0.69, "grad_norm": 0.65161845217328, "learning_rate": 4.704607880259891e-06, "loss": 0.6716, "step": 3920 }, { "epoch": 0.69, "grad_norm": 0.680782571418947, "learning_rate": 4.699790496840307e-06, "loss": 0.6746, "step": 3921 }, { "epoch": 0.69, "grad_norm": 1.147455454416788, "learning_rate": 4.694974823378757e-06, "loss": 0.7293, "step": 3922 }, { "epoch": 0.69, "grad_norm": 0.8417507434733394, "learning_rate": 4.690160861428883e-06, "loss": 0.7457, "step": 3923 }, { "epoch": 0.69, "grad_norm": 0.6628827157377503, "learning_rate": 4.6853486125437685e-06, "loss": 0.7017, "step": 3924 }, { "epoch": 0.69, "grad_norm": 0.7084645457836741, "learning_rate": 4.680538078275949e-06, "loss": 0.679, "step": 3925 }, { "epoch": 0.69, "grad_norm": 0.7284298986960926, "learning_rate": 4.675729260177397e-06, "loss": 0.7281, "step": 3926 }, { "epoch": 0.69, "grad_norm": 0.7683212862065008, "learning_rate": 4.670922159799543e-06, "loss": 0.7449, "step": 3927 }, { "epoch": 0.69, "grad_norm": 0.8935893856233198, "learning_rate": 4.666116778693251e-06, "loss": 0.6877, "step": 3928 }, { "epoch": 0.69, "grad_norm": 0.6155208638645904, "learning_rate": 4.661313118408841e-06, "loss": 0.6721, "step": 3929 }, { "epoch": 0.69, "grad_norm": 0.7172702890530858, "learning_rate": 4.656511180496075e-06, "loss": 0.7187, "step": 3930 }, { "epoch": 0.69, "grad_norm": 0.6954912046735725, "learning_rate": 4.651710966504156e-06, "loss": 0.6793, "step": 3931 }, { "epoch": 0.69, "grad_norm": 0.6993206253618558, "learning_rate": 4.646912477981735e-06, "loss": 0.6981, "step": 3932 }, { "epoch": 0.69, "grad_norm": 0.727153422435547, "learning_rate": 4.642115716476906e-06, "loss": 0.7414, "step": 3933 }, { "epoch": 0.69, "grad_norm": 0.6540648025083513, "learning_rate": 4.637320683537202e-06, "loss": 0.6683, "step": 3934 }, { "epoch": 0.69, "grad_norm": 0.7288664597113681, "learning_rate": 4.6325273807095975e-06, "loss": 0.7295, "step": 3935 }, { "epoch": 0.69, "grad_norm": 0.6285193828288874, "learning_rate": 4.627735809540514e-06, "loss": 0.6523, "step": 3936 }, { "epoch": 0.69, "grad_norm": 0.7034385122602635, "learning_rate": 4.622945971575814e-06, "loss": 0.6835, "step": 3937 }, { "epoch": 0.69, "grad_norm": 0.7341621299915216, "learning_rate": 4.618157868360796e-06, "loss": 0.7288, "step": 3938 }, { "epoch": 0.69, "grad_norm": 0.6833564038385779, "learning_rate": 4.613371501440209e-06, "loss": 0.6888, "step": 3939 }, { "epoch": 0.69, "grad_norm": 0.6708047249291859, "learning_rate": 4.608586872358226e-06, "loss": 0.6612, "step": 3940 }, { "epoch": 0.69, "grad_norm": 0.7939758481579874, "learning_rate": 4.603803982658472e-06, "loss": 0.7372, "step": 3941 }, { "epoch": 0.69, "grad_norm": 0.7795435889320006, "learning_rate": 4.599022833884005e-06, "loss": 0.642, "step": 3942 }, { "epoch": 0.69, "grad_norm": 0.6570728215770382, "learning_rate": 4.594243427577326e-06, "loss": 0.6934, "step": 3943 }, { "epoch": 0.69, "grad_norm": 0.6765631285196115, "learning_rate": 4.589465765280373e-06, "loss": 0.6812, "step": 3944 }, { "epoch": 0.69, "grad_norm": 0.6439742675806613, "learning_rate": 4.584689848534516e-06, "loss": 0.6864, "step": 3945 }, { "epoch": 0.69, "grad_norm": 0.8160413864984365, "learning_rate": 4.579915678880562e-06, "loss": 0.7106, "step": 3946 }, { "epoch": 0.69, "grad_norm": 0.8267365683303203, "learning_rate": 4.575143257858762e-06, "loss": 0.7113, "step": 3947 }, { "epoch": 0.69, "grad_norm": 0.7981274782776646, "learning_rate": 4.570372587008796e-06, "loss": 0.729, "step": 3948 }, { "epoch": 0.69, "grad_norm": 0.6846457621170189, "learning_rate": 4.565603667869785e-06, "loss": 0.7016, "step": 3949 }, { "epoch": 0.69, "grad_norm": 0.6500076081638055, "learning_rate": 4.560836501980283e-06, "loss": 0.6918, "step": 3950 }, { "epoch": 0.69, "grad_norm": 0.6406145007983253, "learning_rate": 4.55607109087827e-06, "loss": 0.6962, "step": 3951 }, { "epoch": 0.69, "grad_norm": 0.676570725408778, "learning_rate": 4.551307436101172e-06, "loss": 0.7196, "step": 3952 }, { "epoch": 0.69, "grad_norm": 0.6721652992277668, "learning_rate": 4.546545539185846e-06, "loss": 0.6955, "step": 3953 }, { "epoch": 0.69, "grad_norm": 0.6427274533907341, "learning_rate": 4.541785401668572e-06, "loss": 0.7036, "step": 3954 }, { "epoch": 0.69, "grad_norm": 0.6508394720461732, "learning_rate": 4.537027025085073e-06, "loss": 0.7088, "step": 3955 }, { "epoch": 0.69, "grad_norm": 0.6624445009326414, "learning_rate": 4.5322704109705e-06, "loss": 0.6881, "step": 3956 }, { "epoch": 0.69, "grad_norm": 0.6277120069422828, "learning_rate": 4.52751556085944e-06, "loss": 0.6777, "step": 3957 }, { "epoch": 0.69, "grad_norm": 0.6053704513123044, "learning_rate": 4.5227624762859e-06, "loss": 0.668, "step": 3958 }, { "epoch": 0.69, "grad_norm": 0.7063263470959069, "learning_rate": 4.518011158783327e-06, "loss": 0.7369, "step": 3959 }, { "epoch": 0.69, "grad_norm": 0.7051217649171778, "learning_rate": 4.513261609884597e-06, "loss": 0.7372, "step": 3960 }, { "epoch": 0.69, "grad_norm": 0.7717968771168001, "learning_rate": 4.50851383112201e-06, "loss": 0.728, "step": 3961 }, { "epoch": 0.69, "grad_norm": 0.7669938294717987, "learning_rate": 4.503767824027305e-06, "loss": 0.7536, "step": 3962 }, { "epoch": 0.69, "grad_norm": 0.7086901376379484, "learning_rate": 4.499023590131637e-06, "loss": 0.7152, "step": 3963 }, { "epoch": 0.7, "grad_norm": 0.65523723765447, "learning_rate": 4.494281130965593e-06, "loss": 0.717, "step": 3964 }, { "epoch": 0.7, "grad_norm": 0.7008072711442297, "learning_rate": 4.489540448059191e-06, "loss": 0.6844, "step": 3965 }, { "epoch": 0.7, "grad_norm": 0.6648701100166502, "learning_rate": 4.484801542941876e-06, "loss": 0.6914, "step": 3966 }, { "epoch": 0.7, "grad_norm": 0.7951954619503134, "learning_rate": 4.4800644171425155e-06, "loss": 0.6731, "step": 3967 }, { "epoch": 0.7, "grad_norm": 0.6616063376838991, "learning_rate": 4.475329072189409e-06, "loss": 0.6858, "step": 3968 }, { "epoch": 0.7, "grad_norm": 0.6364975615008875, "learning_rate": 4.4705955096102726e-06, "loss": 0.6959, "step": 3969 }, { "epoch": 0.7, "grad_norm": 0.7124936810528, "learning_rate": 4.465863730932253e-06, "loss": 0.7231, "step": 3970 }, { "epoch": 0.7, "grad_norm": 0.8172592761284707, "learning_rate": 4.461133737681926e-06, "loss": 0.756, "step": 3971 }, { "epoch": 0.7, "grad_norm": 0.6301959463478298, "learning_rate": 4.456405531385277e-06, "loss": 0.6749, "step": 3972 }, { "epoch": 0.7, "grad_norm": 0.7042121214641872, "learning_rate": 4.45167911356773e-06, "loss": 0.7091, "step": 3973 }, { "epoch": 0.7, "grad_norm": 0.7574947576644057, "learning_rate": 4.446954485754127e-06, "loss": 0.7054, "step": 3974 }, { "epoch": 0.7, "grad_norm": 0.6844240677967618, "learning_rate": 4.4422316494687255e-06, "loss": 0.6607, "step": 3975 }, { "epoch": 0.7, "grad_norm": 0.6949244544466432, "learning_rate": 4.437510606235214e-06, "loss": 0.67, "step": 3976 }, { "epoch": 0.7, "grad_norm": 0.7756139661151491, "learning_rate": 4.4327913575767e-06, "loss": 0.7254, "step": 3977 }, { "epoch": 0.7, "grad_norm": 0.7368394736490559, "learning_rate": 4.428073905015712e-06, "loss": 0.6958, "step": 3978 }, { "epoch": 0.7, "grad_norm": 0.6768653594930659, "learning_rate": 4.4233582500742e-06, "loss": 0.7119, "step": 3979 }, { "epoch": 0.7, "grad_norm": 0.611418110420067, "learning_rate": 4.418644394273531e-06, "loss": 0.6983, "step": 3980 }, { "epoch": 0.7, "grad_norm": 0.7063357885124085, "learning_rate": 4.413932339134489e-06, "loss": 0.7068, "step": 3981 }, { "epoch": 0.7, "grad_norm": 0.6925585112783754, "learning_rate": 4.409222086177284e-06, "loss": 0.6779, "step": 3982 }, { "epoch": 0.7, "grad_norm": 0.6517344019147757, "learning_rate": 4.404513636921541e-06, "loss": 0.6788, "step": 3983 }, { "epoch": 0.7, "grad_norm": 0.7041831619852239, "learning_rate": 4.399806992886307e-06, "loss": 0.7104, "step": 3984 }, { "epoch": 0.7, "grad_norm": 0.7705168969758647, "learning_rate": 4.3951021555900406e-06, "loss": 0.6957, "step": 3985 }, { "epoch": 0.7, "grad_norm": 0.8306827639618176, "learning_rate": 4.3903991265506256e-06, "loss": 0.7205, "step": 3986 }, { "epoch": 0.7, "grad_norm": 0.7326720285006443, "learning_rate": 4.385697907285349e-06, "loss": 0.7452, "step": 3987 }, { "epoch": 0.7, "grad_norm": 0.6871504967409611, "learning_rate": 4.380998499310925e-06, "loss": 0.7305, "step": 3988 }, { "epoch": 0.7, "grad_norm": 0.6832408702499257, "learning_rate": 4.376300904143486e-06, "loss": 0.6983, "step": 3989 }, { "epoch": 0.7, "grad_norm": 0.638314415648227, "learning_rate": 4.371605123298568e-06, "loss": 0.678, "step": 3990 }, { "epoch": 0.7, "grad_norm": 0.8603120334527046, "learning_rate": 4.3669111582911274e-06, "loss": 0.6881, "step": 3991 }, { "epoch": 0.7, "grad_norm": 0.6742626156558128, "learning_rate": 4.362219010635541e-06, "loss": 0.7396, "step": 3992 }, { "epoch": 0.7, "grad_norm": 0.7066612237369919, "learning_rate": 4.357528681845585e-06, "loss": 0.6919, "step": 3993 }, { "epoch": 0.7, "grad_norm": 0.7191056515381434, "learning_rate": 4.352840173434463e-06, "loss": 0.7119, "step": 3994 }, { "epoch": 0.7, "grad_norm": 0.7662799162847375, "learning_rate": 4.348153486914785e-06, "loss": 0.7053, "step": 3995 }, { "epoch": 0.7, "grad_norm": 0.6473729039926089, "learning_rate": 4.343468623798571e-06, "loss": 0.699, "step": 3996 }, { "epoch": 0.7, "grad_norm": 0.6476106181861463, "learning_rate": 4.338785585597263e-06, "loss": 0.696, "step": 3997 }, { "epoch": 0.7, "grad_norm": 0.7127786868044038, "learning_rate": 4.3341043738217e-06, "loss": 0.7053, "step": 3998 }, { "epoch": 0.7, "grad_norm": 0.7140247643548592, "learning_rate": 4.329424989982136e-06, "loss": 0.7266, "step": 3999 }, { "epoch": 0.7, "grad_norm": 0.6075025123223964, "learning_rate": 4.324747435588242e-06, "loss": 0.7015, "step": 4000 }, { "epoch": 0.7, "grad_norm": 0.7144206866893128, "learning_rate": 4.320071712149094e-06, "loss": 0.7399, "step": 4001 }, { "epoch": 0.7, "grad_norm": 0.7400632408219339, "learning_rate": 4.315397821173177e-06, "loss": 0.6905, "step": 4002 }, { "epoch": 0.7, "grad_norm": 0.75526909205561, "learning_rate": 4.310725764168392e-06, "loss": 0.7652, "step": 4003 }, { "epoch": 0.7, "grad_norm": 0.6487284584645248, "learning_rate": 4.306055542642031e-06, "loss": 0.6946, "step": 4004 }, { "epoch": 0.7, "grad_norm": 0.7291489870260005, "learning_rate": 4.301387158100811e-06, "loss": 0.7087, "step": 4005 }, { "epoch": 0.7, "grad_norm": 0.6489591092659152, "learning_rate": 4.296720612050849e-06, "loss": 0.6739, "step": 4006 }, { "epoch": 0.7, "grad_norm": 0.777682253569864, "learning_rate": 4.29205590599767e-06, "loss": 0.7309, "step": 4007 }, { "epoch": 0.7, "grad_norm": 0.7516945246136576, "learning_rate": 4.287393041446209e-06, "loss": 0.8011, "step": 4008 }, { "epoch": 0.7, "grad_norm": 0.7064891759073131, "learning_rate": 4.282732019900801e-06, "loss": 0.7338, "step": 4009 }, { "epoch": 0.7, "grad_norm": 0.6218176534615302, "learning_rate": 4.278072842865183e-06, "loss": 0.7146, "step": 4010 }, { "epoch": 0.7, "grad_norm": 0.6298627939371148, "learning_rate": 4.273415511842507e-06, "loss": 0.6763, "step": 4011 }, { "epoch": 0.7, "grad_norm": 0.7190160125948066, "learning_rate": 4.268760028335326e-06, "loss": 0.6912, "step": 4012 }, { "epoch": 0.7, "grad_norm": 0.7362086228336854, "learning_rate": 4.264106393845595e-06, "loss": 0.7139, "step": 4013 }, { "epoch": 0.7, "grad_norm": 0.653600491102266, "learning_rate": 4.259454609874675e-06, "loss": 0.6817, "step": 4014 }, { "epoch": 0.7, "grad_norm": 0.6016503577473339, "learning_rate": 4.254804677923329e-06, "loss": 0.6484, "step": 4015 }, { "epoch": 0.7, "grad_norm": 0.6274787816373798, "learning_rate": 4.250156599491717e-06, "loss": 0.687, "step": 4016 }, { "epoch": 0.7, "grad_norm": 0.6595147859136136, "learning_rate": 4.245510376079413e-06, "loss": 0.6858, "step": 4017 }, { "epoch": 0.7, "grad_norm": 0.7256858917837818, "learning_rate": 4.240866009185378e-06, "loss": 0.7081, "step": 4018 }, { "epoch": 0.7, "grad_norm": 0.7012697047857332, "learning_rate": 4.236223500307986e-06, "loss": 0.6964, "step": 4019 }, { "epoch": 0.7, "grad_norm": 0.6461050279850772, "learning_rate": 4.2315828509450055e-06, "loss": 0.7108, "step": 4020 }, { "epoch": 0.71, "grad_norm": 0.6611221800624095, "learning_rate": 4.226944062593611e-06, "loss": 0.6876, "step": 4021 }, { "epoch": 0.71, "grad_norm": 0.7095670823119493, "learning_rate": 4.222307136750364e-06, "loss": 0.7021, "step": 4022 }, { "epoch": 0.71, "grad_norm": 0.6472496200030502, "learning_rate": 4.21767207491124e-06, "loss": 0.6564, "step": 4023 }, { "epoch": 0.71, "grad_norm": 0.7034928643630493, "learning_rate": 4.213038878571604e-06, "loss": 0.7611, "step": 4024 }, { "epoch": 0.71, "grad_norm": 0.6905506674503996, "learning_rate": 4.2084075492262226e-06, "loss": 0.6989, "step": 4025 }, { "epoch": 0.71, "grad_norm": 0.6778981059217278, "learning_rate": 4.203778088369261e-06, "loss": 0.7081, "step": 4026 }, { "epoch": 0.71, "grad_norm": 0.8717287303007151, "learning_rate": 4.19915049749428e-06, "loss": 0.7386, "step": 4027 }, { "epoch": 0.71, "grad_norm": 0.6633858203006394, "learning_rate": 4.19452477809423e-06, "loss": 0.6636, "step": 4028 }, { "epoch": 0.71, "grad_norm": 0.6086608947901369, "learning_rate": 4.189900931661469e-06, "loss": 0.6691, "step": 4029 }, { "epoch": 0.71, "grad_norm": 0.7436054577697347, "learning_rate": 4.185278959687747e-06, "loss": 0.6968, "step": 4030 }, { "epoch": 0.71, "grad_norm": 0.6161142239855353, "learning_rate": 4.180658863664205e-06, "loss": 0.7057, "step": 4031 }, { "epoch": 0.71, "grad_norm": 0.732611476366949, "learning_rate": 4.176040645081392e-06, "loss": 0.7475, "step": 4032 }, { "epoch": 0.71, "grad_norm": 0.7151364623712079, "learning_rate": 4.171424305429229e-06, "loss": 0.7292, "step": 4033 }, { "epoch": 0.71, "grad_norm": 0.685179806746212, "learning_rate": 4.166809846197049e-06, "loss": 0.7094, "step": 4034 }, { "epoch": 0.71, "grad_norm": 0.6627201639020645, "learning_rate": 4.162197268873577e-06, "loss": 0.7192, "step": 4035 }, { "epoch": 0.71, "grad_norm": 0.6024292377492336, "learning_rate": 4.157586574946918e-06, "loss": 0.6977, "step": 4036 }, { "epoch": 0.71, "grad_norm": 0.6799440318302968, "learning_rate": 4.1529777659045825e-06, "loss": 0.7258, "step": 4037 }, { "epoch": 0.71, "grad_norm": 0.6573972923197857, "learning_rate": 4.148370843233467e-06, "loss": 0.6848, "step": 4038 }, { "epoch": 0.71, "grad_norm": 0.7413596158868206, "learning_rate": 4.143765808419866e-06, "loss": 0.7126, "step": 4039 }, { "epoch": 0.71, "grad_norm": 0.6215890469459103, "learning_rate": 4.1391626629494515e-06, "loss": 0.6928, "step": 4040 }, { "epoch": 0.71, "grad_norm": 0.6380303003136703, "learning_rate": 4.134561408307299e-06, "loss": 0.6924, "step": 4041 }, { "epoch": 0.71, "grad_norm": 0.6057664561260124, "learning_rate": 4.129962045977868e-06, "loss": 0.7004, "step": 4042 }, { "epoch": 0.71, "grad_norm": 0.7067512037916545, "learning_rate": 4.125364577445008e-06, "loss": 0.686, "step": 4043 }, { "epoch": 0.71, "grad_norm": 0.6977729909286843, "learning_rate": 4.120769004191963e-06, "loss": 0.6907, "step": 4044 }, { "epoch": 0.71, "grad_norm": 0.6782617000313675, "learning_rate": 4.116175327701357e-06, "loss": 0.7002, "step": 4045 }, { "epoch": 0.71, "grad_norm": 0.7315905840456344, "learning_rate": 4.111583549455202e-06, "loss": 0.7353, "step": 4046 }, { "epoch": 0.71, "grad_norm": 0.6857781388280468, "learning_rate": 4.106993670934905e-06, "loss": 0.6781, "step": 4047 }, { "epoch": 0.71, "grad_norm": 0.7187255556302073, "learning_rate": 4.1024056936212556e-06, "loss": 0.7121, "step": 4048 }, { "epoch": 0.71, "grad_norm": 0.769545840050583, "learning_rate": 4.097819618994432e-06, "loss": 0.7208, "step": 4049 }, { "epoch": 0.71, "grad_norm": 0.7004867816898608, "learning_rate": 4.093235448534e-06, "loss": 0.673, "step": 4050 }, { "epoch": 0.71, "grad_norm": 0.7051267313536966, "learning_rate": 4.088653183718903e-06, "loss": 0.7088, "step": 4051 }, { "epoch": 0.71, "grad_norm": 0.7081079787505027, "learning_rate": 4.084072826027477e-06, "loss": 0.7069, "step": 4052 }, { "epoch": 0.71, "grad_norm": 0.6240672562064153, "learning_rate": 4.079494376937445e-06, "loss": 0.6986, "step": 4053 }, { "epoch": 0.71, "grad_norm": 0.7025294194914378, "learning_rate": 4.074917837925903e-06, "loss": 0.702, "step": 4054 }, { "epoch": 0.71, "grad_norm": 0.6696439243854817, "learning_rate": 4.070343210469342e-06, "loss": 0.6642, "step": 4055 }, { "epoch": 0.71, "grad_norm": 0.6974677333112685, "learning_rate": 4.065770496043634e-06, "loss": 0.7312, "step": 4056 }, { "epoch": 0.71, "grad_norm": 0.6646005438956429, "learning_rate": 4.0611996961240276e-06, "loss": 0.6633, "step": 4057 }, { "epoch": 0.71, "grad_norm": 0.821287734412913, "learning_rate": 4.056630812185159e-06, "loss": 0.7467, "step": 4058 }, { "epoch": 0.71, "grad_norm": 0.7440564930566199, "learning_rate": 4.0520638457010466e-06, "loss": 0.7446, "step": 4059 }, { "epoch": 0.71, "grad_norm": 0.6873337783315991, "learning_rate": 4.047498798145089e-06, "loss": 0.7125, "step": 4060 }, { "epoch": 0.71, "grad_norm": 0.7103067533983112, "learning_rate": 4.042935670990069e-06, "loss": 0.6912, "step": 4061 }, { "epoch": 0.71, "grad_norm": 0.6712456466714375, "learning_rate": 4.038374465708143e-06, "loss": 0.7083, "step": 4062 }, { "epoch": 0.71, "grad_norm": 0.724889769471863, "learning_rate": 4.033815183770847e-06, "loss": 0.7398, "step": 4063 }, { "epoch": 0.71, "grad_norm": 0.6792990868455396, "learning_rate": 4.029257826649105e-06, "loss": 0.7024, "step": 4064 }, { "epoch": 0.71, "grad_norm": 0.6995132026212403, "learning_rate": 4.0247023958132136e-06, "loss": 0.6767, "step": 4065 }, { "epoch": 0.71, "grad_norm": 0.6339298374515113, "learning_rate": 4.020148892732851e-06, "loss": 0.6918, "step": 4066 }, { "epoch": 0.71, "grad_norm": 0.8917780257144801, "learning_rate": 4.015597318877073e-06, "loss": 0.7333, "step": 4067 }, { "epoch": 0.71, "grad_norm": 0.7047361941276172, "learning_rate": 4.0110476757143135e-06, "loss": 0.6604, "step": 4068 }, { "epoch": 0.71, "grad_norm": 0.6831823821697504, "learning_rate": 4.006499964712377e-06, "loss": 0.7381, "step": 4069 }, { "epoch": 0.71, "grad_norm": 0.7545247781484505, "learning_rate": 4.001954187338451e-06, "loss": 0.7748, "step": 4070 }, { "epoch": 0.71, "grad_norm": 0.7187603400865059, "learning_rate": 3.9974103450591015e-06, "loss": 0.6733, "step": 4071 }, { "epoch": 0.71, "grad_norm": 0.752869408919982, "learning_rate": 3.992868439340268e-06, "loss": 0.7559, "step": 4072 }, { "epoch": 0.71, "grad_norm": 0.6956253888604574, "learning_rate": 3.988328471647257e-06, "loss": 0.7109, "step": 4073 }, { "epoch": 0.71, "grad_norm": 0.622703697117216, "learning_rate": 3.9837904434447635e-06, "loss": 0.7349, "step": 4074 }, { "epoch": 0.71, "grad_norm": 0.6353703788172573, "learning_rate": 3.979254356196845e-06, "loss": 0.7245, "step": 4075 }, { "epoch": 0.71, "grad_norm": 0.6631488897410306, "learning_rate": 3.974720211366938e-06, "loss": 0.7121, "step": 4076 }, { "epoch": 0.71, "grad_norm": 0.7341204385083082, "learning_rate": 3.970188010417855e-06, "loss": 0.6912, "step": 4077 }, { "epoch": 0.72, "grad_norm": 0.6023243312376773, "learning_rate": 3.965657754811777e-06, "loss": 0.7012, "step": 4078 }, { "epoch": 0.72, "grad_norm": 0.7392345368247785, "learning_rate": 3.961129446010262e-06, "loss": 0.7005, "step": 4079 }, { "epoch": 0.72, "grad_norm": 0.7637944200535546, "learning_rate": 3.956603085474231e-06, "loss": 0.7534, "step": 4080 }, { "epoch": 0.72, "grad_norm": 0.7304559583961786, "learning_rate": 3.952078674663989e-06, "loss": 0.7179, "step": 4081 }, { "epoch": 0.72, "grad_norm": 0.6589969349506175, "learning_rate": 3.9475562150391975e-06, "loss": 0.7224, "step": 4082 }, { "epoch": 0.72, "grad_norm": 0.7366060778622724, "learning_rate": 3.943035708058901e-06, "loss": 0.7366, "step": 4083 }, { "epoch": 0.72, "grad_norm": 0.7016140382779684, "learning_rate": 3.9385171551815074e-06, "loss": 0.6922, "step": 4084 }, { "epoch": 0.72, "grad_norm": 0.6492315274910282, "learning_rate": 3.934000557864801e-06, "loss": 0.6701, "step": 4085 }, { "epoch": 0.72, "grad_norm": 0.7677861673730672, "learning_rate": 3.929485917565922e-06, "loss": 0.6975, "step": 4086 }, { "epoch": 0.72, "grad_norm": 0.6345939522879193, "learning_rate": 3.924973235741393e-06, "loss": 0.6877, "step": 4087 }, { "epoch": 0.72, "grad_norm": 0.644454567294953, "learning_rate": 3.920462513847096e-06, "loss": 0.6713, "step": 4088 }, { "epoch": 0.72, "grad_norm": 0.6776451098319187, "learning_rate": 3.915953753338288e-06, "loss": 0.6739, "step": 4089 }, { "epoch": 0.72, "grad_norm": 0.640049317987023, "learning_rate": 3.911446955669588e-06, "loss": 0.6649, "step": 4090 }, { "epoch": 0.72, "grad_norm": 0.6525595932219584, "learning_rate": 3.906942122294983e-06, "loss": 0.686, "step": 4091 }, { "epoch": 0.72, "grad_norm": 0.6279312381085187, "learning_rate": 3.9024392546678215e-06, "loss": 0.6924, "step": 4092 }, { "epoch": 0.72, "grad_norm": 0.7120537675247931, "learning_rate": 3.8979383542408254e-06, "loss": 0.6853, "step": 4093 }, { "epoch": 0.72, "grad_norm": 0.6582150533270535, "learning_rate": 3.893439422466079e-06, "loss": 0.7139, "step": 4094 }, { "epoch": 0.72, "grad_norm": 0.6413898789737901, "learning_rate": 3.888942460795032e-06, "loss": 0.6879, "step": 4095 }, { "epoch": 0.72, "grad_norm": 0.7312180485811128, "learning_rate": 3.884447470678499e-06, "loss": 0.7196, "step": 4096 }, { "epoch": 0.72, "grad_norm": 0.6727694257849652, "learning_rate": 3.879954453566657e-06, "loss": 0.719, "step": 4097 }, { "epoch": 0.72, "grad_norm": 0.7097732280272122, "learning_rate": 3.875463410909045e-06, "loss": 0.6982, "step": 4098 }, { "epoch": 0.72, "grad_norm": 0.7372679733846382, "learning_rate": 3.870974344154571e-06, "loss": 0.7461, "step": 4099 }, { "epoch": 0.72, "grad_norm": 0.6216035418003141, "learning_rate": 3.866487254751495e-06, "loss": 0.6906, "step": 4100 }, { "epoch": 0.72, "grad_norm": 0.6328448345740221, "learning_rate": 3.862002144147449e-06, "loss": 0.6648, "step": 4101 }, { "epoch": 0.72, "grad_norm": 0.6990975296627503, "learning_rate": 3.857519013789424e-06, "loss": 0.7073, "step": 4102 }, { "epoch": 0.72, "grad_norm": 0.6602859523941846, "learning_rate": 3.853037865123773e-06, "loss": 0.666, "step": 4103 }, { "epoch": 0.72, "grad_norm": 0.7387699039062786, "learning_rate": 3.848558699596202e-06, "loss": 0.6932, "step": 4104 }, { "epoch": 0.72, "grad_norm": 0.6706568224660141, "learning_rate": 3.844081518651788e-06, "loss": 0.7007, "step": 4105 }, { "epoch": 0.72, "grad_norm": 0.7038842346687189, "learning_rate": 3.83960632373496e-06, "loss": 0.7398, "step": 4106 }, { "epoch": 0.72, "grad_norm": 0.7028187251362598, "learning_rate": 3.835133116289512e-06, "loss": 0.6675, "step": 4107 }, { "epoch": 0.72, "grad_norm": 1.8196142073898787, "learning_rate": 3.830661897758595e-06, "loss": 0.7219, "step": 4108 }, { "epoch": 0.72, "grad_norm": 0.7040763754806472, "learning_rate": 3.826192669584715e-06, "loss": 0.7092, "step": 4109 }, { "epoch": 0.72, "grad_norm": 0.6767091019439144, "learning_rate": 3.821725433209735e-06, "loss": 0.7042, "step": 4110 }, { "epoch": 0.72, "grad_norm": 0.6891031924173102, "learning_rate": 3.817260190074882e-06, "loss": 0.7128, "step": 4111 }, { "epoch": 0.72, "grad_norm": 0.6481628826483844, "learning_rate": 3.8127969416207354e-06, "loss": 0.6863, "step": 4112 }, { "epoch": 0.72, "grad_norm": 0.7386027142216073, "learning_rate": 3.8083356892872337e-06, "loss": 0.7692, "step": 4113 }, { "epoch": 0.72, "grad_norm": 0.6313585405694262, "learning_rate": 3.8038764345136712e-06, "loss": 0.6608, "step": 4114 }, { "epoch": 0.72, "grad_norm": 0.6402379257936502, "learning_rate": 3.7994191787386914e-06, "loss": 0.6793, "step": 4115 }, { "epoch": 0.72, "grad_norm": 0.7597925382858056, "learning_rate": 3.794963923400301e-06, "loss": 0.7288, "step": 4116 }, { "epoch": 0.72, "grad_norm": 0.7287238768983444, "learning_rate": 3.790510669935861e-06, "loss": 0.7474, "step": 4117 }, { "epoch": 0.72, "grad_norm": 0.738711728997057, "learning_rate": 3.7860594197820777e-06, "loss": 0.6955, "step": 4118 }, { "epoch": 0.72, "grad_norm": 0.7527409552193809, "learning_rate": 3.78161017437502e-06, "loss": 0.7545, "step": 4119 }, { "epoch": 0.72, "grad_norm": 0.7163892493523407, "learning_rate": 3.7771629351501103e-06, "loss": 0.6753, "step": 4120 }, { "epoch": 0.72, "grad_norm": 0.7262509227881031, "learning_rate": 3.772717703542115e-06, "loss": 0.7354, "step": 4121 }, { "epoch": 0.72, "grad_norm": 0.7072396673501435, "learning_rate": 3.76827448098516e-06, "loss": 0.7281, "step": 4122 }, { "epoch": 0.72, "grad_norm": 0.7298791653041709, "learning_rate": 3.763833268912723e-06, "loss": 0.6887, "step": 4123 }, { "epoch": 0.72, "grad_norm": 0.6623895534630554, "learning_rate": 3.7593940687576325e-06, "loss": 0.6675, "step": 4124 }, { "epoch": 0.72, "grad_norm": 0.9301847959920553, "learning_rate": 3.7549568819520644e-06, "loss": 0.6731, "step": 4125 }, { "epoch": 0.72, "grad_norm": 0.72663164704963, "learning_rate": 3.7505217099275525e-06, "loss": 0.7518, "step": 4126 }, { "epoch": 0.72, "grad_norm": 0.7185711717908811, "learning_rate": 3.7460885541149727e-06, "loss": 0.6876, "step": 4127 }, { "epoch": 0.72, "grad_norm": 0.6891708778038718, "learning_rate": 3.7416574159445496e-06, "loss": 0.6415, "step": 4128 }, { "epoch": 0.72, "grad_norm": 0.7004585272540013, "learning_rate": 3.737228296845864e-06, "loss": 0.786, "step": 4129 }, { "epoch": 0.72, "grad_norm": 0.6943563819324008, "learning_rate": 3.732801198247842e-06, "loss": 0.675, "step": 4130 }, { "epoch": 0.72, "grad_norm": 0.6785201096757093, "learning_rate": 3.728376121578757e-06, "loss": 0.6957, "step": 4131 }, { "epoch": 0.72, "grad_norm": 0.74538105751841, "learning_rate": 3.7239530682662363e-06, "loss": 0.6833, "step": 4132 }, { "epoch": 0.72, "grad_norm": 0.6915210235234839, "learning_rate": 3.71953203973724e-06, "loss": 0.6795, "step": 4133 }, { "epoch": 0.72, "grad_norm": 0.7067295751103678, "learning_rate": 3.71511303741809e-06, "loss": 0.6942, "step": 4134 }, { "epoch": 0.73, "grad_norm": 0.7186790492767638, "learning_rate": 3.710696062734447e-06, "loss": 0.7262, "step": 4135 }, { "epoch": 0.73, "grad_norm": 0.7453210576374013, "learning_rate": 3.706281117111322e-06, "loss": 0.6642, "step": 4136 }, { "epoch": 0.73, "grad_norm": 0.829757203079186, "learning_rate": 3.7018682019730632e-06, "loss": 0.7231, "step": 4137 }, { "epoch": 0.73, "grad_norm": 0.686724229174792, "learning_rate": 3.697457318743375e-06, "loss": 0.681, "step": 4138 }, { "epoch": 0.73, "grad_norm": 0.6540899809061967, "learning_rate": 3.6930484688452916e-06, "loss": 0.6927, "step": 4139 }, { "epoch": 0.73, "grad_norm": 0.7884837387593868, "learning_rate": 3.6886416537012073e-06, "loss": 0.727, "step": 4140 }, { "epoch": 0.73, "grad_norm": 0.6983306732365626, "learning_rate": 3.684236874732848e-06, "loss": 0.6818, "step": 4141 }, { "epoch": 0.73, "grad_norm": 0.6429417426337873, "learning_rate": 3.679834133361292e-06, "loss": 0.6972, "step": 4142 }, { "epoch": 0.73, "grad_norm": 0.764259675543516, "learning_rate": 3.675433431006954e-06, "loss": 0.7452, "step": 4143 }, { "epoch": 0.73, "grad_norm": 0.6806894953890447, "learning_rate": 3.6710347690895885e-06, "loss": 0.6948, "step": 4144 }, { "epoch": 0.73, "grad_norm": 0.7498765296972095, "learning_rate": 3.666638149028302e-06, "loss": 0.7621, "step": 4145 }, { "epoch": 0.73, "grad_norm": 0.7272327049074007, "learning_rate": 3.6622435722415284e-06, "loss": 0.7396, "step": 4146 }, { "epoch": 0.73, "grad_norm": 0.6685235898208827, "learning_rate": 3.6578510401470526e-06, "loss": 0.6738, "step": 4147 }, { "epoch": 0.73, "grad_norm": 0.6852793518675613, "learning_rate": 3.653460554161998e-06, "loss": 0.7008, "step": 4148 }, { "epoch": 0.73, "grad_norm": 0.7987047350191235, "learning_rate": 3.6490721157028265e-06, "loss": 0.722, "step": 4149 }, { "epoch": 0.73, "grad_norm": 0.6752235978269928, "learning_rate": 3.6446857261853428e-06, "loss": 0.6433, "step": 4150 }, { "epoch": 0.73, "grad_norm": 0.8118613253220879, "learning_rate": 3.6403013870246827e-06, "loss": 0.7204, "step": 4151 }, { "epoch": 0.73, "grad_norm": 0.6635855937234578, "learning_rate": 3.6359190996353257e-06, "loss": 0.6673, "step": 4152 }, { "epoch": 0.73, "grad_norm": 0.726303974277222, "learning_rate": 3.631538865431091e-06, "loss": 0.7543, "step": 4153 }, { "epoch": 0.73, "grad_norm": 0.693839481147394, "learning_rate": 3.627160685825136e-06, "loss": 0.6787, "step": 4154 }, { "epoch": 0.73, "grad_norm": 0.6686195740702836, "learning_rate": 3.6227845622299474e-06, "loss": 0.7194, "step": 4155 }, { "epoch": 0.73, "grad_norm": 0.8551966256843198, "learning_rate": 3.6184104960573584e-06, "loss": 0.8132, "step": 4156 }, { "epoch": 0.73, "grad_norm": 0.6326882703289347, "learning_rate": 3.6140384887185286e-06, "loss": 0.7292, "step": 4157 }, { "epoch": 0.73, "grad_norm": 0.7118917430018665, "learning_rate": 3.609668541623962e-06, "loss": 0.6976, "step": 4158 }, { "epoch": 0.73, "grad_norm": 0.741042417240332, "learning_rate": 3.6053006561834947e-06, "loss": 0.7057, "step": 4159 }, { "epoch": 0.73, "grad_norm": 0.8092978103840734, "learning_rate": 3.6009348338062968e-06, "loss": 0.7106, "step": 4160 }, { "epoch": 0.73, "grad_norm": 0.7666590802149164, "learning_rate": 3.5965710759008778e-06, "loss": 0.7432, "step": 4161 }, { "epoch": 0.73, "grad_norm": 0.7066257123276345, "learning_rate": 3.5922093838750684e-06, "loss": 0.7055, "step": 4162 }, { "epoch": 0.73, "grad_norm": 0.6690470815807529, "learning_rate": 3.587849759136051e-06, "loss": 0.6922, "step": 4163 }, { "epoch": 0.73, "grad_norm": 0.6754867205953481, "learning_rate": 3.583492203090322e-06, "loss": 0.6859, "step": 4164 }, { "epoch": 0.73, "grad_norm": 0.6765168000354858, "learning_rate": 3.5791367171437252e-06, "loss": 0.6649, "step": 4165 }, { "epoch": 0.73, "grad_norm": 0.7696327671043454, "learning_rate": 3.574783302701429e-06, "loss": 0.6903, "step": 4166 }, { "epoch": 0.73, "grad_norm": 0.7301368987337316, "learning_rate": 3.5704319611679407e-06, "loss": 0.7026, "step": 4167 }, { "epoch": 0.73, "grad_norm": 0.679890945168546, "learning_rate": 3.5660826939470873e-06, "loss": 0.6593, "step": 4168 }, { "epoch": 0.73, "grad_norm": 0.6949728467605827, "learning_rate": 3.5617355024420343e-06, "loss": 0.7032, "step": 4169 }, { "epoch": 0.73, "grad_norm": 0.6661672877355814, "learning_rate": 3.5573903880552785e-06, "loss": 0.704, "step": 4170 }, { "epoch": 0.73, "grad_norm": 0.7212962401994266, "learning_rate": 3.553047352188643e-06, "loss": 0.7195, "step": 4171 }, { "epoch": 0.73, "grad_norm": 0.7315957946603637, "learning_rate": 3.5487063962432856e-06, "loss": 0.7011, "step": 4172 }, { "epoch": 0.73, "grad_norm": 0.7053867324031871, "learning_rate": 3.544367521619686e-06, "loss": 0.7214, "step": 4173 }, { "epoch": 0.73, "grad_norm": 0.6259413072001925, "learning_rate": 3.5400307297176516e-06, "loss": 0.6769, "step": 4174 }, { "epoch": 0.73, "grad_norm": 0.6233551483021637, "learning_rate": 3.5356960219363256e-06, "loss": 0.715, "step": 4175 }, { "epoch": 0.73, "grad_norm": 0.6876072715285857, "learning_rate": 3.5313633996741748e-06, "loss": 0.7013, "step": 4176 }, { "epoch": 0.73, "grad_norm": 0.7557484773556263, "learning_rate": 3.527032864328993e-06, "loss": 0.701, "step": 4177 }, { "epoch": 0.73, "grad_norm": 0.7195991837566319, "learning_rate": 3.522704417297902e-06, "loss": 0.719, "step": 4178 }, { "epoch": 0.73, "grad_norm": 0.7074659581026258, "learning_rate": 3.518378059977352e-06, "loss": 0.6825, "step": 4179 }, { "epoch": 0.73, "grad_norm": 0.7308340532160994, "learning_rate": 3.51405379376311e-06, "loss": 0.7102, "step": 4180 }, { "epoch": 0.73, "grad_norm": 0.8317088282358945, "learning_rate": 3.5097316200502807e-06, "loss": 0.7137, "step": 4181 }, { "epoch": 0.73, "grad_norm": 0.6495416029552771, "learning_rate": 3.50541154023328e-06, "loss": 0.6631, "step": 4182 }, { "epoch": 0.73, "grad_norm": 0.6808734204203105, "learning_rate": 3.501093555705859e-06, "loss": 0.7168, "step": 4183 }, { "epoch": 0.73, "grad_norm": 0.6875893914630794, "learning_rate": 3.496777667861091e-06, "loss": 0.6852, "step": 4184 }, { "epoch": 0.73, "grad_norm": 0.7044880038897607, "learning_rate": 3.4924638780913733e-06, "loss": 0.7403, "step": 4185 }, { "epoch": 0.73, "grad_norm": 0.6769090467780672, "learning_rate": 3.4881521877884185e-06, "loss": 0.6726, "step": 4186 }, { "epoch": 0.73, "grad_norm": 0.6795405198728878, "learning_rate": 3.48384259834327e-06, "loss": 0.6809, "step": 4187 }, { "epoch": 0.73, "grad_norm": 0.6921185948242717, "learning_rate": 3.4795351111462926e-06, "loss": 0.7244, "step": 4188 }, { "epoch": 0.73, "grad_norm": 0.6432644311754939, "learning_rate": 3.4752297275871706e-06, "loss": 0.6817, "step": 4189 }, { "epoch": 0.73, "grad_norm": 0.7014521443446441, "learning_rate": 3.4709264490549134e-06, "loss": 0.709, "step": 4190 }, { "epoch": 0.73, "grad_norm": 0.6472809782017682, "learning_rate": 3.466625276937845e-06, "loss": 0.6702, "step": 4191 }, { "epoch": 0.74, "grad_norm": 0.691132335723365, "learning_rate": 3.462326212623611e-06, "loss": 0.6865, "step": 4192 }, { "epoch": 0.74, "grad_norm": 0.6577813293677944, "learning_rate": 3.4580292574991827e-06, "loss": 0.6632, "step": 4193 }, { "epoch": 0.74, "grad_norm": 0.7150635908464325, "learning_rate": 3.4537344129508453e-06, "loss": 0.7029, "step": 4194 }, { "epoch": 0.74, "grad_norm": 0.6125468269608902, "learning_rate": 3.449441680364207e-06, "loss": 0.676, "step": 4195 }, { "epoch": 0.74, "grad_norm": 0.6714971713342085, "learning_rate": 3.4451510611241957e-06, "loss": 0.682, "step": 4196 }, { "epoch": 0.74, "grad_norm": 0.6722023767329092, "learning_rate": 3.440862556615047e-06, "loss": 0.7325, "step": 4197 }, { "epoch": 0.74, "grad_norm": 0.738182869577407, "learning_rate": 3.4365761682203258e-06, "loss": 0.6482, "step": 4198 }, { "epoch": 0.74, "grad_norm": 0.7691822657472277, "learning_rate": 3.4322918973229104e-06, "loss": 0.7478, "step": 4199 }, { "epoch": 0.74, "grad_norm": 0.7180203679845676, "learning_rate": 3.428009745304999e-06, "loss": 0.7286, "step": 4200 }, { "epoch": 0.74, "grad_norm": 0.7138241938313679, "learning_rate": 3.4237297135480964e-06, "loss": 0.7129, "step": 4201 }, { "epoch": 0.74, "grad_norm": 0.7428190993468228, "learning_rate": 3.419451803433037e-06, "loss": 0.7156, "step": 4202 }, { "epoch": 0.74, "grad_norm": 0.6355489524268946, "learning_rate": 3.4151760163399573e-06, "loss": 0.6685, "step": 4203 }, { "epoch": 0.74, "grad_norm": 0.6584699880549385, "learning_rate": 3.410902353648319e-06, "loss": 0.7282, "step": 4204 }, { "epoch": 0.74, "grad_norm": 0.6043057883656514, "learning_rate": 3.4066308167368934e-06, "loss": 0.7088, "step": 4205 }, { "epoch": 0.74, "grad_norm": 0.719294658101093, "learning_rate": 3.4023614069837676e-06, "loss": 0.6841, "step": 4206 }, { "epoch": 0.74, "grad_norm": 0.8130232383194077, "learning_rate": 3.3980941257663424e-06, "loss": 0.7401, "step": 4207 }, { "epoch": 0.74, "grad_norm": 0.6759959978691861, "learning_rate": 3.393828974461335e-06, "loss": 0.6852, "step": 4208 }, { "epoch": 0.74, "grad_norm": 0.6378960331611802, "learning_rate": 3.3895659544447703e-06, "loss": 0.6603, "step": 4209 }, { "epoch": 0.74, "grad_norm": 0.9094181831246927, "learning_rate": 3.385305067091981e-06, "loss": 0.701, "step": 4210 }, { "epoch": 0.74, "grad_norm": 0.709981636479359, "learning_rate": 3.3810463137776238e-06, "loss": 0.6625, "step": 4211 }, { "epoch": 0.74, "grad_norm": 0.6817705182048864, "learning_rate": 3.3767896958756605e-06, "loss": 0.7011, "step": 4212 }, { "epoch": 0.74, "grad_norm": 0.6084991249387433, "learning_rate": 3.372535214759365e-06, "loss": 0.6414, "step": 4213 }, { "epoch": 0.74, "grad_norm": 0.6684829864860282, "learning_rate": 3.368282871801325e-06, "loss": 0.7141, "step": 4214 }, { "epoch": 0.74, "grad_norm": 0.7375501872262327, "learning_rate": 3.364032668373427e-06, "loss": 0.6922, "step": 4215 }, { "epoch": 0.74, "grad_norm": 0.7612098623112753, "learning_rate": 3.359784605846881e-06, "loss": 0.7443, "step": 4216 }, { "epoch": 0.74, "grad_norm": 0.6900040732448244, "learning_rate": 3.355538685592198e-06, "loss": 0.6948, "step": 4217 }, { "epoch": 0.74, "grad_norm": 0.7185649593248153, "learning_rate": 3.3512949089792046e-06, "loss": 0.6966, "step": 4218 }, { "epoch": 0.74, "grad_norm": 0.652133903980499, "learning_rate": 3.3470532773770257e-06, "loss": 0.6927, "step": 4219 }, { "epoch": 0.74, "grad_norm": 0.7236227605370223, "learning_rate": 3.342813792154106e-06, "loss": 0.7227, "step": 4220 }, { "epoch": 0.74, "grad_norm": 0.6602586558221443, "learning_rate": 3.338576454678184e-06, "loss": 0.6939, "step": 4221 }, { "epoch": 0.74, "grad_norm": 0.6685813621702712, "learning_rate": 3.334341266316319e-06, "loss": 0.6785, "step": 4222 }, { "epoch": 0.74, "grad_norm": 0.6824094533316672, "learning_rate": 3.3301082284348696e-06, "loss": 0.7031, "step": 4223 }, { "epoch": 0.74, "grad_norm": 0.6367952975831179, "learning_rate": 3.3258773423995005e-06, "loss": 0.6784, "step": 4224 }, { "epoch": 0.74, "grad_norm": 0.6711917483187669, "learning_rate": 3.3216486095751897e-06, "loss": 0.6948, "step": 4225 }, { "epoch": 0.74, "grad_norm": 0.6412845555268937, "learning_rate": 3.3174220313262074e-06, "loss": 0.6523, "step": 4226 }, { "epoch": 0.74, "grad_norm": 0.6629045980027822, "learning_rate": 3.313197609016141e-06, "loss": 0.7017, "step": 4227 }, { "epoch": 0.74, "grad_norm": 0.6620952448969619, "learning_rate": 3.308975344007871e-06, "loss": 0.7014, "step": 4228 }, { "epoch": 0.74, "grad_norm": 0.683896407805782, "learning_rate": 3.3047552376635925e-06, "loss": 0.7212, "step": 4229 }, { "epoch": 0.74, "grad_norm": 0.6772412314318997, "learning_rate": 3.3005372913447986e-06, "loss": 0.7132, "step": 4230 }, { "epoch": 0.74, "grad_norm": 0.7391460500505719, "learning_rate": 3.2963215064122878e-06, "loss": 0.7504, "step": 4231 }, { "epoch": 0.74, "grad_norm": 0.6801246798004695, "learning_rate": 3.292107884226161e-06, "loss": 0.6521, "step": 4232 }, { "epoch": 0.74, "grad_norm": 0.6631711036122113, "learning_rate": 3.287896426145818e-06, "loss": 0.6985, "step": 4233 }, { "epoch": 0.74, "grad_norm": 0.7196931197959254, "learning_rate": 3.2836871335299625e-06, "loss": 0.7618, "step": 4234 }, { "epoch": 0.74, "grad_norm": 0.6515489293463687, "learning_rate": 3.279480007736602e-06, "loss": 0.7196, "step": 4235 }, { "epoch": 0.74, "grad_norm": 0.6638874634796682, "learning_rate": 3.2752750501230444e-06, "loss": 0.6576, "step": 4236 }, { "epoch": 0.74, "grad_norm": 0.6163533776448326, "learning_rate": 3.2710722620458925e-06, "loss": 0.7014, "step": 4237 }, { "epoch": 0.74, "grad_norm": 0.6825677699154447, "learning_rate": 3.2668716448610595e-06, "loss": 0.6778, "step": 4238 }, { "epoch": 0.74, "grad_norm": 0.6499977217470185, "learning_rate": 3.2626731999237436e-06, "loss": 0.689, "step": 4239 }, { "epoch": 0.74, "grad_norm": 0.6689385065863087, "learning_rate": 3.2584769285884554e-06, "loss": 0.6821, "step": 4240 }, { "epoch": 0.74, "grad_norm": 0.6403236928833032, "learning_rate": 3.254282832208998e-06, "loss": 0.6822, "step": 4241 }, { "epoch": 0.74, "grad_norm": 0.6835329052037585, "learning_rate": 3.2500909121384773e-06, "loss": 0.689, "step": 4242 }, { "epoch": 0.74, "grad_norm": 0.7384456543681011, "learning_rate": 3.245901169729294e-06, "loss": 0.7271, "step": 4243 }, { "epoch": 0.74, "grad_norm": 0.7175444332633186, "learning_rate": 3.241713606333142e-06, "loss": 0.6948, "step": 4244 }, { "epoch": 0.74, "grad_norm": 0.6043573371922646, "learning_rate": 3.237528223301022e-06, "loss": 0.6857, "step": 4245 }, { "epoch": 0.74, "grad_norm": 0.6170995636695833, "learning_rate": 3.2333450219832195e-06, "loss": 0.709, "step": 4246 }, { "epoch": 0.74, "grad_norm": 0.7079761904670429, "learning_rate": 3.2291640037293258e-06, "loss": 0.7323, "step": 4247 }, { "epoch": 0.74, "grad_norm": 0.651671530801884, "learning_rate": 3.224985169888225e-06, "loss": 0.6543, "step": 4248 }, { "epoch": 0.75, "grad_norm": 0.6907117467171854, "learning_rate": 3.2208085218080974e-06, "loss": 0.7044, "step": 4249 }, { "epoch": 0.75, "grad_norm": 0.6846277970162123, "learning_rate": 3.2166340608364134e-06, "loss": 0.6626, "step": 4250 }, { "epoch": 0.75, "grad_norm": 0.6619474103855552, "learning_rate": 3.2124617883199405e-06, "loss": 0.7066, "step": 4251 }, { "epoch": 0.75, "grad_norm": 0.6711796935340077, "learning_rate": 3.2082917056047435e-06, "loss": 0.677, "step": 4252 }, { "epoch": 0.75, "grad_norm": 0.6931622347891565, "learning_rate": 3.2041238140361774e-06, "loss": 0.6873, "step": 4253 }, { "epoch": 0.75, "grad_norm": 0.7011440025221709, "learning_rate": 3.199958114958893e-06, "loss": 0.7099, "step": 4254 }, { "epoch": 0.75, "grad_norm": 0.6012658932181135, "learning_rate": 3.1957946097168302e-06, "loss": 0.635, "step": 4255 }, { "epoch": 0.75, "grad_norm": 0.6840279770342765, "learning_rate": 3.1916332996532185e-06, "loss": 0.7195, "step": 4256 }, { "epoch": 0.75, "grad_norm": 0.5947735202686021, "learning_rate": 3.1874741861105862e-06, "loss": 0.6708, "step": 4257 }, { "epoch": 0.75, "grad_norm": 0.7101407914155566, "learning_rate": 3.1833172704307504e-06, "loss": 0.7346, "step": 4258 }, { "epoch": 0.75, "grad_norm": 0.6899852164750276, "learning_rate": 3.1791625539548175e-06, "loss": 0.7372, "step": 4259 }, { "epoch": 0.75, "grad_norm": 0.5974338362590322, "learning_rate": 3.1750100380231874e-06, "loss": 0.6633, "step": 4260 }, { "epoch": 0.75, "grad_norm": 0.7154782752746012, "learning_rate": 3.1708597239755513e-06, "loss": 0.7035, "step": 4261 }, { "epoch": 0.75, "grad_norm": 0.6889100217207367, "learning_rate": 3.1667116131508792e-06, "loss": 0.7029, "step": 4262 }, { "epoch": 0.75, "grad_norm": 0.6841324023471677, "learning_rate": 3.1625657068874414e-06, "loss": 0.6637, "step": 4263 }, { "epoch": 0.75, "grad_norm": 0.7228334317828661, "learning_rate": 3.158422006522799e-06, "loss": 0.696, "step": 4264 }, { "epoch": 0.75, "grad_norm": 0.6621813566366248, "learning_rate": 3.154280513393787e-06, "loss": 0.6939, "step": 4265 }, { "epoch": 0.75, "grad_norm": 0.719486835844998, "learning_rate": 3.150141228836543e-06, "loss": 0.7775, "step": 4266 }, { "epoch": 0.75, "grad_norm": 0.6613530417704682, "learning_rate": 3.1460041541864883e-06, "loss": 0.6892, "step": 4267 }, { "epoch": 0.75, "grad_norm": 0.6720584085834083, "learning_rate": 3.1418692907783235e-06, "loss": 0.6672, "step": 4268 }, { "epoch": 0.75, "grad_norm": 0.748047226241028, "learning_rate": 3.137736639946043e-06, "loss": 0.7447, "step": 4269 }, { "epoch": 0.75, "grad_norm": 0.5997112117391916, "learning_rate": 3.1336062030229284e-06, "loss": 0.6906, "step": 4270 }, { "epoch": 0.75, "grad_norm": 0.7287705376116427, "learning_rate": 3.1294779813415456e-06, "loss": 0.6771, "step": 4271 }, { "epoch": 0.75, "grad_norm": 0.6748431824048075, "learning_rate": 3.1253519762337447e-06, "loss": 0.7086, "step": 4272 }, { "epoch": 0.75, "grad_norm": 0.6912742492077535, "learning_rate": 3.1212281890306604e-06, "loss": 0.6851, "step": 4273 }, { "epoch": 0.75, "grad_norm": 0.7219764151569331, "learning_rate": 3.1171066210627075e-06, "loss": 0.7197, "step": 4274 }, { "epoch": 0.75, "grad_norm": 0.6726851897770894, "learning_rate": 3.1129872736595947e-06, "loss": 0.7061, "step": 4275 }, { "epoch": 0.75, "grad_norm": 0.6948863092585174, "learning_rate": 3.1088701481503082e-06, "loss": 0.7143, "step": 4276 }, { "epoch": 0.75, "grad_norm": 0.6834453553667204, "learning_rate": 3.104755245863119e-06, "loss": 0.7005, "step": 4277 }, { "epoch": 0.75, "grad_norm": 0.6684051375014362, "learning_rate": 3.1006425681255826e-06, "loss": 0.7151, "step": 4278 }, { "epoch": 0.75, "grad_norm": 0.6148416345711465, "learning_rate": 3.096532116264529e-06, "loss": 0.6612, "step": 4279 }, { "epoch": 0.75, "grad_norm": 0.6843830212420293, "learning_rate": 3.092423891606079e-06, "loss": 0.6937, "step": 4280 }, { "epoch": 0.75, "grad_norm": 0.6924134947390549, "learning_rate": 3.088317895475631e-06, "loss": 0.7088, "step": 4281 }, { "epoch": 0.75, "grad_norm": 0.6707159256518196, "learning_rate": 3.0842141291978677e-06, "loss": 0.7296, "step": 4282 }, { "epoch": 0.75, "grad_norm": 0.6484470480390797, "learning_rate": 3.080112594096745e-06, "loss": 0.68, "step": 4283 }, { "epoch": 0.75, "grad_norm": 0.6067888618189627, "learning_rate": 3.076013291495508e-06, "loss": 0.6724, "step": 4284 }, { "epoch": 0.75, "grad_norm": 0.6612958607377855, "learning_rate": 3.07191622271667e-06, "loss": 0.7005, "step": 4285 }, { "epoch": 0.75, "grad_norm": 0.6887649750516756, "learning_rate": 3.0678213890820373e-06, "loss": 0.7145, "step": 4286 }, { "epoch": 0.75, "grad_norm": 0.6474578523127409, "learning_rate": 3.0637287919126856e-06, "loss": 0.7004, "step": 4287 }, { "epoch": 0.75, "grad_norm": 0.6999147231446484, "learning_rate": 3.059638432528974e-06, "loss": 0.7498, "step": 4288 }, { "epoch": 0.75, "grad_norm": 0.6719816663028715, "learning_rate": 3.055550312250536e-06, "loss": 0.6909, "step": 4289 }, { "epoch": 0.75, "grad_norm": 0.6855829956382038, "learning_rate": 3.0514644323962895e-06, "loss": 0.6356, "step": 4290 }, { "epoch": 0.75, "grad_norm": 0.7587808216488773, "learning_rate": 3.047380794284419e-06, "loss": 0.7537, "step": 4291 }, { "epoch": 0.75, "grad_norm": 0.6433946841169893, "learning_rate": 3.04329939923239e-06, "loss": 0.6823, "step": 4292 }, { "epoch": 0.75, "grad_norm": 0.6270676789241981, "learning_rate": 3.0392202485569477e-06, "loss": 0.6845, "step": 4293 }, { "epoch": 0.75, "grad_norm": 0.8813145994784798, "learning_rate": 3.035143343574112e-06, "loss": 0.6622, "step": 4294 }, { "epoch": 0.75, "grad_norm": 0.6558310565232773, "learning_rate": 3.0310686855991766e-06, "loss": 0.6958, "step": 4295 }, { "epoch": 0.75, "grad_norm": 0.6649812120953954, "learning_rate": 3.026996275946714e-06, "loss": 0.6896, "step": 4296 }, { "epoch": 0.75, "grad_norm": 0.708075435844975, "learning_rate": 3.0229261159305634e-06, "loss": 0.6967, "step": 4297 }, { "epoch": 0.75, "grad_norm": 0.6574909602903621, "learning_rate": 3.018858206863845e-06, "loss": 0.6912, "step": 4298 }, { "epoch": 0.75, "grad_norm": 0.6918184580130557, "learning_rate": 3.0147925500589516e-06, "loss": 0.7022, "step": 4299 }, { "epoch": 0.75, "grad_norm": 0.789585489410625, "learning_rate": 3.0107291468275523e-06, "loss": 0.7696, "step": 4300 }, { "epoch": 0.75, "grad_norm": 0.6476176428778349, "learning_rate": 3.006667998480579e-06, "loss": 0.7027, "step": 4301 }, { "epoch": 0.75, "grad_norm": 0.64054403996509, "learning_rate": 3.002609106328249e-06, "loss": 0.6626, "step": 4302 }, { "epoch": 0.75, "grad_norm": 0.6894366760817895, "learning_rate": 2.998552471680041e-06, "loss": 0.7485, "step": 4303 }, { "epoch": 0.75, "grad_norm": 0.7381471617982184, "learning_rate": 2.994498095844709e-06, "loss": 0.7019, "step": 4304 }, { "epoch": 0.75, "grad_norm": 0.7387188805883755, "learning_rate": 2.990445980130283e-06, "loss": 0.6947, "step": 4305 }, { "epoch": 0.76, "grad_norm": 0.6033688937624295, "learning_rate": 2.986396125844058e-06, "loss": 0.6478, "step": 4306 }, { "epoch": 0.76, "grad_norm": 0.6757631626655024, "learning_rate": 2.982348534292605e-06, "loss": 0.691, "step": 4307 }, { "epoch": 0.76, "grad_norm": 0.6670211764171111, "learning_rate": 2.9783032067817554e-06, "loss": 0.7134, "step": 4308 }, { "epoch": 0.76, "grad_norm": 0.715834194852012, "learning_rate": 2.974260144616622e-06, "loss": 0.7252, "step": 4309 }, { "epoch": 0.76, "grad_norm": 0.7330466872390747, "learning_rate": 2.9702193491015742e-06, "loss": 0.6904, "step": 4310 }, { "epoch": 0.76, "grad_norm": 0.6072991812760155, "learning_rate": 2.966180821540261e-06, "loss": 0.656, "step": 4311 }, { "epoch": 0.76, "grad_norm": 0.737926732222092, "learning_rate": 2.962144563235595e-06, "loss": 0.7177, "step": 4312 }, { "epoch": 0.76, "grad_norm": 0.6674346894872141, "learning_rate": 2.958110575489761e-06, "loss": 0.6663, "step": 4313 }, { "epoch": 0.76, "grad_norm": 0.6606273305948861, "learning_rate": 2.9540788596042003e-06, "loss": 0.673, "step": 4314 }, { "epoch": 0.76, "grad_norm": 0.7265547461758818, "learning_rate": 2.9500494168796325e-06, "loss": 0.7347, "step": 4315 }, { "epoch": 0.76, "grad_norm": 0.7266140451102369, "learning_rate": 2.9460222486160382e-06, "loss": 0.7084, "step": 4316 }, { "epoch": 0.76, "grad_norm": 0.6899114465907923, "learning_rate": 2.941997356112668e-06, "loss": 0.7361, "step": 4317 }, { "epoch": 0.76, "grad_norm": 0.672287951270178, "learning_rate": 2.9379747406680383e-06, "loss": 0.6621, "step": 4318 }, { "epoch": 0.76, "grad_norm": 0.7292874248370427, "learning_rate": 2.9339544035799216e-06, "loss": 0.6796, "step": 4319 }, { "epoch": 0.76, "grad_norm": 0.7282930032816345, "learning_rate": 2.92993634614537e-06, "loss": 0.7579, "step": 4320 }, { "epoch": 0.76, "grad_norm": 0.6986021182590472, "learning_rate": 2.925920569660685e-06, "loss": 0.6718, "step": 4321 }, { "epoch": 0.76, "grad_norm": 0.6930828404793721, "learning_rate": 2.921907075421444e-06, "loss": 0.676, "step": 4322 }, { "epoch": 0.76, "grad_norm": 0.7249082767284523, "learning_rate": 2.917895864722483e-06, "loss": 0.7123, "step": 4323 }, { "epoch": 0.76, "grad_norm": 0.7303287779382369, "learning_rate": 2.913886938857903e-06, "loss": 0.6907, "step": 4324 }, { "epoch": 0.76, "grad_norm": 0.6010593367628515, "learning_rate": 2.9098802991210695e-06, "loss": 0.6622, "step": 4325 }, { "epoch": 0.76, "grad_norm": 0.6105043396639995, "learning_rate": 2.9058759468046027e-06, "loss": 0.686, "step": 4326 }, { "epoch": 0.76, "grad_norm": 0.6450370925688146, "learning_rate": 2.9018738832003923e-06, "loss": 0.7034, "step": 4327 }, { "epoch": 0.76, "grad_norm": 0.6734581410298869, "learning_rate": 2.89787410959959e-06, "loss": 0.7152, "step": 4328 }, { "epoch": 0.76, "grad_norm": 0.6579232616134495, "learning_rate": 2.893876627292601e-06, "loss": 0.6937, "step": 4329 }, { "epoch": 0.76, "grad_norm": 0.6733451035268354, "learning_rate": 2.8898814375691e-06, "loss": 0.661, "step": 4330 }, { "epoch": 0.76, "grad_norm": 0.6917105877193951, "learning_rate": 2.8858885417180193e-06, "loss": 0.708, "step": 4331 }, { "epoch": 0.76, "grad_norm": 0.7118781516720698, "learning_rate": 2.881897941027546e-06, "loss": 0.6785, "step": 4332 }, { "epoch": 0.76, "grad_norm": 0.6852285007395738, "learning_rate": 2.8779096367851346e-06, "loss": 0.6749, "step": 4333 }, { "epoch": 0.76, "grad_norm": 0.7022562760721243, "learning_rate": 2.873923630277493e-06, "loss": 0.7241, "step": 4334 }, { "epoch": 0.76, "grad_norm": 0.7131137434976613, "learning_rate": 2.869939922790592e-06, "loss": 0.7104, "step": 4335 }, { "epoch": 0.76, "grad_norm": 0.6948315455084594, "learning_rate": 2.86595851560966e-06, "loss": 0.7233, "step": 4336 }, { "epoch": 0.76, "grad_norm": 0.7607019607764423, "learning_rate": 2.861979410019181e-06, "loss": 0.7528, "step": 4337 }, { "epoch": 0.76, "grad_norm": 0.7148931791604591, "learning_rate": 2.8580026073028933e-06, "loss": 0.676, "step": 4338 }, { "epoch": 0.76, "grad_norm": 0.6629739622866826, "learning_rate": 2.8540281087437983e-06, "loss": 0.6669, "step": 4339 }, { "epoch": 0.76, "grad_norm": 0.7136569158717904, "learning_rate": 2.8500559156241526e-06, "loss": 0.7126, "step": 4340 }, { "epoch": 0.76, "grad_norm": 0.6458336025722355, "learning_rate": 2.8460860292254698e-06, "loss": 0.7061, "step": 4341 }, { "epoch": 0.76, "grad_norm": 0.6853250752426268, "learning_rate": 2.842118450828516e-06, "loss": 0.6748, "step": 4342 }, { "epoch": 0.76, "grad_norm": 0.7058815749867753, "learning_rate": 2.83815318171332e-06, "loss": 0.7375, "step": 4343 }, { "epoch": 0.76, "grad_norm": 0.6492147780425177, "learning_rate": 2.834190223159152e-06, "loss": 0.6961, "step": 4344 }, { "epoch": 0.76, "grad_norm": 0.6405922746431567, "learning_rate": 2.8302295764445485e-06, "loss": 0.7008, "step": 4345 }, { "epoch": 0.76, "grad_norm": 0.6896270887498933, "learning_rate": 2.8262712428473015e-06, "loss": 0.6876, "step": 4346 }, { "epoch": 0.76, "grad_norm": 0.7213201180843737, "learning_rate": 2.822315223644444e-06, "loss": 0.7157, "step": 4347 }, { "epoch": 0.76, "grad_norm": 0.6460560135425492, "learning_rate": 2.818361520112274e-06, "loss": 0.7235, "step": 4348 }, { "epoch": 0.76, "grad_norm": 0.6693257136616763, "learning_rate": 2.8144101335263407e-06, "loss": 0.6803, "step": 4349 }, { "epoch": 0.76, "grad_norm": 0.7336453893625734, "learning_rate": 2.8104610651614383e-06, "loss": 0.7465, "step": 4350 }, { "epoch": 0.76, "grad_norm": 0.7219143363332473, "learning_rate": 2.8065143162916233e-06, "loss": 0.742, "step": 4351 }, { "epoch": 0.76, "grad_norm": 0.6771082084017352, "learning_rate": 2.8025698881901963e-06, "loss": 0.6922, "step": 4352 }, { "epoch": 0.76, "grad_norm": 0.6174617309361377, "learning_rate": 2.7986277821297137e-06, "loss": 0.6954, "step": 4353 }, { "epoch": 0.76, "grad_norm": 0.6715319636424123, "learning_rate": 2.794687999381983e-06, "loss": 0.7107, "step": 4354 }, { "epoch": 0.76, "grad_norm": 0.6399644824637609, "learning_rate": 2.790750541218058e-06, "loss": 0.707, "step": 4355 }, { "epoch": 0.76, "grad_norm": 0.6544944124594748, "learning_rate": 2.7868154089082422e-06, "loss": 0.6777, "step": 4356 }, { "epoch": 0.76, "grad_norm": 0.608157979838302, "learning_rate": 2.7828826037220936e-06, "loss": 0.667, "step": 4357 }, { "epoch": 0.76, "grad_norm": 0.7175636863577015, "learning_rate": 2.778952126928417e-06, "loss": 0.6718, "step": 4358 }, { "epoch": 0.76, "grad_norm": 0.6964629710264357, "learning_rate": 2.775023979795266e-06, "loss": 0.721, "step": 4359 }, { "epoch": 0.76, "grad_norm": 0.6718009172853071, "learning_rate": 2.771098163589946e-06, "loss": 0.685, "step": 4360 }, { "epoch": 0.76, "grad_norm": 0.7860066823141806, "learning_rate": 2.767174679579002e-06, "loss": 0.7328, "step": 4361 }, { "epoch": 0.76, "grad_norm": 0.6558481617236268, "learning_rate": 2.7632535290282335e-06, "loss": 0.6687, "step": 4362 }, { "epoch": 0.77, "grad_norm": 0.7758244942395631, "learning_rate": 2.759334713202686e-06, "loss": 0.7654, "step": 4363 }, { "epoch": 0.77, "grad_norm": 0.5898269362682842, "learning_rate": 2.7554182333666533e-06, "loss": 0.651, "step": 4364 }, { "epoch": 0.77, "grad_norm": 0.5979191283319093, "learning_rate": 2.7515040907836677e-06, "loss": 0.6659, "step": 4365 }, { "epoch": 0.77, "grad_norm": 0.7990558135098621, "learning_rate": 2.7475922867165208e-06, "loss": 0.7131, "step": 4366 }, { "epoch": 0.77, "grad_norm": 0.6576632165112233, "learning_rate": 2.743682822427234e-06, "loss": 0.687, "step": 4367 }, { "epoch": 0.77, "grad_norm": 0.6238787874410033, "learning_rate": 2.7397756991770863e-06, "loss": 0.6417, "step": 4368 }, { "epoch": 0.77, "grad_norm": 0.6321519388229497, "learning_rate": 2.7358709182265954e-06, "loss": 0.6951, "step": 4369 }, { "epoch": 0.77, "grad_norm": 0.8137579098540422, "learning_rate": 2.7319684808355276e-06, "loss": 0.7507, "step": 4370 }, { "epoch": 0.77, "grad_norm": 0.6445083631366049, "learning_rate": 2.728068388262888e-06, "loss": 0.7396, "step": 4371 }, { "epoch": 0.77, "grad_norm": 0.6710396670149641, "learning_rate": 2.7241706417669322e-06, "loss": 0.688, "step": 4372 }, { "epoch": 0.77, "grad_norm": 0.6678017583435523, "learning_rate": 2.7202752426051506e-06, "loss": 0.6706, "step": 4373 }, { "epoch": 0.77, "grad_norm": 0.7175965921332618, "learning_rate": 2.7163821920342783e-06, "loss": 0.7024, "step": 4374 }, { "epoch": 0.77, "grad_norm": 0.7490131218872977, "learning_rate": 2.712491491310296e-06, "loss": 0.6852, "step": 4375 }, { "epoch": 0.77, "grad_norm": 0.6496132373206126, "learning_rate": 2.708603141688426e-06, "loss": 0.6622, "step": 4376 }, { "epoch": 0.77, "grad_norm": 0.7379769959503508, "learning_rate": 2.704717144423131e-06, "loss": 0.7348, "step": 4377 }, { "epoch": 0.77, "grad_norm": 0.7298423277495225, "learning_rate": 2.7008335007681174e-06, "loss": 0.7235, "step": 4378 }, { "epoch": 0.77, "grad_norm": 0.7148703020042013, "learning_rate": 2.6969522119763238e-06, "loss": 0.7302, "step": 4379 }, { "epoch": 0.77, "grad_norm": 0.7353012012883668, "learning_rate": 2.693073279299936e-06, "loss": 0.7378, "step": 4380 }, { "epoch": 0.77, "grad_norm": 0.7213671072948346, "learning_rate": 2.689196703990381e-06, "loss": 0.6995, "step": 4381 }, { "epoch": 0.77, "grad_norm": 0.6586157604737909, "learning_rate": 2.6853224872983253e-06, "loss": 0.6982, "step": 4382 }, { "epoch": 0.77, "grad_norm": 0.6554502916777821, "learning_rate": 2.681450630473664e-06, "loss": 0.7223, "step": 4383 }, { "epoch": 0.77, "grad_norm": 0.5917511915638496, "learning_rate": 2.677581134765548e-06, "loss": 0.6488, "step": 4384 }, { "epoch": 0.77, "grad_norm": 0.7132177638442664, "learning_rate": 2.673714001422348e-06, "loss": 0.7301, "step": 4385 }, { "epoch": 0.77, "grad_norm": 0.6752637599051652, "learning_rate": 2.669849231691687e-06, "loss": 0.6898, "step": 4386 }, { "epoch": 0.77, "grad_norm": 0.7069024161068582, "learning_rate": 2.6659868268204203e-06, "loss": 0.7209, "step": 4387 }, { "epoch": 0.77, "grad_norm": 0.6038026094588275, "learning_rate": 2.6621267880546388e-06, "loss": 0.6706, "step": 4388 }, { "epoch": 0.77, "grad_norm": 0.7722344853293152, "learning_rate": 2.6582691166396744e-06, "loss": 0.702, "step": 4389 }, { "epoch": 0.77, "grad_norm": 0.6608106654045633, "learning_rate": 2.654413813820088e-06, "loss": 0.7047, "step": 4390 }, { "epoch": 0.77, "grad_norm": 0.6405066446491424, "learning_rate": 2.650560880839682e-06, "loss": 0.6704, "step": 4391 }, { "epoch": 0.77, "grad_norm": 0.6400782605256468, "learning_rate": 2.6467103189414966e-06, "loss": 0.7114, "step": 4392 }, { "epoch": 0.77, "grad_norm": 0.6801768893843201, "learning_rate": 2.642862129367798e-06, "loss": 0.6942, "step": 4393 }, { "epoch": 0.77, "grad_norm": 0.716169199088323, "learning_rate": 2.639016313360093e-06, "loss": 0.7311, "step": 4394 }, { "epoch": 0.77, "grad_norm": 0.7366237856782029, "learning_rate": 2.6351728721591287e-06, "loss": 0.7401, "step": 4395 }, { "epoch": 0.77, "grad_norm": 0.632173332062806, "learning_rate": 2.63133180700487e-06, "loss": 0.6834, "step": 4396 }, { "epoch": 0.77, "grad_norm": 0.6686128681847957, "learning_rate": 2.627493119136528e-06, "loss": 0.6848, "step": 4397 }, { "epoch": 0.77, "grad_norm": 0.7375096351292418, "learning_rate": 2.6236568097925453e-06, "loss": 0.7035, "step": 4398 }, { "epoch": 0.77, "grad_norm": 0.7944011624427639, "learning_rate": 2.619822880210594e-06, "loss": 0.7809, "step": 4399 }, { "epoch": 0.77, "grad_norm": 0.6239562316391867, "learning_rate": 2.6159913316275786e-06, "loss": 0.6563, "step": 4400 }, { "epoch": 0.77, "grad_norm": 0.6741763005204546, "learning_rate": 2.6121621652796425e-06, "loss": 0.738, "step": 4401 }, { "epoch": 0.77, "grad_norm": 0.7548518584586089, "learning_rate": 2.6083353824021483e-06, "loss": 0.6809, "step": 4402 }, { "epoch": 0.77, "grad_norm": 0.6991761436446731, "learning_rate": 2.6045109842296944e-06, "loss": 0.6983, "step": 4403 }, { "epoch": 0.77, "grad_norm": 0.7331658441553062, "learning_rate": 2.600688971996115e-06, "loss": 0.7643, "step": 4404 }, { "epoch": 0.77, "grad_norm": 0.6683362521746651, "learning_rate": 2.596869346934471e-06, "loss": 0.7054, "step": 4405 }, { "epoch": 0.77, "grad_norm": 0.7563992612207664, "learning_rate": 2.59305211027705e-06, "loss": 0.7743, "step": 4406 }, { "epoch": 0.77, "grad_norm": 0.6805459187427193, "learning_rate": 2.58923726325538e-06, "loss": 0.679, "step": 4407 }, { "epoch": 0.77, "grad_norm": 0.5945768578695007, "learning_rate": 2.5854248071002e-06, "loss": 0.6922, "step": 4408 }, { "epoch": 0.77, "grad_norm": 0.6964511060895735, "learning_rate": 2.5816147430414917e-06, "loss": 0.7519, "step": 4409 }, { "epoch": 0.77, "grad_norm": 0.6753282321515117, "learning_rate": 2.5778070723084656e-06, "loss": 0.6948, "step": 4410 }, { "epoch": 0.77, "grad_norm": 0.7394107817051443, "learning_rate": 2.574001796129548e-06, "loss": 0.7001, "step": 4411 }, { "epoch": 0.77, "grad_norm": 0.6905837365216658, "learning_rate": 2.5701989157324048e-06, "loss": 0.7525, "step": 4412 }, { "epoch": 0.77, "grad_norm": 0.6327734335801021, "learning_rate": 2.566398432343927e-06, "loss": 0.6802, "step": 4413 }, { "epoch": 0.77, "grad_norm": 0.7164451820696967, "learning_rate": 2.5626003471902228e-06, "loss": 0.7099, "step": 4414 }, { "epoch": 0.77, "grad_norm": 0.6516081399484215, "learning_rate": 2.5588046614966377e-06, "loss": 0.6683, "step": 4415 }, { "epoch": 0.77, "grad_norm": 0.6618774558230182, "learning_rate": 2.5550113764877392e-06, "loss": 0.6568, "step": 4416 }, { "epoch": 0.77, "grad_norm": 0.656293047102398, "learning_rate": 2.5512204933873185e-06, "loss": 0.6903, "step": 4417 }, { "epoch": 0.77, "grad_norm": 0.6657005833008155, "learning_rate": 2.547432013418398e-06, "loss": 0.7488, "step": 4418 }, { "epoch": 0.77, "grad_norm": 0.6222784238799268, "learning_rate": 2.5436459378032165e-06, "loss": 0.6946, "step": 4419 }, { "epoch": 0.78, "grad_norm": 0.7302355601301218, "learning_rate": 2.5398622677632377e-06, "loss": 0.7616, "step": 4420 }, { "epoch": 0.78, "grad_norm": 0.6797555188854594, "learning_rate": 2.5360810045191574e-06, "loss": 0.7164, "step": 4421 }, { "epoch": 0.78, "grad_norm": 0.676003291630378, "learning_rate": 2.532302149290887e-06, "loss": 0.7436, "step": 4422 }, { "epoch": 0.78, "grad_norm": 0.6968607012590929, "learning_rate": 2.5285257032975662e-06, "loss": 0.6594, "step": 4423 }, { "epoch": 0.78, "grad_norm": 0.6369475091147434, "learning_rate": 2.5247516677575533e-06, "loss": 0.638, "step": 4424 }, { "epoch": 0.78, "grad_norm": 0.6995114377684288, "learning_rate": 2.5209800438884357e-06, "loss": 0.7201, "step": 4425 }, { "epoch": 0.78, "grad_norm": 0.6813982158025857, "learning_rate": 2.517210832907011e-06, "loss": 0.7043, "step": 4426 }, { "epoch": 0.78, "grad_norm": 0.6545210552855596, "learning_rate": 2.5134440360293078e-06, "loss": 0.6913, "step": 4427 }, { "epoch": 0.78, "grad_norm": 0.7436630392256248, "learning_rate": 2.509679654470577e-06, "loss": 0.7748, "step": 4428 }, { "epoch": 0.78, "grad_norm": 0.6604767388264492, "learning_rate": 2.50591768944528e-06, "loss": 0.6914, "step": 4429 }, { "epoch": 0.78, "grad_norm": 0.736097418569258, "learning_rate": 2.502158142167108e-06, "loss": 0.7214, "step": 4430 }, { "epoch": 0.78, "grad_norm": 0.6456827815048567, "learning_rate": 2.4984010138489733e-06, "loss": 0.6678, "step": 4431 }, { "epoch": 0.78, "grad_norm": 0.7510351682698043, "learning_rate": 2.4946463057029966e-06, "loss": 0.7583, "step": 4432 }, { "epoch": 0.78, "grad_norm": 0.7014130749724792, "learning_rate": 2.4908940189405296e-06, "loss": 0.6985, "step": 4433 }, { "epoch": 0.78, "grad_norm": 0.6816463288387188, "learning_rate": 2.4871441547721363e-06, "loss": 0.6558, "step": 4434 }, { "epoch": 0.78, "grad_norm": 0.7152300795226034, "learning_rate": 2.483396714407601e-06, "loss": 0.6908, "step": 4435 }, { "epoch": 0.78, "grad_norm": 0.6187124648432665, "learning_rate": 2.4796516990559317e-06, "loss": 0.6715, "step": 4436 }, { "epoch": 0.78, "grad_norm": 0.7020206228636318, "learning_rate": 2.4759091099253418e-06, "loss": 0.7142, "step": 4437 }, { "epoch": 0.78, "grad_norm": 0.6919830588662177, "learning_rate": 2.472168948223268e-06, "loss": 0.6793, "step": 4438 }, { "epoch": 0.78, "grad_norm": 0.691306287417338, "learning_rate": 2.468431215156367e-06, "loss": 0.7176, "step": 4439 }, { "epoch": 0.78, "grad_norm": 0.6365732479946181, "learning_rate": 2.4646959119305093e-06, "loss": 0.694, "step": 4440 }, { "epoch": 0.78, "grad_norm": 0.7758283504488048, "learning_rate": 2.4609630397507813e-06, "loss": 0.811, "step": 4441 }, { "epoch": 0.78, "grad_norm": 0.6662516509655402, "learning_rate": 2.4572325998214887e-06, "loss": 0.7135, "step": 4442 }, { "epoch": 0.78, "grad_norm": 0.6454143704141313, "learning_rate": 2.453504593346142e-06, "loss": 0.6892, "step": 4443 }, { "epoch": 0.78, "grad_norm": 0.7079257358219276, "learning_rate": 2.4497790215274773e-06, "loss": 0.7163, "step": 4444 }, { "epoch": 0.78, "grad_norm": 0.6246048771929358, "learning_rate": 2.446055885567443e-06, "loss": 0.6854, "step": 4445 }, { "epoch": 0.78, "grad_norm": 0.6681769479083386, "learning_rate": 2.442335186667203e-06, "loss": 0.6504, "step": 4446 }, { "epoch": 0.78, "grad_norm": 0.630488453316136, "learning_rate": 2.4386169260271254e-06, "loss": 0.673, "step": 4447 }, { "epoch": 0.78, "grad_norm": 0.6659534401529311, "learning_rate": 2.434901104846804e-06, "loss": 0.7162, "step": 4448 }, { "epoch": 0.78, "grad_norm": 0.7026836680956687, "learning_rate": 2.431187724325036e-06, "loss": 0.7038, "step": 4449 }, { "epoch": 0.78, "grad_norm": 0.6807466759116871, "learning_rate": 2.427476785659837e-06, "loss": 0.6831, "step": 4450 }, { "epoch": 0.78, "grad_norm": 0.7190996853458741, "learning_rate": 2.4237682900484337e-06, "loss": 0.6675, "step": 4451 }, { "epoch": 0.78, "grad_norm": 0.772345749949045, "learning_rate": 2.4200622386872643e-06, "loss": 0.7641, "step": 4452 }, { "epoch": 0.78, "grad_norm": 0.7327205085633297, "learning_rate": 2.4163586327719766e-06, "loss": 0.6967, "step": 4453 }, { "epoch": 0.78, "grad_norm": 0.7258374321664719, "learning_rate": 2.412657473497436e-06, "loss": 0.7132, "step": 4454 }, { "epoch": 0.78, "grad_norm": 0.9191228893655496, "learning_rate": 2.408958762057707e-06, "loss": 0.8261, "step": 4455 }, { "epoch": 0.78, "grad_norm": 0.8618411798517549, "learning_rate": 2.405262499646077e-06, "loss": 0.7316, "step": 4456 }, { "epoch": 0.78, "grad_norm": 0.700007448189415, "learning_rate": 2.4015686874550304e-06, "loss": 0.7425, "step": 4457 }, { "epoch": 0.78, "grad_norm": 0.6508492105828403, "learning_rate": 2.3978773266762724e-06, "loss": 0.6591, "step": 4458 }, { "epoch": 0.78, "grad_norm": 0.6651944056060757, "learning_rate": 2.3941884185007125e-06, "loss": 0.6977, "step": 4459 }, { "epoch": 0.78, "grad_norm": 0.715955328494538, "learning_rate": 2.3905019641184713e-06, "loss": 0.7089, "step": 4460 }, { "epoch": 0.78, "grad_norm": 0.7175455341859202, "learning_rate": 2.3868179647188715e-06, "loss": 0.6778, "step": 4461 }, { "epoch": 0.78, "grad_norm": 0.7157821633491877, "learning_rate": 2.3831364214904494e-06, "loss": 0.6905, "step": 4462 }, { "epoch": 0.78, "grad_norm": 0.6698301155215018, "learning_rate": 2.379457335620948e-06, "loss": 0.7668, "step": 4463 }, { "epoch": 0.78, "grad_norm": 0.645481899728501, "learning_rate": 2.3757807082973184e-06, "loss": 0.7229, "step": 4464 }, { "epoch": 0.78, "grad_norm": 0.7600592003518846, "learning_rate": 2.3721065407057188e-06, "loss": 0.6949, "step": 4465 }, { "epoch": 0.78, "grad_norm": 0.6937986217856367, "learning_rate": 2.36843483403151e-06, "loss": 0.7055, "step": 4466 }, { "epoch": 0.78, "grad_norm": 0.7037757873797891, "learning_rate": 2.364765589459258e-06, "loss": 0.7102, "step": 4467 }, { "epoch": 0.78, "grad_norm": 0.7298276412475131, "learning_rate": 2.3610988081727393e-06, "loss": 0.7493, "step": 4468 }, { "epoch": 0.78, "grad_norm": 0.6779937018184314, "learning_rate": 2.357434491354935e-06, "loss": 0.6937, "step": 4469 }, { "epoch": 0.78, "grad_norm": 0.6547966344007566, "learning_rate": 2.3537726401880312e-06, "loss": 0.7038, "step": 4470 }, { "epoch": 0.78, "grad_norm": 0.6540165452737318, "learning_rate": 2.350113255853418e-06, "loss": 0.7252, "step": 4471 }, { "epoch": 0.78, "grad_norm": 0.6692445813554894, "learning_rate": 2.346456339531685e-06, "loss": 0.6936, "step": 4472 }, { "epoch": 0.78, "grad_norm": 0.5953100005433263, "learning_rate": 2.342801892402632e-06, "loss": 0.6838, "step": 4473 }, { "epoch": 0.78, "grad_norm": 0.678392532831892, "learning_rate": 2.3391499156452626e-06, "loss": 0.6973, "step": 4474 }, { "epoch": 0.78, "grad_norm": 0.6097450341114111, "learning_rate": 2.3355004104377732e-06, "loss": 0.6345, "step": 4475 }, { "epoch": 0.78, "grad_norm": 0.7428681706964594, "learning_rate": 2.331853377957577e-06, "loss": 0.7513, "step": 4476 }, { "epoch": 0.79, "grad_norm": 0.6596599435156287, "learning_rate": 2.328208819381281e-06, "loss": 0.6899, "step": 4477 }, { "epoch": 0.79, "grad_norm": 0.7495897767257591, "learning_rate": 2.3245667358846944e-06, "loss": 0.7075, "step": 4478 }, { "epoch": 0.79, "grad_norm": 0.6814968300630969, "learning_rate": 2.3209271286428292e-06, "loss": 0.6805, "step": 4479 }, { "epoch": 0.79, "grad_norm": 0.7042431098650255, "learning_rate": 2.3172899988298993e-06, "loss": 0.7032, "step": 4480 }, { "epoch": 0.79, "grad_norm": 0.7152834162882614, "learning_rate": 2.313655347619318e-06, "loss": 0.7089, "step": 4481 }, { "epoch": 0.79, "grad_norm": 0.6892350299778682, "learning_rate": 2.3100231761837012e-06, "loss": 0.727, "step": 4482 }, { "epoch": 0.79, "grad_norm": 0.671451714062499, "learning_rate": 2.3063934856948644e-06, "loss": 0.6771, "step": 4483 }, { "epoch": 0.79, "grad_norm": 0.7102701839709636, "learning_rate": 2.302766277323819e-06, "loss": 0.7014, "step": 4484 }, { "epoch": 0.79, "grad_norm": 0.6331408639605367, "learning_rate": 2.2991415522407743e-06, "loss": 0.7048, "step": 4485 }, { "epoch": 0.79, "grad_norm": 0.6557516149501416, "learning_rate": 2.2955193116151477e-06, "loss": 0.6758, "step": 4486 }, { "epoch": 0.79, "grad_norm": 0.6335297075567131, "learning_rate": 2.2918995566155466e-06, "loss": 0.7086, "step": 4487 }, { "epoch": 0.79, "grad_norm": 0.7169639820981214, "learning_rate": 2.28828228840978e-06, "loss": 0.7281, "step": 4488 }, { "epoch": 0.79, "grad_norm": 0.7124041267050691, "learning_rate": 2.2846675081648582e-06, "loss": 0.6883, "step": 4489 }, { "epoch": 0.79, "grad_norm": 0.634135075995977, "learning_rate": 2.281055217046978e-06, "loss": 0.6978, "step": 4490 }, { "epoch": 0.79, "grad_norm": 0.6787689596836228, "learning_rate": 2.277445416221542e-06, "loss": 0.7049, "step": 4491 }, { "epoch": 0.79, "grad_norm": 0.8854846907780783, "learning_rate": 2.273838106853151e-06, "loss": 0.8259, "step": 4492 }, { "epoch": 0.79, "grad_norm": 0.60698044967814, "learning_rate": 2.2702332901055925e-06, "loss": 0.6681, "step": 4493 }, { "epoch": 0.79, "grad_norm": 0.6739138474065548, "learning_rate": 2.2666309671418584e-06, "loss": 0.6529, "step": 4494 }, { "epoch": 0.79, "grad_norm": 0.7568486363923367, "learning_rate": 2.2630311391241354e-06, "loss": 0.7214, "step": 4495 }, { "epoch": 0.79, "grad_norm": 0.6543353225718438, "learning_rate": 2.259433807213797e-06, "loss": 0.6903, "step": 4496 }, { "epoch": 0.79, "grad_norm": 0.6674657403390726, "learning_rate": 2.2558389725714215e-06, "loss": 0.6798, "step": 4497 }, { "epoch": 0.79, "grad_norm": 0.7688137043601442, "learning_rate": 2.2522466363567775e-06, "loss": 0.695, "step": 4498 }, { "epoch": 0.79, "grad_norm": 0.6216515645252135, "learning_rate": 2.2486567997288255e-06, "loss": 0.7012, "step": 4499 }, { "epoch": 0.79, "grad_norm": 0.6645684548540303, "learning_rate": 2.245069463845727e-06, "loss": 0.6658, "step": 4500 }, { "epoch": 0.79, "grad_norm": 0.6250391395130309, "learning_rate": 2.2414846298648262e-06, "loss": 0.7013, "step": 4501 }, { "epoch": 0.79, "grad_norm": 0.6814781903124701, "learning_rate": 2.2379022989426645e-06, "loss": 0.6917, "step": 4502 }, { "epoch": 0.79, "grad_norm": 0.7187479777578677, "learning_rate": 2.2343224722349777e-06, "loss": 0.7165, "step": 4503 }, { "epoch": 0.79, "grad_norm": 0.6541262426170698, "learning_rate": 2.230745150896694e-06, "loss": 0.6494, "step": 4504 }, { "epoch": 0.79, "grad_norm": 0.6198796222088301, "learning_rate": 2.2271703360819297e-06, "loss": 0.6575, "step": 4505 }, { "epoch": 0.79, "grad_norm": 0.7428950591611629, "learning_rate": 2.2235980289439994e-06, "loss": 0.7307, "step": 4506 }, { "epoch": 0.79, "grad_norm": 0.6515232234538271, "learning_rate": 2.220028230635398e-06, "loss": 0.67, "step": 4507 }, { "epoch": 0.79, "grad_norm": 0.6983443578688461, "learning_rate": 2.216460942307819e-06, "loss": 0.7135, "step": 4508 }, { "epoch": 0.79, "grad_norm": 0.6878743266097868, "learning_rate": 2.212896165112143e-06, "loss": 0.7313, "step": 4509 }, { "epoch": 0.79, "grad_norm": 0.7030740072655034, "learning_rate": 2.2093339001984457e-06, "loss": 0.6961, "step": 4510 }, { "epoch": 0.79, "grad_norm": 0.6886688349443971, "learning_rate": 2.205774148715982e-06, "loss": 0.7001, "step": 4511 }, { "epoch": 0.79, "grad_norm": 0.6616233429551137, "learning_rate": 2.2022169118132053e-06, "loss": 0.7029, "step": 4512 }, { "epoch": 0.79, "grad_norm": 0.6385290566318708, "learning_rate": 2.1986621906377557e-06, "loss": 0.6558, "step": 4513 }, { "epoch": 0.79, "grad_norm": 0.6310737953109874, "learning_rate": 2.1951099863364567e-06, "loss": 0.7218, "step": 4514 }, { "epoch": 0.79, "grad_norm": 0.7135552564058322, "learning_rate": 2.1915603000553243e-06, "loss": 0.6777, "step": 4515 }, { "epoch": 0.79, "grad_norm": 0.6251178974862999, "learning_rate": 2.1880131329395614e-06, "loss": 0.6534, "step": 4516 }, { "epoch": 0.79, "grad_norm": 0.6879250634536112, "learning_rate": 2.1844684861335596e-06, "loss": 0.7059, "step": 4517 }, { "epoch": 0.79, "grad_norm": 0.7163623936642638, "learning_rate": 2.180926360780897e-06, "loss": 0.7386, "step": 4518 }, { "epoch": 0.79, "grad_norm": 0.750987964471114, "learning_rate": 2.1773867580243337e-06, "loss": 0.7243, "step": 4519 }, { "epoch": 0.79, "grad_norm": 0.611169342153591, "learning_rate": 2.173849679005822e-06, "loss": 0.6633, "step": 4520 }, { "epoch": 0.79, "grad_norm": 0.711980383788163, "learning_rate": 2.170315124866493e-06, "loss": 0.6877, "step": 4521 }, { "epoch": 0.79, "grad_norm": 1.1257377400197228, "learning_rate": 2.166783096746671e-06, "loss": 0.691, "step": 4522 }, { "epoch": 0.79, "grad_norm": 0.728343640574522, "learning_rate": 2.1632535957858615e-06, "loss": 0.6996, "step": 4523 }, { "epoch": 0.79, "grad_norm": 0.6743381252088636, "learning_rate": 2.1597266231227566e-06, "loss": 0.679, "step": 4524 }, { "epoch": 0.79, "grad_norm": 0.6573893955744676, "learning_rate": 2.156202179895227e-06, "loss": 0.6776, "step": 4525 }, { "epoch": 0.79, "grad_norm": 0.7089358484852818, "learning_rate": 2.1526802672403344e-06, "loss": 0.7109, "step": 4526 }, { "epoch": 0.79, "grad_norm": 0.6918295556047012, "learning_rate": 2.14916088629432e-06, "loss": 0.7546, "step": 4527 }, { "epoch": 0.79, "grad_norm": 0.7036921028332425, "learning_rate": 2.145644038192611e-06, "loss": 0.6777, "step": 4528 }, { "epoch": 0.79, "grad_norm": 0.7188076018922774, "learning_rate": 2.142129724069818e-06, "loss": 0.6677, "step": 4529 }, { "epoch": 0.79, "grad_norm": 0.672577050364119, "learning_rate": 2.138617945059729e-06, "loss": 0.6765, "step": 4530 }, { "epoch": 0.79, "grad_norm": 0.662145076992799, "learning_rate": 2.1351087022953143e-06, "loss": 0.6955, "step": 4531 }, { "epoch": 0.79, "grad_norm": 0.6809039898516186, "learning_rate": 2.131601996908731e-06, "loss": 0.7076, "step": 4532 }, { "epoch": 0.79, "grad_norm": 0.6818274003460432, "learning_rate": 2.128097830031317e-06, "loss": 0.7282, "step": 4533 }, { "epoch": 0.8, "grad_norm": 0.6084531516030981, "learning_rate": 2.1245962027935872e-06, "loss": 0.6855, "step": 4534 }, { "epoch": 0.8, "grad_norm": 0.7215449136074293, "learning_rate": 2.121097116325241e-06, "loss": 0.709, "step": 4535 }, { "epoch": 0.8, "grad_norm": 0.6653176932344355, "learning_rate": 2.1176005717551607e-06, "loss": 0.7144, "step": 4536 }, { "epoch": 0.8, "grad_norm": 0.6661954164066004, "learning_rate": 2.1141065702113963e-06, "loss": 0.712, "step": 4537 }, { "epoch": 0.8, "grad_norm": 0.6889900553762947, "learning_rate": 2.1106151128211916e-06, "loss": 0.6961, "step": 4538 }, { "epoch": 0.8, "grad_norm": 0.633811772330852, "learning_rate": 2.1071262007109595e-06, "loss": 0.6855, "step": 4539 }, { "epoch": 0.8, "grad_norm": 0.62787513770302, "learning_rate": 2.103639835006297e-06, "loss": 0.6494, "step": 4540 }, { "epoch": 0.8, "grad_norm": 0.6571966748125566, "learning_rate": 2.100156016831979e-06, "loss": 0.6892, "step": 4541 }, { "epoch": 0.8, "grad_norm": 0.6802057777876734, "learning_rate": 2.0966747473119607e-06, "loss": 0.6553, "step": 4542 }, { "epoch": 0.8, "grad_norm": 0.6437957627291225, "learning_rate": 2.0931960275693665e-06, "loss": 0.6912, "step": 4543 }, { "epoch": 0.8, "grad_norm": 0.7334563212058297, "learning_rate": 2.0897198587265064e-06, "loss": 0.7517, "step": 4544 }, { "epoch": 0.8, "grad_norm": 0.6609160359353113, "learning_rate": 2.0862462419048658e-06, "loss": 0.7029, "step": 4545 }, { "epoch": 0.8, "grad_norm": 0.6767481549071976, "learning_rate": 2.082775178225105e-06, "loss": 0.7093, "step": 4546 }, { "epoch": 0.8, "grad_norm": 0.7255890519822781, "learning_rate": 2.079306668807063e-06, "loss": 0.6508, "step": 4547 }, { "epoch": 0.8, "grad_norm": 0.7337351071913467, "learning_rate": 2.0758407147697537e-06, "loss": 0.6885, "step": 4548 }, { "epoch": 0.8, "grad_norm": 0.6952191924927043, "learning_rate": 2.07237731723136e-06, "loss": 0.6978, "step": 4549 }, { "epoch": 0.8, "grad_norm": 0.6771684522707879, "learning_rate": 2.068916477309252e-06, "loss": 0.6688, "step": 4550 }, { "epoch": 0.8, "grad_norm": 0.6493665625747481, "learning_rate": 2.065458196119966e-06, "loss": 0.6859, "step": 4551 }, { "epoch": 0.8, "grad_norm": 0.7205124810908596, "learning_rate": 2.0620024747792167e-06, "loss": 0.7468, "step": 4552 }, { "epoch": 0.8, "grad_norm": 0.6681690249215563, "learning_rate": 2.0585493144018953e-06, "loss": 0.675, "step": 4553 }, { "epoch": 0.8, "grad_norm": 0.6412073150289124, "learning_rate": 2.0550987161020573e-06, "loss": 0.6918, "step": 4554 }, { "epoch": 0.8, "grad_norm": 0.6885496903777935, "learning_rate": 2.0516506809929395e-06, "loss": 0.652, "step": 4555 }, { "epoch": 0.8, "grad_norm": 0.6904744872755174, "learning_rate": 2.048205210186953e-06, "loss": 0.7167, "step": 4556 }, { "epoch": 0.8, "grad_norm": 0.7569442392695672, "learning_rate": 2.0447623047956743e-06, "loss": 0.7832, "step": 4557 }, { "epoch": 0.8, "grad_norm": 0.6281853626667768, "learning_rate": 2.0413219659298577e-06, "loss": 0.6619, "step": 4558 }, { "epoch": 0.8, "grad_norm": 0.6530263127521929, "learning_rate": 2.037884194699432e-06, "loss": 0.7049, "step": 4559 }, { "epoch": 0.8, "grad_norm": 0.7174470480044012, "learning_rate": 2.034448992213488e-06, "loss": 0.7191, "step": 4560 }, { "epoch": 0.8, "grad_norm": 0.6680984822075945, "learning_rate": 2.0310163595802958e-06, "loss": 0.689, "step": 4561 }, { "epoch": 0.8, "grad_norm": 0.721289422360614, "learning_rate": 2.0275862979072934e-06, "loss": 0.7399, "step": 4562 }, { "epoch": 0.8, "grad_norm": 0.6139583827196448, "learning_rate": 2.0241588083010932e-06, "loss": 0.6667, "step": 4563 }, { "epoch": 0.8, "grad_norm": 0.6654518855227894, "learning_rate": 2.0207338918674712e-06, "loss": 0.6653, "step": 4564 }, { "epoch": 0.8, "grad_norm": 0.6536664736840289, "learning_rate": 2.0173115497113827e-06, "loss": 0.6899, "step": 4565 }, { "epoch": 0.8, "grad_norm": 0.6131094765710674, "learning_rate": 2.013891782936942e-06, "loss": 0.704, "step": 4566 }, { "epoch": 0.8, "grad_norm": 0.6423798252481222, "learning_rate": 2.0104745926474345e-06, "loss": 0.6528, "step": 4567 }, { "epoch": 0.8, "grad_norm": 0.6716420256161062, "learning_rate": 2.0070599799453205e-06, "loss": 0.6852, "step": 4568 }, { "epoch": 0.8, "grad_norm": 0.8464756404756416, "learning_rate": 2.0036479459322245e-06, "loss": 0.732, "step": 4569 }, { "epoch": 0.8, "grad_norm": 0.6903102629185387, "learning_rate": 2.0002384917089402e-06, "loss": 0.7332, "step": 4570 }, { "epoch": 0.8, "grad_norm": 0.5691045863046632, "learning_rate": 1.996831618375431e-06, "loss": 0.662, "step": 4571 }, { "epoch": 0.8, "grad_norm": 0.6115987259887065, "learning_rate": 1.99342732703082e-06, "loss": 0.6803, "step": 4572 }, { "epoch": 0.8, "grad_norm": 0.753193542017943, "learning_rate": 1.990025618773406e-06, "loss": 0.7277, "step": 4573 }, { "epoch": 0.8, "grad_norm": 0.6848806529831551, "learning_rate": 1.986626494700652e-06, "loss": 0.681, "step": 4574 }, { "epoch": 0.8, "grad_norm": 0.6785070293065137, "learning_rate": 1.9832299559091817e-06, "loss": 0.6963, "step": 4575 }, { "epoch": 0.8, "grad_norm": 0.7814033432678373, "learning_rate": 1.979836003494793e-06, "loss": 0.69, "step": 4576 }, { "epoch": 0.8, "grad_norm": 0.697526950811242, "learning_rate": 1.976444638552446e-06, "loss": 0.6908, "step": 4577 }, { "epoch": 0.8, "grad_norm": 0.6541800687855843, "learning_rate": 1.973055862176263e-06, "loss": 0.7082, "step": 4578 }, { "epoch": 0.8, "grad_norm": 0.6640115392479015, "learning_rate": 1.9696696754595346e-06, "loss": 0.6961, "step": 4579 }, { "epoch": 0.8, "grad_norm": 0.6928032905710458, "learning_rate": 1.9662860794947157e-06, "loss": 0.7111, "step": 4580 }, { "epoch": 0.8, "grad_norm": 0.6661869461180127, "learning_rate": 1.9629050753734257e-06, "loss": 0.7062, "step": 4581 }, { "epoch": 0.8, "grad_norm": 0.6727180376070863, "learning_rate": 1.9595266641864495e-06, "loss": 0.7034, "step": 4582 }, { "epoch": 0.8, "grad_norm": 0.7589286191370245, "learning_rate": 1.9561508470237268e-06, "loss": 0.6653, "step": 4583 }, { "epoch": 0.8, "grad_norm": 0.7076533869996535, "learning_rate": 1.952777624974371e-06, "loss": 0.7376, "step": 4584 }, { "epoch": 0.8, "grad_norm": 0.6380973979545261, "learning_rate": 1.9494069991266505e-06, "loss": 0.6953, "step": 4585 }, { "epoch": 0.8, "grad_norm": 0.6809263454769167, "learning_rate": 1.9460389705680014e-06, "loss": 0.7045, "step": 4586 }, { "epoch": 0.8, "grad_norm": 0.709129588979124, "learning_rate": 1.9426735403850207e-06, "loss": 0.7237, "step": 4587 }, { "epoch": 0.8, "grad_norm": 0.6899077238420118, "learning_rate": 1.9393107096634678e-06, "loss": 0.7509, "step": 4588 }, { "epoch": 0.8, "grad_norm": 0.724901815299391, "learning_rate": 1.9359504794882567e-06, "loss": 0.6922, "step": 4589 }, { "epoch": 0.8, "grad_norm": 0.6765344476936033, "learning_rate": 1.93259285094347e-06, "loss": 0.7152, "step": 4590 }, { "epoch": 0.81, "grad_norm": 0.7173240610751251, "learning_rate": 1.92923782511235e-06, "loss": 0.7655, "step": 4591 }, { "epoch": 0.81, "grad_norm": 0.738545540625782, "learning_rate": 1.9258854030772966e-06, "loss": 0.7184, "step": 4592 }, { "epoch": 0.81, "grad_norm": 0.6148111030520652, "learning_rate": 1.922535585919875e-06, "loss": 0.6579, "step": 4593 }, { "epoch": 0.81, "grad_norm": 0.7052437847322001, "learning_rate": 1.9191883747207994e-06, "loss": 0.6879, "step": 4594 }, { "epoch": 0.81, "grad_norm": 0.7378133971570272, "learning_rate": 1.9158437705599562e-06, "loss": 0.7458, "step": 4595 }, { "epoch": 0.81, "grad_norm": 0.6309029063081407, "learning_rate": 1.9125017745163786e-06, "loss": 0.6893, "step": 4596 }, { "epoch": 0.81, "grad_norm": 0.6484004404833721, "learning_rate": 1.9091623876682665e-06, "loss": 0.7015, "step": 4597 }, { "epoch": 0.81, "grad_norm": 0.6870887226090817, "learning_rate": 1.9058256110929763e-06, "loss": 0.7018, "step": 4598 }, { "epoch": 0.81, "grad_norm": 0.6300375237894748, "learning_rate": 1.9024914458670219e-06, "loss": 0.6493, "step": 4599 }, { "epoch": 0.81, "grad_norm": 0.6769309181445892, "learning_rate": 1.899159893066076e-06, "loss": 0.7025, "step": 4600 }, { "epoch": 0.81, "grad_norm": 0.6516283951022662, "learning_rate": 1.895830953764962e-06, "loss": 0.687, "step": 4601 }, { "epoch": 0.81, "grad_norm": 0.7342108618192259, "learning_rate": 1.8925046290376725e-06, "loss": 0.6513, "step": 4602 }, { "epoch": 0.81, "grad_norm": 0.6770833810890599, "learning_rate": 1.8891809199573408e-06, "loss": 0.7718, "step": 4603 }, { "epoch": 0.81, "grad_norm": 0.5902327643521433, "learning_rate": 1.885859827596268e-06, "loss": 0.6737, "step": 4604 }, { "epoch": 0.81, "grad_norm": 0.6575803030573626, "learning_rate": 1.8825413530259096e-06, "loss": 0.6887, "step": 4605 }, { "epoch": 0.81, "grad_norm": 0.624403245133022, "learning_rate": 1.8792254973168754e-06, "loss": 0.6704, "step": 4606 }, { "epoch": 0.81, "grad_norm": 0.7159685096671614, "learning_rate": 1.8759122615389258e-06, "loss": 0.708, "step": 4607 }, { "epoch": 0.81, "grad_norm": 0.6530557642859434, "learning_rate": 1.8726016467609809e-06, "loss": 0.6632, "step": 4608 }, { "epoch": 0.81, "grad_norm": 0.6690938042221254, "learning_rate": 1.869293654051114e-06, "loss": 0.6999, "step": 4609 }, { "epoch": 0.81, "grad_norm": 0.6824907641760002, "learning_rate": 1.8659882844765541e-06, "loss": 0.7024, "step": 4610 }, { "epoch": 0.81, "grad_norm": 0.6345796922591752, "learning_rate": 1.8626855391036835e-06, "loss": 0.6802, "step": 4611 }, { "epoch": 0.81, "grad_norm": 0.6343416893894599, "learning_rate": 1.8593854189980343e-06, "loss": 0.7103, "step": 4612 }, { "epoch": 0.81, "grad_norm": 0.7428090195610553, "learning_rate": 1.8560879252242925e-06, "loss": 0.7082, "step": 4613 }, { "epoch": 0.81, "grad_norm": 0.6237848182469442, "learning_rate": 1.8527930588462982e-06, "loss": 0.7071, "step": 4614 }, { "epoch": 0.81, "grad_norm": 0.6620868431069575, "learning_rate": 1.8495008209270471e-06, "loss": 0.6243, "step": 4615 }, { "epoch": 0.81, "grad_norm": 0.677938380654325, "learning_rate": 1.8462112125286824e-06, "loss": 0.7034, "step": 4616 }, { "epoch": 0.81, "grad_norm": 0.66032498975549, "learning_rate": 1.8429242347124998e-06, "loss": 0.7034, "step": 4617 }, { "epoch": 0.81, "grad_norm": 0.6287616895713798, "learning_rate": 1.839639888538951e-06, "loss": 0.6746, "step": 4618 }, { "epoch": 0.81, "grad_norm": 0.7205568004595887, "learning_rate": 1.8363581750676274e-06, "loss": 0.7176, "step": 4619 }, { "epoch": 0.81, "grad_norm": 0.7329527817276801, "learning_rate": 1.8330790953572842e-06, "loss": 0.7551, "step": 4620 }, { "epoch": 0.81, "grad_norm": 0.6718537263180131, "learning_rate": 1.8298026504658162e-06, "loss": 0.7058, "step": 4621 }, { "epoch": 0.81, "grad_norm": 0.8521671987179587, "learning_rate": 1.8265288414502746e-06, "loss": 0.7273, "step": 4622 }, { "epoch": 0.81, "grad_norm": 0.6909713952528331, "learning_rate": 1.8232576693668591e-06, "loss": 0.7272, "step": 4623 }, { "epoch": 0.81, "grad_norm": 0.689090371516081, "learning_rate": 1.81998913527092e-06, "loss": 0.7366, "step": 4624 }, { "epoch": 0.81, "grad_norm": 0.7124356527103682, "learning_rate": 1.8167232402169488e-06, "loss": 0.751, "step": 4625 }, { "epoch": 0.81, "grad_norm": 0.6846577753782936, "learning_rate": 1.8134599852585954e-06, "loss": 0.6995, "step": 4626 }, { "epoch": 0.81, "grad_norm": 0.7367052950341315, "learning_rate": 1.8101993714486532e-06, "loss": 0.7305, "step": 4627 }, { "epoch": 0.81, "grad_norm": 0.6695477412447132, "learning_rate": 1.806941399839064e-06, "loss": 0.7017, "step": 4628 }, { "epoch": 0.81, "grad_norm": 0.6485107488723956, "learning_rate": 1.803686071480919e-06, "loss": 0.6927, "step": 4629 }, { "epoch": 0.81, "grad_norm": 0.6374911816813491, "learning_rate": 1.800433387424455e-06, "loss": 0.6829, "step": 4630 }, { "epoch": 0.81, "grad_norm": 0.6740367715038027, "learning_rate": 1.7971833487190504e-06, "loss": 0.6919, "step": 4631 }, { "epoch": 0.81, "grad_norm": 0.7903679694277188, "learning_rate": 1.7939359564132387e-06, "loss": 0.7067, "step": 4632 }, { "epoch": 0.81, "grad_norm": 0.6487237804474845, "learning_rate": 1.7906912115546972e-06, "loss": 0.6845, "step": 4633 }, { "epoch": 0.81, "grad_norm": 0.7148334932065173, "learning_rate": 1.7874491151902474e-06, "loss": 0.7221, "step": 4634 }, { "epoch": 0.81, "grad_norm": 0.5977208530411333, "learning_rate": 1.7842096683658606e-06, "loss": 0.7034, "step": 4635 }, { "epoch": 0.81, "grad_norm": 0.6242499933814962, "learning_rate": 1.780972872126644e-06, "loss": 0.6707, "step": 4636 }, { "epoch": 0.81, "grad_norm": 0.6396315259322602, "learning_rate": 1.7777387275168577e-06, "loss": 0.6627, "step": 4637 }, { "epoch": 0.81, "grad_norm": 0.6916858837199753, "learning_rate": 1.7745072355799065e-06, "loss": 0.6728, "step": 4638 }, { "epoch": 0.81, "grad_norm": 0.7033237431350391, "learning_rate": 1.7712783973583337e-06, "loss": 0.7123, "step": 4639 }, { "epoch": 0.81, "grad_norm": 0.6522562216385117, "learning_rate": 1.7680522138938316e-06, "loss": 0.7049, "step": 4640 }, { "epoch": 0.81, "grad_norm": 0.6653458307907753, "learning_rate": 1.7648286862272358e-06, "loss": 0.6804, "step": 4641 }, { "epoch": 0.81, "grad_norm": 0.6851292022990259, "learning_rate": 1.7616078153985183e-06, "loss": 0.6691, "step": 4642 }, { "epoch": 0.81, "grad_norm": 0.6582605910825806, "learning_rate": 1.758389602446804e-06, "loss": 0.6727, "step": 4643 }, { "epoch": 0.81, "grad_norm": 0.688394283164115, "learning_rate": 1.755174048410353e-06, "loss": 0.7183, "step": 4644 }, { "epoch": 0.81, "grad_norm": 0.6851553326688089, "learning_rate": 1.7519611543265713e-06, "loss": 0.6831, "step": 4645 }, { "epoch": 0.81, "grad_norm": 0.6316533175283278, "learning_rate": 1.7487509212320043e-06, "loss": 0.704, "step": 4646 }, { "epoch": 0.81, "grad_norm": 0.6549275342006233, "learning_rate": 1.7455433501623442e-06, "loss": 0.6712, "step": 4647 }, { "epoch": 0.82, "grad_norm": 0.7265867031142141, "learning_rate": 1.742338442152417e-06, "loss": 0.757, "step": 4648 }, { "epoch": 0.82, "grad_norm": 0.6564602641397871, "learning_rate": 1.7391361982361899e-06, "loss": 0.6542, "step": 4649 }, { "epoch": 0.82, "grad_norm": 0.6329647399698339, "learning_rate": 1.7359366194467774e-06, "loss": 0.6857, "step": 4650 }, { "epoch": 0.82, "grad_norm": 0.7229920100853027, "learning_rate": 1.7327397068164286e-06, "loss": 0.7154, "step": 4651 }, { "epoch": 0.82, "grad_norm": 0.7014235729318982, "learning_rate": 1.729545461376535e-06, "loss": 0.7478, "step": 4652 }, { "epoch": 0.82, "grad_norm": 0.6471534262822609, "learning_rate": 1.726353884157629e-06, "loss": 0.6716, "step": 4653 }, { "epoch": 0.82, "grad_norm": 0.7022685099805295, "learning_rate": 1.7231649761893742e-06, "loss": 0.7264, "step": 4654 }, { "epoch": 0.82, "grad_norm": 0.6612574565200947, "learning_rate": 1.719978738500584e-06, "loss": 0.6623, "step": 4655 }, { "epoch": 0.82, "grad_norm": 0.6464546812647457, "learning_rate": 1.7167951721192011e-06, "loss": 0.6857, "step": 4656 }, { "epoch": 0.82, "grad_norm": 0.7107618471752077, "learning_rate": 1.7136142780723164e-06, "loss": 0.6666, "step": 4657 }, { "epoch": 0.82, "grad_norm": 0.6553048301348487, "learning_rate": 1.7104360573861456e-06, "loss": 0.6569, "step": 4658 }, { "epoch": 0.82, "grad_norm": 0.7037444743049592, "learning_rate": 1.707260511086054e-06, "loss": 0.6937, "step": 4659 }, { "epoch": 0.82, "grad_norm": 0.626352590503863, "learning_rate": 1.7040876401965355e-06, "loss": 0.6647, "step": 4660 }, { "epoch": 0.82, "grad_norm": 0.7176115519054681, "learning_rate": 1.7009174457412258e-06, "loss": 0.6904, "step": 4661 }, { "epoch": 0.82, "grad_norm": 0.6514160564725299, "learning_rate": 1.6977499287428944e-06, "loss": 0.6891, "step": 4662 }, { "epoch": 0.82, "grad_norm": 0.6653571258284761, "learning_rate": 1.6945850902234506e-06, "loss": 0.6743, "step": 4663 }, { "epoch": 0.82, "grad_norm": 0.6378753384490797, "learning_rate": 1.6914229312039387e-06, "loss": 0.6877, "step": 4664 }, { "epoch": 0.82, "grad_norm": 0.6957025245826159, "learning_rate": 1.6882634527045328e-06, "loss": 0.6918, "step": 4665 }, { "epoch": 0.82, "grad_norm": 0.6479913705200788, "learning_rate": 1.6851066557445505e-06, "loss": 0.6867, "step": 4666 }, { "epoch": 0.82, "grad_norm": 0.6842224702404357, "learning_rate": 1.6819525413424354e-06, "loss": 0.6954, "step": 4667 }, { "epoch": 0.82, "grad_norm": 0.6706047910707377, "learning_rate": 1.6788011105157731e-06, "loss": 0.6965, "step": 4668 }, { "epoch": 0.82, "grad_norm": 0.6342890891718624, "learning_rate": 1.6756523642812815e-06, "loss": 0.6784, "step": 4669 }, { "epoch": 0.82, "grad_norm": 0.7450188833205046, "learning_rate": 1.6725063036548138e-06, "loss": 0.7102, "step": 4670 }, { "epoch": 0.82, "grad_norm": 0.6493030117324775, "learning_rate": 1.6693629296513503e-06, "loss": 0.6721, "step": 4671 }, { "epoch": 0.82, "grad_norm": 0.7149432627475433, "learning_rate": 1.6662222432850096e-06, "loss": 0.6702, "step": 4672 }, { "epoch": 0.82, "grad_norm": 0.6479056071933678, "learning_rate": 1.6630842455690443e-06, "loss": 0.7361, "step": 4673 }, { "epoch": 0.82, "grad_norm": 0.6963659854138308, "learning_rate": 1.6599489375158383e-06, "loss": 0.7277, "step": 4674 }, { "epoch": 0.82, "grad_norm": 0.691366333648258, "learning_rate": 1.6568163201369081e-06, "loss": 0.7185, "step": 4675 }, { "epoch": 0.82, "grad_norm": 0.5627432335971987, "learning_rate": 1.6536863944428982e-06, "loss": 0.6651, "step": 4676 }, { "epoch": 0.82, "grad_norm": 0.6530731591214513, "learning_rate": 1.65055916144359e-06, "loss": 0.7075, "step": 4677 }, { "epoch": 0.82, "grad_norm": 0.6840705630127601, "learning_rate": 1.6474346221478931e-06, "loss": 0.72, "step": 4678 }, { "epoch": 0.82, "grad_norm": 0.7036666266910252, "learning_rate": 1.644312777563848e-06, "loss": 0.7484, "step": 4679 }, { "epoch": 0.82, "grad_norm": 0.6543558437564064, "learning_rate": 1.6411936286986286e-06, "loss": 0.6698, "step": 4680 }, { "epoch": 0.82, "grad_norm": 0.769081165891024, "learning_rate": 1.6380771765585357e-06, "loss": 0.7193, "step": 4681 }, { "epoch": 0.82, "grad_norm": 0.6913401853765636, "learning_rate": 1.6349634221490063e-06, "loss": 0.6639, "step": 4682 }, { "epoch": 0.82, "grad_norm": 0.7467016367195208, "learning_rate": 1.6318523664745966e-06, "loss": 0.7415, "step": 4683 }, { "epoch": 0.82, "grad_norm": 0.8160606272411302, "learning_rate": 1.6287440105390028e-06, "loss": 0.8014, "step": 4684 }, { "epoch": 0.82, "grad_norm": 0.6332313456953527, "learning_rate": 1.6256383553450394e-06, "loss": 0.6468, "step": 4685 }, { "epoch": 0.82, "grad_norm": 0.696225372887348, "learning_rate": 1.6225354018946605e-06, "loss": 0.7125, "step": 4686 }, { "epoch": 0.82, "grad_norm": 0.6244208317437558, "learning_rate": 1.6194351511889418e-06, "loss": 0.6977, "step": 4687 }, { "epoch": 0.82, "grad_norm": 0.5871001950215318, "learning_rate": 1.6163376042280909e-06, "loss": 0.6466, "step": 4688 }, { "epoch": 0.82, "grad_norm": 0.6460513560438443, "learning_rate": 1.6132427620114366e-06, "loss": 0.689, "step": 4689 }, { "epoch": 0.82, "grad_norm": 0.6277024723388414, "learning_rate": 1.6101506255374422e-06, "loss": 0.6903, "step": 4690 }, { "epoch": 0.82, "grad_norm": 0.6001226464114388, "learning_rate": 1.6070611958036953e-06, "loss": 0.633, "step": 4691 }, { "epoch": 0.82, "grad_norm": 0.6773580774870095, "learning_rate": 1.6039744738069097e-06, "loss": 0.6932, "step": 4692 }, { "epoch": 0.82, "grad_norm": 0.7252279571264698, "learning_rate": 1.6008904605429288e-06, "loss": 0.7041, "step": 4693 }, { "epoch": 0.82, "grad_norm": 0.706783101341306, "learning_rate": 1.597809157006719e-06, "loss": 0.6851, "step": 4694 }, { "epoch": 0.82, "grad_norm": 0.8894084147452546, "learning_rate": 1.5947305641923672e-06, "loss": 0.7052, "step": 4695 }, { "epoch": 0.82, "grad_norm": 0.6678441153022964, "learning_rate": 1.5916546830930967e-06, "loss": 0.6873, "step": 4696 }, { "epoch": 0.82, "grad_norm": 0.6794751496353555, "learning_rate": 1.5885815147012507e-06, "loss": 0.6999, "step": 4697 }, { "epoch": 0.82, "grad_norm": 0.7047943946190242, "learning_rate": 1.585511060008297e-06, "loss": 0.6834, "step": 4698 }, { "epoch": 0.82, "grad_norm": 0.6448194625264313, "learning_rate": 1.582443320004826e-06, "loss": 0.6795, "step": 4699 }, { "epoch": 0.82, "grad_norm": 0.7139407016806248, "learning_rate": 1.5793782956805603e-06, "loss": 0.7177, "step": 4700 }, { "epoch": 0.82, "grad_norm": 0.6245002814511571, "learning_rate": 1.576315988024335e-06, "loss": 0.687, "step": 4701 }, { "epoch": 0.82, "grad_norm": 0.7656075769112065, "learning_rate": 1.5732563980241178e-06, "loss": 0.7529, "step": 4702 }, { "epoch": 0.82, "grad_norm": 0.6137069577054421, "learning_rate": 1.5701995266669923e-06, "loss": 0.6658, "step": 4703 }, { "epoch": 0.82, "grad_norm": 0.6827902980743689, "learning_rate": 1.5671453749391707e-06, "loss": 0.6942, "step": 4704 }, { "epoch": 0.83, "grad_norm": 0.6671084523122224, "learning_rate": 1.5640939438259873e-06, "loss": 0.6785, "step": 4705 }, { "epoch": 0.83, "grad_norm": 0.6848047551076408, "learning_rate": 1.5610452343118986e-06, "loss": 0.6673, "step": 4706 }, { "epoch": 0.83, "grad_norm": 0.6880267352456603, "learning_rate": 1.5579992473804773e-06, "loss": 0.6875, "step": 4707 }, { "epoch": 0.83, "grad_norm": 0.6482312109827418, "learning_rate": 1.5549559840144258e-06, "loss": 0.69, "step": 4708 }, { "epoch": 0.83, "grad_norm": 0.7574197234090781, "learning_rate": 1.551915445195562e-06, "loss": 0.7235, "step": 4709 }, { "epoch": 0.83, "grad_norm": 0.7446928947791482, "learning_rate": 1.5488776319048283e-06, "loss": 0.7232, "step": 4710 }, { "epoch": 0.83, "grad_norm": 0.6507302819567733, "learning_rate": 1.5458425451222903e-06, "loss": 0.6829, "step": 4711 }, { "epoch": 0.83, "grad_norm": 0.616548211450258, "learning_rate": 1.542810185827125e-06, "loss": 0.6691, "step": 4712 }, { "epoch": 0.83, "grad_norm": 0.7438372800005378, "learning_rate": 1.5397805549976352e-06, "loss": 0.7482, "step": 4713 }, { "epoch": 0.83, "grad_norm": 0.6499602216528263, "learning_rate": 1.5367536536112426e-06, "loss": 0.7167, "step": 4714 }, { "epoch": 0.83, "grad_norm": 0.6869869458943392, "learning_rate": 1.5337294826444915e-06, "loss": 0.7174, "step": 4715 }, { "epoch": 0.83, "grad_norm": 0.7607100605879255, "learning_rate": 1.5307080430730404e-06, "loss": 0.7373, "step": 4716 }, { "epoch": 0.83, "grad_norm": 0.753360097596143, "learning_rate": 1.5276893358716706e-06, "loss": 0.7391, "step": 4717 }, { "epoch": 0.83, "grad_norm": 0.7020437663052306, "learning_rate": 1.5246733620142773e-06, "loss": 0.7224, "step": 4718 }, { "epoch": 0.83, "grad_norm": 0.6844852717992717, "learning_rate": 1.521660122473878e-06, "loss": 0.6912, "step": 4719 }, { "epoch": 0.83, "grad_norm": 0.7264917848639948, "learning_rate": 1.5186496182226063e-06, "loss": 0.7345, "step": 4720 }, { "epoch": 0.83, "grad_norm": 0.7135575753418402, "learning_rate": 1.5156418502317162e-06, "loss": 0.7258, "step": 4721 }, { "epoch": 0.83, "grad_norm": 0.6795271564149057, "learning_rate": 1.5126368194715712e-06, "loss": 0.7171, "step": 4722 }, { "epoch": 0.83, "grad_norm": 0.584695423689043, "learning_rate": 1.509634526911662e-06, "loss": 0.6611, "step": 4723 }, { "epoch": 0.83, "grad_norm": 0.6899455832297352, "learning_rate": 1.5066349735205865e-06, "loss": 0.7216, "step": 4724 }, { "epoch": 0.83, "grad_norm": 0.6502431884450997, "learning_rate": 1.5036381602660643e-06, "loss": 0.6989, "step": 4725 }, { "epoch": 0.83, "grad_norm": 0.6953021025272058, "learning_rate": 1.5006440881149309e-06, "loss": 0.6599, "step": 4726 }, { "epoch": 0.83, "grad_norm": 0.6433418509715125, "learning_rate": 1.4976527580331347e-06, "loss": 0.7316, "step": 4727 }, { "epoch": 0.83, "grad_norm": 0.6989503841429595, "learning_rate": 1.4946641709857434e-06, "loss": 0.6716, "step": 4728 }, { "epoch": 0.83, "grad_norm": 0.6380393766185881, "learning_rate": 1.4916783279369384e-06, "loss": 0.6947, "step": 4729 }, { "epoch": 0.83, "grad_norm": 0.6646686850485624, "learning_rate": 1.4886952298500124e-06, "loss": 0.7004, "step": 4730 }, { "epoch": 0.83, "grad_norm": 0.7673337592901556, "learning_rate": 1.4857148776873741e-06, "loss": 0.7134, "step": 4731 }, { "epoch": 0.83, "grad_norm": 0.6304558670602254, "learning_rate": 1.4827372724105483e-06, "loss": 0.6971, "step": 4732 }, { "epoch": 0.83, "grad_norm": 0.6348422242143614, "learning_rate": 1.4797624149801726e-06, "loss": 0.6912, "step": 4733 }, { "epoch": 0.83, "grad_norm": 0.6889504526384641, "learning_rate": 1.4767903063559973e-06, "loss": 0.7017, "step": 4734 }, { "epoch": 0.83, "grad_norm": 0.7494002268483599, "learning_rate": 1.4738209474968912e-06, "loss": 0.742, "step": 4735 }, { "epoch": 0.83, "grad_norm": 0.6608031718235026, "learning_rate": 1.4708543393608244e-06, "loss": 0.6933, "step": 4736 }, { "epoch": 0.83, "grad_norm": 0.6726643152086824, "learning_rate": 1.4678904829048901e-06, "loss": 0.704, "step": 4737 }, { "epoch": 0.83, "grad_norm": 0.8178065344716932, "learning_rate": 1.4649293790852904e-06, "loss": 0.7838, "step": 4738 }, { "epoch": 0.83, "grad_norm": 0.6228039717815039, "learning_rate": 1.4619710288573397e-06, "loss": 0.6602, "step": 4739 }, { "epoch": 0.83, "grad_norm": 0.7179495152760568, "learning_rate": 1.4590154331754602e-06, "loss": 0.7336, "step": 4740 }, { "epoch": 0.83, "grad_norm": 0.6820620772437497, "learning_rate": 1.4560625929931937e-06, "loss": 0.6985, "step": 4741 }, { "epoch": 0.83, "grad_norm": 0.5868530113159003, "learning_rate": 1.4531125092631815e-06, "loss": 0.6428, "step": 4742 }, { "epoch": 0.83, "grad_norm": 0.6430078450460105, "learning_rate": 1.4501651829371865e-06, "loss": 0.6576, "step": 4743 }, { "epoch": 0.83, "grad_norm": 0.6652209928856478, "learning_rate": 1.4472206149660761e-06, "loss": 0.688, "step": 4744 }, { "epoch": 0.83, "grad_norm": 0.8293685680434129, "learning_rate": 1.4442788062998303e-06, "loss": 0.6964, "step": 4745 }, { "epoch": 0.83, "grad_norm": 0.637386111070758, "learning_rate": 1.44133975788754e-06, "loss": 0.6708, "step": 4746 }, { "epoch": 0.83, "grad_norm": 0.7156513101965623, "learning_rate": 1.4384034706773974e-06, "loss": 0.7422, "step": 4747 }, { "epoch": 0.83, "grad_norm": 0.6996370615375304, "learning_rate": 1.4354699456167164e-06, "loss": 0.6847, "step": 4748 }, { "epoch": 0.83, "grad_norm": 0.6726300549136773, "learning_rate": 1.4325391836519087e-06, "loss": 0.7283, "step": 4749 }, { "epoch": 0.83, "grad_norm": 0.6886077946184247, "learning_rate": 1.4296111857285e-06, "loss": 0.6785, "step": 4750 }, { "epoch": 0.83, "grad_norm": 0.7404085913512789, "learning_rate": 1.4266859527911249e-06, "loss": 0.7339, "step": 4751 }, { "epoch": 0.83, "grad_norm": 0.59083812382409, "learning_rate": 1.4237634857835248e-06, "loss": 0.6659, "step": 4752 }, { "epoch": 0.83, "grad_norm": 0.6130640218293704, "learning_rate": 1.4208437856485468e-06, "loss": 0.6674, "step": 4753 }, { "epoch": 0.83, "grad_norm": 0.6829965081032678, "learning_rate": 1.4179268533281464e-06, "loss": 0.6951, "step": 4754 }, { "epoch": 0.83, "grad_norm": 0.7240908029454765, "learning_rate": 1.4150126897633886e-06, "loss": 0.7009, "step": 4755 }, { "epoch": 0.83, "grad_norm": 0.7104604242004848, "learning_rate": 1.4121012958944436e-06, "loss": 0.7235, "step": 4756 }, { "epoch": 0.83, "grad_norm": 0.6415878525929153, "learning_rate": 1.4091926726605886e-06, "loss": 0.6762, "step": 4757 }, { "epoch": 0.83, "grad_norm": 0.6671879721643932, "learning_rate": 1.4062868210002024e-06, "loss": 0.7411, "step": 4758 }, { "epoch": 0.83, "grad_norm": 0.6030837536506765, "learning_rate": 1.4033837418507768e-06, "loss": 0.6423, "step": 4759 }, { "epoch": 0.83, "grad_norm": 0.7391024443091248, "learning_rate": 1.400483436148903e-06, "loss": 0.7023, "step": 4760 }, { "epoch": 0.83, "grad_norm": 0.6341042355623117, "learning_rate": 1.3975859048302798e-06, "loss": 0.6703, "step": 4761 }, { "epoch": 0.83, "grad_norm": 0.6790344616327368, "learning_rate": 1.3946911488297133e-06, "loss": 0.7281, "step": 4762 }, { "epoch": 0.84, "grad_norm": 0.6741757143952178, "learning_rate": 1.3917991690811117e-06, "loss": 0.7399, "step": 4763 }, { "epoch": 0.84, "grad_norm": 0.7878898573172304, "learning_rate": 1.3889099665174898e-06, "loss": 0.7264, "step": 4764 }, { "epoch": 0.84, "grad_norm": 0.6476082850121421, "learning_rate": 1.38602354207096e-06, "loss": 0.7183, "step": 4765 }, { "epoch": 0.84, "grad_norm": 0.7331053641895812, "learning_rate": 1.3831398966727473e-06, "loss": 0.7301, "step": 4766 }, { "epoch": 0.84, "grad_norm": 0.6301458110478818, "learning_rate": 1.3802590312531717e-06, "loss": 0.7056, "step": 4767 }, { "epoch": 0.84, "grad_norm": 0.6601684149183544, "learning_rate": 1.3773809467416621e-06, "loss": 0.6433, "step": 4768 }, { "epoch": 0.84, "grad_norm": 0.6666510503693993, "learning_rate": 1.3745056440667503e-06, "loss": 0.6832, "step": 4769 }, { "epoch": 0.84, "grad_norm": 0.6632555395646473, "learning_rate": 1.3716331241560688e-06, "loss": 0.6541, "step": 4770 }, { "epoch": 0.84, "grad_norm": 0.7205023737926639, "learning_rate": 1.3687633879363494e-06, "loss": 0.6579, "step": 4771 }, { "epoch": 0.84, "grad_norm": 0.6728635842756192, "learning_rate": 1.3658964363334304e-06, "loss": 0.7088, "step": 4772 }, { "epoch": 0.84, "grad_norm": 0.7161502097499756, "learning_rate": 1.3630322702722498e-06, "loss": 0.6607, "step": 4773 }, { "epoch": 0.84, "grad_norm": 0.7122991951406784, "learning_rate": 1.3601708906768486e-06, "loss": 0.695, "step": 4774 }, { "epoch": 0.84, "grad_norm": 0.7246323863685169, "learning_rate": 1.357312298470369e-06, "loss": 0.7659, "step": 4775 }, { "epoch": 0.84, "grad_norm": 0.7062706149731478, "learning_rate": 1.3544564945750495e-06, "loss": 0.7033, "step": 4776 }, { "epoch": 0.84, "grad_norm": 0.6701819448534938, "learning_rate": 1.3516034799122303e-06, "loss": 0.6944, "step": 4777 }, { "epoch": 0.84, "grad_norm": 0.7719666385763106, "learning_rate": 1.3487532554023553e-06, "loss": 0.7191, "step": 4778 }, { "epoch": 0.84, "grad_norm": 0.6868568370196396, "learning_rate": 1.3459058219649678e-06, "loss": 0.683, "step": 4779 }, { "epoch": 0.84, "grad_norm": 0.713783122366391, "learning_rate": 1.3430611805187066e-06, "loss": 0.7234, "step": 4780 }, { "epoch": 0.84, "grad_norm": 0.6981344785187124, "learning_rate": 1.3402193319813173e-06, "loss": 0.6974, "step": 4781 }, { "epoch": 0.84, "grad_norm": 0.8301309162026729, "learning_rate": 1.337380277269632e-06, "loss": 0.7866, "step": 4782 }, { "epoch": 0.84, "grad_norm": 0.7537076084946169, "learning_rate": 1.3345440172995939e-06, "loss": 0.742, "step": 4783 }, { "epoch": 0.84, "grad_norm": 0.6331608088011959, "learning_rate": 1.3317105529862384e-06, "loss": 0.6814, "step": 4784 }, { "epoch": 0.84, "grad_norm": 0.7292611161087612, "learning_rate": 1.3288798852437025e-06, "loss": 0.7201, "step": 4785 }, { "epoch": 0.84, "grad_norm": 0.7784856911454575, "learning_rate": 1.326052014985214e-06, "loss": 0.7589, "step": 4786 }, { "epoch": 0.84, "grad_norm": 0.6804735530802649, "learning_rate": 1.323226943123105e-06, "loss": 0.6863, "step": 4787 }, { "epoch": 0.84, "grad_norm": 0.6658051831397126, "learning_rate": 1.3204046705688045e-06, "loss": 0.6668, "step": 4788 }, { "epoch": 0.84, "grad_norm": 0.5766032291336665, "learning_rate": 1.3175851982328324e-06, "loss": 0.6693, "step": 4789 }, { "epoch": 0.84, "grad_norm": 0.6703660877340406, "learning_rate": 1.3147685270248113e-06, "loss": 0.7041, "step": 4790 }, { "epoch": 0.84, "grad_norm": 0.6431076690714641, "learning_rate": 1.3119546578534582e-06, "loss": 0.6792, "step": 4791 }, { "epoch": 0.84, "grad_norm": 0.747222987643262, "learning_rate": 1.309143591626586e-06, "loss": 0.7339, "step": 4792 }, { "epoch": 0.84, "grad_norm": 0.6207811673726294, "learning_rate": 1.3063353292511061e-06, "loss": 0.6403, "step": 4793 }, { "epoch": 0.84, "grad_norm": 0.6249940181339395, "learning_rate": 1.303529871633018e-06, "loss": 0.6707, "step": 4794 }, { "epoch": 0.84, "grad_norm": 0.5994443037199673, "learning_rate": 1.3007272196774212e-06, "loss": 0.6601, "step": 4795 }, { "epoch": 0.84, "grad_norm": 0.6983858673058554, "learning_rate": 1.29792737428851e-06, "loss": 0.7291, "step": 4796 }, { "epoch": 0.84, "grad_norm": 0.5971199963047447, "learning_rate": 1.2951303363695732e-06, "loss": 0.6979, "step": 4797 }, { "epoch": 0.84, "grad_norm": 0.6282339110969687, "learning_rate": 1.2923361068229946e-06, "loss": 0.6817, "step": 4798 }, { "epoch": 0.84, "grad_norm": 0.7374839388884193, "learning_rate": 1.2895446865502525e-06, "loss": 0.6931, "step": 4799 }, { "epoch": 0.84, "grad_norm": 0.6764287988479992, "learning_rate": 1.2867560764519126e-06, "loss": 0.738, "step": 4800 }, { "epoch": 0.84, "grad_norm": 0.6782477196231718, "learning_rate": 1.2839702774276409e-06, "loss": 0.6694, "step": 4801 }, { "epoch": 0.84, "grad_norm": 0.6588670093927232, "learning_rate": 1.2811872903761936e-06, "loss": 0.68, "step": 4802 }, { "epoch": 0.84, "grad_norm": 0.6474565397090951, "learning_rate": 1.2784071161954237e-06, "loss": 0.6705, "step": 4803 }, { "epoch": 0.84, "grad_norm": 0.6743271629639025, "learning_rate": 1.2756297557822683e-06, "loss": 0.6657, "step": 4804 }, { "epoch": 0.84, "grad_norm": 0.7034797040570117, "learning_rate": 1.2728552100327663e-06, "loss": 0.6878, "step": 4805 }, { "epoch": 0.84, "grad_norm": 0.6656336466153981, "learning_rate": 1.2700834798420403e-06, "loss": 0.6886, "step": 4806 }, { "epoch": 0.84, "grad_norm": 0.7075497189242923, "learning_rate": 1.2673145661043085e-06, "loss": 0.7276, "step": 4807 }, { "epoch": 0.84, "grad_norm": 0.6641957899717228, "learning_rate": 1.2645484697128819e-06, "loss": 0.6869, "step": 4808 }, { "epoch": 0.84, "grad_norm": 0.69913131875725, "learning_rate": 1.2617851915601597e-06, "loss": 0.7046, "step": 4809 }, { "epoch": 0.84, "grad_norm": 0.6282810944399446, "learning_rate": 1.2590247325376337e-06, "loss": 0.699, "step": 4810 }, { "epoch": 0.84, "grad_norm": 0.6586695443808623, "learning_rate": 1.2562670935358878e-06, "loss": 0.6961, "step": 4811 }, { "epoch": 0.84, "grad_norm": 0.6008855700757453, "learning_rate": 1.2535122754445916e-06, "loss": 0.6147, "step": 4812 }, { "epoch": 0.84, "grad_norm": 0.6977069503186857, "learning_rate": 1.2507602791525042e-06, "loss": 0.6821, "step": 4813 }, { "epoch": 0.84, "grad_norm": 0.6524217055388718, "learning_rate": 1.2480111055474796e-06, "loss": 0.6659, "step": 4814 }, { "epoch": 0.84, "grad_norm": 0.7498900340803232, "learning_rate": 1.2452647555164588e-06, "loss": 0.6994, "step": 4815 }, { "epoch": 0.84, "grad_norm": 0.7280710879291313, "learning_rate": 1.2425212299454702e-06, "loss": 0.6675, "step": 4816 }, { "epoch": 0.84, "grad_norm": 0.7058669277689203, "learning_rate": 1.2397805297196353e-06, "loss": 0.7153, "step": 4817 }, { "epoch": 0.84, "grad_norm": 0.7294868159375801, "learning_rate": 1.2370426557231563e-06, "loss": 0.7443, "step": 4818 }, { "epoch": 0.84, "grad_norm": 0.7151294504428718, "learning_rate": 1.2343076088393324e-06, "loss": 0.7637, "step": 4819 }, { "epoch": 0.85, "grad_norm": 0.7063974785134616, "learning_rate": 1.2315753899505446e-06, "loss": 0.6987, "step": 4820 }, { "epoch": 0.85, "grad_norm": 0.7265672003523526, "learning_rate": 1.2288459999382663e-06, "loss": 0.7103, "step": 4821 }, { "epoch": 0.85, "grad_norm": 0.6530580577673792, "learning_rate": 1.2261194396830523e-06, "loss": 0.6956, "step": 4822 }, { "epoch": 0.85, "grad_norm": 0.6622418962023245, "learning_rate": 1.223395710064551e-06, "loss": 0.6513, "step": 4823 }, { "epoch": 0.85, "grad_norm": 0.653470426488364, "learning_rate": 1.2206748119614898e-06, "loss": 0.6627, "step": 4824 }, { "epoch": 0.85, "grad_norm": 0.6388244185414118, "learning_rate": 1.2179567462516906e-06, "loss": 0.6908, "step": 4825 }, { "epoch": 0.85, "grad_norm": 0.6783308950945004, "learning_rate": 1.2152415138120576e-06, "loss": 0.7408, "step": 4826 }, { "epoch": 0.85, "grad_norm": 0.6275967730090444, "learning_rate": 1.2125291155185803e-06, "loss": 0.6659, "step": 4827 }, { "epoch": 0.85, "grad_norm": 0.6530802967993884, "learning_rate": 1.2098195522463386e-06, "loss": 0.6682, "step": 4828 }, { "epoch": 0.85, "grad_norm": 0.7243006691898933, "learning_rate": 1.2071128248694886e-06, "loss": 0.7231, "step": 4829 }, { "epoch": 0.85, "grad_norm": 0.6702433860566109, "learning_rate": 1.2044089342612807e-06, "loss": 0.6999, "step": 4830 }, { "epoch": 0.85, "grad_norm": 0.6768883387568334, "learning_rate": 1.201707881294044e-06, "loss": 0.7146, "step": 4831 }, { "epoch": 0.85, "grad_norm": 0.6161983822269897, "learning_rate": 1.199009666839196e-06, "loss": 0.658, "step": 4832 }, { "epoch": 0.85, "grad_norm": 0.6431740566055447, "learning_rate": 1.1963142917672354e-06, "loss": 0.6769, "step": 4833 }, { "epoch": 0.85, "grad_norm": 0.6296486414557712, "learning_rate": 1.1936217569477505e-06, "loss": 0.6731, "step": 4834 }, { "epoch": 0.85, "grad_norm": 0.6508217859867451, "learning_rate": 1.1909320632494036e-06, "loss": 0.6697, "step": 4835 }, { "epoch": 0.85, "grad_norm": 0.7059237156671958, "learning_rate": 1.1882452115399489e-06, "loss": 0.6973, "step": 4836 }, { "epoch": 0.85, "grad_norm": 0.7445653973510997, "learning_rate": 1.1855612026862206e-06, "loss": 0.7516, "step": 4837 }, { "epoch": 0.85, "grad_norm": 0.6603233366538676, "learning_rate": 1.182880037554135e-06, "loss": 0.6632, "step": 4838 }, { "epoch": 0.85, "grad_norm": 0.64293288299969, "learning_rate": 1.1802017170086932e-06, "loss": 0.6789, "step": 4839 }, { "epoch": 0.85, "grad_norm": 0.7765429354700775, "learning_rate": 1.1775262419139788e-06, "loss": 0.7458, "step": 4840 }, { "epoch": 0.85, "grad_norm": 0.7045430227232269, "learning_rate": 1.1748536131331522e-06, "loss": 0.6621, "step": 4841 }, { "epoch": 0.85, "grad_norm": 0.7123200960122323, "learning_rate": 1.1721838315284595e-06, "loss": 0.7325, "step": 4842 }, { "epoch": 0.85, "grad_norm": 0.6718189132796649, "learning_rate": 1.1695168979612282e-06, "loss": 0.6625, "step": 4843 }, { "epoch": 0.85, "grad_norm": 0.6903159039175029, "learning_rate": 1.1668528132918677e-06, "loss": 0.6963, "step": 4844 }, { "epoch": 0.85, "grad_norm": 0.666753635327419, "learning_rate": 1.1641915783798663e-06, "loss": 0.7196, "step": 4845 }, { "epoch": 0.85, "grad_norm": 0.6301011292438301, "learning_rate": 1.1615331940837971e-06, "loss": 0.6889, "step": 4846 }, { "epoch": 0.85, "grad_norm": 0.6050076302484989, "learning_rate": 1.1588776612613063e-06, "loss": 0.6931, "step": 4847 }, { "epoch": 0.85, "grad_norm": 0.7298153750825876, "learning_rate": 1.156224980769124e-06, "loss": 0.7888, "step": 4848 }, { "epoch": 0.85, "grad_norm": 0.6719390262816924, "learning_rate": 1.1535751534630634e-06, "loss": 0.7011, "step": 4849 }, { "epoch": 0.85, "grad_norm": 0.691024870609015, "learning_rate": 1.150928180198011e-06, "loss": 0.7427, "step": 4850 }, { "epoch": 0.85, "grad_norm": 0.6275868720311245, "learning_rate": 1.1482840618279368e-06, "loss": 0.6773, "step": 4851 }, { "epoch": 0.85, "grad_norm": 0.65788819286804, "learning_rate": 1.1456427992058904e-06, "loss": 0.646, "step": 4852 }, { "epoch": 0.85, "grad_norm": 0.6671933477432723, "learning_rate": 1.143004393183994e-06, "loss": 0.7071, "step": 4853 }, { "epoch": 0.85, "grad_norm": 0.7408073118685726, "learning_rate": 1.1403688446134543e-06, "loss": 0.7234, "step": 4854 }, { "epoch": 0.85, "grad_norm": 0.5717768379698961, "learning_rate": 1.1377361543445542e-06, "loss": 0.6532, "step": 4855 }, { "epoch": 0.85, "grad_norm": 0.6167070490471913, "learning_rate": 1.1351063232266558e-06, "loss": 0.6802, "step": 4856 }, { "epoch": 0.85, "grad_norm": 0.6977011433193291, "learning_rate": 1.1324793521081966e-06, "loss": 0.7066, "step": 4857 }, { "epoch": 0.85, "grad_norm": 0.7160993816645385, "learning_rate": 1.1298552418366938e-06, "loss": 0.6947, "step": 4858 }, { "epoch": 0.85, "grad_norm": 0.6445036018646161, "learning_rate": 1.127233993258735e-06, "loss": 0.6843, "step": 4859 }, { "epoch": 0.85, "grad_norm": 0.6715101842304472, "learning_rate": 1.1246156072199933e-06, "loss": 0.7034, "step": 4860 }, { "epoch": 0.85, "grad_norm": 0.7242974379597207, "learning_rate": 1.1220000845652135e-06, "loss": 0.7119, "step": 4861 }, { "epoch": 0.85, "grad_norm": 0.6500869868896797, "learning_rate": 1.1193874261382198e-06, "loss": 0.6703, "step": 4862 }, { "epoch": 0.85, "grad_norm": 0.6596519607991453, "learning_rate": 1.116777632781909e-06, "loss": 0.7016, "step": 4863 }, { "epoch": 0.85, "grad_norm": 0.6475290177570928, "learning_rate": 1.1141707053382533e-06, "loss": 0.7128, "step": 4864 }, { "epoch": 0.85, "grad_norm": 0.7517856219309704, "learning_rate": 1.1115666446483042e-06, "loss": 0.7196, "step": 4865 }, { "epoch": 0.85, "grad_norm": 0.7649905339930431, "learning_rate": 1.1089654515521831e-06, "loss": 0.7668, "step": 4866 }, { "epoch": 0.85, "grad_norm": 0.7274581456065664, "learning_rate": 1.1063671268890941e-06, "loss": 0.7115, "step": 4867 }, { "epoch": 0.85, "grad_norm": 0.7304124061874501, "learning_rate": 1.1037716714973057e-06, "loss": 0.706, "step": 4868 }, { "epoch": 0.85, "grad_norm": 0.5953187857500197, "learning_rate": 1.1011790862141669e-06, "loss": 0.6513, "step": 4869 }, { "epoch": 0.85, "grad_norm": 0.7382749953722643, "learning_rate": 1.0985893718761032e-06, "loss": 0.7228, "step": 4870 }, { "epoch": 0.85, "grad_norm": 0.6907849652945868, "learning_rate": 1.096002529318606e-06, "loss": 0.7017, "step": 4871 }, { "epoch": 0.85, "grad_norm": 0.6321082311270549, "learning_rate": 1.0934185593762458e-06, "loss": 0.6864, "step": 4872 }, { "epoch": 0.85, "grad_norm": 0.6454731631554506, "learning_rate": 1.0908374628826667e-06, "loss": 0.6727, "step": 4873 }, { "epoch": 0.85, "grad_norm": 0.6474850881902792, "learning_rate": 1.0882592406705839e-06, "loss": 0.6798, "step": 4874 }, { "epoch": 0.85, "grad_norm": 0.7366304386519741, "learning_rate": 1.0856838935717862e-06, "loss": 0.7453, "step": 4875 }, { "epoch": 0.85, "grad_norm": 0.724308765521185, "learning_rate": 1.0831114224171334e-06, "loss": 0.6778, "step": 4876 }, { "epoch": 0.86, "grad_norm": 0.6715619650717174, "learning_rate": 1.0805418280365554e-06, "loss": 0.7027, "step": 4877 }, { "epoch": 0.86, "grad_norm": 0.6848498952685574, "learning_rate": 1.077975111259061e-06, "loss": 0.6769, "step": 4878 }, { "epoch": 0.86, "grad_norm": 0.6498414098950632, "learning_rate": 1.075411272912724e-06, "loss": 0.6903, "step": 4879 }, { "epoch": 0.86, "grad_norm": 0.6176596585387112, "learning_rate": 1.0728503138246937e-06, "loss": 0.7011, "step": 4880 }, { "epoch": 0.86, "grad_norm": 0.6236191408323828, "learning_rate": 1.0702922348211897e-06, "loss": 0.6685, "step": 4881 }, { "epoch": 0.86, "grad_norm": 0.5993106356865675, "learning_rate": 1.0677370367274987e-06, "loss": 0.6828, "step": 4882 }, { "epoch": 0.86, "grad_norm": 0.666362079414263, "learning_rate": 1.065184720367982e-06, "loss": 0.7404, "step": 4883 }, { "epoch": 0.86, "grad_norm": 0.60485778614732, "learning_rate": 1.0626352865660705e-06, "loss": 0.6811, "step": 4884 }, { "epoch": 0.86, "grad_norm": 0.7710951943923767, "learning_rate": 1.060088736144267e-06, "loss": 0.7486, "step": 4885 }, { "epoch": 0.86, "grad_norm": 0.7258074283728753, "learning_rate": 1.0575450699241364e-06, "loss": 0.6822, "step": 4886 }, { "epoch": 0.86, "grad_norm": 0.6421610518013774, "learning_rate": 1.0550042887263246e-06, "loss": 0.6958, "step": 4887 }, { "epoch": 0.86, "grad_norm": 0.6744656882030388, "learning_rate": 1.0524663933705348e-06, "loss": 0.7272, "step": 4888 }, { "epoch": 0.86, "grad_norm": 0.6427982265151857, "learning_rate": 1.0499313846755465e-06, "loss": 0.686, "step": 4889 }, { "epoch": 0.86, "grad_norm": 0.6447519324675348, "learning_rate": 1.0473992634592088e-06, "loss": 0.6475, "step": 4890 }, { "epoch": 0.86, "grad_norm": 0.6488328537102194, "learning_rate": 1.0448700305384341e-06, "loss": 0.7256, "step": 4891 }, { "epoch": 0.86, "grad_norm": 0.5980259745642345, "learning_rate": 1.042343686729208e-06, "loss": 0.6567, "step": 4892 }, { "epoch": 0.86, "grad_norm": 0.5664772589906449, "learning_rate": 1.0398202328465812e-06, "loss": 0.6583, "step": 4893 }, { "epoch": 0.86, "grad_norm": 0.7177764515045783, "learning_rate": 1.0372996697046723e-06, "loss": 0.7541, "step": 4894 }, { "epoch": 0.86, "grad_norm": 0.695141845393571, "learning_rate": 1.0347819981166652e-06, "loss": 0.6972, "step": 4895 }, { "epoch": 0.86, "grad_norm": 0.6923209819684719, "learning_rate": 1.0322672188948146e-06, "loss": 0.7233, "step": 4896 }, { "epoch": 0.86, "grad_norm": 0.6945020151996663, "learning_rate": 1.0297553328504417e-06, "loss": 0.6933, "step": 4897 }, { "epoch": 0.86, "grad_norm": 0.7441675676896198, "learning_rate": 1.0272463407939314e-06, "loss": 0.7205, "step": 4898 }, { "epoch": 0.86, "grad_norm": 0.7110725944299673, "learning_rate": 1.0247402435347409e-06, "loss": 0.714, "step": 4899 }, { "epoch": 0.86, "grad_norm": 0.6448520211972052, "learning_rate": 1.0222370418813854e-06, "loss": 0.6932, "step": 4900 }, { "epoch": 0.86, "grad_norm": 0.6811628834476571, "learning_rate": 1.01973673664145e-06, "loss": 0.7119, "step": 4901 }, { "epoch": 0.86, "grad_norm": 0.7138603376800582, "learning_rate": 1.0172393286215866e-06, "loss": 0.7011, "step": 4902 }, { "epoch": 0.86, "grad_norm": 0.6613630642783259, "learning_rate": 1.014744818627511e-06, "loss": 0.6802, "step": 4903 }, { "epoch": 0.86, "grad_norm": 0.6809612659560299, "learning_rate": 1.0122532074640057e-06, "loss": 0.7248, "step": 4904 }, { "epoch": 0.86, "grad_norm": 0.6511937967434017, "learning_rate": 1.009764495934914e-06, "loss": 0.6698, "step": 4905 }, { "epoch": 0.86, "grad_norm": 0.7222130481501683, "learning_rate": 1.007278684843146e-06, "loss": 0.6587, "step": 4906 }, { "epoch": 0.86, "grad_norm": 0.7478976780826949, "learning_rate": 1.0047957749906766e-06, "loss": 0.7666, "step": 4907 }, { "epoch": 0.86, "grad_norm": 0.6879133183276845, "learning_rate": 1.0023157671785444e-06, "loss": 0.6863, "step": 4908 }, { "epoch": 0.86, "grad_norm": 0.6549225291676015, "learning_rate": 9.998386622068512e-07, "loss": 0.6738, "step": 4909 }, { "epoch": 0.86, "grad_norm": 0.7058123130868491, "learning_rate": 9.973644608747657e-07, "loss": 0.7221, "step": 4910 }, { "epoch": 0.86, "grad_norm": 0.7853702890761152, "learning_rate": 9.948931639805127e-07, "loss": 0.6975, "step": 4911 }, { "epoch": 0.86, "grad_norm": 0.7067801324774641, "learning_rate": 9.924247723213854e-07, "loss": 0.696, "step": 4912 }, { "epoch": 0.86, "grad_norm": 0.6844105795611678, "learning_rate": 9.899592866937413e-07, "loss": 0.7139, "step": 4913 }, { "epoch": 0.86, "grad_norm": 0.6696231816556764, "learning_rate": 9.87496707892992e-07, "loss": 0.6837, "step": 4914 }, { "epoch": 0.86, "grad_norm": 0.6164697733305771, "learning_rate": 9.850370367136197e-07, "loss": 0.689, "step": 4915 }, { "epoch": 0.86, "grad_norm": 0.6940258306740746, "learning_rate": 9.825802739491686e-07, "loss": 0.7215, "step": 4916 }, { "epoch": 0.86, "grad_norm": 0.7539289807032434, "learning_rate": 9.801264203922356e-07, "loss": 0.7023, "step": 4917 }, { "epoch": 0.86, "grad_norm": 0.6909402712957017, "learning_rate": 9.776754768344877e-07, "loss": 0.7098, "step": 4918 }, { "epoch": 0.86, "grad_norm": 0.693728546627523, "learning_rate": 9.75227444066651e-07, "loss": 0.699, "step": 4919 }, { "epoch": 0.86, "grad_norm": 0.651143577531007, "learning_rate": 9.727823228785105e-07, "loss": 0.7179, "step": 4920 }, { "epoch": 0.86, "grad_norm": 0.7366228545955361, "learning_rate": 9.703401140589153e-07, "loss": 0.7006, "step": 4921 }, { "epoch": 0.86, "grad_norm": 0.6946680200403874, "learning_rate": 9.679008183957728e-07, "loss": 0.7106, "step": 4922 }, { "epoch": 0.86, "grad_norm": 0.6293700690968625, "learning_rate": 9.6546443667605e-07, "loss": 0.6794, "step": 4923 }, { "epoch": 0.86, "grad_norm": 0.7421096402885868, "learning_rate": 9.63030969685772e-07, "loss": 0.7021, "step": 4924 }, { "epoch": 0.86, "grad_norm": 0.6225324722133606, "learning_rate": 9.606004182100271e-07, "loss": 0.6719, "step": 4925 }, { "epoch": 0.86, "grad_norm": 0.6282415444115741, "learning_rate": 9.581727830329634e-07, "loss": 0.662, "step": 4926 }, { "epoch": 0.86, "grad_norm": 0.7152091374304664, "learning_rate": 9.557480649377858e-07, "loss": 0.714, "step": 4927 }, { "epoch": 0.86, "grad_norm": 0.6898568713746176, "learning_rate": 9.533262647067609e-07, "loss": 0.703, "step": 4928 }, { "epoch": 0.86, "grad_norm": 0.66937830641222, "learning_rate": 9.509073831212079e-07, "loss": 0.6748, "step": 4929 }, { "epoch": 0.86, "grad_norm": 0.6670575405327832, "learning_rate": 9.484914209615115e-07, "loss": 0.7026, "step": 4930 }, { "epoch": 0.86, "grad_norm": 0.6725675724624007, "learning_rate": 9.460783790071104e-07, "loss": 0.6979, "step": 4931 }, { "epoch": 0.86, "grad_norm": 0.6867284233537454, "learning_rate": 9.436682580365019e-07, "loss": 0.664, "step": 4932 }, { "epoch": 0.86, "grad_norm": 0.6081000426868497, "learning_rate": 9.412610588272408e-07, "loss": 0.6237, "step": 4933 }, { "epoch": 0.87, "grad_norm": 0.6326391699774856, "learning_rate": 9.388567821559424e-07, "loss": 0.6533, "step": 4934 }, { "epoch": 0.87, "grad_norm": 0.7079293461138848, "learning_rate": 9.36455428798273e-07, "loss": 0.7011, "step": 4935 }, { "epoch": 0.87, "grad_norm": 0.6700743055467192, "learning_rate": 9.340569995289617e-07, "loss": 0.7282, "step": 4936 }, { "epoch": 0.87, "grad_norm": 0.7183704799482746, "learning_rate": 9.316614951217895e-07, "loss": 0.7225, "step": 4937 }, { "epoch": 0.87, "grad_norm": 0.5819503186356885, "learning_rate": 9.292689163495983e-07, "loss": 0.6131, "step": 4938 }, { "epoch": 0.87, "grad_norm": 0.7305589742474686, "learning_rate": 9.268792639842839e-07, "loss": 0.7433, "step": 4939 }, { "epoch": 0.87, "grad_norm": 0.6585060499830437, "learning_rate": 9.244925387967974e-07, "loss": 0.6692, "step": 4940 }, { "epoch": 0.87, "grad_norm": 0.654344126377863, "learning_rate": 9.221087415571439e-07, "loss": 0.6834, "step": 4941 }, { "epoch": 0.87, "grad_norm": 0.6827085163890979, "learning_rate": 9.197278730343883e-07, "loss": 0.7078, "step": 4942 }, { "epoch": 0.87, "grad_norm": 0.6279221848021753, "learning_rate": 9.173499339966474e-07, "loss": 0.6978, "step": 4943 }, { "epoch": 0.87, "grad_norm": 0.651305249034445, "learning_rate": 9.149749252110951e-07, "loss": 0.6614, "step": 4944 }, { "epoch": 0.87, "grad_norm": 0.6242960767098168, "learning_rate": 9.126028474439596e-07, "loss": 0.7129, "step": 4945 }, { "epoch": 0.87, "grad_norm": 0.7669378386924466, "learning_rate": 9.102337014605189e-07, "loss": 0.7215, "step": 4946 }, { "epoch": 0.87, "grad_norm": 0.7077178233648591, "learning_rate": 9.078674880251126e-07, "loss": 0.6944, "step": 4947 }, { "epoch": 0.87, "grad_norm": 0.65322175552763, "learning_rate": 9.055042079011278e-07, "loss": 0.7309, "step": 4948 }, { "epoch": 0.87, "grad_norm": 0.6727162987913881, "learning_rate": 9.031438618510102e-07, "loss": 0.6777, "step": 4949 }, { "epoch": 0.87, "grad_norm": 0.7392313781440198, "learning_rate": 9.007864506362551e-07, "loss": 0.7457, "step": 4950 }, { "epoch": 0.87, "grad_norm": 0.6974897084031096, "learning_rate": 8.984319750174119e-07, "loss": 0.711, "step": 4951 }, { "epoch": 0.87, "grad_norm": 0.7180106608895278, "learning_rate": 8.960804357540864e-07, "loss": 0.6579, "step": 4952 }, { "epoch": 0.87, "grad_norm": 0.6963383001832163, "learning_rate": 8.937318336049316e-07, "loss": 0.747, "step": 4953 }, { "epoch": 0.87, "grad_norm": 0.6425405594551552, "learning_rate": 8.913861693276549e-07, "loss": 0.6631, "step": 4954 }, { "epoch": 0.87, "grad_norm": 0.6950185860053312, "learning_rate": 8.890434436790185e-07, "loss": 0.7208, "step": 4955 }, { "epoch": 0.87, "grad_norm": 0.6164508383941375, "learning_rate": 8.867036574148336e-07, "loss": 0.6705, "step": 4956 }, { "epoch": 0.87, "grad_norm": 0.743454642797909, "learning_rate": 8.84366811289965e-07, "loss": 0.6694, "step": 4957 }, { "epoch": 0.87, "grad_norm": 0.6719361682814701, "learning_rate": 8.820329060583277e-07, "loss": 0.7026, "step": 4958 }, { "epoch": 0.87, "grad_norm": 0.6765754848455626, "learning_rate": 8.797019424728859e-07, "loss": 0.672, "step": 4959 }, { "epoch": 0.87, "grad_norm": 0.5803676169275447, "learning_rate": 8.773739212856591e-07, "loss": 0.6588, "step": 4960 }, { "epoch": 0.87, "grad_norm": 0.5988240144149433, "learning_rate": 8.750488432477144e-07, "loss": 0.7321, "step": 4961 }, { "epoch": 0.87, "grad_norm": 0.7003342176413121, "learning_rate": 8.727267091091718e-07, "loss": 0.705, "step": 4962 }, { "epoch": 0.87, "grad_norm": 0.7470275192521137, "learning_rate": 8.704075196192008e-07, "loss": 0.7543, "step": 4963 }, { "epoch": 0.87, "grad_norm": 0.6503006229592941, "learning_rate": 8.680912755260173e-07, "loss": 0.6829, "step": 4964 }, { "epoch": 0.87, "grad_norm": 0.575767601170546, "learning_rate": 8.657779775768916e-07, "loss": 0.6824, "step": 4965 }, { "epoch": 0.87, "grad_norm": 0.6444040568237167, "learning_rate": 8.634676265181408e-07, "loss": 0.7183, "step": 4966 }, { "epoch": 0.87, "grad_norm": 0.7320857456073832, "learning_rate": 8.611602230951343e-07, "loss": 0.6834, "step": 4967 }, { "epoch": 0.87, "grad_norm": 0.5971103490360162, "learning_rate": 8.588557680522891e-07, "loss": 0.6437, "step": 4968 }, { "epoch": 0.87, "grad_norm": 0.7093362353895313, "learning_rate": 8.56554262133068e-07, "loss": 0.7363, "step": 4969 }, { "epoch": 0.87, "grad_norm": 0.760359424397373, "learning_rate": 8.54255706079985e-07, "loss": 0.7054, "step": 4970 }, { "epoch": 0.87, "grad_norm": 0.7878873664543418, "learning_rate": 8.519601006346023e-07, "loss": 0.7341, "step": 4971 }, { "epoch": 0.87, "grad_norm": 0.6340844395406424, "learning_rate": 8.496674465375298e-07, "loss": 0.6801, "step": 4972 }, { "epoch": 0.87, "grad_norm": 0.6443058047282542, "learning_rate": 8.47377744528427e-07, "loss": 0.7202, "step": 4973 }, { "epoch": 0.87, "grad_norm": 0.6470038910556581, "learning_rate": 8.450909953460007e-07, "loss": 0.693, "step": 4974 }, { "epoch": 0.87, "grad_norm": 0.6092907125581103, "learning_rate": 8.428071997279996e-07, "loss": 0.6913, "step": 4975 }, { "epoch": 0.87, "grad_norm": 0.604723964796157, "learning_rate": 8.405263584112255e-07, "loss": 0.6734, "step": 4976 }, { "epoch": 0.87, "grad_norm": 0.7352107107078331, "learning_rate": 8.382484721315287e-07, "loss": 0.7393, "step": 4977 }, { "epoch": 0.87, "grad_norm": 0.7497338415006836, "learning_rate": 8.359735416237969e-07, "loss": 0.7511, "step": 4978 }, { "epoch": 0.87, "grad_norm": 0.6957474226022387, "learning_rate": 8.33701567621974e-07, "loss": 0.6757, "step": 4979 }, { "epoch": 0.87, "grad_norm": 0.661691606185396, "learning_rate": 8.314325508590448e-07, "loss": 0.7141, "step": 4980 }, { "epoch": 0.87, "grad_norm": 0.7266838791839944, "learning_rate": 8.29166492067044e-07, "loss": 0.7399, "step": 4981 }, { "epoch": 0.87, "grad_norm": 0.7360360146550868, "learning_rate": 8.269033919770453e-07, "loss": 0.7139, "step": 4982 }, { "epoch": 0.87, "grad_norm": 0.6958502376252015, "learning_rate": 8.246432513191738e-07, "loss": 0.6809, "step": 4983 }, { "epoch": 0.87, "grad_norm": 0.6337509386331998, "learning_rate": 8.223860708225984e-07, "loss": 0.6636, "step": 4984 }, { "epoch": 0.87, "grad_norm": 0.6360416764916568, "learning_rate": 8.201318512155309e-07, "loss": 0.7108, "step": 4985 }, { "epoch": 0.87, "grad_norm": 0.6362319851107495, "learning_rate": 8.178805932252343e-07, "loss": 0.6461, "step": 4986 }, { "epoch": 0.87, "grad_norm": 0.6175928782039724, "learning_rate": 8.156322975780074e-07, "loss": 0.6969, "step": 4987 }, { "epoch": 0.87, "grad_norm": 0.6961194457189426, "learning_rate": 8.133869649991955e-07, "loss": 0.7148, "step": 4988 }, { "epoch": 0.87, "grad_norm": 0.7301540119320312, "learning_rate": 8.111445962131925e-07, "loss": 0.7487, "step": 4989 }, { "epoch": 0.87, "grad_norm": 0.6814505828064321, "learning_rate": 8.089051919434321e-07, "loss": 0.7686, "step": 4990 }, { "epoch": 0.88, "grad_norm": 0.6365516967484058, "learning_rate": 8.06668752912394e-07, "loss": 0.7133, "step": 4991 }, { "epoch": 0.88, "grad_norm": 0.6416760429157039, "learning_rate": 8.044352798416e-07, "loss": 0.6718, "step": 4992 }, { "epoch": 0.88, "grad_norm": 0.8188482310171137, "learning_rate": 8.022047734516136e-07, "loss": 0.6964, "step": 4993 }, { "epoch": 0.88, "grad_norm": 0.6287229916305674, "learning_rate": 7.999772344620437e-07, "loss": 0.6703, "step": 4994 }, { "epoch": 0.88, "grad_norm": 0.6997873787868881, "learning_rate": 7.977526635915422e-07, "loss": 0.6885, "step": 4995 }, { "epoch": 0.88, "grad_norm": 0.6669456989479913, "learning_rate": 7.955310615577993e-07, "loss": 0.7048, "step": 4996 }, { "epoch": 0.88, "grad_norm": 0.6438112751282876, "learning_rate": 7.933124290775507e-07, "loss": 0.6808, "step": 4997 }, { "epoch": 0.88, "grad_norm": 0.8587816340431749, "learning_rate": 7.910967668665759e-07, "loss": 0.6852, "step": 4998 }, { "epoch": 0.88, "grad_norm": 0.6317064849341534, "learning_rate": 7.888840756396898e-07, "loss": 0.6596, "step": 4999 }, { "epoch": 0.88, "grad_norm": 0.6015066886496627, "learning_rate": 7.866743561107548e-07, "loss": 0.6877, "step": 5000 }, { "epoch": 0.88, "grad_norm": 0.674861003689793, "learning_rate": 7.844676089926717e-07, "loss": 0.6968, "step": 5001 }, { "epoch": 0.88, "grad_norm": 0.6793583441507539, "learning_rate": 7.822638349973854e-07, "loss": 0.71, "step": 5002 }, { "epoch": 0.88, "grad_norm": 0.6644660449743555, "learning_rate": 7.800630348358762e-07, "loss": 0.6693, "step": 5003 }, { "epoch": 0.88, "grad_norm": 0.6819194534321793, "learning_rate": 7.778652092181726e-07, "loss": 0.7049, "step": 5004 }, { "epoch": 0.88, "grad_norm": 0.690266658968523, "learning_rate": 7.756703588533354e-07, "loss": 0.7038, "step": 5005 }, { "epoch": 0.88, "grad_norm": 0.6283534518482208, "learning_rate": 7.73478484449468e-07, "loss": 0.6594, "step": 5006 }, { "epoch": 0.88, "grad_norm": 0.7331245319551555, "learning_rate": 7.71289586713716e-07, "loss": 0.6832, "step": 5007 }, { "epoch": 0.88, "grad_norm": 0.6857895975580812, "learning_rate": 7.691036663522644e-07, "loss": 0.6985, "step": 5008 }, { "epoch": 0.88, "grad_norm": 0.6127390387489915, "learning_rate": 7.669207240703358e-07, "loss": 0.6782, "step": 5009 }, { "epoch": 0.88, "grad_norm": 0.7784603535718949, "learning_rate": 7.647407605721946e-07, "loss": 0.7483, "step": 5010 }, { "epoch": 0.88, "grad_norm": 0.5912844104252837, "learning_rate": 7.625637765611405e-07, "loss": 0.652, "step": 5011 }, { "epoch": 0.88, "grad_norm": 0.7349847663480231, "learning_rate": 7.603897727395126e-07, "loss": 0.7003, "step": 5012 }, { "epoch": 0.88, "grad_norm": 0.6891271266395798, "learning_rate": 7.582187498086946e-07, "loss": 0.702, "step": 5013 }, { "epoch": 0.88, "grad_norm": 0.8168526669212026, "learning_rate": 7.560507084690993e-07, "loss": 0.6965, "step": 5014 }, { "epoch": 0.88, "grad_norm": 0.6420843324525556, "learning_rate": 7.538856494201829e-07, "loss": 0.6903, "step": 5015 }, { "epoch": 0.88, "grad_norm": 0.6213623807744865, "learning_rate": 7.517235733604411e-07, "loss": 0.6581, "step": 5016 }, { "epoch": 0.88, "grad_norm": 0.6468048025760801, "learning_rate": 7.495644809874003e-07, "loss": 0.6783, "step": 5017 }, { "epoch": 0.88, "grad_norm": 0.6911198844936937, "learning_rate": 7.474083729976322e-07, "loss": 0.743, "step": 5018 }, { "epoch": 0.88, "grad_norm": 0.7650669374360607, "learning_rate": 7.452552500867405e-07, "loss": 0.6914, "step": 5019 }, { "epoch": 0.88, "grad_norm": 0.6693946687617682, "learning_rate": 7.431051129493682e-07, "loss": 0.7089, "step": 5020 }, { "epoch": 0.88, "grad_norm": 0.6850510108908252, "learning_rate": 7.409579622791963e-07, "loss": 0.6916, "step": 5021 }, { "epoch": 0.88, "grad_norm": 0.6579699658046446, "learning_rate": 7.388137987689381e-07, "loss": 0.6789, "step": 5022 }, { "epoch": 0.88, "grad_norm": 0.6585798454226429, "learning_rate": 7.36672623110345e-07, "loss": 0.7125, "step": 5023 }, { "epoch": 0.88, "grad_norm": 0.689280138203555, "learning_rate": 7.345344359942042e-07, "loss": 0.7015, "step": 5024 }, { "epoch": 0.88, "grad_norm": 0.6218897692758315, "learning_rate": 7.323992381103418e-07, "loss": 0.6815, "step": 5025 }, { "epoch": 0.88, "grad_norm": 0.6319867259078529, "learning_rate": 7.302670301476156e-07, "loss": 0.6811, "step": 5026 }, { "epoch": 0.88, "grad_norm": 0.7300895307258279, "learning_rate": 7.281378127939221e-07, "loss": 0.7306, "step": 5027 }, { "epoch": 0.88, "grad_norm": 0.6795255653830686, "learning_rate": 7.260115867361895e-07, "loss": 0.7037, "step": 5028 }, { "epoch": 0.88, "grad_norm": 0.7026362749685073, "learning_rate": 7.238883526603824e-07, "loss": 0.6886, "step": 5029 }, { "epoch": 0.88, "grad_norm": 0.6550621318557394, "learning_rate": 7.217681112515007e-07, "loss": 0.678, "step": 5030 }, { "epoch": 0.88, "grad_norm": 0.6508997380871702, "learning_rate": 7.196508631935794e-07, "loss": 0.6653, "step": 5031 }, { "epoch": 0.88, "grad_norm": 0.6597464981857799, "learning_rate": 7.175366091696878e-07, "loss": 0.6617, "step": 5032 }, { "epoch": 0.88, "grad_norm": 0.7563859259562059, "learning_rate": 7.154253498619245e-07, "loss": 0.7065, "step": 5033 }, { "epoch": 0.88, "grad_norm": 0.7032092104059681, "learning_rate": 7.133170859514304e-07, "loss": 0.7356, "step": 5034 }, { "epoch": 0.88, "grad_norm": 0.7490460746374362, "learning_rate": 7.112118181183703e-07, "loss": 0.7467, "step": 5035 }, { "epoch": 0.88, "grad_norm": 0.6232538863714465, "learning_rate": 7.091095470419507e-07, "loss": 0.6755, "step": 5036 }, { "epoch": 0.88, "grad_norm": 0.8185962982812571, "learning_rate": 7.070102734004081e-07, "loss": 0.7712, "step": 5037 }, { "epoch": 0.88, "grad_norm": 0.6813902178345911, "learning_rate": 7.049139978710106e-07, "loss": 0.7107, "step": 5038 }, { "epoch": 0.88, "grad_norm": 0.790868320499998, "learning_rate": 7.02820721130063e-07, "loss": 0.733, "step": 5039 }, { "epoch": 0.88, "grad_norm": 0.6523960365361848, "learning_rate": 7.007304438528972e-07, "loss": 0.6779, "step": 5040 }, { "epoch": 0.88, "grad_norm": 0.6174834521609853, "learning_rate": 6.986431667138815e-07, "loss": 0.6582, "step": 5041 }, { "epoch": 0.88, "grad_norm": 0.6412265069615184, "learning_rate": 6.96558890386414e-07, "loss": 0.6567, "step": 5042 }, { "epoch": 0.88, "grad_norm": 0.6748660429099712, "learning_rate": 6.944776155429256e-07, "loss": 0.6969, "step": 5043 }, { "epoch": 0.88, "grad_norm": 0.7161081286402309, "learning_rate": 6.923993428548803e-07, "loss": 0.6607, "step": 5044 }, { "epoch": 0.88, "grad_norm": 0.6749139834439424, "learning_rate": 6.90324072992773e-07, "loss": 0.6671, "step": 5045 }, { "epoch": 0.88, "grad_norm": 0.6561689170321549, "learning_rate": 6.882518066261246e-07, "loss": 0.6971, "step": 5046 }, { "epoch": 0.88, "grad_norm": 0.6576672327446043, "learning_rate": 6.861825444234959e-07, "loss": 0.7039, "step": 5047 }, { "epoch": 0.89, "grad_norm": 0.7029297470969077, "learning_rate": 6.84116287052472e-07, "loss": 0.7253, "step": 5048 }, { "epoch": 0.89, "grad_norm": 0.6530742469741634, "learning_rate": 6.820530351796706e-07, "loss": 0.6683, "step": 5049 }, { "epoch": 0.89, "grad_norm": 0.6687220681764383, "learning_rate": 6.799927894707425e-07, "loss": 0.6849, "step": 5050 }, { "epoch": 0.89, "grad_norm": 0.6815666785676069, "learning_rate": 6.779355505903629e-07, "loss": 0.6919, "step": 5051 }, { "epoch": 0.89, "grad_norm": 0.634859378758188, "learning_rate": 6.758813192022396e-07, "loss": 0.6951, "step": 5052 }, { "epoch": 0.89, "grad_norm": 0.7107762500206927, "learning_rate": 6.738300959691113e-07, "loss": 0.717, "step": 5053 }, { "epoch": 0.89, "grad_norm": 0.6702180215045738, "learning_rate": 6.717818815527455e-07, "loss": 0.68, "step": 5054 }, { "epoch": 0.89, "grad_norm": 0.6231993828713926, "learning_rate": 6.697366766139379e-07, "loss": 0.6756, "step": 5055 }, { "epoch": 0.89, "grad_norm": 0.5935107664664829, "learning_rate": 6.676944818125187e-07, "loss": 0.6617, "step": 5056 }, { "epoch": 0.89, "grad_norm": 0.6405271505145459, "learning_rate": 6.656552978073361e-07, "loss": 0.6688, "step": 5057 }, { "epoch": 0.89, "grad_norm": 0.6994619209057904, "learning_rate": 6.636191252562763e-07, "loss": 0.7448, "step": 5058 }, { "epoch": 0.89, "grad_norm": 0.7516795661932462, "learning_rate": 6.615859648162526e-07, "loss": 0.6506, "step": 5059 }, { "epoch": 0.89, "grad_norm": 0.6756681774722314, "learning_rate": 6.595558171432026e-07, "loss": 0.7289, "step": 5060 }, { "epoch": 0.89, "grad_norm": 0.6736139287547559, "learning_rate": 6.575286828920946e-07, "loss": 0.7355, "step": 5061 }, { "epoch": 0.89, "grad_norm": 0.6554073721624437, "learning_rate": 6.555045627169243e-07, "loss": 0.6542, "step": 5062 }, { "epoch": 0.89, "grad_norm": 0.7239220047047097, "learning_rate": 6.534834572707171e-07, "loss": 0.7516, "step": 5063 }, { "epoch": 0.89, "grad_norm": 0.6742956254252193, "learning_rate": 6.514653672055204e-07, "loss": 0.684, "step": 5064 }, { "epoch": 0.89, "grad_norm": 0.6124987834341231, "learning_rate": 6.494502931724133e-07, "loss": 0.6587, "step": 5065 }, { "epoch": 0.89, "grad_norm": 0.6668151705372639, "learning_rate": 6.474382358215026e-07, "loss": 0.7186, "step": 5066 }, { "epoch": 0.89, "grad_norm": 0.6530610777064729, "learning_rate": 6.454291958019177e-07, "loss": 0.7063, "step": 5067 }, { "epoch": 0.89, "grad_norm": 0.6780831052040899, "learning_rate": 6.43423173761819e-07, "loss": 0.6937, "step": 5068 }, { "epoch": 0.89, "grad_norm": 0.7440453330074065, "learning_rate": 6.414201703483902e-07, "loss": 0.7241, "step": 5069 }, { "epoch": 0.89, "grad_norm": 0.7084567301809934, "learning_rate": 6.394201862078386e-07, "loss": 0.7129, "step": 5070 }, { "epoch": 0.89, "grad_norm": 1.2136770736404845, "learning_rate": 6.374232219854049e-07, "loss": 0.7095, "step": 5071 }, { "epoch": 0.89, "grad_norm": 0.6289324583974081, "learning_rate": 6.354292783253491e-07, "loss": 0.6719, "step": 5072 }, { "epoch": 0.89, "grad_norm": 0.6502456955253032, "learning_rate": 6.3343835587096e-07, "loss": 0.6768, "step": 5073 }, { "epoch": 0.89, "grad_norm": 0.6885054738152341, "learning_rate": 6.314504552645528e-07, "loss": 0.6868, "step": 5074 }, { "epoch": 0.89, "grad_norm": 0.6309001435188996, "learning_rate": 6.294655771474612e-07, "loss": 0.6634, "step": 5075 }, { "epoch": 0.89, "grad_norm": 0.6972171575520929, "learning_rate": 6.274837221600505e-07, "loss": 0.7004, "step": 5076 }, { "epoch": 0.89, "grad_norm": 0.7207801470750851, "learning_rate": 6.255048909417095e-07, "loss": 0.6888, "step": 5077 }, { "epoch": 0.89, "grad_norm": 0.646618066970546, "learning_rate": 6.235290841308484e-07, "loss": 0.686, "step": 5078 }, { "epoch": 0.89, "grad_norm": 0.6299010903849748, "learning_rate": 6.21556302364904e-07, "loss": 0.6882, "step": 5079 }, { "epoch": 0.89, "grad_norm": 0.6465090880320002, "learning_rate": 6.195865462803385e-07, "loss": 0.6973, "step": 5080 }, { "epoch": 0.89, "grad_norm": 0.6700829905553866, "learning_rate": 6.176198165126335e-07, "loss": 0.6702, "step": 5081 }, { "epoch": 0.89, "grad_norm": 0.6413454667334564, "learning_rate": 6.156561136962991e-07, "loss": 0.6962, "step": 5082 }, { "epoch": 0.89, "grad_norm": 0.6434230996084168, "learning_rate": 6.136954384648663e-07, "loss": 0.6576, "step": 5083 }, { "epoch": 0.89, "grad_norm": 0.6536796804568686, "learning_rate": 6.117377914508904e-07, "loss": 0.714, "step": 5084 }, { "epoch": 0.89, "grad_norm": 0.7673495322704128, "learning_rate": 6.097831732859471e-07, "loss": 0.6769, "step": 5085 }, { "epoch": 0.89, "grad_norm": 0.739246325077026, "learning_rate": 6.0783158460064e-07, "loss": 0.7268, "step": 5086 }, { "epoch": 0.89, "grad_norm": 0.6795366992985777, "learning_rate": 6.058830260245907e-07, "loss": 0.6985, "step": 5087 }, { "epoch": 0.89, "grad_norm": 0.6898973469515431, "learning_rate": 6.039374981864444e-07, "loss": 0.7415, "step": 5088 }, { "epoch": 0.89, "grad_norm": 0.6509688542476593, "learning_rate": 6.019950017138676e-07, "loss": 0.65, "step": 5089 }, { "epoch": 0.89, "grad_norm": 0.6719929573045945, "learning_rate": 6.000555372335526e-07, "loss": 0.6936, "step": 5090 }, { "epoch": 0.89, "grad_norm": 0.6207784052783373, "learning_rate": 5.981191053712099e-07, "loss": 0.7084, "step": 5091 }, { "epoch": 0.89, "grad_norm": 0.726206580555055, "learning_rate": 5.961857067515742e-07, "loss": 0.7168, "step": 5092 }, { "epoch": 0.89, "grad_norm": 0.754484494162379, "learning_rate": 5.942553419983976e-07, "loss": 0.7478, "step": 5093 }, { "epoch": 0.89, "grad_norm": 0.5914355769596401, "learning_rate": 5.923280117344576e-07, "loss": 0.6603, "step": 5094 }, { "epoch": 0.89, "grad_norm": 0.6573800841266276, "learning_rate": 5.904037165815512e-07, "loss": 0.6949, "step": 5095 }, { "epoch": 0.89, "grad_norm": 0.6311569146933668, "learning_rate": 5.884824571604974e-07, "loss": 0.6691, "step": 5096 }, { "epoch": 0.89, "grad_norm": 0.5824285889392142, "learning_rate": 5.865642340911315e-07, "loss": 0.6474, "step": 5097 }, { "epoch": 0.89, "grad_norm": 0.7083818226222938, "learning_rate": 5.84649047992315e-07, "loss": 0.7301, "step": 5098 }, { "epoch": 0.89, "grad_norm": 0.637460429033412, "learning_rate": 5.827368994819238e-07, "loss": 0.6727, "step": 5099 }, { "epoch": 0.89, "grad_norm": 0.6485897681177597, "learning_rate": 5.808277891768599e-07, "loss": 0.6686, "step": 5100 }, { "epoch": 0.89, "grad_norm": 0.6506620716324071, "learning_rate": 5.789217176930395e-07, "loss": 0.6908, "step": 5101 }, { "epoch": 0.89, "grad_norm": 0.7115008245976694, "learning_rate": 5.77018685645403e-07, "loss": 0.7311, "step": 5102 }, { "epoch": 0.89, "grad_norm": 0.6594221354215759, "learning_rate": 5.75118693647908e-07, "loss": 0.717, "step": 5103 }, { "epoch": 0.89, "grad_norm": 0.5981700061434727, "learning_rate": 5.732217423135311e-07, "loss": 0.6466, "step": 5104 }, { "epoch": 0.9, "grad_norm": 0.5888191344605326, "learning_rate": 5.713278322542682e-07, "loss": 0.6598, "step": 5105 }, { "epoch": 0.9, "grad_norm": 0.6465778613998194, "learning_rate": 5.694369640811337e-07, "loss": 0.7198, "step": 5106 }, { "epoch": 0.9, "grad_norm": 0.6744947124514591, "learning_rate": 5.675491384041609e-07, "loss": 0.6806, "step": 5107 }, { "epoch": 0.9, "grad_norm": 0.6592180337186977, "learning_rate": 5.656643558324027e-07, "loss": 0.7559, "step": 5108 }, { "epoch": 0.9, "grad_norm": 0.6979195257071743, "learning_rate": 5.637826169739302e-07, "loss": 0.6944, "step": 5109 }, { "epoch": 0.9, "grad_norm": 0.6374034908030505, "learning_rate": 5.619039224358292e-07, "loss": 0.6562, "step": 5110 }, { "epoch": 0.9, "grad_norm": 0.677400742130099, "learning_rate": 5.600282728242068e-07, "loss": 0.7285, "step": 5111 }, { "epoch": 0.9, "grad_norm": 0.6055363993626175, "learning_rate": 5.581556687441858e-07, "loss": 0.6232, "step": 5112 }, { "epoch": 0.9, "grad_norm": 0.695714665624665, "learning_rate": 5.562861107999095e-07, "loss": 0.6899, "step": 5113 }, { "epoch": 0.9, "grad_norm": 0.6554539776657197, "learning_rate": 5.544195995945356e-07, "loss": 0.6784, "step": 5114 }, { "epoch": 0.9, "grad_norm": 0.6219620220755192, "learning_rate": 5.525561357302367e-07, "loss": 0.6436, "step": 5115 }, { "epoch": 0.9, "grad_norm": 0.6207397384671143, "learning_rate": 5.506957198082097e-07, "loss": 0.671, "step": 5116 }, { "epoch": 0.9, "grad_norm": 0.6137230544312345, "learning_rate": 5.48838352428659e-07, "loss": 0.6861, "step": 5117 }, { "epoch": 0.9, "grad_norm": 0.6155464331126521, "learning_rate": 5.469840341908117e-07, "loss": 0.6604, "step": 5118 }, { "epoch": 0.9, "grad_norm": 0.6512910762818657, "learning_rate": 5.451327656929106e-07, "loss": 0.7374, "step": 5119 }, { "epoch": 0.9, "grad_norm": 0.6825944988528989, "learning_rate": 5.432845475322124e-07, "loss": 0.7058, "step": 5120 }, { "epoch": 0.9, "grad_norm": 0.6514844659159983, "learning_rate": 5.414393803049933e-07, "loss": 0.6955, "step": 5121 }, { "epoch": 0.9, "grad_norm": 0.6045373401025126, "learning_rate": 5.395972646065384e-07, "loss": 0.6904, "step": 5122 }, { "epoch": 0.9, "grad_norm": 0.6205594939387041, "learning_rate": 5.377582010311555e-07, "loss": 0.6511, "step": 5123 }, { "epoch": 0.9, "grad_norm": 0.5955567285792948, "learning_rate": 5.359221901721634e-07, "loss": 0.6821, "step": 5124 }, { "epoch": 0.9, "grad_norm": 0.685013666137852, "learning_rate": 5.340892326218983e-07, "loss": 0.7158, "step": 5125 }, { "epoch": 0.9, "grad_norm": 0.6573819755339418, "learning_rate": 5.322593289717104e-07, "loss": 0.6716, "step": 5126 }, { "epoch": 0.9, "grad_norm": 0.6146991542725002, "learning_rate": 5.304324798119664e-07, "loss": 0.6356, "step": 5127 }, { "epoch": 0.9, "grad_norm": 0.7014246174358304, "learning_rate": 5.286086857320416e-07, "loss": 0.726, "step": 5128 }, { "epoch": 0.9, "grad_norm": 0.6482041301817434, "learning_rate": 5.267879473203341e-07, "loss": 0.6923, "step": 5129 }, { "epoch": 0.9, "grad_norm": 1.0787217985642683, "learning_rate": 5.249702651642496e-07, "loss": 0.7384, "step": 5130 }, { "epoch": 0.9, "grad_norm": 0.6411579142880084, "learning_rate": 5.231556398502114e-07, "loss": 0.6951, "step": 5131 }, { "epoch": 0.9, "grad_norm": 0.7008672894963947, "learning_rate": 5.213440719636564e-07, "loss": 0.6581, "step": 5132 }, { "epoch": 0.9, "grad_norm": 0.647968880896426, "learning_rate": 5.19535562089033e-07, "loss": 0.6948, "step": 5133 }, { "epoch": 0.9, "grad_norm": 0.7002244761877433, "learning_rate": 5.177301108098032e-07, "loss": 0.6671, "step": 5134 }, { "epoch": 0.9, "grad_norm": 0.7044386259084227, "learning_rate": 5.159277187084433e-07, "loss": 0.6684, "step": 5135 }, { "epoch": 0.9, "grad_norm": 0.6434325530029915, "learning_rate": 5.141283863664448e-07, "loss": 0.6729, "step": 5136 }, { "epoch": 0.9, "grad_norm": 0.6662072696449306, "learning_rate": 5.123321143643079e-07, "loss": 0.7032, "step": 5137 }, { "epoch": 0.9, "grad_norm": 0.7617014800341856, "learning_rate": 5.105389032815489e-07, "loss": 0.6729, "step": 5138 }, { "epoch": 0.9, "grad_norm": 0.6758039820760449, "learning_rate": 5.087487536966929e-07, "loss": 0.7261, "step": 5139 }, { "epoch": 0.9, "grad_norm": 0.6270496875171772, "learning_rate": 5.069616661872801e-07, "loss": 0.6955, "step": 5140 }, { "epoch": 0.9, "grad_norm": 0.6307189806645389, "learning_rate": 5.051776413298647e-07, "loss": 0.6789, "step": 5141 }, { "epoch": 0.9, "grad_norm": 0.6708695306458515, "learning_rate": 5.033966797000067e-07, "loss": 0.7052, "step": 5142 }, { "epoch": 0.9, "grad_norm": 0.6479409782448884, "learning_rate": 5.016187818722828e-07, "loss": 0.6912, "step": 5143 }, { "epoch": 0.9, "grad_norm": 0.6886477251455617, "learning_rate": 4.998439484202811e-07, "loss": 0.7473, "step": 5144 }, { "epoch": 0.9, "grad_norm": 0.660220542492386, "learning_rate": 4.980721799166e-07, "loss": 0.7057, "step": 5145 }, { "epoch": 0.9, "grad_norm": 0.6995097048182796, "learning_rate": 4.963034769328457e-07, "loss": 0.7054, "step": 5146 }, { "epoch": 0.9, "grad_norm": 0.6849366784038846, "learning_rate": 4.945378400396417e-07, "loss": 0.7019, "step": 5147 }, { "epoch": 0.9, "grad_norm": 0.6924835898595314, "learning_rate": 4.92775269806618e-07, "loss": 0.7185, "step": 5148 }, { "epoch": 0.9, "grad_norm": 0.6703018196940217, "learning_rate": 4.910157668024163e-07, "loss": 0.7081, "step": 5149 }, { "epoch": 0.9, "grad_norm": 0.7013626617543578, "learning_rate": 4.892593315946914e-07, "loss": 0.6906, "step": 5150 }, { "epoch": 0.9, "grad_norm": 0.712272925582211, "learning_rate": 4.875059647501035e-07, "loss": 0.7451, "step": 5151 }, { "epoch": 0.9, "grad_norm": 0.6776426477111472, "learning_rate": 4.857556668343244e-07, "loss": 0.7219, "step": 5152 }, { "epoch": 0.9, "grad_norm": 0.6264568186206098, "learning_rate": 4.840084384120381e-07, "loss": 0.6687, "step": 5153 }, { "epoch": 0.9, "grad_norm": 0.683073728630025, "learning_rate": 4.822642800469368e-07, "loss": 0.7174, "step": 5154 }, { "epoch": 0.9, "grad_norm": 0.6648630201969823, "learning_rate": 4.805231923017228e-07, "loss": 0.6948, "step": 5155 }, { "epoch": 0.9, "grad_norm": 0.6086117454457106, "learning_rate": 4.787851757381079e-07, "loss": 0.6847, "step": 5156 }, { "epoch": 0.9, "grad_norm": 0.6066989237314606, "learning_rate": 4.770502309168101e-07, "loss": 0.6848, "step": 5157 }, { "epoch": 0.9, "grad_norm": 0.7002998834789402, "learning_rate": 4.753183583975596e-07, "loss": 0.6657, "step": 5158 }, { "epoch": 0.9, "grad_norm": 0.7186695641048381, "learning_rate": 4.7358955873909594e-07, "loss": 0.7221, "step": 5159 }, { "epoch": 0.9, "grad_norm": 0.7425682672701384, "learning_rate": 4.7186383249916644e-07, "loss": 0.7188, "step": 5160 }, { "epoch": 0.9, "grad_norm": 0.633153837010472, "learning_rate": 4.701411802345224e-07, "loss": 0.6948, "step": 5161 }, { "epoch": 0.91, "grad_norm": 0.6985049134148822, "learning_rate": 4.6842160250093136e-07, "loss": 0.675, "step": 5162 }, { "epoch": 0.91, "grad_norm": 0.7165681989100472, "learning_rate": 4.66705099853163e-07, "loss": 0.7287, "step": 5163 }, { "epoch": 0.91, "grad_norm": 0.6043812998346797, "learning_rate": 4.649916728449966e-07, "loss": 0.6715, "step": 5164 }, { "epoch": 0.91, "grad_norm": 0.6894585855287751, "learning_rate": 4.632813220292209e-07, "loss": 0.7276, "step": 5165 }, { "epoch": 0.91, "grad_norm": 0.6826489935900882, "learning_rate": 4.615740479576292e-07, "loss": 0.7303, "step": 5166 }, { "epoch": 0.91, "grad_norm": 0.7161368948875177, "learning_rate": 4.598698511810251e-07, "loss": 0.7011, "step": 5167 }, { "epoch": 0.91, "grad_norm": 0.7156045924335728, "learning_rate": 4.5816873224921677e-07, "loss": 0.7343, "step": 5168 }, { "epoch": 0.91, "grad_norm": 0.7332992843225806, "learning_rate": 4.564706917110229e-07, "loss": 0.7445, "step": 5169 }, { "epoch": 0.91, "grad_norm": 0.7348136225620779, "learning_rate": 4.54775730114263e-07, "loss": 0.7537, "step": 5170 }, { "epoch": 0.91, "grad_norm": 1.5480062057646846, "learning_rate": 4.530838480057698e-07, "loss": 0.7118, "step": 5171 }, { "epoch": 0.91, "grad_norm": 0.6523418058712791, "learning_rate": 4.5139504593137763e-07, "loss": 0.6456, "step": 5172 }, { "epoch": 0.91, "grad_norm": 0.7834747684416298, "learning_rate": 4.4970932443593186e-07, "loss": 0.7523, "step": 5173 }, { "epoch": 0.91, "grad_norm": 0.6542506228968274, "learning_rate": 4.480266840632808e-07, "loss": 0.6769, "step": 5174 }, { "epoch": 0.91, "grad_norm": 0.6853289627902458, "learning_rate": 4.463471253562768e-07, "loss": 0.6848, "step": 5175 }, { "epoch": 0.91, "grad_norm": 0.6866860891092332, "learning_rate": 4.4467064885678314e-07, "loss": 0.7048, "step": 5176 }, { "epoch": 0.91, "grad_norm": 0.6989294718167838, "learning_rate": 4.4299725510566384e-07, "loss": 0.7425, "step": 5177 }, { "epoch": 0.91, "grad_norm": 0.6384434109611873, "learning_rate": 4.413269446427948e-07, "loss": 0.701, "step": 5178 }, { "epoch": 0.91, "grad_norm": 0.7051692230245222, "learning_rate": 4.396597180070483e-07, "loss": 0.7261, "step": 5179 }, { "epoch": 0.91, "grad_norm": 0.8007181539628061, "learning_rate": 4.3799557573630856e-07, "loss": 0.6817, "step": 5180 }, { "epoch": 0.91, "grad_norm": 0.6072024831699643, "learning_rate": 4.3633451836746165e-07, "loss": 0.7076, "step": 5181 }, { "epoch": 0.91, "grad_norm": 0.5812975762567614, "learning_rate": 4.346765464364e-07, "loss": 0.6403, "step": 5182 }, { "epoch": 0.91, "grad_norm": 0.6662109973937782, "learning_rate": 4.3302166047802017e-07, "loss": 0.6847, "step": 5183 }, { "epoch": 0.91, "grad_norm": 0.7486717173436629, "learning_rate": 4.313698610262218e-07, "loss": 0.7419, "step": 5184 }, { "epoch": 0.91, "grad_norm": 0.6717975219847047, "learning_rate": 4.297211486139119e-07, "loss": 0.6907, "step": 5185 }, { "epoch": 0.91, "grad_norm": 0.6344014498892879, "learning_rate": 4.280755237729972e-07, "loss": 0.6689, "step": 5186 }, { "epoch": 0.91, "grad_norm": 0.7123550541200384, "learning_rate": 4.2643298703439194e-07, "loss": 0.7139, "step": 5187 }, { "epoch": 0.91, "grad_norm": 0.6754168986527378, "learning_rate": 4.247935389280111e-07, "loss": 0.6766, "step": 5188 }, { "epoch": 0.91, "grad_norm": 0.6807353445148655, "learning_rate": 4.2315717998277603e-07, "loss": 0.7086, "step": 5189 }, { "epoch": 0.91, "grad_norm": 0.6591716759067102, "learning_rate": 4.2152391072661004e-07, "loss": 0.6546, "step": 5190 }, { "epoch": 0.91, "grad_norm": 0.5401367320637133, "learning_rate": 4.1989373168644044e-07, "loss": 0.6599, "step": 5191 }, { "epoch": 0.91, "grad_norm": 0.6110764584726533, "learning_rate": 4.182666433881966e-07, "loss": 0.7021, "step": 5192 }, { "epoch": 0.91, "grad_norm": 0.6387816023186227, "learning_rate": 4.1664264635680963e-07, "loss": 0.7153, "step": 5193 }, { "epoch": 0.91, "grad_norm": 0.6777775344469593, "learning_rate": 4.150217411162161e-07, "loss": 0.6722, "step": 5194 }, { "epoch": 0.91, "grad_norm": 0.7138748979699672, "learning_rate": 4.1340392818935426e-07, "loss": 0.6869, "step": 5195 }, { "epoch": 0.91, "grad_norm": 0.7099616178875814, "learning_rate": 4.117892080981645e-07, "loss": 0.6729, "step": 5196 }, { "epoch": 0.91, "grad_norm": 0.6157713215056707, "learning_rate": 4.101775813635878e-07, "loss": 0.6629, "step": 5197 }, { "epoch": 0.91, "grad_norm": 0.6788749604351574, "learning_rate": 4.0856904850557046e-07, "loss": 0.6719, "step": 5198 }, { "epoch": 0.91, "grad_norm": 0.6617874973468545, "learning_rate": 4.069636100430563e-07, "loss": 0.665, "step": 5199 }, { "epoch": 0.91, "grad_norm": 0.6957857675305978, "learning_rate": 4.053612664939932e-07, "loss": 0.6883, "step": 5200 }, { "epoch": 0.91, "grad_norm": 0.6553594419359362, "learning_rate": 4.0376201837533324e-07, "loss": 0.723, "step": 5201 }, { "epoch": 0.91, "grad_norm": 0.7416871886745575, "learning_rate": 4.021658662030248e-07, "loss": 0.7232, "step": 5202 }, { "epoch": 0.91, "grad_norm": 0.6774858073056813, "learning_rate": 4.0057281049202256e-07, "loss": 0.7368, "step": 5203 }, { "epoch": 0.91, "grad_norm": 0.6477576998527672, "learning_rate": 3.989828517562755e-07, "loss": 0.6998, "step": 5204 }, { "epoch": 0.91, "grad_norm": 0.6540415571219923, "learning_rate": 3.97395990508741e-07, "loss": 0.6547, "step": 5205 }, { "epoch": 0.91, "grad_norm": 0.6868610030398729, "learning_rate": 3.958122272613718e-07, "loss": 0.7079, "step": 5206 }, { "epoch": 0.91, "grad_norm": 0.6859186166305641, "learning_rate": 3.9423156252512253e-07, "loss": 0.6915, "step": 5207 }, { "epoch": 0.91, "grad_norm": 0.6396855913425539, "learning_rate": 3.9265399680994964e-07, "loss": 0.6819, "step": 5208 }, { "epoch": 0.91, "grad_norm": 0.7183144672540949, "learning_rate": 3.9107953062480943e-07, "loss": 0.6857, "step": 5209 }, { "epoch": 0.91, "grad_norm": 0.6849160774059312, "learning_rate": 3.895081644776555e-07, "loss": 0.7022, "step": 5210 }, { "epoch": 0.91, "grad_norm": 0.7705056837280294, "learning_rate": 3.8793989887544457e-07, "loss": 0.7018, "step": 5211 }, { "epoch": 0.91, "grad_norm": 0.644628310550631, "learning_rate": 3.863747343241309e-07, "loss": 0.6877, "step": 5212 }, { "epoch": 0.91, "grad_norm": 0.7169902863498977, "learning_rate": 3.848126713286704e-07, "loss": 0.6839, "step": 5213 }, { "epoch": 0.91, "grad_norm": 0.6592512491058847, "learning_rate": 3.8325371039301673e-07, "loss": 0.6929, "step": 5214 }, { "epoch": 0.91, "grad_norm": 0.6973541202685539, "learning_rate": 3.816978520201242e-07, "loss": 0.6948, "step": 5215 }, { "epoch": 0.91, "grad_norm": 0.6416674405348961, "learning_rate": 3.8014509671194244e-07, "loss": 0.689, "step": 5216 }, { "epoch": 0.91, "grad_norm": 0.7522864608334948, "learning_rate": 3.785954449694229e-07, "loss": 0.6881, "step": 5217 }, { "epoch": 0.91, "grad_norm": 0.6677009854521466, "learning_rate": 3.7704889729251793e-07, "loss": 0.6844, "step": 5218 }, { "epoch": 0.92, "grad_norm": 0.683257439476345, "learning_rate": 3.7550545418017506e-07, "loss": 0.6886, "step": 5219 }, { "epoch": 0.92, "grad_norm": 0.6302192664606404, "learning_rate": 3.7396511613034257e-07, "loss": 0.6569, "step": 5220 }, { "epoch": 0.92, "grad_norm": 0.612703695242606, "learning_rate": 3.72427883639962e-07, "loss": 0.685, "step": 5221 }, { "epoch": 0.92, "grad_norm": 0.6469306262580434, "learning_rate": 3.708937572049798e-07, "loss": 0.7061, "step": 5222 }, { "epoch": 0.92, "grad_norm": 0.706483412652573, "learning_rate": 3.6936273732033457e-07, "loss": 0.6335, "step": 5223 }, { "epoch": 0.92, "grad_norm": 0.6539203973247423, "learning_rate": 3.67834824479969e-07, "loss": 0.6623, "step": 5224 }, { "epoch": 0.92, "grad_norm": 0.7103488193525133, "learning_rate": 3.663100191768154e-07, "loss": 0.6741, "step": 5225 }, { "epoch": 0.92, "grad_norm": 0.692528359129227, "learning_rate": 3.647883219028103e-07, "loss": 0.6832, "step": 5226 }, { "epoch": 0.92, "grad_norm": 0.6574266524496459, "learning_rate": 3.6326973314888434e-07, "loss": 0.6724, "step": 5227 }, { "epoch": 0.92, "grad_norm": 0.6692363061069107, "learning_rate": 3.617542534049656e-07, "loss": 0.692, "step": 5228 }, { "epoch": 0.92, "grad_norm": 0.7678900005730859, "learning_rate": 3.6024188315997965e-07, "loss": 0.7358, "step": 5229 }, { "epoch": 0.92, "grad_norm": 0.7106098651792374, "learning_rate": 3.587326229018484e-07, "loss": 0.7589, "step": 5230 }, { "epoch": 0.92, "grad_norm": 0.6227244609508701, "learning_rate": 3.5722647311749125e-07, "loss": 0.682, "step": 5231 }, { "epoch": 0.92, "grad_norm": 0.7824023646770196, "learning_rate": 3.55723434292824e-07, "loss": 0.7245, "step": 5232 }, { "epoch": 0.92, "grad_norm": 0.7013794297893006, "learning_rate": 3.5422350691275753e-07, "loss": 0.711, "step": 5233 }, { "epoch": 0.92, "grad_norm": 0.7097198619140711, "learning_rate": 3.527266914611993e-07, "loss": 0.7428, "step": 5234 }, { "epoch": 0.92, "grad_norm": 0.7449944335743958, "learning_rate": 3.512329884210519e-07, "loss": 0.7542, "step": 5235 }, { "epoch": 0.92, "grad_norm": 0.6226455816439255, "learning_rate": 3.497423982742176e-07, "loss": 0.6724, "step": 5236 }, { "epoch": 0.92, "grad_norm": 0.7103899908897485, "learning_rate": 3.482549215015907e-07, "loss": 0.689, "step": 5237 }, { "epoch": 0.92, "grad_norm": 0.6705676852488501, "learning_rate": 3.4677055858306385e-07, "loss": 0.6887, "step": 5238 }, { "epoch": 0.92, "grad_norm": 0.5846940444342903, "learning_rate": 3.4528930999752076e-07, "loss": 0.6524, "step": 5239 }, { "epoch": 0.92, "grad_norm": 0.7691389422076164, "learning_rate": 3.4381117622284355e-07, "loss": 0.7631, "step": 5240 }, { "epoch": 0.92, "grad_norm": 0.681226364765676, "learning_rate": 3.4233615773591076e-07, "loss": 0.7144, "step": 5241 }, { "epoch": 0.92, "grad_norm": 0.6270188033554106, "learning_rate": 3.4086425501259403e-07, "loss": 0.6777, "step": 5242 }, { "epoch": 0.92, "grad_norm": 0.6456951657008014, "learning_rate": 3.3939546852775786e-07, "loss": 0.7085, "step": 5243 }, { "epoch": 0.92, "grad_norm": 0.7481318028652012, "learning_rate": 3.379297987552654e-07, "loss": 0.7081, "step": 5244 }, { "epoch": 0.92, "grad_norm": 0.6182809254332401, "learning_rate": 3.3646724616796966e-07, "loss": 0.6993, "step": 5245 }, { "epoch": 0.92, "grad_norm": 0.7096123485870652, "learning_rate": 3.3500781123772306e-07, "loss": 0.7255, "step": 5246 }, { "epoch": 0.92, "grad_norm": 0.7202420635742646, "learning_rate": 3.3355149443536793e-07, "loss": 0.659, "step": 5247 }, { "epoch": 0.92, "grad_norm": 0.6581536902648724, "learning_rate": 3.32098296230744e-07, "loss": 0.7553, "step": 5248 }, { "epoch": 0.92, "grad_norm": 0.7180676656705357, "learning_rate": 3.306482170926839e-07, "loss": 0.6794, "step": 5249 }, { "epoch": 0.92, "grad_norm": 0.7400680434709849, "learning_rate": 3.2920125748901135e-07, "loss": 0.6716, "step": 5250 }, { "epoch": 0.92, "grad_norm": 0.6404393933978099, "learning_rate": 3.2775741788654615e-07, "loss": 0.6805, "step": 5251 }, { "epoch": 0.92, "grad_norm": 0.6761145932909408, "learning_rate": 3.263166987511013e-07, "loss": 0.6857, "step": 5252 }, { "epoch": 0.92, "grad_norm": 0.8376152129116157, "learning_rate": 3.248791005474827e-07, "loss": 0.6893, "step": 5253 }, { "epoch": 0.92, "grad_norm": 0.6924119433416068, "learning_rate": 3.234446237394906e-07, "loss": 0.7076, "step": 5254 }, { "epoch": 0.92, "grad_norm": 0.7389831551054181, "learning_rate": 3.2201326878991466e-07, "loss": 0.7235, "step": 5255 }, { "epoch": 0.92, "grad_norm": 0.6817327532606716, "learning_rate": 3.205850361605434e-07, "loss": 0.7145, "step": 5256 }, { "epoch": 0.92, "grad_norm": 0.6393101294005828, "learning_rate": 3.1915992631215144e-07, "loss": 0.6967, "step": 5257 }, { "epoch": 0.92, "grad_norm": 0.6768571971112933, "learning_rate": 3.1773793970450883e-07, "loss": 0.6952, "step": 5258 }, { "epoch": 0.92, "grad_norm": 0.6660046145749697, "learning_rate": 3.1631907679637974e-07, "loss": 0.7146, "step": 5259 }, { "epoch": 0.92, "grad_norm": 0.7052856200670561, "learning_rate": 3.149033380455191e-07, "loss": 0.6962, "step": 5260 }, { "epoch": 0.92, "grad_norm": 0.9351317968217558, "learning_rate": 3.1349072390867266e-07, "loss": 0.6885, "step": 5261 }, { "epoch": 0.92, "grad_norm": 0.7501271240724482, "learning_rate": 3.1208123484157917e-07, "loss": 0.6923, "step": 5262 }, { "epoch": 0.92, "grad_norm": 0.685496738103531, "learning_rate": 3.1067487129896933e-07, "loss": 0.6942, "step": 5263 }, { "epoch": 0.92, "grad_norm": 0.6430846374193937, "learning_rate": 3.092716337345647e-07, "loss": 0.6651, "step": 5264 }, { "epoch": 0.92, "grad_norm": 0.6915082074520287, "learning_rate": 3.0787152260108087e-07, "loss": 0.7143, "step": 5265 }, { "epoch": 0.92, "grad_norm": 0.6992352943837561, "learning_rate": 3.0647453835022103e-07, "loss": 0.6895, "step": 5266 }, { "epoch": 0.92, "grad_norm": 0.6790396857372587, "learning_rate": 3.0508068143268233e-07, "loss": 0.7672, "step": 5267 }, { "epoch": 0.92, "grad_norm": 0.6845100528168951, "learning_rate": 3.036899522981518e-07, "loss": 0.6826, "step": 5268 }, { "epoch": 0.92, "grad_norm": 0.6351502589083022, "learning_rate": 3.0230235139530827e-07, "loss": 0.6539, "step": 5269 }, { "epoch": 0.92, "grad_norm": 0.6778826857539335, "learning_rate": 3.0091787917181926e-07, "loss": 0.6875, "step": 5270 }, { "epoch": 0.92, "grad_norm": 0.7324773858779275, "learning_rate": 2.9953653607434517e-07, "loss": 0.7058, "step": 5271 }, { "epoch": 0.92, "grad_norm": 0.6539123073544474, "learning_rate": 2.9815832254853517e-07, "loss": 0.6856, "step": 5272 }, { "epoch": 0.92, "grad_norm": 0.7001219068274559, "learning_rate": 2.967832390390324e-07, "loss": 0.7642, "step": 5273 }, { "epoch": 0.92, "grad_norm": 0.7642734457748329, "learning_rate": 2.954112859894653e-07, "loss": 0.7188, "step": 5274 }, { "epoch": 0.92, "grad_norm": 0.6075845371888403, "learning_rate": 2.940424638424533e-07, "loss": 0.6405, "step": 5275 }, { "epoch": 0.93, "grad_norm": 0.6402390428449879, "learning_rate": 2.9267677303960963e-07, "loss": 0.6734, "step": 5276 }, { "epoch": 0.93, "grad_norm": 0.6686897594984523, "learning_rate": 2.91314214021533e-07, "loss": 0.7084, "step": 5277 }, { "epoch": 0.93, "grad_norm": 0.6251316476479377, "learning_rate": 2.899547872278152e-07, "loss": 0.6874, "step": 5278 }, { "epoch": 0.93, "grad_norm": 0.6798760474295191, "learning_rate": 2.8859849309703315e-07, "loss": 0.7151, "step": 5279 }, { "epoch": 0.93, "grad_norm": 0.6650334631575656, "learning_rate": 2.8724533206675677e-07, "loss": 0.701, "step": 5280 }, { "epoch": 0.93, "grad_norm": 0.702131813608893, "learning_rate": 2.8589530457354353e-07, "loss": 0.7193, "step": 5281 }, { "epoch": 0.93, "grad_norm": 0.6983975630175427, "learning_rate": 2.845484110529417e-07, "loss": 0.7145, "step": 5282 }, { "epoch": 0.93, "grad_norm": 0.6398987738577269, "learning_rate": 2.832046519394849e-07, "loss": 0.712, "step": 5283 }, { "epoch": 0.93, "grad_norm": 0.6704847961146372, "learning_rate": 2.8186402766670065e-07, "loss": 0.6952, "step": 5284 }, { "epoch": 0.93, "grad_norm": 0.671989660040787, "learning_rate": 2.805265386671008e-07, "loss": 0.6901, "step": 5285 }, { "epoch": 0.93, "grad_norm": 0.6633470305546442, "learning_rate": 2.7919218537218685e-07, "loss": 0.697, "step": 5286 }, { "epoch": 0.93, "grad_norm": 0.6225718100093189, "learning_rate": 2.778609682124489e-07, "loss": 0.716, "step": 5287 }, { "epoch": 0.93, "grad_norm": 0.6665048818863435, "learning_rate": 2.7653288761736784e-07, "loss": 0.6707, "step": 5288 }, { "epoch": 0.93, "grad_norm": 0.7014681265910406, "learning_rate": 2.752079440154065e-07, "loss": 0.6996, "step": 5289 }, { "epoch": 0.93, "grad_norm": 0.6550481421094896, "learning_rate": 2.7388613783402076e-07, "loss": 0.677, "step": 5290 }, { "epoch": 0.93, "grad_norm": 0.8389082777614718, "learning_rate": 2.7256746949965387e-07, "loss": 0.7548, "step": 5291 }, { "epoch": 0.93, "grad_norm": 0.6934763032354532, "learning_rate": 2.7125193943773444e-07, "loss": 0.7413, "step": 5292 }, { "epoch": 0.93, "grad_norm": 0.6452204687243271, "learning_rate": 2.6993954807267966e-07, "loss": 0.6935, "step": 5293 }, { "epoch": 0.93, "grad_norm": 0.737444988436998, "learning_rate": 2.6863029582789525e-07, "loss": 0.7009, "step": 5294 }, { "epoch": 0.93, "grad_norm": 0.7047145414071522, "learning_rate": 2.6732418312577335e-07, "loss": 0.7419, "step": 5295 }, { "epoch": 0.93, "grad_norm": 0.7484372836539037, "learning_rate": 2.660212103876936e-07, "loss": 0.7212, "step": 5296 }, { "epoch": 0.93, "grad_norm": 0.6375132055858875, "learning_rate": 2.6472137803402073e-07, "loss": 0.6678, "step": 5297 }, { "epoch": 0.93, "grad_norm": 0.5865498389069066, "learning_rate": 2.6342468648410836e-07, "loss": 0.6449, "step": 5298 }, { "epoch": 0.93, "grad_norm": 0.6060404082395981, "learning_rate": 2.6213113615629613e-07, "loss": 0.6438, "step": 5299 }, { "epoch": 0.93, "grad_norm": 0.6504453337053913, "learning_rate": 2.6084072746791036e-07, "loss": 0.7085, "step": 5300 }, { "epoch": 0.93, "grad_norm": 0.6519244725529003, "learning_rate": 2.5955346083526345e-07, "loss": 0.7107, "step": 5301 }, { "epoch": 0.93, "grad_norm": 0.7254910910257484, "learning_rate": 2.582693366736555e-07, "loss": 0.6977, "step": 5302 }, { "epoch": 0.93, "grad_norm": 0.7482281195507772, "learning_rate": 2.5698835539737067e-07, "loss": 0.7219, "step": 5303 }, { "epoch": 0.93, "grad_norm": 0.6593995265038727, "learning_rate": 2.5571051741968055e-07, "loss": 0.732, "step": 5304 }, { "epoch": 0.93, "grad_norm": 0.751537277999001, "learning_rate": 2.544358231528421e-07, "loss": 0.7697, "step": 5305 }, { "epoch": 0.93, "grad_norm": 0.6862845617259445, "learning_rate": 2.5316427300809966e-07, "loss": 0.6862, "step": 5306 }, { "epoch": 0.93, "grad_norm": 0.6097667208210895, "learning_rate": 2.518958673956795e-07, "loss": 0.6268, "step": 5307 }, { "epoch": 0.93, "grad_norm": 0.6833456360297879, "learning_rate": 2.5063060672479655e-07, "loss": 0.699, "step": 5308 }, { "epoch": 0.93, "grad_norm": 0.5782768204817034, "learning_rate": 2.4936849140365206e-07, "loss": 0.6191, "step": 5309 }, { "epoch": 0.93, "grad_norm": 0.6637971556904797, "learning_rate": 2.4810952183942805e-07, "loss": 0.6851, "step": 5310 }, { "epoch": 0.93, "grad_norm": 0.7273650006681454, "learning_rate": 2.4685369843829634e-07, "loss": 0.7352, "step": 5311 }, { "epoch": 0.93, "grad_norm": 0.6031768480761882, "learning_rate": 2.456010216054094e-07, "loss": 0.64, "step": 5312 }, { "epoch": 0.93, "grad_norm": 0.606653964808978, "learning_rate": 2.443514917449097e-07, "loss": 0.6733, "step": 5313 }, { "epoch": 0.93, "grad_norm": 0.7347555273782942, "learning_rate": 2.431051092599213e-07, "loss": 0.7543, "step": 5314 }, { "epoch": 0.93, "grad_norm": 0.6647883387509251, "learning_rate": 2.418618745525514e-07, "loss": 0.6779, "step": 5315 }, { "epoch": 0.93, "grad_norm": 0.660705648251567, "learning_rate": 2.406217880238937e-07, "loss": 0.6957, "step": 5316 }, { "epoch": 0.93, "grad_norm": 0.5922955506474055, "learning_rate": 2.393848500740259e-07, "loss": 0.6774, "step": 5317 }, { "epoch": 0.93, "grad_norm": 0.7243654673491389, "learning_rate": 2.3815106110201102e-07, "loss": 0.7307, "step": 5318 }, { "epoch": 0.93, "grad_norm": 0.6826811242581564, "learning_rate": 2.36920421505894e-07, "loss": 0.6705, "step": 5319 }, { "epoch": 0.93, "grad_norm": 0.603614780430483, "learning_rate": 2.3569293168270724e-07, "loss": 0.6114, "step": 5320 }, { "epoch": 0.93, "grad_norm": 0.6441609257005241, "learning_rate": 2.3446859202846172e-07, "loss": 0.7168, "step": 5321 }, { "epoch": 0.93, "grad_norm": 0.6678421180030345, "learning_rate": 2.3324740293815596e-07, "loss": 0.72, "step": 5322 }, { "epoch": 0.93, "grad_norm": 0.6536284544458797, "learning_rate": 2.320293648057703e-07, "loss": 0.6764, "step": 5323 }, { "epoch": 0.93, "grad_norm": 0.6504295918063964, "learning_rate": 2.308144780242705e-07, "loss": 0.6694, "step": 5324 }, { "epoch": 0.93, "grad_norm": 0.6132297149515094, "learning_rate": 2.2960274298560403e-07, "loss": 0.6488, "step": 5325 }, { "epoch": 0.93, "grad_norm": 0.6493062799834516, "learning_rate": 2.283941600807016e-07, "loss": 0.6869, "step": 5326 }, { "epoch": 0.93, "grad_norm": 0.6857849361560552, "learning_rate": 2.271887296994768e-07, "loss": 0.6872, "step": 5327 }, { "epoch": 0.93, "grad_norm": 0.7697241837234011, "learning_rate": 2.2598645223082637e-07, "loss": 0.7318, "step": 5328 }, { "epoch": 0.93, "grad_norm": 0.7033822407821443, "learning_rate": 2.2478732806263114e-07, "loss": 0.7024, "step": 5329 }, { "epoch": 0.93, "grad_norm": 0.6325303786563078, "learning_rate": 2.2359135758175388e-07, "loss": 0.6835, "step": 5330 }, { "epoch": 0.93, "grad_norm": 0.6570855136681409, "learning_rate": 2.2239854117403926e-07, "loss": 0.6284, "step": 5331 }, { "epoch": 0.93, "grad_norm": 0.7232400004704773, "learning_rate": 2.212088792243128e-07, "loss": 0.7578, "step": 5332 }, { "epoch": 0.94, "grad_norm": 0.6721978432503914, "learning_rate": 2.2002237211638633e-07, "loss": 0.6809, "step": 5333 }, { "epoch": 0.94, "grad_norm": 0.8368467320675895, "learning_rate": 2.188390202330515e-07, "loss": 0.7522, "step": 5334 }, { "epoch": 0.94, "grad_norm": 0.6833956234177393, "learning_rate": 2.1765882395608174e-07, "loss": 0.7293, "step": 5335 }, { "epoch": 0.94, "grad_norm": 0.675077428549778, "learning_rate": 2.1648178366623252e-07, "loss": 0.6758, "step": 5336 }, { "epoch": 0.94, "grad_norm": 0.59199256656562, "learning_rate": 2.1530789974324228e-07, "loss": 0.6905, "step": 5337 }, { "epoch": 0.94, "grad_norm": 0.6556449634581225, "learning_rate": 2.1413717256583145e-07, "loss": 0.6839, "step": 5338 }, { "epoch": 0.94, "grad_norm": 0.6724001492706012, "learning_rate": 2.1296960251170006e-07, "loss": 0.6845, "step": 5339 }, { "epoch": 0.94, "grad_norm": 0.7076750661685112, "learning_rate": 2.118051899575291e-07, "loss": 0.7174, "step": 5340 }, { "epoch": 0.94, "grad_norm": 0.7473225196810949, "learning_rate": 2.1064393527898353e-07, "loss": 0.7675, "step": 5341 }, { "epoch": 0.94, "grad_norm": 0.7612050393454505, "learning_rate": 2.094858388507104e-07, "loss": 0.7138, "step": 5342 }, { "epoch": 0.94, "grad_norm": 0.7103366836764364, "learning_rate": 2.083309010463319e-07, "loss": 0.7236, "step": 5343 }, { "epoch": 0.94, "grad_norm": 0.7321097514861684, "learning_rate": 2.0717912223845894e-07, "loss": 0.7236, "step": 5344 }, { "epoch": 0.94, "grad_norm": 0.692284599030079, "learning_rate": 2.060305027986753e-07, "loss": 0.6707, "step": 5345 }, { "epoch": 0.94, "grad_norm": 0.6497895531963045, "learning_rate": 2.048850430975524e-07, "loss": 0.7125, "step": 5346 }, { "epoch": 0.94, "grad_norm": 0.653219521937856, "learning_rate": 2.0374274350463795e-07, "loss": 0.6659, "step": 5347 }, { "epoch": 0.94, "grad_norm": 0.6315017308991131, "learning_rate": 2.0260360438846273e-07, "loss": 0.7127, "step": 5348 }, { "epoch": 0.94, "grad_norm": 0.6326957297836353, "learning_rate": 2.0146762611653713e-07, "loss": 0.6708, "step": 5349 }, { "epoch": 0.94, "grad_norm": 0.7730375304677846, "learning_rate": 2.003348090553503e-07, "loss": 0.6953, "step": 5350 }, { "epoch": 0.94, "grad_norm": 0.6463240416682831, "learning_rate": 1.9920515357037207e-07, "loss": 0.7182, "step": 5351 }, { "epoch": 0.94, "grad_norm": 0.6898225593742806, "learning_rate": 1.9807866002605425e-07, "loss": 0.6998, "step": 5352 }, { "epoch": 0.94, "grad_norm": 0.6756703823555548, "learning_rate": 1.9695532878582614e-07, "loss": 0.6734, "step": 5353 }, { "epoch": 0.94, "grad_norm": 0.7366283900435714, "learning_rate": 1.958351602120989e-07, "loss": 0.7002, "step": 5354 }, { "epoch": 0.94, "grad_norm": 0.6847726887113197, "learning_rate": 1.947181546662613e-07, "loss": 0.6772, "step": 5355 }, { "epoch": 0.94, "grad_norm": 0.6278940912676385, "learning_rate": 1.9360431250868284e-07, "loss": 0.6616, "step": 5356 }, { "epoch": 0.94, "grad_norm": 0.5842397926378742, "learning_rate": 1.9249363409871158e-07, "loss": 0.6401, "step": 5357 }, { "epoch": 0.94, "grad_norm": 0.7105552374411587, "learning_rate": 1.9138611979467646e-07, "loss": 0.7019, "step": 5358 }, { "epoch": 0.94, "grad_norm": 0.6681501629359151, "learning_rate": 1.9028176995388503e-07, "loss": 0.7068, "step": 5359 }, { "epoch": 0.94, "grad_norm": 0.6305497613543287, "learning_rate": 1.8918058493262227e-07, "loss": 0.668, "step": 5360 }, { "epoch": 0.94, "grad_norm": 0.7466333048081372, "learning_rate": 1.8808256508615507e-07, "loss": 0.7171, "step": 5361 }, { "epoch": 0.94, "grad_norm": 0.6585537738923654, "learning_rate": 1.869877107687257e-07, "loss": 0.6857, "step": 5362 }, { "epoch": 0.94, "grad_norm": 0.6791053650694041, "learning_rate": 1.858960223335582e-07, "loss": 0.6921, "step": 5363 }, { "epoch": 0.94, "grad_norm": 0.6933654589367698, "learning_rate": 1.848075001328531e-07, "loss": 0.7478, "step": 5364 }, { "epoch": 0.94, "grad_norm": 0.6929465430521353, "learning_rate": 1.837221445177917e-07, "loss": 0.6961, "step": 5365 }, { "epoch": 0.94, "grad_norm": 0.6653940192058783, "learning_rate": 1.8263995583853056e-07, "loss": 0.6755, "step": 5366 }, { "epoch": 0.94, "grad_norm": 0.7241729407804185, "learning_rate": 1.8156093444420818e-07, "loss": 0.7075, "step": 5367 }, { "epoch": 0.94, "grad_norm": 0.6685563837555162, "learning_rate": 1.8048508068293724e-07, "loss": 0.6773, "step": 5368 }, { "epoch": 0.94, "grad_norm": 0.7414835438495249, "learning_rate": 1.7941239490181228e-07, "loss": 0.7123, "step": 5369 }, { "epoch": 0.94, "grad_norm": 0.718285505687217, "learning_rate": 1.7834287744690427e-07, "loss": 0.7162, "step": 5370 }, { "epoch": 0.94, "grad_norm": 0.7211233213202328, "learning_rate": 1.772765286632605e-07, "loss": 0.6902, "step": 5371 }, { "epoch": 0.94, "grad_norm": 0.6683863172002288, "learning_rate": 1.7621334889490805e-07, "loss": 0.7517, "step": 5372 }, { "epoch": 0.94, "grad_norm": 0.58800893657313, "learning_rate": 1.751533384848514e-07, "loss": 0.6712, "step": 5373 }, { "epoch": 0.94, "grad_norm": 0.7030546148679663, "learning_rate": 1.7409649777507033e-07, "loss": 0.6895, "step": 5374 }, { "epoch": 0.94, "grad_norm": 0.649651583517647, "learning_rate": 1.7304282710652544e-07, "loss": 0.6953, "step": 5375 }, { "epoch": 0.94, "grad_norm": 0.6630520648904323, "learning_rate": 1.7199232681915147e-07, "loss": 0.6801, "step": 5376 }, { "epoch": 0.94, "grad_norm": 0.6915271138842615, "learning_rate": 1.7094499725186177e-07, "loss": 0.6863, "step": 5377 }, { "epoch": 0.94, "grad_norm": 0.6363107121769487, "learning_rate": 1.6990083874254937e-07, "loss": 0.7143, "step": 5378 }, { "epoch": 0.94, "grad_norm": 0.6216105974102382, "learning_rate": 1.6885985162807817e-07, "loss": 0.7159, "step": 5379 }, { "epoch": 0.94, "grad_norm": 0.7070177140889281, "learning_rate": 1.6782203624429394e-07, "loss": 0.7261, "step": 5380 }, { "epoch": 0.94, "grad_norm": 0.6868125172200412, "learning_rate": 1.6678739292601664e-07, "loss": 0.7069, "step": 5381 }, { "epoch": 0.94, "grad_norm": 0.676021686230813, "learning_rate": 1.6575592200704371e-07, "loss": 0.7264, "step": 5382 }, { "epoch": 0.94, "grad_norm": 0.5805122717883384, "learning_rate": 1.6472762382015005e-07, "loss": 0.6554, "step": 5383 }, { "epoch": 0.94, "grad_norm": 0.6174661639239597, "learning_rate": 1.63702498697087e-07, "loss": 0.6304, "step": 5384 }, { "epoch": 0.94, "grad_norm": 0.658960891173463, "learning_rate": 1.6268054696857995e-07, "loss": 0.6848, "step": 5385 }, { "epoch": 0.94, "grad_norm": 0.6932417552597921, "learning_rate": 1.6166176896433072e-07, "loss": 0.7377, "step": 5386 }, { "epoch": 0.94, "grad_norm": 0.683848062736622, "learning_rate": 1.606461650130209e-07, "loss": 0.689, "step": 5387 }, { "epoch": 0.94, "grad_norm": 0.6211515237690866, "learning_rate": 1.5963373544230388e-07, "loss": 0.6213, "step": 5388 }, { "epoch": 0.94, "grad_norm": 0.6449131051877578, "learning_rate": 1.5862448057881065e-07, "loss": 0.6972, "step": 5389 }, { "epoch": 0.95, "grad_norm": 0.5785383242663593, "learning_rate": 1.5761840074814738e-07, "loss": 0.6784, "step": 5390 }, { "epoch": 0.95, "grad_norm": 0.6464724770369693, "learning_rate": 1.566154962748978e-07, "loss": 0.7152, "step": 5391 }, { "epoch": 0.95, "grad_norm": 0.6591720562861992, "learning_rate": 1.5561576748261863e-07, "loss": 0.6683, "step": 5392 }, { "epoch": 0.95, "grad_norm": 0.7171113644091159, "learning_rate": 1.546192146938441e-07, "loss": 0.732, "step": 5393 }, { "epoch": 0.95, "grad_norm": 0.7318819008314915, "learning_rate": 1.536258382300815e-07, "loss": 0.6991, "step": 5394 }, { "epoch": 0.95, "grad_norm": 0.6293937561626292, "learning_rate": 1.5263563841181662e-07, "loss": 0.6804, "step": 5395 }, { "epoch": 0.95, "grad_norm": 0.6464654854294664, "learning_rate": 1.5164861555850728e-07, "loss": 0.6694, "step": 5396 }, { "epoch": 0.95, "grad_norm": 0.6811146038603135, "learning_rate": 1.5066476998858882e-07, "loss": 0.7248, "step": 5397 }, { "epoch": 0.95, "grad_norm": 0.606539679268634, "learning_rate": 1.4968410201946837e-07, "loss": 0.6627, "step": 5398 }, { "epoch": 0.95, "grad_norm": 0.7029138632571363, "learning_rate": 1.4870661196753178e-07, "loss": 0.7542, "step": 5399 }, { "epoch": 0.95, "grad_norm": 0.7078459740295779, "learning_rate": 1.4773230014813568e-07, "loss": 0.734, "step": 5400 }, { "epoch": 0.95, "grad_norm": 0.702816708015938, "learning_rate": 1.4676116687561525e-07, "loss": 0.7338, "step": 5401 }, { "epoch": 0.95, "grad_norm": 0.687227510645245, "learning_rate": 1.457932124632788e-07, "loss": 0.7437, "step": 5402 }, { "epoch": 0.95, "grad_norm": 0.6810564716651443, "learning_rate": 1.4482843722340544e-07, "loss": 0.727, "step": 5403 }, { "epoch": 0.95, "grad_norm": 0.6822261790038039, "learning_rate": 1.4386684146725504e-07, "loss": 0.6998, "step": 5404 }, { "epoch": 0.95, "grad_norm": 0.6969651744840567, "learning_rate": 1.4290842550505724e-07, "loss": 0.7339, "step": 5405 }, { "epoch": 0.95, "grad_norm": 0.6776346095184568, "learning_rate": 1.4195318964601689e-07, "loss": 0.6486, "step": 5406 }, { "epoch": 0.95, "grad_norm": 0.6870674292266882, "learning_rate": 1.4100113419831195e-07, "loss": 0.7199, "step": 5407 }, { "epoch": 0.95, "grad_norm": 0.6242175327375672, "learning_rate": 1.4005225946909783e-07, "loss": 0.676, "step": 5408 }, { "epoch": 0.95, "grad_norm": 0.7037226142899639, "learning_rate": 1.3910656576449966e-07, "loss": 0.6699, "step": 5409 }, { "epoch": 0.95, "grad_norm": 0.7120608802623337, "learning_rate": 1.381640533896178e-07, "loss": 0.7228, "step": 5410 }, { "epoch": 0.95, "grad_norm": 0.5901787869698922, "learning_rate": 1.372247226485257e-07, "loss": 0.633, "step": 5411 }, { "epoch": 0.95, "grad_norm": 0.6195575885783481, "learning_rate": 1.3628857384427318e-07, "loss": 0.6785, "step": 5412 }, { "epoch": 0.95, "grad_norm": 0.67549776582141, "learning_rate": 1.3535560727888087e-07, "loss": 0.714, "step": 5413 }, { "epoch": 0.95, "grad_norm": 0.7267726122145755, "learning_rate": 1.3442582325334132e-07, "loss": 0.7008, "step": 5414 }, { "epoch": 0.95, "grad_norm": 0.636555896971177, "learning_rate": 1.3349922206762455e-07, "loss": 0.6666, "step": 5415 }, { "epoch": 0.95, "grad_norm": 0.6570074279463513, "learning_rate": 1.325758040206704e-07, "loss": 0.6848, "step": 5416 }, { "epoch": 0.95, "grad_norm": 0.6743986779903356, "learning_rate": 1.316555694103916e-07, "loss": 0.6725, "step": 5417 }, { "epoch": 0.95, "grad_norm": 0.6363886940951234, "learning_rate": 1.3073851853367736e-07, "loss": 0.73, "step": 5418 }, { "epoch": 0.95, "grad_norm": 0.6413491620458593, "learning_rate": 1.2982465168638546e-07, "loss": 0.6671, "step": 5419 }, { "epoch": 0.95, "grad_norm": 0.5853681308497494, "learning_rate": 1.2891396916335007e-07, "loss": 0.671, "step": 5420 }, { "epoch": 0.95, "grad_norm": 0.7757255777893428, "learning_rate": 1.2800647125837396e-07, "loss": 0.7717, "step": 5421 }, { "epoch": 0.95, "grad_norm": 0.622092641153492, "learning_rate": 1.2710215826423734e-07, "loss": 0.6966, "step": 5422 }, { "epoch": 0.95, "grad_norm": 0.6700193672196824, "learning_rate": 1.2620103047268905e-07, "loss": 0.6956, "step": 5423 }, { "epoch": 0.95, "grad_norm": 0.6717989465116165, "learning_rate": 1.2530308817445215e-07, "loss": 0.7075, "step": 5424 }, { "epoch": 0.95, "grad_norm": 0.6658624241255655, "learning_rate": 1.244083316592215e-07, "loss": 0.6614, "step": 5425 }, { "epoch": 0.95, "grad_norm": 0.5726927402870178, "learning_rate": 1.2351676121566514e-07, "loss": 0.6704, "step": 5426 }, { "epoch": 0.95, "grad_norm": 0.6550164951169197, "learning_rate": 1.2262837713142073e-07, "loss": 0.6795, "step": 5427 }, { "epoch": 0.95, "grad_norm": 0.7160642244361908, "learning_rate": 1.2174317969309902e-07, "loss": 0.6718, "step": 5428 }, { "epoch": 0.95, "grad_norm": 0.6367168211476035, "learning_rate": 1.2086116918628488e-07, "loss": 0.7073, "step": 5429 }, { "epoch": 0.95, "grad_norm": 0.6455559979608442, "learning_rate": 1.199823458955318e-07, "loss": 0.703, "step": 5430 }, { "epoch": 0.95, "grad_norm": 0.6733444352694116, "learning_rate": 1.1910671010436858e-07, "loss": 0.6832, "step": 5431 }, { "epoch": 0.95, "grad_norm": 0.7093572912879808, "learning_rate": 1.1823426209529143e-07, "loss": 0.7634, "step": 5432 }, { "epoch": 0.95, "grad_norm": 0.6995106190231996, "learning_rate": 1.1736500214977186e-07, "loss": 0.6898, "step": 5433 }, { "epoch": 0.95, "grad_norm": 0.8126020785607709, "learning_rate": 1.1649893054825002e-07, "loss": 0.7636, "step": 5434 }, { "epoch": 0.95, "grad_norm": 0.6440847222519523, "learning_rate": 1.1563604757013902e-07, "loss": 0.6943, "step": 5435 }, { "epoch": 0.95, "grad_norm": 0.6577235908701883, "learning_rate": 1.1477635349382287e-07, "loss": 0.649, "step": 5436 }, { "epoch": 0.95, "grad_norm": 0.6918672176021524, "learning_rate": 1.1391984859665751e-07, "loss": 0.6982, "step": 5437 }, { "epoch": 0.95, "grad_norm": 0.6773683147502787, "learning_rate": 1.1306653315496741e-07, "loss": 0.6899, "step": 5438 }, { "epoch": 0.95, "grad_norm": 0.607910258953721, "learning_rate": 1.1221640744405126e-07, "loss": 0.7027, "step": 5439 }, { "epoch": 0.95, "grad_norm": 0.6357941165784098, "learning_rate": 1.1136947173817858e-07, "loss": 0.698, "step": 5440 }, { "epoch": 0.95, "grad_norm": 0.6765220787830165, "learning_rate": 1.1052572631058634e-07, "loss": 0.6873, "step": 5441 }, { "epoch": 0.95, "grad_norm": 0.6289713809309675, "learning_rate": 1.0968517143348567e-07, "loss": 0.6635, "step": 5442 }, { "epoch": 0.95, "grad_norm": 0.736752890661965, "learning_rate": 1.0884780737805522e-07, "loss": 0.7389, "step": 5443 }, { "epoch": 0.95, "grad_norm": 0.6452425215022389, "learning_rate": 1.0801363441444889e-07, "loss": 0.6942, "step": 5444 }, { "epoch": 0.95, "grad_norm": 0.6576566902291915, "learning_rate": 1.0718265281178696e-07, "loss": 0.705, "step": 5445 }, { "epoch": 0.95, "grad_norm": 0.6755473646377278, "learning_rate": 1.0635486283816055e-07, "loss": 0.6615, "step": 5446 }, { "epoch": 0.96, "grad_norm": 0.6286575401958581, "learning_rate": 1.0553026476063266e-07, "loss": 0.6927, "step": 5447 }, { "epoch": 0.96, "grad_norm": 0.6218993398296805, "learning_rate": 1.0470885884523608e-07, "loss": 0.7201, "step": 5448 }, { "epoch": 0.96, "grad_norm": 0.5941705762963674, "learning_rate": 1.0389064535697324e-07, "loss": 0.6375, "step": 5449 }, { "epoch": 0.96, "grad_norm": 0.6707269164215461, "learning_rate": 1.0307562455981635e-07, "loss": 0.7064, "step": 5450 }, { "epoch": 0.96, "grad_norm": 0.699736926800377, "learning_rate": 1.022637967167095e-07, "loss": 0.7312, "step": 5451 }, { "epoch": 0.96, "grad_norm": 0.6706483072900383, "learning_rate": 1.014551620895643e-07, "loss": 0.6823, "step": 5452 }, { "epoch": 0.96, "grad_norm": 0.6999413184463142, "learning_rate": 1.0064972093926317e-07, "loss": 0.6851, "step": 5453 }, { "epoch": 0.96, "grad_norm": 0.6173633571657301, "learning_rate": 9.984747352565827e-08, "loss": 0.6649, "step": 5454 }, { "epoch": 0.96, "grad_norm": 0.6783149393390168, "learning_rate": 9.904842010757143e-08, "loss": 0.6938, "step": 5455 }, { "epoch": 0.96, "grad_norm": 0.6667822024853537, "learning_rate": 9.825256094279312e-08, "loss": 0.6928, "step": 5456 }, { "epoch": 0.96, "grad_norm": 0.683962660733446, "learning_rate": 9.745989628808572e-08, "loss": 0.6846, "step": 5457 }, { "epoch": 0.96, "grad_norm": 0.6540379375173747, "learning_rate": 9.6670426399178e-08, "loss": 0.6585, "step": 5458 }, { "epoch": 0.96, "grad_norm": 0.6677191222893065, "learning_rate": 9.588415153076958e-08, "loss": 0.6845, "step": 5459 }, { "epoch": 0.96, "grad_norm": 0.6467645870247477, "learning_rate": 9.510107193652973e-08, "loss": 0.6489, "step": 5460 }, { "epoch": 0.96, "grad_norm": 0.6458079781384493, "learning_rate": 9.432118786909639e-08, "loss": 0.6903, "step": 5461 }, { "epoch": 0.96, "grad_norm": 0.7366007208492513, "learning_rate": 9.354449958007494e-08, "loss": 0.6817, "step": 5462 }, { "epoch": 0.96, "grad_norm": 0.6631537568695398, "learning_rate": 9.277100732004273e-08, "loss": 0.7014, "step": 5463 }, { "epoch": 0.96, "grad_norm": 0.7228495512555279, "learning_rate": 9.200071133854349e-08, "loss": 0.6959, "step": 5464 }, { "epoch": 0.96, "grad_norm": 0.6406333034562853, "learning_rate": 9.123361188409175e-08, "loss": 0.6639, "step": 5465 }, { "epoch": 0.96, "grad_norm": 0.6852528112871983, "learning_rate": 9.046970920416953e-08, "loss": 0.708, "step": 5466 }, { "epoch": 0.96, "grad_norm": 0.7562878779958014, "learning_rate": 8.97090035452275e-08, "loss": 0.7331, "step": 5467 }, { "epoch": 0.96, "grad_norm": 0.6243821934093773, "learning_rate": 8.895149515268376e-08, "loss": 0.6952, "step": 5468 }, { "epoch": 0.96, "grad_norm": 0.7018068013029932, "learning_rate": 8.819718427092838e-08, "loss": 0.715, "step": 5469 }, { "epoch": 0.96, "grad_norm": 0.622901467427635, "learning_rate": 8.744607114331782e-08, "loss": 0.6721, "step": 5470 }, { "epoch": 0.96, "grad_norm": 0.6192400133204734, "learning_rate": 8.669815601217602e-08, "loss": 0.6335, "step": 5471 }, { "epoch": 0.96, "grad_norm": 0.6299693044914715, "learning_rate": 8.59534391187955e-08, "loss": 0.6804, "step": 5472 }, { "epoch": 0.96, "grad_norm": 0.5998845403135361, "learning_rate": 8.521192070343853e-08, "loss": 0.6553, "step": 5473 }, { "epoch": 0.96, "grad_norm": 0.6172238627711223, "learning_rate": 8.447360100533264e-08, "loss": 0.7108, "step": 5474 }, { "epoch": 0.96, "grad_norm": 0.6899381833964708, "learning_rate": 8.373848026267728e-08, "loss": 0.7325, "step": 5475 }, { "epoch": 0.96, "grad_norm": 0.6386727569951643, "learning_rate": 8.300655871263608e-08, "loss": 0.6958, "step": 5476 }, { "epoch": 0.96, "grad_norm": 0.6076590172086611, "learning_rate": 8.227783659134347e-08, "loss": 0.6884, "step": 5477 }, { "epoch": 0.96, "grad_norm": 0.6334231259725904, "learning_rate": 8.155231413390031e-08, "loss": 0.6375, "step": 5478 }, { "epoch": 0.96, "grad_norm": 0.6992592151939718, "learning_rate": 8.082999157437488e-08, "loss": 0.7001, "step": 5479 }, { "epoch": 0.96, "grad_norm": 0.7245712388693513, "learning_rate": 8.011086914580302e-08, "loss": 0.7541, "step": 5480 }, { "epoch": 0.96, "grad_norm": 0.7241863948307017, "learning_rate": 7.939494708018914e-08, "loss": 0.6867, "step": 5481 }, { "epoch": 0.96, "grad_norm": 0.624667387328441, "learning_rate": 7.868222560850402e-08, "loss": 0.6544, "step": 5482 }, { "epoch": 0.96, "grad_norm": 0.6444116829613343, "learning_rate": 7.797270496068598e-08, "loss": 0.6984, "step": 5483 }, { "epoch": 0.96, "grad_norm": 0.7788181361690866, "learning_rate": 7.726638536564413e-08, "loss": 0.7241, "step": 5484 }, { "epoch": 0.96, "grad_norm": 0.6171232487490448, "learning_rate": 7.656326705124839e-08, "loss": 0.6723, "step": 5485 }, { "epoch": 0.96, "grad_norm": 0.712423075514945, "learning_rate": 7.586335024434178e-08, "loss": 0.6765, "step": 5486 }, { "epoch": 0.96, "grad_norm": 0.6737013950228641, "learning_rate": 7.51666351707303e-08, "loss": 0.6704, "step": 5487 }, { "epoch": 0.96, "grad_norm": 0.6118646559437555, "learning_rate": 7.447312205518976e-08, "loss": 0.6905, "step": 5488 }, { "epoch": 0.96, "grad_norm": 0.6486017992438559, "learning_rate": 7.378281112146224e-08, "loss": 0.6884, "step": 5489 }, { "epoch": 0.96, "grad_norm": 0.7004253281266634, "learning_rate": 7.309570259225629e-08, "loss": 0.7354, "step": 5490 }, { "epoch": 0.96, "grad_norm": 0.672246031033486, "learning_rate": 7.24117966892468e-08, "loss": 0.667, "step": 5491 }, { "epoch": 0.96, "grad_norm": 0.6658028795447529, "learning_rate": 7.173109363307617e-08, "loss": 0.6708, "step": 5492 }, { "epoch": 0.96, "grad_norm": 0.6661353937865077, "learning_rate": 7.105359364335318e-08, "loss": 0.652, "step": 5493 }, { "epoch": 0.96, "grad_norm": 0.7733895384879172, "learning_rate": 7.037929693865408e-08, "loss": 0.7192, "step": 5494 }, { "epoch": 0.96, "grad_norm": 0.7780937399775334, "learning_rate": 6.970820373652154e-08, "loss": 0.7036, "step": 5495 }, { "epoch": 0.96, "grad_norm": 0.6710389581774653, "learning_rate": 6.904031425346236e-08, "loss": 0.7042, "step": 5496 }, { "epoch": 0.96, "grad_norm": 0.7112103254552711, "learning_rate": 6.837562870495307e-08, "loss": 0.6662, "step": 5497 }, { "epoch": 0.96, "grad_norm": 0.6571684379954628, "learning_rate": 6.771414730543546e-08, "loss": 0.6964, "step": 5498 }, { "epoch": 0.96, "grad_norm": 0.6917745000703825, "learning_rate": 6.705587026831551e-08, "loss": 0.7157, "step": 5499 }, { "epoch": 0.96, "grad_norm": 0.654789333280689, "learning_rate": 6.640079780596886e-08, "loss": 0.7116, "step": 5500 }, { "epoch": 0.96, "grad_norm": 0.6886054394480884, "learning_rate": 6.574893012973427e-08, "loss": 0.6973, "step": 5501 }, { "epoch": 0.96, "grad_norm": 0.7149878341042419, "learning_rate": 6.510026744991904e-08, "loss": 0.7408, "step": 5502 }, { "epoch": 0.96, "grad_norm": 0.6633918888951519, "learning_rate": 6.445480997579356e-08, "loss": 0.6936, "step": 5503 }, { "epoch": 0.97, "grad_norm": 0.7033771664604066, "learning_rate": 6.381255791559682e-08, "loss": 0.7337, "step": 5504 }, { "epoch": 0.97, "grad_norm": 0.6423251938853566, "learning_rate": 6.317351147653305e-08, "loss": 0.6756, "step": 5505 }, { "epoch": 0.97, "grad_norm": 0.6298487804842499, "learning_rate": 6.253767086477181e-08, "loss": 0.6583, "step": 5506 }, { "epoch": 0.97, "grad_norm": 0.6015206424147753, "learning_rate": 6.190503628544897e-08, "loss": 0.6392, "step": 5507 }, { "epoch": 0.97, "grad_norm": 0.6728794733465611, "learning_rate": 6.127560794266462e-08, "loss": 0.6825, "step": 5508 }, { "epoch": 0.97, "grad_norm": 0.7021633765065407, "learning_rate": 6.064938603948633e-08, "loss": 0.7052, "step": 5509 }, { "epoch": 0.97, "grad_norm": 0.8020683064701568, "learning_rate": 6.002637077794471e-08, "loss": 0.6889, "step": 5510 }, { "epoch": 0.97, "grad_norm": 0.666740883150888, "learning_rate": 5.940656235904008e-08, "loss": 0.6637, "step": 5511 }, { "epoch": 0.97, "grad_norm": 0.655418513501751, "learning_rate": 5.878996098273249e-08, "loss": 0.7117, "step": 5512 }, { "epoch": 0.97, "grad_norm": 0.6509193483789036, "learning_rate": 5.8176566847953916e-08, "loss": 0.673, "step": 5513 }, { "epoch": 0.97, "grad_norm": 0.6282077913359626, "learning_rate": 5.756638015259497e-08, "loss": 0.7148, "step": 5514 }, { "epoch": 0.97, "grad_norm": 0.6737025470285977, "learning_rate": 5.6959401093515944e-08, "loss": 0.7112, "step": 5515 }, { "epoch": 0.97, "grad_norm": 0.6525838584142409, "learning_rate": 5.63556298665413e-08, "loss": 0.7058, "step": 5516 }, { "epoch": 0.97, "grad_norm": 0.6763715789943742, "learning_rate": 5.575506666645858e-08, "loss": 0.6911, "step": 5517 }, { "epoch": 0.97, "grad_norm": 0.6807084627385365, "learning_rate": 5.5157711687023884e-08, "loss": 0.6541, "step": 5518 }, { "epoch": 0.97, "grad_norm": 0.6531066067639685, "learning_rate": 5.45635651209564e-08, "loss": 0.6773, "step": 5519 }, { "epoch": 0.97, "grad_norm": 0.6748731250438016, "learning_rate": 5.397262715993834e-08, "loss": 0.7212, "step": 5520 }, { "epoch": 0.97, "grad_norm": 0.6680565574391911, "learning_rate": 5.3384897994619433e-08, "loss": 0.7138, "step": 5521 }, { "epoch": 0.97, "grad_norm": 0.6742687511307099, "learning_rate": 5.280037781461467e-08, "loss": 0.7481, "step": 5522 }, { "epoch": 0.97, "grad_norm": 0.7036199496400647, "learning_rate": 5.2219066808500976e-08, "loss": 0.7265, "step": 5523 }, { "epoch": 0.97, "grad_norm": 0.7331251590788749, "learning_rate": 5.164096516382167e-08, "loss": 0.7137, "step": 5524 }, { "epoch": 0.97, "grad_norm": 0.6564399318181942, "learning_rate": 5.106607306708422e-08, "loss": 0.6837, "step": 5525 }, { "epoch": 0.97, "grad_norm": 0.6617534965394755, "learning_rate": 5.049439070376139e-08, "loss": 0.6782, "step": 5526 }, { "epoch": 0.97, "grad_norm": 0.6843668503518696, "learning_rate": 4.992591825829007e-08, "loss": 0.6655, "step": 5527 }, { "epoch": 0.97, "grad_norm": 0.6192024751956472, "learning_rate": 4.9360655914070245e-08, "loss": 0.6442, "step": 5528 }, { "epoch": 0.97, "grad_norm": 0.7190940679744571, "learning_rate": 4.879860385346713e-08, "loss": 0.7058, "step": 5529 }, { "epoch": 0.97, "grad_norm": 0.6509898251074434, "learning_rate": 4.823976225781235e-08, "loss": 0.6685, "step": 5530 }, { "epoch": 0.97, "grad_norm": 0.7384313808259438, "learning_rate": 4.768413130739835e-08, "loss": 0.7364, "step": 5531 }, { "epoch": 0.97, "grad_norm": 0.6993597851830534, "learning_rate": 4.713171118148396e-08, "loss": 0.6633, "step": 5532 }, { "epoch": 0.97, "grad_norm": 0.6584431493297701, "learning_rate": 4.6582502058289956e-08, "loss": 0.6653, "step": 5533 }, { "epoch": 0.97, "grad_norm": 0.6626327007144456, "learning_rate": 4.60365041150046e-08, "loss": 0.7098, "step": 5534 }, { "epoch": 0.97, "grad_norm": 0.6075177875270455, "learning_rate": 4.549371752777698e-08, "loss": 0.6744, "step": 5535 }, { "epoch": 0.97, "grad_norm": 0.6278407232485914, "learning_rate": 4.4954142471721475e-08, "loss": 0.6674, "step": 5536 }, { "epoch": 0.97, "grad_norm": 0.6107788967364894, "learning_rate": 4.441777912091771e-08, "loss": 0.6564, "step": 5537 }, { "epoch": 0.97, "grad_norm": 0.7009218510196313, "learning_rate": 4.388462764840507e-08, "loss": 0.6939, "step": 5538 }, { "epoch": 0.97, "grad_norm": 0.6783399765325352, "learning_rate": 4.335468822619038e-08, "loss": 0.6825, "step": 5539 }, { "epoch": 0.97, "grad_norm": 0.6967567211284718, "learning_rate": 4.282796102524467e-08, "loss": 0.7133, "step": 5540 }, { "epoch": 0.97, "grad_norm": 0.7299255416291, "learning_rate": 4.230444621549978e-08, "loss": 0.7064, "step": 5541 }, { "epoch": 0.97, "grad_norm": 0.662080336125653, "learning_rate": 4.178414396585173e-08, "loss": 0.7124, "step": 5542 }, { "epoch": 0.97, "grad_norm": 0.68721864121572, "learning_rate": 4.12670544441629e-08, "loss": 0.7428, "step": 5543 }, { "epoch": 0.97, "grad_norm": 0.6666740345956339, "learning_rate": 4.0753177817255406e-08, "loss": 0.6459, "step": 5544 }, { "epoch": 0.97, "grad_norm": 0.7102313023014182, "learning_rate": 4.024251425091663e-08, "loss": 0.7255, "step": 5545 }, { "epoch": 0.97, "grad_norm": 0.6352479947497611, "learning_rate": 3.973506390989923e-08, "loss": 0.6251, "step": 5546 }, { "epoch": 0.97, "grad_norm": 0.6682410311757189, "learning_rate": 3.923082695791447e-08, "loss": 0.6986, "step": 5547 }, { "epoch": 0.97, "grad_norm": 0.6203552913751105, "learning_rate": 3.872980355764222e-08, "loss": 0.701, "step": 5548 }, { "epoch": 0.97, "grad_norm": 0.6166951682069134, "learning_rate": 3.823199387072096e-08, "loss": 0.6747, "step": 5549 }, { "epoch": 0.97, "grad_norm": 0.7201814776247042, "learning_rate": 3.773739805775556e-08, "loss": 0.7302, "step": 5550 }, { "epoch": 0.97, "grad_norm": 0.7311603783743563, "learning_rate": 3.724601627831281e-08, "loss": 0.6878, "step": 5551 }, { "epoch": 0.97, "grad_norm": 0.7721903203447883, "learning_rate": 3.675784869092258e-08, "loss": 0.7145, "step": 5552 }, { "epoch": 0.97, "grad_norm": 0.6878445567195126, "learning_rate": 3.627289545307888e-08, "loss": 0.7745, "step": 5553 }, { "epoch": 0.97, "grad_norm": 0.6023877723449473, "learning_rate": 3.5791156721236567e-08, "loss": 0.641, "step": 5554 }, { "epoch": 0.97, "grad_norm": 0.7864912664357397, "learning_rate": 3.531263265081464e-08, "loss": 0.7494, "step": 5555 }, { "epoch": 0.97, "grad_norm": 0.6245292753101318, "learning_rate": 3.4837323396195164e-08, "loss": 0.6661, "step": 5556 }, { "epoch": 0.97, "grad_norm": 0.8498553047530379, "learning_rate": 3.436522911072326e-08, "loss": 0.7242, "step": 5557 }, { "epoch": 0.97, "grad_norm": 0.7084446830920609, "learning_rate": 3.3896349946705984e-08, "loss": 0.7211, "step": 5558 }, { "epoch": 0.97, "grad_norm": 0.6537481507428908, "learning_rate": 3.343068605541233e-08, "loss": 0.6689, "step": 5559 }, { "epoch": 0.97, "grad_norm": 0.6998291054957169, "learning_rate": 3.2968237587077676e-08, "loss": 0.6953, "step": 5560 }, { "epoch": 0.98, "grad_norm": 0.6544705507422103, "learning_rate": 3.250900469089602e-08, "loss": 0.6872, "step": 5561 }, { "epoch": 0.98, "grad_norm": 0.7261181523196746, "learning_rate": 3.20529875150255e-08, "loss": 0.6946, "step": 5562 }, { "epoch": 0.98, "grad_norm": 0.6553953168843222, "learning_rate": 3.160018620658734e-08, "loss": 0.6327, "step": 5563 }, { "epoch": 0.98, "grad_norm": 0.7169339212533979, "learning_rate": 3.115060091166355e-08, "loss": 0.6902, "step": 5564 }, { "epoch": 0.98, "grad_norm": 0.6706494871016345, "learning_rate": 3.070423177530035e-08, "loss": 0.7259, "step": 5565 }, { "epoch": 0.98, "grad_norm": 0.6591363505346738, "learning_rate": 3.0261078941506986e-08, "loss": 0.6945, "step": 5566 }, { "epoch": 0.98, "grad_norm": 0.8309762182752156, "learning_rate": 2.98211425532513e-08, "loss": 0.7277, "step": 5567 }, { "epoch": 0.98, "grad_norm": 0.6130791252474391, "learning_rate": 2.9384422752467558e-08, "loss": 0.6466, "step": 5568 }, { "epoch": 0.98, "grad_norm": 0.6659476727852927, "learning_rate": 2.8950919680050814e-08, "loss": 0.6641, "step": 5569 }, { "epoch": 0.98, "grad_norm": 0.6805059575578812, "learning_rate": 2.8520633475856985e-08, "loss": 0.6992, "step": 5570 }, { "epoch": 0.98, "grad_norm": 0.6672156392447297, "learning_rate": 2.809356427870724e-08, "loss": 0.7011, "step": 5571 }, { "epoch": 0.98, "grad_norm": 1.325974052365072, "learning_rate": 2.7669712226381374e-08, "loss": 0.6775, "step": 5572 }, { "epoch": 0.98, "grad_norm": 0.6606645159603727, "learning_rate": 2.724907745562333e-08, "loss": 0.6645, "step": 5573 }, { "epoch": 0.98, "grad_norm": 0.6461375985953987, "learning_rate": 2.6831660102139e-08, "loss": 0.6835, "step": 5574 }, { "epoch": 0.98, "grad_norm": 0.6812150693887683, "learning_rate": 2.641746030059511e-08, "loss": 0.6983, "step": 5575 }, { "epoch": 0.98, "grad_norm": 0.6326942933903705, "learning_rate": 2.600647818462143e-08, "loss": 0.6668, "step": 5576 }, { "epoch": 0.98, "grad_norm": 0.7217483331242144, "learning_rate": 2.559871388681079e-08, "loss": 0.7293, "step": 5577 }, { "epoch": 0.98, "grad_norm": 0.6519517630307867, "learning_rate": 2.519416753871462e-08, "loss": 0.6748, "step": 5578 }, { "epoch": 0.98, "grad_norm": 0.6811858906944799, "learning_rate": 2.4792839270848523e-08, "loss": 0.6685, "step": 5579 }, { "epoch": 0.98, "grad_norm": 0.6824146970539711, "learning_rate": 2.4394729212690037e-08, "loss": 0.714, "step": 5580 }, { "epoch": 0.98, "grad_norm": 0.6029142961677746, "learning_rate": 2.3999837492676427e-08, "loss": 0.6876, "step": 5581 }, { "epoch": 0.98, "grad_norm": 0.6667563331685223, "learning_rate": 2.360816423820911e-08, "loss": 0.7063, "step": 5582 }, { "epoch": 0.98, "grad_norm": 0.6901064776150798, "learning_rate": 2.321970957564923e-08, "loss": 0.7244, "step": 5583 }, { "epoch": 0.98, "grad_norm": 0.6516633716856788, "learning_rate": 2.2834473630322095e-08, "loss": 0.7108, "step": 5584 }, { "epoch": 0.98, "grad_norm": 0.690202132458105, "learning_rate": 2.2452456526510513e-08, "loss": 0.7101, "step": 5585 }, { "epoch": 0.98, "grad_norm": 0.6785092486002453, "learning_rate": 2.207365838746256e-08, "loss": 0.7216, "step": 5586 }, { "epoch": 0.98, "grad_norm": 0.5967070362582922, "learning_rate": 2.1698079335386036e-08, "loss": 0.6483, "step": 5587 }, { "epoch": 0.98, "grad_norm": 0.717265880506052, "learning_rate": 2.132571949145068e-08, "loss": 0.7158, "step": 5588 }, { "epoch": 0.98, "grad_norm": 0.59717832573565, "learning_rate": 2.0956578975788177e-08, "loss": 0.6313, "step": 5589 }, { "epoch": 0.98, "grad_norm": 0.6059682059829004, "learning_rate": 2.059065790748993e-08, "loss": 0.6883, "step": 5590 }, { "epoch": 0.98, "grad_norm": 0.6699089102571342, "learning_rate": 2.0227956404609282e-08, "loss": 0.6841, "step": 5591 }, { "epoch": 0.98, "grad_norm": 0.6538206419330663, "learning_rate": 1.9868474584162633e-08, "loss": 0.6932, "step": 5592 }, { "epoch": 0.98, "grad_norm": 0.6178147027974642, "learning_rate": 1.951221256212499e-08, "loss": 0.6649, "step": 5593 }, { "epoch": 0.98, "grad_norm": 0.6689091824554173, "learning_rate": 1.9159170453435515e-08, "loss": 0.6918, "step": 5594 }, { "epoch": 0.98, "grad_norm": 0.6443533334043914, "learning_rate": 1.8809348371991997e-08, "loss": 0.6906, "step": 5595 }, { "epoch": 0.98, "grad_norm": 0.7345928859014631, "learning_rate": 1.8462746430654155e-08, "loss": 0.7179, "step": 5596 }, { "epoch": 0.98, "grad_norm": 0.6176032287098241, "learning_rate": 1.811936474124254e-08, "loss": 0.6995, "step": 5597 }, { "epoch": 0.98, "grad_norm": 0.6834843849128179, "learning_rate": 1.777920341454076e-08, "loss": 0.6857, "step": 5598 }, { "epoch": 0.98, "grad_norm": 0.6893804803113004, "learning_rate": 1.7442262560291022e-08, "loss": 0.6902, "step": 5599 }, { "epoch": 0.98, "grad_norm": 0.6607789810574357, "learning_rate": 1.7108542287197493e-08, "loss": 0.6758, "step": 5600 }, { "epoch": 0.98, "grad_norm": 0.6921919265606316, "learning_rate": 1.6778042702926266e-08, "loss": 0.7163, "step": 5601 }, { "epoch": 0.98, "grad_norm": 0.6876722359209989, "learning_rate": 1.6450763914102052e-08, "loss": 0.7334, "step": 5602 }, { "epoch": 0.98, "grad_norm": 0.6798548706442888, "learning_rate": 1.6126706026311502e-08, "loss": 0.6848, "step": 5603 }, { "epoch": 0.98, "grad_norm": 0.7653289765067438, "learning_rate": 1.5805869144103205e-08, "loss": 0.749, "step": 5604 }, { "epoch": 0.98, "grad_norm": 0.6820773147117589, "learning_rate": 1.5488253370986585e-08, "loss": 0.6783, "step": 5605 }, { "epoch": 0.98, "grad_norm": 0.6822425813053976, "learning_rate": 1.5173858809429676e-08, "loss": 0.6728, "step": 5606 }, { "epoch": 0.98, "grad_norm": 0.6508818535981635, "learning_rate": 1.4862685560863565e-08, "loss": 0.7089, "step": 5607 }, { "epoch": 0.98, "grad_norm": 0.6816747025597627, "learning_rate": 1.4554733725680171e-08, "loss": 0.7293, "step": 5608 }, { "epoch": 0.98, "grad_norm": 0.5927305901316613, "learning_rate": 1.4250003403228907e-08, "loss": 0.6614, "step": 5609 }, { "epoch": 0.98, "grad_norm": 0.7332773081630236, "learning_rate": 1.3948494691823355e-08, "loss": 0.7601, "step": 5610 }, { "epoch": 0.98, "grad_norm": 0.6451677762060226, "learning_rate": 1.3650207688735706e-08, "loss": 0.6785, "step": 5611 }, { "epoch": 0.98, "grad_norm": 0.8462592227347205, "learning_rate": 1.33551424902012e-08, "loss": 0.7019, "step": 5612 }, { "epoch": 0.98, "grad_norm": 0.7439854657746539, "learning_rate": 1.3063299191413692e-08, "loss": 0.7346, "step": 5613 }, { "epoch": 0.98, "grad_norm": 0.6583400200491211, "learning_rate": 1.2774677886526753e-08, "loss": 0.6715, "step": 5614 }, { "epoch": 0.98, "grad_norm": 0.7227774176985211, "learning_rate": 1.2489278668655901e-08, "loss": 0.7195, "step": 5615 }, { "epoch": 0.98, "grad_norm": 0.6113205614217184, "learning_rate": 1.2207101629876373e-08, "loss": 0.7002, "step": 5616 }, { "epoch": 0.98, "grad_norm": 0.7040848845759214, "learning_rate": 1.1928146861226453e-08, "loss": 0.7238, "step": 5617 }, { "epoch": 0.99, "grad_norm": 0.6701186810729105, "learning_rate": 1.1652414452701932e-08, "loss": 0.6947, "step": 5618 }, { "epoch": 0.99, "grad_norm": 0.6765009903599033, "learning_rate": 1.137990449325832e-08, "loss": 0.6922, "step": 5619 }, { "epoch": 0.99, "grad_norm": 0.6152819513157632, "learning_rate": 1.1110617070814178e-08, "loss": 0.6484, "step": 5620 }, { "epoch": 0.99, "grad_norm": 0.7479269557589341, "learning_rate": 1.0844552272246678e-08, "loss": 0.6922, "step": 5621 }, { "epoch": 0.99, "grad_norm": 0.6497463669298972, "learning_rate": 1.0581710183394934e-08, "loss": 0.6741, "step": 5622 }, { "epoch": 0.99, "grad_norm": 0.6704578023265587, "learning_rate": 1.0322090889056669e-08, "loss": 0.6823, "step": 5623 }, { "epoch": 0.99, "grad_norm": 0.6380405049419285, "learning_rate": 1.006569447299044e-08, "loss": 0.6647, "step": 5624 }, { "epoch": 0.99, "grad_norm": 0.7251480186673226, "learning_rate": 9.812521017914523e-09, "loss": 0.6893, "step": 5625 }, { "epoch": 0.99, "grad_norm": 0.6496013199465467, "learning_rate": 9.562570605509137e-09, "loss": 0.6896, "step": 5626 }, { "epoch": 0.99, "grad_norm": 0.7317547010303674, "learning_rate": 9.31584331641311e-09, "loss": 0.7349, "step": 5627 }, { "epoch": 0.99, "grad_norm": 0.6941541159052061, "learning_rate": 9.072339230224992e-09, "loss": 0.7274, "step": 5628 }, { "epoch": 0.99, "grad_norm": 0.6572123224573279, "learning_rate": 8.832058425505275e-09, "loss": 0.6666, "step": 5629 }, { "epoch": 0.99, "grad_norm": 0.7056504798650207, "learning_rate": 8.59500097977306e-09, "loss": 0.7147, "step": 5630 }, { "epoch": 0.99, "grad_norm": 0.7170785602251238, "learning_rate": 8.361166969508283e-09, "loss": 0.7146, "step": 5631 }, { "epoch": 0.99, "grad_norm": 0.7154492326080727, "learning_rate": 8.130556470149486e-09, "loss": 0.6976, "step": 5632 }, { "epoch": 0.99, "grad_norm": 0.6460169141662006, "learning_rate": 7.903169556098267e-09, "loss": 0.7211, "step": 5633 }, { "epoch": 0.99, "grad_norm": 0.719410740540638, "learning_rate": 7.67900630071372e-09, "loss": 0.7217, "step": 5634 }, { "epoch": 0.99, "grad_norm": 0.6268100343483555, "learning_rate": 7.458066776314665e-09, "loss": 0.696, "step": 5635 }, { "epoch": 0.99, "grad_norm": 0.6840661464362316, "learning_rate": 7.24035105418186e-09, "loss": 0.7148, "step": 5636 }, { "epoch": 0.99, "grad_norm": 0.6963274874381727, "learning_rate": 7.025859204554675e-09, "loss": 0.6827, "step": 5637 }, { "epoch": 0.99, "grad_norm": 0.6527691498833009, "learning_rate": 6.814591296633311e-09, "loss": 0.6498, "step": 5638 }, { "epoch": 0.99, "grad_norm": 0.6972615324107898, "learning_rate": 6.606547398575469e-09, "loss": 0.7039, "step": 5639 }, { "epoch": 0.99, "grad_norm": 0.7074917816594729, "learning_rate": 6.401727577501904e-09, "loss": 0.7028, "step": 5640 }, { "epoch": 0.99, "grad_norm": 0.6734303610209647, "learning_rate": 6.200131899490869e-09, "loss": 0.6657, "step": 5641 }, { "epoch": 0.99, "grad_norm": 0.7596063316195328, "learning_rate": 6.001760429581449e-09, "loss": 0.707, "step": 5642 }, { "epoch": 0.99, "grad_norm": 0.7141100089259258, "learning_rate": 5.806613231773561e-09, "loss": 0.7044, "step": 5643 }, { "epoch": 0.99, "grad_norm": 0.6557798979954025, "learning_rate": 5.614690369024622e-09, "loss": 0.6747, "step": 5644 }, { "epoch": 0.99, "grad_norm": 0.7435377402331151, "learning_rate": 5.425991903252881e-09, "loss": 0.7045, "step": 5645 }, { "epoch": 0.99, "grad_norm": 0.6460518173894233, "learning_rate": 5.240517895337416e-09, "loss": 0.6977, "step": 5646 }, { "epoch": 0.99, "grad_norm": 0.5958300766470458, "learning_rate": 5.058268405114808e-09, "loss": 0.6783, "step": 5647 }, { "epoch": 0.99, "grad_norm": 0.6942252361999809, "learning_rate": 4.879243491383578e-09, "loss": 0.7187, "step": 5648 }, { "epoch": 0.99, "grad_norm": 0.6317507970935711, "learning_rate": 4.70344321190086e-09, "loss": 0.6854, "step": 5649 }, { "epoch": 0.99, "grad_norm": 0.6487419106880962, "learning_rate": 4.530867623381285e-09, "loss": 0.6853, "step": 5650 }, { "epoch": 0.99, "grad_norm": 0.6427862921547772, "learning_rate": 4.36151678150476e-09, "loss": 0.6799, "step": 5651 }, { "epoch": 0.99, "grad_norm": 0.6955621240362855, "learning_rate": 4.195390740905359e-09, "loss": 0.713, "step": 5652 }, { "epoch": 0.99, "grad_norm": 0.658454078844665, "learning_rate": 4.0324895551779875e-09, "loss": 0.6997, "step": 5653 }, { "epoch": 0.99, "grad_norm": 0.6520856152064165, "learning_rate": 3.872813276880605e-09, "loss": 0.6511, "step": 5654 }, { "epoch": 0.99, "grad_norm": 0.6727730127041908, "learning_rate": 3.7163619575264485e-09, "loss": 0.7105, "step": 5655 }, { "epoch": 0.99, "grad_norm": 0.6880240607490584, "learning_rate": 3.5631356475895883e-09, "loss": 0.7037, "step": 5656 }, { "epoch": 0.99, "grad_norm": 0.7939931686203886, "learning_rate": 3.4131343965049244e-09, "loss": 0.71, "step": 5657 }, { "epoch": 0.99, "grad_norm": 0.663413998886994, "learning_rate": 3.2663582526648584e-09, "loss": 0.7274, "step": 5658 }, { "epoch": 0.99, "grad_norm": 0.6703515192296359, "learning_rate": 3.122807263423733e-09, "loss": 0.6852, "step": 5659 }, { "epoch": 0.99, "grad_norm": 0.7908282089115537, "learning_rate": 2.9824814750945007e-09, "loss": 0.7469, "step": 5660 }, { "epoch": 0.99, "grad_norm": 0.6610715910812922, "learning_rate": 2.845380932947617e-09, "loss": 0.7247, "step": 5661 }, { "epoch": 0.99, "grad_norm": 0.6348239080514873, "learning_rate": 2.7115056812143657e-09, "loss": 0.7101, "step": 5662 }, { "epoch": 0.99, "grad_norm": 0.6912434716920728, "learning_rate": 2.580855763087975e-09, "loss": 0.7126, "step": 5663 }, { "epoch": 0.99, "grad_norm": 0.6793698310743903, "learning_rate": 2.453431220716951e-09, "loss": 0.6587, "step": 5664 }, { "epoch": 0.99, "grad_norm": 0.612317512376912, "learning_rate": 2.3292320952106318e-09, "loss": 0.709, "step": 5665 }, { "epoch": 0.99, "grad_norm": 2.0186555713822414, "learning_rate": 2.2082584266402974e-09, "loss": 0.7297, "step": 5666 }, { "epoch": 0.99, "grad_norm": 0.6923241471031918, "learning_rate": 2.0905102540336173e-09, "loss": 0.7014, "step": 5667 }, { "epoch": 0.99, "grad_norm": 0.6663853832824121, "learning_rate": 1.975987615376873e-09, "loss": 0.649, "step": 5668 }, { "epoch": 0.99, "grad_norm": 0.6051407891403848, "learning_rate": 1.864690547620507e-09, "loss": 0.6942, "step": 5669 }, { "epoch": 0.99, "grad_norm": 0.7256563573686435, "learning_rate": 1.7566190866691312e-09, "loss": 0.7202, "step": 5670 }, { "epoch": 0.99, "grad_norm": 0.7528249654043957, "learning_rate": 1.6517732673904109e-09, "loss": 0.7547, "step": 5671 }, { "epoch": 0.99, "grad_norm": 0.6525375255576223, "learning_rate": 1.5501531236084e-09, "loss": 0.6465, "step": 5672 }, { "epoch": 0.99, "grad_norm": 0.6496737477274479, "learning_rate": 1.4517586881079847e-09, "loss": 0.6997, "step": 5673 }, { "epoch": 0.99, "grad_norm": 0.665438568393549, "learning_rate": 1.3565899926337721e-09, "loss": 0.7057, "step": 5674 }, { "epoch": 1.0, "grad_norm": 0.6207866975370773, "learning_rate": 1.26464706788898e-09, "loss": 0.687, "step": 5675 }, { "epoch": 1.0, "grad_norm": 0.6557792100652791, "learning_rate": 1.1759299435365468e-09, "loss": 0.6807, "step": 5676 }, { "epoch": 1.0, "grad_norm": 0.6575142167006899, "learning_rate": 1.0904386481980222e-09, "loss": 0.6948, "step": 5677 }, { "epoch": 1.0, "grad_norm": 0.6953964969464204, "learning_rate": 1.0081732094557872e-09, "loss": 0.6879, "step": 5678 }, { "epoch": 1.0, "grad_norm": 0.70671209879021, "learning_rate": 9.291336538486129e-10, "loss": 0.7287, "step": 5679 }, { "epoch": 1.0, "grad_norm": 0.664184034482496, "learning_rate": 8.533200068783221e-10, "loss": 0.712, "step": 5680 }, { "epoch": 1.0, "grad_norm": 0.6694939436207021, "learning_rate": 7.807322930020179e-10, "loss": 0.7176, "step": 5681 }, { "epoch": 1.0, "grad_norm": 0.6491860336936887, "learning_rate": 7.113705356387446e-10, "loss": 0.6966, "step": 5682 }, { "epoch": 1.0, "grad_norm": 0.5892041087337994, "learning_rate": 6.452347571661577e-10, "loss": 0.6501, "step": 5683 }, { "epoch": 1.0, "grad_norm": 0.6380089802129617, "learning_rate": 5.823249789216334e-10, "loss": 0.716, "step": 5684 }, { "epoch": 1.0, "grad_norm": 0.6516162841713956, "learning_rate": 5.226412212000486e-10, "loss": 0.6926, "step": 5685 }, { "epoch": 1.0, "grad_norm": 0.6598421445406673, "learning_rate": 4.661835032582218e-10, "loss": 0.6837, "step": 5686 }, { "epoch": 1.0, "grad_norm": 0.6999742937275595, "learning_rate": 4.1295184330825135e-10, "loss": 0.7364, "step": 5687 }, { "epoch": 1.0, "grad_norm": 0.7204280449147162, "learning_rate": 3.6294625852639765e-10, "loss": 0.7144, "step": 5688 }, { "epoch": 1.0, "grad_norm": 0.7026042003397023, "learning_rate": 3.1616676504309106e-10, "loss": 0.6841, "step": 5689 }, { "epoch": 1.0, "grad_norm": 0.583902088662315, "learning_rate": 2.7261337795181365e-10, "loss": 0.6676, "step": 5690 }, { "epoch": 1.0, "grad_norm": 0.6147232048798866, "learning_rate": 2.3228611130354795e-10, "loss": 0.6871, "step": 5691 }, { "epoch": 1.0, "grad_norm": 0.6724636599074791, "learning_rate": 1.9518497810899762e-10, "loss": 0.7056, "step": 5692 }, { "epoch": 1.0, "grad_norm": 0.8050494673458964, "learning_rate": 1.6130999033747707e-10, "loss": 0.7494, "step": 5693 }, { "epoch": 1.0, "grad_norm": 0.6586580424743982, "learning_rate": 1.3066115891691156e-10, "loss": 0.7141, "step": 5694 }, { "epoch": 1.0, "grad_norm": 0.7112422246450445, "learning_rate": 1.0323849373605754e-10, "loss": 0.7, "step": 5695 }, { "epoch": 1.0, "grad_norm": 0.6489913107109709, "learning_rate": 7.904200364228232e-11, "loss": 0.6951, "step": 5696 }, { "epoch": 1.0, "grad_norm": 0.702646726069728, "learning_rate": 5.807169644156396e-11, "loss": 0.666, "step": 5697 }, { "epoch": 1.0, "grad_norm": 0.6616902934848862, "learning_rate": 4.0327578899601596e-11, "loss": 0.6658, "step": 5698 }, { "epoch": 1.0, "grad_norm": 0.7283359101001657, "learning_rate": 2.5809656740705123e-11, "loss": 0.6824, "step": 5699 }, { "epoch": 1.0, "grad_norm": 0.629232660896775, "learning_rate": 1.4517934648905497e-11, "loss": 0.6755, "step": 5700 }, { "epoch": 1.0, "grad_norm": 0.6485218932150899, "learning_rate": 6.452416266844452e-12, "loss": 0.6873, "step": 5701 }, { "epoch": 1.0, "grad_norm": 0.6288450128046413, "learning_rate": 1.6131041968847628e-12, "loss": 0.6744, "step": 5702 }, { "epoch": 1.0, "grad_norm": 0.6746962056277772, "learning_rate": 0.0, "loss": 0.6594, "step": 5703 }, { "epoch": 1.0, "step": 5703, "total_flos": 0.0, "train_loss": 0.19594565146265794, "train_runtime": 5410.6584, "train_samples_per_second": 272.424, "train_steps_per_second": 1.054 } ], "logging_steps": 1.0, "max_steps": 5703, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }