{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9965014577259477, "eval_steps": 500, "global_step": 1713, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001749271137026239, "grad_norm": 6.927950859069824, "learning_rate": 5.8139534883720935e-08, "loss": 1.112, "step": 1 }, { "epoch": 0.003498542274052478, "grad_norm": 7.0369086265563965, "learning_rate": 1.1627906976744187e-07, "loss": 1.17, "step": 2 }, { "epoch": 0.0052478134110787176, "grad_norm": 6.587803363800049, "learning_rate": 1.744186046511628e-07, "loss": 1.0965, "step": 3 }, { "epoch": 0.006997084548104956, "grad_norm": 7.085984230041504, "learning_rate": 2.3255813953488374e-07, "loss": 1.1619, "step": 4 }, { "epoch": 0.008746355685131196, "grad_norm": 6.997914791107178, "learning_rate": 2.906976744186047e-07, "loss": 1.1527, "step": 5 }, { "epoch": 0.010495626822157435, "grad_norm": 6.976718425750732, "learning_rate": 3.488372093023256e-07, "loss": 1.1175, "step": 6 }, { "epoch": 0.012244897959183673, "grad_norm": 6.910824298858643, "learning_rate": 4.0697674418604655e-07, "loss": 1.153, "step": 7 }, { "epoch": 0.013994169096209912, "grad_norm": 6.906734943389893, "learning_rate": 4.651162790697675e-07, "loss": 1.1405, "step": 8 }, { "epoch": 0.01574344023323615, "grad_norm": 6.774232864379883, "learning_rate": 5.232558139534884e-07, "loss": 1.1353, "step": 9 }, { "epoch": 0.01749271137026239, "grad_norm": 6.3412604331970215, "learning_rate": 5.813953488372094e-07, "loss": 1.106, "step": 10 }, { "epoch": 0.01924198250728863, "grad_norm": 6.0955634117126465, "learning_rate": 6.395348837209303e-07, "loss": 1.0971, "step": 11 }, { "epoch": 0.02099125364431487, "grad_norm": 6.291795253753662, "learning_rate": 6.976744186046513e-07, "loss": 1.1499, "step": 12 }, { "epoch": 0.022740524781341108, "grad_norm": 5.741114139556885, "learning_rate": 7.558139534883721e-07, "loss": 1.0976, "step": 13 }, { "epoch": 0.024489795918367346, "grad_norm": 5.2305073738098145, "learning_rate": 8.139534883720931e-07, "loss": 1.1093, "step": 14 }, { "epoch": 0.026239067055393587, "grad_norm": 4.9003520011901855, "learning_rate": 8.720930232558141e-07, "loss": 1.0708, "step": 15 }, { "epoch": 0.027988338192419825, "grad_norm": 4.959968090057373, "learning_rate": 9.30232558139535e-07, "loss": 1.1092, "step": 16 }, { "epoch": 0.029737609329446066, "grad_norm": 4.454362869262695, "learning_rate": 9.88372093023256e-07, "loss": 1.0709, "step": 17 }, { "epoch": 0.0314868804664723, "grad_norm": 3.384338855743408, "learning_rate": 1.0465116279069768e-06, "loss": 1.0366, "step": 18 }, { "epoch": 0.03323615160349854, "grad_norm": 3.179197311401367, "learning_rate": 1.1046511627906977e-06, "loss": 1.0149, "step": 19 }, { "epoch": 0.03498542274052478, "grad_norm": 3.093018054962158, "learning_rate": 1.1627906976744188e-06, "loss": 1.0361, "step": 20 }, { "epoch": 0.036734693877551024, "grad_norm": 3.024778127670288, "learning_rate": 1.2209302325581397e-06, "loss": 1.0532, "step": 21 }, { "epoch": 0.03848396501457726, "grad_norm": 2.873383045196533, "learning_rate": 1.2790697674418605e-06, "loss": 1.0241, "step": 22 }, { "epoch": 0.0402332361516035, "grad_norm": 2.762340545654297, "learning_rate": 1.3372093023255814e-06, "loss": 1.0142, "step": 23 }, { "epoch": 0.04198250728862974, "grad_norm": 2.3799502849578857, "learning_rate": 1.3953488372093025e-06, "loss": 1.0096, "step": 24 }, { "epoch": 0.043731778425655975, "grad_norm": 1.9668264389038086, "learning_rate": 1.4534883720930234e-06, "loss": 0.9536, "step": 25 }, { "epoch": 0.045481049562682216, "grad_norm": 2.1020021438598633, "learning_rate": 1.5116279069767443e-06, "loss": 0.9668, "step": 26 }, { "epoch": 0.04723032069970846, "grad_norm": 2.346451997756958, "learning_rate": 1.5697674418604653e-06, "loss": 0.9649, "step": 27 }, { "epoch": 0.04897959183673469, "grad_norm": 2.198338747024536, "learning_rate": 1.6279069767441862e-06, "loss": 0.9529, "step": 28 }, { "epoch": 0.05072886297376093, "grad_norm": 2.0172877311706543, "learning_rate": 1.686046511627907e-06, "loss": 0.9483, "step": 29 }, { "epoch": 0.052478134110787174, "grad_norm": 1.7843124866485596, "learning_rate": 1.7441860465116282e-06, "loss": 0.9203, "step": 30 }, { "epoch": 0.05422740524781341, "grad_norm": 1.6177380084991455, "learning_rate": 1.8023255813953488e-06, "loss": 0.9093, "step": 31 }, { "epoch": 0.05597667638483965, "grad_norm": 1.4337577819824219, "learning_rate": 1.86046511627907e-06, "loss": 0.9321, "step": 32 }, { "epoch": 0.05772594752186589, "grad_norm": 1.4343769550323486, "learning_rate": 1.918604651162791e-06, "loss": 0.914, "step": 33 }, { "epoch": 0.05947521865889213, "grad_norm": 1.754113793373108, "learning_rate": 1.976744186046512e-06, "loss": 0.8973, "step": 34 }, { "epoch": 0.061224489795918366, "grad_norm": 1.8165544271469116, "learning_rate": 2.0348837209302328e-06, "loss": 0.8811, "step": 35 }, { "epoch": 0.0629737609329446, "grad_norm": 1.4021797180175781, "learning_rate": 2.0930232558139536e-06, "loss": 0.8186, "step": 36 }, { "epoch": 0.06472303206997085, "grad_norm": 1.316294550895691, "learning_rate": 2.1511627906976745e-06, "loss": 0.8706, "step": 37 }, { "epoch": 0.06647230320699708, "grad_norm": 1.1901872158050537, "learning_rate": 2.2093023255813954e-06, "loss": 0.8599, "step": 38 }, { "epoch": 0.06822157434402332, "grad_norm": 1.2351670265197754, "learning_rate": 2.2674418604651163e-06, "loss": 0.8506, "step": 39 }, { "epoch": 0.06997084548104957, "grad_norm": 1.36652672290802, "learning_rate": 2.3255813953488376e-06, "loss": 0.8375, "step": 40 }, { "epoch": 0.0717201166180758, "grad_norm": 1.2795497179031372, "learning_rate": 2.3837209302325585e-06, "loss": 0.8079, "step": 41 }, { "epoch": 0.07346938775510205, "grad_norm": 1.118710994720459, "learning_rate": 2.4418604651162793e-06, "loss": 0.8223, "step": 42 }, { "epoch": 0.07521865889212828, "grad_norm": 1.0609632730484009, "learning_rate": 2.5e-06, "loss": 0.829, "step": 43 }, { "epoch": 0.07696793002915452, "grad_norm": 0.9519705176353455, "learning_rate": 2.558139534883721e-06, "loss": 0.8249, "step": 44 }, { "epoch": 0.07871720116618076, "grad_norm": 0.979071319103241, "learning_rate": 2.6162790697674424e-06, "loss": 0.8018, "step": 45 }, { "epoch": 0.080466472303207, "grad_norm": 0.9642322063446045, "learning_rate": 2.674418604651163e-06, "loss": 0.8288, "step": 46 }, { "epoch": 0.08221574344023323, "grad_norm": 0.8872063755989075, "learning_rate": 2.7325581395348837e-06, "loss": 0.8026, "step": 47 }, { "epoch": 0.08396501457725948, "grad_norm": 0.849246084690094, "learning_rate": 2.790697674418605e-06, "loss": 0.792, "step": 48 }, { "epoch": 0.08571428571428572, "grad_norm": 0.8678059577941895, "learning_rate": 2.848837209302326e-06, "loss": 0.7827, "step": 49 }, { "epoch": 0.08746355685131195, "grad_norm": 0.9353904724121094, "learning_rate": 2.9069767441860468e-06, "loss": 0.7772, "step": 50 }, { "epoch": 0.0892128279883382, "grad_norm": 0.8705860376358032, "learning_rate": 2.965116279069768e-06, "loss": 0.7944, "step": 51 }, { "epoch": 0.09096209912536443, "grad_norm": 0.850265622138977, "learning_rate": 3.0232558139534885e-06, "loss": 0.7999, "step": 52 }, { "epoch": 0.09271137026239067, "grad_norm": 0.8365254402160645, "learning_rate": 3.0813953488372094e-06, "loss": 0.7709, "step": 53 }, { "epoch": 0.09446064139941691, "grad_norm": 0.8595771193504333, "learning_rate": 3.1395348837209307e-06, "loss": 0.7872, "step": 54 }, { "epoch": 0.09620991253644315, "grad_norm": 0.9326982498168945, "learning_rate": 3.1976744186046516e-06, "loss": 0.7688, "step": 55 }, { "epoch": 0.09795918367346938, "grad_norm": 0.9450392127037048, "learning_rate": 3.2558139534883724e-06, "loss": 0.7862, "step": 56 }, { "epoch": 0.09970845481049563, "grad_norm": 0.7583674192428589, "learning_rate": 3.313953488372093e-06, "loss": 0.7721, "step": 57 }, { "epoch": 0.10145772594752187, "grad_norm": 0.749837338924408, "learning_rate": 3.372093023255814e-06, "loss": 0.7769, "step": 58 }, { "epoch": 0.1032069970845481, "grad_norm": 0.7862544059753418, "learning_rate": 3.430232558139535e-06, "loss": 0.7883, "step": 59 }, { "epoch": 0.10495626822157435, "grad_norm": 0.852881669998169, "learning_rate": 3.4883720930232564e-06, "loss": 0.7702, "step": 60 }, { "epoch": 0.10670553935860058, "grad_norm": 0.7574265599250793, "learning_rate": 3.5465116279069772e-06, "loss": 0.7639, "step": 61 }, { "epoch": 0.10845481049562682, "grad_norm": 0.7816519737243652, "learning_rate": 3.6046511627906977e-06, "loss": 0.7838, "step": 62 }, { "epoch": 0.11020408163265306, "grad_norm": 0.7837647199630737, "learning_rate": 3.6627906976744186e-06, "loss": 0.7608, "step": 63 }, { "epoch": 0.1119533527696793, "grad_norm": 0.7166586518287659, "learning_rate": 3.72093023255814e-06, "loss": 0.7662, "step": 64 }, { "epoch": 0.11370262390670553, "grad_norm": 0.7910889983177185, "learning_rate": 3.7790697674418607e-06, "loss": 0.7557, "step": 65 }, { "epoch": 0.11545189504373178, "grad_norm": 0.7474683523178101, "learning_rate": 3.837209302325582e-06, "loss": 0.7705, "step": 66 }, { "epoch": 0.11720116618075802, "grad_norm": 0.782768726348877, "learning_rate": 3.8953488372093025e-06, "loss": 0.7573, "step": 67 }, { "epoch": 0.11895043731778426, "grad_norm": 0.8091023564338684, "learning_rate": 3.953488372093024e-06, "loss": 0.749, "step": 68 }, { "epoch": 0.1206997084548105, "grad_norm": 0.8194798231124878, "learning_rate": 4.011627906976744e-06, "loss": 0.7547, "step": 69 }, { "epoch": 0.12244897959183673, "grad_norm": 0.7381356358528137, "learning_rate": 4.0697674418604655e-06, "loss": 0.7469, "step": 70 }, { "epoch": 0.12419825072886298, "grad_norm": 0.7338340282440186, "learning_rate": 4.127906976744187e-06, "loss": 0.726, "step": 71 }, { "epoch": 0.1259475218658892, "grad_norm": 0.7473530769348145, "learning_rate": 4.186046511627907e-06, "loss": 0.7192, "step": 72 }, { "epoch": 0.12769679300291545, "grad_norm": 0.8724824786186218, "learning_rate": 4.244186046511628e-06, "loss": 0.7794, "step": 73 }, { "epoch": 0.1294460641399417, "grad_norm": 0.751129150390625, "learning_rate": 4.302325581395349e-06, "loss": 0.7453, "step": 74 }, { "epoch": 0.13119533527696792, "grad_norm": 0.8948066234588623, "learning_rate": 4.36046511627907e-06, "loss": 0.74, "step": 75 }, { "epoch": 0.13294460641399417, "grad_norm": 0.7968193292617798, "learning_rate": 4.418604651162791e-06, "loss": 0.7528, "step": 76 }, { "epoch": 0.1346938775510204, "grad_norm": 0.7686959505081177, "learning_rate": 4.476744186046512e-06, "loss": 0.7725, "step": 77 }, { "epoch": 0.13644314868804663, "grad_norm": 0.7859252095222473, "learning_rate": 4.5348837209302326e-06, "loss": 0.7306, "step": 78 }, { "epoch": 0.13819241982507288, "grad_norm": 0.8519811630249023, "learning_rate": 4.593023255813954e-06, "loss": 0.7406, "step": 79 }, { "epoch": 0.13994169096209913, "grad_norm": 0.8160409927368164, "learning_rate": 4.651162790697675e-06, "loss": 0.7458, "step": 80 }, { "epoch": 0.14169096209912538, "grad_norm": 0.847032368183136, "learning_rate": 4.709302325581396e-06, "loss": 0.7301, "step": 81 }, { "epoch": 0.1434402332361516, "grad_norm": 0.7633818984031677, "learning_rate": 4.767441860465117e-06, "loss": 0.7037, "step": 82 }, { "epoch": 0.14518950437317785, "grad_norm": 0.7600036859512329, "learning_rate": 4.825581395348838e-06, "loss": 0.7385, "step": 83 }, { "epoch": 0.1469387755102041, "grad_norm": 0.8503198623657227, "learning_rate": 4.883720930232559e-06, "loss": 0.7585, "step": 84 }, { "epoch": 0.14868804664723032, "grad_norm": 0.8065237402915955, "learning_rate": 4.941860465116279e-06, "loss": 0.7355, "step": 85 }, { "epoch": 0.15043731778425656, "grad_norm": 0.7841895818710327, "learning_rate": 5e-06, "loss": 0.7245, "step": 86 }, { "epoch": 0.1521865889212828, "grad_norm": 0.8088502883911133, "learning_rate": 5.058139534883722e-06, "loss": 0.7425, "step": 87 }, { "epoch": 0.15393586005830903, "grad_norm": 0.7876049876213074, "learning_rate": 5.116279069767442e-06, "loss": 0.7478, "step": 88 }, { "epoch": 0.15568513119533528, "grad_norm": 0.8074668049812317, "learning_rate": 5.1744186046511635e-06, "loss": 0.7544, "step": 89 }, { "epoch": 0.15743440233236153, "grad_norm": 0.827198326587677, "learning_rate": 5.232558139534885e-06, "loss": 0.7295, "step": 90 }, { "epoch": 0.15918367346938775, "grad_norm": 0.8083609938621521, "learning_rate": 5.290697674418605e-06, "loss": 0.7348, "step": 91 }, { "epoch": 0.160932944606414, "grad_norm": 0.7084585428237915, "learning_rate": 5.348837209302326e-06, "loss": 0.7176, "step": 92 }, { "epoch": 0.16268221574344024, "grad_norm": 0.8089188933372498, "learning_rate": 5.406976744186047e-06, "loss": 0.726, "step": 93 }, { "epoch": 0.16443148688046647, "grad_norm": 0.7521550059318542, "learning_rate": 5.465116279069767e-06, "loss": 0.7191, "step": 94 }, { "epoch": 0.1661807580174927, "grad_norm": 0.7223957180976868, "learning_rate": 5.523255813953489e-06, "loss": 0.7305, "step": 95 }, { "epoch": 0.16793002915451896, "grad_norm": 0.7788702845573425, "learning_rate": 5.58139534883721e-06, "loss": 0.7572, "step": 96 }, { "epoch": 0.16967930029154518, "grad_norm": 0.7492494583129883, "learning_rate": 5.6395348837209305e-06, "loss": 0.7453, "step": 97 }, { "epoch": 0.17142857142857143, "grad_norm": 0.7491464018821716, "learning_rate": 5.697674418604652e-06, "loss": 0.7113, "step": 98 }, { "epoch": 0.17317784256559768, "grad_norm": 0.7532879114151001, "learning_rate": 5.755813953488373e-06, "loss": 0.7158, "step": 99 }, { "epoch": 0.1749271137026239, "grad_norm": 0.8485985398292542, "learning_rate": 5.8139534883720935e-06, "loss": 0.7202, "step": 100 }, { "epoch": 0.17667638483965015, "grad_norm": 0.7578812837600708, "learning_rate": 5.872093023255815e-06, "loss": 0.6982, "step": 101 }, { "epoch": 0.1784256559766764, "grad_norm": 0.7509278655052185, "learning_rate": 5.930232558139536e-06, "loss": 0.7148, "step": 102 }, { "epoch": 0.18017492711370262, "grad_norm": 0.8094415068626404, "learning_rate": 5.988372093023256e-06, "loss": 0.722, "step": 103 }, { "epoch": 0.18192419825072886, "grad_norm": 0.7700554728507996, "learning_rate": 6.046511627906977e-06, "loss": 0.7178, "step": 104 }, { "epoch": 0.1836734693877551, "grad_norm": 0.848608672618866, "learning_rate": 6.104651162790698e-06, "loss": 0.7099, "step": 105 }, { "epoch": 0.18542274052478133, "grad_norm": 0.8904078602790833, "learning_rate": 6.162790697674419e-06, "loss": 0.7227, "step": 106 }, { "epoch": 0.18717201166180758, "grad_norm": 0.7577165365219116, "learning_rate": 6.22093023255814e-06, "loss": 0.716, "step": 107 }, { "epoch": 0.18892128279883383, "grad_norm": 0.8763809204101562, "learning_rate": 6.279069767441861e-06, "loss": 0.7423, "step": 108 }, { "epoch": 0.19067055393586005, "grad_norm": 0.8464856743812561, "learning_rate": 6.337209302325582e-06, "loss": 0.698, "step": 109 }, { "epoch": 0.1924198250728863, "grad_norm": 0.7644020915031433, "learning_rate": 6.395348837209303e-06, "loss": 0.7323, "step": 110 }, { "epoch": 0.19416909620991255, "grad_norm": 0.7447682023048401, "learning_rate": 6.4534883720930244e-06, "loss": 0.7042, "step": 111 }, { "epoch": 0.19591836734693877, "grad_norm": 0.8717958331108093, "learning_rate": 6.511627906976745e-06, "loss": 0.6975, "step": 112 }, { "epoch": 0.197667638483965, "grad_norm": 0.8748733997344971, "learning_rate": 6.569767441860465e-06, "loss": 0.7313, "step": 113 }, { "epoch": 0.19941690962099126, "grad_norm": 0.8892107009887695, "learning_rate": 6.627906976744186e-06, "loss": 0.7032, "step": 114 }, { "epoch": 0.20116618075801748, "grad_norm": 0.836543619632721, "learning_rate": 6.686046511627907e-06, "loss": 0.7032, "step": 115 }, { "epoch": 0.20291545189504373, "grad_norm": 0.8091773390769958, "learning_rate": 6.744186046511628e-06, "loss": 0.7066, "step": 116 }, { "epoch": 0.20466472303206998, "grad_norm": 0.9410114288330078, "learning_rate": 6.80232558139535e-06, "loss": 0.7036, "step": 117 }, { "epoch": 0.2064139941690962, "grad_norm": 0.7774474620819092, "learning_rate": 6.86046511627907e-06, "loss": 0.6809, "step": 118 }, { "epoch": 0.20816326530612245, "grad_norm": 0.7945885062217712, "learning_rate": 6.9186046511627914e-06, "loss": 0.7128, "step": 119 }, { "epoch": 0.2099125364431487, "grad_norm": 0.8980152010917664, "learning_rate": 6.976744186046513e-06, "loss": 0.7094, "step": 120 }, { "epoch": 0.21166180758017492, "grad_norm": 0.8268460035324097, "learning_rate": 7.034883720930233e-06, "loss": 0.7075, "step": 121 }, { "epoch": 0.21341107871720116, "grad_norm": 0.7794296741485596, "learning_rate": 7.0930232558139545e-06, "loss": 0.7219, "step": 122 }, { "epoch": 0.2151603498542274, "grad_norm": 0.8592284917831421, "learning_rate": 7.151162790697676e-06, "loss": 0.7058, "step": 123 }, { "epoch": 0.21690962099125363, "grad_norm": 0.7795148491859436, "learning_rate": 7.209302325581395e-06, "loss": 0.7081, "step": 124 }, { "epoch": 0.21865889212827988, "grad_norm": 0.8111539483070374, "learning_rate": 7.267441860465117e-06, "loss": 0.7073, "step": 125 }, { "epoch": 0.22040816326530613, "grad_norm": 0.7707515954971313, "learning_rate": 7.325581395348837e-06, "loss": 0.6839, "step": 126 }, { "epoch": 0.22215743440233235, "grad_norm": 0.8843724727630615, "learning_rate": 7.3837209302325584e-06, "loss": 0.7024, "step": 127 }, { "epoch": 0.2239067055393586, "grad_norm": 0.8938904404640198, "learning_rate": 7.44186046511628e-06, "loss": 0.7209, "step": 128 }, { "epoch": 0.22565597667638485, "grad_norm": 0.7606058716773987, "learning_rate": 7.500000000000001e-06, "loss": 0.7318, "step": 129 }, { "epoch": 0.22740524781341107, "grad_norm": 0.8294805884361267, "learning_rate": 7.5581395348837215e-06, "loss": 0.6996, "step": 130 }, { "epoch": 0.2291545189504373, "grad_norm": 0.9355678558349609, "learning_rate": 7.616279069767443e-06, "loss": 0.6945, "step": 131 }, { "epoch": 0.23090379008746356, "grad_norm": 0.8121860027313232, "learning_rate": 7.674418604651164e-06, "loss": 0.6955, "step": 132 }, { "epoch": 0.23265306122448978, "grad_norm": 0.8831561207771301, "learning_rate": 7.732558139534885e-06, "loss": 0.7173, "step": 133 }, { "epoch": 0.23440233236151603, "grad_norm": 0.839634120464325, "learning_rate": 7.790697674418605e-06, "loss": 0.7008, "step": 134 }, { "epoch": 0.23615160349854228, "grad_norm": 0.9939845204353333, "learning_rate": 7.848837209302325e-06, "loss": 0.6816, "step": 135 }, { "epoch": 0.23790087463556853, "grad_norm": 0.8028685450553894, "learning_rate": 7.906976744186048e-06, "loss": 0.683, "step": 136 }, { "epoch": 0.23965014577259475, "grad_norm": 0.9724025130271912, "learning_rate": 7.965116279069768e-06, "loss": 0.707, "step": 137 }, { "epoch": 0.241399416909621, "grad_norm": 0.8497872352600098, "learning_rate": 8.023255813953488e-06, "loss": 0.7046, "step": 138 }, { "epoch": 0.24314868804664724, "grad_norm": 0.8368130922317505, "learning_rate": 8.08139534883721e-06, "loss": 0.6981, "step": 139 }, { "epoch": 0.24489795918367346, "grad_norm": 0.9745544195175171, "learning_rate": 8.139534883720931e-06, "loss": 0.703, "step": 140 }, { "epoch": 0.2466472303206997, "grad_norm": 0.7968669533729553, "learning_rate": 8.197674418604652e-06, "loss": 0.6991, "step": 141 }, { "epoch": 0.24839650145772596, "grad_norm": 0.9027257561683655, "learning_rate": 8.255813953488374e-06, "loss": 0.7124, "step": 142 }, { "epoch": 0.2501457725947522, "grad_norm": 0.8721644282341003, "learning_rate": 8.313953488372094e-06, "loss": 0.7125, "step": 143 }, { "epoch": 0.2518950437317784, "grad_norm": 0.8598381280899048, "learning_rate": 8.372093023255815e-06, "loss": 0.7159, "step": 144 }, { "epoch": 0.2536443148688047, "grad_norm": 0.8189973831176758, "learning_rate": 8.430232558139537e-06, "loss": 0.6915, "step": 145 }, { "epoch": 0.2553935860058309, "grad_norm": 0.8181302547454834, "learning_rate": 8.488372093023256e-06, "loss": 0.6946, "step": 146 }, { "epoch": 0.2571428571428571, "grad_norm": 0.8662753701210022, "learning_rate": 8.546511627906978e-06, "loss": 0.7167, "step": 147 }, { "epoch": 0.2588921282798834, "grad_norm": 0.9563596844673157, "learning_rate": 8.604651162790698e-06, "loss": 0.7308, "step": 148 }, { "epoch": 0.2606413994169096, "grad_norm": 1.222193956375122, "learning_rate": 8.662790697674419e-06, "loss": 0.7019, "step": 149 }, { "epoch": 0.26239067055393583, "grad_norm": 0.9216798543930054, "learning_rate": 8.72093023255814e-06, "loss": 0.7041, "step": 150 }, { "epoch": 0.2641399416909621, "grad_norm": 1.1131595373153687, "learning_rate": 8.779069767441861e-06, "loss": 0.6968, "step": 151 }, { "epoch": 0.26588921282798833, "grad_norm": 1.2671828269958496, "learning_rate": 8.837209302325582e-06, "loss": 0.6936, "step": 152 }, { "epoch": 0.26763848396501455, "grad_norm": 0.8260896801948547, "learning_rate": 8.895348837209304e-06, "loss": 0.6921, "step": 153 }, { "epoch": 0.2693877551020408, "grad_norm": 1.1704788208007812, "learning_rate": 8.953488372093024e-06, "loss": 0.6809, "step": 154 }, { "epoch": 0.27113702623906705, "grad_norm": 1.235892415046692, "learning_rate": 9.011627906976745e-06, "loss": 0.6852, "step": 155 }, { "epoch": 0.27288629737609327, "grad_norm": 0.8134777545928955, "learning_rate": 9.069767441860465e-06, "loss": 0.6789, "step": 156 }, { "epoch": 0.27463556851311954, "grad_norm": 1.1979421377182007, "learning_rate": 9.127906976744186e-06, "loss": 0.6868, "step": 157 }, { "epoch": 0.27638483965014576, "grad_norm": 0.8993186354637146, "learning_rate": 9.186046511627908e-06, "loss": 0.6922, "step": 158 }, { "epoch": 0.278134110787172, "grad_norm": 0.9218212366104126, "learning_rate": 9.244186046511628e-06, "loss": 0.6982, "step": 159 }, { "epoch": 0.27988338192419826, "grad_norm": 0.91939377784729, "learning_rate": 9.30232558139535e-06, "loss": 0.6862, "step": 160 }, { "epoch": 0.2816326530612245, "grad_norm": 0.8778752088546753, "learning_rate": 9.36046511627907e-06, "loss": 0.7114, "step": 161 }, { "epoch": 0.28338192419825076, "grad_norm": 0.9812875390052795, "learning_rate": 9.418604651162791e-06, "loss": 0.7161, "step": 162 }, { "epoch": 0.285131195335277, "grad_norm": 0.8337746262550354, "learning_rate": 9.476744186046513e-06, "loss": 0.6979, "step": 163 }, { "epoch": 0.2868804664723032, "grad_norm": 0.8535592555999756, "learning_rate": 9.534883720930234e-06, "loss": 0.6984, "step": 164 }, { "epoch": 0.2886297376093295, "grad_norm": 0.8191837072372437, "learning_rate": 9.593023255813954e-06, "loss": 0.6984, "step": 165 }, { "epoch": 0.2903790087463557, "grad_norm": 0.9740723371505737, "learning_rate": 9.651162790697676e-06, "loss": 0.701, "step": 166 }, { "epoch": 0.2921282798833819, "grad_norm": 0.8932219743728638, "learning_rate": 9.709302325581395e-06, "loss": 0.7296, "step": 167 }, { "epoch": 0.2938775510204082, "grad_norm": 0.8917909264564514, "learning_rate": 9.767441860465117e-06, "loss": 0.7031, "step": 168 }, { "epoch": 0.2956268221574344, "grad_norm": 0.8919321894645691, "learning_rate": 9.825581395348838e-06, "loss": 0.6705, "step": 169 }, { "epoch": 0.29737609329446063, "grad_norm": 0.8930680155754089, "learning_rate": 9.883720930232558e-06, "loss": 0.6873, "step": 170 }, { "epoch": 0.2991253644314869, "grad_norm": 0.899061918258667, "learning_rate": 9.94186046511628e-06, "loss": 0.6806, "step": 171 }, { "epoch": 0.3008746355685131, "grad_norm": 0.8692183494567871, "learning_rate": 1e-05, "loss": 0.7058, "step": 172 }, { "epoch": 0.30262390670553935, "grad_norm": 0.8429186344146729, "learning_rate": 9.999989609550734e-06, "loss": 0.6918, "step": 173 }, { "epoch": 0.3043731778425656, "grad_norm": 0.8520380258560181, "learning_rate": 9.999958438246115e-06, "loss": 0.6935, "step": 174 }, { "epoch": 0.30612244897959184, "grad_norm": 0.8542640805244446, "learning_rate": 9.999906486215701e-06, "loss": 0.6973, "step": 175 }, { "epoch": 0.30787172011661806, "grad_norm": 0.8848674297332764, "learning_rate": 9.999833753675413e-06, "loss": 0.6838, "step": 176 }, { "epoch": 0.30962099125364434, "grad_norm": 1.6088979244232178, "learning_rate": 9.999740240927537e-06, "loss": 0.6877, "step": 177 }, { "epoch": 0.31137026239067056, "grad_norm": 0.9393627643585205, "learning_rate": 9.999625948360732e-06, "loss": 0.6895, "step": 178 }, { "epoch": 0.3131195335276968, "grad_norm": 0.8688336610794067, "learning_rate": 9.99949087645002e-06, "loss": 0.6906, "step": 179 }, { "epoch": 0.31486880466472306, "grad_norm": 0.8592703938484192, "learning_rate": 9.99933502575678e-06, "loss": 0.6824, "step": 180 }, { "epoch": 0.3166180758017493, "grad_norm": 1.0506497621536255, "learning_rate": 9.999158396928757e-06, "loss": 0.7014, "step": 181 }, { "epoch": 0.3183673469387755, "grad_norm": 0.8329772353172302, "learning_rate": 9.998960990700055e-06, "loss": 0.7114, "step": 182 }, { "epoch": 0.3201166180758018, "grad_norm": 0.9890666007995605, "learning_rate": 9.998742807891126e-06, "loss": 0.6904, "step": 183 }, { "epoch": 0.321865889212828, "grad_norm": 0.9045460820198059, "learning_rate": 9.998503849408779e-06, "loss": 0.6653, "step": 184 }, { "epoch": 0.3236151603498542, "grad_norm": 0.7993919253349304, "learning_rate": 9.998244116246167e-06, "loss": 0.7051, "step": 185 }, { "epoch": 0.3253644314868805, "grad_norm": 1.0505069494247437, "learning_rate": 9.997963609482788e-06, "loss": 0.6838, "step": 186 }, { "epoch": 0.3271137026239067, "grad_norm": 0.8677232265472412, "learning_rate": 9.99766233028448e-06, "loss": 0.698, "step": 187 }, { "epoch": 0.32886297376093293, "grad_norm": 0.8572655916213989, "learning_rate": 9.997340279903412e-06, "loss": 0.6886, "step": 188 }, { "epoch": 0.3306122448979592, "grad_norm": 1.0147967338562012, "learning_rate": 9.996997459678083e-06, "loss": 0.6934, "step": 189 }, { "epoch": 0.3323615160349854, "grad_norm": 0.8224540948867798, "learning_rate": 9.99663387103332e-06, "loss": 0.6772, "step": 190 }, { "epoch": 0.33411078717201165, "grad_norm": 1.005629301071167, "learning_rate": 9.996249515480255e-06, "loss": 0.6783, "step": 191 }, { "epoch": 0.3358600583090379, "grad_norm": 0.8282907605171204, "learning_rate": 9.995844394616343e-06, "loss": 0.7037, "step": 192 }, { "epoch": 0.33760932944606414, "grad_norm": 1.0158436298370361, "learning_rate": 9.99541851012534e-06, "loss": 0.6686, "step": 193 }, { "epoch": 0.33935860058309036, "grad_norm": 0.9228296279907227, "learning_rate": 9.994971863777299e-06, "loss": 0.7028, "step": 194 }, { "epoch": 0.34110787172011664, "grad_norm": 1.0101940631866455, "learning_rate": 9.994504457428557e-06, "loss": 0.6953, "step": 195 }, { "epoch": 0.34285714285714286, "grad_norm": 0.9893026351928711, "learning_rate": 9.994016293021746e-06, "loss": 0.671, "step": 196 }, { "epoch": 0.3446064139941691, "grad_norm": 0.9287882447242737, "learning_rate": 9.99350737258576e-06, "loss": 0.6727, "step": 197 }, { "epoch": 0.34635568513119536, "grad_norm": 1.1028951406478882, "learning_rate": 9.992977698235765e-06, "loss": 0.6844, "step": 198 }, { "epoch": 0.3481049562682216, "grad_norm": 0.8260549902915955, "learning_rate": 9.992427272173184e-06, "loss": 0.6783, "step": 199 }, { "epoch": 0.3498542274052478, "grad_norm": 1.0094648599624634, "learning_rate": 9.991856096685687e-06, "loss": 0.6821, "step": 200 }, { "epoch": 0.3516034985422741, "grad_norm": 0.8331499695777893, "learning_rate": 9.991264174147177e-06, "loss": 0.6794, "step": 201 }, { "epoch": 0.3533527696793003, "grad_norm": 0.78400719165802, "learning_rate": 9.990651507017796e-06, "loss": 0.6849, "step": 202 }, { "epoch": 0.3551020408163265, "grad_norm": 0.8433126211166382, "learning_rate": 9.990018097843898e-06, "loss": 0.7074, "step": 203 }, { "epoch": 0.3568513119533528, "grad_norm": 0.8061536550521851, "learning_rate": 9.989363949258044e-06, "loss": 0.703, "step": 204 }, { "epoch": 0.358600583090379, "grad_norm": 0.9220350384712219, "learning_rate": 9.988689063978992e-06, "loss": 0.6813, "step": 205 }, { "epoch": 0.36034985422740523, "grad_norm": 0.8930953741073608, "learning_rate": 9.987993444811689e-06, "loss": 0.6853, "step": 206 }, { "epoch": 0.3620991253644315, "grad_norm": 0.8944008946418762, "learning_rate": 9.987277094647252e-06, "loss": 0.6989, "step": 207 }, { "epoch": 0.3638483965014577, "grad_norm": 0.8832034468650818, "learning_rate": 9.98654001646296e-06, "loss": 0.6896, "step": 208 }, { "epoch": 0.36559766763848395, "grad_norm": 0.9339091777801514, "learning_rate": 9.985782213322245e-06, "loss": 0.6823, "step": 209 }, { "epoch": 0.3673469387755102, "grad_norm": 0.8949092030525208, "learning_rate": 9.98500368837467e-06, "loss": 0.6775, "step": 210 }, { "epoch": 0.36909620991253644, "grad_norm": 0.9396309852600098, "learning_rate": 9.984204444855926e-06, "loss": 0.6852, "step": 211 }, { "epoch": 0.37084548104956266, "grad_norm": 0.9027555584907532, "learning_rate": 9.983384486087815e-06, "loss": 0.6867, "step": 212 }, { "epoch": 0.37259475218658894, "grad_norm": 0.7851990461349487, "learning_rate": 9.98254381547823e-06, "loss": 0.6727, "step": 213 }, { "epoch": 0.37434402332361516, "grad_norm": 0.8529061079025269, "learning_rate": 9.981682436521149e-06, "loss": 0.7102, "step": 214 }, { "epoch": 0.3760932944606414, "grad_norm": 0.8795499205589294, "learning_rate": 9.98080035279662e-06, "loss": 0.6886, "step": 215 }, { "epoch": 0.37784256559766766, "grad_norm": 1.1092426776885986, "learning_rate": 9.97989756797074e-06, "loss": 0.6828, "step": 216 }, { "epoch": 0.3795918367346939, "grad_norm": 0.7254016399383545, "learning_rate": 9.978974085795643e-06, "loss": 0.6916, "step": 217 }, { "epoch": 0.3813411078717201, "grad_norm": 0.8924064040184021, "learning_rate": 9.978029910109491e-06, "loss": 0.6802, "step": 218 }, { "epoch": 0.3830903790087464, "grad_norm": 0.9013124108314514, "learning_rate": 9.977065044836446e-06, "loss": 0.675, "step": 219 }, { "epoch": 0.3848396501457726, "grad_norm": 0.8334516286849976, "learning_rate": 9.97607949398666e-06, "loss": 0.6932, "step": 220 }, { "epoch": 0.3865889212827988, "grad_norm": 0.882684588432312, "learning_rate": 9.975073261656262e-06, "loss": 0.6864, "step": 221 }, { "epoch": 0.3883381924198251, "grad_norm": 0.7908568978309631, "learning_rate": 9.974046352027332e-06, "loss": 0.6782, "step": 222 }, { "epoch": 0.3900874635568513, "grad_norm": 0.8941582441329956, "learning_rate": 9.972998769367892e-06, "loss": 0.6797, "step": 223 }, { "epoch": 0.39183673469387753, "grad_norm": 1.090354323387146, "learning_rate": 9.971930518031885e-06, "loss": 0.6711, "step": 224 }, { "epoch": 0.3935860058309038, "grad_norm": 0.9109607338905334, "learning_rate": 9.970841602459153e-06, "loss": 0.7031, "step": 225 }, { "epoch": 0.39533527696793, "grad_norm": 0.799044132232666, "learning_rate": 9.969732027175429e-06, "loss": 0.687, "step": 226 }, { "epoch": 0.39708454810495625, "grad_norm": 0.826241135597229, "learning_rate": 9.968601796792302e-06, "loss": 0.6731, "step": 227 }, { "epoch": 0.3988338192419825, "grad_norm": 0.8070242404937744, "learning_rate": 9.967450916007215e-06, "loss": 0.6709, "step": 228 }, { "epoch": 0.40058309037900874, "grad_norm": 0.8797098398208618, "learning_rate": 9.966279389603437e-06, "loss": 0.6835, "step": 229 }, { "epoch": 0.40233236151603496, "grad_norm": 0.8582132458686829, "learning_rate": 9.965087222450039e-06, "loss": 0.6947, "step": 230 }, { "epoch": 0.40408163265306124, "grad_norm": 0.8197420239448547, "learning_rate": 9.963874419501885e-06, "loss": 0.6905, "step": 231 }, { "epoch": 0.40583090379008746, "grad_norm": 0.8372311592102051, "learning_rate": 9.9626409857996e-06, "loss": 0.6672, "step": 232 }, { "epoch": 0.4075801749271137, "grad_norm": 0.8585025668144226, "learning_rate": 9.961386926469556e-06, "loss": 0.6686, "step": 233 }, { "epoch": 0.40932944606413996, "grad_norm": 0.9147070646286011, "learning_rate": 9.960112246723851e-06, "loss": 0.6834, "step": 234 }, { "epoch": 0.4110787172011662, "grad_norm": 0.8345823287963867, "learning_rate": 9.958816951860282e-06, "loss": 0.6558, "step": 235 }, { "epoch": 0.4128279883381924, "grad_norm": 0.8845197558403015, "learning_rate": 9.957501047262326e-06, "loss": 0.6889, "step": 236 }, { "epoch": 0.4145772594752187, "grad_norm": 0.8187896013259888, "learning_rate": 9.95616453839912e-06, "loss": 0.6977, "step": 237 }, { "epoch": 0.4163265306122449, "grad_norm": 0.8374192714691162, "learning_rate": 9.954807430825435e-06, "loss": 0.6721, "step": 238 }, { "epoch": 0.4180758017492711, "grad_norm": 0.8518050312995911, "learning_rate": 9.953429730181653e-06, "loss": 0.6674, "step": 239 }, { "epoch": 0.4198250728862974, "grad_norm": 0.8168686032295227, "learning_rate": 9.952031442193749e-06, "loss": 0.6569, "step": 240 }, { "epoch": 0.4215743440233236, "grad_norm": 0.9228339195251465, "learning_rate": 9.950612572673255e-06, "loss": 0.6793, "step": 241 }, { "epoch": 0.42332361516034983, "grad_norm": 0.8622779250144958, "learning_rate": 9.949173127517252e-06, "loss": 0.6821, "step": 242 }, { "epoch": 0.4250728862973761, "grad_norm": 0.8492239117622375, "learning_rate": 9.947713112708325e-06, "loss": 0.6881, "step": 243 }, { "epoch": 0.4268221574344023, "grad_norm": 0.9254249930381775, "learning_rate": 9.946232534314565e-06, "loss": 0.6787, "step": 244 }, { "epoch": 0.42857142857142855, "grad_norm": 0.8793076276779175, "learning_rate": 9.944731398489523e-06, "loss": 0.7105, "step": 245 }, { "epoch": 0.4303206997084548, "grad_norm": 0.9064289927482605, "learning_rate": 9.943209711472183e-06, "loss": 0.6749, "step": 246 }, { "epoch": 0.43206997084548104, "grad_norm": 0.8630545139312744, "learning_rate": 9.941667479586953e-06, "loss": 0.6702, "step": 247 }, { "epoch": 0.43381924198250726, "grad_norm": 0.8162756562232971, "learning_rate": 9.940104709243625e-06, "loss": 0.6757, "step": 248 }, { "epoch": 0.43556851311953354, "grad_norm": 0.8749597668647766, "learning_rate": 9.938521406937356e-06, "loss": 0.6927, "step": 249 }, { "epoch": 0.43731778425655976, "grad_norm": 0.8708418011665344, "learning_rate": 9.936917579248632e-06, "loss": 0.6883, "step": 250 }, { "epoch": 0.439067055393586, "grad_norm": 0.786575973033905, "learning_rate": 9.93529323284325e-06, "loss": 0.6601, "step": 251 }, { "epoch": 0.44081632653061226, "grad_norm": 0.8405781388282776, "learning_rate": 9.933648374472287e-06, "loss": 0.7026, "step": 252 }, { "epoch": 0.4425655976676385, "grad_norm": 0.8714183568954468, "learning_rate": 9.931983010972066e-06, "loss": 0.6742, "step": 253 }, { "epoch": 0.4443148688046647, "grad_norm": 0.811898946762085, "learning_rate": 9.930297149264141e-06, "loss": 0.6896, "step": 254 }, { "epoch": 0.446064139941691, "grad_norm": 0.8517475128173828, "learning_rate": 9.928590796355255e-06, "loss": 0.6664, "step": 255 }, { "epoch": 0.4478134110787172, "grad_norm": 0.9151557087898254, "learning_rate": 9.926863959337317e-06, "loss": 0.6934, "step": 256 }, { "epoch": 0.4495626822157434, "grad_norm": 0.8543063998222351, "learning_rate": 9.925116645387372e-06, "loss": 0.6727, "step": 257 }, { "epoch": 0.4513119533527697, "grad_norm": 0.8705698847770691, "learning_rate": 9.923348861767572e-06, "loss": 0.6806, "step": 258 }, { "epoch": 0.4530612244897959, "grad_norm": 0.8583109974861145, "learning_rate": 9.92156061582514e-06, "loss": 0.6873, "step": 259 }, { "epoch": 0.45481049562682213, "grad_norm": 0.938755989074707, "learning_rate": 9.919751914992351e-06, "loss": 0.7035, "step": 260 }, { "epoch": 0.4565597667638484, "grad_norm": 1.0100270509719849, "learning_rate": 9.91792276678649e-06, "loss": 0.7005, "step": 261 }, { "epoch": 0.4583090379008746, "grad_norm": 0.9074779748916626, "learning_rate": 9.916073178809823e-06, "loss": 0.6788, "step": 262 }, { "epoch": 0.46005830903790085, "grad_norm": 0.9534067511558533, "learning_rate": 9.914203158749576e-06, "loss": 0.6761, "step": 263 }, { "epoch": 0.4618075801749271, "grad_norm": 0.8688150644302368, "learning_rate": 9.91231271437788e-06, "loss": 0.6748, "step": 264 }, { "epoch": 0.46355685131195334, "grad_norm": 0.8816513419151306, "learning_rate": 9.91040185355177e-06, "loss": 0.6798, "step": 265 }, { "epoch": 0.46530612244897956, "grad_norm": 1.074083685874939, "learning_rate": 9.908470584213121e-06, "loss": 0.6677, "step": 266 }, { "epoch": 0.46705539358600584, "grad_norm": 1.0394014120101929, "learning_rate": 9.906518914388638e-06, "loss": 0.6933, "step": 267 }, { "epoch": 0.46880466472303206, "grad_norm": 1.035010814666748, "learning_rate": 9.904546852189813e-06, "loss": 0.6626, "step": 268 }, { "epoch": 0.47055393586005834, "grad_norm": 1.2476211786270142, "learning_rate": 9.902554405812887e-06, "loss": 0.6788, "step": 269 }, { "epoch": 0.47230320699708456, "grad_norm": 1.0049374103546143, "learning_rate": 9.900541583538827e-06, "loss": 0.6453, "step": 270 }, { "epoch": 0.4740524781341108, "grad_norm": 1.1121054887771606, "learning_rate": 9.898508393733284e-06, "loss": 0.6898, "step": 271 }, { "epoch": 0.47580174927113705, "grad_norm": 1.065759539604187, "learning_rate": 9.896454844846562e-06, "loss": 0.6605, "step": 272 }, { "epoch": 0.4775510204081633, "grad_norm": 0.9293599128723145, "learning_rate": 9.894380945413576e-06, "loss": 0.6911, "step": 273 }, { "epoch": 0.4793002915451895, "grad_norm": 0.879508376121521, "learning_rate": 9.892286704053828e-06, "loss": 0.6966, "step": 274 }, { "epoch": 0.48104956268221577, "grad_norm": 0.874912679195404, "learning_rate": 9.890172129471358e-06, "loss": 0.6635, "step": 275 }, { "epoch": 0.482798833819242, "grad_norm": 0.9670069813728333, "learning_rate": 9.888037230454721e-06, "loss": 0.674, "step": 276 }, { "epoch": 0.4845481049562682, "grad_norm": 0.7746601700782776, "learning_rate": 9.88588201587694e-06, "loss": 0.65, "step": 277 }, { "epoch": 0.4862973760932945, "grad_norm": 0.8897718787193298, "learning_rate": 9.883706494695472e-06, "loss": 0.6771, "step": 278 }, { "epoch": 0.4880466472303207, "grad_norm": 0.9518530368804932, "learning_rate": 9.88151067595218e-06, "loss": 0.6751, "step": 279 }, { "epoch": 0.4897959183673469, "grad_norm": 0.9824015498161316, "learning_rate": 9.879294568773275e-06, "loss": 0.6783, "step": 280 }, { "epoch": 0.4915451895043732, "grad_norm": 0.8532746434211731, "learning_rate": 9.877058182369296e-06, "loss": 0.6907, "step": 281 }, { "epoch": 0.4932944606413994, "grad_norm": 1.0221869945526123, "learning_rate": 9.874801526035074e-06, "loss": 0.6862, "step": 282 }, { "epoch": 0.49504373177842564, "grad_norm": 0.8417726755142212, "learning_rate": 9.872524609149671e-06, "loss": 0.6957, "step": 283 }, { "epoch": 0.4967930029154519, "grad_norm": 0.8594486713409424, "learning_rate": 9.870227441176367e-06, "loss": 0.6699, "step": 284 }, { "epoch": 0.49854227405247814, "grad_norm": 0.8736686706542969, "learning_rate": 9.867910031662603e-06, "loss": 0.6844, "step": 285 }, { "epoch": 0.5002915451895044, "grad_norm": 0.7984974384307861, "learning_rate": 9.86557239023995e-06, "loss": 0.6901, "step": 286 }, { "epoch": 0.5020408163265306, "grad_norm": 0.7743108868598938, "learning_rate": 9.863214526624065e-06, "loss": 0.6769, "step": 287 }, { "epoch": 0.5037900874635568, "grad_norm": 0.9301120042800903, "learning_rate": 9.860836450614656e-06, "loss": 0.6609, "step": 288 }, { "epoch": 0.5055393586005831, "grad_norm": 0.7831388115882874, "learning_rate": 9.85843817209543e-06, "loss": 0.6786, "step": 289 }, { "epoch": 0.5072886297376094, "grad_norm": 0.8193007111549377, "learning_rate": 9.856019701034066e-06, "loss": 0.6616, "step": 290 }, { "epoch": 0.5090379008746355, "grad_norm": 0.9227758049964905, "learning_rate": 9.853581047482165e-06, "loss": 0.6882, "step": 291 }, { "epoch": 0.5107871720116618, "grad_norm": 0.7908544540405273, "learning_rate": 9.851122221575206e-06, "loss": 0.6461, "step": 292 }, { "epoch": 0.5125364431486881, "grad_norm": 0.819151759147644, "learning_rate": 9.848643233532515e-06, "loss": 0.6728, "step": 293 }, { "epoch": 0.5142857142857142, "grad_norm": 0.8731646537780762, "learning_rate": 9.846144093657211e-06, "loss": 0.6679, "step": 294 }, { "epoch": 0.5160349854227405, "grad_norm": 0.7846607565879822, "learning_rate": 9.843624812336166e-06, "loss": 0.669, "step": 295 }, { "epoch": 0.5177842565597668, "grad_norm": 0.7345834374427795, "learning_rate": 9.84108540003997e-06, "loss": 0.6584, "step": 296 }, { "epoch": 0.519533527696793, "grad_norm": 0.8330621719360352, "learning_rate": 9.838525867322872e-06, "loss": 0.6791, "step": 297 }, { "epoch": 0.5212827988338192, "grad_norm": 0.8131314516067505, "learning_rate": 9.835946224822754e-06, "loss": 0.6736, "step": 298 }, { "epoch": 0.5230320699708455, "grad_norm": 0.7916424870491028, "learning_rate": 9.833346483261072e-06, "loss": 0.6724, "step": 299 }, { "epoch": 0.5247813411078717, "grad_norm": 0.8307373523712158, "learning_rate": 9.830726653442818e-06, "loss": 0.6631, "step": 300 }, { "epoch": 0.5265306122448979, "grad_norm": 0.7734163403511047, "learning_rate": 9.82808674625648e-06, "loss": 0.6474, "step": 301 }, { "epoch": 0.5282798833819242, "grad_norm": 0.7823531031608582, "learning_rate": 9.825426772673983e-06, "loss": 0.6889, "step": 302 }, { "epoch": 0.5300291545189504, "grad_norm": 0.8301039934158325, "learning_rate": 9.822746743750655e-06, "loss": 0.6764, "step": 303 }, { "epoch": 0.5317784256559767, "grad_norm": 0.8198210000991821, "learning_rate": 9.820046670625178e-06, "loss": 0.6948, "step": 304 }, { "epoch": 0.5335276967930029, "grad_norm": 0.7254811525344849, "learning_rate": 9.817326564519542e-06, "loss": 0.698, "step": 305 }, { "epoch": 0.5352769679300291, "grad_norm": 0.8038996458053589, "learning_rate": 9.814586436738998e-06, "loss": 0.6727, "step": 306 }, { "epoch": 0.5370262390670554, "grad_norm": 0.7741175889968872, "learning_rate": 9.811826298672007e-06, "loss": 0.6543, "step": 307 }, { "epoch": 0.5387755102040817, "grad_norm": 0.8718826174736023, "learning_rate": 9.8090461617902e-06, "loss": 0.6582, "step": 308 }, { "epoch": 0.5405247813411078, "grad_norm": 0.9195080995559692, "learning_rate": 9.806246037648324e-06, "loss": 0.659, "step": 309 }, { "epoch": 0.5422740524781341, "grad_norm": 0.6996453404426575, "learning_rate": 9.803425937884202e-06, "loss": 0.6628, "step": 310 }, { "epoch": 0.5440233236151604, "grad_norm": 0.9271142482757568, "learning_rate": 9.800585874218671e-06, "loss": 0.6719, "step": 311 }, { "epoch": 0.5457725947521865, "grad_norm": 0.8899317383766174, "learning_rate": 9.797725858455549e-06, "loss": 0.6591, "step": 312 }, { "epoch": 0.5475218658892128, "grad_norm": 0.8132035136222839, "learning_rate": 9.794845902481575e-06, "loss": 0.6764, "step": 313 }, { "epoch": 0.5492711370262391, "grad_norm": 0.9520671963691711, "learning_rate": 9.791946018266363e-06, "loss": 0.6704, "step": 314 }, { "epoch": 0.5510204081632653, "grad_norm": 0.8516552448272705, "learning_rate": 9.789026217862351e-06, "loss": 0.6762, "step": 315 }, { "epoch": 0.5527696793002915, "grad_norm": 0.8404406309127808, "learning_rate": 9.786086513404758e-06, "loss": 0.6574, "step": 316 }, { "epoch": 0.5545189504373178, "grad_norm": 0.8160989880561829, "learning_rate": 9.783126917111521e-06, "loss": 0.6734, "step": 317 }, { "epoch": 0.556268221574344, "grad_norm": 0.864995002746582, "learning_rate": 9.780147441283256e-06, "loss": 0.6634, "step": 318 }, { "epoch": 0.5580174927113702, "grad_norm": 0.8282278180122375, "learning_rate": 9.777148098303198e-06, "loss": 0.6771, "step": 319 }, { "epoch": 0.5597667638483965, "grad_norm": 0.8815659284591675, "learning_rate": 9.774128900637156e-06, "loss": 0.6766, "step": 320 }, { "epoch": 0.5615160349854227, "grad_norm": 0.996401309967041, "learning_rate": 9.77108986083346e-06, "loss": 0.6626, "step": 321 }, { "epoch": 0.563265306122449, "grad_norm": 0.703777015209198, "learning_rate": 9.768030991522906e-06, "loss": 0.6498, "step": 322 }, { "epoch": 0.5650145772594752, "grad_norm": 1.011526346206665, "learning_rate": 9.764952305418701e-06, "loss": 0.6551, "step": 323 }, { "epoch": 0.5667638483965015, "grad_norm": 0.8401075601577759, "learning_rate": 9.761853815316417e-06, "loss": 0.6942, "step": 324 }, { "epoch": 0.5685131195335277, "grad_norm": 0.7371053099632263, "learning_rate": 9.75873553409394e-06, "loss": 0.6615, "step": 325 }, { "epoch": 0.570262390670554, "grad_norm": 0.8786805272102356, "learning_rate": 9.755597474711406e-06, "loss": 0.669, "step": 326 }, { "epoch": 0.5720116618075802, "grad_norm": 0.8196384906768799, "learning_rate": 9.752439650211154e-06, "loss": 0.6727, "step": 327 }, { "epoch": 0.5737609329446064, "grad_norm": 0.9674431681632996, "learning_rate": 9.749262073717666e-06, "loss": 0.6816, "step": 328 }, { "epoch": 0.5755102040816327, "grad_norm": 0.8873831033706665, "learning_rate": 9.746064758437527e-06, "loss": 0.6745, "step": 329 }, { "epoch": 0.577259475218659, "grad_norm": 0.8475132584571838, "learning_rate": 9.74284771765935e-06, "loss": 0.6793, "step": 330 }, { "epoch": 0.5790087463556851, "grad_norm": 0.9969664216041565, "learning_rate": 9.739610964753735e-06, "loss": 0.683, "step": 331 }, { "epoch": 0.5807580174927114, "grad_norm": 0.8074207901954651, "learning_rate": 9.73635451317321e-06, "loss": 0.6745, "step": 332 }, { "epoch": 0.5825072886297377, "grad_norm": 1.0764408111572266, "learning_rate": 9.733078376452172e-06, "loss": 0.6846, "step": 333 }, { "epoch": 0.5842565597667638, "grad_norm": 0.7739421725273132, "learning_rate": 9.729782568206833e-06, "loss": 0.6703, "step": 334 }, { "epoch": 0.5860058309037901, "grad_norm": 0.8167464733123779, "learning_rate": 9.726467102135167e-06, "loss": 0.6561, "step": 335 }, { "epoch": 0.5877551020408164, "grad_norm": 0.983731746673584, "learning_rate": 9.723131992016846e-06, "loss": 0.669, "step": 336 }, { "epoch": 0.5895043731778425, "grad_norm": 0.8090880513191223, "learning_rate": 9.719777251713184e-06, "loss": 0.6498, "step": 337 }, { "epoch": 0.5912536443148688, "grad_norm": 0.8917976021766663, "learning_rate": 9.716402895167088e-06, "loss": 0.6398, "step": 338 }, { "epoch": 0.5930029154518951, "grad_norm": 0.9440398216247559, "learning_rate": 9.71300893640299e-06, "loss": 0.6608, "step": 339 }, { "epoch": 0.5947521865889213, "grad_norm": 0.870690643787384, "learning_rate": 9.709595389526792e-06, "loss": 0.6752, "step": 340 }, { "epoch": 0.5965014577259475, "grad_norm": 1.051248550415039, "learning_rate": 9.706162268725807e-06, "loss": 0.6701, "step": 341 }, { "epoch": 0.5982507288629738, "grad_norm": 0.7961276173591614, "learning_rate": 9.702709588268702e-06, "loss": 0.6797, "step": 342 }, { "epoch": 0.6, "grad_norm": 0.9224449992179871, "learning_rate": 9.699237362505439e-06, "loss": 0.6568, "step": 343 }, { "epoch": 0.6017492711370263, "grad_norm": 0.8638266921043396, "learning_rate": 9.695745605867213e-06, "loss": 0.6419, "step": 344 }, { "epoch": 0.6034985422740525, "grad_norm": 0.8175105452537537, "learning_rate": 9.692234332866387e-06, "loss": 0.6732, "step": 345 }, { "epoch": 0.6052478134110787, "grad_norm": 0.9727689623832703, "learning_rate": 9.68870355809645e-06, "loss": 0.6555, "step": 346 }, { "epoch": 0.606997084548105, "grad_norm": 0.8340580463409424, "learning_rate": 9.685153296231933e-06, "loss": 0.6848, "step": 347 }, { "epoch": 0.6087463556851312, "grad_norm": 0.824990451335907, "learning_rate": 9.68158356202836e-06, "loss": 0.6481, "step": 348 }, { "epoch": 0.6104956268221574, "grad_norm": 0.9843393564224243, "learning_rate": 9.67799437032219e-06, "loss": 0.6619, "step": 349 }, { "epoch": 0.6122448979591837, "grad_norm": 0.8069766759872437, "learning_rate": 9.67438573603075e-06, "loss": 0.6791, "step": 350 }, { "epoch": 0.61399416909621, "grad_norm": 0.9691466093063354, "learning_rate": 9.67075767415217e-06, "loss": 0.6563, "step": 351 }, { "epoch": 0.6157434402332361, "grad_norm": 0.8698590397834778, "learning_rate": 9.667110199765331e-06, "loss": 0.6753, "step": 352 }, { "epoch": 0.6174927113702624, "grad_norm": 0.8047291040420532, "learning_rate": 9.663443328029786e-06, "loss": 0.6894, "step": 353 }, { "epoch": 0.6192419825072887, "grad_norm": 0.7468858957290649, "learning_rate": 9.659757074185718e-06, "loss": 0.6846, "step": 354 }, { "epoch": 0.6209912536443148, "grad_norm": 0.7936758399009705, "learning_rate": 9.65605145355386e-06, "loss": 0.6487, "step": 355 }, { "epoch": 0.6227405247813411, "grad_norm": 0.8228779435157776, "learning_rate": 9.652326481535434e-06, "loss": 0.6546, "step": 356 }, { "epoch": 0.6244897959183674, "grad_norm": 0.8214784860610962, "learning_rate": 9.648582173612095e-06, "loss": 0.6671, "step": 357 }, { "epoch": 0.6262390670553936, "grad_norm": 0.9324108958244324, "learning_rate": 9.644818545345862e-06, "loss": 0.6528, "step": 358 }, { "epoch": 0.6279883381924198, "grad_norm": 0.8310825228691101, "learning_rate": 9.641035612379046e-06, "loss": 0.6534, "step": 359 }, { "epoch": 0.6297376093294461, "grad_norm": 0.811160147190094, "learning_rate": 9.6372333904342e-06, "loss": 0.6809, "step": 360 }, { "epoch": 0.6314868804664723, "grad_norm": 0.7270039916038513, "learning_rate": 9.63341189531404e-06, "loss": 0.6595, "step": 361 }, { "epoch": 0.6332361516034986, "grad_norm": 0.8658615946769714, "learning_rate": 9.629571142901384e-06, "loss": 0.6449, "step": 362 }, { "epoch": 0.6349854227405248, "grad_norm": 0.8899701237678528, "learning_rate": 9.625711149159095e-06, "loss": 0.6599, "step": 363 }, { "epoch": 0.636734693877551, "grad_norm": 1.015919804573059, "learning_rate": 9.621831930129996e-06, "loss": 0.6645, "step": 364 }, { "epoch": 0.6384839650145773, "grad_norm": 0.7742146253585815, "learning_rate": 9.61793350193682e-06, "loss": 0.6584, "step": 365 }, { "epoch": 0.6402332361516035, "grad_norm": 0.8947616815567017, "learning_rate": 9.614015880782136e-06, "loss": 0.6653, "step": 366 }, { "epoch": 0.6419825072886297, "grad_norm": 0.9182134866714478, "learning_rate": 9.610079082948279e-06, "loss": 0.6807, "step": 367 }, { "epoch": 0.643731778425656, "grad_norm": 0.7240360975265503, "learning_rate": 9.606123124797292e-06, "loss": 0.6645, "step": 368 }, { "epoch": 0.6454810495626823, "grad_norm": 0.8014088869094849, "learning_rate": 9.602148022770843e-06, "loss": 0.6595, "step": 369 }, { "epoch": 0.6472303206997084, "grad_norm": 0.8485872745513916, "learning_rate": 9.598153793390175e-06, "loss": 0.6701, "step": 370 }, { "epoch": 0.6489795918367347, "grad_norm": 0.8253888487815857, "learning_rate": 9.594140453256022e-06, "loss": 0.6579, "step": 371 }, { "epoch": 0.650728862973761, "grad_norm": 0.8392235040664673, "learning_rate": 9.590108019048545e-06, "loss": 0.6626, "step": 372 }, { "epoch": 0.6524781341107871, "grad_norm": 0.870360791683197, "learning_rate": 9.586056507527266e-06, "loss": 0.6518, "step": 373 }, { "epoch": 0.6542274052478134, "grad_norm": 0.7567829489707947, "learning_rate": 9.581985935530995e-06, "loss": 0.6733, "step": 374 }, { "epoch": 0.6559766763848397, "grad_norm": 0.7789931893348694, "learning_rate": 9.577896319977762e-06, "loss": 0.6764, "step": 375 }, { "epoch": 0.6577259475218659, "grad_norm": 0.8274850249290466, "learning_rate": 9.573787677864743e-06, "loss": 0.6574, "step": 376 }, { "epoch": 0.6594752186588921, "grad_norm": 0.9464408159255981, "learning_rate": 9.569660026268194e-06, "loss": 0.6572, "step": 377 }, { "epoch": 0.6612244897959184, "grad_norm": 0.8591985106468201, "learning_rate": 9.565513382343375e-06, "loss": 0.6673, "step": 378 }, { "epoch": 0.6629737609329446, "grad_norm": 0.9289495348930359, "learning_rate": 9.561347763324484e-06, "loss": 0.6734, "step": 379 }, { "epoch": 0.6647230320699709, "grad_norm": 0.9167383909225464, "learning_rate": 9.557163186524584e-06, "loss": 0.6618, "step": 380 }, { "epoch": 0.6664723032069971, "grad_norm": 0.7628527283668518, "learning_rate": 9.552959669335526e-06, "loss": 0.6566, "step": 381 }, { "epoch": 0.6682215743440233, "grad_norm": 1.1895248889923096, "learning_rate": 9.548737229227883e-06, "loss": 0.673, "step": 382 }, { "epoch": 0.6699708454810496, "grad_norm": 0.8770232200622559, "learning_rate": 9.544495883750876e-06, "loss": 0.6836, "step": 383 }, { "epoch": 0.6717201166180758, "grad_norm": 0.9356043338775635, "learning_rate": 9.5402356505323e-06, "loss": 0.6491, "step": 384 }, { "epoch": 0.673469387755102, "grad_norm": 0.9447104334831238, "learning_rate": 9.535956547278444e-06, "loss": 0.6759, "step": 385 }, { "epoch": 0.6752186588921283, "grad_norm": 0.7375609874725342, "learning_rate": 9.531658591774038e-06, "loss": 0.6653, "step": 386 }, { "epoch": 0.6769679300291546, "grad_norm": 0.8895502686500549, "learning_rate": 9.527341801882152e-06, "loss": 0.6692, "step": 387 }, { "epoch": 0.6787172011661807, "grad_norm": 0.8159501552581787, "learning_rate": 9.523006195544144e-06, "loss": 0.6548, "step": 388 }, { "epoch": 0.680466472303207, "grad_norm": 0.8679546117782593, "learning_rate": 9.518651790779572e-06, "loss": 0.6644, "step": 389 }, { "epoch": 0.6822157434402333, "grad_norm": 0.7407789826393127, "learning_rate": 9.514278605686123e-06, "loss": 0.6541, "step": 390 }, { "epoch": 0.6839650145772594, "grad_norm": 0.7654983401298523, "learning_rate": 9.50988665843954e-06, "loss": 0.6414, "step": 391 }, { "epoch": 0.6857142857142857, "grad_norm": 0.8171312808990479, "learning_rate": 9.505475967293549e-06, "loss": 0.6581, "step": 392 }, { "epoch": 0.687463556851312, "grad_norm": 0.8206588625907898, "learning_rate": 9.501046550579771e-06, "loss": 0.6721, "step": 393 }, { "epoch": 0.6892128279883382, "grad_norm": 0.8300743103027344, "learning_rate": 9.49659842670766e-06, "loss": 0.652, "step": 394 }, { "epoch": 0.6909620991253644, "grad_norm": 0.8924394249916077, "learning_rate": 9.492131614164417e-06, "loss": 0.6653, "step": 395 }, { "epoch": 0.6927113702623907, "grad_norm": 0.7794016599655151, "learning_rate": 9.487646131514917e-06, "loss": 0.6596, "step": 396 }, { "epoch": 0.6944606413994169, "grad_norm": 0.832097053527832, "learning_rate": 9.483141997401636e-06, "loss": 0.6509, "step": 397 }, { "epoch": 0.6962099125364432, "grad_norm": 0.7697159647941589, "learning_rate": 9.478619230544558e-06, "loss": 0.6662, "step": 398 }, { "epoch": 0.6979591836734694, "grad_norm": 0.8066057562828064, "learning_rate": 9.474077849741122e-06, "loss": 0.66, "step": 399 }, { "epoch": 0.6997084548104956, "grad_norm": 0.757810115814209, "learning_rate": 9.469517873866119e-06, "loss": 0.6515, "step": 400 }, { "epoch": 0.7014577259475219, "grad_norm": 0.8145730495452881, "learning_rate": 9.464939321871627e-06, "loss": 0.6402, "step": 401 }, { "epoch": 0.7032069970845481, "grad_norm": 0.7861849069595337, "learning_rate": 9.460342212786933e-06, "loss": 0.6477, "step": 402 }, { "epoch": 0.7049562682215743, "grad_norm": 0.8399086594581604, "learning_rate": 9.455726565718448e-06, "loss": 0.6438, "step": 403 }, { "epoch": 0.7067055393586006, "grad_norm": 0.7854669690132141, "learning_rate": 9.451092399849633e-06, "loss": 0.6773, "step": 404 }, { "epoch": 0.7084548104956269, "grad_norm": 0.7263845205307007, "learning_rate": 9.446439734440908e-06, "loss": 0.6566, "step": 405 }, { "epoch": 0.710204081632653, "grad_norm": 0.8101020455360413, "learning_rate": 9.441768588829592e-06, "loss": 0.6714, "step": 406 }, { "epoch": 0.7119533527696793, "grad_norm": 0.7310351133346558, "learning_rate": 9.437078982429805e-06, "loss": 0.6491, "step": 407 }, { "epoch": 0.7137026239067056, "grad_norm": 0.7907615900039673, "learning_rate": 9.432370934732392e-06, "loss": 0.6595, "step": 408 }, { "epoch": 0.7154518950437317, "grad_norm": 0.8099790811538696, "learning_rate": 9.427644465304845e-06, "loss": 0.6221, "step": 409 }, { "epoch": 0.717201166180758, "grad_norm": 0.8568946719169617, "learning_rate": 9.422899593791224e-06, "loss": 0.6736, "step": 410 }, { "epoch": 0.7189504373177843, "grad_norm": 0.858045220375061, "learning_rate": 9.418136339912064e-06, "loss": 0.656, "step": 411 }, { "epoch": 0.7206997084548105, "grad_norm": 0.7773323059082031, "learning_rate": 9.413354723464306e-06, "loss": 0.6511, "step": 412 }, { "epoch": 0.7224489795918367, "grad_norm": 0.7657396197319031, "learning_rate": 9.408554764321206e-06, "loss": 0.6526, "step": 413 }, { "epoch": 0.724198250728863, "grad_norm": 0.786668062210083, "learning_rate": 9.403736482432258e-06, "loss": 0.6647, "step": 414 }, { "epoch": 0.7259475218658892, "grad_norm": 0.8126866817474365, "learning_rate": 9.398899897823104e-06, "loss": 0.6593, "step": 415 }, { "epoch": 0.7276967930029155, "grad_norm": 0.8205054998397827, "learning_rate": 9.394045030595465e-06, "loss": 0.6558, "step": 416 }, { "epoch": 0.7294460641399417, "grad_norm": 0.782646894454956, "learning_rate": 9.389171900927037e-06, "loss": 0.6671, "step": 417 }, { "epoch": 0.7311953352769679, "grad_norm": 0.8177270889282227, "learning_rate": 9.384280529071424e-06, "loss": 0.6549, "step": 418 }, { "epoch": 0.7329446064139942, "grad_norm": 0.8253909349441528, "learning_rate": 9.379370935358046e-06, "loss": 0.651, "step": 419 }, { "epoch": 0.7346938775510204, "grad_norm": 0.7803795337677002, "learning_rate": 9.374443140192058e-06, "loss": 0.6549, "step": 420 }, { "epoch": 0.7364431486880466, "grad_norm": 0.8445982933044434, "learning_rate": 9.36949716405426e-06, "loss": 0.707, "step": 421 }, { "epoch": 0.7381924198250729, "grad_norm": 0.8282182812690735, "learning_rate": 9.364533027501019e-06, "loss": 0.648, "step": 422 }, { "epoch": 0.7399416909620992, "grad_norm": 0.8399822115898132, "learning_rate": 9.35955075116418e-06, "loss": 0.6767, "step": 423 }, { "epoch": 0.7416909620991253, "grad_norm": 0.8803613781929016, "learning_rate": 9.354550355750978e-06, "loss": 0.6699, "step": 424 }, { "epoch": 0.7434402332361516, "grad_norm": 0.9375181794166565, "learning_rate": 9.349531862043952e-06, "loss": 0.672, "step": 425 }, { "epoch": 0.7451895043731779, "grad_norm": 0.8111773133277893, "learning_rate": 9.344495290900868e-06, "loss": 0.6525, "step": 426 }, { "epoch": 0.746938775510204, "grad_norm": 0.7650731801986694, "learning_rate": 9.339440663254618e-06, "loss": 0.6585, "step": 427 }, { "epoch": 0.7486880466472303, "grad_norm": 0.8151988387107849, "learning_rate": 9.334368000113142e-06, "loss": 0.6599, "step": 428 }, { "epoch": 0.7504373177842566, "grad_norm": 0.9552065134048462, "learning_rate": 9.329277322559344e-06, "loss": 0.6601, "step": 429 }, { "epoch": 0.7521865889212828, "grad_norm": 0.9680750966072083, "learning_rate": 9.324168651750992e-06, "loss": 0.6308, "step": 430 }, { "epoch": 0.753935860058309, "grad_norm": 0.7538974285125732, "learning_rate": 9.31904200892064e-06, "loss": 0.6508, "step": 431 }, { "epoch": 0.7556851311953353, "grad_norm": 0.9529233574867249, "learning_rate": 9.313897415375534e-06, "loss": 0.6669, "step": 432 }, { "epoch": 0.7574344023323615, "grad_norm": 0.8785517811775208, "learning_rate": 9.308734892497535e-06, "loss": 0.6551, "step": 433 }, { "epoch": 0.7591836734693878, "grad_norm": 0.8518381118774414, "learning_rate": 9.303554461743012e-06, "loss": 0.6575, "step": 434 }, { "epoch": 0.760932944606414, "grad_norm": 0.8673778772354126, "learning_rate": 9.298356144642768e-06, "loss": 0.6731, "step": 435 }, { "epoch": 0.7626822157434402, "grad_norm": 0.9003520011901855, "learning_rate": 9.293139962801942e-06, "loss": 0.6646, "step": 436 }, { "epoch": 0.7644314868804665, "grad_norm": 0.8027007579803467, "learning_rate": 9.287905937899923e-06, "loss": 0.6518, "step": 437 }, { "epoch": 0.7661807580174927, "grad_norm": 0.8065455555915833, "learning_rate": 9.28265409169026e-06, "loss": 0.6507, "step": 438 }, { "epoch": 0.7679300291545189, "grad_norm": 0.8338696956634521, "learning_rate": 9.277384446000568e-06, "loss": 0.6676, "step": 439 }, { "epoch": 0.7696793002915452, "grad_norm": 0.8211595416069031, "learning_rate": 9.272097022732444e-06, "loss": 0.67, "step": 440 }, { "epoch": 0.7714285714285715, "grad_norm": 0.8294822573661804, "learning_rate": 9.266791843861367e-06, "loss": 0.6737, "step": 441 }, { "epoch": 0.7731778425655976, "grad_norm": 0.7694903612136841, "learning_rate": 9.261468931436614e-06, "loss": 0.6539, "step": 442 }, { "epoch": 0.7749271137026239, "grad_norm": 0.7702130675315857, "learning_rate": 9.256128307581168e-06, "loss": 0.6507, "step": 443 }, { "epoch": 0.7766763848396502, "grad_norm": 0.7343398332595825, "learning_rate": 9.250769994491617e-06, "loss": 0.6572, "step": 444 }, { "epoch": 0.7784256559766763, "grad_norm": 0.7784836292266846, "learning_rate": 9.245394014438078e-06, "loss": 0.6388, "step": 445 }, { "epoch": 0.7801749271137026, "grad_norm": 0.8293264508247375, "learning_rate": 9.240000389764087e-06, "loss": 0.6519, "step": 446 }, { "epoch": 0.7819241982507289, "grad_norm": 0.7003908753395081, "learning_rate": 9.23458914288652e-06, "loss": 0.6416, "step": 447 }, { "epoch": 0.7836734693877551, "grad_norm": 0.949068009853363, "learning_rate": 9.229160296295488e-06, "loss": 0.6638, "step": 448 }, { "epoch": 0.7854227405247813, "grad_norm": 0.9237349033355713, "learning_rate": 9.223713872554255e-06, "loss": 0.6763, "step": 449 }, { "epoch": 0.7871720116618076, "grad_norm": 0.9964410662651062, "learning_rate": 9.218249894299139e-06, "loss": 0.6642, "step": 450 }, { "epoch": 0.7889212827988338, "grad_norm": 0.806239902973175, "learning_rate": 9.21276838423941e-06, "loss": 0.6622, "step": 451 }, { "epoch": 0.79067055393586, "grad_norm": 0.8152488470077515, "learning_rate": 9.207269365157214e-06, "loss": 0.6599, "step": 452 }, { "epoch": 0.7924198250728863, "grad_norm": 0.8113889098167419, "learning_rate": 9.201752859907461e-06, "loss": 0.665, "step": 453 }, { "epoch": 0.7941690962099125, "grad_norm": 0.8020102977752686, "learning_rate": 9.196218891417737e-06, "loss": 0.6368, "step": 454 }, { "epoch": 0.7959183673469388, "grad_norm": 0.7559289336204529, "learning_rate": 9.190667482688211e-06, "loss": 0.6663, "step": 455 }, { "epoch": 0.797667638483965, "grad_norm": 0.7916702628135681, "learning_rate": 9.185098656791535e-06, "loss": 0.6622, "step": 456 }, { "epoch": 0.7994169096209912, "grad_norm": 0.9270349144935608, "learning_rate": 9.17951243687275e-06, "loss": 0.6667, "step": 457 }, { "epoch": 0.8011661807580175, "grad_norm": 0.83078533411026, "learning_rate": 9.17390884614919e-06, "loss": 0.6821, "step": 458 }, { "epoch": 0.8029154518950438, "grad_norm": 0.8263804316520691, "learning_rate": 9.168287907910382e-06, "loss": 0.6371, "step": 459 }, { "epoch": 0.8046647230320699, "grad_norm": 0.8648985624313354, "learning_rate": 9.162649645517961e-06, "loss": 0.6604, "step": 460 }, { "epoch": 0.8064139941690962, "grad_norm": 0.8317275047302246, "learning_rate": 9.156994082405556e-06, "loss": 0.6808, "step": 461 }, { "epoch": 0.8081632653061225, "grad_norm": 0.9024345874786377, "learning_rate": 9.151321242078703e-06, "loss": 0.6457, "step": 462 }, { "epoch": 0.8099125364431486, "grad_norm": 0.8285801410675049, "learning_rate": 9.145631148114747e-06, "loss": 0.655, "step": 463 }, { "epoch": 0.8116618075801749, "grad_norm": 0.765201985836029, "learning_rate": 9.139923824162739e-06, "loss": 0.6518, "step": 464 }, { "epoch": 0.8134110787172012, "grad_norm": 1.0028012990951538, "learning_rate": 9.134199293943347e-06, "loss": 0.6662, "step": 465 }, { "epoch": 0.8151603498542274, "grad_norm": 0.9230080842971802, "learning_rate": 9.128457581248741e-06, "loss": 0.6785, "step": 466 }, { "epoch": 0.8169096209912536, "grad_norm": 0.7785151600837708, "learning_rate": 9.12269870994252e-06, "loss": 0.6514, "step": 467 }, { "epoch": 0.8186588921282799, "grad_norm": 0.824826180934906, "learning_rate": 9.116922703959578e-06, "loss": 0.6487, "step": 468 }, { "epoch": 0.8204081632653061, "grad_norm": 0.8065788149833679, "learning_rate": 9.11112958730604e-06, "loss": 0.6359, "step": 469 }, { "epoch": 0.8221574344023324, "grad_norm": 0.8150350451469421, "learning_rate": 9.105319384059139e-06, "loss": 0.6404, "step": 470 }, { "epoch": 0.8239067055393586, "grad_norm": 0.861851692199707, "learning_rate": 9.099492118367123e-06, "loss": 0.6464, "step": 471 }, { "epoch": 0.8256559766763848, "grad_norm": 0.958235502243042, "learning_rate": 9.093647814449158e-06, "loss": 0.6598, "step": 472 }, { "epoch": 0.8274052478134111, "grad_norm": 0.9278356432914734, "learning_rate": 9.087786496595215e-06, "loss": 0.6576, "step": 473 }, { "epoch": 0.8291545189504373, "grad_norm": 0.8423124551773071, "learning_rate": 9.081908189165992e-06, "loss": 0.6682, "step": 474 }, { "epoch": 0.8309037900874635, "grad_norm": 0.9094522595405579, "learning_rate": 9.076012916592784e-06, "loss": 0.6307, "step": 475 }, { "epoch": 0.8326530612244898, "grad_norm": 0.8472310304641724, "learning_rate": 9.07010070337741e-06, "loss": 0.6694, "step": 476 }, { "epoch": 0.8344023323615161, "grad_norm": 0.9557937979698181, "learning_rate": 9.064171574092085e-06, "loss": 0.6625, "step": 477 }, { "epoch": 0.8361516034985422, "grad_norm": 0.8352527618408203, "learning_rate": 9.058225553379338e-06, "loss": 0.6658, "step": 478 }, { "epoch": 0.8379008746355685, "grad_norm": 0.8124155402183533, "learning_rate": 9.0522626659519e-06, "loss": 0.6542, "step": 479 }, { "epoch": 0.8396501457725948, "grad_norm": 0.8609442114830017, "learning_rate": 9.046282936592603e-06, "loss": 0.6483, "step": 480 }, { "epoch": 0.841399416909621, "grad_norm": 0.8632254004478455, "learning_rate": 9.040286390154276e-06, "loss": 0.6593, "step": 481 }, { "epoch": 0.8431486880466472, "grad_norm": 0.943077564239502, "learning_rate": 9.034273051559643e-06, "loss": 0.6523, "step": 482 }, { "epoch": 0.8448979591836735, "grad_norm": 0.8099398016929626, "learning_rate": 9.028242945801222e-06, "loss": 0.6633, "step": 483 }, { "epoch": 0.8466472303206997, "grad_norm": 0.9189561605453491, "learning_rate": 9.022196097941214e-06, "loss": 0.6751, "step": 484 }, { "epoch": 0.8483965014577259, "grad_norm": 0.8259660601615906, "learning_rate": 9.016132533111406e-06, "loss": 0.6656, "step": 485 }, { "epoch": 0.8501457725947522, "grad_norm": 0.7820884585380554, "learning_rate": 9.010052276513064e-06, "loss": 0.6509, "step": 486 }, { "epoch": 0.8518950437317784, "grad_norm": 0.9036146402359009, "learning_rate": 9.003955353416824e-06, "loss": 0.6523, "step": 487 }, { "epoch": 0.8536443148688047, "grad_norm": 0.9042792320251465, "learning_rate": 8.9978417891626e-06, "loss": 0.6621, "step": 488 }, { "epoch": 0.8553935860058309, "grad_norm": 0.9255024194717407, "learning_rate": 8.991711609159459e-06, "loss": 0.6646, "step": 489 }, { "epoch": 0.8571428571428571, "grad_norm": 0.8441616892814636, "learning_rate": 8.985564838885531e-06, "loss": 0.6346, "step": 490 }, { "epoch": 0.8588921282798834, "grad_norm": 0.8288893699645996, "learning_rate": 8.9794015038879e-06, "loss": 0.6468, "step": 491 }, { "epoch": 0.8606413994169096, "grad_norm": 0.7340868711471558, "learning_rate": 8.973221629782491e-06, "loss": 0.6335, "step": 492 }, { "epoch": 0.8623906705539358, "grad_norm": 0.8286553025245667, "learning_rate": 8.967025242253973e-06, "loss": 0.6628, "step": 493 }, { "epoch": 0.8641399416909621, "grad_norm": 0.8606296181678772, "learning_rate": 8.960812367055646e-06, "loss": 0.6518, "step": 494 }, { "epoch": 0.8658892128279884, "grad_norm": 0.7245426774024963, "learning_rate": 8.954583030009338e-06, "loss": 0.6568, "step": 495 }, { "epoch": 0.8676384839650145, "grad_norm": 0.8268070816993713, "learning_rate": 8.94833725700529e-06, "loss": 0.6519, "step": 496 }, { "epoch": 0.8693877551020408, "grad_norm": 0.7328831553459167, "learning_rate": 8.942075074002059e-06, "loss": 0.6515, "step": 497 }, { "epoch": 0.8711370262390671, "grad_norm": 0.8437705039978027, "learning_rate": 8.9357965070264e-06, "loss": 0.6469, "step": 498 }, { "epoch": 0.8728862973760932, "grad_norm": 0.8057518601417542, "learning_rate": 8.929501582173167e-06, "loss": 0.64, "step": 499 }, { "epoch": 0.8746355685131195, "grad_norm": 0.777188241481781, "learning_rate": 8.923190325605202e-06, "loss": 0.6248, "step": 500 }, { "epoch": 0.8763848396501458, "grad_norm": 0.7843491435050964, "learning_rate": 8.916862763553217e-06, "loss": 0.6513, "step": 501 }, { "epoch": 0.878134110787172, "grad_norm": 0.8650302290916443, "learning_rate": 8.910518922315702e-06, "loss": 0.6379, "step": 502 }, { "epoch": 0.8798833819241982, "grad_norm": 0.7716485857963562, "learning_rate": 8.904158828258796e-06, "loss": 0.6521, "step": 503 }, { "epoch": 0.8816326530612245, "grad_norm": 0.761059582233429, "learning_rate": 8.897782507816195e-06, "loss": 0.6472, "step": 504 }, { "epoch": 0.8833819241982507, "grad_norm": 0.8378280997276306, "learning_rate": 8.891389987489034e-06, "loss": 0.663, "step": 505 }, { "epoch": 0.885131195335277, "grad_norm": 0.7125569581985474, "learning_rate": 8.884981293845775e-06, "loss": 0.6439, "step": 506 }, { "epoch": 0.8868804664723032, "grad_norm": 0.8235815167427063, "learning_rate": 8.8785564535221e-06, "loss": 0.643, "step": 507 }, { "epoch": 0.8886297376093294, "grad_norm": 0.8254197239875793, "learning_rate": 8.8721154932208e-06, "loss": 0.6634, "step": 508 }, { "epoch": 0.8903790087463557, "grad_norm": 0.6996443867683411, "learning_rate": 8.865658439711665e-06, "loss": 0.6408, "step": 509 }, { "epoch": 0.892128279883382, "grad_norm": 1.027512550354004, "learning_rate": 8.859185319831368e-06, "loss": 0.6331, "step": 510 }, { "epoch": 0.8938775510204081, "grad_norm": 0.716364860534668, "learning_rate": 8.852696160483358e-06, "loss": 0.6573, "step": 511 }, { "epoch": 0.8956268221574344, "grad_norm": 0.7939058542251587, "learning_rate": 8.84619098863775e-06, "loss": 0.6496, "step": 512 }, { "epoch": 0.8973760932944607, "grad_norm": 0.8533241152763367, "learning_rate": 8.839669831331207e-06, "loss": 0.6449, "step": 513 }, { "epoch": 0.8991253644314868, "grad_norm": 0.8271939754486084, "learning_rate": 8.833132715666827e-06, "loss": 0.6425, "step": 514 }, { "epoch": 0.9008746355685131, "grad_norm": 0.7712026238441467, "learning_rate": 8.826579668814041e-06, "loss": 0.6881, "step": 515 }, { "epoch": 0.9026239067055394, "grad_norm": 0.7993425130844116, "learning_rate": 8.820010718008489e-06, "loss": 0.6612, "step": 516 }, { "epoch": 0.9043731778425655, "grad_norm": 0.7909967303276062, "learning_rate": 8.81342589055191e-06, "loss": 0.6668, "step": 517 }, { "epoch": 0.9061224489795918, "grad_norm": 0.7277968525886536, "learning_rate": 8.806825213812031e-06, "loss": 0.6502, "step": 518 }, { "epoch": 0.9078717201166181, "grad_norm": 0.7023674249649048, "learning_rate": 8.80020871522245e-06, "loss": 0.6706, "step": 519 }, { "epoch": 0.9096209912536443, "grad_norm": 0.745360255241394, "learning_rate": 8.793576422282524e-06, "loss": 0.6393, "step": 520 }, { "epoch": 0.9113702623906705, "grad_norm": 0.8034881949424744, "learning_rate": 8.786928362557256e-06, "loss": 0.6495, "step": 521 }, { "epoch": 0.9131195335276968, "grad_norm": 0.762272834777832, "learning_rate": 8.780264563677175e-06, "loss": 0.6184, "step": 522 }, { "epoch": 0.914868804664723, "grad_norm": 0.7285841107368469, "learning_rate": 8.773585053338226e-06, "loss": 0.6446, "step": 523 }, { "epoch": 0.9166180758017493, "grad_norm": 0.8421581387519836, "learning_rate": 8.766889859301658e-06, "loss": 0.6343, "step": 524 }, { "epoch": 0.9183673469387755, "grad_norm": 0.7869500517845154, "learning_rate": 8.760179009393896e-06, "loss": 0.6628, "step": 525 }, { "epoch": 0.9201166180758017, "grad_norm": 0.8309426307678223, "learning_rate": 8.753452531506443e-06, "loss": 0.6511, "step": 526 }, { "epoch": 0.921865889212828, "grad_norm": 0.7290377020835876, "learning_rate": 8.746710453595746e-06, "loss": 0.6505, "step": 527 }, { "epoch": 0.9236151603498542, "grad_norm": 0.9149712920188904, "learning_rate": 8.739952803683093e-06, "loss": 0.6526, "step": 528 }, { "epoch": 0.9253644314868804, "grad_norm": 0.7976328730583191, "learning_rate": 8.733179609854493e-06, "loss": 0.6428, "step": 529 }, { "epoch": 0.9271137026239067, "grad_norm": 0.8044387102127075, "learning_rate": 8.726390900260556e-06, "loss": 0.6607, "step": 530 }, { "epoch": 0.928862973760933, "grad_norm": 1.0480045080184937, "learning_rate": 8.71958670311638e-06, "loss": 0.6512, "step": 531 }, { "epoch": 0.9306122448979591, "grad_norm": 0.8026485443115234, "learning_rate": 8.71276704670143e-06, "loss": 0.6535, "step": 532 }, { "epoch": 0.9323615160349854, "grad_norm": 0.8274646401405334, "learning_rate": 8.705931959359422e-06, "loss": 0.6607, "step": 533 }, { "epoch": 0.9341107871720117, "grad_norm": 0.8106281757354736, "learning_rate": 8.69908146949821e-06, "loss": 0.6536, "step": 534 }, { "epoch": 0.9358600583090378, "grad_norm": 0.72037273645401, "learning_rate": 8.69221560558966e-06, "loss": 0.6297, "step": 535 }, { "epoch": 0.9376093294460641, "grad_norm": 0.8486798405647278, "learning_rate": 8.685334396169537e-06, "loss": 0.672, "step": 536 }, { "epoch": 0.9393586005830904, "grad_norm": 0.7214967012405396, "learning_rate": 8.67843786983738e-06, "loss": 0.6562, "step": 537 }, { "epoch": 0.9411078717201167, "grad_norm": 0.6937552690505981, "learning_rate": 8.6715260552564e-06, "loss": 0.6268, "step": 538 }, { "epoch": 0.9428571428571428, "grad_norm": 0.7400856018066406, "learning_rate": 8.664598981153333e-06, "loss": 0.6542, "step": 539 }, { "epoch": 0.9446064139941691, "grad_norm": 0.7356927990913391, "learning_rate": 8.657656676318346e-06, "loss": 0.649, "step": 540 }, { "epoch": 0.9463556851311954, "grad_norm": 0.7675653696060181, "learning_rate": 8.650699169604906e-06, "loss": 0.6355, "step": 541 }, { "epoch": 0.9481049562682216, "grad_norm": 0.7891031503677368, "learning_rate": 8.64372648992966e-06, "loss": 0.64, "step": 542 }, { "epoch": 0.9498542274052478, "grad_norm": 0.743021547794342, "learning_rate": 8.63673866627232e-06, "loss": 0.6653, "step": 543 }, { "epoch": 0.9516034985422741, "grad_norm": 0.7736004590988159, "learning_rate": 8.629735727675536e-06, "loss": 0.6463, "step": 544 }, { "epoch": 0.9533527696793003, "grad_norm": 0.755898654460907, "learning_rate": 8.622717703244779e-06, "loss": 0.6661, "step": 545 }, { "epoch": 0.9551020408163265, "grad_norm": 0.9036446213722229, "learning_rate": 8.615684622148218e-06, "loss": 0.653, "step": 546 }, { "epoch": 0.9568513119533528, "grad_norm": 0.7612971663475037, "learning_rate": 8.608636513616604e-06, "loss": 0.6621, "step": 547 }, { "epoch": 0.958600583090379, "grad_norm": 0.8090029358863831, "learning_rate": 8.601573406943143e-06, "loss": 0.6536, "step": 548 }, { "epoch": 0.9603498542274053, "grad_norm": 0.7707245945930481, "learning_rate": 8.594495331483374e-06, "loss": 0.6574, "step": 549 }, { "epoch": 0.9620991253644315, "grad_norm": 0.8504512310028076, "learning_rate": 8.587402316655052e-06, "loss": 0.6543, "step": 550 }, { "epoch": 0.9638483965014577, "grad_norm": 0.9194968342781067, "learning_rate": 8.580294391938022e-06, "loss": 0.6577, "step": 551 }, { "epoch": 0.965597667638484, "grad_norm": 0.811928927898407, "learning_rate": 8.573171586874093e-06, "loss": 0.6361, "step": 552 }, { "epoch": 0.9673469387755103, "grad_norm": 0.8627200126647949, "learning_rate": 8.566033931066926e-06, "loss": 0.6296, "step": 553 }, { "epoch": 0.9690962099125364, "grad_norm": 0.8371759057044983, "learning_rate": 8.5588814541819e-06, "loss": 0.6331, "step": 554 }, { "epoch": 0.9708454810495627, "grad_norm": 0.8430494666099548, "learning_rate": 8.551714185945993e-06, "loss": 0.6478, "step": 555 }, { "epoch": 0.972594752186589, "grad_norm": 0.7402383685112, "learning_rate": 8.544532156147664e-06, "loss": 0.642, "step": 556 }, { "epoch": 0.9743440233236151, "grad_norm": 0.9169778823852539, "learning_rate": 8.537335394636717e-06, "loss": 0.642, "step": 557 }, { "epoch": 0.9760932944606414, "grad_norm": 0.9582036137580872, "learning_rate": 8.530123931324184e-06, "loss": 0.6398, "step": 558 }, { "epoch": 0.9778425655976677, "grad_norm": 0.7186473608016968, "learning_rate": 8.522897796182206e-06, "loss": 0.6285, "step": 559 }, { "epoch": 0.9795918367346939, "grad_norm": 0.7509265542030334, "learning_rate": 8.515657019243897e-06, "loss": 0.6513, "step": 560 }, { "epoch": 0.9813411078717201, "grad_norm": 0.9382954835891724, "learning_rate": 8.508401630603226e-06, "loss": 0.6527, "step": 561 }, { "epoch": 0.9830903790087464, "grad_norm": 0.734822154045105, "learning_rate": 8.501131660414898e-06, "loss": 0.6574, "step": 562 }, { "epoch": 0.9848396501457726, "grad_norm": 0.7634336948394775, "learning_rate": 8.49384713889421e-06, "loss": 0.6572, "step": 563 }, { "epoch": 0.9865889212827988, "grad_norm": 0.7752605080604553, "learning_rate": 8.486548096316942e-06, "loss": 0.6262, "step": 564 }, { "epoch": 0.9883381924198251, "grad_norm": 0.8870651125907898, "learning_rate": 8.47923456301923e-06, "loss": 0.6543, "step": 565 }, { "epoch": 0.9900874635568513, "grad_norm": 0.6799163222312927, "learning_rate": 8.471906569397432e-06, "loss": 0.6267, "step": 566 }, { "epoch": 0.9918367346938776, "grad_norm": 0.8166511058807373, "learning_rate": 8.464564145908005e-06, "loss": 0.609, "step": 567 }, { "epoch": 0.9935860058309038, "grad_norm": 0.6924462914466858, "learning_rate": 8.457207323067382e-06, "loss": 0.6445, "step": 568 }, { "epoch": 0.99533527696793, "grad_norm": 0.788011372089386, "learning_rate": 8.449836131451838e-06, "loss": 0.63, "step": 569 }, { "epoch": 0.9970845481049563, "grad_norm": 0.7385700345039368, "learning_rate": 8.442450601697373e-06, "loss": 0.6531, "step": 570 }, { "epoch": 0.9988338192419826, "grad_norm": 0.7874246835708618, "learning_rate": 8.435050764499578e-06, "loss": 0.6478, "step": 571 }, { "epoch": 1.0005830903790087, "grad_norm": 0.7162706851959229, "learning_rate": 8.427636650613499e-06, "loss": 0.6003, "step": 572 }, { "epoch": 1.0023323615160349, "grad_norm": 0.7501272559165955, "learning_rate": 8.420208290853531e-06, "loss": 0.5883, "step": 573 }, { "epoch": 1.0040816326530613, "grad_norm": 0.6922512054443359, "learning_rate": 8.412765716093273e-06, "loss": 0.5924, "step": 574 }, { "epoch": 1.0058309037900874, "grad_norm": 0.7050082683563232, "learning_rate": 8.405308957265397e-06, "loss": 0.5851, "step": 575 }, { "epoch": 1.0075801749271136, "grad_norm": 0.6952359080314636, "learning_rate": 8.39783804536154e-06, "loss": 0.6026, "step": 576 }, { "epoch": 1.00932944606414, "grad_norm": 0.7224779725074768, "learning_rate": 8.39035301143215e-06, "loss": 0.5874, "step": 577 }, { "epoch": 1.0110787172011662, "grad_norm": 0.7687159180641174, "learning_rate": 8.382853886586374e-06, "loss": 0.5895, "step": 578 }, { "epoch": 1.0128279883381923, "grad_norm": 0.7063059210777283, "learning_rate": 8.375340701991923e-06, "loss": 0.5683, "step": 579 }, { "epoch": 1.0145772594752187, "grad_norm": 0.7755822539329529, "learning_rate": 8.367813488874942e-06, "loss": 0.5839, "step": 580 }, { "epoch": 1.0163265306122449, "grad_norm": 0.7671052813529968, "learning_rate": 8.360272278519883e-06, "loss": 0.589, "step": 581 }, { "epoch": 1.018075801749271, "grad_norm": 0.7296236157417297, "learning_rate": 8.35271710226937e-06, "loss": 0.5844, "step": 582 }, { "epoch": 1.0198250728862974, "grad_norm": 0.7949197292327881, "learning_rate": 8.345147991524074e-06, "loss": 0.5891, "step": 583 }, { "epoch": 1.0215743440233236, "grad_norm": 0.7571488618850708, "learning_rate": 8.337564977742577e-06, "loss": 0.6122, "step": 584 }, { "epoch": 1.0233236151603498, "grad_norm": 0.7510064244270325, "learning_rate": 8.329968092441248e-06, "loss": 0.5779, "step": 585 }, { "epoch": 1.0250728862973761, "grad_norm": 0.8047382831573486, "learning_rate": 8.32235736719411e-06, "loss": 0.575, "step": 586 }, { "epoch": 1.0268221574344023, "grad_norm": 0.7365403175354004, "learning_rate": 8.3147328336327e-06, "loss": 0.5881, "step": 587 }, { "epoch": 1.0285714285714285, "grad_norm": 0.7103053331375122, "learning_rate": 8.307094523445957e-06, "loss": 0.5818, "step": 588 }, { "epoch": 1.0303206997084549, "grad_norm": 0.8095313906669617, "learning_rate": 8.299442468380065e-06, "loss": 0.596, "step": 589 }, { "epoch": 1.032069970845481, "grad_norm": 0.7226415872573853, "learning_rate": 8.291776700238341e-06, "loss": 0.5897, "step": 590 }, { "epoch": 1.0338192419825072, "grad_norm": 0.7384948134422302, "learning_rate": 8.284097250881096e-06, "loss": 0.5758, "step": 591 }, { "epoch": 1.0355685131195336, "grad_norm": 0.7770851850509644, "learning_rate": 8.2764041522255e-06, "loss": 0.5764, "step": 592 }, { "epoch": 1.0373177842565597, "grad_norm": 0.7911878824234009, "learning_rate": 8.268697436245457e-06, "loss": 0.584, "step": 593 }, { "epoch": 1.039067055393586, "grad_norm": 0.7258270978927612, "learning_rate": 8.26097713497146e-06, "loss": 0.5992, "step": 594 }, { "epoch": 1.0408163265306123, "grad_norm": 0.8300918936729431, "learning_rate": 8.253243280490471e-06, "loss": 0.6065, "step": 595 }, { "epoch": 1.0425655976676385, "grad_norm": 0.7771987915039062, "learning_rate": 8.245495904945775e-06, "loss": 0.6009, "step": 596 }, { "epoch": 1.0443148688046646, "grad_norm": 0.7929785847663879, "learning_rate": 8.237735040536862e-06, "loss": 0.5838, "step": 597 }, { "epoch": 1.046064139941691, "grad_norm": 0.7533876895904541, "learning_rate": 8.229960719519274e-06, "loss": 0.6056, "step": 598 }, { "epoch": 1.0478134110787172, "grad_norm": 0.831673264503479, "learning_rate": 8.222172974204493e-06, "loss": 0.5955, "step": 599 }, { "epoch": 1.0495626822157433, "grad_norm": 0.8129804730415344, "learning_rate": 8.214371836959782e-06, "loss": 0.5915, "step": 600 }, { "epoch": 1.0513119533527697, "grad_norm": 0.9338422417640686, "learning_rate": 8.20655734020807e-06, "loss": 0.5668, "step": 601 }, { "epoch": 1.0530612244897959, "grad_norm": 0.6543465852737427, "learning_rate": 8.198729516427815e-06, "loss": 0.5816, "step": 602 }, { "epoch": 1.054810495626822, "grad_norm": 0.7885090708732605, "learning_rate": 8.190888398152853e-06, "loss": 0.5763, "step": 603 }, { "epoch": 1.0565597667638484, "grad_norm": 0.8415564298629761, "learning_rate": 8.183034017972285e-06, "loss": 0.5827, "step": 604 }, { "epoch": 1.0583090379008746, "grad_norm": 0.6944169998168945, "learning_rate": 8.175166408530324e-06, "loss": 0.5972, "step": 605 }, { "epoch": 1.0600583090379008, "grad_norm": 0.7324714660644531, "learning_rate": 8.16728560252617e-06, "loss": 0.5977, "step": 606 }, { "epoch": 1.0618075801749272, "grad_norm": 0.7649739384651184, "learning_rate": 8.159391632713868e-06, "loss": 0.5942, "step": 607 }, { "epoch": 1.0635568513119533, "grad_norm": 0.6606800556182861, "learning_rate": 8.151484531902175e-06, "loss": 0.5876, "step": 608 }, { "epoch": 1.0653061224489795, "grad_norm": 0.778293788433075, "learning_rate": 8.143564332954426e-06, "loss": 0.5828, "step": 609 }, { "epoch": 1.0670553935860059, "grad_norm": 0.8077337741851807, "learning_rate": 8.135631068788386e-06, "loss": 0.5797, "step": 610 }, { "epoch": 1.068804664723032, "grad_norm": 0.6837419867515564, "learning_rate": 8.12768477237613e-06, "loss": 0.5582, "step": 611 }, { "epoch": 1.0705539358600582, "grad_norm": 0.7375588417053223, "learning_rate": 8.119725476743893e-06, "loss": 0.5916, "step": 612 }, { "epoch": 1.0723032069970846, "grad_norm": 0.7412000894546509, "learning_rate": 8.11175321497194e-06, "loss": 0.5871, "step": 613 }, { "epoch": 1.0740524781341108, "grad_norm": 0.7086803317070007, "learning_rate": 8.103768020194422e-06, "loss": 0.5905, "step": 614 }, { "epoch": 1.075801749271137, "grad_norm": 0.7518477439880371, "learning_rate": 8.095769925599242e-06, "loss": 0.6009, "step": 615 }, { "epoch": 1.0775510204081633, "grad_norm": 0.7370843887329102, "learning_rate": 8.08775896442792e-06, "loss": 0.5688, "step": 616 }, { "epoch": 1.0793002915451895, "grad_norm": 0.6971272826194763, "learning_rate": 8.079735169975449e-06, "loss": 0.5691, "step": 617 }, { "epoch": 1.0810495626822156, "grad_norm": 0.7586734890937805, "learning_rate": 8.071698575590164e-06, "loss": 0.5901, "step": 618 }, { "epoch": 1.082798833819242, "grad_norm": 0.740722119808197, "learning_rate": 8.06364921467359e-06, "loss": 0.5687, "step": 619 }, { "epoch": 1.0845481049562682, "grad_norm": 0.8197956681251526, "learning_rate": 8.055587120680322e-06, "loss": 0.5835, "step": 620 }, { "epoch": 1.0862973760932944, "grad_norm": 0.8077744245529175, "learning_rate": 8.04751232711787e-06, "loss": 0.5867, "step": 621 }, { "epoch": 1.0880466472303207, "grad_norm": 0.846993088722229, "learning_rate": 8.039424867546529e-06, "loss": 0.5905, "step": 622 }, { "epoch": 1.089795918367347, "grad_norm": 0.7804524302482605, "learning_rate": 8.03132477557923e-06, "loss": 0.585, "step": 623 }, { "epoch": 1.091545189504373, "grad_norm": 0.6927772760391235, "learning_rate": 8.023212084881415e-06, "loss": 0.5939, "step": 624 }, { "epoch": 1.0932944606413995, "grad_norm": 0.7511571645736694, "learning_rate": 8.015086829170881e-06, "loss": 0.5654, "step": 625 }, { "epoch": 1.0950437317784256, "grad_norm": 0.7723050713539124, "learning_rate": 8.006949042217655e-06, "loss": 0.5941, "step": 626 }, { "epoch": 1.0967930029154518, "grad_norm": 0.6883190870285034, "learning_rate": 7.998798757843839e-06, "loss": 0.5818, "step": 627 }, { "epoch": 1.0985422740524782, "grad_norm": 0.7326623797416687, "learning_rate": 7.99063600992348e-06, "loss": 0.5704, "step": 628 }, { "epoch": 1.1002915451895043, "grad_norm": 0.7372655868530273, "learning_rate": 7.982460832382426e-06, "loss": 0.5783, "step": 629 }, { "epoch": 1.1020408163265305, "grad_norm": 0.7336997985839844, "learning_rate": 7.974273259198184e-06, "loss": 0.5848, "step": 630 }, { "epoch": 1.103790087463557, "grad_norm": 0.5926365852355957, "learning_rate": 7.96607332439978e-06, "loss": 0.5854, "step": 631 }, { "epoch": 1.105539358600583, "grad_norm": 0.6692789793014526, "learning_rate": 7.957861062067614e-06, "loss": 0.5886, "step": 632 }, { "epoch": 1.1072886297376092, "grad_norm": 0.7653543949127197, "learning_rate": 7.949636506333325e-06, "loss": 0.5954, "step": 633 }, { "epoch": 1.1090379008746356, "grad_norm": 0.7486501932144165, "learning_rate": 7.941399691379646e-06, "loss": 0.5711, "step": 634 }, { "epoch": 1.1107871720116618, "grad_norm": 0.7463504672050476, "learning_rate": 7.93315065144026e-06, "loss": 0.5936, "step": 635 }, { "epoch": 1.112536443148688, "grad_norm": 0.664617657661438, "learning_rate": 7.924889420799659e-06, "loss": 0.5651, "step": 636 }, { "epoch": 1.1142857142857143, "grad_norm": 0.7309557795524597, "learning_rate": 7.916616033793001e-06, "loss": 0.5775, "step": 637 }, { "epoch": 1.1160349854227405, "grad_norm": 0.7160918116569519, "learning_rate": 7.908330524805972e-06, "loss": 0.5807, "step": 638 }, { "epoch": 1.1177842565597667, "grad_norm": 0.7500931620597839, "learning_rate": 7.900032928274635e-06, "loss": 0.5523, "step": 639 }, { "epoch": 1.119533527696793, "grad_norm": 0.7340319752693176, "learning_rate": 7.89172327868529e-06, "loss": 0.5895, "step": 640 }, { "epoch": 1.1212827988338192, "grad_norm": 0.7378114461898804, "learning_rate": 7.883401610574338e-06, "loss": 0.5968, "step": 641 }, { "epoch": 1.1230320699708454, "grad_norm": 0.6786007285118103, "learning_rate": 7.875067958528123e-06, "loss": 0.5945, "step": 642 }, { "epoch": 1.1247813411078718, "grad_norm": 0.7955930829048157, "learning_rate": 7.866722357182802e-06, "loss": 0.5838, "step": 643 }, { "epoch": 1.126530612244898, "grad_norm": 0.6760314702987671, "learning_rate": 7.858364841224196e-06, "loss": 0.5731, "step": 644 }, { "epoch": 1.128279883381924, "grad_norm": 0.6789821982383728, "learning_rate": 7.849995445387641e-06, "loss": 0.5851, "step": 645 }, { "epoch": 1.1300291545189505, "grad_norm": 0.7065725326538086, "learning_rate": 7.84161420445785e-06, "loss": 0.5717, "step": 646 }, { "epoch": 1.1317784256559766, "grad_norm": 0.7986956834793091, "learning_rate": 7.833221153268771e-06, "loss": 0.597, "step": 647 }, { "epoch": 1.1335276967930028, "grad_norm": 0.701583981513977, "learning_rate": 7.824816326703427e-06, "loss": 0.5912, "step": 648 }, { "epoch": 1.1352769679300292, "grad_norm": 0.7437650561332703, "learning_rate": 7.81639975969379e-06, "loss": 0.5766, "step": 649 }, { "epoch": 1.1370262390670554, "grad_norm": 0.7493281960487366, "learning_rate": 7.807971487220625e-06, "loss": 0.5792, "step": 650 }, { "epoch": 1.1387755102040815, "grad_norm": 0.7174960374832153, "learning_rate": 7.799531544313349e-06, "loss": 0.571, "step": 651 }, { "epoch": 1.140524781341108, "grad_norm": 0.7612585425376892, "learning_rate": 7.791079966049875e-06, "loss": 0.5891, "step": 652 }, { "epoch": 1.142274052478134, "grad_norm": 0.686790943145752, "learning_rate": 7.782616787556489e-06, "loss": 0.5827, "step": 653 }, { "epoch": 1.1440233236151602, "grad_norm": 0.6844298839569092, "learning_rate": 7.774142044007677e-06, "loss": 0.5742, "step": 654 }, { "epoch": 1.1457725947521866, "grad_norm": 0.719002902507782, "learning_rate": 7.765655770625997e-06, "loss": 0.5747, "step": 655 }, { "epoch": 1.1475218658892128, "grad_norm": 0.7167585492134094, "learning_rate": 7.757158002681928e-06, "loss": 0.5957, "step": 656 }, { "epoch": 1.149271137026239, "grad_norm": 0.7162557244300842, "learning_rate": 7.748648775493719e-06, "loss": 0.5968, "step": 657 }, { "epoch": 1.1510204081632653, "grad_norm": 0.7499945163726807, "learning_rate": 7.740128124427248e-06, "loss": 0.5692, "step": 658 }, { "epoch": 1.1527696793002915, "grad_norm": 0.670823335647583, "learning_rate": 7.73159608489587e-06, "loss": 0.599, "step": 659 }, { "epoch": 1.1545189504373177, "grad_norm": 0.7939999103546143, "learning_rate": 7.723052692360278e-06, "loss": 0.5833, "step": 660 }, { "epoch": 1.156268221574344, "grad_norm": 0.7756043076515198, "learning_rate": 7.714497982328343e-06, "loss": 0.6032, "step": 661 }, { "epoch": 1.1580174927113702, "grad_norm": 0.7829065322875977, "learning_rate": 7.705931990354981e-06, "loss": 0.5638, "step": 662 }, { "epoch": 1.1597667638483964, "grad_norm": 0.8080562949180603, "learning_rate": 7.697354752041993e-06, "loss": 0.5748, "step": 663 }, { "epoch": 1.1615160349854228, "grad_norm": 0.7522807717323303, "learning_rate": 7.68876630303792e-06, "loss": 0.5832, "step": 664 }, { "epoch": 1.163265306122449, "grad_norm": 0.7087774276733398, "learning_rate": 7.680166679037901e-06, "loss": 0.5684, "step": 665 }, { "epoch": 1.165014577259475, "grad_norm": 0.738114058971405, "learning_rate": 7.671555915783521e-06, "loss": 0.5909, "step": 666 }, { "epoch": 1.1667638483965015, "grad_norm": 0.7286803722381592, "learning_rate": 7.662934049062656e-06, "loss": 0.5779, "step": 667 }, { "epoch": 1.1685131195335277, "grad_norm": 0.6938976645469666, "learning_rate": 7.654301114709337e-06, "loss": 0.5676, "step": 668 }, { "epoch": 1.1702623906705538, "grad_norm": 0.6952196359634399, "learning_rate": 7.645657148603588e-06, "loss": 0.5973, "step": 669 }, { "epoch": 1.1720116618075802, "grad_norm": 0.7153951525688171, "learning_rate": 7.637002186671288e-06, "loss": 0.6038, "step": 670 }, { "epoch": 1.1737609329446064, "grad_norm": 0.8159996271133423, "learning_rate": 7.628336264884011e-06, "loss": 0.5849, "step": 671 }, { "epoch": 1.1755102040816325, "grad_norm": 0.7702032327651978, "learning_rate": 7.619659419258886e-06, "loss": 0.5749, "step": 672 }, { "epoch": 1.177259475218659, "grad_norm": 0.6900976896286011, "learning_rate": 7.6109716858584435e-06, "loss": 0.5735, "step": 673 }, { "epoch": 1.179008746355685, "grad_norm": 0.7831345200538635, "learning_rate": 7.602273100790465e-06, "loss": 0.5795, "step": 674 }, { "epoch": 1.1807580174927113, "grad_norm": 0.796859860420227, "learning_rate": 7.593563700207834e-06, "loss": 0.5888, "step": 675 }, { "epoch": 1.1825072886297376, "grad_norm": 0.7660314440727234, "learning_rate": 7.584843520308383e-06, "loss": 0.5778, "step": 676 }, { "epoch": 1.1842565597667638, "grad_norm": 0.8283981680870056, "learning_rate": 7.5761125973347475e-06, "loss": 0.5643, "step": 677 }, { "epoch": 1.18600583090379, "grad_norm": 0.7160781621932983, "learning_rate": 7.56737096757421e-06, "loss": 0.5862, "step": 678 }, { "epoch": 1.1877551020408164, "grad_norm": 0.6969922780990601, "learning_rate": 7.558618667358558e-06, "loss": 0.5773, "step": 679 }, { "epoch": 1.1895043731778425, "grad_norm": 0.770819365978241, "learning_rate": 7.549855733063924e-06, "loss": 0.5951, "step": 680 }, { "epoch": 1.1912536443148687, "grad_norm": 0.7461149096488953, "learning_rate": 7.541082201110634e-06, "loss": 0.5697, "step": 681 }, { "epoch": 1.193002915451895, "grad_norm": 0.7934045195579529, "learning_rate": 7.5322981079630696e-06, "loss": 0.5773, "step": 682 }, { "epoch": 1.1947521865889212, "grad_norm": 0.7850393056869507, "learning_rate": 7.523503490129493e-06, "loss": 0.5908, "step": 683 }, { "epoch": 1.1965014577259474, "grad_norm": 0.7232147455215454, "learning_rate": 7.514698384161923e-06, "loss": 0.5936, "step": 684 }, { "epoch": 1.1982507288629738, "grad_norm": 0.9024247527122498, "learning_rate": 7.505882826655958e-06, "loss": 0.5778, "step": 685 }, { "epoch": 1.2, "grad_norm": 0.7916082739830017, "learning_rate": 7.4970568542506414e-06, "loss": 0.5925, "step": 686 }, { "epoch": 1.2017492711370261, "grad_norm": 0.7833503484725952, "learning_rate": 7.4882205036282995e-06, "loss": 0.6075, "step": 687 }, { "epoch": 1.2034985422740525, "grad_norm": 0.7731108069419861, "learning_rate": 7.4793738115143945e-06, "loss": 0.5719, "step": 688 }, { "epoch": 1.2052478134110787, "grad_norm": 0.7863621115684509, "learning_rate": 7.470516814677367e-06, "loss": 0.5526, "step": 689 }, { "epoch": 1.2069970845481048, "grad_norm": 0.7827056050300598, "learning_rate": 7.46164954992849e-06, "loss": 0.5607, "step": 690 }, { "epoch": 1.2087463556851312, "grad_norm": 0.8274509906768799, "learning_rate": 7.452772054121709e-06, "loss": 0.566, "step": 691 }, { "epoch": 1.2104956268221574, "grad_norm": 0.7847001552581787, "learning_rate": 7.443884364153487e-06, "loss": 0.5971, "step": 692 }, { "epoch": 1.2122448979591836, "grad_norm": 0.787212610244751, "learning_rate": 7.434986516962667e-06, "loss": 0.5793, "step": 693 }, { "epoch": 1.21399416909621, "grad_norm": 0.6807124018669128, "learning_rate": 7.426078549530298e-06, "loss": 0.6045, "step": 694 }, { "epoch": 1.215743440233236, "grad_norm": 0.8698908686637878, "learning_rate": 7.417160498879494e-06, "loss": 0.5832, "step": 695 }, { "epoch": 1.2174927113702623, "grad_norm": 0.8250199556350708, "learning_rate": 7.408232402075275e-06, "loss": 0.5839, "step": 696 }, { "epoch": 1.2192419825072887, "grad_norm": 0.7191975712776184, "learning_rate": 7.399294296224417e-06, "loss": 0.5989, "step": 697 }, { "epoch": 1.2209912536443148, "grad_norm": 0.7614293098449707, "learning_rate": 7.390346218475295e-06, "loss": 0.5908, "step": 698 }, { "epoch": 1.222740524781341, "grad_norm": 0.8519247770309448, "learning_rate": 7.3813882060177275e-06, "loss": 0.5967, "step": 699 }, { "epoch": 1.2244897959183674, "grad_norm": 0.7587504386901855, "learning_rate": 7.372420296082823e-06, "loss": 0.5981, "step": 700 }, { "epoch": 1.2262390670553935, "grad_norm": 0.6888514757156372, "learning_rate": 7.363442525942827e-06, "loss": 0.5828, "step": 701 }, { "epoch": 1.2279883381924197, "grad_norm": 0.7067459225654602, "learning_rate": 7.354454932910966e-06, "loss": 0.5933, "step": 702 }, { "epoch": 1.229737609329446, "grad_norm": 0.7949218153953552, "learning_rate": 7.3454575543412935e-06, "loss": 0.5548, "step": 703 }, { "epoch": 1.2314868804664723, "grad_norm": 0.7394559979438782, "learning_rate": 7.336450427628528e-06, "loss": 0.5797, "step": 704 }, { "epoch": 1.2332361516034984, "grad_norm": 0.71580970287323, "learning_rate": 7.32743359020791e-06, "loss": 0.5651, "step": 705 }, { "epoch": 1.2349854227405248, "grad_norm": 0.7067066431045532, "learning_rate": 7.3184070795550345e-06, "loss": 0.5782, "step": 706 }, { "epoch": 1.236734693877551, "grad_norm": 0.9357588887214661, "learning_rate": 7.309370933185702e-06, "loss": 0.5946, "step": 707 }, { "epoch": 1.2384839650145771, "grad_norm": 0.6606597900390625, "learning_rate": 7.300325188655762e-06, "loss": 0.6065, "step": 708 }, { "epoch": 1.2402332361516035, "grad_norm": 0.765891969203949, "learning_rate": 7.291269883560952e-06, "loss": 0.5875, "step": 709 }, { "epoch": 1.2419825072886297, "grad_norm": 0.7608016133308411, "learning_rate": 7.282205055536751e-06, "loss": 0.5688, "step": 710 }, { "epoch": 1.2437317784256559, "grad_norm": 0.7054245471954346, "learning_rate": 7.2731307422582085e-06, "loss": 0.5833, "step": 711 }, { "epoch": 1.2454810495626822, "grad_norm": 0.7757759094238281, "learning_rate": 7.264046981439805e-06, "loss": 0.6016, "step": 712 }, { "epoch": 1.2472303206997084, "grad_norm": 0.7349992990493774, "learning_rate": 7.254953810835281e-06, "loss": 0.6108, "step": 713 }, { "epoch": 1.2489795918367346, "grad_norm": 0.765326738357544, "learning_rate": 7.24585126823749e-06, "loss": 0.5899, "step": 714 }, { "epoch": 1.250728862973761, "grad_norm": 0.7540246844291687, "learning_rate": 7.236739391478231e-06, "loss": 0.5678, "step": 715 }, { "epoch": 1.2524781341107871, "grad_norm": 0.712993860244751, "learning_rate": 7.227618218428104e-06, "loss": 0.5688, "step": 716 }, { "epoch": 1.2542274052478133, "grad_norm": 0.7648964524269104, "learning_rate": 7.218487786996344e-06, "loss": 0.5865, "step": 717 }, { "epoch": 1.2559766763848397, "grad_norm": 0.7834830284118652, "learning_rate": 7.209348135130661e-06, "loss": 0.5887, "step": 718 }, { "epoch": 1.2577259475218658, "grad_norm": 0.7364179491996765, "learning_rate": 7.200199300817094e-06, "loss": 0.6023, "step": 719 }, { "epoch": 1.259475218658892, "grad_norm": 0.6705512404441833, "learning_rate": 7.191041322079842e-06, "loss": 0.575, "step": 720 }, { "epoch": 1.2612244897959184, "grad_norm": 0.7370550632476807, "learning_rate": 7.18187423698111e-06, "loss": 0.6014, "step": 721 }, { "epoch": 1.2629737609329446, "grad_norm": 0.7369145154953003, "learning_rate": 7.172698083620951e-06, "loss": 0.5829, "step": 722 }, { "epoch": 1.2647230320699707, "grad_norm": 0.7336062788963318, "learning_rate": 7.163512900137107e-06, "loss": 0.5913, "step": 723 }, { "epoch": 1.266472303206997, "grad_norm": 0.7129080891609192, "learning_rate": 7.1543187247048525e-06, "loss": 0.5774, "step": 724 }, { "epoch": 1.2682215743440233, "grad_norm": 0.6659237742424011, "learning_rate": 7.145115595536832e-06, "loss": 0.5772, "step": 725 }, { "epoch": 1.2699708454810494, "grad_norm": 0.7026515603065491, "learning_rate": 7.135903550882903e-06, "loss": 0.5908, "step": 726 }, { "epoch": 1.2717201166180758, "grad_norm": 0.689743161201477, "learning_rate": 7.126682629029982e-06, "loss": 0.5581, "step": 727 }, { "epoch": 1.273469387755102, "grad_norm": 0.7267572283744812, "learning_rate": 7.117452868301872e-06, "loss": 0.5736, "step": 728 }, { "epoch": 1.2752186588921282, "grad_norm": 0.7183996438980103, "learning_rate": 7.108214307059122e-06, "loss": 0.5869, "step": 729 }, { "epoch": 1.2769679300291545, "grad_norm": 0.7018952965736389, "learning_rate": 7.098966983698851e-06, "loss": 0.5801, "step": 730 }, { "epoch": 1.2787172011661807, "grad_norm": 0.6842145919799805, "learning_rate": 7.089710936654597e-06, "loss": 0.5647, "step": 731 }, { "epoch": 1.2804664723032069, "grad_norm": 0.6817106008529663, "learning_rate": 7.080446204396153e-06, "loss": 0.5948, "step": 732 }, { "epoch": 1.2822157434402333, "grad_norm": 0.6787328720092773, "learning_rate": 7.0711728254294145e-06, "loss": 0.5875, "step": 733 }, { "epoch": 1.2839650145772594, "grad_norm": 0.7171207666397095, "learning_rate": 7.061890838296209e-06, "loss": 0.6004, "step": 734 }, { "epoch": 1.2857142857142856, "grad_norm": 0.8042128682136536, "learning_rate": 7.052600281574142e-06, "loss": 0.5863, "step": 735 }, { "epoch": 1.287463556851312, "grad_norm": 0.7431527972221375, "learning_rate": 7.043301193876441e-06, "loss": 0.5679, "step": 736 }, { "epoch": 1.2892128279883381, "grad_norm": 0.7799648642539978, "learning_rate": 7.033993613851779e-06, "loss": 0.5895, "step": 737 }, { "epoch": 1.2909620991253643, "grad_norm": 0.7826388478279114, "learning_rate": 7.0246775801841384e-06, "loss": 0.5708, "step": 738 }, { "epoch": 1.2927113702623907, "grad_norm": 0.7362210154533386, "learning_rate": 7.015353131592624e-06, "loss": 0.5696, "step": 739 }, { "epoch": 1.2944606413994169, "grad_norm": 0.7313367128372192, "learning_rate": 7.0060203068313214e-06, "loss": 0.5794, "step": 740 }, { "epoch": 1.296209912536443, "grad_norm": 0.7567710876464844, "learning_rate": 6.996679144689129e-06, "loss": 0.5813, "step": 741 }, { "epoch": 1.2979591836734694, "grad_norm": 0.7770729660987854, "learning_rate": 6.987329683989593e-06, "loss": 0.5654, "step": 742 }, { "epoch": 1.2997084548104956, "grad_norm": 0.7669873237609863, "learning_rate": 6.977971963590754e-06, "loss": 0.5785, "step": 743 }, { "epoch": 1.3014577259475217, "grad_norm": 0.6579426527023315, "learning_rate": 6.968606022384979e-06, "loss": 0.5881, "step": 744 }, { "epoch": 1.3032069970845481, "grad_norm": 0.8124262094497681, "learning_rate": 6.959231899298803e-06, "loss": 0.5644, "step": 745 }, { "epoch": 1.3049562682215743, "grad_norm": 0.7445812225341797, "learning_rate": 6.949849633292764e-06, "loss": 0.5661, "step": 746 }, { "epoch": 1.3067055393586005, "grad_norm": 0.7588534951210022, "learning_rate": 6.9404592633612486e-06, "loss": 0.563, "step": 747 }, { "epoch": 1.3084548104956268, "grad_norm": 0.7650431394577026, "learning_rate": 6.9310608285323215e-06, "loss": 0.5926, "step": 748 }, { "epoch": 1.310204081632653, "grad_norm": 0.7714006304740906, "learning_rate": 6.921654367867565e-06, "loss": 0.592, "step": 749 }, { "epoch": 1.3119533527696792, "grad_norm": 0.8198411464691162, "learning_rate": 6.9122399204619206e-06, "loss": 0.5694, "step": 750 }, { "epoch": 1.3137026239067056, "grad_norm": 0.825666069984436, "learning_rate": 6.902817525443524e-06, "loss": 0.5614, "step": 751 }, { "epoch": 1.3154518950437317, "grad_norm": 0.7401177883148193, "learning_rate": 6.893387221973543e-06, "loss": 0.594, "step": 752 }, { "epoch": 1.3172011661807579, "grad_norm": 0.7801963686943054, "learning_rate": 6.883949049246013e-06, "loss": 0.5867, "step": 753 }, { "epoch": 1.3189504373177843, "grad_norm": 0.7522205710411072, "learning_rate": 6.8745030464876735e-06, "loss": 0.6068, "step": 754 }, { "epoch": 1.3206997084548104, "grad_norm": 0.7665319442749023, "learning_rate": 6.865049252957813e-06, "loss": 0.5966, "step": 755 }, { "epoch": 1.3224489795918366, "grad_norm": 0.793034553527832, "learning_rate": 6.855587707948094e-06, "loss": 0.5872, "step": 756 }, { "epoch": 1.324198250728863, "grad_norm": 0.8110220432281494, "learning_rate": 6.846118450782399e-06, "loss": 0.592, "step": 757 }, { "epoch": 1.3259475218658892, "grad_norm": 0.6731963157653809, "learning_rate": 6.836641520816662e-06, "loss": 0.5973, "step": 758 }, { "epoch": 1.3276967930029153, "grad_norm": 0.8347616195678711, "learning_rate": 6.827156957438707e-06, "loss": 0.5993, "step": 759 }, { "epoch": 1.3294460641399417, "grad_norm": 0.7723289728164673, "learning_rate": 6.817664800068085e-06, "loss": 0.5875, "step": 760 }, { "epoch": 1.3311953352769679, "grad_norm": 0.6868395209312439, "learning_rate": 6.808165088155906e-06, "loss": 0.5664, "step": 761 }, { "epoch": 1.332944606413994, "grad_norm": 0.8453480005264282, "learning_rate": 6.798657861184681e-06, "loss": 0.5951, "step": 762 }, { "epoch": 1.3346938775510204, "grad_norm": 0.7571320533752441, "learning_rate": 6.789143158668153e-06, "loss": 0.5953, "step": 763 }, { "epoch": 1.3364431486880466, "grad_norm": 0.649735689163208, "learning_rate": 6.779621020151137e-06, "loss": 0.5881, "step": 764 }, { "epoch": 1.3381924198250728, "grad_norm": 0.8617205619812012, "learning_rate": 6.77009148520935e-06, "loss": 0.5693, "step": 765 }, { "epoch": 1.3399416909620991, "grad_norm": 0.7008987665176392, "learning_rate": 6.760554593449252e-06, "loss": 0.6, "step": 766 }, { "epoch": 1.3416909620991253, "grad_norm": 0.6620650887489319, "learning_rate": 6.751010384507881e-06, "loss": 0.5787, "step": 767 }, { "epoch": 1.3434402332361515, "grad_norm": 0.7597478032112122, "learning_rate": 6.741458898052684e-06, "loss": 0.5721, "step": 768 }, { "epoch": 1.3451895043731779, "grad_norm": 0.6271765828132629, "learning_rate": 6.731900173781353e-06, "loss": 0.571, "step": 769 }, { "epoch": 1.346938775510204, "grad_norm": 0.672400176525116, "learning_rate": 6.722334251421665e-06, "loss": 0.5599, "step": 770 }, { "epoch": 1.3486880466472302, "grad_norm": 0.848135232925415, "learning_rate": 6.712761170731314e-06, "loss": 0.5934, "step": 771 }, { "epoch": 1.3504373177842566, "grad_norm": 0.6765730381011963, "learning_rate": 6.7031809714977404e-06, "loss": 0.5874, "step": 772 }, { "epoch": 1.3521865889212827, "grad_norm": 0.7608811855316162, "learning_rate": 6.6935936935379765e-06, "loss": 0.5887, "step": 773 }, { "epoch": 1.353935860058309, "grad_norm": 0.7140952348709106, "learning_rate": 6.683999376698472e-06, "loss": 0.5889, "step": 774 }, { "epoch": 1.3556851311953353, "grad_norm": 0.7518954277038574, "learning_rate": 6.674398060854931e-06, "loss": 0.5743, "step": 775 }, { "epoch": 1.3574344023323615, "grad_norm": 0.6832268238067627, "learning_rate": 6.664789785912149e-06, "loss": 0.5866, "step": 776 }, { "epoch": 1.3591836734693876, "grad_norm": 0.7125049829483032, "learning_rate": 6.655174591803841e-06, "loss": 0.6006, "step": 777 }, { "epoch": 1.360932944606414, "grad_norm": 0.6981856822967529, "learning_rate": 6.645552518492486e-06, "loss": 0.5831, "step": 778 }, { "epoch": 1.3626822157434402, "grad_norm": 0.6843867301940918, "learning_rate": 6.635923605969143e-06, "loss": 0.5911, "step": 779 }, { "epoch": 1.3644314868804663, "grad_norm": 0.7841307520866394, "learning_rate": 6.6262878942533085e-06, "loss": 0.6073, "step": 780 }, { "epoch": 1.3661807580174927, "grad_norm": 0.7465881109237671, "learning_rate": 6.61664542339273e-06, "loss": 0.5871, "step": 781 }, { "epoch": 1.367930029154519, "grad_norm": 0.7846645712852478, "learning_rate": 6.606996233463249e-06, "loss": 0.5788, "step": 782 }, { "epoch": 1.369679300291545, "grad_norm": 0.8399228453636169, "learning_rate": 6.597340364568634e-06, "loss": 0.5972, "step": 783 }, { "epoch": 1.3714285714285714, "grad_norm": 0.7623879909515381, "learning_rate": 6.58767785684041e-06, "loss": 0.5611, "step": 784 }, { "epoch": 1.3731778425655976, "grad_norm": 0.783353865146637, "learning_rate": 6.578008750437698e-06, "loss": 0.5824, "step": 785 }, { "epoch": 1.3749271137026238, "grad_norm": 0.7113669514656067, "learning_rate": 6.568333085547036e-06, "loss": 0.5729, "step": 786 }, { "epoch": 1.3766763848396502, "grad_norm": 0.7810559868812561, "learning_rate": 6.558650902382233e-06, "loss": 0.6061, "step": 787 }, { "epoch": 1.3784256559766763, "grad_norm": 0.7055504322052002, "learning_rate": 6.5489622411841806e-06, "loss": 0.5859, "step": 788 }, { "epoch": 1.3801749271137025, "grad_norm": 0.7454884648323059, "learning_rate": 6.539267142220691e-06, "loss": 0.5585, "step": 789 }, { "epoch": 1.3819241982507289, "grad_norm": 0.6890878081321716, "learning_rate": 6.529565645786345e-06, "loss": 0.5801, "step": 790 }, { "epoch": 1.383673469387755, "grad_norm": 0.6825428009033203, "learning_rate": 6.519857792202298e-06, "loss": 0.5812, "step": 791 }, { "epoch": 1.3854227405247812, "grad_norm": 0.6992026567459106, "learning_rate": 6.510143621816139e-06, "loss": 0.583, "step": 792 }, { "epoch": 1.3871720116618076, "grad_norm": 0.757209300994873, "learning_rate": 6.500423175001705e-06, "loss": 0.5807, "step": 793 }, { "epoch": 1.3889212827988338, "grad_norm": 0.6787829399108887, "learning_rate": 6.490696492158918e-06, "loss": 0.5753, "step": 794 }, { "epoch": 1.39067055393586, "grad_norm": 0.8165136575698853, "learning_rate": 6.480963613713621e-06, "loss": 0.5763, "step": 795 }, { "epoch": 1.3924198250728863, "grad_norm": 0.8152770400047302, "learning_rate": 6.471224580117407e-06, "loss": 0.5707, "step": 796 }, { "epoch": 1.3941690962099125, "grad_norm": 0.7971863746643066, "learning_rate": 6.461479431847448e-06, "loss": 0.597, "step": 797 }, { "epoch": 1.3959183673469386, "grad_norm": 0.7354028224945068, "learning_rate": 6.451728209406332e-06, "loss": 0.5742, "step": 798 }, { "epoch": 1.397667638483965, "grad_norm": 0.7192472815513611, "learning_rate": 6.441970953321893e-06, "loss": 0.5637, "step": 799 }, { "epoch": 1.3994169096209912, "grad_norm": 0.6813075542449951, "learning_rate": 6.432207704147037e-06, "loss": 0.5888, "step": 800 }, { "epoch": 1.4011661807580174, "grad_norm": 0.7503769397735596, "learning_rate": 6.422438502459588e-06, "loss": 0.5829, "step": 801 }, { "epoch": 1.4029154518950437, "grad_norm": 0.7250015735626221, "learning_rate": 6.412663388862099e-06, "loss": 0.576, "step": 802 }, { "epoch": 1.40466472303207, "grad_norm": 0.6180996298789978, "learning_rate": 6.402882403981702e-06, "loss": 0.6005, "step": 803 }, { "epoch": 1.4064139941690963, "grad_norm": 0.7702288031578064, "learning_rate": 6.393095588469924e-06, "loss": 0.5776, "step": 804 }, { "epoch": 1.4081632653061225, "grad_norm": 0.7245737314224243, "learning_rate": 6.3833029830025325e-06, "loss": 0.5987, "step": 805 }, { "epoch": 1.4099125364431486, "grad_norm": 0.6427474617958069, "learning_rate": 6.373504628279354e-06, "loss": 0.5801, "step": 806 }, { "epoch": 1.411661807580175, "grad_norm": 0.6634738445281982, "learning_rate": 6.363700565024113e-06, "loss": 0.5785, "step": 807 }, { "epoch": 1.4134110787172012, "grad_norm": 0.6765893697738647, "learning_rate": 6.353890833984258e-06, "loss": 0.5887, "step": 808 }, { "epoch": 1.4151603498542273, "grad_norm": 0.6773227453231812, "learning_rate": 6.344075475930791e-06, "loss": 0.6054, "step": 809 }, { "epoch": 1.4169096209912537, "grad_norm": 0.6941475868225098, "learning_rate": 6.334254531658107e-06, "loss": 0.5592, "step": 810 }, { "epoch": 1.41865889212828, "grad_norm": 0.6020407676696777, "learning_rate": 6.3244280419838156e-06, "loss": 0.5768, "step": 811 }, { "epoch": 1.420408163265306, "grad_norm": 0.6604232788085938, "learning_rate": 6.3145960477485715e-06, "loss": 0.5938, "step": 812 }, { "epoch": 1.4221574344023324, "grad_norm": 0.7064168453216553, "learning_rate": 6.304758589815911e-06, "loss": 0.592, "step": 813 }, { "epoch": 1.4239067055393586, "grad_norm": 0.6644725203514099, "learning_rate": 6.294915709072078e-06, "loss": 0.5707, "step": 814 }, { "epoch": 1.4256559766763848, "grad_norm": 0.8222501873970032, "learning_rate": 6.285067446425852e-06, "loss": 0.5885, "step": 815 }, { "epoch": 1.4274052478134112, "grad_norm": 0.6752313375473022, "learning_rate": 6.275213842808383e-06, "loss": 0.5785, "step": 816 }, { "epoch": 1.4291545189504373, "grad_norm": 0.8543202877044678, "learning_rate": 6.265354939173019e-06, "loss": 0.5618, "step": 817 }, { "epoch": 1.4309037900874635, "grad_norm": 0.7995426654815674, "learning_rate": 6.255490776495133e-06, "loss": 0.5505, "step": 818 }, { "epoch": 1.4326530612244899, "grad_norm": 0.7217656373977661, "learning_rate": 6.245621395771961e-06, "loss": 0.566, "step": 819 }, { "epoch": 1.434402332361516, "grad_norm": 0.748439371585846, "learning_rate": 6.23574683802242e-06, "loss": 0.5726, "step": 820 }, { "epoch": 1.4361516034985422, "grad_norm": 0.920210599899292, "learning_rate": 6.22586714428695e-06, "loss": 0.5654, "step": 821 }, { "epoch": 1.4379008746355686, "grad_norm": 0.7262372970581055, "learning_rate": 6.21598235562733e-06, "loss": 0.5716, "step": 822 }, { "epoch": 1.4396501457725948, "grad_norm": 0.6976616382598877, "learning_rate": 6.2060925131265205e-06, "loss": 0.5815, "step": 823 }, { "epoch": 1.441399416909621, "grad_norm": 0.8690890669822693, "learning_rate": 6.196197657888482e-06, "loss": 0.5749, "step": 824 }, { "epoch": 1.4431486880466473, "grad_norm": 0.7894096970558167, "learning_rate": 6.186297831038013e-06, "loss": 0.5785, "step": 825 }, { "epoch": 1.4448979591836735, "grad_norm": 0.7988227605819702, "learning_rate": 6.176393073720571e-06, "loss": 0.6016, "step": 826 }, { "epoch": 1.4466472303206996, "grad_norm": 0.9592409729957581, "learning_rate": 6.166483427102109e-06, "loss": 0.5743, "step": 827 }, { "epoch": 1.448396501457726, "grad_norm": 0.7138217091560364, "learning_rate": 6.156568932368901e-06, "loss": 0.5699, "step": 828 }, { "epoch": 1.4501457725947522, "grad_norm": 0.7282622456550598, "learning_rate": 6.146649630727363e-06, "loss": 0.5623, "step": 829 }, { "epoch": 1.4518950437317784, "grad_norm": 0.8291656970977783, "learning_rate": 6.1367255634039014e-06, "loss": 0.571, "step": 830 }, { "epoch": 1.4536443148688047, "grad_norm": 0.7165212035179138, "learning_rate": 6.12679677164472e-06, "loss": 0.5573, "step": 831 }, { "epoch": 1.455393586005831, "grad_norm": 0.6736827492713928, "learning_rate": 6.116863296715661e-06, "loss": 0.555, "step": 832 }, { "epoch": 1.457142857142857, "grad_norm": 0.7022991180419922, "learning_rate": 6.106925179902035e-06, "loss": 0.5814, "step": 833 }, { "epoch": 1.4588921282798835, "grad_norm": 0.6816012263298035, "learning_rate": 6.096982462508436e-06, "loss": 0.569, "step": 834 }, { "epoch": 1.4606413994169096, "grad_norm": 0.6301626563072205, "learning_rate": 6.087035185858591e-06, "loss": 0.5754, "step": 835 }, { "epoch": 1.4623906705539358, "grad_norm": 0.6720054149627686, "learning_rate": 6.077083391295163e-06, "loss": 0.585, "step": 836 }, { "epoch": 1.4641399416909622, "grad_norm": 0.682899534702301, "learning_rate": 6.067127120179603e-06, "loss": 0.5616, "step": 837 }, { "epoch": 1.4658892128279883, "grad_norm": 0.6826781630516052, "learning_rate": 6.057166413891961e-06, "loss": 0.5648, "step": 838 }, { "epoch": 1.4676384839650145, "grad_norm": 0.6166812181472778, "learning_rate": 6.047201313830724e-06, "loss": 0.5696, "step": 839 }, { "epoch": 1.469387755102041, "grad_norm": 0.6635741591453552, "learning_rate": 6.0372318614126345e-06, "loss": 0.5749, "step": 840 }, { "epoch": 1.471137026239067, "grad_norm": 0.6896770000457764, "learning_rate": 6.027258098072532e-06, "loss": 0.5977, "step": 841 }, { "epoch": 1.4728862973760932, "grad_norm": 0.6588635444641113, "learning_rate": 6.0172800652631706e-06, "loss": 0.5695, "step": 842 }, { "epoch": 1.4746355685131196, "grad_norm": 0.6895458102226257, "learning_rate": 6.007297804455042e-06, "loss": 0.5798, "step": 843 }, { "epoch": 1.4763848396501458, "grad_norm": 0.6969309449195862, "learning_rate": 5.997311357136224e-06, "loss": 0.5885, "step": 844 }, { "epoch": 1.478134110787172, "grad_norm": 0.6633201837539673, "learning_rate": 5.987320764812178e-06, "loss": 0.5554, "step": 845 }, { "epoch": 1.4798833819241983, "grad_norm": 0.6309625506401062, "learning_rate": 5.9773260690056065e-06, "loss": 0.5493, "step": 846 }, { "epoch": 1.4816326530612245, "grad_norm": 0.6361517906188965, "learning_rate": 5.967327311256261e-06, "loss": 0.591, "step": 847 }, { "epoch": 1.4833819241982507, "grad_norm": 0.6687442660331726, "learning_rate": 5.957324533120773e-06, "loss": 0.5462, "step": 848 }, { "epoch": 1.485131195335277, "grad_norm": 0.6592605710029602, "learning_rate": 5.947317776172488e-06, "loss": 0.5858, "step": 849 }, { "epoch": 1.4868804664723032, "grad_norm": 0.6392437219619751, "learning_rate": 5.937307082001285e-06, "loss": 0.5878, "step": 850 }, { "epoch": 1.4886297376093294, "grad_norm": 0.6975717544555664, "learning_rate": 5.92729249221341e-06, "loss": 0.592, "step": 851 }, { "epoch": 1.4903790087463558, "grad_norm": 0.6584556102752686, "learning_rate": 5.917274048431294e-06, "loss": 0.5913, "step": 852 }, { "epoch": 1.492128279883382, "grad_norm": 0.6294975876808167, "learning_rate": 5.907251792293393e-06, "loss": 0.5785, "step": 853 }, { "epoch": 1.493877551020408, "grad_norm": 0.6642628312110901, "learning_rate": 5.897225765454006e-06, "loss": 0.5705, "step": 854 }, { "epoch": 1.4956268221574345, "grad_norm": 0.7764820456504822, "learning_rate": 5.887196009583098e-06, "loss": 0.5721, "step": 855 }, { "epoch": 1.4973760932944606, "grad_norm": 0.6070895791053772, "learning_rate": 5.8771625663661395e-06, "loss": 0.5762, "step": 856 }, { "epoch": 1.4991253644314868, "grad_norm": 0.6659649014472961, "learning_rate": 5.867125477503922e-06, "loss": 0.5872, "step": 857 }, { "epoch": 1.500874635568513, "grad_norm": 0.7116081118583679, "learning_rate": 5.857084784712392e-06, "loss": 0.5681, "step": 858 }, { "epoch": 1.5026239067055394, "grad_norm": 0.6801527738571167, "learning_rate": 5.847040529722473e-06, "loss": 0.5845, "step": 859 }, { "epoch": 1.5043731778425657, "grad_norm": 0.6576544642448425, "learning_rate": 5.8369927542798935e-06, "loss": 0.5915, "step": 860 }, { "epoch": 1.5061224489795917, "grad_norm": 0.6720598340034485, "learning_rate": 5.826941500145014e-06, "loss": 0.5891, "step": 861 }, { "epoch": 1.507871720116618, "grad_norm": 0.7170236110687256, "learning_rate": 5.816886809092651e-06, "loss": 0.5567, "step": 862 }, { "epoch": 1.5096209912536445, "grad_norm": 0.6796704530715942, "learning_rate": 5.806828722911911e-06, "loss": 0.5808, "step": 863 }, { "epoch": 1.5113702623906704, "grad_norm": 0.7269731163978577, "learning_rate": 5.796767283406005e-06, "loss": 0.599, "step": 864 }, { "epoch": 1.5131195335276968, "grad_norm": 0.7056795358657837, "learning_rate": 5.786702532392084e-06, "loss": 0.5478, "step": 865 }, { "epoch": 1.5148688046647232, "grad_norm": 0.7382676601409912, "learning_rate": 5.776634511701064e-06, "loss": 0.5783, "step": 866 }, { "epoch": 1.5166180758017491, "grad_norm": 0.7234690189361572, "learning_rate": 5.766563263177446e-06, "loss": 0.5816, "step": 867 }, { "epoch": 1.5183673469387755, "grad_norm": 0.6642122268676758, "learning_rate": 5.756488828679149e-06, "loss": 0.5848, "step": 868 }, { "epoch": 1.520116618075802, "grad_norm": 0.7130971550941467, "learning_rate": 5.746411250077334e-06, "loss": 0.5903, "step": 869 }, { "epoch": 1.5218658892128278, "grad_norm": 0.6897161602973938, "learning_rate": 5.736330569256227e-06, "loss": 0.5528, "step": 870 }, { "epoch": 1.5236151603498542, "grad_norm": 0.7037844657897949, "learning_rate": 5.726246828112954e-06, "loss": 0.5689, "step": 871 }, { "epoch": 1.5253644314868806, "grad_norm": 0.6794323325157166, "learning_rate": 5.7161600685573485e-06, "loss": 0.5543, "step": 872 }, { "epoch": 1.5271137026239066, "grad_norm": 0.6707441210746765, "learning_rate": 5.706070332511799e-06, "loss": 0.5675, "step": 873 }, { "epoch": 1.528862973760933, "grad_norm": 0.625310480594635, "learning_rate": 5.695977661911063e-06, "loss": 0.576, "step": 874 }, { "epoch": 1.5306122448979593, "grad_norm": 0.7004854679107666, "learning_rate": 5.685882098702092e-06, "loss": 0.5865, "step": 875 }, { "epoch": 1.5323615160349853, "grad_norm": 0.678975522518158, "learning_rate": 5.675783684843862e-06, "loss": 0.5785, "step": 876 }, { "epoch": 1.5341107871720117, "grad_norm": 0.6861137747764587, "learning_rate": 5.665682462307193e-06, "loss": 0.5821, "step": 877 }, { "epoch": 1.535860058309038, "grad_norm": 0.6619076132774353, "learning_rate": 5.655578473074584e-06, "loss": 0.5608, "step": 878 }, { "epoch": 1.537609329446064, "grad_norm": 0.7261517643928528, "learning_rate": 5.645471759140029e-06, "loss": 0.5699, "step": 879 }, { "epoch": 1.5393586005830904, "grad_norm": 1.2281841039657593, "learning_rate": 5.635362362508846e-06, "loss": 0.5773, "step": 880 }, { "epoch": 1.5411078717201168, "grad_norm": 0.6631203293800354, "learning_rate": 5.625250325197505e-06, "loss": 0.5797, "step": 881 }, { "epoch": 1.5428571428571427, "grad_norm": 0.6712144017219543, "learning_rate": 5.615135689233453e-06, "loss": 0.5871, "step": 882 }, { "epoch": 1.544606413994169, "grad_norm": 0.7728083729743958, "learning_rate": 5.605018496654929e-06, "loss": 0.6037, "step": 883 }, { "epoch": 1.5463556851311955, "grad_norm": 0.6966935992240906, "learning_rate": 5.59489878951081e-06, "loss": 0.5923, "step": 884 }, { "epoch": 1.5481049562682214, "grad_norm": 0.6382519602775574, "learning_rate": 5.584776609860414e-06, "loss": 0.5838, "step": 885 }, { "epoch": 1.5498542274052478, "grad_norm": 0.8038651943206787, "learning_rate": 5.574651999773336e-06, "loss": 0.5824, "step": 886 }, { "epoch": 1.5516034985422742, "grad_norm": 0.7047119736671448, "learning_rate": 5.564525001329282e-06, "loss": 0.5982, "step": 887 }, { "epoch": 1.5533527696793001, "grad_norm": 0.6902878284454346, "learning_rate": 5.55439565661787e-06, "loss": 0.578, "step": 888 }, { "epoch": 1.5551020408163265, "grad_norm": 0.8142916560173035, "learning_rate": 5.544264007738482e-06, "loss": 0.5652, "step": 889 }, { "epoch": 1.556851311953353, "grad_norm": 0.7916989922523499, "learning_rate": 5.53413009680007e-06, "loss": 0.5945, "step": 890 }, { "epoch": 1.5586005830903789, "grad_norm": 0.7181157469749451, "learning_rate": 5.523993965920988e-06, "loss": 0.5672, "step": 891 }, { "epoch": 1.5603498542274052, "grad_norm": 0.7353807687759399, "learning_rate": 5.5138556572288186e-06, "loss": 0.5911, "step": 892 }, { "epoch": 1.5620991253644316, "grad_norm": 0.7604122757911682, "learning_rate": 5.503715212860194e-06, "loss": 0.5715, "step": 893 }, { "epoch": 1.5638483965014576, "grad_norm": 0.7147740125656128, "learning_rate": 5.493572674960625e-06, "loss": 0.5843, "step": 894 }, { "epoch": 1.565597667638484, "grad_norm": 0.6923832297325134, "learning_rate": 5.483428085684318e-06, "loss": 0.5789, "step": 895 }, { "epoch": 1.5673469387755103, "grad_norm": 0.6570796966552734, "learning_rate": 5.473281487194015e-06, "loss": 0.5697, "step": 896 }, { "epoch": 1.5690962099125363, "grad_norm": 0.7098940014839172, "learning_rate": 5.4631329216607955e-06, "loss": 0.5853, "step": 897 }, { "epoch": 1.5708454810495627, "grad_norm": 0.7151067852973938, "learning_rate": 5.452982431263928e-06, "loss": 0.5798, "step": 898 }, { "epoch": 1.572594752186589, "grad_norm": 0.6810306310653687, "learning_rate": 5.44283005819067e-06, "loss": 0.565, "step": 899 }, { "epoch": 1.574344023323615, "grad_norm": 0.7741630673408508, "learning_rate": 5.432675844636111e-06, "loss": 0.5897, "step": 900 }, { "epoch": 1.5760932944606414, "grad_norm": 0.7045981287956238, "learning_rate": 5.422519832802988e-06, "loss": 0.5536, "step": 901 }, { "epoch": 1.5778425655976678, "grad_norm": 0.6886359453201294, "learning_rate": 5.4123620649015095e-06, "loss": 0.5544, "step": 902 }, { "epoch": 1.5795918367346937, "grad_norm": 0.6761001348495483, "learning_rate": 5.402202583149184e-06, "loss": 0.581, "step": 903 }, { "epoch": 1.58134110787172, "grad_norm": 0.6551448106765747, "learning_rate": 5.3920414297706454e-06, "loss": 0.5773, "step": 904 }, { "epoch": 1.5830903790087465, "grad_norm": 0.692683219909668, "learning_rate": 5.3818786469974735e-06, "loss": 0.5618, "step": 905 }, { "epoch": 1.5848396501457724, "grad_norm": 0.7273074388504028, "learning_rate": 5.371714277068016e-06, "loss": 0.5705, "step": 906 }, { "epoch": 1.5865889212827988, "grad_norm": 0.6677204370498657, "learning_rate": 5.3615483622272235e-06, "loss": 0.5883, "step": 907 }, { "epoch": 1.5883381924198252, "grad_norm": 0.6752099990844727, "learning_rate": 5.351380944726465e-06, "loss": 0.5709, "step": 908 }, { "epoch": 1.5900874635568512, "grad_norm": 0.6421010494232178, "learning_rate": 5.341212066823356e-06, "loss": 0.5474, "step": 909 }, { "epoch": 1.5918367346938775, "grad_norm": 0.6561928391456604, "learning_rate": 5.331041770781578e-06, "loss": 0.5827, "step": 910 }, { "epoch": 1.593586005830904, "grad_norm": 0.6550191044807434, "learning_rate": 5.32087009887071e-06, "loss": 0.5736, "step": 911 }, { "epoch": 1.5953352769679299, "grad_norm": 0.611761212348938, "learning_rate": 5.310697093366049e-06, "loss": 0.5725, "step": 912 }, { "epoch": 1.5970845481049563, "grad_norm": 0.7253372669219971, "learning_rate": 5.300522796548433e-06, "loss": 0.5794, "step": 913 }, { "epoch": 1.5988338192419826, "grad_norm": 0.6736083030700684, "learning_rate": 5.290347250704069e-06, "loss": 0.5812, "step": 914 }, { "epoch": 1.6005830903790086, "grad_norm": 0.6749951243400574, "learning_rate": 5.280170498124353e-06, "loss": 0.5788, "step": 915 }, { "epoch": 1.602332361516035, "grad_norm": 0.707821786403656, "learning_rate": 5.269992581105698e-06, "loss": 0.5732, "step": 916 }, { "epoch": 1.6040816326530614, "grad_norm": 0.6301819086074829, "learning_rate": 5.259813541949358e-06, "loss": 0.5559, "step": 917 }, { "epoch": 1.6058309037900873, "grad_norm": 0.6943726539611816, "learning_rate": 5.249633422961245e-06, "loss": 0.5587, "step": 918 }, { "epoch": 1.6075801749271137, "grad_norm": 0.67755526304245, "learning_rate": 5.239452266451767e-06, "loss": 0.5843, "step": 919 }, { "epoch": 1.60932944606414, "grad_norm": 0.6510584354400635, "learning_rate": 5.229270114735639e-06, "loss": 0.591, "step": 920 }, { "epoch": 1.611078717201166, "grad_norm": 0.6916648745536804, "learning_rate": 5.219087010131712e-06, "loss": 0.5818, "step": 921 }, { "epoch": 1.6128279883381924, "grad_norm": 0.7172616124153137, "learning_rate": 5.208902994962799e-06, "loss": 0.5739, "step": 922 }, { "epoch": 1.6145772594752188, "grad_norm": 0.690065324306488, "learning_rate": 5.198718111555498e-06, "loss": 0.5872, "step": 923 }, { "epoch": 1.6163265306122447, "grad_norm": 0.7252439856529236, "learning_rate": 5.188532402240013e-06, "loss": 0.5717, "step": 924 }, { "epoch": 1.6180758017492711, "grad_norm": 0.7447409629821777, "learning_rate": 5.178345909349985e-06, "loss": 0.591, "step": 925 }, { "epoch": 1.6198250728862975, "grad_norm": 0.7306740283966064, "learning_rate": 5.168158675222306e-06, "loss": 0.5819, "step": 926 }, { "epoch": 1.6215743440233235, "grad_norm": 0.7323715686798096, "learning_rate": 5.1579707421969576e-06, "loss": 0.5829, "step": 927 }, { "epoch": 1.6233236151603498, "grad_norm": 0.683120608329773, "learning_rate": 5.1477821526168125e-06, "loss": 0.5708, "step": 928 }, { "epoch": 1.6250728862973762, "grad_norm": 0.6547465324401855, "learning_rate": 5.137592948827486e-06, "loss": 0.5607, "step": 929 }, { "epoch": 1.6268221574344022, "grad_norm": 0.6907232999801636, "learning_rate": 5.1274031731771356e-06, "loss": 0.5942, "step": 930 }, { "epoch": 1.6285714285714286, "grad_norm": 0.6489132642745972, "learning_rate": 5.117212868016303e-06, "loss": 0.5876, "step": 931 }, { "epoch": 1.630320699708455, "grad_norm": 0.6566838026046753, "learning_rate": 5.107022075697727e-06, "loss": 0.5757, "step": 932 }, { "epoch": 1.6320699708454809, "grad_norm": 0.7490189671516418, "learning_rate": 5.096830838576171e-06, "loss": 0.6012, "step": 933 }, { "epoch": 1.6338192419825073, "grad_norm": 0.6975552439689636, "learning_rate": 5.086639199008251e-06, "loss": 0.5798, "step": 934 }, { "epoch": 1.6355685131195337, "grad_norm": 0.7196182608604431, "learning_rate": 5.076447199352248e-06, "loss": 0.5611, "step": 935 }, { "epoch": 1.6373177842565596, "grad_norm": 0.6815073490142822, "learning_rate": 5.066254881967948e-06, "loss": 0.573, "step": 936 }, { "epoch": 1.639067055393586, "grad_norm": 0.68426913022995, "learning_rate": 5.0560622892164504e-06, "loss": 0.5993, "step": 937 }, { "epoch": 1.6408163265306124, "grad_norm": 0.7131015658378601, "learning_rate": 5.045869463460006e-06, "loss": 0.5602, "step": 938 }, { "epoch": 1.6425655976676383, "grad_norm": 0.7022716999053955, "learning_rate": 5.035676447061827e-06, "loss": 0.5943, "step": 939 }, { "epoch": 1.6443148688046647, "grad_norm": 0.7858887314796448, "learning_rate": 5.025483282385922e-06, "loss": 0.5795, "step": 940 }, { "epoch": 1.646064139941691, "grad_norm": 0.6410307288169861, "learning_rate": 5.015290011796919e-06, "loss": 0.5699, "step": 941 }, { "epoch": 1.647813411078717, "grad_norm": 0.656557559967041, "learning_rate": 5.005096677659875e-06, "loss": 0.5768, "step": 942 }, { "epoch": 1.6495626822157434, "grad_norm": 0.6766897439956665, "learning_rate": 4.994903322340125e-06, "loss": 0.5681, "step": 943 }, { "epoch": 1.6513119533527698, "grad_norm": 0.654792070388794, "learning_rate": 4.984709988203083e-06, "loss": 0.5968, "step": 944 }, { "epoch": 1.6530612244897958, "grad_norm": 0.6835823059082031, "learning_rate": 4.974516717614079e-06, "loss": 0.5919, "step": 945 }, { "epoch": 1.6548104956268221, "grad_norm": 0.6917576193809509, "learning_rate": 4.9643235529381745e-06, "loss": 0.5725, "step": 946 }, { "epoch": 1.6565597667638485, "grad_norm": 0.7793366312980652, "learning_rate": 4.954130536539996e-06, "loss": 0.5712, "step": 947 }, { "epoch": 1.6583090379008745, "grad_norm": 0.6362972259521484, "learning_rate": 4.943937710783551e-06, "loss": 0.5587, "step": 948 }, { "epoch": 1.6600583090379009, "grad_norm": 0.6876039505004883, "learning_rate": 4.9337451180320545e-06, "loss": 0.5773, "step": 949 }, { "epoch": 1.6618075801749272, "grad_norm": 0.7136940956115723, "learning_rate": 4.923552800647753e-06, "loss": 0.5788, "step": 950 }, { "epoch": 1.6635568513119532, "grad_norm": 0.7295624613761902, "learning_rate": 4.913360800991751e-06, "loss": 0.5871, "step": 951 }, { "epoch": 1.6653061224489796, "grad_norm": 0.719811737537384, "learning_rate": 4.90316916142383e-06, "loss": 0.5701, "step": 952 }, { "epoch": 1.667055393586006, "grad_norm": 0.6336323618888855, "learning_rate": 4.892977924302274e-06, "loss": 0.5755, "step": 953 }, { "epoch": 1.668804664723032, "grad_norm": 0.6984180212020874, "learning_rate": 4.882787131983698e-06, "loss": 0.5799, "step": 954 }, { "epoch": 1.6705539358600583, "grad_norm": 0.6967098116874695, "learning_rate": 4.872596826822866e-06, "loss": 0.5448, "step": 955 }, { "epoch": 1.6723032069970847, "grad_norm": 0.6805486679077148, "learning_rate": 4.862407051172517e-06, "loss": 0.5882, "step": 956 }, { "epoch": 1.6740524781341106, "grad_norm": 0.7547163367271423, "learning_rate": 4.852217847383188e-06, "loss": 0.5744, "step": 957 }, { "epoch": 1.675801749271137, "grad_norm": 0.7194184064865112, "learning_rate": 4.842029257803045e-06, "loss": 0.5871, "step": 958 }, { "epoch": 1.6775510204081634, "grad_norm": 0.8334743976593018, "learning_rate": 4.8318413247776944e-06, "loss": 0.5688, "step": 959 }, { "epoch": 1.6793002915451893, "grad_norm": 0.7575350999832153, "learning_rate": 4.821654090650015e-06, "loss": 0.5974, "step": 960 }, { "epoch": 1.6810495626822157, "grad_norm": 0.5841570496559143, "learning_rate": 4.811467597759988e-06, "loss": 0.5653, "step": 961 }, { "epoch": 1.6827988338192421, "grad_norm": 0.627515435218811, "learning_rate": 4.801281888444504e-06, "loss": 0.5996, "step": 962 }, { "epoch": 1.684548104956268, "grad_norm": 0.6767963171005249, "learning_rate": 4.791097005037204e-06, "loss": 0.5691, "step": 963 }, { "epoch": 1.6862973760932944, "grad_norm": 0.7417729496955872, "learning_rate": 4.78091298986829e-06, "loss": 0.6017, "step": 964 }, { "epoch": 1.6880466472303208, "grad_norm": 0.71535325050354, "learning_rate": 4.770729885264363e-06, "loss": 0.5875, "step": 965 }, { "epoch": 1.689795918367347, "grad_norm": 0.6249955892562866, "learning_rate": 4.760547733548233e-06, "loss": 0.5781, "step": 966 }, { "epoch": 1.6915451895043732, "grad_norm": 0.7291207313537598, "learning_rate": 4.750366577038754e-06, "loss": 0.5962, "step": 967 }, { "epoch": 1.6932944606413995, "grad_norm": 0.7336034774780273, "learning_rate": 4.740186458050644e-06, "loss": 0.5623, "step": 968 }, { "epoch": 1.6950437317784257, "grad_norm": 0.6804900169372559, "learning_rate": 4.7300074188943025e-06, "loss": 0.5898, "step": 969 }, { "epoch": 1.6967930029154519, "grad_norm": 0.6444945931434631, "learning_rate": 4.719829501875649e-06, "loss": 0.5601, "step": 970 }, { "epoch": 1.6985422740524783, "grad_norm": 0.6665693521499634, "learning_rate": 4.709652749295932e-06, "loss": 0.5776, "step": 971 }, { "epoch": 1.7002915451895044, "grad_norm": 0.6981083154678345, "learning_rate": 4.6994772034515686e-06, "loss": 0.5675, "step": 972 }, { "epoch": 1.7020408163265306, "grad_norm": 0.6743907928466797, "learning_rate": 4.689302906633953e-06, "loss": 0.5658, "step": 973 }, { "epoch": 1.703790087463557, "grad_norm": 0.6587707996368408, "learning_rate": 4.679129901129291e-06, "loss": 0.5654, "step": 974 }, { "epoch": 1.7055393586005831, "grad_norm": 0.6273492574691772, "learning_rate": 4.668958229218424e-06, "loss": 0.5805, "step": 975 }, { "epoch": 1.7072886297376093, "grad_norm": 0.6548513770103455, "learning_rate": 4.6587879331766465e-06, "loss": 0.569, "step": 976 }, { "epoch": 1.7090379008746357, "grad_norm": 0.6829813122749329, "learning_rate": 4.6486190552735375e-06, "loss": 0.5693, "step": 977 }, { "epoch": 1.7107871720116619, "grad_norm": 0.6387469172477722, "learning_rate": 4.6384516377727765e-06, "loss": 0.5645, "step": 978 }, { "epoch": 1.712536443148688, "grad_norm": 0.6949775218963623, "learning_rate": 4.628285722931986e-06, "loss": 0.5624, "step": 979 }, { "epoch": 1.7142857142857144, "grad_norm": 0.6172347068786621, "learning_rate": 4.618121353002529e-06, "loss": 0.5722, "step": 980 }, { "epoch": 1.7160349854227406, "grad_norm": 0.6026545166969299, "learning_rate": 4.607958570229355e-06, "loss": 0.5856, "step": 981 }, { "epoch": 1.7177842565597667, "grad_norm": 0.6816028356552124, "learning_rate": 4.597797416850817e-06, "loss": 0.5692, "step": 982 }, { "epoch": 1.7195335276967931, "grad_norm": 0.6682668924331665, "learning_rate": 4.587637935098492e-06, "loss": 0.5842, "step": 983 }, { "epoch": 1.7212827988338193, "grad_norm": 0.6090446710586548, "learning_rate": 4.5774801671970135e-06, "loss": 0.586, "step": 984 }, { "epoch": 1.7230320699708455, "grad_norm": 0.7078045010566711, "learning_rate": 4.5673241553638895e-06, "loss": 0.5605, "step": 985 }, { "epoch": 1.7247813411078718, "grad_norm": 0.6497894525527954, "learning_rate": 4.557169941809332e-06, "loss": 0.5666, "step": 986 }, { "epoch": 1.726530612244898, "grad_norm": 0.6381052136421204, "learning_rate": 4.547017568736074e-06, "loss": 0.5649, "step": 987 }, { "epoch": 1.7282798833819242, "grad_norm": 0.6716902852058411, "learning_rate": 4.536867078339205e-06, "loss": 0.5879, "step": 988 }, { "epoch": 1.7300291545189506, "grad_norm": 0.8448407053947449, "learning_rate": 4.526718512805987e-06, "loss": 0.6044, "step": 989 }, { "epoch": 1.7317784256559767, "grad_norm": 0.5975463390350342, "learning_rate": 4.516571914315683e-06, "loss": 0.5872, "step": 990 }, { "epoch": 1.733527696793003, "grad_norm": 0.5962976217269897, "learning_rate": 4.506427325039377e-06, "loss": 0.5733, "step": 991 }, { "epoch": 1.7352769679300293, "grad_norm": 0.660053014755249, "learning_rate": 4.496284787139807e-06, "loss": 0.5663, "step": 992 }, { "epoch": 1.7370262390670554, "grad_norm": 0.7503305077552795, "learning_rate": 4.486144342771183e-06, "loss": 0.5742, "step": 993 }, { "epoch": 1.7387755102040816, "grad_norm": 0.6584203839302063, "learning_rate": 4.4760060340790134e-06, "loss": 0.5964, "step": 994 }, { "epoch": 1.740524781341108, "grad_norm": 0.6103390455245972, "learning_rate": 4.465869903199931e-06, "loss": 0.5642, "step": 995 }, { "epoch": 1.7422740524781342, "grad_norm": 0.7414954900741577, "learning_rate": 4.4557359922615185e-06, "loss": 0.5749, "step": 996 }, { "epoch": 1.7440233236151603, "grad_norm": 0.6772499680519104, "learning_rate": 4.445604343382132e-06, "loss": 0.5876, "step": 997 }, { "epoch": 1.7457725947521867, "grad_norm": 0.6077747344970703, "learning_rate": 4.435474998670721e-06, "loss": 0.5764, "step": 998 }, { "epoch": 1.7475218658892129, "grad_norm": 0.6100650429725647, "learning_rate": 4.425348000226664e-06, "loss": 0.5734, "step": 999 }, { "epoch": 1.749271137026239, "grad_norm": 0.754660964012146, "learning_rate": 4.415223390139588e-06, "loss": 0.5973, "step": 1000 }, { "epoch": 1.7510204081632654, "grad_norm": 0.6599634885787964, "learning_rate": 4.405101210489192e-06, "loss": 0.5708, "step": 1001 }, { "epoch": 1.7527696793002916, "grad_norm": 0.6275045275688171, "learning_rate": 4.394981503345071e-06, "loss": 0.5952, "step": 1002 }, { "epoch": 1.7545189504373178, "grad_norm": 0.7311594486236572, "learning_rate": 4.384864310766549e-06, "loss": 0.5744, "step": 1003 }, { "epoch": 1.7562682215743441, "grad_norm": 0.6916747689247131, "learning_rate": 4.374749674802496e-06, "loss": 0.5772, "step": 1004 }, { "epoch": 1.7580174927113703, "grad_norm": 0.6963610053062439, "learning_rate": 4.364637637491156e-06, "loss": 0.5778, "step": 1005 }, { "epoch": 1.7597667638483965, "grad_norm": 0.5920865535736084, "learning_rate": 4.3545282408599734e-06, "loss": 0.583, "step": 1006 }, { "epoch": 1.7615160349854229, "grad_norm": 0.6805728673934937, "learning_rate": 4.344421526925417e-06, "loss": 0.5818, "step": 1007 }, { "epoch": 1.763265306122449, "grad_norm": 0.6573500633239746, "learning_rate": 4.334317537692809e-06, "loss": 0.5921, "step": 1008 }, { "epoch": 1.7650145772594752, "grad_norm": 0.6428177952766418, "learning_rate": 4.32421631515614e-06, "loss": 0.5738, "step": 1009 }, { "epoch": 1.7667638483965016, "grad_norm": 0.6631107926368713, "learning_rate": 4.314117901297909e-06, "loss": 0.5832, "step": 1010 }, { "epoch": 1.7685131195335277, "grad_norm": 0.7416541576385498, "learning_rate": 4.304022338088939e-06, "loss": 0.5667, "step": 1011 }, { "epoch": 1.770262390670554, "grad_norm": 0.6242367625236511, "learning_rate": 4.2939296674882025e-06, "loss": 0.5653, "step": 1012 }, { "epoch": 1.7720116618075803, "grad_norm": 0.6250543594360352, "learning_rate": 4.283839931442653e-06, "loss": 0.5682, "step": 1013 }, { "epoch": 1.7737609329446065, "grad_norm": 0.6133513450622559, "learning_rate": 4.273753171887049e-06, "loss": 0.5688, "step": 1014 }, { "epoch": 1.7755102040816326, "grad_norm": 0.683520495891571, "learning_rate": 4.263669430743774e-06, "loss": 0.5726, "step": 1015 }, { "epoch": 1.777259475218659, "grad_norm": 0.7369042038917542, "learning_rate": 4.253588749922668e-06, "loss": 0.5809, "step": 1016 }, { "epoch": 1.7790087463556852, "grad_norm": 0.6054936647415161, "learning_rate": 4.243511171320853e-06, "loss": 0.5837, "step": 1017 }, { "epoch": 1.7807580174927113, "grad_norm": 0.6819022297859192, "learning_rate": 4.233436736822556e-06, "loss": 0.5874, "step": 1018 }, { "epoch": 1.7825072886297377, "grad_norm": 0.7007023096084595, "learning_rate": 4.2233654882989385e-06, "loss": 0.5738, "step": 1019 }, { "epoch": 1.784256559766764, "grad_norm": 0.5976265072822571, "learning_rate": 4.213297467607917e-06, "loss": 0.5732, "step": 1020 }, { "epoch": 1.78600583090379, "grad_norm": 0.6654145121574402, "learning_rate": 4.203232716593997e-06, "loss": 0.568, "step": 1021 }, { "epoch": 1.7877551020408164, "grad_norm": 0.6601415276527405, "learning_rate": 4.193171277088092e-06, "loss": 0.5809, "step": 1022 }, { "epoch": 1.7895043731778426, "grad_norm": 0.6963457465171814, "learning_rate": 4.183113190907349e-06, "loss": 0.5629, "step": 1023 }, { "epoch": 1.7912536443148688, "grad_norm": 0.644846498966217, "learning_rate": 4.173058499854988e-06, "loss": 0.5721, "step": 1024 }, { "epoch": 1.7930029154518952, "grad_norm": 0.6340683102607727, "learning_rate": 4.163007245720107e-06, "loss": 0.5825, "step": 1025 }, { "epoch": 1.7947521865889213, "grad_norm": 0.7253589630126953, "learning_rate": 4.1529594702775266e-06, "loss": 0.5683, "step": 1026 }, { "epoch": 1.7965014577259475, "grad_norm": 0.7580778002738953, "learning_rate": 4.142915215287609e-06, "loss": 0.5581, "step": 1027 }, { "epoch": 1.7982507288629739, "grad_norm": 0.6836219429969788, "learning_rate": 4.13287452249608e-06, "loss": 0.5697, "step": 1028 }, { "epoch": 1.8, "grad_norm": 0.6754874587059021, "learning_rate": 4.122837433633862e-06, "loss": 0.5883, "step": 1029 }, { "epoch": 1.8017492711370262, "grad_norm": 0.6718679070472717, "learning_rate": 4.112803990416903e-06, "loss": 0.5857, "step": 1030 }, { "epoch": 1.8034985422740526, "grad_norm": 0.6555882692337036, "learning_rate": 4.102774234545996e-06, "loss": 0.5743, "step": 1031 }, { "epoch": 1.8052478134110788, "grad_norm": 0.65171217918396, "learning_rate": 4.0927482077066074e-06, "loss": 0.5772, "step": 1032 }, { "epoch": 1.806997084548105, "grad_norm": 0.7044389247894287, "learning_rate": 4.082725951568706e-06, "loss": 0.5531, "step": 1033 }, { "epoch": 1.8087463556851313, "grad_norm": 0.7278966903686523, "learning_rate": 4.0727075077865914e-06, "loss": 0.5886, "step": 1034 }, { "epoch": 1.8104956268221575, "grad_norm": 0.6201330423355103, "learning_rate": 4.062692917998716e-06, "loss": 0.5721, "step": 1035 }, { "epoch": 1.8122448979591836, "grad_norm": 0.6267753839492798, "learning_rate": 4.0526822238275145e-06, "loss": 0.5582, "step": 1036 }, { "epoch": 1.81399416909621, "grad_norm": 0.6683110594749451, "learning_rate": 4.042675466879228e-06, "loss": 0.6067, "step": 1037 }, { "epoch": 1.8157434402332362, "grad_norm": 0.6723374128341675, "learning_rate": 4.032672688743741e-06, "loss": 0.5928, "step": 1038 }, { "epoch": 1.8174927113702624, "grad_norm": 0.6783703565597534, "learning_rate": 4.022673930994394e-06, "loss": 0.5524, "step": 1039 }, { "epoch": 1.8192419825072887, "grad_norm": 0.6538788080215454, "learning_rate": 4.012679235187823e-06, "loss": 0.5758, "step": 1040 }, { "epoch": 1.820991253644315, "grad_norm": 0.7009226083755493, "learning_rate": 4.002688642863778e-06, "loss": 0.5886, "step": 1041 }, { "epoch": 1.822740524781341, "grad_norm": 0.6193081736564636, "learning_rate": 3.9927021955449585e-06, "loss": 0.5982, "step": 1042 }, { "epoch": 1.8244897959183675, "grad_norm": 0.6891264915466309, "learning_rate": 3.982719934736832e-06, "loss": 0.596, "step": 1043 }, { "epoch": 1.8262390670553936, "grad_norm": 0.6667318344116211, "learning_rate": 3.972741901927468e-06, "loss": 0.5956, "step": 1044 }, { "epoch": 1.8279883381924198, "grad_norm": 0.6532625555992126, "learning_rate": 3.962768138587367e-06, "loss": 0.5859, "step": 1045 }, { "epoch": 1.8297376093294462, "grad_norm": 0.6743453145027161, "learning_rate": 3.952798686169279e-06, "loss": 0.5823, "step": 1046 }, { "epoch": 1.8314868804664723, "grad_norm": 0.7015960216522217, "learning_rate": 3.942833586108039e-06, "loss": 0.5802, "step": 1047 }, { "epoch": 1.8332361516034985, "grad_norm": 0.6908312439918518, "learning_rate": 3.932872879820398e-06, "loss": 0.5929, "step": 1048 }, { "epoch": 1.834985422740525, "grad_norm": 0.5686475038528442, "learning_rate": 3.922916608704838e-06, "loss": 0.5712, "step": 1049 }, { "epoch": 1.836734693877551, "grad_norm": 0.6304417848587036, "learning_rate": 3.912964814141411e-06, "loss": 0.5774, "step": 1050 }, { "epoch": 1.8384839650145772, "grad_norm": 0.6867626905441284, "learning_rate": 3.903017537491564e-06, "loss": 0.5817, "step": 1051 }, { "epoch": 1.8402332361516036, "grad_norm": 0.6228721141815186, "learning_rate": 3.893074820097967e-06, "loss": 0.5671, "step": 1052 }, { "epoch": 1.8419825072886298, "grad_norm": 0.6590495109558105, "learning_rate": 3.8831367032843405e-06, "loss": 0.5672, "step": 1053 }, { "epoch": 1.843731778425656, "grad_norm": 0.6630164980888367, "learning_rate": 3.873203228355282e-06, "loss": 0.5855, "step": 1054 }, { "epoch": 1.8454810495626823, "grad_norm": 0.6653128862380981, "learning_rate": 3.8632744365961e-06, "loss": 0.5797, "step": 1055 }, { "epoch": 1.8472303206997085, "grad_norm": 0.6658947467803955, "learning_rate": 3.853350369272639e-06, "loss": 0.5912, "step": 1056 }, { "epoch": 1.8489795918367347, "grad_norm": 0.7562530636787415, "learning_rate": 3.843431067631103e-06, "loss": 0.5918, "step": 1057 }, { "epoch": 1.850728862973761, "grad_norm": 0.7188180088996887, "learning_rate": 3.8335165728978915e-06, "loss": 0.5904, "step": 1058 }, { "epoch": 1.8524781341107872, "grad_norm": 0.732384443283081, "learning_rate": 3.82360692627943e-06, "loss": 0.5934, "step": 1059 }, { "epoch": 1.8542274052478134, "grad_norm": 0.7590283155441284, "learning_rate": 3.8137021689619904e-06, "loss": 0.5777, "step": 1060 }, { "epoch": 1.8559766763848398, "grad_norm": 0.6291661262512207, "learning_rate": 3.8038023421115196e-06, "loss": 0.5705, "step": 1061 }, { "epoch": 1.857725947521866, "grad_norm": 0.6302904486656189, "learning_rate": 3.7939074868734816e-06, "loss": 0.5548, "step": 1062 }, { "epoch": 1.859475218658892, "grad_norm": 0.6758832335472107, "learning_rate": 3.7840176443726714e-06, "loss": 0.5936, "step": 1063 }, { "epoch": 1.8612244897959185, "grad_norm": 0.6698166728019714, "learning_rate": 3.7741328557130525e-06, "loss": 0.6165, "step": 1064 }, { "epoch": 1.8629737609329446, "grad_norm": 0.6682102084159851, "learning_rate": 3.76425316197758e-06, "loss": 0.5754, "step": 1065 }, { "epoch": 1.8647230320699708, "grad_norm": 0.635493278503418, "learning_rate": 3.7543786042280402e-06, "loss": 0.563, "step": 1066 }, { "epoch": 1.8664723032069972, "grad_norm": 0.6600708365440369, "learning_rate": 3.7445092235048685e-06, "loss": 0.5889, "step": 1067 }, { "epoch": 1.8682215743440234, "grad_norm": 0.6127699017524719, "learning_rate": 3.7346450608269817e-06, "loss": 0.5791, "step": 1068 }, { "epoch": 1.8699708454810495, "grad_norm": 0.653840959072113, "learning_rate": 3.7247861571916183e-06, "loss": 0.5568, "step": 1069 }, { "epoch": 1.871720116618076, "grad_norm": 0.6527422666549683, "learning_rate": 3.71493255357415e-06, "loss": 0.5629, "step": 1070 }, { "epoch": 1.873469387755102, "grad_norm": 0.6671414971351624, "learning_rate": 3.705084290927925e-06, "loss": 0.5784, "step": 1071 }, { "epoch": 1.8752186588921282, "grad_norm": 0.6347544193267822, "learning_rate": 3.6952414101840893e-06, "loss": 0.5866, "step": 1072 }, { "epoch": 1.8769679300291546, "grad_norm": 0.6391722559928894, "learning_rate": 3.6854039522514297e-06, "loss": 0.5725, "step": 1073 }, { "epoch": 1.8787172011661808, "grad_norm": 0.6548317670822144, "learning_rate": 3.6755719580161874e-06, "loss": 0.5844, "step": 1074 }, { "epoch": 1.880466472303207, "grad_norm": 0.6531338691711426, "learning_rate": 3.6657454683418927e-06, "loss": 0.5751, "step": 1075 }, { "epoch": 1.8822157434402333, "grad_norm": 0.6911947131156921, "learning_rate": 3.65592452406921e-06, "loss": 0.5445, "step": 1076 }, { "epoch": 1.8839650145772595, "grad_norm": 0.6676965951919556, "learning_rate": 3.6461091660157444e-06, "loss": 0.5756, "step": 1077 }, { "epoch": 1.8857142857142857, "grad_norm": 0.7559449076652527, "learning_rate": 3.636299434975889e-06, "loss": 0.5895, "step": 1078 }, { "epoch": 1.887463556851312, "grad_norm": 0.6454640030860901, "learning_rate": 3.6264953717206465e-06, "loss": 0.5681, "step": 1079 }, { "epoch": 1.8892128279883382, "grad_norm": 0.666427731513977, "learning_rate": 3.616697016997468e-06, "loss": 0.5822, "step": 1080 }, { "epoch": 1.8909620991253644, "grad_norm": 0.7664399147033691, "learning_rate": 3.6069044115300768e-06, "loss": 0.5619, "step": 1081 }, { "epoch": 1.8927113702623908, "grad_norm": 0.7227085828781128, "learning_rate": 3.597117596018299e-06, "loss": 0.5974, "step": 1082 }, { "epoch": 1.894460641399417, "grad_norm": 0.5986963510513306, "learning_rate": 3.5873366111379015e-06, "loss": 0.5877, "step": 1083 }, { "epoch": 1.896209912536443, "grad_norm": 0.6955671906471252, "learning_rate": 3.577561497540413e-06, "loss": 0.5809, "step": 1084 }, { "epoch": 1.8979591836734695, "grad_norm": 0.6043244004249573, "learning_rate": 3.5677922958529644e-06, "loss": 0.5477, "step": 1085 }, { "epoch": 1.8997084548104957, "grad_norm": 0.6180927157402039, "learning_rate": 3.558029046678108e-06, "loss": 0.5884, "step": 1086 }, { "epoch": 1.9014577259475218, "grad_norm": 0.6477891802787781, "learning_rate": 3.5482717905936692e-06, "loss": 0.55, "step": 1087 }, { "epoch": 1.9032069970845482, "grad_norm": 0.684007465839386, "learning_rate": 3.5385205681525538e-06, "loss": 0.5606, "step": 1088 }, { "epoch": 1.9049562682215744, "grad_norm": 0.6415033340454102, "learning_rate": 3.5287754198825942e-06, "loss": 0.5803, "step": 1089 }, { "epoch": 1.9067055393586005, "grad_norm": 0.6175705194473267, "learning_rate": 3.5190363862863807e-06, "loss": 0.5719, "step": 1090 }, { "epoch": 1.908454810495627, "grad_norm": 0.672552764415741, "learning_rate": 3.5093035078410844e-06, "loss": 0.5748, "step": 1091 }, { "epoch": 1.910204081632653, "grad_norm": 0.7378261685371399, "learning_rate": 3.4995768249982975e-06, "loss": 0.566, "step": 1092 }, { "epoch": 1.9119533527696793, "grad_norm": 0.7050798535346985, "learning_rate": 3.489856378183862e-06, "loss": 0.5873, "step": 1093 }, { "epoch": 1.9137026239067056, "grad_norm": 0.6606338024139404, "learning_rate": 3.4801422077977037e-06, "loss": 0.5612, "step": 1094 }, { "epoch": 1.9154518950437318, "grad_norm": 0.7884581089019775, "learning_rate": 3.4704343542136583e-06, "loss": 0.5781, "step": 1095 }, { "epoch": 1.917201166180758, "grad_norm": 0.6659044027328491, "learning_rate": 3.460732857779309e-06, "loss": 0.5961, "step": 1096 }, { "epoch": 1.9189504373177844, "grad_norm": 0.5889891982078552, "learning_rate": 3.451037758815821e-06, "loss": 0.579, "step": 1097 }, { "epoch": 1.9206997084548105, "grad_norm": 0.5918232202529907, "learning_rate": 3.4413490976177675e-06, "loss": 0.5706, "step": 1098 }, { "epoch": 1.9224489795918367, "grad_norm": 0.5739858746528625, "learning_rate": 3.4316669144529646e-06, "loss": 0.574, "step": 1099 }, { "epoch": 1.924198250728863, "grad_norm": 0.6529068350791931, "learning_rate": 3.421991249562304e-06, "loss": 0.5807, "step": 1100 }, { "epoch": 1.9259475218658892, "grad_norm": 0.7463613152503967, "learning_rate": 3.4123221431595915e-06, "loss": 0.5669, "step": 1101 }, { "epoch": 1.9276967930029154, "grad_norm": 0.6179770827293396, "learning_rate": 3.402659635431368e-06, "loss": 0.6047, "step": 1102 }, { "epoch": 1.9294460641399418, "grad_norm": 0.7788529992103577, "learning_rate": 3.3930037665367515e-06, "loss": 0.5778, "step": 1103 }, { "epoch": 1.931195335276968, "grad_norm": 0.6275160312652588, "learning_rate": 3.3833545766072706e-06, "loss": 0.5634, "step": 1104 }, { "epoch": 1.9329446064139941, "grad_norm": 0.6037013530731201, "learning_rate": 3.3737121057466936e-06, "loss": 0.5707, "step": 1105 }, { "epoch": 1.9346938775510205, "grad_norm": 0.5661295652389526, "learning_rate": 3.364076394030858e-06, "loss": 0.5772, "step": 1106 }, { "epoch": 1.9364431486880467, "grad_norm": 0.6255748867988586, "learning_rate": 3.354447481507517e-06, "loss": 0.546, "step": 1107 }, { "epoch": 1.9381924198250728, "grad_norm": 0.6482170224189758, "learning_rate": 3.3448254081961605e-06, "loss": 0.5866, "step": 1108 }, { "epoch": 1.9399416909620992, "grad_norm": 0.6148178577423096, "learning_rate": 3.335210214087853e-06, "loss": 0.575, "step": 1109 }, { "epoch": 1.9416909620991254, "grad_norm": 0.675525426864624, "learning_rate": 3.3256019391450696e-06, "loss": 0.5679, "step": 1110 }, { "epoch": 1.9434402332361516, "grad_norm": 0.5989593267440796, "learning_rate": 3.3160006233015284e-06, "loss": 0.5884, "step": 1111 }, { "epoch": 1.945189504373178, "grad_norm": 0.6681731343269348, "learning_rate": 3.3064063064620256e-06, "loss": 0.5795, "step": 1112 }, { "epoch": 1.9469387755102041, "grad_norm": 0.6558393239974976, "learning_rate": 3.2968190285022604e-06, "loss": 0.5723, "step": 1113 }, { "epoch": 1.9486880466472303, "grad_norm": 0.6447663307189941, "learning_rate": 3.2872388292686875e-06, "loss": 0.5984, "step": 1114 }, { "epoch": 1.9504373177842567, "grad_norm": 0.6010802388191223, "learning_rate": 3.2776657485783357e-06, "loss": 0.5702, "step": 1115 }, { "epoch": 1.9521865889212828, "grad_norm": 0.6223452091217041, "learning_rate": 3.2680998262186494e-06, "loss": 0.5851, "step": 1116 }, { "epoch": 1.953935860058309, "grad_norm": 0.670316219329834, "learning_rate": 3.258541101947318e-06, "loss": 0.5764, "step": 1117 }, { "epoch": 1.9556851311953354, "grad_norm": 0.6551936864852905, "learning_rate": 3.2489896154921196e-06, "loss": 0.5727, "step": 1118 }, { "epoch": 1.9574344023323615, "grad_norm": 0.6634239554405212, "learning_rate": 3.2394454065507497e-06, "loss": 0.5681, "step": 1119 }, { "epoch": 1.9591836734693877, "grad_norm": 0.6281231045722961, "learning_rate": 3.2299085147906506e-06, "loss": 0.5664, "step": 1120 }, { "epoch": 1.960932944606414, "grad_norm": 0.6562538146972656, "learning_rate": 3.220378979848865e-06, "loss": 0.5782, "step": 1121 }, { "epoch": 1.9626822157434403, "grad_norm": 0.6790986657142639, "learning_rate": 3.2108568413318485e-06, "loss": 0.5934, "step": 1122 }, { "epoch": 1.9644314868804664, "grad_norm": 0.6582399606704712, "learning_rate": 3.201342138815322e-06, "loss": 0.583, "step": 1123 }, { "epoch": 1.9661807580174928, "grad_norm": 0.6243076324462891, "learning_rate": 3.1918349118440956e-06, "loss": 0.5701, "step": 1124 }, { "epoch": 1.967930029154519, "grad_norm": 0.6541100144386292, "learning_rate": 3.182335199931917e-06, "loss": 0.5626, "step": 1125 }, { "epoch": 1.9696793002915451, "grad_norm": 0.612354576587677, "learning_rate": 3.1728430425612944e-06, "loss": 0.5779, "step": 1126 }, { "epoch": 1.9714285714285715, "grad_norm": 0.6859802007675171, "learning_rate": 3.163358479183338e-06, "loss": 0.5899, "step": 1127 }, { "epoch": 1.9731778425655977, "grad_norm": 0.6720437407493591, "learning_rate": 3.153881549217602e-06, "loss": 0.5671, "step": 1128 }, { "epoch": 1.9749271137026239, "grad_norm": 0.593874454498291, "learning_rate": 3.1444122920519075e-06, "loss": 0.5912, "step": 1129 }, { "epoch": 1.9766763848396502, "grad_norm": 0.651711642742157, "learning_rate": 3.13495074704219e-06, "loss": 0.5658, "step": 1130 }, { "epoch": 1.9784256559766764, "grad_norm": 0.6449437737464905, "learning_rate": 3.125496953512327e-06, "loss": 0.5616, "step": 1131 }, { "epoch": 1.9801749271137026, "grad_norm": 0.6473076343536377, "learning_rate": 3.116050950753989e-06, "loss": 0.5621, "step": 1132 }, { "epoch": 1.981924198250729, "grad_norm": 0.6565347909927368, "learning_rate": 3.1066127780264584e-06, "loss": 0.5742, "step": 1133 }, { "epoch": 1.9836734693877551, "grad_norm": 0.6539503335952759, "learning_rate": 3.097182474556476e-06, "loss": 0.6014, "step": 1134 }, { "epoch": 1.9854227405247813, "grad_norm": 0.6285736560821533, "learning_rate": 3.0877600795380803e-06, "loss": 0.574, "step": 1135 }, { "epoch": 1.9871720116618077, "grad_norm": 0.6051139235496521, "learning_rate": 3.0783456321324367e-06, "loss": 0.5736, "step": 1136 }, { "epoch": 1.9889212827988338, "grad_norm": 0.6531978249549866, "learning_rate": 3.0689391714676798e-06, "loss": 0.592, "step": 1137 }, { "epoch": 1.99067055393586, "grad_norm": 0.6725262999534607, "learning_rate": 3.059540736638751e-06, "loss": 0.5809, "step": 1138 }, { "epoch": 1.9924198250728864, "grad_norm": 0.6389681696891785, "learning_rate": 3.0501503667072367e-06, "loss": 0.5636, "step": 1139 }, { "epoch": 1.9941690962099126, "grad_norm": 0.6177360415458679, "learning_rate": 3.040768100701199e-06, "loss": 0.5783, "step": 1140 }, { "epoch": 1.9959183673469387, "grad_norm": 0.576633870601654, "learning_rate": 3.0313939776150213e-06, "loss": 0.567, "step": 1141 }, { "epoch": 1.9976676384839651, "grad_norm": 0.6641987562179565, "learning_rate": 3.0220280364092474e-06, "loss": 0.5553, "step": 1142 }, { "epoch": 1.9994169096209913, "grad_norm": 0.629604160785675, "learning_rate": 3.012670316010408e-06, "loss": 0.5764, "step": 1143 }, { "epoch": 2.0011661807580174, "grad_norm": 0.7002227306365967, "learning_rate": 3.003320855310873e-06, "loss": 0.5411, "step": 1144 }, { "epoch": 2.002915451895044, "grad_norm": 0.6642045974731445, "learning_rate": 2.993979693168679e-06, "loss": 0.5426, "step": 1145 }, { "epoch": 2.0046647230320698, "grad_norm": 0.6386356949806213, "learning_rate": 2.9846468684073783e-06, "loss": 0.5058, "step": 1146 }, { "epoch": 2.006413994169096, "grad_norm": 0.6340780854225159, "learning_rate": 2.9753224198158636e-06, "loss": 0.5323, "step": 1147 }, { "epoch": 2.0081632653061225, "grad_norm": 0.732230007648468, "learning_rate": 2.9660063861482207e-06, "loss": 0.5156, "step": 1148 }, { "epoch": 2.0099125364431485, "grad_norm": 0.7649560570716858, "learning_rate": 2.9566988061235606e-06, "loss": 0.5112, "step": 1149 }, { "epoch": 2.011661807580175, "grad_norm": 0.7259947657585144, "learning_rate": 2.947399718425859e-06, "loss": 0.5238, "step": 1150 }, { "epoch": 2.0134110787172013, "grad_norm": 0.6647830009460449, "learning_rate": 2.938109161703793e-06, "loss": 0.5023, "step": 1151 }, { "epoch": 2.015160349854227, "grad_norm": 0.698024332523346, "learning_rate": 2.9288271745705867e-06, "loss": 0.5097, "step": 1152 }, { "epoch": 2.0169096209912536, "grad_norm": 0.6710247993469238, "learning_rate": 2.9195537956038483e-06, "loss": 0.5314, "step": 1153 }, { "epoch": 2.01865889212828, "grad_norm": 0.7223543524742126, "learning_rate": 2.910289063345405e-06, "loss": 0.5099, "step": 1154 }, { "epoch": 2.020408163265306, "grad_norm": 0.744163990020752, "learning_rate": 2.90103301630115e-06, "loss": 0.5046, "step": 1155 }, { "epoch": 2.0221574344023323, "grad_norm": 0.6502383351325989, "learning_rate": 2.8917856929408793e-06, "loss": 0.5019, "step": 1156 }, { "epoch": 2.0239067055393587, "grad_norm": 0.6370036602020264, "learning_rate": 2.8825471316981287e-06, "loss": 0.5055, "step": 1157 }, { "epoch": 2.0256559766763846, "grad_norm": 0.7541194558143616, "learning_rate": 2.8733173709700215e-06, "loss": 0.5212, "step": 1158 }, { "epoch": 2.027405247813411, "grad_norm": 0.6487350463867188, "learning_rate": 2.8640964491170976e-06, "loss": 0.4961, "step": 1159 }, { "epoch": 2.0291545189504374, "grad_norm": 0.6081093549728394, "learning_rate": 2.854884404463171e-06, "loss": 0.5025, "step": 1160 }, { "epoch": 2.0309037900874634, "grad_norm": 0.6221868395805359, "learning_rate": 2.8456812752951483e-06, "loss": 0.5016, "step": 1161 }, { "epoch": 2.0326530612244897, "grad_norm": 0.5825191140174866, "learning_rate": 2.8364870998628925e-06, "loss": 0.5192, "step": 1162 }, { "epoch": 2.034402332361516, "grad_norm": 0.682655394077301, "learning_rate": 2.82730191637905e-06, "loss": 0.5284, "step": 1163 }, { "epoch": 2.036151603498542, "grad_norm": 0.6354448199272156, "learning_rate": 2.818125763018892e-06, "loss": 0.5004, "step": 1164 }, { "epoch": 2.0379008746355685, "grad_norm": 0.5718160271644592, "learning_rate": 2.8089586779201607e-06, "loss": 0.5085, "step": 1165 }, { "epoch": 2.039650145772595, "grad_norm": 0.5960151553153992, "learning_rate": 2.7998006991829057e-06, "loss": 0.5364, "step": 1166 }, { "epoch": 2.041399416909621, "grad_norm": 0.5956400036811829, "learning_rate": 2.7906518648693405e-06, "loss": 0.5038, "step": 1167 }, { "epoch": 2.043148688046647, "grad_norm": 0.5738234519958496, "learning_rate": 2.7815122130036593e-06, "loss": 0.5141, "step": 1168 }, { "epoch": 2.0448979591836736, "grad_norm": 0.6251971125602722, "learning_rate": 2.772381781571897e-06, "loss": 0.4956, "step": 1169 }, { "epoch": 2.0466472303206995, "grad_norm": 0.5853664875030518, "learning_rate": 2.763260608521771e-06, "loss": 0.5221, "step": 1170 }, { "epoch": 2.048396501457726, "grad_norm": 0.5822017788887024, "learning_rate": 2.7541487317625116e-06, "loss": 0.509, "step": 1171 }, { "epoch": 2.0501457725947523, "grad_norm": 0.6114208698272705, "learning_rate": 2.7450461891647205e-06, "loss": 0.4953, "step": 1172 }, { "epoch": 2.0518950437317782, "grad_norm": 0.6245787143707275, "learning_rate": 2.7359530185601956e-06, "loss": 0.5053, "step": 1173 }, { "epoch": 2.0536443148688046, "grad_norm": 0.6067457795143127, "learning_rate": 2.726869257741793e-06, "loss": 0.521, "step": 1174 }, { "epoch": 2.055393586005831, "grad_norm": 0.594319224357605, "learning_rate": 2.717794944463251e-06, "loss": 0.5086, "step": 1175 }, { "epoch": 2.057142857142857, "grad_norm": 0.6315504908561707, "learning_rate": 2.7087301164390477e-06, "loss": 0.5047, "step": 1176 }, { "epoch": 2.0588921282798833, "grad_norm": 0.5805989503860474, "learning_rate": 2.6996748113442397e-06, "loss": 0.5242, "step": 1177 }, { "epoch": 2.0606413994169097, "grad_norm": 0.5891321301460266, "learning_rate": 2.6906290668143005e-06, "loss": 0.5301, "step": 1178 }, { "epoch": 2.0623906705539357, "grad_norm": 0.6275649070739746, "learning_rate": 2.6815929204449676e-06, "loss": 0.5146, "step": 1179 }, { "epoch": 2.064139941690962, "grad_norm": 0.5998619794845581, "learning_rate": 2.672566409792091e-06, "loss": 0.522, "step": 1180 }, { "epoch": 2.0658892128279884, "grad_norm": 0.5705634355545044, "learning_rate": 2.6635495723714733e-06, "loss": 0.4969, "step": 1181 }, { "epoch": 2.0676384839650144, "grad_norm": 0.582104504108429, "learning_rate": 2.65454244565871e-06, "loss": 0.5111, "step": 1182 }, { "epoch": 2.0693877551020408, "grad_norm": 0.5578974485397339, "learning_rate": 2.6455450670890346e-06, "loss": 0.4942, "step": 1183 }, { "epoch": 2.071137026239067, "grad_norm": 0.6039025187492371, "learning_rate": 2.636557474057173e-06, "loss": 0.5149, "step": 1184 }, { "epoch": 2.072886297376093, "grad_norm": 0.5697957873344421, "learning_rate": 2.627579703917179e-06, "loss": 0.5063, "step": 1185 }, { "epoch": 2.0746355685131195, "grad_norm": 0.5948165655136108, "learning_rate": 2.618611793982273e-06, "loss": 0.5078, "step": 1186 }, { "epoch": 2.076384839650146, "grad_norm": 0.5933621525764465, "learning_rate": 2.6096537815247057e-06, "loss": 0.5019, "step": 1187 }, { "epoch": 2.078134110787172, "grad_norm": 0.5820541977882385, "learning_rate": 2.6007057037755823e-06, "loss": 0.5289, "step": 1188 }, { "epoch": 2.079883381924198, "grad_norm": 0.5895217061042786, "learning_rate": 2.5917675979247258e-06, "loss": 0.5226, "step": 1189 }, { "epoch": 2.0816326530612246, "grad_norm": 0.5494213700294495, "learning_rate": 2.5828395011205066e-06, "loss": 0.5085, "step": 1190 }, { "epoch": 2.0833819241982505, "grad_norm": 0.6157009601593018, "learning_rate": 2.5739214504697036e-06, "loss": 0.5249, "step": 1191 }, { "epoch": 2.085131195335277, "grad_norm": 5.105178356170654, "learning_rate": 2.5650134830373354e-06, "loss": 0.5511, "step": 1192 }, { "epoch": 2.0868804664723033, "grad_norm": 0.6153669953346252, "learning_rate": 2.5561156358465138e-06, "loss": 0.4958, "step": 1193 }, { "epoch": 2.0886297376093292, "grad_norm": 0.6141291260719299, "learning_rate": 2.5472279458782935e-06, "loss": 0.5069, "step": 1194 }, { "epoch": 2.0903790087463556, "grad_norm": 0.6111932396888733, "learning_rate": 2.5383504500715113e-06, "loss": 0.5065, "step": 1195 }, { "epoch": 2.092128279883382, "grad_norm": 0.594882607460022, "learning_rate": 2.5294831853226344e-06, "loss": 0.4954, "step": 1196 }, { "epoch": 2.093877551020408, "grad_norm": 0.5700216293334961, "learning_rate": 2.5206261884856063e-06, "loss": 0.5391, "step": 1197 }, { "epoch": 2.0956268221574343, "grad_norm": 0.62410569190979, "learning_rate": 2.511779496371701e-06, "loss": 0.5157, "step": 1198 }, { "epoch": 2.0973760932944607, "grad_norm": 0.5766206383705139, "learning_rate": 2.5029431457493602e-06, "loss": 0.5197, "step": 1199 }, { "epoch": 2.0991253644314867, "grad_norm": 0.5834435224533081, "learning_rate": 2.4941171733440422e-06, "loss": 0.5198, "step": 1200 }, { "epoch": 2.100874635568513, "grad_norm": 0.6722077131271362, "learning_rate": 2.4853016158380787e-06, "loss": 0.5119, "step": 1201 }, { "epoch": 2.1026239067055394, "grad_norm": 0.7233018279075623, "learning_rate": 2.4764965098705066e-06, "loss": 0.5085, "step": 1202 }, { "epoch": 2.1043731778425654, "grad_norm": 0.5965102910995483, "learning_rate": 2.467701892036933e-06, "loss": 0.5271, "step": 1203 }, { "epoch": 2.1061224489795918, "grad_norm": 0.5996162295341492, "learning_rate": 2.4589177988893654e-06, "loss": 0.507, "step": 1204 }, { "epoch": 2.107871720116618, "grad_norm": 0.5986148118972778, "learning_rate": 2.450144266936078e-06, "loss": 0.5219, "step": 1205 }, { "epoch": 2.109620991253644, "grad_norm": 0.6858665347099304, "learning_rate": 2.441381332641442e-06, "loss": 0.5117, "step": 1206 }, { "epoch": 2.1113702623906705, "grad_norm": 0.6654102206230164, "learning_rate": 2.4326290324257896e-06, "loss": 0.4855, "step": 1207 }, { "epoch": 2.113119533527697, "grad_norm": 0.6032256484031677, "learning_rate": 2.423887402665254e-06, "loss": 0.5202, "step": 1208 }, { "epoch": 2.114868804664723, "grad_norm": 0.6067450046539307, "learning_rate": 2.415156479691619e-06, "loss": 0.5058, "step": 1209 }, { "epoch": 2.116618075801749, "grad_norm": 0.6476879119873047, "learning_rate": 2.4064362997921685e-06, "loss": 0.5178, "step": 1210 }, { "epoch": 2.1183673469387756, "grad_norm": 0.5899983644485474, "learning_rate": 2.397726899209534e-06, "loss": 0.5386, "step": 1211 }, { "epoch": 2.1201166180758015, "grad_norm": 0.5912320613861084, "learning_rate": 2.389028314141557e-06, "loss": 0.5542, "step": 1212 }, { "epoch": 2.121865889212828, "grad_norm": 0.6006015539169312, "learning_rate": 2.3803405807411163e-06, "loss": 0.4781, "step": 1213 }, { "epoch": 2.1236151603498543, "grad_norm": 0.5959700345993042, "learning_rate": 2.371663735115991e-06, "loss": 0.522, "step": 1214 }, { "epoch": 2.1253644314868803, "grad_norm": 0.594844400882721, "learning_rate": 2.362997813328715e-06, "loss": 0.53, "step": 1215 }, { "epoch": 2.1271137026239066, "grad_norm": 0.5499120354652405, "learning_rate": 2.3543428513964124e-06, "loss": 0.5268, "step": 1216 }, { "epoch": 2.128862973760933, "grad_norm": 0.5681025385856628, "learning_rate": 2.3456988852906648e-06, "loss": 0.5291, "step": 1217 }, { "epoch": 2.130612244897959, "grad_norm": 0.6007065773010254, "learning_rate": 2.337065950937344e-06, "loss": 0.5109, "step": 1218 }, { "epoch": 2.1323615160349854, "grad_norm": 0.5621976852416992, "learning_rate": 2.328444084216481e-06, "loss": 0.5544, "step": 1219 }, { "epoch": 2.1341107871720117, "grad_norm": 0.6195060610771179, "learning_rate": 2.3198333209620994e-06, "loss": 0.5094, "step": 1220 }, { "epoch": 2.1358600583090377, "grad_norm": 0.5956797003746033, "learning_rate": 2.3112336969620806e-06, "loss": 0.5306, "step": 1221 }, { "epoch": 2.137609329446064, "grad_norm": 0.6563884615898132, "learning_rate": 2.302645247958009e-06, "loss": 0.5122, "step": 1222 }, { "epoch": 2.1393586005830905, "grad_norm": 0.6162475943565369, "learning_rate": 2.2940680096450208e-06, "loss": 0.5118, "step": 1223 }, { "epoch": 2.1411078717201164, "grad_norm": 0.5849912762641907, "learning_rate": 2.2855020176716573e-06, "loss": 0.4982, "step": 1224 }, { "epoch": 2.142857142857143, "grad_norm": 0.6260120272636414, "learning_rate": 2.2769473076397224e-06, "loss": 0.5246, "step": 1225 }, { "epoch": 2.144606413994169, "grad_norm": 0.577342689037323, "learning_rate": 2.268403915104131e-06, "loss": 0.5181, "step": 1226 }, { "epoch": 2.146355685131195, "grad_norm": 0.5908405780792236, "learning_rate": 2.259871875572755e-06, "loss": 0.506, "step": 1227 }, { "epoch": 2.1481049562682215, "grad_norm": 0.5665891766548157, "learning_rate": 2.2513512245062824e-06, "loss": 0.5091, "step": 1228 }, { "epoch": 2.149854227405248, "grad_norm": 0.5552300214767456, "learning_rate": 2.242841997318072e-06, "loss": 0.515, "step": 1229 }, { "epoch": 2.151603498542274, "grad_norm": 0.580239474773407, "learning_rate": 2.234344229374003e-06, "loss": 0.5415, "step": 1230 }, { "epoch": 2.1533527696793002, "grad_norm": 0.558835506439209, "learning_rate": 2.2258579559923247e-06, "loss": 0.5197, "step": 1231 }, { "epoch": 2.1551020408163266, "grad_norm": 0.5500437617301941, "learning_rate": 2.217383212443512e-06, "loss": 0.5311, "step": 1232 }, { "epoch": 2.1568513119533526, "grad_norm": 0.5760363340377808, "learning_rate": 2.2089200339501265e-06, "loss": 0.5085, "step": 1233 }, { "epoch": 2.158600583090379, "grad_norm": 0.5806437730789185, "learning_rate": 2.200468455686654e-06, "loss": 0.5283, "step": 1234 }, { "epoch": 2.1603498542274053, "grad_norm": 0.5660296082496643, "learning_rate": 2.192028512779375e-06, "loss": 0.5217, "step": 1235 }, { "epoch": 2.1620991253644313, "grad_norm": 0.5706313848495483, "learning_rate": 2.183600240306211e-06, "loss": 0.5129, "step": 1236 }, { "epoch": 2.1638483965014577, "grad_norm": 0.5519400238990784, "learning_rate": 2.1751836732965754e-06, "loss": 0.5242, "step": 1237 }, { "epoch": 2.165597667638484, "grad_norm": 0.5549954175949097, "learning_rate": 2.1667788467312307e-06, "loss": 0.5167, "step": 1238 }, { "epoch": 2.16734693877551, "grad_norm": 0.6404731869697571, "learning_rate": 2.1583857955421483e-06, "loss": 0.5182, "step": 1239 }, { "epoch": 2.1690962099125364, "grad_norm": 0.5441157817840576, "learning_rate": 2.15000455461236e-06, "loss": 0.5185, "step": 1240 }, { "epoch": 2.1708454810495628, "grad_norm": 0.5353224873542786, "learning_rate": 2.141635158775806e-06, "loss": 0.5332, "step": 1241 }, { "epoch": 2.1725947521865887, "grad_norm": 0.5619603991508484, "learning_rate": 2.133277642817199e-06, "loss": 0.5262, "step": 1242 }, { "epoch": 2.174344023323615, "grad_norm": 0.5826131105422974, "learning_rate": 2.124932041471878e-06, "loss": 0.5107, "step": 1243 }, { "epoch": 2.1760932944606415, "grad_norm": 0.5679678320884705, "learning_rate": 2.1165983894256647e-06, "loss": 0.4974, "step": 1244 }, { "epoch": 2.1778425655976674, "grad_norm": 0.5834650993347168, "learning_rate": 2.1082767213147125e-06, "loss": 0.5068, "step": 1245 }, { "epoch": 2.179591836734694, "grad_norm": 0.5769474506378174, "learning_rate": 2.099967071725367e-06, "loss": 0.5343, "step": 1246 }, { "epoch": 2.18134110787172, "grad_norm": 0.5893000960350037, "learning_rate": 2.0916694751940287e-06, "loss": 0.5022, "step": 1247 }, { "epoch": 2.183090379008746, "grad_norm": 0.5578664541244507, "learning_rate": 2.0833839662069995e-06, "loss": 0.5412, "step": 1248 }, { "epoch": 2.1848396501457725, "grad_norm": 0.6360623836517334, "learning_rate": 2.0751105792003417e-06, "loss": 0.4769, "step": 1249 }, { "epoch": 2.186588921282799, "grad_norm": 0.5614444017410278, "learning_rate": 2.066849348559741e-06, "loss": 0.5268, "step": 1250 }, { "epoch": 2.188338192419825, "grad_norm": 0.6171649098396301, "learning_rate": 2.058600308620354e-06, "loss": 0.5309, "step": 1251 }, { "epoch": 2.1900874635568512, "grad_norm": 0.5569816827774048, "learning_rate": 2.0503634936666767e-06, "loss": 0.5072, "step": 1252 }, { "epoch": 2.1918367346938776, "grad_norm": 0.562021791934967, "learning_rate": 2.042138937932388e-06, "loss": 0.5251, "step": 1253 }, { "epoch": 2.1935860058309036, "grad_norm": 0.6023848652839661, "learning_rate": 2.033926675600223e-06, "loss": 0.501, "step": 1254 }, { "epoch": 2.19533527696793, "grad_norm": 0.6120443940162659, "learning_rate": 2.0257267408018187e-06, "loss": 0.4938, "step": 1255 }, { "epoch": 2.1970845481049563, "grad_norm": 0.5805404186248779, "learning_rate": 2.0175391676175756e-06, "loss": 0.5011, "step": 1256 }, { "epoch": 2.1988338192419823, "grad_norm": 0.5861479640007019, "learning_rate": 2.0093639900765203e-06, "loss": 0.5034, "step": 1257 }, { "epoch": 2.2005830903790087, "grad_norm": 0.5965428948402405, "learning_rate": 2.001201242156163e-06, "loss": 0.4937, "step": 1258 }, { "epoch": 2.202332361516035, "grad_norm": 0.5945551991462708, "learning_rate": 1.9930509577823475e-06, "loss": 0.5189, "step": 1259 }, { "epoch": 2.204081632653061, "grad_norm": 0.6022103428840637, "learning_rate": 1.98491317082912e-06, "loss": 0.5155, "step": 1260 }, { "epoch": 2.2058309037900874, "grad_norm": 0.6029344797134399, "learning_rate": 1.9767879151185865e-06, "loss": 0.5088, "step": 1261 }, { "epoch": 2.207580174927114, "grad_norm": 0.638581395149231, "learning_rate": 1.968675224420772e-06, "loss": 0.5017, "step": 1262 }, { "epoch": 2.2093294460641397, "grad_norm": 0.5505150556564331, "learning_rate": 1.9605751324534734e-06, "loss": 0.4961, "step": 1263 }, { "epoch": 2.211078717201166, "grad_norm": 0.5606935024261475, "learning_rate": 1.952487672882131e-06, "loss": 0.5402, "step": 1264 }, { "epoch": 2.2128279883381925, "grad_norm": 0.5842843055725098, "learning_rate": 1.944412879319679e-06, "loss": 0.5178, "step": 1265 }, { "epoch": 2.2145772594752184, "grad_norm": 0.6140647530555725, "learning_rate": 1.9363507853264117e-06, "loss": 0.5066, "step": 1266 }, { "epoch": 2.216326530612245, "grad_norm": 0.5820194482803345, "learning_rate": 1.9283014244098385e-06, "loss": 0.505, "step": 1267 }, { "epoch": 2.218075801749271, "grad_norm": 0.5803715586662292, "learning_rate": 1.920264830024553e-06, "loss": 0.5005, "step": 1268 }, { "epoch": 2.219825072886297, "grad_norm": 0.5715053081512451, "learning_rate": 1.912241035572082e-06, "loss": 0.4805, "step": 1269 }, { "epoch": 2.2215743440233235, "grad_norm": 0.5805825591087341, "learning_rate": 1.9042300744007586e-06, "loss": 0.534, "step": 1270 }, { "epoch": 2.22332361516035, "grad_norm": 0.5771877765655518, "learning_rate": 1.8962319798055796e-06, "loss": 0.5188, "step": 1271 }, { "epoch": 2.225072886297376, "grad_norm": 0.5839830636978149, "learning_rate": 1.8882467850280611e-06, "loss": 0.5022, "step": 1272 }, { "epoch": 2.2268221574344023, "grad_norm": 0.5987756252288818, "learning_rate": 1.8802745232561066e-06, "loss": 0.4773, "step": 1273 }, { "epoch": 2.2285714285714286, "grad_norm": 0.5797451138496399, "learning_rate": 1.8723152276238693e-06, "loss": 0.5284, "step": 1274 }, { "epoch": 2.2303206997084546, "grad_norm": 0.5895811319351196, "learning_rate": 1.8643689312116148e-06, "loss": 0.5552, "step": 1275 }, { "epoch": 2.232069970845481, "grad_norm": 0.5787795186042786, "learning_rate": 1.856435667045577e-06, "loss": 0.5205, "step": 1276 }, { "epoch": 2.2338192419825074, "grad_norm": 0.6107646822929382, "learning_rate": 1.8485154680978257e-06, "loss": 0.5082, "step": 1277 }, { "epoch": 2.2355685131195333, "grad_norm": 0.5980193018913269, "learning_rate": 1.840608367286134e-06, "loss": 0.5221, "step": 1278 }, { "epoch": 2.2373177842565597, "grad_norm": 0.5769156217575073, "learning_rate": 1.8327143974738316e-06, "loss": 0.53, "step": 1279 }, { "epoch": 2.239067055393586, "grad_norm": 0.5905051231384277, "learning_rate": 1.8248335914696762e-06, "loss": 0.5142, "step": 1280 }, { "epoch": 2.240816326530612, "grad_norm": 0.5792200565338135, "learning_rate": 1.8169659820277164e-06, "loss": 0.5388, "step": 1281 }, { "epoch": 2.2425655976676384, "grad_norm": 0.5987423658370972, "learning_rate": 1.8091116018471484e-06, "loss": 0.5086, "step": 1282 }, { "epoch": 2.244314868804665, "grad_norm": 0.5713226795196533, "learning_rate": 1.8012704835721866e-06, "loss": 0.523, "step": 1283 }, { "epoch": 2.2460641399416907, "grad_norm": 0.6583318114280701, "learning_rate": 1.7934426597919291e-06, "loss": 0.5214, "step": 1284 }, { "epoch": 2.247813411078717, "grad_norm": 0.6187498569488525, "learning_rate": 1.7856281630402195e-06, "loss": 0.499, "step": 1285 }, { "epoch": 2.2495626822157435, "grad_norm": 0.5950196981430054, "learning_rate": 1.7778270257955099e-06, "loss": 0.5297, "step": 1286 }, { "epoch": 2.2513119533527695, "grad_norm": 0.6105055212974548, "learning_rate": 1.770039280480726e-06, "loss": 0.4965, "step": 1287 }, { "epoch": 2.253061224489796, "grad_norm": 0.581340491771698, "learning_rate": 1.762264959463139e-06, "loss": 0.5194, "step": 1288 }, { "epoch": 2.2548104956268222, "grad_norm": 0.6108242869377136, "learning_rate": 1.7545040950542264e-06, "loss": 0.4987, "step": 1289 }, { "epoch": 2.256559766763848, "grad_norm": 0.5744954347610474, "learning_rate": 1.7467567195095324e-06, "loss": 0.5008, "step": 1290 }, { "epoch": 2.2583090379008746, "grad_norm": 0.577490508556366, "learning_rate": 1.7390228650285412e-06, "loss": 0.4971, "step": 1291 }, { "epoch": 2.260058309037901, "grad_norm": 0.5736153721809387, "learning_rate": 1.7313025637545432e-06, "loss": 0.4844, "step": 1292 }, { "epoch": 2.261807580174927, "grad_norm": 0.5920312404632568, "learning_rate": 1.7235958477744996e-06, "loss": 0.4935, "step": 1293 }, { "epoch": 2.2635568513119533, "grad_norm": 0.6191216707229614, "learning_rate": 1.715902749118904e-06, "loss": 0.5172, "step": 1294 }, { "epoch": 2.2653061224489797, "grad_norm": 0.6080250144004822, "learning_rate": 1.70822329976166e-06, "loss": 0.4803, "step": 1295 }, { "epoch": 2.2670553935860056, "grad_norm": 0.5335835814476013, "learning_rate": 1.700557531619937e-06, "loss": 0.5307, "step": 1296 }, { "epoch": 2.268804664723032, "grad_norm": 0.59977787733078, "learning_rate": 1.6929054765540443e-06, "loss": 0.5119, "step": 1297 }, { "epoch": 2.2705539358600584, "grad_norm": 0.6059865355491638, "learning_rate": 1.6852671663672988e-06, "loss": 0.5088, "step": 1298 }, { "epoch": 2.2723032069970843, "grad_norm": 0.5738167762756348, "learning_rate": 1.677642632805892e-06, "loss": 0.5317, "step": 1299 }, { "epoch": 2.2740524781341107, "grad_norm": 0.5346144437789917, "learning_rate": 1.670031907558754e-06, "loss": 0.5312, "step": 1300 }, { "epoch": 2.275801749271137, "grad_norm": 0.5606193542480469, "learning_rate": 1.6624350222574253e-06, "loss": 0.52, "step": 1301 }, { "epoch": 2.277551020408163, "grad_norm": 0.5438253879547119, "learning_rate": 1.6548520084759283e-06, "loss": 0.5081, "step": 1302 }, { "epoch": 2.2793002915451894, "grad_norm": 0.571051836013794, "learning_rate": 1.6472828977306316e-06, "loss": 0.534, "step": 1303 }, { "epoch": 2.281049562682216, "grad_norm": 0.5798311233520508, "learning_rate": 1.6397277214801188e-06, "loss": 0.5087, "step": 1304 }, { "epoch": 2.2827988338192418, "grad_norm": 0.6161330938339233, "learning_rate": 1.6321865111250584e-06, "loss": 0.5236, "step": 1305 }, { "epoch": 2.284548104956268, "grad_norm": 0.5580735802650452, "learning_rate": 1.6246592980080771e-06, "loss": 0.5241, "step": 1306 }, { "epoch": 2.2862973760932945, "grad_norm": 0.5713551044464111, "learning_rate": 1.6171461134136274e-06, "loss": 0.5108, "step": 1307 }, { "epoch": 2.2880466472303205, "grad_norm": 0.555664598941803, "learning_rate": 1.609646988567851e-06, "loss": 0.5263, "step": 1308 }, { "epoch": 2.289795918367347, "grad_norm": 0.5874269008636475, "learning_rate": 1.602161954638462e-06, "loss": 0.5022, "step": 1309 }, { "epoch": 2.2915451895043732, "grad_norm": 0.5438481569290161, "learning_rate": 1.5946910427346036e-06, "loss": 0.5302, "step": 1310 }, { "epoch": 2.293294460641399, "grad_norm": 0.5848217010498047, "learning_rate": 1.5872342839067305e-06, "loss": 0.4904, "step": 1311 }, { "epoch": 2.2950437317784256, "grad_norm": 0.5840378999710083, "learning_rate": 1.5797917091464698e-06, "loss": 0.5397, "step": 1312 }, { "epoch": 2.296793002915452, "grad_norm": 0.5778439044952393, "learning_rate": 1.5723633493865025e-06, "loss": 0.5179, "step": 1313 }, { "epoch": 2.298542274052478, "grad_norm": 0.611266016960144, "learning_rate": 1.5649492355004241e-06, "loss": 0.5037, "step": 1314 }, { "epoch": 2.3002915451895043, "grad_norm": 0.5245871543884277, "learning_rate": 1.5575493983026258e-06, "loss": 0.5135, "step": 1315 }, { "epoch": 2.3020408163265307, "grad_norm": 0.5875314474105835, "learning_rate": 1.5501638685481625e-06, "loss": 0.5107, "step": 1316 }, { "epoch": 2.3037900874635566, "grad_norm": 0.5613728761672974, "learning_rate": 1.54279267693262e-06, "loss": 0.5214, "step": 1317 }, { "epoch": 2.305539358600583, "grad_norm": 0.5562321543693542, "learning_rate": 1.5354358540919973e-06, "loss": 0.5352, "step": 1318 }, { "epoch": 2.3072886297376094, "grad_norm": 0.5632153153419495, "learning_rate": 1.528093430602568e-06, "loss": 0.5134, "step": 1319 }, { "epoch": 2.3090379008746353, "grad_norm": 0.6001570224761963, "learning_rate": 1.5207654369807707e-06, "loss": 0.5165, "step": 1320 }, { "epoch": 2.3107871720116617, "grad_norm": 0.5657544136047363, "learning_rate": 1.5134519036830591e-06, "loss": 0.5149, "step": 1321 }, { "epoch": 2.312536443148688, "grad_norm": 0.5529624819755554, "learning_rate": 1.5061528611057917e-06, "loss": 0.5289, "step": 1322 }, { "epoch": 2.314285714285714, "grad_norm": 0.5280054211616516, "learning_rate": 1.4988683395851045e-06, "loss": 0.5218, "step": 1323 }, { "epoch": 2.3160349854227404, "grad_norm": 0.5875835418701172, "learning_rate": 1.4915983693967735e-06, "loss": 0.5113, "step": 1324 }, { "epoch": 2.317784256559767, "grad_norm": 0.5528156161308289, "learning_rate": 1.484342980756105e-06, "loss": 0.5065, "step": 1325 }, { "epoch": 2.3195335276967928, "grad_norm": 0.5626161694526672, "learning_rate": 1.4771022038177958e-06, "loss": 0.5348, "step": 1326 }, { "epoch": 2.321282798833819, "grad_norm": 0.5393100380897522, "learning_rate": 1.4698760686758178e-06, "loss": 0.5215, "step": 1327 }, { "epoch": 2.3230320699708455, "grad_norm": 0.5655534863471985, "learning_rate": 1.4626646053632848e-06, "loss": 0.5287, "step": 1328 }, { "epoch": 2.3247813411078715, "grad_norm": 0.5676655173301697, "learning_rate": 1.4554678438523356e-06, "loss": 0.5217, "step": 1329 }, { "epoch": 2.326530612244898, "grad_norm": 0.545199990272522, "learning_rate": 1.4482858140540068e-06, "loss": 0.5182, "step": 1330 }, { "epoch": 2.3282798833819243, "grad_norm": 0.5545451641082764, "learning_rate": 1.441118545818102e-06, "loss": 0.5198, "step": 1331 }, { "epoch": 2.33002915451895, "grad_norm": 0.5526758432388306, "learning_rate": 1.4339660689330753e-06, "loss": 0.5189, "step": 1332 }, { "epoch": 2.3317784256559766, "grad_norm": 0.5352872610092163, "learning_rate": 1.4268284131259075e-06, "loss": 0.5393, "step": 1333 }, { "epoch": 2.333527696793003, "grad_norm": 0.6117573380470276, "learning_rate": 1.4197056080619803e-06, "loss": 0.5036, "step": 1334 }, { "epoch": 2.335276967930029, "grad_norm": 0.5898134708404541, "learning_rate": 1.4125976833449495e-06, "loss": 0.5068, "step": 1335 }, { "epoch": 2.3370262390670553, "grad_norm": 0.5720674395561218, "learning_rate": 1.405504668516627e-06, "loss": 0.5215, "step": 1336 }, { "epoch": 2.3387755102040817, "grad_norm": 0.5549268126487732, "learning_rate": 1.3984265930568575e-06, "loss": 0.5206, "step": 1337 }, { "epoch": 2.3405247813411076, "grad_norm": 0.5669368505477905, "learning_rate": 1.391363486383397e-06, "loss": 0.4948, "step": 1338 }, { "epoch": 2.342274052478134, "grad_norm": 0.5789579153060913, "learning_rate": 1.3843153778517842e-06, "loss": 0.5219, "step": 1339 }, { "epoch": 2.3440233236151604, "grad_norm": 0.5628488063812256, "learning_rate": 1.3772822967552235e-06, "loss": 0.5309, "step": 1340 }, { "epoch": 2.3457725947521864, "grad_norm": 0.5888519883155823, "learning_rate": 1.3702642723244664e-06, "loss": 0.5208, "step": 1341 }, { "epoch": 2.3475218658892127, "grad_norm": 0.5756899118423462, "learning_rate": 1.3632613337276807e-06, "loss": 0.5216, "step": 1342 }, { "epoch": 2.349271137026239, "grad_norm": 0.6325283646583557, "learning_rate": 1.35627351007034e-06, "loss": 0.5073, "step": 1343 }, { "epoch": 2.351020408163265, "grad_norm": 0.5363728404045105, "learning_rate": 1.3493008303950955e-06, "loss": 0.5159, "step": 1344 }, { "epoch": 2.3527696793002915, "grad_norm": 0.5788946151733398, "learning_rate": 1.3423433236816563e-06, "loss": 0.5184, "step": 1345 }, { "epoch": 2.354518950437318, "grad_norm": 0.5766350626945496, "learning_rate": 1.3354010188466688e-06, "loss": 0.5121, "step": 1346 }, { "epoch": 2.356268221574344, "grad_norm": 0.7362555265426636, "learning_rate": 1.3284739447436007e-06, "loss": 0.5293, "step": 1347 }, { "epoch": 2.35801749271137, "grad_norm": 0.5195390582084656, "learning_rate": 1.3215621301626191e-06, "loss": 0.4979, "step": 1348 }, { "epoch": 2.3597667638483966, "grad_norm": 0.5652745366096497, "learning_rate": 1.3146656038304656e-06, "loss": 0.5078, "step": 1349 }, { "epoch": 2.3615160349854225, "grad_norm": 0.5652557611465454, "learning_rate": 1.3077843944103408e-06, "loss": 0.5375, "step": 1350 }, { "epoch": 2.363265306122449, "grad_norm": 0.5575758218765259, "learning_rate": 1.3009185305017901e-06, "loss": 0.5245, "step": 1351 }, { "epoch": 2.3650145772594753, "grad_norm": 0.5337525606155396, "learning_rate": 1.2940680406405792e-06, "loss": 0.5242, "step": 1352 }, { "epoch": 2.3667638483965012, "grad_norm": 0.5640784502029419, "learning_rate": 1.2872329532985716e-06, "loss": 0.5119, "step": 1353 }, { "epoch": 2.3685131195335276, "grad_norm": 0.5798302292823792, "learning_rate": 1.280413296883622e-06, "loss": 0.5093, "step": 1354 }, { "epoch": 2.370262390670554, "grad_norm": 0.5790512561798096, "learning_rate": 1.273609099739444e-06, "loss": 0.5096, "step": 1355 }, { "epoch": 2.37201166180758, "grad_norm": 0.5409457683563232, "learning_rate": 1.2668203901455083e-06, "loss": 0.5089, "step": 1356 }, { "epoch": 2.3737609329446063, "grad_norm": 0.5604782700538635, "learning_rate": 1.2600471963169075e-06, "loss": 0.5152, "step": 1357 }, { "epoch": 2.3755102040816327, "grad_norm": 0.5737406611442566, "learning_rate": 1.2532895464042561e-06, "loss": 0.5217, "step": 1358 }, { "epoch": 2.3772594752186587, "grad_norm": 0.549640953540802, "learning_rate": 1.2465474684935603e-06, "loss": 0.5167, "step": 1359 }, { "epoch": 2.379008746355685, "grad_norm": 0.5426600575447083, "learning_rate": 1.2398209906061036e-06, "loss": 0.5272, "step": 1360 }, { "epoch": 2.3807580174927114, "grad_norm": 0.5267632603645325, "learning_rate": 1.2331101406983438e-06, "loss": 0.5333, "step": 1361 }, { "epoch": 2.3825072886297374, "grad_norm": 0.546955943107605, "learning_rate": 1.2264149466617752e-06, "loss": 0.5409, "step": 1362 }, { "epoch": 2.3842565597667638, "grad_norm": 0.582362711429596, "learning_rate": 1.2197354363228281e-06, "loss": 0.4891, "step": 1363 }, { "epoch": 2.38600583090379, "grad_norm": 0.5680318474769592, "learning_rate": 1.2130716374427465e-06, "loss": 0.5387, "step": 1364 }, { "epoch": 2.387755102040816, "grad_norm": 0.5393708348274231, "learning_rate": 1.2064235777174766e-06, "loss": 0.5305, "step": 1365 }, { "epoch": 2.3895043731778425, "grad_norm": 0.6052448749542236, "learning_rate": 1.1997912847775518e-06, "loss": 0.4924, "step": 1366 }, { "epoch": 2.391253644314869, "grad_norm": 0.5313429236412048, "learning_rate": 1.1931747861879694e-06, "loss": 0.4937, "step": 1367 }, { "epoch": 2.393002915451895, "grad_norm": 0.5403512716293335, "learning_rate": 1.186574109448091e-06, "loss": 0.494, "step": 1368 }, { "epoch": 2.394752186588921, "grad_norm": 0.5868626236915588, "learning_rate": 1.179989281991511e-06, "loss": 0.4932, "step": 1369 }, { "epoch": 2.3965014577259476, "grad_norm": 0.5881838202476501, "learning_rate": 1.1734203311859593e-06, "loss": 0.5063, "step": 1370 }, { "epoch": 2.3982507288629735, "grad_norm": 0.6249469518661499, "learning_rate": 1.166867284333173e-06, "loss": 0.5225, "step": 1371 }, { "epoch": 2.4, "grad_norm": 0.5644508004188538, "learning_rate": 1.1603301686687947e-06, "loss": 0.5102, "step": 1372 }, { "epoch": 2.4017492711370263, "grad_norm": 0.5313032865524292, "learning_rate": 1.1538090113622496e-06, "loss": 0.5292, "step": 1373 }, { "epoch": 2.4034985422740522, "grad_norm": 0.5626275539398193, "learning_rate": 1.1473038395166415e-06, "loss": 0.4988, "step": 1374 }, { "epoch": 2.4052478134110786, "grad_norm": 0.5447697639465332, "learning_rate": 1.140814680168633e-06, "loss": 0.4986, "step": 1375 }, { "epoch": 2.406997084548105, "grad_norm": 0.5892448425292969, "learning_rate": 1.1343415602883367e-06, "loss": 0.5008, "step": 1376 }, { "epoch": 2.408746355685131, "grad_norm": 0.558306872844696, "learning_rate": 1.1278845067792004e-06, "loss": 0.5125, "step": 1377 }, { "epoch": 2.4104956268221573, "grad_norm": 0.6053299307823181, "learning_rate": 1.1214435464779006e-06, "loss": 0.5024, "step": 1378 }, { "epoch": 2.4122448979591837, "grad_norm": 0.5780885219573975, "learning_rate": 1.115018706154226e-06, "loss": 0.4872, "step": 1379 }, { "epoch": 2.4139941690962097, "grad_norm": 0.5657323598861694, "learning_rate": 1.1086100125109673e-06, "loss": 0.5047, "step": 1380 }, { "epoch": 2.415743440233236, "grad_norm": 0.5404059290885925, "learning_rate": 1.1022174921838052e-06, "loss": 0.5156, "step": 1381 }, { "epoch": 2.4174927113702624, "grad_norm": 0.5847872495651245, "learning_rate": 1.0958411717412043e-06, "loss": 0.5383, "step": 1382 }, { "epoch": 2.4192419825072884, "grad_norm": 0.5371589064598083, "learning_rate": 1.0894810776842995e-06, "loss": 0.5164, "step": 1383 }, { "epoch": 2.4209912536443148, "grad_norm": 0.5689742565155029, "learning_rate": 1.0831372364467834e-06, "loss": 0.5237, "step": 1384 }, { "epoch": 2.422740524781341, "grad_norm": 0.5711183547973633, "learning_rate": 1.0768096743947986e-06, "loss": 0.5101, "step": 1385 }, { "epoch": 2.424489795918367, "grad_norm": 0.5644636154174805, "learning_rate": 1.0704984178268336e-06, "loss": 0.5261, "step": 1386 }, { "epoch": 2.4262390670553935, "grad_norm": 0.5245851278305054, "learning_rate": 1.0642034929736018e-06, "loss": 0.5337, "step": 1387 }, { "epoch": 2.42798833819242, "grad_norm": 0.5591368079185486, "learning_rate": 1.0579249259979424e-06, "loss": 0.5056, "step": 1388 }, { "epoch": 2.429737609329446, "grad_norm": 0.5389377474784851, "learning_rate": 1.0516627429947112e-06, "loss": 0.5029, "step": 1389 }, { "epoch": 2.431486880466472, "grad_norm": 0.5243515968322754, "learning_rate": 1.0454169699906635e-06, "loss": 0.5383, "step": 1390 }, { "epoch": 2.4332361516034986, "grad_norm": 0.5404490828514099, "learning_rate": 1.0391876329443534e-06, "loss": 0.5378, "step": 1391 }, { "epoch": 2.4349854227405245, "grad_norm": 0.546185314655304, "learning_rate": 1.032974757746027e-06, "loss": 0.4923, "step": 1392 }, { "epoch": 2.436734693877551, "grad_norm": 0.6166105270385742, "learning_rate": 1.0267783702175104e-06, "loss": 0.5283, "step": 1393 }, { "epoch": 2.4384839650145773, "grad_norm": 0.5385024547576904, "learning_rate": 1.020598496112103e-06, "loss": 0.5066, "step": 1394 }, { "epoch": 2.4402332361516033, "grad_norm": 0.587562620639801, "learning_rate": 1.0144351611144703e-06, "loss": 0.5172, "step": 1395 }, { "epoch": 2.4419825072886296, "grad_norm": 0.5565781593322754, "learning_rate": 1.008288390840542e-06, "loss": 0.4899, "step": 1396 }, { "epoch": 2.443731778425656, "grad_norm": 0.524133563041687, "learning_rate": 1.0021582108374017e-06, "loss": 0.522, "step": 1397 }, { "epoch": 2.445481049562682, "grad_norm": 0.517731785774231, "learning_rate": 9.96044646583177e-07, "loss": 0.5262, "step": 1398 }, { "epoch": 2.4472303206997084, "grad_norm": 0.5560615658760071, "learning_rate": 9.899477234869381e-07, "loss": 0.4914, "step": 1399 }, { "epoch": 2.4489795918367347, "grad_norm": 0.5915992259979248, "learning_rate": 9.838674668885951e-07, "loss": 0.5098, "step": 1400 }, { "epoch": 2.4507288629737607, "grad_norm": 0.5663204193115234, "learning_rate": 9.778039020587883e-07, "loss": 0.5127, "step": 1401 }, { "epoch": 2.452478134110787, "grad_norm": 0.624844491481781, "learning_rate": 9.717570541987798e-07, "loss": 0.5031, "step": 1402 }, { "epoch": 2.4542274052478135, "grad_norm": 0.5514086484909058, "learning_rate": 9.657269484403587e-07, "loss": 0.5027, "step": 1403 }, { "epoch": 2.4559766763848394, "grad_norm": 0.5160055756568909, "learning_rate": 9.597136098457266e-07, "loss": 0.5137, "step": 1404 }, { "epoch": 2.457725947521866, "grad_norm": 0.5491235852241516, "learning_rate": 9.53717063407399e-07, "loss": 0.4849, "step": 1405 }, { "epoch": 2.459475218658892, "grad_norm": 0.5746858716011047, "learning_rate": 9.47737334048101e-07, "loss": 0.5246, "step": 1406 }, { "epoch": 2.461224489795918, "grad_norm": 0.5245023965835571, "learning_rate": 9.417744466206636e-07, "loss": 0.5386, "step": 1407 }, { "epoch": 2.4629737609329445, "grad_norm": 0.5376152396202087, "learning_rate": 9.358284259079181e-07, "loss": 0.5224, "step": 1408 }, { "epoch": 2.464723032069971, "grad_norm": 0.542328417301178, "learning_rate": 9.298992966225928e-07, "loss": 0.4975, "step": 1409 }, { "epoch": 2.466472303206997, "grad_norm": 0.5777959823608398, "learning_rate": 9.239870834072162e-07, "loss": 0.5145, "step": 1410 }, { "epoch": 2.4682215743440232, "grad_norm": 0.6091229319572449, "learning_rate": 9.180918108340109e-07, "loss": 0.49, "step": 1411 }, { "epoch": 2.4699708454810496, "grad_norm": 0.5754712224006653, "learning_rate": 9.122135034047863e-07, "loss": 0.4978, "step": 1412 }, { "epoch": 2.4717201166180756, "grad_norm": 0.5701878666877747, "learning_rate": 9.06352185550845e-07, "loss": 0.5017, "step": 1413 }, { "epoch": 2.473469387755102, "grad_norm": 0.5891409516334534, "learning_rate": 9.005078816328772e-07, "loss": 0.4738, "step": 1414 }, { "epoch": 2.4752186588921283, "grad_norm": 0.5595827698707581, "learning_rate": 8.946806159408616e-07, "loss": 0.5104, "step": 1415 }, { "epoch": 2.4769679300291543, "grad_norm": 0.5386918187141418, "learning_rate": 8.888704126939601e-07, "loss": 0.4858, "step": 1416 }, { "epoch": 2.4787172011661807, "grad_norm": 0.5229963660240173, "learning_rate": 8.830772960404232e-07, "loss": 0.5296, "step": 1417 }, { "epoch": 2.480466472303207, "grad_norm": 0.5356171727180481, "learning_rate": 8.773012900574823e-07, "loss": 0.5241, "step": 1418 }, { "epoch": 2.482215743440233, "grad_norm": 0.5835537910461426, "learning_rate": 8.715424187512589e-07, "loss": 0.5192, "step": 1419 }, { "epoch": 2.4839650145772594, "grad_norm": 0.5394976735115051, "learning_rate": 8.658007060566548e-07, "loss": 0.5069, "step": 1420 }, { "epoch": 2.4857142857142858, "grad_norm": 0.562157928943634, "learning_rate": 8.600761758372622e-07, "loss": 0.5224, "step": 1421 }, { "epoch": 2.4874635568513117, "grad_norm": 0.5718593597412109, "learning_rate": 8.543688518852561e-07, "loss": 0.5395, "step": 1422 }, { "epoch": 2.489212827988338, "grad_norm": 0.5583070516586304, "learning_rate": 8.486787579212973e-07, "loss": 0.5339, "step": 1423 }, { "epoch": 2.4909620991253645, "grad_norm": 0.6185349822044373, "learning_rate": 8.430059175944455e-07, "loss": 0.5012, "step": 1424 }, { "epoch": 2.4927113702623904, "grad_norm": 0.5260583758354187, "learning_rate": 8.373503544820404e-07, "loss": 0.5382, "step": 1425 }, { "epoch": 2.494460641399417, "grad_norm": 0.5530878901481628, "learning_rate": 8.317120920896193e-07, "loss": 0.5111, "step": 1426 }, { "epoch": 2.496209912536443, "grad_norm": 0.5347972512245178, "learning_rate": 8.260911538508126e-07, "loss": 0.5116, "step": 1427 }, { "epoch": 2.497959183673469, "grad_norm": 0.5521382093429565, "learning_rate": 8.204875631272514e-07, "loss": 0.5292, "step": 1428 }, { "epoch": 2.4997084548104955, "grad_norm": 0.5175709128379822, "learning_rate": 8.149013432084668e-07, "loss": 0.5083, "step": 1429 }, { "epoch": 2.501457725947522, "grad_norm": 0.5494022965431213, "learning_rate": 8.093325173117894e-07, "loss": 0.4994, "step": 1430 }, { "epoch": 2.503206997084548, "grad_norm": 0.5250890254974365, "learning_rate": 8.037811085822644e-07, "loss": 0.5152, "step": 1431 }, { "epoch": 2.5049562682215742, "grad_norm": 0.604673445224762, "learning_rate": 7.982471400925401e-07, "loss": 0.5363, "step": 1432 }, { "epoch": 2.5067055393586006, "grad_norm": 0.5366871953010559, "learning_rate": 7.92730634842786e-07, "loss": 0.5077, "step": 1433 }, { "epoch": 2.5084548104956266, "grad_norm": 0.5448949933052063, "learning_rate": 7.872316157605908e-07, "loss": 0.4962, "step": 1434 }, { "epoch": 2.510204081632653, "grad_norm": 0.5333808064460754, "learning_rate": 7.817501057008642e-07, "loss": 0.5367, "step": 1435 }, { "epoch": 2.5119533527696793, "grad_norm": 0.5334743857383728, "learning_rate": 7.762861274457456e-07, "loss": 0.5265, "step": 1436 }, { "epoch": 2.5137026239067053, "grad_norm": 0.5408334732055664, "learning_rate": 7.708397037045129e-07, "loss": 0.5255, "step": 1437 }, { "epoch": 2.5154518950437317, "grad_norm": 0.5568749904632568, "learning_rate": 7.654108571134822e-07, "loss": 0.5219, "step": 1438 }, { "epoch": 2.517201166180758, "grad_norm": 0.5524238348007202, "learning_rate": 7.599996102359148e-07, "loss": 0.5084, "step": 1439 }, { "epoch": 2.518950437317784, "grad_norm": 0.5306229591369629, "learning_rate": 7.546059855619231e-07, "loss": 0.5232, "step": 1440 }, { "epoch": 2.5206997084548104, "grad_norm": 0.5564960241317749, "learning_rate": 7.492300055083829e-07, "loss": 0.5086, "step": 1441 }, { "epoch": 2.522448979591837, "grad_norm": 0.54436194896698, "learning_rate": 7.438716924188344e-07, "loss": 0.5134, "step": 1442 }, { "epoch": 2.5241982507288627, "grad_norm": 0.5399362444877625, "learning_rate": 7.385310685633873e-07, "loss": 0.5181, "step": 1443 }, { "epoch": 2.525947521865889, "grad_norm": 0.5294635891914368, "learning_rate": 7.332081561386345e-07, "loss": 0.517, "step": 1444 }, { "epoch": 2.5276967930029155, "grad_norm": 0.583927571773529, "learning_rate": 7.279029772675572e-07, "loss": 0.5055, "step": 1445 }, { "epoch": 2.5294460641399414, "grad_norm": 0.49857449531555176, "learning_rate": 7.226155539994329e-07, "loss": 0.5421, "step": 1446 }, { "epoch": 2.531195335276968, "grad_norm": 0.5267397165298462, "learning_rate": 7.173459083097406e-07, "loss": 0.4955, "step": 1447 }, { "epoch": 2.532944606413994, "grad_norm": 0.5476489067077637, "learning_rate": 7.120940621000777e-07, "loss": 0.5002, "step": 1448 }, { "epoch": 2.53469387755102, "grad_norm": 0.5524364709854126, "learning_rate": 7.068600371980594e-07, "loss": 0.4954, "step": 1449 }, { "epoch": 2.5364431486880465, "grad_norm": 0.5142143368721008, "learning_rate": 7.016438553572325e-07, "loss": 0.5273, "step": 1450 }, { "epoch": 2.538192419825073, "grad_norm": 0.5358419418334961, "learning_rate": 6.964455382569879e-07, "loss": 0.5206, "step": 1451 }, { "epoch": 2.539941690962099, "grad_norm": 0.5391412377357483, "learning_rate": 6.912651075024657e-07, "loss": 0.4888, "step": 1452 }, { "epoch": 2.5416909620991253, "grad_norm": 0.5788346529006958, "learning_rate": 6.861025846244662e-07, "loss": 0.5126, "step": 1453 }, { "epoch": 2.5434402332361516, "grad_norm": 0.5755944848060608, "learning_rate": 6.809579910793618e-07, "loss": 0.4989, "step": 1454 }, { "epoch": 2.5451895043731776, "grad_norm": 0.5310918688774109, "learning_rate": 6.758313482490087e-07, "loss": 0.5174, "step": 1455 }, { "epoch": 2.546938775510204, "grad_norm": 0.5750368237495422, "learning_rate": 6.707226774406567e-07, "loss": 0.4987, "step": 1456 }, { "epoch": 2.5486880466472304, "grad_norm": 0.5203021764755249, "learning_rate": 6.656319998868583e-07, "loss": 0.5234, "step": 1457 }, { "epoch": 2.5504373177842563, "grad_norm": 0.556785523891449, "learning_rate": 6.605593367453833e-07, "loss": 0.5267, "step": 1458 }, { "epoch": 2.5521865889212827, "grad_norm": 0.5068785548210144, "learning_rate": 6.555047090991329e-07, "loss": 0.5071, "step": 1459 }, { "epoch": 2.553935860058309, "grad_norm": 0.5309472680091858, "learning_rate": 6.50468137956049e-07, "loss": 0.4877, "step": 1460 }, { "epoch": 2.555685131195335, "grad_norm": 0.5813840627670288, "learning_rate": 6.454496442490237e-07, "loss": 0.531, "step": 1461 }, { "epoch": 2.5574344023323614, "grad_norm": 0.5392652750015259, "learning_rate": 6.404492488358211e-07, "loss": 0.4966, "step": 1462 }, { "epoch": 2.559183673469388, "grad_norm": 0.5685400366783142, "learning_rate": 6.354669724989809e-07, "loss": 0.4876, "step": 1463 }, { "epoch": 2.5609329446064137, "grad_norm": 0.5395014882087708, "learning_rate": 6.30502835945741e-07, "loss": 0.5133, "step": 1464 }, { "epoch": 2.56268221574344, "grad_norm": 0.5754340887069702, "learning_rate": 6.255568598079431e-07, "loss": 0.4985, "step": 1465 }, { "epoch": 2.5644314868804665, "grad_norm": 0.5234715342521667, "learning_rate": 6.206290646419555e-07, "loss": 0.5118, "step": 1466 }, { "epoch": 2.5661807580174925, "grad_norm": 0.5732970833778381, "learning_rate": 6.15719470928578e-07, "loss": 0.5024, "step": 1467 }, { "epoch": 2.567930029154519, "grad_norm": 0.5817925930023193, "learning_rate": 6.108280990729631e-07, "loss": 0.5093, "step": 1468 }, { "epoch": 2.5696793002915452, "grad_norm": 0.5364826321601868, "learning_rate": 6.059549694045358e-07, "loss": 0.5489, "step": 1469 }, { "epoch": 2.571428571428571, "grad_norm": 0.5360261797904968, "learning_rate": 6.011001021768964e-07, "loss": 0.4987, "step": 1470 }, { "epoch": 2.5731778425655976, "grad_norm": 0.5495307445526123, "learning_rate": 5.96263517567745e-07, "loss": 0.5166, "step": 1471 }, { "epoch": 2.574927113702624, "grad_norm": 0.5540564060211182, "learning_rate": 5.914452356787958e-07, "loss": 0.5526, "step": 1472 }, { "epoch": 2.57667638483965, "grad_norm": 0.5600809454917908, "learning_rate": 5.866452765356956e-07, "loss": 0.5068, "step": 1473 }, { "epoch": 2.5784256559766763, "grad_norm": 0.540195107460022, "learning_rate": 5.818636600879374e-07, "loss": 0.5325, "step": 1474 }, { "epoch": 2.5801749271137027, "grad_norm": 0.554188072681427, "learning_rate": 5.77100406208777e-07, "loss": 0.5436, "step": 1475 }, { "epoch": 2.5819241982507286, "grad_norm": 0.5291545987129211, "learning_rate": 5.723555346951554e-07, "loss": 0.5089, "step": 1476 }, { "epoch": 2.583673469387755, "grad_norm": 0.571809709072113, "learning_rate": 5.676290652676092e-07, "loss": 0.5213, "step": 1477 }, { "epoch": 2.5854227405247814, "grad_norm": 0.5251079797744751, "learning_rate": 5.629210175701966e-07, "loss": 0.5111, "step": 1478 }, { "epoch": 2.5871720116618073, "grad_norm": 0.5705959796905518, "learning_rate": 5.582314111704084e-07, "loss": 0.5038, "step": 1479 }, { "epoch": 2.5889212827988337, "grad_norm": 0.5651777386665344, "learning_rate": 5.535602655590933e-07, "loss": 0.4956, "step": 1480 }, { "epoch": 2.59067055393586, "grad_norm": 0.5375713109970093, "learning_rate": 5.489076001503696e-07, "loss": 0.5063, "step": 1481 }, { "epoch": 2.592419825072886, "grad_norm": 0.5709909200668335, "learning_rate": 5.44273434281552e-07, "loss": 0.5233, "step": 1482 }, { "epoch": 2.5941690962099124, "grad_norm": 0.5434747338294983, "learning_rate": 5.396577872130676e-07, "loss": 0.4962, "step": 1483 }, { "epoch": 2.595918367346939, "grad_norm": 0.5386210083961487, "learning_rate": 5.350606781283746e-07, "loss": 0.517, "step": 1484 }, { "epoch": 2.5976676384839648, "grad_norm": 0.570266842842102, "learning_rate": 5.304821261338838e-07, "loss": 0.4758, "step": 1485 }, { "epoch": 2.599416909620991, "grad_norm": 0.5492234230041504, "learning_rate": 5.259221502588785e-07, "loss": 0.5078, "step": 1486 }, { "epoch": 2.6011661807580175, "grad_norm": 0.5483729839324951, "learning_rate": 5.213807694554418e-07, "loss": 0.515, "step": 1487 }, { "epoch": 2.6029154518950435, "grad_norm": 0.5586111545562744, "learning_rate": 5.168580025983661e-07, "loss": 0.5158, "step": 1488 }, { "epoch": 2.60466472303207, "grad_norm": 0.5542230606079102, "learning_rate": 5.123538684850826e-07, "loss": 0.5021, "step": 1489 }, { "epoch": 2.6064139941690962, "grad_norm": 0.5832734107971191, "learning_rate": 5.078683858355843e-07, "loss": 0.5036, "step": 1490 }, { "epoch": 2.608163265306122, "grad_norm": 0.6024210453033447, "learning_rate": 5.034015732923408e-07, "loss": 0.4779, "step": 1491 }, { "epoch": 2.6099125364431486, "grad_norm": 0.5830584764480591, "learning_rate": 4.989534494202303e-07, "loss": 0.4793, "step": 1492 }, { "epoch": 2.611661807580175, "grad_norm": 0.5585582256317139, "learning_rate": 4.945240327064521e-07, "loss": 0.5102, "step": 1493 }, { "epoch": 2.613411078717201, "grad_norm": 0.5456950068473816, "learning_rate": 4.901133415604603e-07, "loss": 0.4947, "step": 1494 }, { "epoch": 2.6151603498542273, "grad_norm": 0.5809838771820068, "learning_rate": 4.857213943138783e-07, "loss": 0.5097, "step": 1495 }, { "epoch": 2.6169096209912537, "grad_norm": 0.522596538066864, "learning_rate": 4.813482092204291e-07, "loss": 0.5394, "step": 1496 }, { "epoch": 2.6186588921282796, "grad_norm": 0.5998246073722839, "learning_rate": 4.769938044558564e-07, "loss": 0.5179, "step": 1497 }, { "epoch": 2.620408163265306, "grad_norm": 0.5542780160903931, "learning_rate": 4.726581981178485e-07, "loss": 0.5005, "step": 1498 }, { "epoch": 2.6221574344023324, "grad_norm": 0.5712290406227112, "learning_rate": 4.68341408225963e-07, "loss": 0.5216, "step": 1499 }, { "epoch": 2.6239067055393583, "grad_norm": 0.5135143995285034, "learning_rate": 4.640434527215554e-07, "loss": 0.5243, "step": 1500 }, { "epoch": 2.6256559766763847, "grad_norm": 0.5330318212509155, "learning_rate": 4.597643494677029e-07, "loss": 0.5222, "step": 1501 }, { "epoch": 2.627405247813411, "grad_norm": 0.5272485017776489, "learning_rate": 4.555041162491253e-07, "loss": 0.5229, "step": 1502 }, { "epoch": 2.629154518950437, "grad_norm": 0.5726596117019653, "learning_rate": 4.512627707721179e-07, "loss": 0.5351, "step": 1503 }, { "epoch": 2.6309037900874634, "grad_norm": 0.5291249752044678, "learning_rate": 4.4704033066447494e-07, "loss": 0.507, "step": 1504 }, { "epoch": 2.63265306122449, "grad_norm": 0.5335885882377625, "learning_rate": 4.428368134754174e-07, "loss": 0.5244, "step": 1505 }, { "epoch": 2.6344023323615158, "grad_norm": 0.5880476832389832, "learning_rate": 4.386522366755169e-07, "loss": 0.497, "step": 1506 }, { "epoch": 2.636151603498542, "grad_norm": 0.5444503426551819, "learning_rate": 4.344866176566259e-07, "loss": 0.5208, "step": 1507 }, { "epoch": 2.6379008746355685, "grad_norm": 0.5573350191116333, "learning_rate": 4.30339973731807e-07, "loss": 0.4996, "step": 1508 }, { "epoch": 2.6396501457725945, "grad_norm": 0.5412650108337402, "learning_rate": 4.2621232213525767e-07, "loss": 0.5193, "step": 1509 }, { "epoch": 2.641399416909621, "grad_norm": 0.5442783832550049, "learning_rate": 4.2210368002223833e-07, "loss": 0.5128, "step": 1510 }, { "epoch": 2.6431486880466473, "grad_norm": 0.5802140831947327, "learning_rate": 4.1801406446900563e-07, "loss": 0.4961, "step": 1511 }, { "epoch": 2.644897959183673, "grad_norm": 0.5686115026473999, "learning_rate": 4.139434924727359e-07, "loss": 0.506, "step": 1512 }, { "epoch": 2.6466472303206996, "grad_norm": 0.5195733904838562, "learning_rate": 4.0989198095145565e-07, "loss": 0.5048, "step": 1513 }, { "epoch": 2.648396501457726, "grad_norm": 0.5814155340194702, "learning_rate": 4.0585954674397964e-07, "loss": 0.5064, "step": 1514 }, { "epoch": 2.650145772594752, "grad_norm": 0.5381345748901367, "learning_rate": 4.018462066098261e-07, "loss": 0.513, "step": 1515 }, { "epoch": 2.6518950437317783, "grad_norm": 0.5991618037223816, "learning_rate": 3.978519772291578e-07, "loss": 0.4963, "step": 1516 }, { "epoch": 2.6536443148688047, "grad_norm": 0.5609515905380249, "learning_rate": 3.9387687520271e-07, "loss": 0.4996, "step": 1517 }, { "epoch": 2.6553935860058306, "grad_norm": 0.553184986114502, "learning_rate": 3.899209170517215e-07, "loss": 0.5089, "step": 1518 }, { "epoch": 2.657142857142857, "grad_norm": 0.5412158370018005, "learning_rate": 3.859841192178654e-07, "loss": 0.5437, "step": 1519 }, { "epoch": 2.6588921282798834, "grad_norm": 0.5803493857383728, "learning_rate": 3.8206649806318054e-07, "loss": 0.5047, "step": 1520 }, { "epoch": 2.6606413994169094, "grad_norm": 0.5883973836898804, "learning_rate": 3.781680698700052e-07, "loss": 0.5115, "step": 1521 }, { "epoch": 2.6623906705539357, "grad_norm": 0.5489740371704102, "learning_rate": 3.7428885084090594e-07, "loss": 0.533, "step": 1522 }, { "epoch": 2.664139941690962, "grad_norm": 0.5588462352752686, "learning_rate": 3.7042885709861586e-07, "loss": 0.4863, "step": 1523 }, { "epoch": 2.665889212827988, "grad_norm": 0.5239272713661194, "learning_rate": 3.665881046859615e-07, "loss": 0.4967, "step": 1524 }, { "epoch": 2.6676384839650145, "grad_norm": 0.542544960975647, "learning_rate": 3.627666095658017e-07, "loss": 0.5246, "step": 1525 }, { "epoch": 2.669387755102041, "grad_norm": 0.5593613386154175, "learning_rate": 3.589643876209542e-07, "loss": 0.4827, "step": 1526 }, { "epoch": 2.671137026239067, "grad_norm": 0.529664933681488, "learning_rate": 3.551814546541388e-07, "loss": 0.5302, "step": 1527 }, { "epoch": 2.672886297376093, "grad_norm": 0.564068078994751, "learning_rate": 3.514178263879048e-07, "loss": 0.5225, "step": 1528 }, { "epoch": 2.6746355685131196, "grad_norm": 0.5283238291740417, "learning_rate": 3.4767351846456744e-07, "loss": 0.5184, "step": 1529 }, { "epoch": 2.6763848396501455, "grad_norm": 0.545320451259613, "learning_rate": 3.439485464461423e-07, "loss": 0.5096, "step": 1530 }, { "epoch": 2.678134110787172, "grad_norm": 0.5564836859703064, "learning_rate": 3.40242925814282e-07, "loss": 0.5214, "step": 1531 }, { "epoch": 2.6798833819241983, "grad_norm": 0.5507330894470215, "learning_rate": 3.3655667197021455e-07, "loss": 0.5079, "step": 1532 }, { "epoch": 2.6816326530612242, "grad_norm": 0.5607947111129761, "learning_rate": 3.3288980023467146e-07, "loss": 0.5137, "step": 1533 }, { "epoch": 2.6833819241982506, "grad_norm": 0.5105504989624023, "learning_rate": 3.2924232584783e-07, "loss": 0.5137, "step": 1534 }, { "epoch": 2.685131195335277, "grad_norm": 0.5707457661628723, "learning_rate": 3.256142639692511e-07, "loss": 0.4913, "step": 1535 }, { "epoch": 2.686880466472303, "grad_norm": 0.5461696982383728, "learning_rate": 3.2200562967781015e-07, "loss": 0.5216, "step": 1536 }, { "epoch": 2.6886297376093293, "grad_norm": 0.6191033720970154, "learning_rate": 3.1841643797164145e-07, "loss": 0.4846, "step": 1537 }, { "epoch": 2.6903790087463557, "grad_norm": 0.5141412615776062, "learning_rate": 3.1484670376806856e-07, "loss": 0.5325, "step": 1538 }, { "epoch": 2.6921282798833817, "grad_norm": 0.5570558309555054, "learning_rate": 3.112964419035508e-07, "loss": 0.5097, "step": 1539 }, { "epoch": 2.693877551020408, "grad_norm": 0.5487263798713684, "learning_rate": 3.077656671336121e-07, "loss": 0.497, "step": 1540 }, { "epoch": 2.6956268221574344, "grad_norm": 0.5406529903411865, "learning_rate": 3.0425439413278855e-07, "loss": 0.5342, "step": 1541 }, { "epoch": 2.6973760932944604, "grad_norm": 0.5509239435195923, "learning_rate": 3.0076263749456156e-07, "loss": 0.487, "step": 1542 }, { "epoch": 2.6991253644314868, "grad_norm": 0.6025915741920471, "learning_rate": 2.972904117312997e-07, "loss": 0.5003, "step": 1543 }, { "epoch": 2.700874635568513, "grad_norm": 0.6044929027557373, "learning_rate": 2.938377312741952e-07, "loss": 0.5039, "step": 1544 }, { "epoch": 2.702623906705539, "grad_norm": 0.5352222323417664, "learning_rate": 2.9040461047320946e-07, "loss": 0.5214, "step": 1545 }, { "epoch": 2.7043731778425655, "grad_norm": 0.5507836937904358, "learning_rate": 2.869910635970108e-07, "loss": 0.4989, "step": 1546 }, { "epoch": 2.706122448979592, "grad_norm": 0.5685707330703735, "learning_rate": 2.835971048329128e-07, "loss": 0.5147, "step": 1547 }, { "epoch": 2.707871720116618, "grad_norm": 0.526738166809082, "learning_rate": 2.802227482868164e-07, "loss": 0.5357, "step": 1548 }, { "epoch": 2.709620991253644, "grad_norm": 0.5608901977539062, "learning_rate": 2.7686800798315536e-07, "loss": 0.5128, "step": 1549 }, { "epoch": 2.7113702623906706, "grad_norm": 0.5467588305473328, "learning_rate": 2.7353289786483384e-07, "loss": 0.523, "step": 1550 }, { "epoch": 2.7131195335276965, "grad_norm": 0.5658575892448425, "learning_rate": 2.7021743179316773e-07, "loss": 0.5305, "step": 1551 }, { "epoch": 2.714868804664723, "grad_norm": 0.5349102020263672, "learning_rate": 2.669216235478295e-07, "loss": 0.5098, "step": 1552 }, { "epoch": 2.7166180758017493, "grad_norm": 0.5319019556045532, "learning_rate": 2.636454868267918e-07, "loss": 0.5272, "step": 1553 }, { "epoch": 2.7183673469387752, "grad_norm": 0.5446067452430725, "learning_rate": 2.603890352462657e-07, "loss": 0.5221, "step": 1554 }, { "epoch": 2.7201166180758016, "grad_norm": 0.5547208786010742, "learning_rate": 2.5715228234065083e-07, "loss": 0.521, "step": 1555 }, { "epoch": 2.721865889212828, "grad_norm": 0.5646585822105408, "learning_rate": 2.539352415624741e-07, "loss": 0.4949, "step": 1556 }, { "epoch": 2.723615160349854, "grad_norm": 0.5565763711929321, "learning_rate": 2.5073792628233395e-07, "loss": 0.5257, "step": 1557 }, { "epoch": 2.7253644314868803, "grad_norm": 0.5628464221954346, "learning_rate": 2.4756034978884737e-07, "loss": 0.5212, "step": 1558 }, { "epoch": 2.7271137026239067, "grad_norm": 0.5718461275100708, "learning_rate": 2.4440252528859343e-07, "loss": 0.5112, "step": 1559 }, { "epoch": 2.7288629737609327, "grad_norm": 0.550563395023346, "learning_rate": 2.412644659060598e-07, "loss": 0.512, "step": 1560 }, { "epoch": 2.730612244897959, "grad_norm": 0.5817340612411499, "learning_rate": 2.381461846835831e-07, "loss": 0.5104, "step": 1561 }, { "epoch": 2.7323615160349854, "grad_norm": 0.5708975791931152, "learning_rate": 2.3504769458130127e-07, "loss": 0.509, "step": 1562 }, { "epoch": 2.7341107871720114, "grad_norm": 0.5502566695213318, "learning_rate": 2.3196900847709592e-07, "loss": 0.5196, "step": 1563 }, { "epoch": 2.735860058309038, "grad_norm": 0.5176409482955933, "learning_rate": 2.2891013916653992e-07, "loss": 0.5324, "step": 1564 }, { "epoch": 2.737609329446064, "grad_norm": 0.6496307253837585, "learning_rate": 2.2587109936284434e-07, "loss": 0.4896, "step": 1565 }, { "epoch": 2.73935860058309, "grad_norm": 0.5048460960388184, "learning_rate": 2.2285190169680281e-07, "loss": 0.5417, "step": 1566 }, { "epoch": 2.7411078717201165, "grad_norm": 0.4961240887641907, "learning_rate": 2.1985255871674548e-07, "loss": 0.5273, "step": 1567 }, { "epoch": 2.742857142857143, "grad_norm": 0.5291065573692322, "learning_rate": 2.168730828884802e-07, "loss": 0.5371, "step": 1568 }, { "epoch": 2.744606413994169, "grad_norm": 0.5688806772232056, "learning_rate": 2.1391348659524301e-07, "loss": 0.5096, "step": 1569 }, { "epoch": 2.746355685131195, "grad_norm": 0.549757182598114, "learning_rate": 2.1097378213764952e-07, "loss": 0.5145, "step": 1570 }, { "epoch": 2.7481049562682216, "grad_norm": 0.5596276521682739, "learning_rate": 2.0805398173363856e-07, "loss": 0.4947, "step": 1571 }, { "epoch": 2.7498542274052475, "grad_norm": 0.5566750764846802, "learning_rate": 2.051540975184263e-07, "loss": 0.5251, "step": 1572 }, { "epoch": 2.751603498542274, "grad_norm": 0.5373929142951965, "learning_rate": 2.0227414154445124e-07, "loss": 0.5276, "step": 1573 }, { "epoch": 2.7533527696793003, "grad_norm": 0.5685452818870544, "learning_rate": 1.9941412578132923e-07, "loss": 0.5114, "step": 1574 }, { "epoch": 2.7551020408163263, "grad_norm": 0.5587658882141113, "learning_rate": 1.9657406211579966e-07, "loss": 0.5118, "step": 1575 }, { "epoch": 2.7568513119533526, "grad_norm": 0.5619803071022034, "learning_rate": 1.9375396235167542e-07, "loss": 0.5052, "step": 1576 }, { "epoch": 2.758600583090379, "grad_norm": 0.5641624927520752, "learning_rate": 1.909538382098014e-07, "loss": 0.5015, "step": 1577 }, { "epoch": 2.760349854227405, "grad_norm": 0.5454655289649963, "learning_rate": 1.8817370132799496e-07, "loss": 0.5195, "step": 1578 }, { "epoch": 2.7620991253644314, "grad_norm": 0.5015835165977478, "learning_rate": 1.8541356326100436e-07, "loss": 0.5425, "step": 1579 }, { "epoch": 2.7638483965014577, "grad_norm": 0.528369665145874, "learning_rate": 1.826734354804588e-07, "loss": 0.4881, "step": 1580 }, { "epoch": 2.7655976676384837, "grad_norm": 0.5483030080795288, "learning_rate": 1.799533293748229e-07, "loss": 0.5119, "step": 1581 }, { "epoch": 2.76734693877551, "grad_norm": 0.5379408597946167, "learning_rate": 1.7725325624934676e-07, "loss": 0.5264, "step": 1582 }, { "epoch": 2.7690962099125365, "grad_norm": 0.555422842502594, "learning_rate": 1.7457322732601868e-07, "loss": 0.5092, "step": 1583 }, { "epoch": 2.7708454810495624, "grad_norm": 0.5653334856033325, "learning_rate": 1.7191325374352087e-07, "loss": 0.5157, "step": 1584 }, { "epoch": 2.772594752186589, "grad_norm": 0.5229434370994568, "learning_rate": 1.6927334655718107e-07, "loss": 0.5225, "step": 1585 }, { "epoch": 2.774344023323615, "grad_norm": 0.5698763132095337, "learning_rate": 1.6665351673892883e-07, "loss": 0.4854, "step": 1586 }, { "epoch": 2.776093294460641, "grad_norm": 0.511640727519989, "learning_rate": 1.640537751772464e-07, "loss": 0.5288, "step": 1587 }, { "epoch": 2.7778425655976675, "grad_norm": 0.5376846194267273, "learning_rate": 1.6147413267712852e-07, "loss": 0.5073, "step": 1588 }, { "epoch": 2.779591836734694, "grad_norm": 0.543543815612793, "learning_rate": 1.5891459996003166e-07, "loss": 0.5232, "step": 1589 }, { "epoch": 2.78134110787172, "grad_norm": 0.5183196663856506, "learning_rate": 1.5637518766383419e-07, "loss": 0.5055, "step": 1590 }, { "epoch": 2.7830903790087462, "grad_norm": 0.5270405411720276, "learning_rate": 1.5385590634279024e-07, "loss": 0.5263, "step": 1591 }, { "epoch": 2.7848396501457726, "grad_norm": 0.52741938829422, "learning_rate": 1.5135676646748587e-07, "loss": 0.5224, "step": 1592 }, { "epoch": 2.7865889212827986, "grad_norm": 0.5451001524925232, "learning_rate": 1.4887777842479412e-07, "loss": 0.5207, "step": 1593 }, { "epoch": 2.788338192419825, "grad_norm": 0.5300840735435486, "learning_rate": 1.464189525178361e-07, "loss": 0.5144, "step": 1594 }, { "epoch": 2.7900874635568513, "grad_norm": 0.5513871908187866, "learning_rate": 1.4398029896593447e-07, "loss": 0.5014, "step": 1595 }, { "epoch": 2.7918367346938773, "grad_norm": 0.5295477509498596, "learning_rate": 1.415618279045705e-07, "loss": 0.5028, "step": 1596 }, { "epoch": 2.7935860058309037, "grad_norm": 0.5609395503997803, "learning_rate": 1.3916354938534493e-07, "loss": 0.4989, "step": 1597 }, { "epoch": 2.79533527696793, "grad_norm": 0.5332491397857666, "learning_rate": 1.3678547337593494e-07, "loss": 0.5281, "step": 1598 }, { "epoch": 2.797084548104956, "grad_norm": 0.5447938442230225, "learning_rate": 1.3442760976005053e-07, "loss": 0.5104, "step": 1599 }, { "epoch": 2.7988338192419824, "grad_norm": 0.5559313297271729, "learning_rate": 1.3208996833739774e-07, "loss": 0.5037, "step": 1600 }, { "epoch": 2.8005830903790088, "grad_norm": 0.5697900652885437, "learning_rate": 1.2977255882363426e-07, "loss": 0.531, "step": 1601 }, { "epoch": 2.8023323615160347, "grad_norm": 0.5301501750946045, "learning_rate": 1.274753908503301e-07, "loss": 0.5319, "step": 1602 }, { "epoch": 2.804081632653061, "grad_norm": 0.5213930010795593, "learning_rate": 1.2519847396492757e-07, "loss": 0.5055, "step": 1603 }, { "epoch": 2.8058309037900875, "grad_norm": 0.5470053553581238, "learning_rate": 1.2294181763070345e-07, "loss": 0.5022, "step": 1604 }, { "epoch": 2.8075801749271134, "grad_norm": 0.5353496670722961, "learning_rate": 1.2070543122672695e-07, "loss": 0.4989, "step": 1605 }, { "epoch": 2.80932944606414, "grad_norm": 0.5346081256866455, "learning_rate": 1.1848932404782187e-07, "loss": 0.5237, "step": 1606 }, { "epoch": 2.811078717201166, "grad_norm": 0.5551982522010803, "learning_rate": 1.162935053045272e-07, "loss": 0.4859, "step": 1607 }, { "epoch": 2.8128279883381926, "grad_norm": 0.5483630299568176, "learning_rate": 1.1411798412306052e-07, "loss": 0.526, "step": 1608 }, { "epoch": 2.8145772594752185, "grad_norm": 0.5472379922866821, "learning_rate": 1.1196276954527907e-07, "loss": 0.4871, "step": 1609 }, { "epoch": 2.816326530612245, "grad_norm": 0.5801413655281067, "learning_rate": 1.0982787052864263e-07, "loss": 0.5028, "step": 1610 }, { "epoch": 2.8180758017492713, "grad_norm": 0.5389635562896729, "learning_rate": 1.0771329594617297e-07, "loss": 0.5107, "step": 1611 }, { "epoch": 2.8198250728862972, "grad_norm": 0.5470589399337769, "learning_rate": 1.0561905458642441e-07, "loss": 0.5245, "step": 1612 }, { "epoch": 2.8215743440233236, "grad_norm": 0.5346261858940125, "learning_rate": 1.0354515515343943e-07, "loss": 0.5061, "step": 1613 }, { "epoch": 2.82332361516035, "grad_norm": 0.5297913551330566, "learning_rate": 1.0149160626671595e-07, "loss": 0.5216, "step": 1614 }, { "epoch": 2.825072886297376, "grad_norm": 0.5592806339263916, "learning_rate": 9.945841646117393e-08, "loss": 0.5003, "step": 1615 }, { "epoch": 2.8268221574344023, "grad_norm": 0.5742281079292297, "learning_rate": 9.744559418711442e-08, "loss": 0.4919, "step": 1616 }, { "epoch": 2.8285714285714287, "grad_norm": 0.5436110496520996, "learning_rate": 9.545314781018889e-08, "loss": 0.5441, "step": 1617 }, { "epoch": 2.8303206997084547, "grad_norm": 0.534302294254303, "learning_rate": 9.34810856113616e-08, "loss": 0.5172, "step": 1618 }, { "epoch": 2.832069970845481, "grad_norm": 0.5853086709976196, "learning_rate": 9.152941578687902e-08, "loss": 0.5158, "step": 1619 }, { "epoch": 2.8338192419825075, "grad_norm": 0.5213744044303894, "learning_rate": 8.959814644823096e-08, "loss": 0.5417, "step": 1620 }, { "epoch": 2.8355685131195334, "grad_norm": 0.525771975517273, "learning_rate": 8.768728562211948e-08, "loss": 0.5274, "step": 1621 }, { "epoch": 2.83731778425656, "grad_norm": 0.5338490009307861, "learning_rate": 8.579684125042564e-08, "loss": 0.5245, "step": 1622 }, { "epoch": 2.839067055393586, "grad_norm": 0.5146360993385315, "learning_rate": 8.39268211901767e-08, "loss": 0.5164, "step": 1623 }, { "epoch": 2.840816326530612, "grad_norm": 0.5899013876914978, "learning_rate": 8.207723321351169e-08, "loss": 0.504, "step": 1624 }, { "epoch": 2.8425655976676385, "grad_norm": 0.5207250118255615, "learning_rate": 8.02480850076498e-08, "loss": 0.5054, "step": 1625 }, { "epoch": 2.844314868804665, "grad_norm": 0.5288645625114441, "learning_rate": 7.84393841748604e-08, "loss": 0.5058, "step": 1626 }, { "epoch": 2.846064139941691, "grad_norm": 0.5420721769332886, "learning_rate": 7.665113823243031e-08, "loss": 0.5222, "step": 1627 }, { "epoch": 2.847813411078717, "grad_norm": 0.5385634303092957, "learning_rate": 7.488335461262874e-08, "loss": 0.5169, "step": 1628 }, { "epoch": 2.8495626822157436, "grad_norm": 0.5309758186340332, "learning_rate": 7.313604066268409e-08, "loss": 0.5174, "step": 1629 }, { "epoch": 2.8513119533527695, "grad_norm": 0.5298731327056885, "learning_rate": 7.140920364474557e-08, "loss": 0.5143, "step": 1630 }, { "epoch": 2.853061224489796, "grad_norm": 0.5432310104370117, "learning_rate": 6.970285073585992e-08, "loss": 0.5, "step": 1631 }, { "epoch": 2.8548104956268223, "grad_norm": 0.5183479189872742, "learning_rate": 6.801698902793419e-08, "loss": 0.5116, "step": 1632 }, { "epoch": 2.8565597667638483, "grad_norm": 0.5376230478286743, "learning_rate": 6.635162552771468e-08, "loss": 0.5125, "step": 1633 }, { "epoch": 2.8583090379008746, "grad_norm": 0.5261098146438599, "learning_rate": 6.470676715675029e-08, "loss": 0.5022, "step": 1634 }, { "epoch": 2.860058309037901, "grad_norm": 0.5310938358306885, "learning_rate": 6.30824207513686e-08, "loss": 0.4844, "step": 1635 }, { "epoch": 2.861807580174927, "grad_norm": 0.531708836555481, "learning_rate": 6.147859306264493e-08, "loss": 0.5018, "step": 1636 }, { "epoch": 2.8635568513119534, "grad_norm": 0.527998149394989, "learning_rate": 5.98952907563749e-08, "loss": 0.5178, "step": 1637 }, { "epoch": 2.8653061224489798, "grad_norm": 0.5206876993179321, "learning_rate": 5.833252041304804e-08, "loss": 0.5193, "step": 1638 }, { "epoch": 2.8670553935860057, "grad_norm": 0.5256719589233398, "learning_rate": 5.6790288527818205e-08, "loss": 0.5192, "step": 1639 }, { "epoch": 2.868804664723032, "grad_norm": 0.5302660465240479, "learning_rate": 5.526860151047864e-08, "loss": 0.5368, "step": 1640 }, { "epoch": 2.8705539358600585, "grad_norm": 0.5012531876564026, "learning_rate": 5.376746568543423e-08, "loss": 0.5277, "step": 1641 }, { "epoch": 2.8723032069970844, "grad_norm": 0.5424719452857971, "learning_rate": 5.228688729167486e-08, "loss": 0.4783, "step": 1642 }, { "epoch": 2.874052478134111, "grad_norm": 0.512863039970398, "learning_rate": 5.082687248275098e-08, "loss": 0.5338, "step": 1643 }, { "epoch": 2.875801749271137, "grad_norm": 0.5619978904724121, "learning_rate": 4.9387427326745287e-08, "loss": 0.4935, "step": 1644 }, { "epoch": 2.877551020408163, "grad_norm": 0.5936635732650757, "learning_rate": 4.7968557806251645e-08, "loss": 0.488, "step": 1645 }, { "epoch": 2.8793002915451895, "grad_norm": 0.5491734743118286, "learning_rate": 4.657026981834623e-08, "loss": 0.5215, "step": 1646 }, { "epoch": 2.881049562682216, "grad_norm": 0.5370096564292908, "learning_rate": 4.5192569174565825e-08, "loss": 0.5187, "step": 1647 }, { "epoch": 2.882798833819242, "grad_norm": 0.5575273633003235, "learning_rate": 4.38354616008807e-08, "loss": 0.4985, "step": 1648 }, { "epoch": 2.8845481049562682, "grad_norm": 0.5481406450271606, "learning_rate": 4.2498952737675124e-08, "loss": 0.5126, "step": 1649 }, { "epoch": 2.8862973760932946, "grad_norm": 0.5205408334732056, "learning_rate": 4.118304813971963e-08, "loss": 0.5378, "step": 1650 }, { "epoch": 2.8880466472303206, "grad_norm": 0.546921968460083, "learning_rate": 3.988775327614991e-08, "loss": 0.4949, "step": 1651 }, { "epoch": 2.889795918367347, "grad_norm": 0.5364443063735962, "learning_rate": 3.8613073530444076e-08, "loss": 0.5137, "step": 1652 }, { "epoch": 2.8915451895043733, "grad_norm": 0.5273358821868896, "learning_rate": 3.7359014200401e-08, "loss": 0.5297, "step": 1653 }, { "epoch": 2.8932944606413993, "grad_norm": 0.5193560123443604, "learning_rate": 3.612558049811643e-08, "loss": 0.5233, "step": 1654 }, { "epoch": 2.8950437317784257, "grad_norm": 0.5287905335426331, "learning_rate": 3.491277754996192e-08, "loss": 0.5295, "step": 1655 }, { "epoch": 2.896793002915452, "grad_norm": 0.5730068683624268, "learning_rate": 3.3720610396564265e-08, "loss": 0.5025, "step": 1656 }, { "epoch": 2.898542274052478, "grad_norm": 0.5638485550880432, "learning_rate": 3.254908399278556e-08, "loss": 0.5159, "step": 1657 }, { "epoch": 2.9002915451895044, "grad_norm": 0.5666298270225525, "learning_rate": 3.1398203207699264e-08, "loss": 0.5164, "step": 1658 }, { "epoch": 2.9020408163265308, "grad_norm": 0.5241637825965881, "learning_rate": 3.0267972824573056e-08, "loss": 0.5385, "step": 1659 }, { "epoch": 2.9037900874635567, "grad_norm": 0.5395190715789795, "learning_rate": 2.9158397540846594e-08, "loss": 0.4978, "step": 1660 }, { "epoch": 2.905539358600583, "grad_norm": 0.5346434712409973, "learning_rate": 2.8069481968115985e-08, "loss": 0.5255, "step": 1661 }, { "epoch": 2.9072886297376095, "grad_norm": 0.5295270681381226, "learning_rate": 2.7001230632108245e-08, "loss": 0.5161, "step": 1662 }, { "epoch": 2.9090379008746354, "grad_norm": 0.5496765971183777, "learning_rate": 2.595364797266853e-08, "loss": 0.4887, "step": 1663 }, { "epoch": 2.910787172011662, "grad_norm": 0.5105111002922058, "learning_rate": 2.4926738343739044e-08, "loss": 0.5364, "step": 1664 }, { "epoch": 2.912536443148688, "grad_norm": 0.501460075378418, "learning_rate": 2.3920506013340727e-08, "loss": 0.5128, "step": 1665 }, { "epoch": 2.914285714285714, "grad_norm": 0.5282905101776123, "learning_rate": 2.2934955163555483e-08, "loss": 0.5213, "step": 1666 }, { "epoch": 2.9160349854227405, "grad_norm": 0.4911491572856903, "learning_rate": 2.1970089890509527e-08, "loss": 0.5391, "step": 1667 }, { "epoch": 2.917784256559767, "grad_norm": 0.527197539806366, "learning_rate": 2.1025914204357288e-08, "loss": 0.5152, "step": 1668 }, { "epoch": 2.919533527696793, "grad_norm": 0.5524855852127075, "learning_rate": 2.010243202926143e-08, "loss": 0.4912, "step": 1669 }, { "epoch": 2.9212827988338192, "grad_norm": 0.5504701137542725, "learning_rate": 1.919964720338119e-08, "loss": 0.4998, "step": 1670 }, { "epoch": 2.9230320699708456, "grad_norm": 0.5363790392875671, "learning_rate": 1.8317563478851275e-08, "loss": 0.5232, "step": 1671 }, { "epoch": 2.9247813411078716, "grad_norm": 0.5769304037094116, "learning_rate": 1.745618452177078e-08, "loss": 0.5121, "step": 1672 }, { "epoch": 2.926530612244898, "grad_norm": 0.5552437901496887, "learning_rate": 1.661551391218541e-08, "loss": 0.528, "step": 1673 }, { "epoch": 2.9282798833819244, "grad_norm": 0.568020224571228, "learning_rate": 1.579555514407305e-08, "loss": 0.5019, "step": 1674 }, { "epoch": 2.9300291545189503, "grad_norm": 0.5529026985168457, "learning_rate": 1.4996311625329886e-08, "loss": 0.5236, "step": 1675 }, { "epoch": 2.9317784256559767, "grad_norm": 0.538734495639801, "learning_rate": 1.4217786677755974e-08, "loss": 0.5363, "step": 1676 }, { "epoch": 2.933527696793003, "grad_norm": 0.5173326730728149, "learning_rate": 1.3459983537040255e-08, "loss": 0.5372, "step": 1677 }, { "epoch": 2.935276967930029, "grad_norm": 0.5513661503791809, "learning_rate": 1.2722905352749449e-08, "loss": 0.4995, "step": 1678 }, { "epoch": 2.9370262390670554, "grad_norm": 0.5056660175323486, "learning_rate": 1.2006555188311953e-08, "loss": 0.516, "step": 1679 }, { "epoch": 2.938775510204082, "grad_norm": 0.524448573589325, "learning_rate": 1.1310936021008411e-08, "loss": 0.5262, "step": 1680 }, { "epoch": 2.9405247813411077, "grad_norm": 0.4904527962207794, "learning_rate": 1.0636050741957282e-08, "loss": 0.5131, "step": 1681 }, { "epoch": 2.942274052478134, "grad_norm": 0.5269210338592529, "learning_rate": 9.981902156103174e-09, "loss": 0.516, "step": 1682 }, { "epoch": 2.9440233236151605, "grad_norm": 0.5365650653839111, "learning_rate": 9.348492982204083e-09, "loss": 0.5336, "step": 1683 }, { "epoch": 2.9457725947521864, "grad_norm": 0.553817093372345, "learning_rate": 8.73582585282362e-09, "loss": 0.4863, "step": 1684 }, { "epoch": 2.947521865889213, "grad_norm": 0.5731557607650757, "learning_rate": 8.143903314315471e-09, "loss": 0.4921, "step": 1685 }, { "epoch": 2.949271137026239, "grad_norm": 0.53493732213974, "learning_rate": 7.572727826817283e-09, "loss": 0.5169, "step": 1686 }, { "epoch": 2.951020408163265, "grad_norm": 0.4912327527999878, "learning_rate": 7.022301764235684e-09, "loss": 0.5242, "step": 1687 }, { "epoch": 2.9527696793002915, "grad_norm": 0.5187781453132629, "learning_rate": 6.492627414241282e-09, "loss": 0.4994, "step": 1688 }, { "epoch": 2.954518950437318, "grad_norm": 0.5247716307640076, "learning_rate": 5.983706978255343e-09, "loss": 0.5405, "step": 1689 }, { "epoch": 2.956268221574344, "grad_norm": 0.5549898147583008, "learning_rate": 5.495542571443135e-09, "loss": 0.5301, "step": 1690 }, { "epoch": 2.9580174927113703, "grad_norm": 0.5095196962356567, "learning_rate": 5.028136222702817e-09, "loss": 0.5321, "step": 1691 }, { "epoch": 2.9597667638483967, "grad_norm": 0.5471399426460266, "learning_rate": 4.581489874659895e-09, "loss": 0.5014, "step": 1692 }, { "epoch": 2.9615160349854226, "grad_norm": 0.4960484504699707, "learning_rate": 4.155605383656669e-09, "loss": 0.5224, "step": 1693 }, { "epoch": 2.963265306122449, "grad_norm": 0.5319105982780457, "learning_rate": 3.750484519745578e-09, "loss": 0.4781, "step": 1694 }, { "epoch": 2.9650145772594754, "grad_norm": 0.7028201818466187, "learning_rate": 3.366128966681981e-09, "loss": 0.5231, "step": 1695 }, { "epoch": 2.9667638483965013, "grad_norm": 0.5501881241798401, "learning_rate": 3.0025403219163806e-09, "loss": 0.4938, "step": 1696 }, { "epoch": 2.9685131195335277, "grad_norm": 0.5311588048934937, "learning_rate": 2.659720096588325e-09, "loss": 0.5083, "step": 1697 }, { "epoch": 2.970262390670554, "grad_norm": 0.5243121981620789, "learning_rate": 2.3376697155208516e-09, "loss": 0.5234, "step": 1698 }, { "epoch": 2.97201166180758, "grad_norm": 0.5697014927864075, "learning_rate": 2.036390517212716e-09, "loss": 0.4989, "step": 1699 }, { "epoch": 2.9737609329446064, "grad_norm": 0.515907347202301, "learning_rate": 1.7558837538345085e-09, "loss": 0.5217, "step": 1700 }, { "epoch": 2.975510204081633, "grad_norm": 0.5166839957237244, "learning_rate": 1.4961505912231e-09, "loss": 0.5074, "step": 1701 }, { "epoch": 2.9772594752186587, "grad_norm": 0.5417424440383911, "learning_rate": 1.2571921088755379e-09, "loss": 0.4981, "step": 1702 }, { "epoch": 2.979008746355685, "grad_norm": 0.5432780385017395, "learning_rate": 1.0390092999468248e-09, "loss": 0.5102, "step": 1703 }, { "epoch": 2.9807580174927115, "grad_norm": 0.5542859435081482, "learning_rate": 8.416030712432577e-10, "loss": 0.5122, "step": 1704 }, { "epoch": 2.9825072886297375, "grad_norm": 0.5469187498092651, "learning_rate": 6.649742432213169e-10, "loss": 0.534, "step": 1705 }, { "epoch": 2.984256559766764, "grad_norm": 0.5356168746948242, "learning_rate": 5.091235499821156e-10, "loss": 0.5014, "step": 1706 }, { "epoch": 2.9860058309037902, "grad_norm": 0.5511953234672546, "learning_rate": 3.740516392686244e-10, "loss": 0.5101, "step": 1707 }, { "epoch": 2.987755102040816, "grad_norm": 0.5562847852706909, "learning_rate": 2.5975907246400533e-10, "loss": 0.5127, "step": 1708 }, { "epoch": 2.9895043731778426, "grad_norm": 0.5612158179283142, "learning_rate": 1.6624632458939195e-10, "loss": 0.5106, "step": 1709 }, { "epoch": 2.991253644314869, "grad_norm": 0.555356502532959, "learning_rate": 9.351378430000335e-11, "loss": 0.5013, "step": 1710 }, { "epoch": 2.993002915451895, "grad_norm": 0.5495994091033936, "learning_rate": 4.1561753885144273e-11, "loss": 0.5058, "step": 1711 }, { "epoch": 2.9947521865889213, "grad_norm": 0.5467791557312012, "learning_rate": 1.0390449267649871e-11, "loss": 0.5011, "step": 1712 }, { "epoch": 2.9965014577259477, "grad_norm": 0.5400470495223999, "learning_rate": 0.0, "loss": 0.4957, "step": 1713 }, { "epoch": 2.9965014577259477, "step": 1713, "total_flos": 5.394627722522788e+18, "train_loss": 0.0, "train_runtime": 15.4777, "train_samples_per_second": 10636.432, "train_steps_per_second": 110.675 } ], "logging_steps": 1, "max_steps": 1713, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.394627722522788e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }